Optimization: principles and algorithms¶

Bierlaire, M. (2015). Optimization: Principles and Algorithms. EPFL Press.
The numbering of the algorithms, tables and page refer to the book.

Chapter 12: Trust region¶

Algorithm 12.1: Intersection with the trust region¶

In [1]:
import numpy as np
from scipy import linalg

# Define a function simple to call for the inner product
def scalarInner(a,b):
return(np.asscalar(a.T.dot(b)))

# Trust region of radius delta, centered at xhat
# xc in in the trust region.
# Define dc = xc - xhat. We have ||dc|| <= delta
# Consider xd outside of the trust region.
# Define dd = xd - xhat. We have ||dd|| > delta
# Find lbd such that || dc + lbd (dd - dc)|| = delta
def trustRegionIntersection(dc,d,delta):
a = scalarInner(d,d)
b = 2 * scalarInner(dc,d)
c = scalarInner(dc,dc) - delta ** 2
discriminant = b * b - 4.0 * a * c
return (- b + np.sqrt(discriminant) ) / (2 * a)



The following example is not in the book.

In [2]:
xhat = np.array([[1],[1]])
delta = 1.0
xc = np.array([[1],[1.5]])
xd = np.array([[2.5],[5]])
dc = xc - xhat
dd = xd - xhat
lbd = trustRegionIntersection(dc,dd-dc,delta)
lbd

Out[2]:
0.13677904350117376

We verify that $||d_C + \lambda (d_d - d_C)|| = \Delta$.

In [3]:
linalg.norm(dc + lbd *(dd - dc))

Out[3]:
0.9999999999999999

The point on the border is $x_C + \lambda (x_d - x_C)$.

In [4]:
xborder = xc + lbd * (xd - xc)
xborder

Out[4]:
array([[1.20516857],
[1.97872665]])

The following figure illustrates the various points involved.

In [5]:
def myplot(ax,x,name,shift=0.05):
ax.plot(x.item(0),x.item(1),marker='.')
ax.annotate(name,xy=(x.item(0),x.item(1)),xytext=(x.item(0)+shift,x.item(1)+shift))

%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [3,6]
circle = plt.Circle((1,1),1.0,fill=False)
ax = plt.gca()
ax.set_xlim((0, 3))
ax.set_ylim((0, 6))
shift=0.05

myplot(ax,xhat,'xhat')
myplot(ax,xc,'xc')
myplot(ax,xd,'xd')
myplot(ax,xborder,'xborder')

# Plot the line
plt.plot([xc.item(0),xd.item(0)],[xc.item(1),xd.item(1)])

plt.show()


Algorithm 12.2: Dogleg method¶

In [6]:
def cauchyNewtonDogleg(g,H):
alpha = scalarInner(g,g)
beta = scalarInner(g,H.dot(g))
dc = - (alpha / beta ) * g
dn = linalg.solve(H,-g).reshape(2,1)
eta = 0.2 + (0.8 * alpha * alpha / (beta * abs(scalarInner(g,dn))))
return dc,dn,eta*dn

def dogleg(g,H,delta):
"""
Returned type interpretation
type -2: negative curvature along Newton's direction
type -1: negative curvature along Cauchy's direction (i.e. along the gradient)
type  1: Partial Cauchy step
type  2: Newton step
type  3: Partial Newton step
type  4: Dogleg
"""

(dc,dn,dl) = cauchyNewtonDogleg(g,H)

# Check if the model is convex along the gradient direction

alpha = scalarInner(g,g)
beta = scalarInner(g,H.dot(g))
if beta <= 0:
dstar = -delta * g / np.sqrt(alpha)
return dstar,-1

# Compute the Cauchy point

normdc = alpha * np.sqrt(alpha) / beta ;
if normdc >= delta:
# The Cauchy point is outside the trust
# region. We move along the Cauchy
# direction until the border of the trut
# region.

dstar = (delta / normdc) * dc
return dstar, 1

# Compute Newton's point

normdn = linalg.norm(dn)

# Check the convexity of the model along Newton's direction

if scalarInner(dn,H.dot(dn)) <= 0.0:
# Return the Cauchy point
return dc,-2

if normdn <= delta:
# Newton's point is inside the trust region
return dn,2

# Compute the dogleg point

eta = 0.2 + (0.8 * alpha * alpha / (beta * abs(scalarInner(g,dn))))

partieldn = eta * linalg.norm(dn)

if partieldn <= delta:
# Dogleg point is inside the trust region
dstar = (delta / normdn) * dn ;
return dstar,3

# Between Cauchy and dogleg
nu = dl - dc
lbd = trustRegionIntersection(dc,nu,delta)
dstar = dc + lbd * nu ;
return dstar,4


Example 12.3 p. 295

In [7]:
xhat = np.array([[9],[1]])
g = np.array([[9],[9]])
H = np.array([[1., 0],
[0, 9.]])
delta = 1
(dstar1,type1) = dogleg(g,H,delta)
x1 = xhat + dstar1
print("Delta: {} Type: {}\nx={}".format(delta,type1,x1))
delta = 4
(dstar4,type4) = dogleg(g,H,delta)
x4 = xhat + dstar4
print("Delta: {} Type: {}\nx={}".format(delta,type4,x4))
delta = 8
(dstar8,type8) = dogleg(g,H,delta)
x8 = xhat + dstar8
print("Delta: {} Type: {}\nx={}".format(delta,type8,x8))

Delta: 1 Type: 1
x=[[8.29289322]
[0.29289322]]
Delta: 4 Type: 4
x=[[5.06523072]
[0.28056223]]
Delta: 8 Type: 3
x=[[1.04893012]
[0.11654779]]


Figure 12.2 (a) p. 296

In [8]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [6,6]

def theFunctionToPlot(x,y):
return(0.5 * x * x + 4.5 * y * y)

xlist = np.linspace(-1.0,10.0,1000)
ylist = np.linspace(-5.0,5.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)

ax = plt.gca()
ax.set_xlim((-1, 10))
ax.set_ylim((-5, 5))
(dc,dn,dl) = cauchyNewtonDogleg(g,H)
myplot(ax,xhat,'xhat')
xc = xhat + dc
myplot(ax,xc,'xC')
xn = xhat + dn
myplot(ax,xn,'xN')
xd = xhat + dl
myplot(ax,xhat+dl,'xd')
x_points = [xhat.item(0),xc.item(0),xd.item(0),xn.item(0)]
y_points = [xhat.item(1),xc.item(1),xd.item(1),xn.item(1)]
plt.plot(x_points,y_points)
plt.show()


Figure 12.2 (b) p. 296

In [9]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [6,6]

def theFunctionToPlot(x,y):
return(0.5 * x * x + 4.5 * y * y)

xlist = np.linspace(-1.0,10.0,1000)
ylist = np.linspace(-5.0,5.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)

circle1 = plt.Circle((9,1),1.0,fill=False)
circle4 = plt.Circle((9,1),4.0,fill=False)
circle8 = plt.Circle((9,1),8.0,fill=False)
ax = plt.gca()
ax.set_xlim((-1, 10))
ax.set_ylim((-5, 5))
myplot(ax,x1,'x1')
myplot(ax,x4,'x4')
myplot(ax,x8,'x8')
x_points = [xhat.item(0),xc.item(0),xd.item(0),xn.item(0)]
y_points = [xhat.item(1),xc.item(1),xd.item(1),xn.item(1)]
plt.plot(x_points,y_points)
plt.show()


Algorithm 12.3: Steihaug-Toint truncated conjugate gradient method¶

In [10]:
def truncatedConjugateGradient(g,H,delta):
"""
Returned type interpretation
type  1: convergence
type  2: out of the trust region
type  3: negative curvature detected
"""
tol = 1.0e-6
n = g.shape[0]
xk = np.zeros((n,1))
gk = g
dk = -gk
for k in range(n):
curv = scalarInner(dk,H.dot(dk))
if  curv <= 0:
# Negative curvature has been detected
type = 3
a = scalarInner(dk,dk)
b = 2 * scalarInner(xk,dk)
c = scalarInner(xk,xk) - delta * delta
rho = b * b - 4 * a * c ;
step = xk + ((-b + np.sqrt(rho)) / (2*a)) * dk
return step,type
alphak = - scalarInner(dk,gk) / curv
xkp1 = xk + alphak * dk
if linalg.norm(xkp1) > delta:
# Out of the trust region
type = 2
a = scalarInner(dk,dk)
b = 2 * scalarInner(xk,dk)
c = scalarInner(xk,xk) - delta * delta
rho = b * b - 4 * a * c ;
step = xk + ((-b + np.sqrt(rho)) / (2*a)) * dk
return step, type
xk = xkp1 ;
gkp1 = H.dot(xk) + g
betak = scalarInner(gkp1,gkp1) / scalarInner(gk,gk)
dk = -gkp1 + betak * dk
gk = gkp1
if linalg.norm(gkp1) <= tol:
type = 1
step = xk
return step,type
type = 1
step = xk
return step, type


We illustrate the method on the same example as before. This is not reported in the book. Note that there is no negative curvature here. Also, we have a large trust region ($\Delta=10$) to illustrate the case when the CG algorithm converges without hitting the trust region boundaries.

In [11]:
xhat = np.array([[9],[1]])
g = np.array([[9],[9]])
H = np.array([[1., 0],
[0, 9.]])
delta = 1
x1 = xhat + step1
print("Delta: {} Type: {}\nx={}".format(delta,type1,x1))
delta = 4
x4 = xhat + step4
print("Delta: {} Type: {}\nx={}".format(delta,type4,x4))
delta = 8
x8 = xhat + step8
print("Delta: {} Type: {}\nx={}".format(delta,type8,x8))
delta = 10
x10 = xhat + step10
print("Delta: {} Type: {}\nx={}".format(delta,type10,x10))

Delta: 1 Type: 2
x=[[8.29289322]
[0.29289322]]
Delta: 4 Type: 2
x=[[ 5.33058286]
[-0.59228698]]
Delta: 8 Type: 2
x=[[ 1.07876863]
[-0.11986318]]
Delta: 10 Type: 1
x=[[0.00000000e+00]
[2.22044605e-16]]

In [12]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [6,6]

def theFunctionToPlot(x,y):
return(0.5 * x * x + 4.5 * y * y)

xlist = np.linspace(-1.0,10.0,1000)
ylist = np.linspace(-5.0,5.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)

circle1 = plt.Circle((9,1),1.0,fill=False)
circle4 = plt.Circle((9,1),4.0,fill=False)
circle8 = plt.Circle((9,1),8.0,fill=False)
circle10 = plt.Circle((9,1),10.0,fill=False)
ax = plt.gca()
ax.set_xlim((-1, 10))
ax.set_ylim((-5, 5))
myplot(ax,x1,'x1')
myplot(ax,x4,'x4')
myplot(ax,x8,'x8')
myplot(ax,x10,'x10')
plt.show()


Algorithm 12.4: Newton's method with trust region¶

In [13]:
def newtonTrustRegion(obj,x0,delta0=10,eps=1.0e-6,dl=True,maxiter=1000):

eta1 = 0.01
eta2 = 0.9
k = 0
xk = x0
(f,g,H) = obj(xk)
iters = list()
delta = delta0 ;
iters.append([xk,f,linalg.norm(g),delta,0.0,'',''])
while linalg.norm(g) > eps and k < maxiter:
k=k+1
if dl:
(step,type) = dogleg(g,H,delta)
else:
(fc,gc,Hc) = obj(xk+step)
num = f - fc;
denom = -scalarInner(step,g) - 0.5 * scalarInner(step,H.dot(step))
rho = num / denom
if rho < eta1:
# Failure: reduce the trust region
delta = linalg.norm(step) / 2.0
status = "- " ;
else:
# Candidate accepted
xk = xk + step
f = fc
g = gc
H = Hc
if rho >= eta2:
# Enlarge the trust region
delta = 2 * delta
status = "++"
else:
status = "+ "
iters.append([xk,f,linalg.norm(g),delta,rho,type,status])
return xk,iters


Example 5.8: $f(x_1,x_2) = \frac{1}{2} x_1^2 + x_1 \cos(x_2)$

In [14]:
import math
def ex0508(x):
f = 0.5 * x.item(0) * x.item(0) + x.item(0) * math.cos(x.item(1))
g = np.array([[ x.item(0) + math.cos(x.item(1))],[ -x.item(0) * math.sin(x.item(1)) ] ])
H = np.array([[ 1 , -math.sin(x.item(1))],[ -math.sin(x.item(1)),-x.item(0)*math.cos(x.item(1))]])
return (f,g,H)


In [15]:
x0 = np.array([[1],[1]])
(sol,iters) = newtonTrustRegion(ex0508,x0)
sol

Out[15]:
array([[-1.00000025e+00],
[-5.40690858e-07]])

Table 12.1, page 304

In [16]:
print("k\txk\t\t\t\tf(xk)\t\t||Grad(xk)||\tDeltak\t\tRho\t\tStatus")
for k in range(len(iters)):
print("{}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{}\t{}".format(k,iters[k][0].item(0),iters[k][0].item(1),iters[k][1],iters[k][2],iters[k][3],iters[k][4],iters[k][5],iters[k][6]))

k	xk				f(xk)		||Grad(xk)||	Deltak		Rho		Status
0	+1.000000E+00	+1.000000E+00	+1.040302E+00	+1.755165E+00	+1.000000E+01	+0.000000E+00
1	-2.338451E-01	+1.364192E+00	-2.062862E-02	+2.306654E-01	+2.000000E+01	+9.614446E-01	2	++
2	-1.395487E-01	+6.124147E-01	-1.044505E-01	+6.834378E-01	+4.000000E+01	+9.592373E-01	-2	++
3	-9.344969E-01	+5.184577E-01	-3.750473E-01	+4.677489E-01	+8.000000E+01	+9.892413E-01	-2	++
4	-1.245337E+00	-2.418279E-01	-4.336678E-01	+4.052853E-01	+8.000000E+01	+3.535767E-01	2	+
5	-1.019246E+00	-3.995314E-02	-4.990014E-01	+4.537823E-02	+1.600000E+02	+1.068830E+00	2	++
6	-1.000770E+00	-7.033741E-04	-4.999995E-01	+1.043234E-03	+3.200000E+02	+1.014141E+00	2	++
7	-1.000000E+00	-5.406909E-07	-5.000000E-01	+5.944321E-07	+6.400000E+02	+1.000350E+00	2	++


Figure 12.3 (a) p. 303

In [17]:
%matplotlib inline
import matplotlib.pyplot as plt
def theFunctionToPlot(x,y):
return(0.5 * x * x + x * np.cos(y))

xlist = np.linspace(-2.0,2.0,1000)
ylist = np.linspace(-6.0,6.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()


Figure 12.3 (b)

In [18]:
%matplotlib inline
import matplotlib.pyplot as plt
def theFunctionToPlot(x,y):
return(0.5 * x * x + x * np.cos(y))

xlist = np.linspace(-1.3,1.1,1000)
ylist = np.linspace(-1,2,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()

In [19]:
x0 = np.array([[1],[1]])
(sol,iters) = newtonTrustRegion(ex0508,x0,delta0=1)
sol

Out[19]:
array([[-1.00000000e+00],
[ 7.23075033e-12]])

Table 12.2, page 305

In [20]:
print("k\txk\t\t\t\tf(xk)\t\t||Grad(xk)||\tDeltak\t\tRho\t\tStatus")
for k in range(len(iters)):
print("{}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{}\t{}".format(k,iters[k][0].item(0),iters[k][0].item(1),iters[k][1],iters[k][2],iters[k][3],iters[k][4],iters[k][5],iters[k][6]))

k	xk				f(xk)		||Grad(xk)||	Deltak		Rho		Status
0	+1.000000E+00	+1.000000E+00	+1.040302E+00	+1.755165E+00	+1.000000E+00	+0.000000E+00
1	+1.224174E-01	+1.479426E+00	+1.866284E-02	+2.459926E-01	+2.000000E+00	+9.475885E-01	1	++
2	-1.016285E-03	+1.570031E+00	-2.614639E-07	+1.046790E-03	+4.000000E+00	+9.975358E-01	2	++
3	-5.364085E-04	+1.568087E+00	-1.309492E-06	+2.238245E-03	+8.000000E+00	+1.000000E+00	-2	++
4	-5.089851E-03	+1.566963E+00	-6.558303E-06	+5.242594E-03	+1.600000E+01	+9.999983E-01	-2	++
5	-2.686573E-03	+1.557227E+00	-3.284477E-05	+1.120891E-02	+3.200000E+01	+1.000001E+00	-2	++
6	-2.548821E-02	+1.551598E+00	-1.644658E-04	+2.624866E-02	+6.400000E+01	+9.999570E-01	-2	++
7	-1.346383E-02	+1.502894E+00	-8.228867E-04	+5.602070E-02	+1.280000E+02	+1.000022E+00	-2	++
8	-1.272299E-01	+1.474795E+00	-4.101757E-03	+1.304729E-01	+2.560000E+02	+9.989287E-01	-2	++
9	-6.847501E-02	+1.237641E+00	-2.004875E-02	+2.665266E-01	+5.120000E+02	+1.000511E+00	-2	++
10	-5.884662E-01	+1.107498E+00	-8.983993E-02	+5.451343E-01	+1.024000E+03	+9.770147E-01	-2	++
11	-4.025327E-01	+4.160752E-01	-2.871732E-01	+5.373698E-01	+2.048000E+03	+1.011165E+00	-2	++
12	-4.025327E-01	+4.160752E-01	-2.871732E-01	+5.373698E-01	+1.095337E+00	-2.885648E+00	2	-
13	-1.093499E+00	-4.338238E-01	-3.943328E-01	+4.959024E-01	+1.095337E+00	+2.994889E-01	4	+
14	-1.103954E+00	+3.386290E-02	-4.939639E-01	+1.110088E-01	+2.190673E+00	+9.353985E-01	2	++
15	-1.000466E+00	+3.162676E-03	-4.999949E-01	+3.199024E-03	+4.381346E+00	+1.008126E+00	2	++
16	-1.000005E+00	+1.447122E-06	-5.000000E-01	+5.202009E-06	+8.762693E+00	+1.000448E+00	2	++
17	-1.000000E+00	+7.230750E-12	-5.000000E-01	+7.306182E-12	+1.752539E+01	+1.000006E+00	2	++


Figure 12.4 (a), page 307

In [21]:
%matplotlib inline
import matplotlib.pyplot as plt
def theFunctionToPlot(x,y):
return(0.5 * x * x + x * np.cos(y))

xlist = np.linspace(-2.0,2.0,1000)
ylist = np.linspace(-6.0,6.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()


Figure 12.4 (b) page 307

In [22]:
%matplotlib inline
import matplotlib.pyplot as plt
def theFunctionToPlot(x,y):
return(0.5 * x * x + x * np.cos(y))

xlist = np.linspace(-1.3,1.1,1000)
ylist = np.linspace(-1,2,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()

In [23]:
x0 = np.array([[1],[1]])
(sol,iters) = newtonTrustRegion(ex0508,x0,dl=False)
sol

Out[23]:
array([[0.99999957],
[3.14159243]])

Table 12.3, page 306

In [24]:
print("k\txk\t\t\t\tf(xk)\t\t||Grad(xk)||\tDeltak\t\tRho\t\tStatus")
for k in range(len(iters)):
print("{}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{}\t{}".format(k,iters[k][0].item(0),iters[k][0].item(1),iters[k][1],iters[k][2],iters[k][3],iters[k][4],iters[k][5],iters[k][6]))

k	xk				f(xk)		||Grad(xk)||	Deltak		Rho		Status
0	+1.000000E+00	+1.000000E+00	+1.040302E+00	+1.755165E+00	+1.000000E+01	+0.000000E+00
1	+1.000000E+00	+1.000000E+00	+1.040302E+00	+1.755165E+00	+5.000000E+00	-7.519745E-02	3	-
2	+1.000000E+00	+1.000000E+00	+1.040302E+00	+1.755165E+00	+2.500000E+00	-1.239910E-01	3	-
3	+5.502300E-01	+3.459209E+00	-3.713324E-01	+4.351213E-01	+2.500000E+00	+4.196236E-01	3	+
4	+1.167903E+00	+2.761422E+00	-4.025175E-01	+4.950629E-01	+2.500000E+00	+1.700281E-01	1	+
5	+1.063652E+00	+3.125362E+00	-4.978341E-01	+6.607830E-02	+5.000000E+00	+1.043574E+00	1	++
6	+1.000116E+00	+3.140624E+00	-4.999995E-01	+9.752760E-04	+1.000000E+01	+1.003425E+00	1	++
7	+9.999996E-01	+3.141592E+00	-5.000000E-01	+4.819965E-07	+2.000000E+01	+1.000114E+00	1	++


Figure 12.5 (a), page 307

In [25]:
%matplotlib inline
import matplotlib.pyplot as plt
def theFunctionToPlot(x,y):
return(0.5 * x * x + x * np.cos(y))

xlist = np.linspace(-2.0,2.0,1000)
ylist = np.linspace(-6.0,6.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()