Bierlaire, M. (2015). Optimization: Principles and Algorithms. EPFL Press.
The numbering of the algorithms, tables, and pages refers to the book.
import numpy as np
from scipy import linalg
# Small helper for the inner product of two column vectors.
def scalarInner(a, b):
    """Return the inner product a^T b of two column vectors as a Python scalar.

    Uses ndarray.item(): np.asscalar was deprecated in NumPy 1.16 and
    removed in NumPy 1.23, so the original call fails on modern NumPy.
    """
    return a.T.dot(b).item()
# Trust region of radius delta, centered at xhat.
# xc is in the trust region: with dc = xc - xhat, we have ||dc|| <= delta.
# xd is outside the trust region: with dd = xd - xhat, we have ||dd|| > delta.
# Find lbd such that ||dc + lbd * (dd - dc)|| = delta.
def trustRegionIntersection(dc, d, delta):
    """Return lbd >= 0 such that ||dc + lbd * d|| = delta.

    ||dc + lbd * d||^2 = delta^2 is a quadratic equation in lbd; the
    root in the direction of d (the '+' root) is returned.
    """
    quadCoef = scalarInner(d, d)
    linCoef = 2 * scalarInner(dc, d)
    constCoef = scalarInner(dc, dc) - delta ** 2
    discriminant = linCoef * linCoef - 4.0 * quadCoef * constCoef
    return (np.sqrt(discriminant) - linCoef) / (2 * quadCoef)
The following example is not in the book.
# Example (not from the book): trust region of radius 1 centered at (1, 1);
# xc lies inside the region, xd lies outside.
xhat = np.array([[1],[1]])
delta = 1.0
xc = np.array([[1],[1.5]])
xd = np.array([[2.5],[5]])
# Steps from the center to each point.
dc = xc - xhat
dd = xd - xhat
# Fraction of the segment from dc to dd at which the boundary is crossed.
lbd = trustRegionIntersection(dc,dd-dc,delta)
lbd
We verify that $||d_C + \lambda (d_d - d_C)|| = \Delta$.
linalg.norm(dc + lbd *(dd - dc))
The point on the border is $x_C + \lambda (x_d - x_C)$.
# The corresponding point in the original coordinates, on the border.
xborder = xc + lbd * (xd - xc)
xborder
The following figure illustrates the various points involved.
def myplot(ax, x, name, shift=0.05):
    """Mark the 2-D column vector x on the axes ax and label it with name.

    The label is offset by `shift` in both coordinates so that it does
    not overlap the marker.
    """
    px, py = x.item(0), x.item(1)
    ax.plot(px, py, marker='.')
    ax.annotate(name, xy=(px, py), xytext=(px + shift, py + shift))
# Notebook magic: render matplotlib figures inline.
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [3,6]
# The trust region: a circle of radius delta = 1 centered at xhat = (1, 1).
circle = plt.Circle((1,1),1.0,fill=False)
ax = plt.gca()
ax.set_xlim((0, 3))
ax.set_ylim((0, 6))
ax.add_artist(circle)
# NOTE(review): this local shift is never used; myplot applies its own default.
shift=0.05
# Mark and label the center, the inner point, the outer point, and the
# intersection with the border.
myplot(ax,xhat,'xhat')
myplot(ax,xc,'xc')
myplot(ax,xd,'xd')
myplot(ax,xborder,'xborder')
# Plot the line
plt.plot([xc.item(0),xd.item(0)],[xc.item(1),xd.item(1)])
plt.show()
def cauchyNewtonDogleg(g, H):
    """Return the Cauchy step, the Newton step and the dogleg step.

    g: gradient of the quadratic model (column vector).
    H: Hessian of the quadratic model.
    Returns (dc, dn, dl) where dl = eta * dn is the dogleg point.
    """
    alpha = scalarInner(g, g)
    beta = scalarInner(g, H.dot(g))
    # Cauchy step: minimizer of the model along the steepest descent direction.
    dc = -(alpha / beta) * g
    # Newton step. reshape(-1, 1) keeps a column vector for any dimension n,
    # instead of the original hard-coded reshape(2, 1) that only worked for n=2.
    dn = linalg.solve(H, -g).reshape(-1, 1)
    eta = 0.2 + (0.8 * alpha * alpha / (beta * abs(scalarInner(g, dn))))
    return dc, dn, eta * dn
def dogleg(g, H, delta):
    """Dogleg method for the trust region subproblem:
    approximately minimize g'd + 0.5 d'Hd subject to ||d|| <= delta.

    Returns (dstar, code) where code is:
      -2: negative curvature along Newton's direction
      -1: negative curvature along Cauchy's direction (i.e. along the gradient)
       1: partial Cauchy step
       2: Newton step
       3: partial Newton step
       4: dogleg
    """
    (dc, dn, dl) = cauchyNewtonDogleg(g, H)
    # Check if the model is convex along the gradient direction.
    alpha = scalarInner(g, g)
    beta = scalarInner(g, H.dot(g))
    if beta <= 0:
        # Negative curvature: go to the boundary along -g.
        dstar = -delta * g / np.sqrt(alpha)
        return dstar, -1
    # Norm of the Cauchy step.
    normdc = alpha * np.sqrt(alpha) / beta
    if normdc >= delta:
        # The Cauchy point is outside the trust region. We move along the
        # Cauchy direction until the border of the trust region.
        dstar = (delta / normdc) * dc
        return dstar, 1
    # Compute Newton's point.
    normdn = linalg.norm(dn)
    # Check the convexity of the model along Newton's direction.
    if scalarInner(dn, H.dot(dn)) <= 0.0:
        # Return the Cauchy point.
        return dc, -2
    if normdn <= delta:
        # Newton's point is inside the trust region.
        return dn, 2
    # Norm of the dogleg point dl = eta * dn. The original recomputed eta
    # here, duplicating the formula in cauchyNewtonDogleg; ||dl|| is the
    # same quantity.
    normdl = linalg.norm(dl)
    if normdl <= delta:
        # The dogleg point is inside the trust region: the boundary is
        # crossed on the segment [dl, dn], which lies along direction dn.
        dstar = (delta / normdn) * dn
        return dstar, 3
    # The boundary is crossed between the Cauchy point and the dogleg point.
    nu = dl - dc
    lbd = trustRegionIntersection(dc, nu, delta)
    dstar = dc + lbd * nu
    return dstar, 4
Example 12.3 p. 295
# Data of Example 12.3: gradient g and Hessian H of the quadratic model
# at the current iterate xhat.
xhat = np.array([[9],[1]])
g = np.array([[9],[9]])
H = np.array([[1., 0],
[0, 9.]])
# Solve the subproblem with the dogleg method for three increasing radii.
delta = 1
(dstar1,type1) = dogleg(g,H,delta)
x1 = xhat + dstar1
print("Delta: {} Type: {}\nx={}".format(delta,type1,x1))
delta = 4
(dstar4,type4) = dogleg(g,H,delta)
x4 = xhat + dstar4
print("Delta: {} Type: {}\nx={}".format(delta,type4,x4))
delta = 8
(dstar8,type8) = dogleg(g,H,delta)
x8 = xhat + dstar8
print("Delta: {} Type: {}\nx={}".format(delta,type8,x8))
Figure 12.2 (a) p. 296
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [6,6]
# Level curves of the function whose gradient and Hessian at xhat = (9, 1)
# are the g and H of Example 12.3.
def theFunctionToPlot(x,y):
    return(0.5 * x * x + 4.5 * y * y)
xlist = np.linspace(-1.0,10.0,1000)
ylist = np.linspace(-5.0,5.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
ax = plt.gca()
ax.set_xlim((-1, 10))
ax.set_ylim((-5, 5))
# Cauchy, Newton and dogleg steps for the model defined by g and H.
(dc,dn,dl) = cauchyNewtonDogleg(g,H)
myplot(ax,xhat,'xhat')
xc = xhat + dc
myplot(ax,xc,'xC')
xn = xhat + dn
myplot(ax,xn,'xN')
xd = xhat + dl
myplot(ax,xhat+dl,'xd')
# The dogleg path: xhat -> Cauchy point -> dogleg point -> Newton point.
x_points = [xhat.item(0),xc.item(0),xd.item(0),xn.item(0)]
y_points = [xhat.item(1),xc.item(1),xd.item(1),xn.item(1)]
plt.plot(x_points,y_points)
plt.show()
Figure 12.2 (b) p. 296
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [6,6]
# Same level curves, now with the three trust regions and the dogleg solutions.
def theFunctionToPlot(x,y):
    return(0.5 * x * x + 4.5 * y * y)
xlist = np.linspace(-1.0,10.0,1000)
ylist = np.linspace(-5.0,5.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
# Trust regions of radius 1, 4 and 8 centered at xhat = (9, 1).
circle1 = plt.Circle((9,1),1.0,fill=False)
circle4 = plt.Circle((9,1),4.0,fill=False)
circle8 = plt.Circle((9,1),8.0,fill=False)
ax = plt.gca()
ax.set_xlim((-1, 10))
ax.set_ylim((-5, 5))
ax.add_artist(circle1)
ax.add_artist(circle4)
ax.add_artist(circle8)
# Solutions of the subproblem for each radius (computed above).
myplot(ax,x1,'x1')
myplot(ax,x4,'x4')
myplot(ax,x8,'x8')
# The dogleg path computed for the previous figure.
x_points = [xhat.item(0),xc.item(0),xd.item(0),xn.item(0)]
y_points = [xhat.item(1),xc.item(1),xd.item(1),xn.item(1)]
plt.plot(x_points,y_points)
plt.show()
def truncatedConjugateGradient(g, H, delta):
    """Truncated conjugate gradient method for the trust region subproblem:
    approximately minimize g'x + 0.5 x'Hx subject to ||x|| <= delta.

    Returns (step, code) where code is:
      1: convergence
      2: out of the trust region
      3: negative curvature detected

    The original duplicated the boundary-intersection computation in the
    two early-exit branches and shadowed the builtin `type`; the shared
    code is now a local helper.
    """
    def boundaryStep(xk, dk):
        # Move from xk along dk to the trust region boundary: the largest
        # root t of ||xk + t * dk|| = delta (quadratic equation in t).
        a = scalarInner(dk, dk)
        b = 2 * scalarInner(xk, dk)
        c = scalarInner(xk, xk) - delta * delta
        rho = b * b - 4 * a * c
        return xk + ((-b + np.sqrt(rho)) / (2 * a)) * dk

    tol = 1.0e-6
    n = g.shape[0]
    xk = np.zeros((n, 1))
    gk = g
    dk = -gk
    for k in range(n):
        curv = scalarInner(dk, H.dot(dk))
        if curv <= 0:
            # Negative curvature has been detected: stop on the boundary.
            return boundaryStep(xk, dk), 3
        alphak = -scalarInner(dk, gk) / curv
        xkp1 = xk + alphak * dk
        if linalg.norm(xkp1) > delta:
            # The CG iterate leaves the trust region: stop on the boundary.
            return boundaryStep(xk, dk), 2
        xk = xkp1
        gkp1 = H.dot(xk) + g
        betak = scalarInner(gkp1, gkp1) / scalarInner(gk, gk)
        dk = -gkp1 + betak * dk
        gk = gkp1
        if linalg.norm(gkp1) <= tol:
            # Converged: the gradient of the model is numerically zero.
            return xk, 1
    # CG terminates in at most n iterations for an n-dimensional quadratic.
    return xk, 1
We illustrate the method on the same example as before. This is not reported in the book. Note that there is no negative curvature here. Also, we have a large trust region ($\Delta=10$) to illustrate the case when the CG algorithm converges without hitting the trust region boundaries.
# Same data as Example 12.3.
xhat = np.array([[9],[1]])
g = np.array([[9],[9]])
H = np.array([[1., 0],
[0, 9.]])
# Solve the subproblem with the truncated CG for four increasing radii.
delta = 1
(step1,type1) = truncatedConjugateGradient(g,H,delta)
x1 = xhat + step1
print("Delta: {} Type: {}\nx={}".format(delta,type1,x1))
delta = 4
(step4,type4) = truncatedConjugateGradient(g,H,delta)
x4 = xhat + step4
print("Delta: {} Type: {}\nx={}".format(delta,type4,x4))
delta = 8
(step8,type8) = truncatedConjugateGradient(g,H,delta)
x8 = xhat + step8
print("Delta: {} Type: {}\nx={}".format(delta,type8,x8))
delta = 10
(step10,type10) = truncatedConjugateGradient(g,H,delta)
x10 = xhat + step10
print("Delta: {} Type: {}\nx={}".format(delta,type10,x10))
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = [6,6]
# Level curves, the four trust regions, and the truncated CG solutions.
def theFunctionToPlot(x,y):
    return(0.5 * x * x + 4.5 * y * y)
xlist = np.linspace(-1.0,10.0,1000)
ylist = np.linspace(-5.0,5.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
# Trust regions of radius 1, 4, 8 and 10 centered at xhat = (9, 1).
circle1 = plt.Circle((9,1),1.0,fill=False)
circle4 = plt.Circle((9,1),4.0,fill=False)
circle8 = plt.Circle((9,1),8.0,fill=False)
circle10 = plt.Circle((9,1),10.0,fill=False)
ax = plt.gca()
ax.set_xlim((-1, 10))
ax.set_ylim((-5, 5))
ax.add_artist(circle1)
ax.add_artist(circle4)
ax.add_artist(circle8)
ax.add_artist(circle10)
# Solutions of the subproblem for each radius (computed above).
myplot(ax,x1,'x1')
myplot(ax,x4,'x4')
myplot(ax,x8,'x8')
myplot(ax,x10,'x10')
plt.show()
def newtonTrustRegion(obj, x0, delta0=10, eps=1.0e-6, dl=True, maxiter=1000):
    """Newton's method with trust region.

    obj: function returning the tuple (f, g, H) at a point
         (value, gradient, Hessian).
    x0: starting point (column vector).
    delta0: initial trust region radius.
    eps: stopping tolerance on the gradient norm.
    dl: if True, solve the subproblem with the dogleg method;
        otherwise with the truncated conjugate gradient.
    maxiter: maximum number of iterations.

    Returns (xk, iters), where iters logs for every iteration
    [xk, f, ||g||, delta, rho, subproblem code, status].
    """
    eta1 = 0.01
    eta2 = 0.9
    k = 0
    xk = x0
    (f, g, H) = obj(xk)
    iters = []
    delta = delta0
    iters.append([xk, f, linalg.norm(g), delta, 0.0, '', ''])
    while linalg.norm(g) > eps and k < maxiter:
        k = k + 1
        # Solve the trust region subproblem for the current radius.
        # stepType is the code returned by the subproblem solver
        # (renamed from `type`, which shadowed the builtin).
        if dl:
            (step, stepType) = dogleg(g, H, delta)
        else:
            (step, stepType) = truncatedConjugateGradient(g, H, delta)
        (fc, gc, Hc) = obj(xk + step)
        # rho compares the actual reduction of f with the reduction
        # predicted by the quadratic model.
        num = f - fc
        denom = -scalarInner(step, g) - 0.5 * scalarInner(step, H.dot(step))
        rho = num / denom
        if rho < eta1:
            # Failure: reject the candidate and reduce the trust region.
            delta = linalg.norm(step) / 2.0
            status = "- "
        else:
            # Candidate accepted.
            xk = xk + step
            f = fc
            g = gc
            H = Hc
            if rho >= eta2:
                # Very good agreement with the model: enlarge the trust region.
                delta = 2 * delta
                status = "++"
            else:
                status = "+ "
        iters.append([xk, f, linalg.norm(g), delta, rho, stepType, status])
    return xk, iters
Example 5.8: $f(x_1,x_2) = \frac{1}{2} x_1^2 + x_1 \cos(x_2)$
import math
def ex0508(x):
    """Example 5.8 of the book: f(x) = 0.5 * x1^2 + x1 * cos(x2).

    Returns the tuple (f, g, H): value, gradient and Hessian at x.
    """
    x1, x2 = x.item(0), x.item(1)
    s, c = math.sin(x2), math.cos(x2)
    f = 0.5 * x1 * x1 + x1 * c
    g = np.array([[x1 + c], [-x1 * s]])
    H = np.array([[1, -s], [-s, -x1 * c]])
    return (f, g, H)
# Run the trust region algorithm (dogleg subproblem) from (1, 1).
x0 = np.array([[1],[1]])
(sol,iters) = newtonTrustRegion(ex0508,x0)
sol
Table 12.1, page 304
# Print the log of the iterations in the format of Table 12.1.
print("k\txk\t\t\t\tf(xk)\t\t||Grad(xk)||\tDeltak\t\tRho\t\tStatus")
for k in range(len(iters)):
    print("{}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{}\t{}".format(k,iters[k][0].item(0),iters[k][0].item(1),iters[k][1],iters[k][2],iters[k][3],iters[k][4],iters[k][5],iters[k][6]))
Figure 12.3 (a) p. 303
%matplotlib inline
import matplotlib.pyplot as plt
# Level curves of the objective of Example 5.8 and the path of the iterates.
def theFunctionToPlot(x,y):
    return(0.5 * x * x + x * np.cos(y))
xlist = np.linspace(-2.0,2.0,1000)
ylist = np.linspace(-6.0,6.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
# Coordinates of the iterates logged in iters.
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()
Figure 12.3 (b)
%matplotlib inline
import matplotlib.pyplot as plt
# Same picture, zoomed in on the region visited by the iterates.
def theFunctionToPlot(x,y):
    return(0.5 * x * x + x * np.cos(y))
xlist = np.linspace(-1.3,1.1,1000)
ylist = np.linspace(-1,2,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()
# Rerun with a smaller initial trust region radius (delta0 = 1).
x0 = np.array([[1],[1]])
(sol,iters) = newtonTrustRegion(ex0508,x0,delta0=1)
sol
Table 12.2, page 305
# Print the log of the iterations in the format of Table 12.2.
print("k\txk\t\t\t\tf(xk)\t\t||Grad(xk)||\tDeltak\t\tRho\t\tStatus")
for k in range(len(iters)):
    print("{}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{}\t{}".format(k,iters[k][0].item(0),iters[k][0].item(1),iters[k][1],iters[k][2],iters[k][3],iters[k][4],iters[k][5],iters[k][6]))
Figure 12.4 (a), page 307
%matplotlib inline
import matplotlib.pyplot as plt
# Level curves of the objective of Example 5.8 and the path of the iterates
# for the run with delta0 = 1.
def theFunctionToPlot(x,y):
    return(0.5 * x * x + x * np.cos(y))
xlist = np.linspace(-2.0,2.0,1000)
ylist = np.linspace(-6.0,6.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()
Figure 12.4 (b) page 307
%matplotlib inline
import matplotlib.pyplot as plt
# Same picture, zoomed in on the region visited by the iterates.
def theFunctionToPlot(x,y):
    return(0.5 * x * x + x * np.cos(y))
xlist = np.linspace(-1.3,1.1,1000)
ylist = np.linspace(-1,2,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()
# Rerun using the truncated conjugate gradient subproblem solver.
x0 = np.array([[1],[1]])
(sol,iters) = newtonTrustRegion(ex0508,x0,dl=False)
sol
Table 12.3, page 306
# Print the log of the iterations in the format of Table 12.3.
print("k\txk\t\t\t\tf(xk)\t\t||Grad(xk)||\tDeltak\t\tRho\t\tStatus")
for k in range(len(iters)):
    print("{}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{}\t{}".format(k,iters[k][0].item(0),iters[k][0].item(1),iters[k][1],iters[k][2],iters[k][3],iters[k][4],iters[k][5],iters[k][6]))
Figure 12.5 (a), page 307
%matplotlib inline
import matplotlib.pyplot as plt
# Level curves and path of the iterates for the truncated CG run.
def theFunctionToPlot(x,y):
    return(0.5 * x * x + x * np.cos(y))
xlist = np.linspace(-2.0,2.0,1000)
ylist = np.linspace(-6.0,6.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()
Figure 12.5 (b), page 307
%matplotlib inline
import matplotlib.pyplot as plt
# Same picture, zoomed in on the region visited by the iterates.
def theFunctionToPlot(x,y):
    return(0.5 * x * x + x * np.cos(y))
xlist = np.linspace(0.5,1.2,1000)
ylist = np.linspace(0.5,4,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()
# exRosenbrock comes from the companion module of the book's examples.
from optimizationExamples import *
# Wrapper asking exRosenbrock to also return the Hessian.
def exRosenbrockWithHessian(x):
    return exRosenbrock(x,True)
x0 = np.array([[-1.5],[1.5]])
# NOTE(review): ex0 is defined but never used below.
ex0 = np.array([[1],[1]])
(sol,iters) = newtonTrustRegion(exRosenbrockWithHessian,x0)
sol
Figure 12.6 (a), page 308
# Level curves of the Rosenbrock function and path of the iterates.
def theFunctionToPlot(x,y):
    return(100 * (y-x*x)**2+(1-x)**2)
xlist = np.linspace(-1.6,1.1,1000)
ylist = np.linspace(-1,2.22,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,20)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r',marker='o',mfc='blue')
plt.show()
Figure 12.6 (b), page 308
# Same picture, zoomed in near the optimum (1, 1) of the Rosenbrock function.
def theFunctionToPlot(x,y):
    return(100 * (y-x*x)**2+(1-x)**2)
# Plot window for the zoom.
xmin = -0.5
xmax = 1.1
ymin = -0.1
ymax = 1.1
xlist = np.linspace(xmin,xmax,1000)
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,20)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=5, color='r',marker='o',mfc='blue')
plt.show()