Bierlaire, M. (2015). Optimization: Principles and Algorithms. EPFL Press.
The numbering of the algorithms, tables, and pages refers to the book.
Note that there is a typo in Eq. (13.13) in the book. See the errata.
import numpy as np
from scipy import linalg
# The package optimization contains all the functions presented earlier. In this case, we need the quadraticDirect and the conjugateGradient functions.
import optimization as opt
# Define a simple helper function for the inner product
def scalarInner(a, b):
    """Return the inner product a'b of two column vectors as a Python scalar.

    Fix: ``np.asscalar`` was deprecated in NumPy 1.16 and removed in 1.23;
    ``ndarray.item()`` is the supported replacement and returns the same value.
    """
    return a.T.dot(b).item()
def bfgs(obj, x0, eps=1.0e-7, maxiter=100):
    """BFGS quasi-Newton method with line search.

    :param obj: function returning the tuple (f, g) at a point x.
    :param x0: starting point, numpy column vector of shape (n, 1).
    :param eps: stop when the gradient norm falls below this threshold.
    :param maxiter: maximum number of iterations.
    :return: (xk, iters) where xk is the last iterate and iters collects
             [xk, f, ||g||, Hinv, d'y] for each iteration.
    """
    n = x0.shape[0]
    I = np.eye(n)
    # Initial approximation of the INVERSE Hessian: the identity matrix.
    Hinv = np.eye(n)
    xk = x0
    (f, g) = obj(xk)
    iters = list()
    iters.append([xk, f, linalg.norm(g), Hinv, 0])
    k = 0
    while linalg.norm(g) > eps and k < maxiter:
        # Quasi-Newton direction: d = -Hinv g.
        d = -Hinv.dot(g)
        (alpha, lsIters) = opt.lineSearch(obj, xk, d, 1.0, 0.3, 0.7, 2)
        xold = xk
        gold = g
        xk = xk + alpha * d
        (f, g) = obj(xk)
        k = k + 1
        dbar = xk - xold
        y = g - gold
        denom = scalarInner(dbar, y)
        # Skip the update when the curvature denominator d'y is numerically
        # zero, to keep Hinv well defined and avoid a division by zero.
        if abs(denom) > np.finfo(float).eps * linalg.norm(dbar) * linalg.norm(y):
            t1 = (I - np.outer(dbar, y) / denom)
            # Note that there is a typo in Eq. (13.13) in the book; see the errata.
            t2 = (I - np.outer(y, dbar) / denom)
            t3 = t1.dot(Hinv)
            t4 = t3.dot(t2)
            Hinv = t4 + np.outer(dbar, dbar) / denom
        iters.append([xk, f, linalg.norm(g), Hinv, denom])
    return xk, iters
Example 5.8: $f(x_1,x_2) = \frac{1}{2} x_1^2 + x_1 \cos(x_2)$
import math
def ex0508noHessian(x):
    """Example 5.8: return (f, g) at x, where f(x1, x2) = x1^2/2 + x1 cos(x2)."""
    x1, x2 = x.item(0), x.item(1)
    f = 0.5 * x1 * x1 + x1 * math.cos(x2)
    g = np.array([[x1 + math.cos(x2)],
                  [-x1 * math.sin(x2)]])
    return (f, g)
# Solve Example 5.8 with BFGS, starting from (1, 1).
x0 = np.array([[1],[1]])
(sol,iters) = bfgs(ex0508noHessian,x0)
sol
Table 13.1, page 317
# One row per iterate: k, xk (two components), f(xk), ||grad f(xk)||.
print("k\txk\t\t\t\tf(xk)\t\t||Grad(xk)||")
for k in range(len(iters)):
    # iters[k] holds [xk, f, ||g||, Hinv, d'y].
    print("{}\t{:+E}\t{:+E}\t{:+E}\t{:+E}".format(k,iters[k][0].item(0),iters[k][0].item(1),iters[k][1],iters[k][2]))
Table 13.2, page 317
# Entries of the inverse Hessian approximation and the curvature d'y,
# starting at iteration 1 (no update exists at iteration 0).
print("k\tHinv(1,1)\tHinv(2,2)\tHinv(1,2)\td'y")
for k in range(1,len(iters)):
    print("{}\t{:+E}\t{:+E}\t{:+E}\t{:+E}".format(k,iters[k][3].item((0,0)),iters[k][3].item((1,1)),iters[k][3].item((0,1)),iters[k][4]))
Figure 13.3 (a), page 316.
%matplotlib inline
import matplotlib.pyplot as plt
def theFunctionToPlot(x, y):
    """Vectorized objective f(x1, x2) = x1^2/2 + x1 cos(x2) for contour grids."""
    return 0.5 * x * x + x * np.cos(y)
# Contour plot of the objective with the BFGS iterates overlaid (full view).
xmin = -2
xmax = 2
ymin = -6
ymax = 6
xlist = np.linspace(xmin,xmax,1000)
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
# Sequence of iterates stored by bfgs in iters.
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=5, color='r',marker='o',mfc='blue')
plt.show()
Figure 13.3 (b), page 316.
%matplotlib inline
import matplotlib.pyplot as plt
def theFunctionToPlot(x, y):
    """Vectorized objective f(x1, x2) = x1^2/2 + x1 cos(x2) for contour grids."""
    return 0.5 * x * x + x * np.cos(y)
# Same plot, zoomed in around the iterates (Figure 13.3 (b)).
xmin = -0.1
xmax = 1.1
ymin = 0.9
ymax = 4
xlist = np.linspace(xmin,xmax,1000)
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=5, color='r',marker='o',mfc='blue')
plt.show()
import optimization as opt
def sr1(obj, x0, delta0, eps=1.0e-8, dl=True, maxiter=100):
    """Symmetric rank-one (SR1) quasi-Newton method within a trust region.

    :param obj: function returning the tuple (f, g) at a point x.
    :param x0: starting point, numpy column vector of shape (n, 1).
    :param delta0: initial radius of the trust region.
    :param eps: stop when the gradient norm falls below this threshold.
    :param dl: if True, solve the trust region subproblem with the dogleg
               method; otherwise with truncated conjugate gradient.
    :param maxiter: maximum number of iterations.
    :return: (xk, iters) where iters collects
             [xk, f, ||g||, H, delta, 0.0, step type, status] per iteration.
    """
    n = x0.shape[0]
    xk = x0
    (f, g) = obj(xk)
    # Initial approximation of the Hessian (not its inverse): the identity.
    H = np.eye(n)
    iters = list()
    k = 0
    eta1 = 0.01  # below this ratio, the candidate is rejected
    eta2 = 0.9   # above this ratio, the trust region is enlarged
    delta = delta0
    iters.append([xk, f, linalg.norm(g), H, delta, 0.0, '', ''])
    while linalg.norm(g) > eps and k < maxiter:
        k = k + 1
        # Renamed 'type' -> 'stepType': 'type' shadows the builtin.
        if dl:
            (step, stepType) = opt.dogleg(g, H, delta)
        else:
            (step, stepType) = opt.truncatedConjugateGradient(g, H, delta)
        (fc, gc) = obj(xk + step)
        y = gc - g
        # rho compares the actual reduction of f with the reduction
        # predicted by the quadratic model.
        num = f - fc
        denom = -scalarInner(step, g) - 0.5 * scalarInner(step, H.dot(step))
        rho = num / denom
        if rho < eta1:
            # Failure: reduce the trust region
            delta = linalg.norm(step) / 2.0
            status = "- "
        else:
            # Candidate accepted
            xk = xk + step
            f = fc
            g = gc
            if rho >= eta2:
                # Very good model agreement: enlarge the trust region
                delta = 2 * delta
                status = "++"
            else:
                status = "+ "
        # SR1 update of the matrix, skipped when the denominator is too
        # small relative to the vectors involved (numerical safeguard).
        term = y - H.dot(step)
        sr1denom = scalarInner(step, term)
        if abs(sr1denom) >= 1.0e-8 * linalg.norm(step) * linalg.norm(term):
            H = H + np.outer(term, term) / sr1denom
        iters.append([xk, f, linalg.norm(g), H, delta, 0.0, stepType, status])
    return xk, iters
# Solve Example 5.8 with SR1 in a trust region of initial radius 10,
# using truncated conjugate gradient for the subproblem (dl=False).
x0 = np.array([[1],[1]])
(sol,iters) = sr1(ex0508noHessian,x0,10,dl=False)
sol
Table 13.4, page 322
# One row per iterate: k, xk, f(xk), ||grad||, trust region radius, status.
print("k\txk\t\t\t\tf(xk)\t\t||Grad(xk)||\tDelta\t\tStatus")
for k in range(len(iters)):
    # iters[k] holds [xk, f, ||g||, H, delta, 0.0, step type, status].
    print("{}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E} {:2s}".format(k,iters[k][0].item(0),iters[k][0].item(1),iters[k][1],iters[k][2],iters[k][4],iters[k][7]))
Table 13.3, page 321
# Entries and eigenvalues of the SR1 matrix at each iteration.
# NOTE(review): iters[k][3] holds H, the approximation of the Hessian
# itself (sr1 never inverts it); the "Hinv" column labels look like a
# leftover from the BFGS table -- confirm against Table 13.3 in the book.
print("k\tHinv(1,1)\tHinv(2,2)\tHinv(1,2)\tlambda1\t\tlambda2")
for k in range(1,len(iters)):
    # Eigenvalues reveal whether the SR1 matrix is positive definite.
    lbd,v = linalg.eigh(iters[k][3])
    print("{}\t{:+E}\t{:+E}\t{:+E}\t{:+E}\t{:+E}".format(k,iters[k][3].item((0,0)),iters[k][3].item((1,1)),iters[k][3].item((0,1)),lbd.item(0),lbd.item(1)))
Figure 13.4 (a), page 320
%matplotlib inline
import matplotlib.pyplot as plt
def theFunctionToPlot(x, y):
    """Vectorized objective f(x1, x2) = x1^2/2 + x1 cos(x2) for contour grids."""
    return 0.5 * x * x + x * np.cos(y)
# Contour plot with the SR1 / trust region iterates overlaid (full view).
xmin = -2
xmax = 2
ymin = -6
ymax = 6
xlist = np.linspace(xmin,xmax,1000)
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=5, color='r',marker='o',mfc='blue')
plt.show()
Figure 13.4 (b), page 320
%matplotlib inline
import matplotlib.pyplot as plt
def theFunctionToPlot(x, y):
    """Vectorized objective f(x1, x2) = x1^2/2 + x1 cos(x2) for contour grids."""
    return 0.5 * x * x + x * np.cos(y)
# Same plot, zoomed in around the iterates (Figure 13.4 (b)).
xmin = -1.47
xmax = 1.15
ymin = -0.66
ymax = 1.88
xlist = np.linspace(xmin,xmax,1000)
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=5, color='r',marker='o',mfc='blue')
plt.show()
from optimizationExamples import exRosenbrock
# BFGS on the Rosenbrock function, starting from (-1.5, 1.5).
x0 = np.array([[-1.5],[1.5]])
(sol,iters) = bfgs(exRosenbrock,x0)
print(sol)
print("Number of iterations: {}".format(len(iters)))
Figure 13.5 (a), page 323
def theFunctionToPlot(x, y):
    """Vectorized Rosenbrock function 100 (y - x^2)^2 + (1 - x)^2 for contour grids."""
    return 100 * (y - x * x) ** 2 + (1 - x) ** 2
# Contour plot of Rosenbrock with the BFGS iterates; the star marks the
# optimum (1, 1).
xmin = -1.6
xmax = 1.1
xlist = np.linspace(xmin,xmax,1000)
ymin = -1
ymax = 2
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.rcParams["figure.figsize"] = [15,10]
plt.contour(X,Y,Z,20)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=1, color='r',marker='o',mfc='blue')
plt.plot(1,1,marker='*')
plt.show()
# Same plot, zoomed in near the optimum (presumably Figure 13.5 (b);
# no caption is present in the source).
xmin = -0.5
xmax = 1.1
xlist = np.linspace(xmin,xmax,1000)
ymin = -0.1
ymax = 1.1
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.rcParams["figure.figsize"] = [15,10]
plt.contour(X,Y,Z,20)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=1, color='r',marker='o',mfc='blue')
plt.plot(1,1,marker='*')
plt.show()
SR1 with trust region
# SR1 with trust region on Rosenbrock (truncated conjugate gradient
# subproblem), then plot the evolution of the trust region radius.
x0 = np.array([[-1.5],[1.5]])
(sol,iters) = sr1(exRosenbrock,x0,10,dl=False,maxiter=1000)
print(sol)
print("Number of iterations: {}".format(len(iters)))
# Radius delta is stored at index 4 of each iters entry; log scale.
x = np.array([k for k in range(len(iters))])
y = np.array([iters[k][4] for k in range(len(iters))])
plt.semilogy(x,y)
plt.show()
Figure 13.6 (a), page 324
def theFunctionToPlot(x, y):
    """Vectorized Rosenbrock function 100 (y - x^2)^2 + (1 - x)^2 for contour grids."""
    return 100 * (y - x * x) ** 2 + (1 - x) ** 2
# Contour plot of Rosenbrock with the SR1 / trust region iterates; the
# star marks the optimum (1, 1).
xmin = -1.6
xmax = 1.1
xlist = np.linspace(xmin,xmax,1000)
ymin = -1
ymax = 2
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.rcParams["figure.figsize"] = [15,10]
plt.contour(X,Y,Z,20)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=1, color='r',marker='o',mfc='blue')
plt.plot(1,1,marker='*')
plt.show()
Figure 13.6 (b), page 324
def theFunctionToPlot(x, y):
    """Vectorized Rosenbrock function 100 (y - x^2)^2 + (1 - x)^2 for contour grids."""
    return 100 * (y - x * x) ** 2 + (1 - x) ** 2
# Same plot, zoomed in near the optimum (Figure 13.6 (b)).
xmin = -0.5
xmax = 1.1
xlist = np.linspace(xmin,xmax,1000)
ymin = -0.1
ymax = 1.1
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.rcParams["figure.figsize"] = [15,10]
plt.contour(X,Y,Z,20)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=1, color='r',marker='o',mfc='blue')
plt.plot(1,1,marker='*')
plt.show()
SR1 with line search
import numpy as np
from scipy import linalg
# The package optimization contains all the functions presented earlier. In this case, we need the quadraticDirect and the conjugateGradient functions.
import optimization as opt
# Define a simple helper function for the inner product
def scalarInner(a, b):
    """Return the inner product a'b of two column vectors as a Python scalar.

    Fix: ``np.asscalar`` was deprecated in NumPy 1.16 and removed in 1.23;
    ``ndarray.item()`` is the supported replacement and returns the same value.
    """
    return a.T.dot(b).item()
def sr1LineSearch(obj, x0, eps=1.0e-7, maxiter=100):
    """SR1 quasi-Newton method globalized by a line search.

    The SR1 update does not guarantee a positive definite matrix, so the
    descent direction is obtained from a modified Cholesky factorization
    of the current approximation H.

    :param obj: function returning the tuple (f, g) at a point x.
    :param x0: starting point, numpy column vector of shape (n, 1).
    :param eps: stop when the gradient norm falls below this threshold.
    :param maxiter: maximum number of iterations.
    :return: (xk, iters) where iters collects [xk, f, ||g||] per iteration.
    """
    # Cleanup: removed the unused locals I, xold, and dbar from the
    # original implementation (the update only needs step and y).
    xk = x0
    (f, g) = obj(xk)
    H = np.eye(x0.shape[0])
    iters = list()
    iters.append([xk, f, linalg.norm(g)])
    k = 0
    while linalg.norm(g) > eps and k < maxiter:
        # Solve (L L') d = -g, where L comes from the modified Cholesky
        # factorization of H (tau is the diagonal perturbation).
        (L, tau) = opt.modifiedCholesky(H)
        z = linalg.solve_triangular(L, g, lower=True)
        d = linalg.solve_triangular(L.T, -z, lower=False)
        (alpha, lsIters) = opt.lineSearch(obj, xk, d, 1.0, 0.3, 0.7, 2)
        gold = g
        step = alpha * d
        xk = xk + step
        (f, g) = obj(xk)
        k = k + 1
        y = g - gold
        # SR1 update, skipped when the denominator is too small relative
        # to the vectors involved (numerical safeguard).
        term = y - H.dot(step)
        sr1denom = scalarInner(step, term)
        if abs(sr1denom) >= 1.0e-8 * linalg.norm(step) * linalg.norm(term):
            H = H + np.outer(term, term) / sr1denom
        iters.append([xk, f, linalg.norm(g)])
    return xk, iters
from optimizationExamples import exRosenbrock
# SR1 with line search on the Rosenbrock function, from (-1.5, 1.5).
x0 = np.array([[-1.5],[1.5]])
(sol,iters) = sr1LineSearch(exRosenbrock,x0)
print(sol)
print("Number of iterations: {}".format(len(iters)))
Figure 13.7 (a), page 325
def theFunctionToPlot(x, y):
    """Vectorized Rosenbrock function 100 (y - x^2)^2 + (1 - x)^2 for contour grids."""
    return 100 * (y - x * x) ** 2 + (1 - x) ** 2
# Contour plot of Rosenbrock with the SR1 / line search iterates; the
# star marks the optimum (1, 1).
xmin = -1.6
xmax = 1.1
xlist = np.linspace(xmin,xmax,1000)
ymin = -1
ymax = 2
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.rcParams["figure.figsize"] = [15,10]
plt.contour(X,Y,Z,20)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=1, color='r',marker='o',mfc='blue')
plt.plot(1,1,marker='*')
plt.show()
Figure 13.7 (b), page 325
def theFunctionToPlot(x, y):
    """Vectorized Rosenbrock function 100 (y - x^2)^2 + (1 - x)^2 for contour grids."""
    return 100 * (y - x * x) ** 2 + (1 - x) ** 2
# Same plot, zoomed in near the optimum (Figure 13.7 (b)).
xmin = -0.5
xmax = 1.1
xlist = np.linspace(xmin,xmax,1000)
ymin = -0.1
ymax = 1.1
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.rcParams["figure.figsize"] = [15,10]
plt.contour(X,Y,Z,20)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=1, color='r',marker='o',mfc='blue')
plt.plot(1,1,marker='*')
plt.show()