Bierlaire, M. (2015). Optimization: Principles and Algorithms. EPFL Press.
The numbering of the algorithms, tables and pages refers to the book.
def initLineSearch(obj, delta):
    """Algorithm 11.1: initialize a bracket for a line search.

    Starting from 0 and doubling the step, find three points xkm2 <= xkm1 < xk
    with obj(xkm2) >= obj(xkm1) and obj(xkm1) < obj(xk), so that a local
    minimum of obj lies in [xkm2, xk].

    :param obj: callable returning the value of the objective h(x).
    :param delta: initial (positive) step.
    :return: tuple (xkm2, h(xkm2), xkm1, h(xm1), xk, h(xk)).
    """
    xkm1 = 0
    hxkm1 = obj(xkm1)
    # Guard: if obj(delta) > obj(0) the loop below never runs; without this
    # initialization xkm2/hxkm2 would be unbound and the return would crash.
    xkm2 = xkm1
    hxkm2 = hxkm1
    xk = delta
    hxk = obj(xk)
    # Keep doubling the step while the objective keeps decreasing.
    while hxk <= hxkm1:
        xkm2 = xkm1
        hxkm2 = hxkm1
        xkm1 = xk
        hxkm1 = hxk
        xk = 2.0 * xk
        hxk = obj(xk)
    return (xkm2, hxkm2, xkm1, hxkm1, xk, hxk)
Test with Example 11.3: $h(x) = (2+x) \cos(2+x)$.
import numpy as np
def h(x):
    """Example 11.3: h(x) = (2 + x) cos(2 + x)."""
    shifted = 2.0 + x
    return shifted * np.cos(shifted)
# Run the bracket initialization on Example 11.3 with initial step delta = 6.
delta = 6
(a,ha,b,hb,c,hc) = initLineSearch(h,delta)
# The returned triple (a, b, c) brackets a local minimum of h.
print (a,ha,b,hb,c,hc)
def quadraticInterpolation(obj, delta, eps):
    """Algorithm 11.2: line search by successive quadratic interpolation.

    Brackets a minimum with initLineSearch, then repeatedly fits the quadratic
    through (a, h(a)), (b, h(b)), (c, h(c)), evaluates h at its stationary
    point x+, and shrinks the bracket accordingly, until both the function
    variation and the interval length drop to eps.

    :param obj: callable returning h(x).
    :param delta: initial step passed to initLineSearch.
    :param eps: precision used by the two stopping criteria.
    :return: tuple (b, iters) where b is the approximate minimizer and iters
             records [a, b, c, x+, h(a), h(b), h(c), h(x+)] per iteration.
    """
    (a, ha, b, hb, c, hc) = initLineSearch(obj, delta)
    k = 1
    iters = list()
    # Guard: if the loop below is never entered, xplus/hxplus would be unbound
    # in the final append; start them at the current best point.
    xplus = b
    hxplus = hb
    s1 = max(ha, hc) - hb
    s2 = c - a
    while s1 > eps and s2 > eps:
        # Coefficients of the interpolating quadratic (Newton form, pp. 252-253).
        beta1 = ((b-c)*ha+(c-a)*hb+(a-b)*hc)/((a-b)*(c-a)*(c-b))
        beta2 = hb / (b-a)
        beta3 = ha / (a-b)
        # Stationary point of the quadratic.
        xplus = (beta1 * (a+b) - beta2 - beta3) / (2 * beta1)
        if xplus == b:
            # Perturb x+ away from b so that the bracket strictly shrinks.
            if (b-a) < (c-b):
                xplus = b + eps / 2.0
            else:
                xplus = b - eps / 2.0
        hxplus = obj(xplus)
        iters.append([a,b,c,xplus,ha,hb,hc,hxplus])
        # Keep the three points that still bracket the minimum.
        if xplus > b:
            if hxplus > hb:
                c = xplus
                hc = hxplus
            else:
                a = b
                ha = hb
                b = xplus
                hb = hxplus
        else:
            if hxplus > hb:
                a = xplus
                ha = hxplus
            else:
                c = b
                hc = hb
                b = xplus
                hb = hxplus
        s1 = max(ha,hc)-hb
        s2 = c-a
        k = k + 1
    # Record the final state so the tables include the last bracket.
    iters.append([a,b,c,xplus,ha,hb,hc,hxplus])
    return (b, iters)
Test with Example 11.3: $h(x) = (2+x) \cos(2+x)$.
# Solve Example 11.3 by quadratic interpolation with delta = 6 and eps = 1e-3.
(xstar,iters) = quadraticInterpolation(h,6,1.0e-3)
# Approximate minimizer (the notebook cell displays the value).
xstar
Table 11.2, p.256.
# Table 11.2, p. 256: one row per recorded state of the quadratic interpolation.
print("k\ta\t\tb\t\tc\t\tx*\t\th(a)\t\th(b)\t\th(c)\th(x^*)")
row_fmt = "{}\t{:8.5f}\t{:8.5f}\t{:8.5f}\t{:8.5f}\t{:8.5f}\t{:8.5f}\t{:8.5f}\t{:8.5f}"
for row_index, row in enumerate(iters):
    print(row_fmt.format(row_index + 1, *row))
Figure 11.2, page 255.
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
# Quadratic interpolant. See pp. 252-253
def interpolant(iters, iteration):
    """Build the quadratic interpolant q of h through the bracket points a, b, c
    recorded at a given iteration of quadraticInterpolation (see pp. 252-253).

    :param iters: iteration log produced by quadraticInterpolation.
    :param iteration: index of the iteration to use.
    :return: callable q with q(a) = h(a), q(b) = h(b), q(c) = h(c).
    """
    (a, b, c, x, ha, hb, hc, hx) = tuple(iters[iteration])
    # (The original code unpacked the same row a second time into unused
    # ap..hxp variables; that dead code has been removed.)
    # Newton-form coefficients of the quadratic through the three points.
    b3 = ha / (a - b)
    b2 = hb / (b - a)
    b1num = (b - c) * ha + (c - a) * hb + (a - b) * hc
    b1denom = (a - b) * (c - a) * (c - b)
    b1 = b1num / b1denom

    def q(t):
        # Renamed the argument from x to t so it does not shadow the
        # unpacked candidate point x above.
        return (b1 * (t - a) * (t - b) + b2 * (t - a) + b3 * (t - b))

    return q
# Figure 11.2: h and its quadratic interpolant at the first recorded iteration.
iteration = 0
q = interpolant(iters,iteration)
(a,b,c,x,ha,hb,hc,hx) = tuple(iters[iteration])
xrange = np.arange(0.0,13.0,0.001)
# Solid line: h; dashed line: the interpolant q.
plt.plot(xrange,h(xrange),'-',xrange,q(xrange),'--')
shift = 0.2
# Label the bracket points a, b, c and the candidate point x.
plt.annotate('a',xy=(a,ha),xytext=(a+shift,ha+2*shift))
plt.annotate('b',xy=(b,hb),xytext=(b+shift,hb+2*shift))
plt.annotate('c',xy=(c,hc),xytext=(c+shift,hc+2*shift))
plt.annotate('x',xy=(x,hx),xytext=(x+shift,hx+2*shift))
# Vertical segment joining q(x) and h(x) at the candidate point.
xopt,yopt = [x,x],[q(x),(h(x))]
plt.xlim(0, 13)
plt.ylim(-20, 20)
plt.plot(xopt,yopt)
plt.show()
Figure 11.3, page 255
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
# Figure 11.3: same plot as Figure 11.2 at the second recorded iteration.
iteration = 1
q = interpolant(iters,iteration)
(a,b,c,x,ha,hb,hc,hx) = tuple(iters[iteration])
xrange = np.arange(0.0,13.0,0.001)
# Solid line: h; dashed line: the interpolant q.
plt.plot(xrange,h(xrange),'-',xrange,q(xrange),'--')
shift = 0.2
# Label the bracket points a, b, c and the candidate point x.
plt.annotate('a',xy=(a,ha),xytext=(a+shift,ha+2*shift))
plt.annotate('b',xy=(b,hb),xytext=(b+shift,hb+2*shift))
plt.annotate('c',xy=(c,hc),xytext=(c+shift,hc+2*shift))
plt.annotate('x',xy=(x,hx),xytext=(x+shift,hx+2*shift))
# Vertical segment joining q(x) and h(x) at the candidate point.
xopt,yopt = [x,x],[q(x),(h(x))]
plt.xlim(0, 13)
plt.ylim(-20, 20)
plt.plot(xopt,yopt)
plt.show()
Figure 11.4, page 257
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
# Figure 11.4: third recorded iteration; the x-range is narrowed to [4, 13].
iteration = 2
q = interpolant(iters,iteration)
(a,b,c,x,ha,hb,hc,hx) = tuple(iters[iteration])
xrange = np.arange(0.0,13.0,0.001)
# Solid line: h; dashed line: the interpolant q.
plt.plot(xrange,h(xrange),'-',xrange,q(xrange),'--')
shift = 0.2
# Label the bracket points a, b, c and the candidate point x.
plt.annotate('a',xy=(a,ha),xytext=(a+shift,ha+2*shift))
plt.annotate('b',xy=(b,hb),xytext=(b+shift,hb+2*shift))
plt.annotate('c',xy=(c,hc),xytext=(c+shift,hc+2*shift))
plt.annotate('x',xy=(x,hx),xytext=(x+shift,hx+2*shift))
# Vertical segment joining q(x) and h(x) at the candidate point.
xopt,yopt = [x,x],[q(x),(h(x))]
plt.xlim(4, 13)
plt.ylim(-20, 20)
plt.plot(xopt,yopt)
plt.show()
Figure 11.5, page 257
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
# Figure 11.5: fourth recorded iteration, same zoomed x-range [4, 13].
iteration = 3
q = interpolant(iters,iteration)
(a,b,c,x,ha,hb,hc,hx) = tuple(iters[iteration])
xrange = np.arange(0.0,13.0,0.001)
# Solid line: h; dashed line: the interpolant q.
plt.plot(xrange,h(xrange),'-',xrange,q(xrange),'--')
shift = 0.2
# Label the bracket points a, b, c and the candidate point x.
plt.annotate('a',xy=(a,ha),xytext=(a+shift,ha+2*shift))
plt.annotate('b',xy=(b,hb),xytext=(b+shift,hb+2*shift))
plt.annotate('c',xy=(c,hc),xytext=(c+shift,hc+2*shift))
plt.annotate('x',xy=(x,hx),xytext=(x+shift,hx+2*shift))
# Vertical segment joining q(x) and h(x) at the candidate point.
xopt,yopt = [x,x],[q(x),(h(x))]
plt.xlim(4, 13)
plt.ylim(-20, 20)
plt.plot(xopt,yopt)
plt.show()
def goldenSection(obj, low, up, eps):
    """Algorithm 11.3: golden-section line search on the interval [low, up].

    Shrinks the interval by the golden ratio at every iteration, reusing one
    of the two interior evaluations, until the interval is shorter than eps.

    :param obj: callable returning h(x).
    :param low: lower bound of the initial interval.
    :param up: upper bound of the initial interval.
    :param eps: required length of the final interval.
    :return: (xstar, iters) where xstar is the interval midpoint and iters
             records [l, alpha1, alpha2, u, h(alpha1), h(alpha2)].
    """
    rho = (3.0 - np.sqrt(5.0)) / 2.0
    alpha1 = low + rho * (up - low)
    h1 = obj(alpha1)
    alpha2 = up - rho * (up - low)
    h2 = obj(alpha2)
    iters = [[low, alpha1, alpha2, up, h1, h2]]
    k = 1
    while (up - low) > eps:
        if h1 == h2:
            # Tie: keep only [alpha1, alpha2] and evaluate two fresh points.
            low, up = alpha1, alpha2
            alpha1 = low + rho * (up - low)
            h1 = obj(alpha1)
            alpha2 = up - rho * (up - low)
            h2 = obj(alpha2)
        elif h1 > h2:
            # Minimum is to the right: drop [low, alpha1], reuse alpha2.
            low = alpha1
            alpha1, h1 = alpha2, h2
            alpha2 = up - rho * (up - low)
            h2 = obj(alpha2)
        else:
            # Minimum is to the left: drop [alpha2, up], reuse alpha1.
            up = alpha2
            alpha2, h2 = alpha1, h1
            alpha1 = low + rho * (up - low)
            h1 = obj(alpha1)
        k += 1
        iters.append([low, alpha1, alpha2, up, h1, h2])
    xstar = (low + up) / 2.0
    return (xstar, iters)
Test with Example 11.3: $h(x) = (2+x) \cos(2+x)$.
# Golden-section search on [5, 10] for Example 11.3.
(xstar,iters) = goldenSection(h,5,10,1.0e-3)
# Approximate minimizer (the notebook cell displays the value).
xstar
Table 11.3, page 261.
# Table 11.3, p. 261: one row per golden-section iteration.
print("k\tl\t\talpha1\t\talpha2\t\tu\t\th(alpha1)\th(alpha2)")
row_fmt = "{}\t{:8.5f}\t{:8.5f}\t{:8.5f}\t{:8.5f}\t{:8.5f}\t{:8.5f}"
for row_index, row in enumerate(iters):
    print(row_fmt.format(row_index + 1, *row))
Figure 11.8, page 261
# Figure 11.8: one horizontal bar per iteration showing how the
# golden-section interval [l, u] shrinks.
nbr = 10
plt.xlim(5, 10)
for k in range(nbr):
    l = iters[k][0]
    u = iters[k][3]
    height = nbr - k / 2
    plt.plot([l, u], [height, height])
# The vertical axis only encodes the iteration order; hide its ticks.
plt.tick_params(axis='y', left=False, labelleft=False)
plt.show()
# Declare an exception
from optimizationExceptions import *
# Define a function simple to call for the inner product
def scalarInner(a, b):
    """Return the inner product a^T b of two column vectors as a Python scalar.

    :param a: numpy column vector.
    :param b: numpy column vector of the same length.
    :return: the scalar value of a^T b.
    """
    # np.asscalar was deprecated in NumPy 1.16 and removed in 1.23;
    # ndarray.item() is the supported equivalent.
    return a.T.dot(b).item()
def lineSearch(obj, x, d, alpha0, beta1, beta2, lbd):
    """Algorithm 11.5: inexact line search satisfying the Wolfe conditions.

    Expands or bisects the step until alpha satisfies both Wolfe conditions
    along the descent direction d.

    :param obj: callable returning (f, g) at a point.
    :param x: current iterate (numpy column vector).
    :param d: descent direction (numpy column vector).
    :param alpha0: initial step, must be > 0.
    :param beta1: first Wolfe condition parameter.
    :param beta2: second Wolfe condition parameter, must satisfy beta1 < beta2.
    :param lbd: expansion factor, must be > 1.
    :return: (alpha, iters) with iters recording [alpha, alphal, alphar, reason].
    :raise OptimizationValueOutOfRange: on invalid alpha0, lbd or beta1/beta2.
    :raise OptimizationInputError: if d is not a descent direction.
    """
    if lbd <= 1:
        raise OptimizationValueOutOfRange("lambda is {} and must be > 1".format(lbd))
    if alpha0 <= 0:
        raise OptimizationValueOutOfRange("alpha0 is {} and must be > 0".format(alpha0))
    if beta1 >= beta2:
        # Typo fixed in the message ("parsmeters" -> "parameters").
        raise OptimizationValueOutOfRange("Incompatible Wolfe cond. parameters: beta1={} is greater than beta2={}".format(beta1, beta2))
    (f, g) = obj(x)
    deriv = scalarInner(g, d)
    if deriv >= 0:
        raise OptimizationInputError("d is not a descent direction: {} >= 0".format(deriv))
    alpha = alpha0
    alphal = 0
    # "No upper bound yet" sentinel. The original used np.finfo(np.float128).max,
    # but np.float128 does not exist on all platforms (e.g. Windows builds);
    # float('inf') is portable and plays the same role.
    alphar = float('inf')
    finished = False
    iters = list()
    iters.append([alpha, alphal, alphar, ""])
    while not finished:
        xnew = x + alpha * d
        (fnew, gnew) = obj(xnew)
        # First Wolfe condition: sufficient decrease.
        if fnew > f + alpha * beta1 * deriv:
            reason = "too long"
            alphar = alpha
            alpha = (alphal + alphar) / 2.0
        # Second Wolfe condition: sufficient slope increase.
        elif scalarInner(gnew, d) < beta2 * deriv:
            reason = "too short"
            alphal = alpha
            if alphar == float('inf'):
                # No upper bound yet: extrapolate.
                alpha = lbd * alpha
            else:
                # Bisect inside the current bracket.
                alpha = (alphal + alphar) / 2.0
        else:
            reason = "ok"
            finished = True
        iters.append([alpha, alphal, alphar, reason])
    return (alpha, iters)
Example 11.2: $f(x) = \frac{1}{2} x_1^2 + \frac{9}{2} x_2^2$.
def ex1102(x):
    """Example 11.2: f(x) = x1^2 / 2 + 9 x2^2 / 2.

    :param x: 2x1 numpy column vector.
    :return: tuple (f, g) with the function value and the 2x1 gradient.
    """
    f = 0.5 * x.item(0) * x.item(0) + 4.5 * x.item(1) * x.item(1)
    g = np.array([[x.item(0)], [9 * x.item(1)]])
    # The constant Hessian diag(1, 9) was computed here but never returned
    # or used, so that dead statement has been removed.
    return (f, g)
# Starting point and normalized direction for the line search example.
x = np.array([[10],[1]])
d = np.array([[-2/np.sqrt(5)],[1/np.sqrt(5)]])
# Line search parameters: tiny initial step, Wolfe constants, expansion factor.
alpha0 = 1.0e-3
beta1 = 0.3
beta2 = 0.7
lbd = 20
(alpha,iters) = lineSearch(ex1102,x,d,alpha0,beta1,beta2,lbd)
print(alpha)
Table 11.6, page 275
# Table 11.6, p. 275: the step, the bracket and the Wolfe verdict per iteration.
print("alpha\t\talpha_l\t\talpha_u\t\tReason")
for record in iters:
    print("{:+E}\t{:+E}\t{:+13E}\t{:}".format(*record))
from scipy import linalg
# Parameters for the line search
# alpha0: initial step; beta1/beta2: Wolfe constants; lbd: expansion factor.
# These module-level names are read by steepestDescent below.
alpha0 = 1.0
beta1 = 1.0e-4
beta2 = 0.99
lbd = 2
def steepestDescent(obj, x0, eps, maxiter = 100):
    """Steepest descent with a Wolfe line search.

    :param obj: callable returning (f, g) at a point.
    :param x0: starting point (numpy column vector).
    :param eps: stopping tolerance on the gradient norm.
    :param maxiter: maximum number of iterations.
    :return: (xk, iters) where iters records [xk, f, ||g||] per iteration.
    """
    xk = x0
    (f, g) = obj(xk)
    iters = [[xk, f, linalg.norm(g)]]
    k = 0
    while linalg.norm(g) > eps and k < maxiter:
        # Step along the steepest descent direction -g; the parameters
        # alpha0, beta1, beta2, lbd are the module-level settings above.
        alpha, _ = lineSearch(obj, xk, -g, alpha0, beta1, beta2, lbd)
        xk = xk - alpha * g
        (f, g) = obj(xk)
        k += 1
        iters.append([xk, f, linalg.norm(g)])
    return (xk, iters)
from optimizationExamples import exRosenbrock
# Minimize the Rosenbrock function from (-1.5, 1.5) with a tight tolerance
# and an iteration cap of 10000.
x0 = np.array([[-1.5],[1.5]])
eps = 1.0e-15
(sol,iters) = steepestDescent(exRosenbrock,x0,eps,10000)
print(sol)
Figure 11.19 (a), page 282
# Print the starting point recorded in the first iterate.
print("({},{})".format(iters[0][0][0],iters[0][0][1]))
def theFunctionToPlot(x, y):
    """Rosenbrock function 100 (y - x^2)^2 + (1 - x)^2, used for the contours."""
    return (1 - x) ** 2 + 100 * (y - x * x) ** 2
# Figure 11.19 (a): contours of the Rosenbrock function over the full domain,
# with the first 200 steepest descent iterates overlaid in red.
xmin = -1.6
xmax = 1.1
xlist = np.linspace(xmin,xmax,1000)
ymin = -1
ymax = 2
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.rcParams["figure.figsize"] = [15,10]
plt.contour(X,Y,Z,20)
# Extract the (x, y) coordinates of the first 200 iterates.
xiter = [iters[k][0].item(0) for k in range(200)]
yiter = [iters[k][0].item(1) for k in range(200)]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=1, color='r')
# Star marker at the optimum (1, 1).
plt.plot(1,1,marker='*')
plt.show()
Figure 11.19 (b), page 282
# Figure 11.19 (b): zoom on a small window of the domain to show the
# characteristic zig-zag of the steepest descent iterates.
xmin = 0.71
xmax = 0.81
xlist = np.linspace(xmin,xmax,1000)
ymin = 0.5
ymax = 0.67
ylist = np.linspace(ymin,ymax,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,20)
# Extract the (x, y) coordinates of the first 200 iterates.
xiter = [iters[k][0].item(0) for k in range(200)]
yiter = [iters[k][0].item(1) for k in range(200)]
plt.xlim([xmin,xmax])
plt.ylim([ymin,ymax])
plt.plot(xiter,yiter, linewidth=1, color='r')
plt.rcParams["figure.figsize"] = [15,10]
plt.show()
from optimizationExceptions import *
def modifiedCholesky(A):
    """Modified Cholesky factorization: find tau >= 0 such that A + tau*I is
    positive definite and return its Cholesky factor.

    :param A: symmetric square numpy array.
    :return: (L, tau) with L lower triangular and L L^T = A + tau I.
    :raise OptimizationInputError: if A is not square or not symmetric.
    """
    (m, n) = A.shape
    if m != n:
        raise OptimizationInputError("The matrix must be square and not ({},{})".format(m,n))
    if not (A.transpose() == A).all():
        raise OptimizationInputError("The matrix must be symmetric.")
    # Floor the Frobenius norm away from zero so the shift below cannot vanish.
    frobeniusNorm = max(linalg.norm(A), 1.0e-6)
    if min(A.diagonal()) >= 0:
        # All diagonal entries non-negative: try without any shift first.
        tau = 0
        R = A
    else:
        tau = frobeniusNorm
        R = A + tau * np.eye(n)
    # Grow the shift until the smallest eigenvalue becomes positive.
    while min(linalg.eigvalsh(R)) <= 0:
        tau = max(2 * tau, 0.5 * frobeniusNorm)
        R = A + tau * np.eye(n)
    # scipy.linalg.cholesky returns the upper factor U with U^T U = R by
    # default; transpose it to obtain the lower factor L.
    return linalg.cholesky(R).T, tau
# Symmetric test matrix for the modified Cholesky factorization.
# NOTE(review): presumably chosen to be indefinite so that tau > 0 — the
# printed tau confirms this at run time.
A = np.array([[ 1.0000 , 3.5000 , 6.0000 , 8.5000],
[ 3.5000 , 6.0000 , 8.5000 , 11.0000],
[ 6.0000 , 8.5000 , 11.0000 , 13.5000],
[ 8.5000 , 11.0000 , 13.5000 , 16.0000]])
(L,tau) = modifiedCholesky(A)
print("tau = ",tau)
print(L)
We have that $L L^T = A + \tau I$ or, equivalently, $L L^T - A = \tau I$.
# Check of the factorization: this difference should equal tau * I
# (up to floating-point rounding).
np.matmul(L,L.T)-A