Optimization: principles and algorithms

Bierlaire, M. (2015). Optimization: Principles and Algorithms. EPFL Press.
The numbering of the algorithms, tables, and pages refers to the book.

Chapter 10: Newton's local method

Algorithm 10.1: Newton's local method

In [1]:
import numpy as np
from scipy import linalg
def newtonLocal(obj,x0,eps,maxiter=100):
    # obj returns the triple (f,g,H): objective value, gradient and Hessian at a point.
    xk = x0
    (f,g,H) = obj(xk)
    iters = list()
    iters.append([xk,f,g,H])
    k = 0
    while linalg.norm(g) > eps and k < maxiter:
        # Newton step: solve the linear system H d = -g and update the iterate.
        d = linalg.solve(H,-g)
        xk = xk + d
        (f,g,H) = obj(xk)
        iters.append([xk,f,g,H])
        k += 1
    return (xk,iters)

Example 5.8: $f(x_1,x_2) = \frac{1}{2} x_1^2 + x_1 \cos(x_2)$

In [2]:
import math
def ex0508(x):
    # f(x1,x2) = 0.5 * x1^2 + x1 * cos(x2)
    f = 0.5 * x.item(0) * x.item(0) + x.item(0) * math.cos(x.item(1))
    # Gradient: (x1 + cos(x2), -x1 * sin(x2))
    g = np.array([[ x.item(0) + math.cos(x.item(1))],[ -x.item(0) * math.sin(x.item(1)) ] ])
    # Hessian: [[1, -sin(x2)], [-sin(x2), -x1 * cos(x2)]]
    H = np.array([[ 1 , -math.sin(x.item(1))],[ -math.sin(x.item(1)),-x.item(0)*math.cos(x.item(1))]])
    return (f,g,H)
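
As a sanity check, the gradient and Hessian coded above can be derived symbolically. A minimal sketch using sympy (assuming the package is available; it is not used anywhere else in this notebook):

In [ ]:
import sympy as sp
x1, x2 = sp.symbols('x1 x2')
fSym = sp.Rational(1,2) * x1**2 + x1 * sp.cos(x2)
# Symbolic gradient and Hessian, to be compared with the expressions in ex0508.
gSym = [sp.diff(fSym,v) for v in (x1,x2)]
HSym = sp.hessian(fSym,(x1,x2))
print(gSym)
print(HSym)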
                                                      
In [3]:
x0 = np.array([[1],[1]])
(sol,iters) = newtonLocal(ex0508,x0,1e-15)
print(sol)
[[1.99048507e-17]
 [1.57079633e+00]]
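
The iterates converge to the stationary point $(0, \pi/2)$. Note that it is not a local minimum of $f$: the Hessian at that point has one positive and one negative eigenvalue, so it is a saddle point. A quick check, reusing ex0508 and the solution just computed:

In [ ]:
# Eigenvalues of the Hessian at the point returned by Newton's local method.
(f,g,H) = ex0508(sol)
print(linalg.eigvalsh(H))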

Table 10.1, page 237

In [4]:
print("k\txk\t\tGrad(xk)\t\t||Grad(xk)||\t\tf(xk)")
for k in range(len(iters)):
    print("{}\t{:+E}\t{:+E}\t{:+E}\t{:+E}".format(k,iters[k][0].item(0),iters[k][2].item(0),linalg.norm(iters[k][2]),iters[k][1]))
    print("\t{:+E}\t{:+E}".format(iters[k][0].item(1),iters[k][2].item(1)))
    
k	xk		Grad(xk)		||Grad(xk)||		f(xk)
0	+1.000000E+00	+1.540302E+00	+1.755165E+00	+1.040302E+00
	+1.000000E+00	-8.414710E-01
1	-2.338451E-01	-2.870770E-02	+2.306654E-01	-2.062862E-02
	+1.364192E+00	+2.288720E-01
2	+1.081438E-02	-3.225248E-03	+1.128405E-02	-9.335440E-05
	+1.584836E+00	-1.081331E-02
3	-2.132377E-06	+9.228287E-07	+2.323498E-06	-4.241334E-12
	+1.570793E+00	+2.132377E-06
4	+1.990485E-17	+8.113719E-17	+8.354308E-17	+1.416922E-33
	+1.570796E+00	-1.990485E-17
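
The table illustrates the quadratic convergence of Newton's local method: near the solution, the norm of the gradient is roughly squared from one iteration to the next. A minimal sketch computing the ratios $\|\nabla f(x_{k+1})\| / \|\nabla f(x_k)\|^2$ from the stored iterates (they remain bounded, which is the signature of quadratic convergence):

In [ ]:
norms = [linalg.norm(iters[k][2]) for k in range(len(iters))]
for k in range(len(norms)-1):
    # Bounded ratios indicate quadratic convergence of the gradient norm.
    print(k, norms[k+1] / norms[k]**2)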

Figure 10.1 (a), page 238

In [5]:
%matplotlib inline
import matplotlib.pyplot as plt
def theFunctionToPlot(x,y):
    return(0.5 * x * x + x * np.cos(y))

xlist = np.linspace(-2.0,2.0,1000)
ylist = np.linspace(-6.0,6.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()

Figure 10.1 (b), page 238

In [6]:
xlist = np.linspace(-0.5,1.1,1000)
ylist = np.linspace(0.9,2,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,15)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()

Algorithm 10.2: Newton's local method by quadratic modeling

In [7]:
# The package optimization contains all the functions presented earlier. In this case, we need the quadraticDirect and the conjugateGradient functions.
import optimization as opt
def newtonLocalQuadratic(obj,x0,eps,cg=False,maxiter=100):
    n = len(x0)
    xk = x0
    iters = list()
    (f,g,H) = obj(xk)
    iters.append([xk,f,g,H])
    k = 0
    while linalg.norm(g) > eps and k < maxiter:
        # The Newton step minimizes the quadratic model of f at xk; the subproblem
        # is solved either by conjugate gradient or by a direct (Cholesky) method.
        if cg:
            d = opt.conjugateGradient(H,g,np.zeros((n,1)))[0]
        else:
            d = opt.quadraticDirect(H,g)
        xk = xk + d
        (f,g,H) = obj(xk)
        iters.append([xk,f,g,H])
        k += 1
    return (xk,iters)
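
The module optimization is not reproduced here. For reference, quadraticDirect(H, g) computes the minimum of the quadratic model, that is, it solves $H d = -g$ by a Cholesky factorization and therefore requires $H$ to be positive definite; conjugateGradient solves the same subproblem iteratively and raises an error as soon as it detects a direction $d$ with $d^T H d \le 0$. A self-contained sketch of a function equivalent to quadraticDirect (the actual implementation in optimization.py may differ in its details):

In [ ]:
def quadraticDirectSketch(Q,b):
    # Minimize 0.5 d'Qd + b'd, i.e. solve Q d = -b, using a Cholesky factorization.
    # scipy raises a LinAlgError if Q is not positive definite.
    L = linalg.cholesky(Q, lower=True)
    y = linalg.solve_triangular(L, -b, lower=True)
    return linalg.solve_triangular(L.T, y, lower=False)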

We test the algorithm on the Rosenbrock function. It is described in Section 11.6 of the book for two variables. The implementation below involves $n$ variables: $$f(x) = \sum_{i=1}^{n-1} f_i(x) = \sum_{i=1}^{n-1} \left( 100 (x_{i+1}-x_i^2)^2 + (1-x_i)^2 \right).$$ The calculation of the derivatives is based on the partial derivatives of the functions $f_i$: $$ \begin{array}{rcl} \partial f_i/\partial x_i&=& -400 x_i (x_{i+1}-x_i^2) - 2(1-x_i), \\\\ \partial f_i/\partial x_{i+1}&=& 200 (x_{i+1}-x_i^2), \\\\ \partial^2 f_i/\partial x^2_i &=& -400 x_{i+1}+ 1200 x_i^2 + 2, \\\\ \partial^2 f_i/\partial x_i \partial x_{i+1} &=& -400 x_i, \\\\ \partial^2 f_i/\partial x^2_{i+1} &=& 200. \end{array} $$

In [8]:
def exRosenbrock(x,hessian=False):
    n = len(x)
    f = sum(100.0 * (x.item(i+1)-x.item(i)**2)**2 + (1.0-x.item(i))**2 for i in range(n-1))
    g = np.zeros((n,1))
    # Accumulate the contribution of each f_i to the gradient.
    for i in range(n-1):
        g[[i],[0]] = g[[i],[0]] - 400 * x.item(i) * (x.item(i+1)-x.item(i)**2)  - 2 * (1-x.item(i))
        g[[i+1],[0]] = g[[i+1],[0]] + 200 * (x.item(i+1)-x.item(i)**2)
    if hessian:
        H = np.zeros((n,n))
        # Accumulate the contribution of each f_i to the (tridiagonal) Hessian.
        for i in range(n-1):
            H[[i],[i]] = H[[i],[i]] - 400 * x.item(i+1) + 1200 * x.item(i)**2 + 2
            H[[i+1],[i]] = H[[i+1],[i]] - 400 * x.item(i)
            H[[i],[i+1]] = H[[i],[i+1]] - 400 * x.item(i)
            H[[i+1],[i+1]] = H[[i+1],[i+1]] + 200
        return f,g,H
    else:
        return f,g

def exRosenbrockWithHessian(x):
    return exRosenbrock(x,True)
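
The analytical derivatives above can be cross-checked against a finite-difference approximation. A minimal sketch (the test point and the step size $10^{-6}$ are arbitrary choices); the printed difference should be close to zero:

In [ ]:
def checkGradient(fct,x,h=1e-6):
    # Compare the analytical gradient with a central finite-difference approximation.
    n = len(x)
    (f,g) = fct(x)
    gApprox = np.zeros((n,1))
    for i in range(n):
        e = np.zeros((n,1))
        e[i] = h
        gApprox[i] = (fct(x+e)[0] - fct(x-e)[0]) / (2*h)
    return linalg.norm(g - gApprox)

print(checkGradient(exRosenbrock,np.array([[-1.5],[2.0]])))
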
In [9]:
x0 = np.array([[-1.5],[2]])
(sol,iters) = newtonLocalQuadratic(exRosenbrockWithHessian,x0,1e-15)
print(sol)
[[1.]
 [1.]]
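
The point found is $(1,1)$, the global minimum of the Rosenbrock function. Unlike the saddle point found for Example 5.8, the Hessian at this point is positive definite, which can be verified directly:

In [ ]:
# Both eigenvalues of the Hessian at (1,1) are positive: the point is a local minimum.
(f,g,H) = exRosenbrockWithHessian(sol)
print(linalg.eigvalsh(H))
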
In [10]:
def theFunctionToPlot(x,y):
    return(100 * (y-x*x)**2+(1-x)**2)

xlist = np.linspace(-3.0,3.0,1000)
ylist = np.linspace(-3.0,3.0,1000)
X,Y = np.meshgrid(xlist,ylist)
Z = theFunctionToPlot(X,Y)
plt.contour(X,Y,Z,20)
xiter = [iters[k][0].item(0) for k in range(len(iters))]
yiter = [iters[k][0].item(1) for k in range(len(iters))]
plt.plot(xiter,yiter, linewidth=5, color='r')
plt.show()

We now apply the algorithm to Example 5.8. In this case the algorithm fails: the Hessian encountered during the iterations is not positive definite. We first try the direct method to solve the quadratic subproblem; an error is triggered.

In [11]:
x0 = np.array([[1.1],[1.1]])
(sol,iters) = newtonLocalQuadratic(ex0508,x0,1e-15)
print(sol)
---------------------------------------------------------------------------
LinAlgError                               Traceback (most recent call last)
<ipython-input-11-0486b4bba1f1> in <module>()
      1 x0 = np.array([[1.1],[1.1]])
----> 2 (sol,iters) = newtonLocalQuadratic(ex0508,x0,1e-15)
      3 print(sol)

<ipython-input-7-9e4bcf1e5e48> in newtonLocalQuadratic(obj, x0, eps, cg, maxiter)
     12             d = opt.conjugateGradient(H,g,np.zeros((n,1)))[0]
     13         else:
---> 14             d = opt.quadraticDirect(H,g)
     15         xk = xk + d
     16         (f,g,H) = obj(xk)

~/GoogleDrive/books/Optimisation/algorithms/pythonAlgorithms/optimization.py in quadraticDirect(Q, b)
    140 
    141 def quadraticDirect(Q,b):
--> 142     L = linalg.cholesky(Q).T
    143     y = linalg.solve_triangular(L,-b,lower=True)
    144     solution = linalg.solve_triangular(L.T,y,lower=False)

/usr/local/lib/python3.6/site-packages/scipy/linalg/decomp_cholesky.py in cholesky(a, lower, overwrite_a, check_finite)
     89     """
     90     c, lower = _cholesky(a, lower=lower, overwrite_a=overwrite_a, clean=True,
---> 91                          check_finite=check_finite)
     92     return c
     93 

/usr/local/lib/python3.6/site-packages/scipy/linalg/decomp_cholesky.py in _cholesky(a, lower, overwrite_a, clean, check_finite)
     38     if info > 0:
     39         raise LinAlgError("%d-th leading minor of the array is not positive "
---> 40                           "definite" % info)
     41     if info < 0:
     42         raise ValueError('LAPACK reported an illegal value in {}-th argument'

LinAlgError: 2-th leading minor of the array is not positive definite
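
The error comes from the Cholesky factorization inside quadraticDirect: already at the starting point $(1.1, 1.1)$, the Hessian of Example 5.8 is indefinite, as a quick check of its eigenvalues shows:

In [ ]:
(f,g,H) = ex0508(np.array([[1.1],[1.1]]))
# One eigenvalue is negative: the Hessian is not positive definite,
# so the Cholesky factorization cannot be computed.
print(linalg.eigvalsh(H))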

If we try the conjugate gradient method instead, an error is also triggered: the algorithm detects a direction $d$ such that $d^T H d \le 0$, which is impossible when $H$ is positive definite.

In [12]:
x0 = np.array([[1.1],[1.1]])
(sol,iters) = newtonLocalQuadratic(ex0508,x0,1e-15,True)
print(sol)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-12-5fbb039caf02> in <module>()
      1 x0 = np.array([[1.1],[1.1]])
----> 2 (sol,iters) = newtonLocalQuadratic(ex0508,x0,1e-15,True)
      3 print(sol)

<ipython-input-7-9e4bcf1e5e48> in newtonLocalQuadratic(obj, x0, eps, cg, maxiter)
     10     while linalg.norm(g) > eps and k < maxiter:
     11         if cg:
---> 12             d = opt.conjugateGradient(H,g,np.zeros((n,1)))[0]
     13         else:
     14             d = opt.quadraticDirect(H,g)

~/GoogleDrive/books/Optimisation/algorithms/pythonAlgorithms/optimization.py in conjugateGradient(Q, b, x0)
    158         denom = scalarInner(dk,Q.dot(dk))
    159         if (denom <= 0):
--> 160             raise ValueError('The matrix must be positive definite')
    161         alphak = - np.asscalar(dk.T.dot(gk)) / denom
    162         iters.append([xk,gk,dk,alphak,betak])

ValueError: The matrix must be positive definite