module optimization: examples of use of each function¶

This webpage is for programmers who need examples of use of the functions of the class. The examples are designed to illustrate the syntax. They do not correspond to any meaningful model. For examples of models, visit biogeme.epfl.ch.

import datetime
print(datetime.datetime.now())

2019-12-29 21:18:54.941948

import biogeme.version as ver
print(ver.getText())

biogeme 3.2.5 [2019-12-29]
Version entirely written in Python
Home page: http://biogeme.epfl.ch
Submit questions to https://groups.google.com/d/forum/biogeme
Michel Bierlaire, Transport and Mobility Laboratory, Ecole Polytechnique Fédérale de Lausanne (EPFL)

import biogeme.optimization as opt

import numpy as np

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

Defne the verbosity of Biogeme

import biogeme.messaging as msg
logger = msg.bioMessage()
logger.setSilent()
#logger.setDetailed()
#logger.setDebug()

Modified Cholesky factorization by Schnabel and Eskow (1999)¶

Example by Eskow and Schnabel, 1991¶

A = np.array([[0.3571,-0.1030,0.0274,-0.0459],[-0.1030,0.2525,0.0736,-0.3845],[0.0274,0.0736,0.2340,-0.2878],[-0.0459,-0.3845,-0.2878,0.5549]])
A

array([[ 0.3571, -0.103 ,  0.0274, -0.0459],
       [-0.103 ,  0.2525,  0.0736, -0.3845],
       [ 0.0274,  0.0736,  0.234 , -0.2878],
       [-0.0459, -0.3845, -0.2878,  0.5549]])

L,E,P = opt.schnabelEskow(A)
L

array([[ 0.7449161 ,  0.        ,  0.        ,  0.        ],
       [-0.06161768,  0.59439319,  0.        ,  0.        ],
       [-0.38635223,  0.00604629,  0.46941286,  0.        ],
       [-0.51616551, -0.22679419, -0.26511936,  0.00152451]])

The factor $L$ is such that $A + E = PLL^TP^T$. Therefore, the expression below should be the null matrix.

P @ L @ L.T @ P.T - E - A

array([[-5.55111512e-17, -1.38777878e-17,  0.00000000e+00,
         0.00000000e+00],
       [-1.38777878e-17,  5.55111512e-17,  0.00000000e+00,
         0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00]])

Example by Schnabel and Eskow (1999)¶

A2 = np.array([[1890.3,-1705.6,-315.8,3000.3],[-1705.6,1538.3,284.9,-2706.6],[-315.8,284.9,52.5,-501.2],[3000.3,-2706.6,-501.2,4760.8]])
A2

array([[ 1890.3, -1705.6,  -315.8,  3000.3],
       [-1705.6,  1538.3,   284.9, -2706.6],
       [ -315.8,   284.9,    52.5,  -501.2],
       [ 3000.3, -2706.6,  -501.2,  4760.8]])

L,E,P = opt.schnabelEskow(A2)
L

array([[ 6.89985507e+01,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00],
       [-7.26392069e+00,  3.19413321e-01,  0.00000000e+00,
         0.00000000e+00],
       [-3.92269109e+01, -1.28891153e-01,  4.44731720e-01,
         0.00000000e+00],
       [ 4.34835220e+01,  1.90522168e-01,  3.34584739e-01,
         1.71484739e-03]])

The factor $L$ is such that $A + E = PLL^TP^T$. Therefore, the expression below should be the null matrix.

P @ L @ L.T @ P.T - E - A2

array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        -9.09494702e-13]])

Rosenbrock problem¶

class rosenbrock(opt.functionToMinimize):
    def __init__(self):
        self.x = None
        
    def setVariables(self,x):
        self.x = x

    def f(self, batch = None):
        if batch is not None:
            raise excep.biogemeError('This function is not data driven.')
        n = len(self.x)
        f = sum(100.0 * (self.x[i+1]-self.x[i]**2)**2 + (1.0-self.x[i])**2 for i in range(n-1))
        return f

    def g(self):
        n = len(self.x)
        g = np.zeros(n)
        for i in range(n-1):
            g[i] = g[i] - 400 * self.x[i] * (self.x[i+1]-self.x[i]**2) - 2 * (1-self.x[i])
            g[i+1] = g[i+1] + 200 * (self.x[i+1]-self.x[i]**2)

        return g
    def h(self):
        n = len(self.x)
        H = np.zeros((n,n))
        for i in range(n-1):
            H[[i],[i]] = H[[i],[i]] - 400 * self.x[i+1] + 1200 * self.x[i]**2 + 2
            H[[i+1],[i]] = H[[i+1],[i]] - 400 * self.x[i]
            H[[i],[i+1]] = H[[i],[i+1]] - 400 * self.x[i]
            H[[i+1],[i+1]] = H[[i+1],[i+1]] + 200
        return H 

    def f_g(self, batch = None):
        if batch is not None:
            raise excep.biogemeError('This function is not data driven.')
        return self.f(), self.g()

    def f_g_h(self, batch= None):
        if batch is not None:
            raise excep.biogemeError('This function is not data driven.')
        return self.f(), self.g(), self.h()
    
    def f_g_bhhh(self, batch = None):
        raise excep.biogemeError('This function is not data driven.')

theFunction = rosenbrock()

Example 5.8 from Bierlaire (2015)¶

class example58(opt.functionToMinimize):
    def __init__(self):
        self.x = None
        
    def setVariables(self,x):
        self.x = x

    def f(self, batch = None):
        if batch is not None:
            raise excep.biogemeError('This function is not data driven.')
        n = len(self.x)
        f = 0.5 * self.x[0] * self.x[0] + self.x[0] * np.cos(self.x[1])
        return f

    def g(self):
        n = len(self.x)
        g = np.array([self.x[0]+np.cos(self.x[1]),-self.x[0]*np.sin(self.x[1])])
        return g

    def h(self):
        n = len(self.x)
        H = np.array([[1,-np.sin(self.x[1])],[-np.sin(self.x[1]),-self.x[0]*np.cos(self.x[1])]])
        return H 

    def f_g(self, batch = None):
        if batch is not None:
            raise excep.biogemeError('This function is not data driven.')
        return self.f(), self.g()

    def f_g_h(self, batch= None):
        if batch is not None:
            raise excep.biogemeError('This function is not data driven.')
        return self.f(), self.g(), self.h()
    
    def f_g_bhhh(self, batch = None):
        raise excep.biogemeError('This function is not data driven.')

ex58 = example58()

Line search algorithm¶

x = np.array([-1.5,1.5])
theFunction.setVariables(x)
f,g = theFunction.f_g()
alpha,nfev = opt.lineSearch(theFunction,x,f,g,-g)
print(f"alpha={alpha} nfev={nfev}")

alpha=0.0009765625 nfev=12

Newton with linesearch¶

Rosenbrock

x0 = np.array([-1.5,1.5])
xstar,nit,nfev,msg = opt.newtonLineSearch(theFunction,x0)
xstar

array([0.99999961, 0.99999918])

print(f"nit={nit} nfev={nfev} {msg}")

nit=22 nfev=76 Relative gradient = 2.0646130808264894e-07 <= 6.06273418136464e-06

Example 5.8

x0 = np.array([1,1])
xstar,nit,nfev,msg = opt.newtonLineSearch(ex58,x0)
xstar

array([1.        , 3.14159265])

print(f"nit={nit} nfev={nfev} {msg}")

nit=5 nfev=27 Relative gradient = 2.117211853102874e-10 <= 6.06273418136464e-06

Newton with trust region¶

Rosenbrock

x0 = np.array([-1.5,1.5])
xstar,nit,nfev,msg = opt.newtonTrustRegion(theFunction,x0)
xstar

array([0.99999995, 0.99999989])

print(f"nit={nit} nfev={nfev} {msg}")

nit=28 nfev=28 Relative gradient = 9.752977092603033e-10 <= 6.06273418136464e-06

Example 5.8

x0 = np.array([1.0,1.0])
xstar,nit,nfev,msg = opt.newtonTrustRegion(ex58,x0)
xstar

array([-1.00000000e+00, -1.56439954e-09])

print(f"nit={nit} nfev={nfev} {msg}")

nit=5 nfev=5 Relative gradient = 1.5037932097369974e-09 <= 6.06273418136464e-06

BFGS and linesearch¶

Rosenbrock

x0 = np.array([-1.5,1.5])
xstar,nit,nfev,msg = opt.bfgsLineSearch(theFunction,x0,maxiter=10000)
xstar

array([0.99999897, 0.99999797])

print(f"nit={nit} nfev={nfev} {msg}")

nit=32 nfev=120 Relative gradient = 1.5315863397701923e-07 <= 6.06273418136464e-06

Example 5.8

x0 = np.array([1,1])
xstar,nit,nfev,msg = opt.bfgsLineSearch(ex58,x0,maxiter=10000)
xstar

array([-1.00000142e+00,  3.57636862e-06])

print(f"nit={nit} nfev={nfev} {msg}")

nit=10 nfev=38 Relative gradient = 3.4378215567897343e-06 <= 6.06273418136464e-06

BFGS and trust region¶

Rosenbrock

x0 = np.array([-1.5,1.5])
xstar,nit,nfev,msg = opt.bfgsLineSearch(theFunction,x0,maxiter=10000)
xstar

array([0.99999897, 0.99999797])

print(f"nit={nit} nfev={nfev} {msg}")

nit=32 nfev=120 Relative gradient = 1.5315863397701923e-07 <= 6.06273418136464e-06

Example 5.8

x0 = np.array([1,1])
xstar,nit,nfev,msg = opt.bfgsTrustRegion(ex58,x0,maxiter=10000)
xstar

array([-9.99999972e-01,  1.58776353e-08])

print(f"nit={nit} nfev={nfev} {msg}")

nit=17 nfev=18 Relative gradient = 2.7200302000474536e-08 <= 6.06273418136464e-06

Biogeme example¶

import pandas as pd
import biogeme.biogeme as bio
import biogeme.database as db
import biogeme.models as models
from biogeme.expressions import *
df = pd.DataFrame({'Person':[1,1,1,2,2],
                   'Exclude':[0,0,1,0,1],
                   'Variable1':[1,2,3,4,5],
                   'Variable2':[10,20,30,40,50],
                   'Choice':[1,2,3,1,2],
                   'Av1':[0,1,1,1,1],
                   'Av2':[1,1,1,1,1],
                   'Av3':[0,1,1,1,1]})
myData = db.Database('test',df)

Choice=Variable('Choice')
Variable1=Variable('Variable1')
Variable2=Variable('Variable2')
beta1 = Beta('beta1',0,None,None,0)
beta2 = Beta('beta2',0,None,None,0)
V1 = beta1 * Variable1
V2 = beta2 * Variable2
V3 = 0
V ={1:V1,2:V2,3:V3}

likelihood = models.loglogit(V,av=None,i=Choice)
myBiogeme = bio.BIOGEME(myData,likelihood)
myBiogeme.modelName = 'simpleExample'
print(myBiogeme)

simpleExample: database [test]{'loglike': _bioLogLogitFullChoiceSet1:(beta1(0) * Variable1),2:(beta2(0) * Variable2),3:`0`)}
simpleExample: database [test]{'loglike': _bioLogLogitFullChoiceSet1:(beta1(0) * Variable1),2:(beta2(0) * Variable2),3:`0`)}

scipy¶

The default optimization algorithm is from scipy. It is possible to transfer parameters to the algorithm. We include here an irrelevant parameter to illustrate the warning.

results = myBiogeme.estimate(algoParameters={'myparam':3})
results.getEstimatedParameters()

/Users/michelbierlaire/opt/anaconda3/envs/python38/lib/python3.8/site-packages/biogeme-3.2.5-py3.8-macosx-10.9-x86_64.egg/biogeme/optimization.py:157: OptimizeWarning: Unknown solver options: myparam
  results =  sc.minimize(f_and_grad,initBetas,bounds=bounds,jac=True,options=opts)

print(f"nit={results.data.numberOfIterations} nfev={results.data.numberOfFunctionEval}: {results.data.optimizationMessage}")

nit=13 nfev=16: b'CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL'

Newton with linesearch¶

results = myBiogeme.estimate(algorithm=opt.newtonLineSearchForBiogeme)
results.getEstimatedParameters()

print(f"nit={results.data.numberOfIterations} nfev={results.data.numberOfFunctionEval}: {results.data.optimizationMessage}")

nit=3 nfev=9: Relative gradient = 3.2893676167559224e-07 <= 6.06273418136464e-06

Changing the requested precision

results = myBiogeme.estimate(algorithm=opt.newtonLineSearchForBiogeme,algoParameters = {'tolerance': 0.1})
results.getEstimatedParameters()

print(f"nit={results.data.numberOfIterations} nfev={results.data.numberOfFunctionEval}: {results.data.optimizationMessage}")

nit=2 nfev=6: Relative gradient = 0.014746524905599795 <= 0.1

CFSQP algorithm¶

results = myBiogeme.estimate(algorithm=None)
results.getEstimatedParameters()

print(f"nit={results.data.numberOfIterations} nfev={results.data.numberOfFunctionEval}: {results.data.optimizationMessage}")

nit=7 nfev=20: b'Normal termination. Obj: 6.05545e-06 Const: 6.05545e-06'

Newton with trust region¶

results = myBiogeme.estimate(algorithm=opt.newtonTrustRegionForBiogeme)
results.getEstimatedParameters()

print(f"nit={results.data.numberOfIterations} nfev={results.data.numberOfFunctionEval}: {results.data.optimizationMessage}")

nit=3 nfev=3: Relative gradient = 3.289367618372832e-07 <= 6.06273418136464e-06

We illustrate the parameters. We use the truncated conjugate gradient instead of dogleg for the trust region subproblem, starting with a small trust region of radius 0.001, and a maximum of 3 iterations.

results = myBiogeme.estimate(algorithm=opt.newtonTrustRegionForBiogeme,algoParameters={'dogleg':False,'radius':0.001,'maxiter':3})
results.getEstimatedParameters()

print(f"nit={results.data.numberOfIterations} nfev={results.data.numberOfFunctionEval}: {results.data.optimizationMessage}")

nit=3 nfev=3: Maximum number of iterations reached: 3

Changing the requested precision

results = myBiogeme.estimate(algorithm=opt.newtonTrustRegionForBiogeme,algoParameters={'tolerance':0.1})
results.getEstimatedParameters()

print(f"nit={results.data.numberOfIterations} nfev={results.data.numberOfFunctionEval}: {results.data.optimizationMessage}")

nit=2 nfev=2: Relative gradient = 0.014746524905603029 <= 0.1

BFGS with line search¶

results = myBiogeme.estimate(algorithm=opt.bfgsLineSearchForBiogeme)
results.getEstimatedParameters()

print(f"nit={results.data.numberOfIterations} nfev={results.data.numberOfFunctionEval}: {results.data.optimizationMessage}")

nit=5 nfev=23: Relative gradient = 5.933646980468922e-07 <= 6.06273418136464e-06

BFGS with trust region¶

results = myBiogeme.estimate(algorithm=opt.bfgsTrustRegionForBiogeme)
results.getEstimatedParameters()

print(f"nit={results.data.numberOfIterations} nfev={results.data.numberOfFunctionEval}: {results.data.optimizationMessage}")

nit=14 nfev=15: Relative gradient = 5.039394505504404e-07 <= 6.06273418136464e-06

	Value	Std err	t-test	p-value	Rob. Std err	Rob. t-test	Rob. p-value
beta1	0.144546	0.391017	0.369666	0.711632	0.366198	0.394720	0.693049
beta2	0.023502	0.036947	0.636086	0.524720	0.034280	0.685574	0.492982

	Value	Std err	t-test	p-value	Rob. Std err	Rob. t-test	Rob. p-value
beta1	0.144545	0.391017	0.369665	0.711632	0.366198	0.394720	0.693050
beta2	0.023502	0.036947	0.636086	0.524720	0.034280	0.685573	0.492982

	Value	Std err	t-test	p-value	Rob. Std err	Rob. t-test	Rob. p-value
beta1	0.144317	0.390742	0.369340	0.711874	0.365691	0.394641	0.693108
beta2	0.023428	0.036930	0.634377	0.525835	0.034256	0.683887	0.494047

	Value	Std err	t-test	p-value	Rob. Std err	Rob. t-test	Rob. p-value
beta1	0.144544	0.391018	0.369661	0.711635	0.366199	0.394714	0.693054
beta2	0.023502	0.036947	0.636090	0.524718	0.034280	0.685578	0.492979

	Value	Std err	t-test	p-value	Rob. Std err	Rob. t-test	Rob. p-value
beta1	0.144545	0.391017	0.369665	0.711632	0.366198	0.394720	0.693050
beta2	0.023502	0.036947	0.636086	0.524720	0.034280	0.685573	0.492982

	Value	Std err	t-test	p-value	Rob. Std err	Rob. t-test	Rob. p-value
beta1	0.000053	0.348093	0.000151	0.99988	0.308910	0.000170	0.999864
beta2	0.007000	0.032583	0.214830	0.82990	0.030019	0.233173	0.815627