Source code for biogeme.algorithms

"""
Optimization algorithms.

:author: Michel Bierlaire
:date: Sun Apr  5 16:48:54 2020

"""

# There seems to be a bug in PyLint.
# pylint: disable=invalid-unary-operand-type, no-member

# Too constraining
# pylint: disable=invalid-name
# pylint: disable=too-many-lines, too-many-locals
# pylint: disable=too-many-arguments, too-many-branches
# pylint: disable=too-many-statements, too-many-return-statements
# pylint: disable=bare-except


from abc import abstractmethod
import numpy as np
import scipy.linalg as la
import biogeme.exceptions as excep
import biogeme.messaging as msg


logger = msg.bioMessage()


class functionToMinimize:
    """This is an abstract class. The actual function to minimize
    must be implemented in a concrete class deriving from this one.
    """
    @abstractmethod
    def setVariables(self, x):
        """Set the values of the variables for which the function
        has to be calculated.

        :param x: values
        :type x: numpy.array
        """
    @abstractmethod
    def f(self, batch=None):
        """Calculate the value of the function

        :param batch: for data driven functions (such as a log
            likelihood function), it is possible to approximate the
            value of the function using a sample of the data called a
            batch. This argument is a value between 0 and 1
            representing the percentage of the data that should be
            used for the random batch. If None, the full data set is
            used. Default: None.
        :type batch: float

        :return: value of the function
        :rtype: float
        """
    @abstractmethod
    def f_g(self, batch=None):
        """Calculate the value of the function and the gradient

        :param batch: for data driven functions (such as a log
            likelihood function), it is possible to approximate the
            value of the function using a sample of the data called a
            batch. This argument is a value between 0 and 1
            representing the percentage of the data that should be
            used for the random batch. If None, the full data set is
            used. Default: None.
        :type batch: float

        :return: value of the function and the gradient
        :rtype: tuple float, numpy.array
        """
    @abstractmethod
    def f_g_h(self, batch=None):
        """Calculate the value of the function, the gradient and the Hessian

        :param batch: for data driven functions (such as a log
            likelihood function), it is possible to approximate the
            value of the function using a sample of the data called a
            batch. This argument is a value between 0 and 1
            representing the percentage of the data that should be
            used for the random batch. If None, the full data set is
            used. Default: None.
        :type batch: float

        :return: value of the function, the gradient and the Hessian
        :rtype: tuple float, numpy.array, numpy.array
        """
    @abstractmethod
    def f_g_bhhh(self, batch=None):
        """Calculate the value of the function, the gradient and
        the BHHH matrix

        :param batch: for data driven functions (such as a log
            likelihood function), it is possible to approximate the
            value of the function using a sample of the data called a
            batch. This argument is a value between 0 and 1
            representing the percentage of the data that should be
            used for the random batch. If None, the full data set is
            used. Default: None.
        :type batch: float

        :return: value of the function, the gradient and the BHHH matrix
        :rtype: tuple float, numpy.array, numpy.array
        """
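
# A minimal concrete implementation of functionToMinimize, for
# illustration only. This class is hypothetical (it is not part of the
# module and not used by it): it implements the Rosenbrock function
# f(x, y) = 100 (y - x^2)^2 + (1 - x)^2, whose minimum is at (1, 1).
class rosenbrockExample(functionToMinimize):
    """Hypothetical example of a concrete implementation: the
    Rosenbrock function.
    """

    def __init__(self):
        self.x = None

    def setVariables(self, x):
        self.x = np.asarray(x, dtype=float)

    def f(self, batch=None):
        x1, x2 = self.x
        return 100.0 * (x2 - x1**2) ** 2 + (1.0 - x1) ** 2

    def f_g(self, batch=None):
        x1, x2 = self.x
        g = np.array(
            [
                -400.0 * x1 * (x2 - x1**2) - 2.0 * (1.0 - x1),
                200.0 * (x2 - x1**2),
            ]
        )
        return self.f(), g

    def f_g_h(self, batch=None):
        x1, x2 = self.x
        f, g = self.f_g()
        H = np.array(
            [
                [-400.0 * (x2 - 3.0 * x1**2) + 2.0, -400.0 * x1],
                [-400.0 * x1, 200.0],
            ]
        )
        return f, g, H

    def f_g_bhhh(self, batch=None):
        # The BHHH matrix is only meaningful for sample-based
        # functions; the outer product of the gradient is used here
        # as a placeholder.
        f, g = self.f_g()
        return f, g, np.outer(g, g)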
class bioBounds:
    """This class is designed for the management of simple bound
    constraints.
    """
    def __init__(self, b):
        """
        :param b: list of tuples (ell, u) containing the lower and
            upper bounds for each free parameter.
        :type b: list(tuple)

        :raises biogeme.exceptions.biogemeError: if the bounds are
            incompatible
        """

        def noneToMinusInfinity(x):
            if x is None:
                return -np.inf
            return x

        def noneToPlusInfinity(x):
            if x is None:
                return np.inf
            return x

        self.bounds = b
        """list of tuples (ell, u) containing the lower and upper bounds
        for each free parameter.
        """
        self.n = len(b)  #: number of optimization variables
        self.lowerBounds = [noneToMinusInfinity(bb[0]) for bb in b]
        """ List of lower bounds """
        self.upperBounds = [noneToPlusInfinity(bb[1]) for bb in b]
        """ List of upper bounds """
        wrongBounds = np.array(
            [
                bb[0] > bb[1]
                for bb in b
                if bb[0] is not None and bb[1] is not None
            ]
        )
        if wrongBounds.any():
            errorMsg = (
                f'Incompatible bounds for index (or indices) '
                f'{np.nonzero(wrongBounds)[0]}: '
                f'{[b[i] for i in np.nonzero(wrongBounds)[0]]}'
            )
            raise excep.biogemeError(errorMsg)
    def __str__(self):
        return self.bounds.__str__()

    def __repr__(self):
        return self.bounds.__repr__()
    def project(self, x):
        """Project a point onto the feasible domain defined by the bounds.

        :param x: point to project
        :type x: numpy.array

        :return: projected point
        :rtype: numpy.array

        :raises biogeme.exceptions.biogemeError: if the dimensions
            are inconsistent
        """
        if len(x) != self.n:
            raise excep.biogemeError(
                f'Incompatible size: {len(x)} and {self.n}'
            )
        # Work on a copy, so that the input array is not modified in
        # place. Missing bounds have been replaced by -inf or +inf at
        # construction, so the comparisons below are always valid.
        y = np.array(x, dtype=float)
        for i in range(self.n):
            if x[i] < self.lowerBounds[i]:
                y[i] = self.lowerBounds[i]
            if x[i] > self.upperBounds[i]:
                y[i] = self.upperBounds[i]
        return y
    def intersect(self, otherBounds):
        """
        Create a bounds object representing the intersection of
        two regions.

        :param otherBounds: other bound object that must be intersected.
        :type otherBounds: class bioBounds

        :return: bound object, intersection of the two.
        :rtype: class bioBounds

        :raises biogeme.exceptions.biogemeError: if the dimensions
            are inconsistent
        """
        if otherBounds.n != self.n:
            raise excep.biogemeError(
                f'Incompatible size: {otherBounds.n} and {self.n}'
            )
        newBounds = [
            (
                np.maximum(self.lowerBounds[i], otherBounds.lowerBounds[i]),
                np.minimum(self.upperBounds[i], otherBounds.upperBounds[i]),
            )
            for i in range(self.n)
        ]
        # Note that an exception will be raised at the creation of the
        # new 'bioBounds' object if the bounds are incompatible.
        result = bioBounds(newBounds)
        return result
    def intersectionWithTrustRegion(self, x, delta):
        """Create a bioBounds object representing the intersection
        between the feasible domain and the trust region.

        :param x: center of the trust region
        :type x: numpy.array
        :param delta: radius of the trust region (infinity norm)
        :type delta: float

        :return: intersection between the feasible region and the
            trust region
        :rtype: class bioBounds

        :raises biogeme.exceptions.biogemeError: if the dimensions
            are inconsistent
        """
        if len(x) != self.n:
            raise excep.biogemeError(
                f'Incompatible size: {len(x)} and {self.n}'
            )
        trustRegion = bioBounds([(xk - delta, xk + delta) for xk in x])
        return self.intersect(trustRegion)
    def subspace(self, selectedVariables):
        """Generate a bioBounds object for selected variables

        :param selectedVariables: boolean vector. If an entry is True,
            the corresponding variable is considered.
        :type selectedVariables: numpy.array(bool)

        :return: bound object
        :rtype: class bioBounds

        :raises biogeme.exceptions.biogemeError: if the dimensions
            are inconsistent
        """
        if len(selectedVariables) != self.n:
            raise excep.biogemeError(
                f'Incompatible size: {len(selectedVariables)} and {self.n}'
            )
        return bioBounds(
            [i for i, j in zip(self.bounds, selectedVariables) if j]
        )
    def feasible(self, x):
        """Check whether a point verifies the bound constraints

        :param x: point to check
        :type x: numpy.array

        :return: True if x is feasible, False otherwise.
        :rtype: bool

        :raises biogeme.exceptions.biogemeError: if the dimensions
            are inconsistent
        """
        if len(x) != self.n:
            raise excep.biogemeError(
                f'Incompatible size: {len(x)} and {self.n}'
            )
        for i in range(self.n):
            if (
                self.lowerBounds[i] is not None
                and self.lowerBounds[i] - x[i] > np.finfo(float).eps
            ):
                return False
            if (
                self.upperBounds[i] is not None
                and x[i] - self.upperBounds[i] > np.finfo(float).eps
            ):
                return False
        return True
    def maximumStep(self, x, d):
        """Calculates the maximum step that can be performed along a
        direction while staying feasible.

        :param x: reference point
        :type x: numpy.array
        :param d: direction
        :type d: numpy.array

        :return: the largest alpha such that x + alpha * d is feasible,
            and the list of indices achieving this value.
        :rtype: tuple float, numpy.array

        :raises biogeme.exceptions.biogemeError: if the point is
            infeasible
        """
        if not self.feasible(x):
            raise excep.biogemeError(f'Infeasible point: {x}')
        alpha = np.array(
            [
                (self.upperBounds[i] - x[i]) / d[i]
                if d[i] > np.finfo(float).eps
                else (self.lowerBounds[i] - x[i]) / d[i]
                if d[i] < -np.finfo(float).eps
                else np.inf
                for i in range(self.n)
            ]
        )
        m = np.amin(alpha)
        return m, np.where(alpha == m)[0]
    def activity(self, x, epsilon=np.finfo(float).eps):
        """Determines the activity status of each variable.

        :param x: point for which the activity must be determined.
        :type x: numpy.array
        :param epsilon: a bound is considered active if the distance
            to it is less than epsilon.
        :type epsilon: float

        :return: a vector, same length as x, where each entry reports
            the activity of the corresponding variable:

            - 0 if no bound is active
            - -1 if the lower bound is active
            - 1 if the upper bound is active

        :rtype: numpy.array

        :raises biogeme.exceptions.biogemeError: if the vector x is
            not feasible
        :raises biogeme.exceptions.biogemeError: if the dimensions of
            x and the bounds do not match.
        """
        if len(x) != self.n:
            raise excep.biogemeError(
                f'Incompatible size: {len(x)} and {self.n}'
            )
        if not self.feasible(x):
            raise excep.biogemeError(
                f'{x} is not feasible for the bounds {self.bounds}'
            )
        activity = np.zeros_like(x, dtype=int)
        for i in range(self.n):
            if (
                self.lowerBounds[i] != -np.inf
                and x[i] - self.lowerBounds[i] <= epsilon
            ):
                activity[i] = -1
            elif (
                self.upperBounds[i] != np.inf
                and self.upperBounds[i] - x[i] <= epsilon
            ):
                activity[i] = 1
        return activity
    def breakpoints(self, x, d):
        """Calculates the breakpoints of the projection of the
        direction d, starting from x, onto the intersection of the
        bound constraints.

        :param x: current point
        :type x: numpy.array
        :param d: search direction
        :type d: numpy.array

        :return: list of tuples (index, value), where index is the
            index of the variable, and value the value of the
            corresponding breakpoint.
        :rtype: list(tuple(int, float))

        :raises biogeme.exceptions.biogemeError: if the dimensions
            are inconsistent
        :raises biogeme.exceptions.biogemeError: if x is infeasible
        """
        if len(d) != self.n:
            raise excep.biogemeError(
                f'Incompatible size: {self.n} and {len(d)}'
            )
        bp = [
            (self.upperBounds[i] - x[i]) / d[i]
            if d[i] > np.finfo(float).eps
            else (self.lowerBounds[i] - x[i]) / d[i]
            if d[i] < -np.finfo(float).eps
            else 0
            for i in range(self.n)
        ]
        if any(b < 0 for b in bp):
            raise excep.biogemeError('Infeasible point')
        return sorted(enumerate(bp), key=lambda b: b[1])
    def generalizedCauchyPoint(self, xk, gk, H, direction):
        """Implementation of Step 2 of the Specific Algorithm by
        `Conn et al. (1988)`_.

        .. _`Conn et al. (1988)`:
           https://www.ams.org/journals/mcom/1988-50-182/S0025-5718-1988-0929544-3/S0025-5718-1988-0929544-3.pdf

        The quadratic model is defined as

        .. math:: m(x) = f(x_k) + (x - x_k)^T g_k +
                  \\frac{1}{2} (x-x_k)^T H (x-x_k).

        :param xk: current point
        :type xk: numpy.array. Dimension n.
        :param gk: vector g involved in the quadratic model definition.
        :type gk: numpy.array. Dimension n.
        :param H: matrix H involved in the quadratic model definition.
        :type H: numpy.array. Dimension n x n.
        :param direction: direction along which the GCP is searched.
        :type direction: numpy.array. Dimension n.

        :return: generalized Cauchy point based on inexact line search.
        :rtype: numpy.array. Dimension n.

        :raises biogeme.exceptions.biogemeError: if the dimensions
            are inconsistent
        :raises biogeme.exceptions.biogemeError: if xk is infeasible
        """
        if len(xk) != self.n:
            raise excep.biogemeError(
                f'Incompatible size: {len(xk)} and {self.n}'
            )
        if len(gk) != self.n:
            raise excep.biogemeError(
                f'Incompatible size: {len(gk)} and {self.n}'
            )
        if H.shape[0] != self.n or H.shape[1] != self.n:
            raise excep.biogemeError(
                f'Incompatible size: {H.shape} and {self.n}'
            )
        if not self.feasible(xk):
            raise excep.biogemeError('Infeasible iterate')

        x = xk
        g = gk - H @ xk
        d = direction
        n = len(xk)
        J = set()
        fprime = np.inner(gk, d)
        if fprime >= 0:
            if n <= 10:
                logger.warning(
                    f'GCP: direction {d} is not a descent '
                    f'direction at {x}.'
                )
            else:
                logger.warning('GCP: not a descent direction.')
        fsecond = np.inner(d, H @ d)
        while len(J) < self.n:
            delta_t, ind = self.maximumStep(x, d)
            # J is the set of active variables
            J.update(ind)
            # Test whether the GCP has been found
            ratio = -fprime / fsecond
            if fsecond > 0 and 0 < ratio < delta_t:
                x = x + ratio * d
                return x
            # Update line derivatives
            bd = np.zeros_like(d)
            bd[ind] = d[ind]
            b = H @ bd
            # In theory, x + delta_t * d must be feasible. However,
            # there may be some numerical problem. Therefore, we
            # project it on the feasible domain to make sure to obtain
            # a feasible point.
            x = self.project(x + delta_t * d)
            dg = np.sum([d[i] * g[i] for i in ind])
            fprime += delta_t * fsecond - np.inner(b, x) - dg
            fsecond += np.inner(b, bd - 2 * d)
            d[ind] = 0.0
            if fprime >= 0:
                return x
        return x
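
# Illustrative sketch: basic usage of bioBounds. This helper is
# hypothetical (not part of the module); the values in the comments are
# the expected results.
def _exampleBioBounds():
    # A None entry means that the corresponding bound is absent.
    myBounds = bioBounds([(0, 10), (None, 5)])
    isIn = myBounds.feasible(np.array([1.0, 1.0]))  # True
    projected = myBounds.project(np.array([-2.0, 7.0]))  # [0., 5.]
    # Largest feasible step from (1, 1) along (1, 1): 4, limited by
    # the upper bound of the second variable.
    step, indices = myBounds.maximumStep(
        np.array([1.0, 1.0]), np.array([1.0, 1.0])
    )
    return isIn, projected, step, indices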
def schnabelEskow(
    A,
    tau=np.finfo(np.float64).eps ** 0.3333,
    taubar=np.finfo(np.float64).eps ** 0.6666,
    mu=0.1,
):
    """Modified Cholesky factorization by `Schnabel and Eskow (1999)`_.

    .. _`Schnabel and Eskow (1999)`:
       https://doi.org/10.1137/s105262349833266x

    If the matrix is 'safely' positive definite, the output is the
    classical Cholesky factor. If not, the diagonal elements are
    inflated in order to make it positive definite. The factor
    :math:`L` is such that :math:`A + E = PLL^TP^T`, where :math:`E`
    is a diagonal matrix containing the terms added to the diagonal,
    :math:`P` is a permutation matrix, and :math:`L` is a lower
    triangular matrix.

    :param A: matrix to factorize. Must be square and symmetric.
    :type A: numpy.array
    :param tau: tolerance factor.
        Default: :math:`\\varepsilon^{\\frac{1}{3}}`.
        See `Schnabel and Eskow (1999)`_
    :type tau: float
    :param taubar: tolerance factor.
        Default: :math:`\\varepsilon^{\\frac{2}{3}}`.
        See `Schnabel and Eskow (1999)`_
    :type taubar: float
    :param mu: tolerance factor. Default: 0.1.
        See `Schnabel and Eskow (1999)`_
    :type mu: float

    :return: tuple :math:`L`, :math:`E`, :math:`P`, where
        :math:`A + E = PLL^TP^T`.
    :rtype: numpy.array, numpy.array, numpy.array

    :raises biogeme.exceptions.biogemeError: if the matrix A is not square.
    :raises biogeme.exceptions.biogemeError: if the matrix A is not
        symmetric.
    """

    def pivot(j):
        A[j, j] = np.sqrt(A[j, j])
        for i in range(j + 1, dim):
            A[j, i] = A[i, j] = A[i, j] / A[j, j]
            A[i, j + 1 : i + 1] -= A[i, j] * A[j + 1 : i + 1, j]
            A[j + 1 : i + 1, i] = A[i, j + 1 : i + 1]

    def permute(i, j):
        A[[i, j]] = A[[j, i]]
        E[[i, j]] = E[[j, i]]
        A[:, [i, j]] = A[:, [j, i]]
        P[:, [i, j]] = P[:, [j, i]]

    A = A.astype(np.float64)
    dim = A.shape[0]
    if A.shape[1] != dim:
        raise excep.biogemeError('The matrix must be square')
    if not np.all(np.abs(A - A.T) < np.sqrt(np.finfo(np.float64).eps)):
        raise excep.biogemeError('The matrix must be symmetric')
    E = np.zeros(dim, dtype=np.float64)
    P = np.identity(dim)
    phaseOne = True
    gamma = abs(A.diagonal()).max()
    j = 0
    while j < dim and phaseOne is True:
        a_max = A.diagonal()[j:].max()
        a_min = A.diagonal()[j:].min()
        if a_max < taubar * gamma or a_min < -mu * a_max:
            phaseOne = False
            break
        # Pivot on maximum diagonal of remaining submatrix
        i = j + np.argmax(A.diagonal()[j:])
        if i != j:
            # Switch rows and columns of i and j of A
            permute(i, j)
        if j < dim - 1 and (
            (
                A.diagonal()[j + 1 :] - A[j + 1 :, j] ** 2 / A.diagonal()[j]
            ).min()
            < -mu * gamma
        ):
            phaseOne = False  # go to phase two
        else:
            # Perform jth iteration of factorization
            pivot(j)
            j += 1

    # Phase two, A not positive definite
    if not phaseOne:
        if j == dim - 1:
            E[-1] = delta = -A[-1, -1] + max(
                tau * (-A[-1, -1]) / (1 - tau), taubar * gamma
            )
            A[-1, -1] += delta
            A[-1, -1] = np.sqrt(A[-1, -1])
        else:
            deltaPrev = 0.0
            g = np.zeros(dim)
            k = j - 1  # k = number of iterations performed in phase one
            # Calculate lower Gerschgorin bounds of A[k+1]
            for i in range(k + 1, dim):
                g[i] = (
                    A[i, i]
                    - abs(A[i, k + 1 : i]).sum()
                    - abs(A[i + 1 : dim, i]).sum()
                )
            # Modified Cholesky decomposition
            for j in range(k + 1, dim - 2):
                # Pivot on maximum lower Gerschgorin bound estimate
                i = j + np.argmax(g[j:])
                if i != j:
                    # Switch rows and columns of i and j of A
                    permute(i, j)
                # Calculate E[j, j] and add to diagonal
                norm_j = abs(A[j + 1 : dim, j]).sum()
                E[j] = delta = max(
                    0, -A[j, j] + max(norm_j, taubar * gamma), deltaPrev
                )
                if delta > 0:
                    A[j, j] += delta
                    deltaPrev = delta  # deltaPrev will contain E_inf
                # Update Gerschgorin bound estimates
                if A[j, j] != norm_j:
                    temp = 1.0 - norm_j / A[j, j]
                    g[j + 1 :] += abs(A[j + 1 :, j]) * temp
                # Perform jth iteration of factorization
                pivot(j)

            # Final 2 by 2 submatrix
            e = np.linalg.eigvalsh(A[-2:, -2:])
            e.sort()
            E[-2] = E[-1] = delta = max(
                0,
                -e[0] + max(tau * (e[1] - e[0]) / (1 - tau), taubar * gamma),
                deltaPrev,
            )
            if delta > 0:
                A[-2, -2] += delta
                A[-1, -1] += delta
                deltaPrev = delta
            A[-2, -2] = np.sqrt(A[-2, -2])  # overwrites A[-2, -2]
            A[-1, -2] = A[-1, -2] / A[-2, -2]  # overwrites A[-1, -2]
            A[-2, -1] = A[-1, -2]  # overwrites A[-2, -1]
            A[-1, -1] = np.sqrt(A[-1, -1] - A[-1, -2] * A[-1, -2])

    return np.tril(A), np.diag(P @ E), P
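
# Illustrative sketch (hypothetical helper, not part of the module):
# factorizing an indefinite matrix and checking the identity
# A + E = P L L^T P^T stated in the docstring above.
def _exampleSchnabelEskow():
    A = np.array([[1.0, 2.0], [2.0, 1.0]])  # eigenvalues 3 and -1
    L, E, P = schnabelEskow(A)
    # E inflates the diagonal so that the Cholesky factor exists.
    assert np.allclose(P @ L @ L.T @ P.T, A + E)
    return L, E, P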
def lineSearch(fct, x, f, g, d, alpha0=1.0, beta1=1.0e-4, beta2=0.99, lbd=2.0):
    """
    Calculate a step along a direction that satisfies both Wolfe conditions

    :param fct: object to calculate the objective function and its
        derivatives.
    :type fct: optimization.functionToMinimize
    :param x: current iterate.
    :type x: numpy.array
    :param f: value of the objective function at the current iterate.
    :type f: float
    :param g: value of the gradient at the current iterate.
    :type g: numpy.array
    :param d: descent direction.
    :type d: numpy.array
    :param alpha0: first step to test.
    :type alpha0: float
    :param beta1: parameter of the first Wolfe condition.
    :type beta1: float
    :param beta2: parameter of the second Wolfe condition.
    :type beta2: float
    :param lbd: expansion factor for a short step.
    :type lbd: float

    :return: a step verifying both Wolfe conditions, and the number of
        function evaluations.
    :rtype: tuple float, int

    :raises biogeme.exceptions.biogemeError: if ``lbd`` :math:`\\leq` 1
    :raises biogeme.exceptions.biogemeError: if ``alpha0`` :math:`\\leq` 0
    :raises biogeme.exceptions.biogemeError: if ``beta1``
        :math:`\\geq` ``beta2``
    :raises biogeme.exceptions.biogemeError: if ``d`` is not a descent
        direction
    """
    if lbd <= 1:
        raise excep.biogemeError(f'lambda is {lbd} and must be > 1')
    if alpha0 <= 0:
        raise excep.biogemeError(f'alpha0 is {alpha0} and must be > 0')
    if beta1 >= beta2:
        errorMsg = (
            f'Incompatible Wolfe cond. parameters: '
            f'beta1={beta1} is greater than '
            f'beta2={beta2}'
        )
        raise excep.biogemeError(errorMsg)
    nfev = 1
    deriv = np.inner(g, d)
    if deriv >= 0:
        raise excep.biogemeError(f'd is not a descent direction: {deriv} >= 0')
    alpha = alpha0
    alphal = 0
    alphar = np.finfo(float).max
    finished = False
    while not finished:
        xnew = x + alpha * d
        fct.setVariables(xnew)
        fnew, gnew = fct.f_g()
        nfev += 1
        finished = True
        # First Wolfe condition violated?
        if fnew > f + alpha * beta1 * deriv:
            alphar = alpha
            alpha = (alphal + alphar) / 2.0
            finished = False
        # Second Wolfe condition violated?
        elif np.inner(gnew, d) < beta2 * deriv:
            alphal = alpha
            if alphar == np.finfo(float).max:
                alpha = lbd * alpha
            else:
                alpha = (alphal + alphar) / 2.0
            finished = False
    return alpha, nfev
def relativeGradient(x, f, g, typx, typf):
    """Calculates the relative gradients.

    It is typically used as a stopping criterion.

    :param x: current iterate.
    :type x: numpy.array
    :param f: value of f(x)
    :type f: float
    :param g: :math:`\\nabla f(x)`, gradient of f at x
    :type g: numpy.array
    :param typx: typical value for x.
    :type typx: numpy.array
    :param typf: typical value for f.
    :type typf: float

    :return: relative gradient

    .. math:: \\max_{i=1,\\ldots,n} \\left|
              \\frac{(\\nabla f(x))_i \\max(|x_i|, \\text{typx}_i)}
              {\\max(|f(x)|, \\text{typf})} \\right|

    :rtype: float
    """
    relgrad = np.array(
        [
            g[i] * max(abs(x[i]), typx[i]) / max(abs(f), typf)
            for i in range(len(x))
        ]
    )
    result = abs(relgrad).max()
    if np.isfinite(result):
        return result
    return np.finfo(float).max
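
# Illustrative sketch (hypothetical helper, not part of the module):
# at x = (1, 2) with f(x) = 3 and gradient (0.6, 0.3), the relative
# gradient is max(0.6 * 1 / 3, 0.3 * 2 / 3) = 0.2.
def _exampleRelativeGradient():
    x = np.array([1.0, 2.0])
    g = np.array([0.6, 0.3])
    return relativeGradient(x, 3.0, g, np.ones(2), 1.0)  # about 0.2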
def newtonLineSearch(
    fct, x0, eps=np.finfo(np.float64).eps ** 0.3333, maxiter=100
):
    """
    Newton method with inexact line search (Wolfe conditions)

    :param fct: object to calculate the objective function and its
        derivatives.
    :type fct: optimization.functionToMinimize
    :param x0: starting point
    :type x0: numpy.array
    :param eps: the algorithm stops when this precision is reached.
        Default: :math:`\\varepsilon^{\\frac{1}{3}}`
    :type eps: float
    :param maxiter: the algorithm stops if this number of iterations
        is reached. Default: 100
    :type maxiter: int

    :return: x, messages

        - x is the solution generated by the algorithm,
        - messages is a dictionary describing information about the
          algorithm

    :rtype: numpy.array, dict(str:object)
    """
    xk = x0
    fct.setVariables(xk)
    f, g, H = fct.f_g_h()
    nfev = 1
    ngev = 1
    nhev = 1
    typx = np.ones(np.asarray(xk).shape)
    typf = max(np.abs(f), 1.0)
    relgrad = relativeGradient(xk, f, g, typx, typf)
    if relgrad <= eps:
        message = f'Relative gradient = {relgrad:.3g} <= {eps:.2g}'
        messages = {
            'Algorithm': 'Unconstrained Newton with line search',
            'Relative gradient': relgrad,
            'Number of iterations': 0,
            'Number of function evaluations': nfev,
            'Number of gradient evaluations': ngev,
            'Number of hessian evaluations': nhev,
            'Cause of termination': message,
        }
        return xk, messages
    k = 0
    cont = True
    while cont:
        # Solve the Newton equation with the modified Cholesky factorization
        L, _, P = schnabelEskow(H)
        y3 = -P.T @ g
        y2 = la.solve_triangular(L, y3, lower=True)
        y1 = la.solve_triangular(L.T, y2, lower=False)
        d = P @ y1
        alpha, nfls = lineSearch(fct, xk, f, g, d)
        nfev += nfls
        ngev += nfls
        xk = xk + alpha * d
        fct.setVariables(xk)
        f, g, H = fct.f_g_h()
        nfev += 1
        ngev += 1
        nhev += 1
        k += 1
        typf = max(np.abs(f), 1.0)
        relgrad = relativeGradient(xk, f, g, typx, typf)
        if relgrad <= eps:
            message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
            cont = False
        if k == maxiter:
            message = f'Maximum number of iterations reached: {maxiter}'
            cont = False
        logger.detailed(
            f'{k} f={f:10.7g} relgrad={relgrad:6.2g} alpha={alpha:6.2g}'
        )
    messages = {
        'Algorithm': 'Unconstrained Newton with line search',
        'Relative gradient': relgrad,
        'Number of iterations': k,
        'Number of function evaluations': nfev,
        'Number of gradient evaluations': ngev,
        'Number of hessian evaluations': nhev,
        'Cause of termination': message,
    }
    return xk, messages
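
# Illustrative sketch (hypothetical helper, not part of the module):
# minimizing the Rosenbrock function with the hypothetical
# rosenbrockExample class sketched earlier. The solution should be
# close to (1, 1).
def _exampleNewtonLineSearch():
    theFunction = rosenbrockExample()
    xstar, messages = newtonLineSearch(theFunction, np.array([-1.5, 1.5]))
    return xstar, messages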
def trustRegionIntersection(dc, d, delta):
    """Calculates the intersection with the boundary of the trust region.

    Consider a trust region of radius :math:`\\delta`, centered at
    :math:`\\hat{x}`. Let :math:`x_c` be in the trust region, and
    :math:`d_c = x_c - \\hat{x}`, so that :math:`\\|d_c\\| \\leq
    \\delta`. Let :math:`x_d` be out of the trust region, and
    :math:`d_d = x_d - \\hat{x}`, so that :math:`\\|d_d\\| \\geq
    \\delta`. We calculate :math:`\\lambda` such that

    .. math:: \\| d_c + \\lambda (d_d - d_c)\\| = \\delta

    :param dc: xc - xhat.
    :type dc: numpy.array
    :param d: dd - dc.
    :type d: numpy.array
    :param delta: radius of the trust region.
    :type delta: float

    :return: :math:`\\lambda` such that
        :math:`\\| d_c + \\lambda (d_d - d_c)\\| = \\delta`
    :rtype: float
    """
    a = np.inner(d, d)
    b = 2 * np.inner(dc, d)
    c = np.inner(dc, dc) - delta**2
    discriminant = b * b - 4.0 * a * c
    return (-b + np.sqrt(discriminant)) / (2 * a)
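
# Illustrative sketch (hypothetical helper, not part of the module):
# with d_c = 0 (the center of the region) and d = (1, 0), the step to
# the boundary of a trust region of radius 2 is lambda = 2.
def _exampleTrustRegionIntersection():
    dc = np.array([0.0, 0.0])
    d = np.array([1.0, 0.0])
    return trustRegionIntersection(dc, d, delta=2.0)  # 2.0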
def cauchyNewtonDogleg(g, H):
    """Calculate the Cauchy, the Newton and the dogleg points.

    The Cauchy point is defined as

    .. math:: d_c = - \\frac{\\nabla f(x)^T \\nabla f(x)}
              {\\nabla f(x)^T \\nabla^2 f(x) \\nabla f(x)} \\nabla f(x)

    The Newton point :math:`d_n` verifies the Newton equation:

    .. math:: H_s d_n = - \\nabla f(x)

    where :math:`H_s` is a positive definite matrix generated with the
    method by `Schnabel and Eskow (1999)`_.

    The dogleg point is

    .. math:: d_d = \\eta d_n

    where

    .. math:: \\eta = 0.2 + 0.8 \\frac{\\alpha^2}{\\beta
              |\\nabla f(x)^T d_n|}

    and :math:`\\alpha= \\nabla f(x)^T \\nabla f(x)`,
    :math:`\\beta=\\nabla f(x)^T \\nabla^2 f(x)\\nabla f(x)`.

    :param g: gradient :math:`\\nabla f(x)`
    :type g: numpy.array
    :param H: hessian :math:`\\nabla^2 f(x)`
    :type H: numpy.array

    :return: tuple with Cauchy point, Newton point, dogleg point
    :rtype: numpy.array, numpy.array, numpy.array

    :raises biogeme.exceptions.biogemeError: if the quadratic model
        is not convex.
    """
    alpha = np.inner(g, g)
    beta = np.inner(g, H @ g)
    dc = -(alpha / beta) * g
    L, E, P = schnabelEskow(H)
    negativeEigenvalue = E < 0
    if np.any(negativeEigenvalue):
        errorMsg = (
            f'The dogleg method requires a convex optimization '
            f'problem. Diagonal perturbation: {E}'
        )
        raise excep.biogemeError(errorMsg)
    y3 = -P.T @ g
    y2 = la.solve_triangular(L, y3, lower=True)
    y1 = la.solve_triangular(L.T, y2, lower=False)
    dn = P @ y1
    eta = 0.2 + (0.8 * alpha * alpha / (beta * abs(np.inner(g, dn))))
    return dc, dn, eta * dn
def dogleg(g, H, delta):
    """
    Find an approximation of the trust region subproblem using
    the dogleg method

    :param g: gradient of the quadratic model.
    :type g: numpy.array
    :param H: hessian of the quadratic model.
    :type H: numpy.array
    :param delta: radius of the trust region.
    :type delta: float

    :return: d, diagnostic where

          - d is an approximate solution of the trust region subproblem
          - diagnostic is the nature of the solution:

            * -2 if negative curvature along Newton direction
            * -1 if negative curvature along Cauchy direction
              (i.e. along the gradient)
            * 1 if partial Cauchy step
            * 2 if Newton step
            * 3 if partial Newton step
            * 4 if dogleg

    :rtype: numpy.array, int
    """
    dc, dn, dl = cauchyNewtonDogleg(g, H)

    # Check if the model is convex along the gradient direction
    alpha = np.inner(g, g)
    beta = np.inner(g, H @ g)
    if beta <= 0:
        dstar = -delta * g / np.sqrt(alpha)
        return dstar, -1

    # Compute the Cauchy point
    normdc = alpha * np.sqrt(alpha) / beta
    if normdc >= delta:
        # The Cauchy point is outside the trust region. We move along
        # the Cauchy direction until the border of the trust region.
        dstar = (delta / normdc) * dc
        return dstar, 1

    # Compute the Newton point
    normdn = la.norm(dn)

    # Check the convexity of the model along the Newton direction
    if np.inner(dn, H @ dn) <= 0.0:
        # Return the Cauchy point
        return dc, -2

    if normdn <= delta:
        # The Newton point is inside the trust region
        return dn, 2

    # Compute the dogleg point
    eta = 0.2 + (0.8 * alpha * alpha / (beta * abs(np.inner(g, dn))))
    partieldn = eta * la.norm(dn)
    if partieldn <= delta:
        # The dogleg point is inside the trust region: take a partial
        # Newton step until the border of the trust region.
        dstar = (delta / normdn) * dn
        return dstar, 3

    # Between the Cauchy point and the dogleg point
    nu = dl - dc
    lbd = trustRegionIntersection(dc, nu, delta)
    dstar = dc + lbd * nu
    return dstar, 4
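
# Illustrative sketch (hypothetical helper, not part of the module):
# with H = I, the Cauchy, Newton and dogleg points coincide at -g. If
# the trust region has radius 2, the Newton point is inside it
# (diagnostic 2); with radius 0.5, only a partial Cauchy step is
# possible (diagnostic 1).
def _exampleDogleg():
    g = np.array([1.0, 0.0])
    H = np.eye(2)
    dNewton, diagNewton = dogleg(g, H, delta=2.0)  # [-1, 0], 2
    dCauchy, diagCauchy = dogleg(g, H, delta=0.5)  # [-0.5, 0], 1
    return dNewton, diagNewton, dCauchy, diagCauchy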
def truncatedConjugateGradient(g, H, delta):
    """Find an approximation of the trust region subproblem using the
    truncated conjugate gradient method

    :param g: gradient of the quadratic model.
    :type g: numpy.array
    :param H: hessian of the quadratic model.
    :type H: numpy.array
    :param delta: radius of the trust region.
    :type delta: float

    :return: d, diagnostic, where

          - d is the approximate solution of the trust region subproblem,
          - diagnostic is the nature of the solution:

            * 1 for convergence,
            * 2 if out of the trust region,
            * 3 if negative curvature has been detected,
            * 4 if a numerical problem has been encountered.

    :rtype: numpy.array, int
    """
    tol = 1.0e-6
    n = len(g)
    xk = np.zeros(n)
    gk = g
    dk = -gk
    for _ in range(n):
        try:
            curv = np.inner(dk, H @ dk)
            if curv <= 0:
                # Negative curvature has been detected: move to the
                # border of the trust region.
                diagnostic = 3
                a = np.inner(dk, dk)
                b = 2 * np.inner(xk, dk)
                c = np.inner(xk, xk) - delta * delta
                rho = b * b - 4 * a * c
                step = xk + ((-b + np.sqrt(rho)) / (2 * a)) * dk
                return step, diagnostic
            alphak = -np.inner(dk, gk) / curv
            xkp1 = xk + alphak * dk
            if np.isnan(xkp1).any() or la.norm(xkp1) > delta:
                # Out of the trust region: move to the border.
                diagnostic = 2
                a = np.inner(dk, dk)
                b = 2 * np.inner(xk, dk)
                c = np.inner(xk, xk) - delta * delta
                rho = b * b - 4 * a * c
                step = xk + ((-b + np.sqrt(rho)) / (2 * a)) * dk
                return step, diagnostic
            xk = xkp1
            gkp1 = H @ xk + g
            betak = np.inner(gkp1, gkp1) / np.inner(gk, gk)
            dk = -gkp1 + betak * dk
            gk = gkp1
            if la.norm(gkp1) <= tol:
                diagnostic = 1
                step = xk
                return step, diagnostic
        except ValueError:
            # Numerical problem: move to the border of the trust region.
            diagnostic = 4
            a = np.inner(dk, dk)
            b = 2 * np.inner(xk, dk)
            c = np.inner(xk, xk) - delta * delta
            rho = b * b - 4 * a * c
            step = xk + ((-b + np.sqrt(rho)) / (2 * a)) * dk
            return step, diagnostic
    diagnostic = 1
    step = xk
    return step, diagnostic
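
# Illustrative sketch (hypothetical helper, not part of the module):
# for the convex quadratic model with g = (1, 0) and H = I, the
# conjugate gradient converges in one iteration to the unconstrained
# minimizer -g.
def _exampleTruncatedConjugateGradient():
    g = np.array([1.0, 0.0])
    H = np.eye(2)
    step, diagnostic = truncatedConjugateGradient(g, H, delta=2.0)
    return step, diagnostic  # [-1, 0], 1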
def newtonTrustRegion(
    fct,
    x0,
    delta0=1.0,
    eps=np.finfo(np.float64).eps ** 0.3333,
    dl=False,
    maxiter=1000,
    eta1=0.01,
    eta2=0.9,
):
    """Newton method with trust region

    :param fct: object to calculate the objective function and its
        derivatives.
    :type fct: optimization.functionToMinimize
    :param x0: starting point
    :type x0: numpy.array
    :param delta0: initial radius of the trust region. Default: 1.0.
    :type delta0: float
    :param eps: the algorithm stops when this precision is reached.
        Default: :math:`\\varepsilon^{\\frac{1}{3}}`
    :type eps: float
    :param dl: If True, the dogleg method is used to solve the trust
        region subproblem. If False, the truncated conjugate gradient
        is used. Default: False.
    :type dl: bool
    :param maxiter: the algorithm stops if this number of iterations
        is reached. Default: 1000.
    :type maxiter: int
    :param eta1: threshold for failed iterations. Default: 0.01.
    :type eta1: float
    :param eta2: threshold for very successful iterations. Default: 0.9.
    :type eta2: float

    :return: tuple x, messages, where

        - x is the solution found,
        - messages is a dictionary reporting various aspects
          related to the run of the algorithm.

    :rtype: numpy.array, dict(str:object)
    """
    k = 0
    xk = x0
    fct.setVariables(xk)
    f, g, H = fct.f_g_h()
    nfev = 1
    ngev = 1
    nhev = 1
    typx = np.ones(np.asarray(xk).shape)
    typf = max(np.abs(f), 1.0)
    relgrad = relativeGradient(xk, f, g, typx, typf)
    if relgrad <= eps:
        message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
        messages = {
            'Algorithm': 'Unconstrained Newton with trust region',
            'Relative gradient': relgrad,
            'Cause of termination': message,
            'Number of iterations': 0,
            'Number of function evaluations': nfev,
            'Number of gradient evaluations': ngev,
            'Number of hessian evaluations': nhev,
        }
        return xk, messages
    delta = delta0
    cont = True
    maxDelta = np.finfo(float).max
    minDelta = np.finfo(float).eps
    rho = 0.0
    while cont:
        k += 1
        if dl:
            step, _ = dogleg(g, H, delta)
        else:
            step, _ = truncatedConjugateGradient(g, H, delta)
        xc = xk + step
        fct.setVariables(xc)
        # Calculate the value of the function
        fc = fct.f()
        nfev += 1
        num = f - fc
        denom = -np.inner(step, g) - 0.5 * np.inner(step, H @ step)
        rho = num / denom
        if rho < eta1:
            # Failure: reduce the trust region
            delta = la.norm(step) / 2.0
            status = '-'
        else:
            # Candidate accepted
            fc, gc, Hc = fct.f_g_h()
            nfev += 1
            ngev += 1
            nhev += 1
            xk = xc
            f = fc
            g = gc
            H = Hc
            if rho >= eta2:
                # Enlarge the trust region
                delta = min(2 * delta, maxDelta)
                status = '++'
            else:
                status = '+'
            relgrad = relativeGradient(xk, f, g, typx, typf)
            if relgrad <= eps:
                message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
                cont = False
        if delta <= minDelta:
            message = f'Trust region is too small: {delta}'
            cont = False
        if k == maxiter:
            message = f'Maximum number of iterations reached: {maxiter}'
            cont = False
        logger.detailed(
            f'{k} f={f:10.7g} relgrad={relgrad:6.2g} '
            f'delta={delta:6.2g} '
            f'rho={rho:6.2g} {status}'
        )
    messages = {
        'Algorithm': 'Unconstrained Newton with trust region',
        'Relative gradient': relgrad,
        'Cause of termination': message,
        'Number of iterations': k,
        'Number of function evaluations': nfev,
        'Number of gradient evaluations': ngev,
        'Number of hessian evaluations': nhev,
    }
    return xk, messages
def bfgs(H, d, y):
    """Update the BFGS matrix. Formula (13.12) of `Bierlaire (2015)`_,
    where the method proposed by `Powell (1977)`_ is applied

    .. _`Bierlaire (2015)`: http://optimizationprinciplesalgorithms.com/
    .. _`Powell (1977)`:
       https://link.springer.com/content/pdf/10.1007/BFb0067703.pdf

    :param H: current approximation of the Hessian
    :type H: numpy.array (2D)
    :param d: difference between two consecutive iterates.
    :type d: numpy.array (1D)
    :param y: difference between two consecutive gradients.
    :type y: numpy.array (1D)

    :return: updated approximation of the Hessian.
    :rtype: numpy.array (2D)
    """
    Hd = H @ d
    dHd = np.inner(d, Hd)
    denom = np.inner(d, y)
    # Powell's correction: if the curvature condition d'y >= 0.2 d'Hd
    # fails, y is replaced by a combination of y and Hd.
    if denom >= 0.2 * dHd:
        eta = y
    else:
        theta = 0.8 * dHd / (dHd - denom)
        eta = theta * y + (1 - theta) * Hd
    return H - np.outer(Hd, Hd) / dHd + np.outer(eta, eta) / np.inner(d, eta)
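
# Illustrative sketch (hypothetical helper, not part of the module):
# when the curvature condition d'y >= 0.2 d'Hd holds, the updated
# matrix satisfies the secant equation H d = y.
def _exampleBfgs():
    H = np.eye(2)
    d = np.array([1.0, 0.0])
    y = np.array([0.5, 0.2])
    Hnew = bfgs(H, d, y)
    assert np.allclose(Hnew @ d, y)
    return Hnew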
def inverseBfgs(Hinv, d, y):
    """Update the inverse BFGS matrix. Formula (13.13) of
    `Bierlaire (2015)`_

    .. _`Bierlaire (2015)`: http://optimizationprinciplesalgorithms.com/

    :param Hinv: current approximation of the inverse of the Hessian
    :type Hinv: numpy.array (2D)
    :param d: difference between two consecutive iterates.
    :type d: numpy.array (1D)
    :param y: difference between two consecutive gradients.
    :type y: numpy.array (1D)

    :return: updated approximation of the inverse of the Hessian.
    :rtype: numpy.array (2D)
    """
    n = len(d)
    denom = np.inner(d, y)
    if denom <= 0.0:
        logger.warning(f"Unable to perform BFGS update as d'y = {denom} <= 0")
        return Hinv
    dy = np.outer(d, y)
    yd = np.outer(y, d)
    dd = np.outer(d, d)
    eye = np.identity(n)
    return ((eye - (dy / denom)) @ Hinv @ (eye - (yd / denom))) + dd / denom
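
# Illustrative sketch (hypothetical helper, not part of the module):
# the inverse update satisfies the secant equation in its inverse
# form, Hinv y = d, whenever d'y > 0.
def _exampleInverseBfgs():
    Hinv = np.eye(2)
    d = np.array([1.0, 0.0])
    y = np.array([0.5, 0.2])
    HinvNew = inverseBfgs(Hinv, d, y)
    assert np.allclose(HinvNew @ y, d)
    return HinvNew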
def bfgsLineSearch(
    fct,
    x0,
    initBfgs=None,
    eps=np.finfo(np.float64).eps ** 0.3333,
    maxiter=1000,
):
    """BFGS method with inexact line search (Wolfe conditions)

    :param fct: object to calculate the objective function and its
        derivatives.
    :type fct: optimization.functionToMinimize
    :param x0: starting point
    :type x0: numpy.array
    :param initBfgs: matrix used to initialize BFGS. If None, the
        identity matrix is used. Default: None.
    :type initBfgs: numpy.array
    :param eps: the algorithm stops when this precision is reached.
        Default: :math:`\\varepsilon^{\\frac{1}{3}}`
    :type eps: float
    :param maxiter: the algorithm stops if this number of iterations
        is reached. Default: 1000
    :type maxiter: int

    :return: tuple x, messages, where

        - x is the solution found,
        - messages is a dictionary reporting various aspects
          related to the run of the algorithm.

    :rtype: numpy.array, dict(str:object)

    :raises biogeme.exceptions.biogemeError: if the dimensions of the
        matrix initBfgs do not match the length of x0.
    """
    n = len(x0)
    xk = x0
    fct.setVariables(xk)
    f, g = fct.f_g()
    nfev = 1
    ngev = 1
    if initBfgs is None:
        Hinv = np.identity(n)
    else:
        if initBfgs.shape != (n, n):
            errorMsg = (
                f'BFGS must be initialized with a {n}x{n} '
                f'matrix and not a {initBfgs.shape[0]}'
                f'x{initBfgs.shape[1]} matrix.'
            )
            raise excep.biogemeError(errorMsg)
        Hinv = initBfgs
    typx = np.ones(np.asarray(xk).shape)
    typf = max(np.abs(f), 1.0)
    relgrad = relativeGradient(xk, f, g, typx, typf)
    if relgrad <= eps:
        message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
        messages = {
            'Algorithm': 'Inverse BFGS with line search',
            'Relative gradient': relgrad,
            'Cause of termination': message,
            'Number of iterations': 0,
            'Number of function evaluations': nfev,
            'Number of gradient evaluations': ngev,
        }
        return xk, messages
    k = 0
    cont = True
    while cont:
        d = -Hinv @ g
        alpha, nfls = lineSearch(fct, xk, f, g, d)
        nfev += nfls
        delta = alpha * d
        xk = xk + delta
        gprev = g
        fct.setVariables(xk)
        f, g = fct.f_g()
        nfev += 1
        ngev += 1
        Hinv = inverseBfgs(Hinv, delta, g - gprev)
        k += 1
        relgrad = relativeGradient(xk, f, g, typx, typf)
        if relgrad <= eps:
            message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
            cont = False
        if k == maxiter:
            message = f'Maximum number of iterations reached: {maxiter}'
            cont = False
        logger.detailed(
            f'{k} f={f:10.7g} relgrad={relgrad:6.2g} alpha={alpha:6.2g}'
        )
    messages = {
        'Algorithm': 'Inverse BFGS with line search',
        'Relative gradient': relgrad,
        'Cause of termination': message,
        'Number of iterations': k,
        'Number of function evaluations': nfev,
        'Number of gradient evaluations': ngev,
    }
    return xk, messages
def bfgsTrustRegion(
    fct,
    x0,
    initBfgs=None,
    delta0=1.0,
    eps=np.finfo(np.float64).eps ** 0.3333,
    dl=False,
    maxiter=1000,
    eta1=0.01,
    eta2=0.9,
):
    """BFGS method with trust region

    :param fct: object to calculate the objective function and its
        derivatives.
    :type fct: optimization.functionToMinimize
    :param x0: starting point
    :type x0: numpy.array
    :param initBfgs: matrix used to initialize BFGS. If None, the
        identity matrix is used. Default: None.
    :type initBfgs: numpy.array
    :param delta0: initial radius of the trust region. Default: 1.0.
    :type delta0: float
    :param eps: the algorithm stops when this precision is reached.
        Default: :math:`\\varepsilon^{\\frac{1}{3}}`
    :type eps: float
    :param dl: If True, the dogleg method is used to solve the trust
        region subproblem. If False, the truncated conjugate gradient
        is used. Default: False.
    :type dl: bool
    :param maxiter: the algorithm stops if this number of iterations
        is reached. Default: 1000.
    :type maxiter: int
    :param eta1: threshold for failed iterations. Default: 0.01.
    :type eta1: float
    :param eta2: threshold for very successful iterations. Default: 0.9.
    :type eta2: float

    :return: tuple x, messages, where

        - x is the solution found,
        - messages is a dictionary reporting various aspects
          related to the run of the algorithm.

    :rtype: numpy.array, dict(str:object)

    :raises biogeme.exceptions.biogemeError: if the dimensions of the
        matrix initBfgs do not match the length of x0.
    """
    k = 0
    xk = x0
    n = len(x0)
    fct.setVariables(xk)
    f, g = fct.f_g()
    nfev = 1
    ngev = 1
    if initBfgs is None:
        H = np.identity(n)
    else:
        if initBfgs.shape != (n, n):
            errorMsg = (
                f'BFGS must be initialized with a {n}x{n} '
                f'matrix and not a {initBfgs.shape[0]}'
                f'x{initBfgs.shape[1]} matrix.'
            )
            raise excep.biogemeError(errorMsg)
        H = initBfgs
    typx = np.ones(np.asarray(xk).shape)
    typf = max(np.abs(f), 1.0)
    relgrad = relativeGradient(xk, f, g, typx, typf)
    if relgrad <= eps:
        message = f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
        messages = {
            'Algorithm': 'BFGS with trust region',
            'Relative gradient': relgrad,
            'Cause of termination': message,
            'Number of iterations': 0,
            'Number of function evaluations': nfev,
            'Number of gradient evaluations': ngev,
        }
        return xk, messages
    delta = delta0
    cont = True
    maxDelta = np.finfo(float).max
    minDelta = np.finfo(float).eps
    rho = 0.0
    while cont:
        k += 1
        if dl:
            step, _ = dogleg(g, H, delta)
        else:
            step, _ = truncatedConjugateGradient(g, H, delta)
        xc = xk + step
        fct.setVariables(xc)
        # Calculate the value of the function
        fc = fct.f()
        nfev += 1
        if fc >= f:
            delta = la.norm(step) / 2.0
            status = '-'
        else:
            num = f - fc
            denom = -np.inner(step, g) - 0.5 * np.inner(step, H @ step)
            rho = num / denom
            if rho < eta1:
                # Failure: reduce the trust region
                delta = la.norm(step) / 2.0
                status = '-'
            else:
                # Candidate accepted
                fc, gc = fct.f_g()
                nfev += 1
                ngev += 1
                d = xc - xk
                y = gc - g
                xk = xc
                f = fc
                g = gc
                H = bfgs(H, d, y)
                if rho >= eta2:
                    # Enlarge the trust region
                    delta = min(2 * delta, maxDelta)
                    status = '++'
                else:
                    status = '+'
                relgrad = relativeGradient(xk, f, g, typx, typf)
                if relgrad <= eps:
                    message = (
                        f'Relative gradient = {relgrad:.2g} <= {eps:.2g}'
                    )
                    cont = False
        if delta <= minDelta:
            message = f'Trust region is too small: {delta}'
            cont = False
        if k == maxiter:
            message = f'Maximum number of iterations reached: {maxiter}'
            cont = False
        logger.detailed(
            f'{k} f={f:10.7g} relgrad={relgrad:6.2g} '
            f'delta={delta:6.2g} '
            f'rho={rho:6.2g} {status}'
        )
    messages = {
        'Algorithm': 'BFGS with trust region',
        'Relative gradient': relgrad,
        'Cause of termination': message,
        'Number of iterations': k,
        'Number of function evaluations': nfev,
        'Number of gradient evaluations': ngev,
    }
    return xk, messages
def truncatedConjugateGradientSubspace(
    xk,
    gk,
    Hk,
    delta,
    bounds,
    infeasibleIterate=False,
    tol=np.finfo(np.float64).eps ** 0.3333,
):
    """Find an approximation of the solution of the trust region
    subproblem using the truncated conjugate gradient method within
    the subspace of free variables. Free variables are those
    corresponding to inactive constraints at the generalized Cauchy
    point.

    :param xk: current iterate.
    :type xk: numpy.array
    :param gk: gradient of the quadratic model.
    :type gk: numpy.array
    :param Hk: hessian of the quadratic model.
    :type Hk: numpy.array
    :param delta: radius of the trust region.
    :type delta: float
    :param bounds: bounds on the variables.
    :type bounds: class bioBounds
    :param infeasibleIterate: if True, the algorithm may continue
        until termination even if an infeasible iterate is generated.
        The result will then be projected onto the feasible domain.
        If False, the algorithm stops as soon as an infeasible iterate
        is generated. Default: False.
    :type infeasibleIterate: bool
    :param tol: tolerance used for the stopping criterion.
    :type tol: float

    :return: d, diagnostic, where

          - d is the approximate solution of the trust region subproblem,
          - diagnostic is the nature of the solution:

            * 1 for convergence,
            * 2 if out of the trust region,
            * 3 if negative curvature has been detected,
            * 4 if a numerical problem has been encountered.

    :rtype: numpy.array, int

    :raises biogeme.exceptions.biogemeError: if the dimensions are
        inconsistent
    """
    if np.isnan(xk).any():
        raise excep.biogemeError(f'Invalid xk: {xk}')
    if np.isnan(gk).any():
        raise excep.biogemeError(f'Invalid gk: {gk}')
    if np.isnan(Hk).any():
        raise excep.biogemeError(f'Invalid Hk: {Hk}')

    # First, we calculate the intersection between the trust region and
    # the bounds. The trust region is also a bound constraint (based
    # on the infinity norm) of radius 'delta', centered at xk.
    intersection = bounds.intersectionWithTrustRegion(xk, delta)

    # Then, we calculate the generalized Cauchy point
    gcp = intersection.generalizedCauchyPoint(xk, gk, Hk, -gk)
    x = gcp
    r = -gk - Hk @ (x - xk)
    etak = tol
    activityStatus = intersection.activity(gcp)
    freeVariables = activityStatus == 0
    if not freeVariables.any():
        return gcp, 1
    xbar = x[freeVariables]
    rbar = r[freeVariables]
    # Extract the bounds for the free variables
    boundsBar = intersection.subspace(freeVariables)
    Bbar = Hk[freeVariables][:, freeVariables]
    pbar = np.zeros_like(rbar)
    rho1 = 1
    rho2 = np.inner(rbar, rbar)
    while rho2 >= etak * etak:
        try:
            beta = rho2 / rho1
            pbar = rbar + beta * pbar
            ybar = Bbar @ pbar
            if boundsBar.feasible(xbar):
                feasible = True
                alpha1, _ = boundsBar.maximumStep(xbar, pbar)
            else:
                feasible = False
            if np.inner(pbar, ybar) <= 0:
                # Negative curvature has been detected.
                if feasible:
                    x[freeVariables] = xbar + alpha1 * pbar
                else:
                    x[freeVariables] = xbar
                return bounds.project(x), 3
            alpha2 = rho2 / np.inner(pbar, ybar)
            if not infeasibleIterate and alpha2 > alpha1:
                # Infeasible iterate
                x[freeVariables] = xbar + alpha1 * pbar
                return x, 2
            xbar = xbar + alpha2 * pbar
            rbar = rbar - alpha2 * ybar
            rho1 = rho2
            rho2 = np.inner(rbar, rbar)
        except ValueError:
            # Numerical problem detected. Return the current value of x.
            logger.warning('Numerical problem in conjugate gradient algorithm')
            x[freeVariables] = xbar
            if infeasibleIterate:
                return bounds.project(x), 4
            return x, 4
    # Convergence
    x[freeVariables] = xbar
    if infeasibleIterate:
        return bounds.project(x), 1
    return x, 1
def simpleBoundsNewtonAlgorithm(
    fct,
    bounds,
    x0,
    proportionTrueHessian=1.0,
    infeasibleConjugateGradient=False,
    delta0=1.0,
    tol=np.finfo(np.float64).eps ** 0.3333,
    steptol=1.0e-5,
    cgtol=np.finfo(np.float64).eps ** 0.3333,
    maxiter=1000,
    eta1=0.01,
    eta2=0.9,
    enlargingFactor=10,
):
    """Trust region algorithm for problems with simple bounds

    :param fct: object to calculate the objective function and its
        derivatives.
    :type fct: optimization.functionToMinimize
    :param bounds: bounds on the variables
    :type bounds: class bioBounds
    :param x0: starting point
    :type x0: numpy.array
    :param proportionTrueHessian: proportion of the iterations where
        the true hessian is calculated. When not, the BFGS update is
        used. If 1.0, it is used for all iterations. If 0.0, it is
        not used at all.
    :type proportionTrueHessian: float
    :param infeasibleConjugateGradient: if True, the conjugate
        gradient algorithm may generate infeasible iterates until
        termination. The result will then be projected onto the
        feasible domain. If False, the algorithm stops as soon as an
        infeasible iterate is generated. Default: False.
    :type infeasibleConjugateGradient: bool
    :param delta0: initial radius of the trust region. Default: 1.0.
    :type delta0: float
    :param tol: the algorithm stops when this precision is reached.
        Default: :math:`\\varepsilon^{\\frac{1}{3}}`
    :type tol: float
    :param steptol: the algorithm stops when the relative change in x
        is below this threshold. Basically, if p significant digits
        of x are needed, steptol should be set to 1.0e-p. Default:
        :math:`10^{-5}`
    :type steptol: float
    :param cgtol: the conjugate gradient algorithm stops when this
        precision is reached. Default:
        :math:`\\varepsilon^{\\frac{1}{3}}`
    :type cgtol: float
    :param maxiter: the algorithm stops if this number of iterations
        is reached. Default: 1000.
    :type maxiter: int
    :param eta1: threshold for failed iterations. Default: 0.01.
    :type eta1: float
    :param eta2: threshold for very successful iterations. Default: 0.9.
    :type eta2: float
    :param enlargingFactor: if an iteration is very successful, the
        radius of the trust region is multiplied by this factor.
        Default: 10.
    :type enlargingFactor: float

    :return: x, messages

        - x is the solution generated by the algorithm,
        - messages is a dictionary describing information about the
          algorithm

    :rtype: numpy.array, dict(str:object)

    :raises biogeme.exceptions.biogemeError: if the dimensions of x0
        and the bounds do not match.
    """
    if len(x0) != bounds.n:
        raise excep.biogemeError(
            f'Incompatible size: {len(x0)} and {bounds.n}'
        )
    if not bounds.feasible(x0):
        logger.warning(
            'Initial point not feasible. '
            'It will be projected onto the feasible domain.'
        )
    numberOfTrueHessian = 0
    numberOfMatrices = 0
    if proportionTrueHessian == 1.0:
        algo = 'Newton with trust region for simple bound constraints'
    elif proportionTrueHessian == 0.0:
        algo = 'BFGS with trust region for simple bound constraints'
    else:
        algo = (
            f'Hybrid Newton [{100*proportionTrueHessian}%] with trust '
            f'region for simple bound constraints'
        )
    k = 0
    xk = bounds.project(x0)
    fct.setVariables(xk)
    nfev = 0
    ngev = 0
    nhev = 0
    if proportionTrueHessian > 0:
        f, g, H = fct.f_g_h()
        nfev += 1
        ngev += 1
        nhev += 1
        numberOfTrueHessian += 1
        numberOfMatrices += 1
        # If there is a numerical problem with the Hessian, we use the
        # identity matrix instead
        if np.isnan(H).any() or np.linalg.norm(H) > 1.0e100:
            logger.warning(
                f'Numerical problem with the second derivative'
                f' matrix at the starting point. Norm = '
                f'{np.linalg.norm(H)}. Replaced by the '
                f'identity matrix.'
            )
            H = np.eye(len(xk))
    else:
        f, g = fct.f_g()
        nfev += 1
        ngev += 1
        H = np.eye(len(xk))
        numberOfMatrices += 1
    projectedGradient = bounds.project(xk - g) - xk
    typx = np.ones(np.asarray(xk).shape)
    typf = max(np.abs(f), 1.0)
    relchange = None
    relgrad = relativeGradient(xk, f, projectedGradient, typx, typf)
    if relgrad <= tol:
        message = f'Relative gradient = {relgrad:.2g} <= {tol:.2g}'
        messages = {
            'Algorithm': algo,
            'Relative projected gradient': relgrad,
            'Number of iterations': 0,
            'Number of function evaluations': nfev,
            'Number of gradient evaluations': ngev,
            'Number of hessian evaluations': nhev,
            'Cause of termination': message,
        }
        return xk, messages
    delta = delta0
    cont = True
    maxDelta = np.finfo(float).max
    minDelta = np.finfo(float).eps
    rho = 0.0
    while cont:
        k += 1
        # Solve the quadratic problem in the subspace defined by the GCP
        xc, _ = truncatedConjugateGradientSubspace(
            xk, g, H, delta, bounds, infeasibleConjugateGradient, cgtol
        )
        if np.isnan(xc).any():
            delta = delta / 2.0
            status = '-'
        else:
            failed = False
            try:
                fct.setVariables(xc)
                fc = fct.f()
                nfev += 1
                num = f - fc
                step = xc - xk
                denom = -np.inner(step, g) - 0.5 * np.inner(step, H @ step)
                rho = num / denom
                failed = rho < eta1
            except RuntimeError:
                logger.debug(xc)
                failed = True
            if failed:
                # Failure: reduce the trust region
                delta = min(delta / 2.0, la.norm(step, np.inf) / 2.0)
                status = '-'
            else:
                # Candidate accepted
                if (
                    proportionTrueHessian > 0
                    and float(numberOfTrueHessian) / float(numberOfMatrices)
                    <= proportionTrueHessian
                ):
                    try:
                        fc, gc, Hc = fct.f_g_h()
                        nfev += 1
                        ngev += 1
                        nhev += 1
                        numberOfTrueHessian += 1
                        numberOfMatrices += 1
                        # If there is a numerical problem with the
                        # Hessian, we apply BFGS
                        if np.linalg.norm(Hc) > 1.0e100:
                            logger.warning(
                                f'Numerical problem with the second '
                                f'derivative matrix. '
                                f'Norm = {np.linalg.norm(Hc)}. '
                                f'Replaced by a BFGS update.'
                            )
                            y = gc - g
                            Hc = bfgs(H, step, y)
                        fghCalculated = True
                    except excep.biogemeError:
                        # Failure: reduce the trust region
                        delta = min(delta / 2.0, la.norm(step, np.inf) / 2.0)
                        status = '-'
                        fghCalculated = False
                else:
                    try:
                        fc, gc = fct.f_g()
                        nfev += 1
                        ngev += 1
                        y = gc - g
                        Hc = bfgs(H, step, y)
                        numberOfMatrices += 1
                        fghCalculated = True
                    except excep.biogemeError:
                        # Failure: reduce the trust region
                        delta = min(delta / 2.0, la.norm(step, np.inf) / 2.0)
                        status = '-'
                        fghCalculated = False
                if fghCalculated:
                    xpred = xk
                    xk = xc
                    f = fc
                    g = gc
                    H = Hc
                    if rho >= eta2:
                        # Enlarge the trust region
                        delta = min(enlargingFactor * delta, maxDelta)
                        status = '++'
                    else:
                        status = '+'
                    projectedGradient = bounds.project(xk - g) - xk
                    relgrad = relativeGradient(
                        xk, f, projectedGradient, typx, typf
                    )
                    if relgrad <= tol:
                        message = (
                            f'Relative gradient = {relgrad:.2g} <= {tol:.2g}'
                        )
                        cont = False
                    relchange = relativeChange(xk, xpred, typx)
                    if relchange <= steptol:
                        message = (
                            f'Relative change = '
                            f'{relchange:.3g} <= {steptol:.2g}'
                        )
                        cont = False
        if delta <= minDelta:
            message = f'Trust region is too small: {delta}'
            cont = False
        if k == maxiter:
            message = f'Maximum number of iterations reached: {maxiter}'
            cont = False
        if relchange is None:
            logger.detailed(
                f'{k} f={f:10.7g} projected '
                f'rel. grad.={relgrad:6.2g} '
                f'delta={delta:6.2g} rho={rho:6.2g} {status}'
            )
        else:
            logger.detailed(
                f'{k} f={f:10.7g} projected '
                f'rel. grad.={relgrad:6.2g} '
                f'rel. change={relchange:6.2g} '
                f'delta={delta:6.2g} rho={rho:6.2g} {status}'
            )
    if numberOfMatrices != 0:
        actualProp = 100 * float(numberOfTrueHessian) / float(numberOfMatrices)
    else:
        actualProp = 0
    logger.detailed(f'Proportion of Hessian calculation: {actualProp}%')
    messages = {
        'Algorithm': algo,
        'Proportion analytical hessian': f'{actualProp:.1f}%',
        'Relative projected gradient': relgrad,
        'Relative change': relchange,
        'Number of iterations': k,
        'Number of function evaluations': nfev,
        'Number of gradient evaluations': ngev,
        'Number of hessian evaluations': nhev,
        'Cause of termination': message,
    }
    return xk, messages
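
# Illustrative sketch (hypothetical helper, not part of the module):
# minimizing the Rosenbrock function of the hypothetical
# rosenbrockExample class under simple bounds. The unconstrained
# optimum (1, 1) is feasible here, so the algorithm should converge
# close to it.
def _exampleSimpleBoundsNewton():
    theFunction = rosenbrockExample()
    theBounds = bioBounds([(-10, 10), (-10, 10)])
    xstar, messages = simpleBoundsNewtonAlgorithm(
        theFunction, theBounds, np.array([-1.5, 1.5])
    )
    return xstar, messages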
def relativeChange(x, xpred, typx):
    """Calculates the relative change.

    It is typically used as a stopping criterion.

    :param x: current iterate.
    :type x: numpy.array
    :param xpred: previous iterate.
    :type xpred: numpy.array
    :param typx: typical value for x.
    :type typx: numpy.array

    :return: relative change:

    .. math:: \\max_i \\frac{|(x_k)_i - (x_{k-1})_i|}
              {\\max(|(x_k)_i|, \\text{typx}_i)}

    :rtype: float
    """
    relx = np.array(
        [abs(x[i] - xpred[i]) / max(abs(x[i]), typx[i]) for i in range(len(x))]
    )
    result = relx.max()
    if np.isfinite(result):
        return result
    return np.finfo(float).max
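
# Illustrative sketch (hypothetical helper, not part of the module):
# a change of 0.1 in the first coordinate, relative to
# max(|x_i|, typx_i) = 1.
def _exampleRelativeChange():
    x = np.array([1.0, 2.0])
    xpred = np.array([1.1, 2.0])
    return relativeChange(x, xpred, np.ones(2))  # about 0.1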