"""Trust-region optimization.""" import math import numpy as np import scipy.linalg from .optimize import (_check_unknown_options, _wrap_function, _status_message, OptimizeResult, _prepare_scalar_function) from scipy.optimize._hessian_update_strategy import HessianUpdateStrategy from scipy.optimize._differentiable_functions import FD_METHODS __all__ = [] class BaseQuadraticSubproblem: """ Base/abstract class defining the quadratic model for trust-region minimization. Child classes must implement the ``solve`` method. Values of the objective function, Jacobian and Hessian (if provided) at the current iterate ``x`` are evaluated on demand and then stored as attributes ``fun``, ``jac``, ``hess``. """ def __init__(self, x, fun, jac, hess=None, hessp=None): self._x = x self._f = None self._g = None self._h = None self._g_mag = None self._cauchy_point = None self._newton_point = None self._fun = fun self._jac = jac self._hess = hess self._hessp = hessp def __call__(self, p): return self.fun + np.dot(self.jac, p) + 0.5 * np.dot(p, self.hessp(p)) @property def fun(self): """Value of objective function at current iteration.""" if self._f is None: self._f = self._fun(self._x) return self._f @property def jac(self): """Value of Jacobian of objective function at current iteration.""" if self._g is None: self._g = self._jac(self._x) return self._g @property def hess(self): """Value of Hessian of objective function at current iteration.""" if self._h is None: self._h = self._hess(self._x) return self._h def hessp(self, p): if self._hessp is not None: return self._hessp(self._x, p) else: return np.dot(self.hess, p) @property def jac_mag(self): """Magnitude of jacobian of objective function at current iteration.""" if self._g_mag is None: self._g_mag = scipy.linalg.norm(self.jac) return self._g_mag def get_boundaries_intersections(self, z, d, trust_radius): """ Solve the scalar quadratic equation ||z + t d|| == trust_radius. This is like a line-sphere intersection. Return the two values of t, sorted from low to high. """ a = np.dot(d, d) b = 2 * np.dot(z, d) c = np.dot(z, z) - trust_radius**2 sqrt_discriminant = math.sqrt(b*b - 4*a*c) # The following calculation is mathematically # equivalent to: # ta = (-b - sqrt_discriminant) / (2*a) # tb = (-b + sqrt_discriminant) / (2*a) # but produce smaller round off errors. # Look at Matrix Computation p.97 # for a better justification. aux = b + math.copysign(sqrt_discriminant, b) ta = -aux / (2*a) tb = -2*c / aux return sorted([ta, tb]) def solve(self, trust_radius): raise NotImplementedError('The solve method should be implemented by ' 'the child class') def _minimize_trust_region(fun, x0, args=(), jac=None, hess=None, hessp=None, subproblem=None, initial_trust_radius=1.0, max_trust_radius=1000.0, eta=0.15, gtol=1e-4, maxiter=None, disp=False, return_all=False, callback=None, inexact=True, **unknown_options): """ Minimization of scalar function of one or more variables using a trust-region algorithm. Options for the trust-region algorithm are: initial_trust_radius : float Initial trust radius. max_trust_radius : float Never propose steps that are longer than this value. eta : float Trust region related acceptance stringency for proposed steps. gtol : float Gradient norm must be less than `gtol` before successful termination. maxiter : int Maximum number of iterations to perform. disp : bool If True, print convergence message. inexact : bool Accuracy to solve subproblems. If True requires less nonlinear iterations, but more vector products. Only effective for method trust-krylov. This function is called by the `minimize` function. It is not supposed to be called directly. """ _check_unknown_options(unknown_options) if jac is None: raise ValueError('Jacobian is currently required for trust-region ' 'methods') if hess is None and hessp is None: raise ValueError('Either the Hessian or the Hessian-vector product ' 'is currently required for trust-region methods') if subproblem is None: raise ValueError('A subproblem solving strategy is required for ' 'trust-region methods') if not (0 <= eta < 0.25): raise Exception('invalid acceptance stringency') if max_trust_radius <= 0: raise Exception('the max trust radius must be positive') if initial_trust_radius <= 0: raise ValueError('the initial trust radius must be positive') if initial_trust_radius >= max_trust_radius: raise ValueError('the initial trust radius must be less than the ' 'max trust radius') # force the initial guess into a nice format x0 = np.asarray(x0).flatten() # A ScalarFunction representing the problem. This caches calls to fun, jac, # hess. sf = _prepare_scalar_function(fun, x0, jac=jac, hess=hess, args=args) fun = sf.fun jac = sf.grad if callable(hess): hess = sf.hess elif callable(hessp): # this elif statement must come before examining whether hess # is estimated by FD methods or a HessianUpdateStrategy pass elif (hess in FD_METHODS or isinstance(hess, HessianUpdateStrategy)): # If the Hessian is being estimated by finite differences or a # Hessian update strategy then ScalarFunction.hess returns a # LinearOperator or a HessianUpdateStrategy. This enables the # calculation/creation of a hessp. BUT you only want to do this # if the user *hasn't* provided a callable(hessp) function. hess = None def hessp(x, p, *args): return sf.hess(x).dot(p) else: raise ValueError('Either the Hessian or the Hessian-vector product ' 'is currently required for trust-region methods') # ScalarFunction doesn't represent hessp nhessp, hessp = _wrap_function(hessp, args) # limit the number of iterations if maxiter is None: maxiter = len(x0)*200 # init the search status warnflag = 0 # initialize the search trust_radius = initial_trust_radius x = x0 if return_all: allvecs = [x] m = subproblem(x, fun, jac, hess, hessp) k = 0 # search for the function min # do not even start if the gradient is small enough while m.jac_mag >= gtol: # Solve the sub-problem. # This gives us the proposed step relative to the current position # and it tells us whether the proposed step # has reached the trust region boundary or not. try: p, hits_boundary = m.solve(trust_radius) except np.linalg.linalg.LinAlgError: warnflag = 3 break # calculate the predicted value at the proposed point predicted_value = m(p) # define the local approximation at the proposed point x_proposed = x + p m_proposed = subproblem(x_proposed, fun, jac, hess, hessp) # evaluate the ratio defined in equation (4.4) actual_reduction = m.fun - m_proposed.fun predicted_reduction = m.fun - predicted_value if predicted_reduction <= 0: warnflag = 2 break rho = actual_reduction / predicted_reduction # update the trust radius according to the actual/predicted ratio if rho < 0.25: trust_radius *= 0.25 elif rho > 0.75 and hits_boundary: trust_radius = min(2*trust_radius, max_trust_radius) # if the ratio is high enough then accept the proposed step if rho > eta: x = x_proposed m = m_proposed # append the best guess, call back, increment the iteration count if return_all: allvecs.append(np.copy(x)) if callback is not None: callback(np.copy(x)) k += 1 # check if the gradient is small enough to stop if m.jac_mag < gtol: warnflag = 0 break # check if we have looked at enough iterations if k >= maxiter: warnflag = 1 break # print some stuff if requested status_messages = ( _status_message['success'], _status_message['maxiter'], 'A bad approximation caused failure to predict improvement.', 'A linalg error occurred, such as a non-psd Hessian.', ) if disp: if warnflag == 0: print(status_messages[warnflag]) else: print('Warning: ' + status_messages[warnflag]) print(" Current function value: %f" % m.fun) print(" Iterations: %d" % k) print(" Function evaluations: %d" % sf.nfev) print(" Gradient evaluations: %d" % sf.ngev) print(" Hessian evaluations: %d" % (sf.nhev + nhessp[0])) result = OptimizeResult(x=x, success=(warnflag == 0), status=warnflag, fun=m.fun, jac=m.jac, nfev=sf.nfev, njev=sf.ngev, nhev=sf.nhev + nhessp[0], nit=k, message=status_messages[warnflag]) if hess is not None: result['hess'] = m.hess if return_all: result['allvecs'] = allvecs return result