some new features
This commit is contained in:
896
.venv/lib/python3.12/site-packages/scipy/optimize/_linesearch.py
Normal file
896
.venv/lib/python3.12/site-packages/scipy/optimize/_linesearch.py
Normal file
@ -0,0 +1,896 @@
|
||||
"""
|
||||
Functions
|
||||
---------
|
||||
.. autosummary::
|
||||
:toctree: generated/
|
||||
|
||||
line_search_armijo
|
||||
line_search_wolfe1
|
||||
line_search_wolfe2
|
||||
scalar_search_wolfe1
|
||||
scalar_search_wolfe2
|
||||
|
||||
"""
|
||||
from warnings import warn
|
||||
|
||||
from ._dcsrch import DCSRCH
|
||||
import numpy as np
|
||||
|
||||
__all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2',
|
||||
'scalar_search_wolfe1', 'scalar_search_wolfe2',
|
||||
'line_search_armijo']
|
||||
|
||||
class LineSearchWarning(RuntimeWarning):
    """Warning issued when a line search fails to converge."""
|
||||
|
||||
|
||||
def _check_c1_c2(c1, c2):
|
||||
if not (0 < c1 < c2 < 1):
|
||||
raise ValueError("'c1' and 'c2' do not satisfy"
|
||||
"'0 < c1 < c2 < 1'.")
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Minpack's Wolfe line and scalar searches
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
                       old_fval=None, old_old_fval=None,
                       args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
                       xtol=1e-14):
    """As `scalar_search_wolfe1` but do a line search to direction `pk`.

    Parameters
    ----------
    f : callable
        Function ``f(x)``.
    fprime : callable
        Gradient of `f`.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like, optional
        Gradient of `f` at ``xk``; recomputed when omitted.
    old_fval : float, optional
        Value of `f` at ``xk``.
    old_old_fval : float, optional
        Value of `f` at the point preceding ``xk``.

    The remaining parameters are the same as for `scalar_search_wolfe1`.

    Returns
    -------
    stp, f_count, g_count, fval, old_fval
        As in `scalar_search_wolfe1`.
    gval : array
        Gradient of `f` at the final point.

    Notes
    -----
    Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
    """
    if gfk is None:
        gfk = fprime(xk, *args)

    # Mutable cells so the scalar-search closures can report the
    # evaluation counts and the most recent full gradient back to us.
    n_fev = [0]
    n_gev = [0]
    last_grad = [gfk]

    def phi(step):
        n_fev[0] += 1
        return f(xk + step * pk, *args)

    def derphi(step):
        last_grad[0] = fprime(xk + step * pk, *args)
        n_gev[0] += 1
        return np.dot(last_grad[0], pk)

    stp, fval, old_fval = scalar_search_wolfe1(
        phi, derphi, old_fval, old_old_fval, np.dot(gfk, pk),
        c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)

    return stp, n_fev[0], n_gev[0], fval, old_fval, last_grad[0]
|
||||
|
||||
|
||||
def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9,
                         amax=50, amin=1e-8, xtol=1e-14):
    """Scalar search for an alpha satisfying the strong Wolfe conditions.

    ``alpha > 0`` is assumed to be a descent direction.  The search itself
    is delegated to routine DCSRCH from MINPACK.

    Parameters
    ----------
    phi : callable phi(alpha)
        Function at point `alpha`.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0; computed when omitted.
    old_phi0 : float, optional
        Value of phi at the previous point.
    derphi0 : float, optional
        Value of derphi at 0; computed when omitted.
    c1 : float, optional
        Parameter for the Armijo condition rule.
    c2 : float, optional
        Parameter for the curvature condition rule.
    amax, amin : float, optional
        Maximum and minimum step size.
    xtol : float, optional
        Relative tolerance for an acceptable step.

    Returns
    -------
    alpha : float or None
        Step size, or None if no suitable step was found.
    phi : float
        Value of `phi` at the new point `alpha`.
    phi0 : float
        Value of `phi` at ``alpha=0``.

    Notes
    -----
    Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1`` as described
    in [1]_.

    References
    ----------
    .. [1] Nocedal, J., & Wright, S. J. (2006). Numerical optimization.
           Springer Series in Operations Research and Financial Engineering.
           Springer Nature.
    """
    _check_c1_c2(c1, c2)

    if phi0 is None:
        phi0 = phi(0.)
    if derphi0 is None:
        derphi0 = derphi(0.)

    # First trial step: extrapolate from the previous decrease when history
    # is available; fall back to 1.0 otherwise or when the guess is negative.
    alpha1 = 1.0
    if old_phi0 is not None and derphi0 != 0:
        guess = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
        if guess >= 0:
            alpha1 = guess

    searcher = DCSRCH(phi, derphi, c1, c2, xtol, amin, amax)
    stp, phi1, phi0, task = searcher(
        alpha1, phi0=phi0, derphi0=derphi0, maxiter=100)

    return stp, phi1, phi0
|
||||
|
||||
|
||||
# Backwards-compatible alias: the module-level `line_search` is the
# MINPACK-based Wolfe search (`line_search_wolfe1`).
line_search = line_search_wolfe1
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Pure-Python Wolfe line and scalar searches
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
# Note: `line_search_wolfe2` is the public `scipy.optimize.line_search`
|
||||
|
||||
def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
                       old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
                       extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    Parameters
    ----------
    f : callable f(x,*args)
        Objective function.
    myfprime : callable f'(x,*args)
        Objective function gradient.
    xk : ndarray
        Starting point.
    pk : ndarray
        Search direction. Must be a descent direction for the algorithm
        to converge.
    gfk : ndarray, optional
        Gradient value for ``x=xk``; recomputed if omitted.
    old_fval : float, optional
        Function value for ``x=xk``; recomputed if omitted.
    old_old_fval : float, optional
        Function value for the point preceding ``x=xk``.
    args : tuple, optional
        Additional arguments passed to the objective function.
    c1 : float, optional
        Parameter for the Armijo condition rule.
    c2 : float, optional
        Parameter for the curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable ``extra_condition(alpha, x, f, g) -> bool``.  A step
        satisfying the strong Wolfe conditions is accepted only if this
        returns True; otherwise the search continues with new iterates.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha : float or None
        Alpha for which ``x_new = x0 + alpha * pk``, or None if the line
        search algorithm did not converge.
    fc : int
        Number of function evaluations made.
    gc : int
        Number of gradient evaluations made.
    new_fval : float or None
        New function value ``f(x_new)``, or None on failure.
    old_fval : float
        Old function value ``f(x0)``.
    new_slope : float or None
        Local slope ``<myfprime(x_new), pk>`` along the search direction at
        the new value, or None on failure.

    Notes
    -----
    Enforces the strong Wolfe conditions; see Wright and Nocedal,
    'Numerical Optimization', 1999, pp. 59-61.  If `pk` is not a descent
    direction, `alpha`, `new_fval` and `new_slope` will be None.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.optimize import line_search
    >>> def obj_func(x):
    ...     return (x[0])**2+(x[1])**2
    >>> def obj_grad(x):
    ...     return [2*x[0], 2*x[1]]
    >>> start_point = np.array([1.8, 1.7])
    >>> search_gradient = np.array([-1.0, -1.0])
    >>> line_search(obj_func, obj_grad, start_point, search_gradient)
    (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])
    """
    n_fev = [0]
    n_gev = [0]
    latest_grad = [None]
    latest_grad_alpha = [None]

    def phi(alpha):
        n_fev[0] += 1
        return f(xk + alpha * pk, *args)

    fprime = myfprime

    def derphi(alpha):
        n_gev[0] += 1
        # Cache the full gradient so the caller can reuse it afterwards.
        latest_grad[0] = fprime(xk + alpha * pk, *args)
        latest_grad_alpha[0] = alpha
        return np.dot(latest_grad[0], pk)

    if gfk is None:
        gfk = fprime(xk, *args)
    derphi0 = np.dot(gfk, pk)

    if extra_condition is None:
        extra_condition2 = None
    else:
        def extra_condition2(alpha, phi_value):
            # Supply the current gradient as an argument, reusing the
            # cached one when it matches this alpha.
            if latest_grad_alpha[0] != alpha:
                derphi(alpha)
            return extra_condition(alpha, xk + alpha * pk, phi_value,
                                   latest_grad[0])

    alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
        phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
        extra_condition2, maxiter=maxiter)

    if derphi_star is None:
        warn('The line search algorithm did not converge',
             LineSearchWarning, stacklevel=2)
    else:
        # derphi_star was a scalar slope; hand back the full gradient most
        # recently computed while evaluating it, so the outer loop need not
        # recompute the gradient at the accepted step.
        derphi_star = latest_grad[0]

    return alpha_star, n_fev[0], n_gev[0], phi_star, old_fval, derphi_star
|
||||
|
||||
|
||||
def scalar_search_wolfe2(phi, derphi, phi0=None,
                         old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9, amax=None,
                         extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    alpha > 0 is assumed to be a descent direction.

    Parameters
    ----------
    phi : callable phi(alpha)
        Objective scalar function.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0.
    old_phi0 : float, optional
        Value of phi at previous point.
    derphi0 : float, optional
        Value of derphi at 0
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, phi_value)``
        returning a boolean. The line search accepts the value
        of ``alpha`` only if this callable returns ``True``.
        If the callable returns ``False`` for the step length,
        the algorithm will continue with new iterates.
        The callable is only called for iterates satisfying
        the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha_star : float or None
        Best alpha, or None if the line search algorithm did not converge.
    phi_star : float
        phi at alpha_star.
    phi0 : float
        phi at 0.
    derphi_star : float or None
        derphi at alpha_star, or None if the line search algorithm
        did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    Step lengths are grown until the Wolfe point is bracketed, then the
    bracket is refined by `_zoom`.
    """
    _check_c1_c2(c1, c2)

    if phi0 is None:
        phi0 = phi(0.)

    if derphi0 is None:
        derphi0 = derphi(0.)

    alpha0 = 0
    # Initial step guess extrapolated from the previous decrease when
    # history is available; fall back to 1.0 otherwise.
    if old_phi0 is not None and derphi0 != 0:
        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
    else:
        alpha1 = 1.0

    # A negative extrapolated guess is meaningless; reset it to 1.0.
    if alpha1 < 0:
        alpha1 = 1.0

    if amax is not None:
        alpha1 = min(alpha1, amax)

    phi_a1 = phi(alpha1)
    #derphi_a1 = derphi(alpha1) evaluated below

    phi_a0 = phi0
    derphi_a0 = derphi0

    # Default extra condition: accept any Wolfe-satisfying step.
    if extra_condition is None:
        def extra_condition(alpha, phi):
            return True

    for i in range(maxiter):
        if alpha1 == 0 or (amax is not None and alpha0 > amax):
            # alpha1 == 0: This shouldn't happen. Perhaps the increment has
            # slipped below machine precision?
            alpha_star = None
            phi_star = phi0
            phi0 = old_phi0
            derphi_star = None

            if alpha1 == 0:
                msg = 'Rounding errors prevent the line search from converging'
            else:
                msg = "The line search algorithm could not find a solution " + \
                      "less than or equal to amax: %s" % amax

            warn(msg, LineSearchWarning, stacklevel=2)
            break

        not_first_iteration = i > 0
        # Armijo condition violated (or phi stopped decreasing after the
        # first iteration): [alpha0, alpha1] brackets a Wolfe point.
        if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \
           ((phi_a1 >= phi_a0) and not_first_iteration):
            alpha_star, phi_star, derphi_star = \
                        _zoom(alpha0, alpha1, phi_a0,
                              phi_a1, derphi_a0, phi, derphi,
                              phi0, derphi0, c1, c2, extra_condition)
            break

        derphi_a1 = derphi(alpha1)
        # Curvature condition also holds: strong Wolfe satisfied at alpha1.
        if (abs(derphi_a1) <= -c2*derphi0):
            if extra_condition(alpha1, phi_a1):
                alpha_star = alpha1
                phi_star = phi_a1
                derphi_star = derphi_a1
                break

        # Positive slope: the bracket is [alpha1, alpha0] (reversed order).
        if (derphi_a1 >= 0):
            alpha_star, phi_star, derphi_star = \
                        _zoom(alpha1, alpha0, phi_a1,
                              phi_a0, derphi_a1, phi, derphi,
                              phi0, derphi0, c1, c2, extra_condition)
            break

        alpha2 = 2 * alpha1  # increase by factor of two on each iteration
        if amax is not None:
            alpha2 = min(alpha2, amax)
        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi(alpha1)
        derphi_a0 = derphi_a1

    else:
        # stopping test maxiter reached
        alpha_star = alpha1
        phi_star = phi_a1
        derphi_star = None
        warn('The line search algorithm did not converge',
             LineSearchWarning, stacklevel=2)

    return alpha_star, phi_star, phi0, derphi_star
|
||||
|
||||
|
||||
def _cubicmin(a, fa, fpa, b, fb, c, fc):
|
||||
"""
|
||||
Finds the minimizer for a cubic polynomial that goes through the
|
||||
points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
|
||||
|
||||
If no minimizer can be found, return None.
|
||||
|
||||
"""
|
||||
# f(x) = A *(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
|
||||
|
||||
with np.errstate(divide='raise', over='raise', invalid='raise'):
|
||||
try:
|
||||
C = fpa
|
||||
db = b - a
|
||||
dc = c - a
|
||||
denom = (db * dc) ** 2 * (db - dc)
|
||||
d1 = np.empty((2, 2))
|
||||
d1[0, 0] = dc ** 2
|
||||
d1[0, 1] = -db ** 2
|
||||
d1[1, 0] = -dc ** 3
|
||||
d1[1, 1] = db ** 3
|
||||
[A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
|
||||
fc - fa - C * dc]).flatten())
|
||||
A /= denom
|
||||
B /= denom
|
||||
radical = B * B - 3 * A * C
|
||||
xmin = a + (-B + np.sqrt(radical)) / (3 * A)
|
||||
except ArithmeticError:
|
||||
return None
|
||||
if not np.isfinite(xmin):
|
||||
return None
|
||||
return xmin
|
||||
|
||||
|
||||
def _quadmin(a, fa, fpa, b, fb):
|
||||
"""
|
||||
Finds the minimizer for a quadratic polynomial that goes through
|
||||
the points (a,fa), (b,fb) with derivative at a of fpa.
|
||||
|
||||
"""
|
||||
# f(x) = B*(x-a)^2 + C*(x-a) + D
|
||||
with np.errstate(divide='raise', over='raise', invalid='raise'):
|
||||
try:
|
||||
D = fa
|
||||
C = fpa
|
||||
db = b - a * 1.0
|
||||
B = (fb - D - C * db) / (db * db)
|
||||
xmin = a - C / (2.0 * B)
|
||||
except ArithmeticError:
|
||||
return None
|
||||
if not np.isfinite(xmin):
|
||||
return None
|
||||
return xmin
|
||||
|
||||
|
||||
def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
          phi, derphi, phi0, derphi0, c1, c2, extra_condition):
    """Zoom stage of approximate linesearch satisfying strong Wolfe conditions.

    Part of the optimization algorithm in `scalar_search_wolfe2`.

    Notes
    -----
    Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
    'Numerical Optimization', 1999, pp. 61.

    """

    maxiter = 10
    i = 0
    delta1 = 0.2  # cubic interpolant check
    delta2 = 0.1  # quadratic interpolant check
    phi_rec = phi0  # phi at the most recently discarded bracket endpoint
    a_rec = 0       # the discarded endpoint itself
    while True:
        # interpolate to find a trial step length between a_lo and
        # a_hi Need to choose interpolation here. Use cubic
        # interpolation and then if the result is within delta *
        # dalpha or outside of the interval bounded by a_lo or a_hi
        # then use quadratic interpolation, if the result is still too
        # close, then use bisection

        dalpha = a_hi - a_lo
        # The bracket may be ordered either way; (a, b) is the sorted pair.
        if dalpha < 0:
            a, b = a_hi, a_lo
        else:
            a, b = a_lo, a_hi

        # minimizer of cubic interpolant
        # (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
        #
        # if the result is too close to the end points (or out of the
        # interval), then use quadratic interpolation with phi_lo,
        # derphi_lo and phi_hi if the result is still too close to the
        # end points (or out of the interval) then use bisection

        if (i > 0):
            cchk = delta1 * dalpha
            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
                            a_rec, phi_rec)
        # NOTE: short-circuiting on (i == 0) guarantees cchk and a_j are
        # only read when they were assigned in the branch above.
        if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
            qchk = delta2 * dalpha
            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
            if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
                # Both interpolants rejected: fall back to bisection.
                a_j = a_lo + 0.5*dalpha

        # Check new value of a_j

        phi_aj = phi(a_j)
        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
            # Armijo fails, or no improvement over the low endpoint:
            # shrink the bracket from the high side.
            phi_rec = phi_hi
            a_rec = a_hi
            a_hi = a_j
            phi_hi = phi_aj
        else:
            derphi_aj = derphi(a_j)
            # Strong Wolfe curvature condition plus the caller's extra test.
            if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
                a_star = a_j
                val_star = phi_aj
                valprime_star = derphi_aj
                break
            if derphi_aj*(a_hi - a_lo) >= 0:
                # Slope points towards a_hi: move the high end onto a_lo.
                phi_rec = phi_hi
                a_rec = a_hi
                a_hi = a_lo
                phi_hi = phi_lo
            else:
                phi_rec = phi_lo
                a_rec = a_lo
            a_lo = a_j
            phi_lo = phi_aj
            derphi_lo = derphi_aj
        i += 1
        if (i > maxiter):
            # Failed to find a conforming step size
            a_star = None
            val_star = None
            valprime_star = None
            break
    return a_star, val_star, valprime_star
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Armijo line and scalar searches
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize over alpha, the function ``f(xk+alpha pk)``.

    Parameters
    ----------
    f : callable
        Function to be minimized.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like
        Gradient of `f` at point `xk`.
    old_fval : float
        Value of `f` at point `xk`; recomputed when None.
    args : tuple, optional
        Optional arguments passed to `f`.
    c1 : float, optional
        Value to control stopping criterion.
    alpha0 : scalar, optional
        Value of `alpha` at start of the optimization.

    Returns
    -------
    alpha
        Accepted step length (None on failure).
    f_count
        Number of function evaluations.
    f_val_at_alpha
        Function value at the returned step.

    Notes
    -----
    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.
    """
    xk = np.atleast_1d(xk)
    n_calls = [0]

    def objective_along_ray(step):
        n_calls[0] += 1
        return f(xk + step * pk, *args)

    # Reuse the caller-supplied value of f(xk) when available.
    phi0 = objective_along_ray(0.) if old_fval is None else old_fval

    derphi0 = np.dot(gfk, pk)
    alpha, phi1 = scalar_search_armijo(objective_along_ray, phi0, derphi0,
                                       c1=c1, alpha0=alpha0)
    return alpha, n_calls[0], phi1
|
||||
|
||||
|
||||
def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Compatibility wrapper for `line_search_armijo`.

    Returns the same values with a zero gradient-evaluation count inserted
    at position 2, matching the historical BFGS line-search signature.
    """
    alpha, fcount, fval = line_search_armijo(f, xk, pk, gfk, old_fval,
                                             args=args, c1=c1, alpha0=alpha0)
    # The Armijo search never evaluates the gradient, hence gc == 0.
    return alpha, fcount, 0, fval
|
||||
|
||||
|
||||
def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
    """Minimize over alpha, the function ``phi(alpha)``.

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.

    alpha > 0 is assumed to be a descent direction.

    Returns
    -------
    alpha
        Accepted step, or None if no suitable step was found.
    phi1
        ``phi`` at the last evaluated step.
    """
    def sufficient_decrease(alpha, phi_value):
        # First Wolfe (Armijo) condition.
        return phi_value <= phi0 + c1 * alpha * derphi0

    phi_a0 = phi(alpha0)
    if sufficient_decrease(alpha0, phi_a0):
        return alpha0, phi_a0

    # Minimizer of the quadratic interpolant through phi0, derphi0, phi_a0.
    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = phi(alpha1)
    if sufficient_decrease(alpha1, phi_a1):
        return alpha1, phi_a1

    # Loop with cubic interpolation until an alpha satisfying the first
    # Wolfe condition is found (since we are backtracking, the step is
    # assumed not too small, so the second condition is taken for granted).
    while alpha1 > amin:
        factor = alpha0**2 * alpha1**2 * (alpha1 - alpha0)
        r1 = phi_a1 - phi0 - derphi0 * alpha1
        r0 = phi_a0 - phi0 - derphi0 * alpha0
        a = (alpha0**2 * r1 - alpha1**2 * r0) / factor
        b = (-alpha0**3 * r1 + alpha1**3 * r0) / factor

        alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0 * a)
        phi_a2 = phi(alpha2)
        if sufficient_decrease(alpha2, phi_a2):
            return alpha2, phi_a2

        # Safeguard: halve the step when the cubic step shrinks too slowly
        # or barely moves.
        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2 / alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

        alpha0, alpha1 = alpha1, alpha2
        phi_a0, phi_a1 = phi_a1, phi_a2

    # Failed to find a suitable step length.
    return None, phi_a1
|
||||
|
||||
|
||||
#------------------------------------------------------------------------------
|
||||
# Non-monotone line search for DF-SANE
|
||||
#------------------------------------------------------------------------------
|
||||
|
||||
def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta,
|
||||
gamma=1e-4, tau_min=0.1, tau_max=0.5):
|
||||
"""
|
||||
Nonmonotone backtracking line search as described in [1]_
|
||||
|
||||
Parameters
|
||||
----------
|
||||
f : callable
|
||||
Function returning a tuple ``(f, F)`` where ``f`` is the value
|
||||
of a merit function and ``F`` the residual.
|
||||
x_k : ndarray
|
||||
Initial position.
|
||||
d : ndarray
|
||||
Search direction.
|
||||
prev_fs : float
|
||||
List of previous merit function values. Should have ``len(prev_fs) <= M``
|
||||
where ``M`` is the nonmonotonicity window parameter.
|
||||
eta : float
|
||||
Allowed merit function increase, see [1]_
|
||||
gamma, tau_min, tau_max : float, optional
|
||||
Search parameters, see [1]_
|
||||
|
||||
Returns
|
||||
-------
|
||||
alpha : float
|
||||
Step length
|
||||
xp : ndarray
|
||||
Next position
|
||||
fp : float
|
||||
Merit function value at next position
|
||||
Fp : ndarray
|
||||
Residual at next position
|
||||
|
||||
References
|
||||
----------
|
||||
[1] "Spectral residual method without gradient information for solving
|
||||
large-scale nonlinear systems of equations." W. La Cruz,
|
||||
J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).
|
||||
|
||||
"""
|
||||
f_k = prev_fs[-1]
|
||||
f_bar = max(prev_fs)
|
||||
|
||||
alpha_p = 1
|
||||
alpha_m = 1
|
||||
alpha = 1
|
||||
|
||||
while True:
|
||||
xp = x_k + alpha_p * d
|
||||
fp, Fp = f(xp)
|
||||
|
||||
if fp <= f_bar + eta - gamma * alpha_p**2 * f_k:
|
||||
alpha = alpha_p
|
||||
break
|
||||
|
||||
alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)
|
||||
|
||||
xp = x_k - alpha_m * d
|
||||
fp, Fp = f(xp)
|
||||
|
||||
if fp <= f_bar + eta - gamma * alpha_m**2 * f_k:
|
||||
alpha = -alpha_m
|
||||
break
|
||||
|
||||
alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)
|
||||
|
||||
alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
|
||||
alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)
|
||||
|
||||
return alpha, xp, fp, Fp
|
||||
|
||||
|
||||
def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
|
||||
gamma=1e-4, tau_min=0.1, tau_max=0.5,
|
||||
nu=0.85):
|
||||
"""
|
||||
Nonmonotone line search from [1]
|
||||
|
||||
Parameters
|
||||
----------
|
||||
f : callable
|
||||
Function returning a tuple ``(f, F)`` where ``f`` is the value
|
||||
of a merit function and ``F`` the residual.
|
||||
x_k : ndarray
|
||||
Initial position.
|
||||
d : ndarray
|
||||
Search direction.
|
||||
f_k : float
|
||||
Initial merit function value.
|
||||
C, Q : float
|
||||
Control parameters. On the first iteration, give values
|
||||
Q=1.0, C=f_k
|
||||
eta : float
|
||||
Allowed merit function increase, see [1]_
|
||||
nu, gamma, tau_min, tau_max : float, optional
|
||||
Search parameters, see [1]_
|
||||
|
||||
Returns
|
||||
-------
|
||||
alpha : float
|
||||
Step length
|
||||
xp : ndarray
|
||||
Next position
|
||||
fp : float
|
||||
Merit function value at next position
|
||||
Fp : ndarray
|
||||
Residual at next position
|
||||
C : float
|
||||
New value for the control parameter C
|
||||
Q : float
|
||||
New value for the control parameter Q
|
||||
|
||||
References
|
||||
----------
|
||||
.. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
|
||||
search and its application to the spectral residual
|
||||
method'', IMA J. Numer. Anal. 29, 814 (2009).
|
||||
|
||||
"""
|
||||
alpha_p = 1
|
||||
alpha_m = 1
|
||||
alpha = 1
|
||||
|
||||
while True:
|
||||
xp = x_k + alpha_p * d
|
||||
fp, Fp = f(xp)
|
||||
|
||||
if fp <= C + eta - gamma * alpha_p**2 * f_k:
|
||||
alpha = alpha_p
|
||||
break
|
||||
|
||||
alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)
|
||||
|
||||
xp = x_k - alpha_m * d
|
||||
fp, Fp = f(xp)
|
||||
|
||||
if fp <= C + eta - gamma * alpha_m**2 * f_k:
|
||||
alpha = -alpha_m
|
||||
break
|
||||
|
||||
alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)
|
||||
|
||||
alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
|
||||
alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)
|
||||
|
||||
# Update C and Q
|
||||
Q_next = nu * Q + 1
|
||||
C = (nu * Q * (C + eta) + fp) / Q_next
|
||||
Q = Q_next
|
||||
|
||||
return alpha, xp, fp, Fp, C, Q
|
||||
Reference in New Issue
Block a user