from __future__ import nested_scopes

# ******NOTICE***************
# optimize.py module by Travis E. Oliphant
#
# You may copy and use this module as you see fit with no
# guarantee implied provided you keep this notice in all copies.
# *****END NOTICE************

# A collection of optimization algorithms.  Version 0.5
# Added fminbound (July 2001)
# Added brute (Aug. 2002)
# Finished line search satisfying strong Wolfe conditions (Mar. 2004)
# Updated strong Wolfe conditions line search to use cubic interpolation (Mar. 2004)

# Minimization routines

A collection of general-purpose optimization routines using Numeric

fmin        --- Nelder-Mead simplex algorithm (uses only function calls).
fmin_powell --- Powell (direction set) method (uses only function calls).
fmin_cg     --- Non-linear (Polak-Ribiere) conjugate gradient algorithm
                (can use function and gradient).
fmin_bfgs   --- Quasi-Newton method (Broyden-Fletcher-Goldfarb-Shanno)
                (can use function and gradient).
fmin_ncg    --- Line-search Newton conjugate gradient (can use function,
                gradient and Hessian).

brute       --- Perform a brute-force search for the minimum
                with final optimization if desired.

brent       --- Use Brent's method (does not need an initial guess).
fminbound   --- Bounded minimization for scalar functions on an
                interval using Brent's parabolic/golden-section method.
golden      --- Use the golden-section method (does not need an initial guess).

bracket     --- Find a bracket containing the minimum.
__all__ = ['fmin', 'fmin_powell','fmin_bfgs', 'fmin_ncg', 'fmin_cg',
           'fminbound','brent', 'golden','bracket','rosen','rosen_der',
           'rosen_hess', 'rosen_hess_prod', 'brute', 'approx_fprime',
           'line_search', 'check_grad']

from scipy_base import atleast_1d, eye, mgrid, argmin, zeros, shape, \
     squeeze, isscalar, vectorize, asarray, absolute, sqrt, Inf

pymin = __builtin__.min
pymax = __builtin__.max

_epsilon = sqrt(scipy_base.limits.double_epsilon)
def vecnorm(x, ord=2):
    if ord == Inf:
        return scipy_base.amax(abs(x))
    elif ord == -Inf:
        return scipy_base.amin(abs(x))
    else:
        return scipy_base.sum(abs(x)**ord)**(1.0/ord)

def rosen(x):  # The Rosenbrock function
    return MLab.sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)

def rosen_der(x):
    der = MLab.zeros(x.shape,x.typecode())
    der[1:-1] = 200*(xm-xm_m1**2) - 400*(xm_p1 - xm**2)*xm - 2*(1-xm)
    der[0] = -400*x[0]*(x[1]-x[0]**2) - 2*(1-x[0])
    der[-1] = 200*(x[-1]-x[-2]**2)
    return der
def rosen_hess(x):
    H = MLab.diag(-400*x[:-1],1) - MLab.diag(400*x[:-1],-1)
    diagonal = Num.zeros(len(x),x.typecode())
    diagonal[0] = 1200*x[0]-400*x[1]+2
    diagonal[1:-1] = 202 + 1200*x[1:-1]**2 - 400*x[2:]
    H = H + MLab.diag(diagonal)
    return H

def rosen_hess_prod(x,p):
    Hp = Num.zeros(len(x),x.typecode())
    Hp[0] = (1200*x[0]**2 - 400*x[1] + 2)*p[0] - 400*x[0]*p[1]
    Hp[1:-1] = -400*x[:-2]*p[:-2]+(202+1200*x[1:-1]**2-400*x[2:])*p[1:-1] \
               -400*x[1:-1]*p[2:]
    Hp[-1] = -400*x[-2]*p[-2] + 200*p[-1]
    return Hp
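
# Illustrative sanity check (not part of the original module): the Rosenbrock
# function has its global minimum at x = (1, ..., 1) with value 0, so both the
# function value and the analytic gradient should vanish there (assuming the
# elided pieces of the definitions above are present).
def _check_rosen_minimum():
    x = asarray([1.0, 1.0, 1.0, 1.0])
    assert abs(rosen(x)) < 1e-12            # f(1,...,1) == 0
    assert max(abs(rosen_der(x))) < 1e-12   # gradient vanishes at the minimum
    return rosen(x), rosen_der(x)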
def fmin(func, x0, args=(), xtol=1e-4, ftol=1e-4, maxiter=None, maxfun=None,
         full_output=0, disp=1, retall=0):
    """Minimize a function using the downhill simplex algorithm.

    Uses a Nelder-Mead simplex algorithm to find the minimum of a function
    of one or more variables.

      func -- the Python function or method to be minimized.
      x0 -- the initial guess.
      args -- extra arguments for func.

    Outputs: (xopt, {fopt, iter, funcalls, warnflag})

      xopt -- minimizer of function
      fopt -- value of function at minimum: fopt = func(xopt)
      iter -- number of iterations
      funcalls -- number of function calls
      warnflag -- Integer warning flag:
                  1 : 'Maximum number of function evaluations.'
                  2 : 'Maximum number of iterations.'
      allvecs -- a list of solutions at each iteration

      xtol -- acceptable relative error in xopt for convergence.
      ftol -- acceptable relative error in func(xopt) for convergence.
      maxiter -- the maximum number of iterations to perform.
      maxfun -- the maximum number of function evaluations.
      full_output -- non-zero if fval and warnflag outputs are desired.
      disp -- non-zero to print convergence messages.
      retall -- non-zero to return list of solutions at each iteration
    if not -1 < rank < 2:
        raise ValueError, "Initial guess must be a scalar or rank-1 sequence."

    rho = 1; chi = 2; psi = 0.5; sigma = 0.5;
    one2np1 = range(1,N+1)

    sim = Num.zeros((N+1,),x0.typecode())

    sim = Num.zeros((N+1,N),x0.typecode())
    fsim = Num.zeros((N+1,),'d')

    fsim[0] = apply(func,(x0,)+args)

    y = Num.array(x0,copy=1)
    y[k] = (1+nonzdelt)*y[k]

    f = apply(func,(y,)+args)

    ind = Num.argsort(fsim)
    fsim = Num.take(fsim,ind)    # sort so sim[0,:] has the lowest function value
    sim = Num.take(sim,ind,0)

    while (funcalls < maxfun and iterations < maxiter):
        if (max(Num.ravel(abs(sim[1:]-sim[0]))) <= xtol \
            and max(abs(fsim[0]-fsim[1:])) <= ftol):

        xbar = Num.add.reduce(sim[:-1],0) / N
        xr = (1+rho)*xbar - rho*sim[-1]
        fxr = apply(func,(xr,)+args)
        funcalls = funcalls + 1

        xe = (1+rho*chi)*xbar - rho*chi*sim[-1]
        fxe = apply(func,(xe,)+args)
        funcalls = funcalls + 1

        else: # fsim[0] <= fxr
            else: # fxr >= fsim[-2]
                # Perform contraction
                xc = (1+psi*rho)*xbar - psi*rho*sim[-1]
                fxc = apply(func,(xc,)+args)
                funcalls = funcalls + 1

                # Perform an inside contraction
                xcc = (1-psi)*xbar + psi*sim[-1]
                fxcc = apply(func,(xcc,)+args)
                funcalls = funcalls + 1

                sim[j] = sim[0] + sigma*(sim[j] - sim[0])
                fsim[j] = apply(func,(sim[j],)+args)
            funcalls = funcalls + N

        ind = Num.argsort(fsim)
        sim = Num.take(sim,ind,0)
        fsim = Num.take(fsim,ind)
        iterations = iterations + 1
        allvecs.append(sim[0])

    if funcalls >= maxfun:
        print "Warning: Maximum number of function evaluations has been exceeded."
    elif iterations >= maxiter:
        print "Warning: Maximum number of iterations has been exceeded"

    print "Optimization terminated successfully."
    print " Current function value: %f" % fval
    print " Iterations: %d" % iterations
    print " Function evaluations: %d" % funcalls

    retlist = x, fval, iterations, funcalls, warnflag
    retlist += (allvecs,)

    retlist = (x, allvecs)
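
# Usage sketch (illustrative, not from the original source): minimizing the
# Rosenbrock function with the Nelder-Mead simplex.  The starting point and
# tolerance are arbitrary choices for demonstration.
def _example_fmin():
    x0 = [1.3, 0.7, 0.8, 1.9, 1.2]
    xopt, fopt, niter, ncalls, flag = fmin(rosen, x0, xtol=1e-8,
                                           full_output=1, disp=0)
    return xopt   # expected to be close to [1, 1, 1, 1, 1]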
def _cubicmin(a,fa,fpa,b,fb,c,fc):
    # finds the minimizer for a cubic polynomial that goes through the
    # points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa.
    # if no minimizer can be found return None
    #
    # f(x) = A*(x-a)^3 + B*(x-a)^2 + C*(x-a) + D
    if (db == 0) or (dc == 0) or (b == c): return None
    denom = (db*dc)**2 * (db-dc)
    [A,B] = Num.dot([[dc**2, -db**2],[-dc**3, db**3]],[fb-fa-C*db,fc-fa-C*dc])
    if radical < 0: return None
    if (A == 0): return None
    xmin = a + (-B + sqrt(radical))/(3*A)

def _quadmin(a,fa,fpa,b,fb):
    # finds the minimizer for a quadratic polynomial that goes through
    # the points (a,fa), (b,fb) with derivative at a of fpa
    # f(x) = B*(x-a)^2 + C*(x-a) + D
    if (db == 0): return None
    B = (fb-D-C*db)/(db*db)
    if (B <= 0): return None
    xmin = a - C / (2.0*B)
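
# Illustrative check (not in the original module): for f(x) = (x - 3)**2 the
# quadratic fit is exact, so _quadmin should recover the minimizer x = 3 from
# the data f(0) = 9, f'(0) = -6, f(1) = 4 (assuming the elided pieces of the
# helper above, including its return of xmin, are present).
def _check_quadmin():
    return _quadmin(0.0, 9.0, -6.0, 1.0, 4.0)   # expected: 3.0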
328
def zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
329
phi, derphi, phi0, derphi0, c1, c2):
332
delta1 = 0.2 # cubic interpolant check
333
delta2 = 0.1 # quadratic interpolant check
337
# interpolate to find a trial step length between a_lo and a_hi
338
# Need to choose interpolation here. Use cubic interpolation and then if the
339
# result is within delta * dalpha or outside of the interval bounded by a_lo or a_hi
340
# then use quadratic interpolation, if the result is still too close, then use bisection
343
if dalpha < 0: a,b = a_hi,a_lo
344
else: a,b = a_lo, a_hi
346
# minimizer of cubic interpolant
347
# (uses phi_lo, derphi_lo, phi_hi, and the most recent value of phi)
348
# if the result is too close to the end points (or out of the interval)
349
# then use quadratic interpolation with phi_lo, derphi_lo and phi_hi
350
# if the result is stil too close to the end points (or out of the interval)
355
a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi, a_rec, phi_rec)
356
if (i==0) or (a_j is None) or (a_j > b-cchk) or (a_j < a+cchk):
358
a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
359
if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
360
a_j = a_lo + 0.5*dalpha
361
# print "Using bisection."
362
# else: print "Using quadratic."
363
# else: print "Using cubic."
365
# Check new value of a_j
368
if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
374
derphi_aj = derphi(a_j)
375
if abs(derphi_aj) <= -c2*derphi0:
378
valprime_star = derphi_aj
380
if derphi_aj*(a_hi - a_lo) >= 0:
390
derphi_lo = derphi_aj
397
return a_star, val_star, valprime_star
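
# For reference (illustrative helper, not part of the original module): the
# two strong Wolfe conditions that zoom and line_search below enforce, written
# as a predicate.  Here phi(a) = f(xk + a*pk), derphi(a) = dot(fprime(xk + a*pk), pk),
# and the constants satisfy 0 < c1 < c2 < 1.
def _strong_wolfe_ok(phi_a, derphi_a, a, phi0, derphi0, c1=1e-4, c2=0.9):
    sufficient_decrease = phi_a <= phi0 + c1 * a * derphi0   # Armijo condition
    curvature = abs(derphi_a) <= -c2 * derphi0               # curvature condition
    return sufficient_decrease and curvature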
def line_search(f, myfprime, xk, pk, gfk, old_fval, old_old_fval,
                args=(), c1=1e-4, c2=0.9, amax=50):
    """Find alpha that satisfies strong Wolfe conditions.

    Uses the line search algorithm to enforce strong Wolfe conditions,
    Wright and Nocedal, 'Numerical Optimization', 1999, pg. 59-60.

    For the zoom phase it uses an algorithm by

    Outputs: (alpha0, gc, fc)

    global _ls_fc, _ls_gc, _ls_ingfk
        return f(xk+alpha*pk,*args)

    if isinstance(myfprime,type(())):
        global _ls_fc, _ls_ingfk
        newargs = (f,eps) + args
        _ls_ingfk = fprime(xk+alpha*pk,*newargs)  # store for later use
        return Num.dot(_ls_ingfk,pk)

        global _ls_gc, _ls_ingfk
        _ls_ingfk = fprime(xk+alpha*pk,*args)  # store for later use
        return Num.dot(_ls_ingfk,pk)

    derphi0 = Num.dot(gfk,pk)

    alpha1 = pymin(1.0,1.01*2*(phi0-old_old_fval)/derphi0)

    #derphi_a1 = phiprime(alpha1)  evaluated below

    while 1:  # bracketing phase
        if (phi_a1 > phi0 + c1*alpha1*derphi0) or \
           ((phi_a1 >= phi_a0) and (i > 1)):
            alpha_star, fval_star, fprime_star = \
                        zoom(alpha0, alpha1, phi_a0,
                             phi_a1, derphi_a0, phi, phiprime,
                             phi0, derphi0, c1, c2)

        derphi_a1 = phiprime(alpha1)
        if (abs(derphi_a1) <= -c2*derphi0):
            fprime_star = derphi_a1

        alpha_star, fval_star, fprime_star = \
                    zoom(alpha1, alpha0, phi_a1,
                         phi_a0, derphi_a1, phi, phiprime,
                         phi0, derphi0, c1, c2)

        alpha2 = 2 * alpha1  # increase by factor of two on each iteration

        derphi_a0 = derphi_a1

    # stopping test if lower function not found

    if fprime_star is not None:
        # fprime_star is a number (derphi) -- so use the most recently
        # calculated gradient used in computing it, derphi = gfk*pk;
        # this is the gradient at the next step, so there is
        # no need to compute it again in the outer loop.
        fprime_star = _ls_ingfk

    return alpha_star, _ls_fc, _ls_gc, fval_star, old_fval, fprime_star
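
# Usage sketch (illustrative): one strong-Wolfe line search along the
# steepest-descent direction of the Rosenbrock function, mirroring how
# fmin_bfgs below calls this routine.  Assumes rosen and rosen_der above are
# complete.
def _example_line_search():
    xk = asarray([-1.2, 1.0])
    gfk = rosen_der(xk)
    pk = -gfk                        # steepest-descent search direction
    old_fval = rosen(xk)
    old_old_fval = old_fval + 5000   # same startup convention as fmin_bfgs
    return line_search(rosen, rosen_der, xk, pk, gfk, old_fval, old_old_fval)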
def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize over alpha, the function f(xk+alpha pk)

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pg. 56-57.

    Outputs: (alpha, fc, gc)

    phi0 = old_fval   # compute f(xk) -- done in past loop
    phi_a0 = apply(f,(xk+alpha0*pk,)+args)   # compute f
    derphi0 = Num.dot(gfk,pk)

    if (phi_a0 <= phi0 + c1*alpha0*derphi0):
        return alpha0, fc, 0, phi_a0

    # Otherwise compute the minimizer of a quadratic interpolant:

    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = apply(f,(xk+alpha1*pk,)+args)

    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
        return alpha1, fc, 0, phi_a1

    # Otherwise loop with cubic interpolation until we find an alpha which
    # satisfies the first Wolfe condition (since we are backtracking, we will
    # assume that the value of alpha is not too small and satisfies the second
    # condition).

    while 1:  # we are assuming pk is a descent direction
        factor = alpha0**2 * alpha1**2 * (alpha1-alpha0)
        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)

        alpha2 = (-b + Num.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
        phi_a2 = apply(f,(xk+alpha2*pk,)+args)

        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
            return alpha2, fc, 0, phi_a2

        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

def approx_fprime(xk,f,epsilon,*args):
    f0 = apply(f,(xk,)+args)
    grad = Num.zeros((len(xk),),'d')
    ei = Num.zeros((len(xk),),'d')
    for k in range(len(xk)):
        ei[k] = epsilon
        grad[k] = (apply(f,(xk+ei,)+args) - f0)/epsilon
        ei[k] = 0.0
    return grad

def check_grad(func, grad, x0, *args):
    return sqrt(sum((grad(x0,*args)-approx_fprime(x0,func,_epsilon,*args))**2))
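
# Usage sketch (illustrative): comparing the analytic Rosenbrock gradient with
# the forward-difference approximation; the returned number should be small
# (roughly the square root of machine precision).
def _example_check_grad():
    x0 = asarray([1.3, 0.7, 0.8])
    return check_grad(rosen, rosen_der, x0)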
def approx_fhess_p(x0,p,fprime,epsilon,*args):
    f2 = apply(fprime,(x0+epsilon*p,)+args)
    f1 = apply(fprime,(x0,)+args)
    return (f2 - f1)/epsilon

def fmin_bfgs(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf,
              epsilon=_epsilon, maxiter=None, full_output=0, disp=1,
              retall=0):
    """Minimize a function using the BFGS algorithm.

    Optimize the function, f, whose gradient is given by fprime using the
    quasi-Newton method of Broyden, Fletcher, Goldfarb, and Shanno (BFGS).
    See Wright and Nocedal, 'Numerical Optimization', 1999, pg. 198.

      f -- the Python function or method to be minimized.
      x0 -- the initial guess for the minimizer.
      fprime -- a function to compute the gradient of f.
      args -- extra arguments to f and fprime.
      gtol -- gradient norm must be less than gtol before successful termination
      norm -- order of norm (Inf is max, -Inf is min)
      epsilon -- if fprime is approximated use this value for
                 the step size (can be scalar or vector)

    Outputs: (xopt, {fopt, gopt, Hopt, func_calls, grad_calls, warnflag}, {allvecs})

      xopt -- the minimizer of f.
      fopt -- the value of f(xopt).
      gopt -- the value of f'(xopt).  (Should be near 0.)
      Hopt -- the value of 1/f''(xopt).  (inverse Hessian matrix)
      func_calls -- the number of function calls.
      grad_calls -- the number of gradient calls.
      warnflag -- an integer warning flag:
                  1 : 'Maximum number of iterations exceeded.'
                  2 : 'Gradient and/or function calls not changing'
      allvecs -- a list of all iterates (only returned if retall==1)

      maxiter -- the maximum number of iterations.
      full_output -- if non-zero then return fopt, func_calls, grad_calls,
                     and warnflag in addition to xopt.
      disp -- print convergence message if non-zero.
      retall -- return a list of results at each iteration if non-zero
        maxiter = len(x0)*200

    old_fval = f(x0,*args)
    old_old_fval = old_fval + 5000

        gfk = apply(approx_fprime,(x0,f,epsilon)+args)
        myfprime = (approx_fprime,epsilon)
        func_calls = func_calls + len(x0) + 1

        gfk = apply(fprime,(x0,)+args)
        grad_calls = grad_calls + 1

    gnorm = vecnorm(gfk,ord=norm)
    while (gnorm > gtol) and (k < maxiter):
        pk = -Num.dot(Hk,gfk)
        alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                 linesearch.line_search(f,myfprime,xk,pk,gfk,
                                        old_fval,old_old_fval,args=args)
        if alpha_k is None:  # line search failed -- try a different one.
            func_calls = func_calls + fc
            grad_calls = grad_calls + gc
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                     line_search(f,myfprime,xk,pk,gfk,
                                 old_fval,old_old_fval,args=args)

        func_calls = func_calls + fc
        grad_calls = grad_calls + gc
        xkp1 = xk + alpha_k * pk

        gfkp1 = apply(approx_fprime,(xkp1,f,epsilon)+args)
        func_calls = func_calls + len(x0) + 1

        gfkp1 = apply(fprime,(xkp1,)+args)
        grad_calls = grad_calls + 1

        gnorm = vecnorm(gfk,ord=norm)

        try:
            rhok = 1 / Num.dot(yk,sk)
        except ZeroDivisionError:
            #print "Divide by zero encountered: Hessian calculation reset."

        A1 = I - sk[:,Num.NewAxis] * yk[Num.NewAxis,:] * rhok
        A2 = I - yk[:,Num.NewAxis] * sk[Num.NewAxis,:] * rhok
        Hk = Num.dot(A1,Num.dot(Hk,A2)) + rhok * sk[:,Num.NewAxis] \
             * sk[Num.NewAxis,:]

    if disp or full_output:

    print "Warning: Desired error not necessarily achieved due to precision loss"
    print " Current function value: %f" % fval
    print " Iterations: %d" % k
    print " Function evaluations: %d" % func_calls
    print " Gradient evaluations: %d" % grad_calls

    print "Warning: Maximum number of iterations has been exceeded"
    print " Current function value: %f" % fval
    print " Iterations: %d" % k
    print " Function evaluations: %d" % func_calls
    print " Gradient evaluations: %d" % grad_calls

    print "Optimization terminated successfully."
    print " Current function value: %f" % fval
    print " Iterations: %d" % k
    print " Function evaluations: %d" % func_calls
    print " Gradient evaluations: %d" % grad_calls

    retlist = xk, fval, gfk, Hk, func_calls, grad_calls, warnflag
    retlist += (allvecs,)

    retlist = (xk, allvecs)
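
# Usage sketch (illustrative): BFGS on the Rosenbrock function, first with the
# analytic gradient and then with the finite-difference approximation that is
# used internally when fprime is omitted.
def _example_fmin_bfgs():
    x0 = [0.5, 0.5, 0.5]
    x_with_grad = fmin_bfgs(rosen, x0, fprime=rosen_der, disp=0)
    x_approx = fmin_bfgs(rosen, x0, disp=0)   # gradient approximated internally
    return x_with_grad, x_approx              # both should be close to all ones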
def fmin_cg(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf, epsilon=_epsilon,
            maxiter=None, full_output=0, disp=1, retall=0):
    """Minimize a function with the nonlinear conjugate gradient algorithm.

    Optimize the function, f, whose gradient is given by fprime using the
    nonlinear conjugate gradient algorithm of Polak and Ribiere.
    See Wright and Nocedal, 'Numerical Optimization', 1999, pg. 120-122.

      f -- the Python function or method to be minimized.
      x0 -- the initial guess for the minimizer.
      fprime -- a function to compute the gradient of f.
      args -- extra arguments to f and fprime.
      gtol -- stop when the norm of the gradient is less than gtol
      norm -- order of vector norm to use
      epsilon -- if fprime is approximated use this value for
                 the step size (can be scalar or vector)

    Outputs: (xopt, {fopt, func_calls, grad_calls, warnflag}, {allvecs})

      xopt -- the minimizer of f.
      fopt -- the value of f(xopt).
      func_calls -- the number of function calls.
      grad_calls -- the number of gradient calls.
      warnflag -- an integer warning flag:
                  1 : 'Maximum number of iterations exceeded.'
                  2 : 'Gradient and/or function calls not changing'
      allvecs -- if retall is non-zero, the list of iterates is returned

      maxiter -- the maximum number of iterations.
      full_output -- if non-zero then return fopt, func_calls, grad_calls,
                     and warnflag in addition to xopt.
      disp -- print convergence message if non-zero.
      retall -- return a list of results at each iteration if non-zero
        maxiter = len(x0)*200

    old_fval = f(xk,*args)
    old_old_fval = old_fval + 5000

        gfk = apply(approx_fprime,(x0,f,epsilon)+args)
        myfprime = (approx_fprime,epsilon)
        func_calls = func_calls + len(x0) + 1

        gfk = apply(fprime,(x0,)+args)
        grad_calls = grad_calls + 1

    gnorm = vecnorm(gfk,ord=norm)
    while (gnorm > gtol) and (k < maxiter):
        deltak = Num.dot(gfk,gfk)
        alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                 linesearch.line_search(f,myfprime,xk,pk,gfk,old_fval,
                                        old_old_fval,args=args,c2=0.4)
        if alpha_k is None:  # line search failed -- use a different one.
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                     line_search(f,myfprime,xk,pk,gfk,
                                 old_fval,old_old_fval,args=args)

        gfkp1 = apply(approx_fprime,(xk,f,epsilon)+args)
        func_calls = func_calls + len(x0) + 1

        gfkp1 = apply(fprime,(xk,)+args)
        grad_calls = grad_calls + 1

        beta_k = pymax(0,Num.dot(yk,gfkp1)/deltak)
        pk = -gfkp1 + beta_k * pk
        gnorm = vecnorm(gfk,ord=norm)

    if disp or full_output:

    print "Warning: Desired error not necessarily achieved due to precision loss"
    print " Current function value: %f" % fval
    print " Iterations: %d" % k
    print " Function evaluations: %d" % func_calls
    print " Gradient evaluations: %d" % grad_calls

    print "Warning: Maximum number of iterations has been exceeded"
    print " Current function value: %f" % fval
    print " Iterations: %d" % k
    print " Function evaluations: %d" % func_calls
    print " Gradient evaluations: %d" % grad_calls

    print "Optimization terminated successfully."
    print " Current function value: %f" % fval
    print " Iterations: %d" % k
    print " Function evaluations: %d" % func_calls
    print " Gradient evaluations: %d" % grad_calls

    retlist = xk, fval, func_calls, grad_calls, warnflag
    retlist += (allvecs,)

    retlist = (xk, allvecs)
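
# Usage sketch (illustrative): the Polak-Ribiere conjugate-gradient routine on
# the Rosenbrock function with its analytic gradient.
def _example_fmin_cg():
    x0 = [0.5, 0.5, 0.5]
    return fmin_cg(rosen, x0, fprime=rosen_der, disp=0)   # close to [1, 1, 1]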
def fmin_ncg(f, x0, fprime, fhess_p=None, fhess=None, args=(), avextol=1e-5,
             epsilon=_epsilon, maxiter=None, full_output=0, disp=1, retall=0):

    Minimize the function, f, whose gradient is given by fprime using the
    Newton-CG method.  fhess_p must compute the Hessian times an arbitrary
    vector.  If it is not given, finite differences on fprime are used to
    compute it.  See Wright and Nocedal, 'Numerical Optimization', 1999,

      f -- the Python function or method to be minimized.
      x0 -- the initial guess for the minimizer.
      fprime -- a function to compute the gradient of f: fprime(x, *args)
      fhess_p -- a function to compute the Hessian of f times an
                 arbitrary vector: fhess_p(x, p, *args)
      fhess -- a function to compute the Hessian matrix of f.
      args -- extra arguments for f, fprime, fhess_p, and fhess (the same
              set of extra arguments is supplied to all of these functions).
      epsilon -- if fhess is approximated use this value for
                 the step size (can be scalar or vector)

    Outputs: (xopt, {fopt, fcalls, gcalls, hcalls, warnflag}, {allvecs})

      xopt -- the minimizer of f
      fopt -- the value of the function at xopt: fopt = f(xopt)
      fcalls -- the number of function calls.
      gcalls -- the number of gradient calls.
      hcalls -- the number of Hessian calls.
      warnflag -- algorithm warnings:
                  1 : 'Maximum number of iterations exceeded.'
      allvecs -- a list of all tried iterates

      avextol -- Convergence is assumed when the average relative error in
                 the minimizer falls below this amount.
      maxiter -- Maximum number of iterations to allow.
      full_output -- If non-zero return the optional outputs.
      disp -- If non-zero print convergence message.
      retall -- return a list of results at each iteration if non-zero

    Only one of fhess_p or fhess need be given.  If fhess is provided,
    then fhess_p will be ignored.  If neither fhess nor fhess_p is
    provided, then the Hessian product will be approximated using finite
    differences on fprime.
        maxiter = len(x0)*200

    xtol = len(x0)*avextol

    old_fval = f(x0,*args)

    while (Num.add.reduce(abs(update)) > xtol) and (k < maxiter):
        # Compute a search direction pk by applying the CG method to
        #   del2 f(xk) p = - grad f(xk) starting from 0.
        b = -apply(fprime,(xk,)+args)

        maggrad = Num.add.reduce(abs(b))
        eta = min([0.5,Num.sqrt(maggrad)])
        termcond = eta * maggrad

        dri0 = Num.dot(ri,ri)

        if fhess is not None:  # you want to compute the Hessian once.
            A = apply(fhess,(xk,)+args)

        while Num.add.reduce(abs(ri)) > termcond:
            Ap = apply(approx_fhess_p,(xk,psupi,fprime,epsilon)+args)

            Ap = apply(fhess_p,(xk,psupi)+args)

            Ap = Num.dot(A,psupi)

            curv = Num.dot(psupi,Ap)

            xsupi = xsupi + dri0/curv * psupi

            xsupi = xsupi + alphai * psupi
            ri = ri + alphai * Ap
            dri1 = Num.dot(ri,ri)

            psupi = -ri + betai * psupi

            dri0 = dri1    # update Num.dot(ri,ri) for next time.

        pk = xsupi   # search direction is solution to system.
        gfk = -b     # gradient at xk
        alphak, fc, gc, old_fval = line_search_BFGS(f,xk,pk,gfk,old_fval,args)

    if disp or full_output:

    print "Warning: Maximum number of iterations has been exceeded"
    print " Current function value: %f" % fval
    print " Iterations: %d" % k
    print " Function evaluations: %d" % fcalls
    print " Gradient evaluations: %d" % gcalls
    print " Hessian evaluations: %d" % hcalls

    print "Optimization terminated successfully."
    print " Current function value: %f" % fval
    print " Iterations: %d" % k
    print " Function evaluations: %d" % fcalls
    print " Gradient evaluations: %d" % gcalls
    print " Hessian evaluations: %d" % hcalls

    retlist = xk, fval, fcalls, gcalls, hcalls, warnflag
    retlist += (allvecs,)

    retlist = (xk, allvecs)
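
# Usage sketch (illustrative): Newton-CG on the Rosenbrock function, once with
# a Hessian-vector product and once with the full Hessian; fhess takes
# precedence when both are supplied.
def _example_fmin_ncg():
    x0 = [0.5, 0.5, 0.5]
    x_hp = fmin_ncg(rosen, x0, rosen_der, fhess_p=rosen_hess_prod, disp=0)
    x_h = fmin_ncg(rosen, x0, rosen_der, fhess=rosen_hess, disp=0)
    return x_hp, x_h   # both should be close to all ones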
def fminbound(func, x1, x2, args=(), xtol=1e-5, maxfun=500,
              full_output=0, disp=1):
    """Bounded minimization for scalar functions.

    Finds a local minimizer of the scalar function func in the interval
    x1 < xopt < x2 using Brent's method.  (See brent for auto-bracketing.)

      func -- the function to be minimized (must accept scalar input and
              return scalar output).
      x1, x2 -- the optimization bounds.
      args -- extra arguments to pass to function.
      xtol -- the convergence tolerance.
      maxfun -- maximum function evaluations.
      full_output -- Non-zero to return optional outputs.
      disp -- Non-zero to print messages.
              0 : no message printing.
              1 : non-convergence notification messages only.
              2 : print a message on convergence too.
              3 : print iteration results.

    Outputs: (xopt, {fval, ierr, numfunc})

      xopt -- The minimizer of the function over the interval.
      fval -- The function value at the minimum point.
      ierr -- An error flag (0 if converged, 1 if maximum number of
              function calls reached).
      numfunc -- The number of function calls.
        raise ValueError, "The lower bound exceeds the upper bound."

    header = ' Func-count     x          f(x)          Procedure'

    sqrt_eps = sqrt(2.2e-16)
    golden_mean = 0.5*(3.0-sqrt(5.0))

    fulc = a + golden_mean*(b-a)
    nfc, xf = fulc, fulc

    fmin_data = (1, xf, fx)

    tol1 = sqrt_eps*abs(xf) + xtol / 3.0

        print "%5.0f   %12.6g %12.6g %s" % (fmin_data + (step,))

    while ( abs(xf-xm) > (tol2 - 0.5*(b-a)) ):

        # Check for parabolic fit
        r = (xf-nfc)*(fx-ffulc)
        q = (xf-fulc)*(fx-fnfc)
        p = (xf-fulc)*q - (xf-nfc)*r

        # Check for acceptability of parabola
        if ( (abs(p) < abs(0.5*q*r)) and (p > q*(a-xf)) and (p < q*(b-xf))):

            if ((x-a) < tol2) or ((b-x) < tol2):
                si = Numeric.sign(xm-xf) + ((xm-xf)==0)

        else:   # do a golden section step

        if golden:  # Do a golden-section step

        si = Numeric.sign(rat) + (rat == 0)
        x = xf + si*max([abs(rat), tol1])

        fmin_data = (num, x, fu)
            print "%5.0f   %12.6g %12.6g %s" % (fmin_data + (step,))

            fulc, ffulc = nfc, fnfc

            if (fu <= fnfc) or (nfc == xf):
                fulc, ffulc = nfc, fnfc
            elif (fu <= ffulc) or (fulc == xf) or (fulc == nfc):

        tol1 = sqrt_eps*abs(xf) + xtol/3.0

        _endprint(x, flag, fval, maxfun, xtol, disp)

        return xf, fval, flag, num

    _endprint(x, flag, fval, maxfun, xtol, disp)

    return xf, fval, flag, num
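
# Usage sketch (illustrative): bounded scalar minimization of a shifted
# parabola; the minimizer of (x - 2)**2 on [0, 5] is x = 2.
def _example_fminbound():
    return fminbound(lambda x: (x - 2.0)**2, 0.0, 5.0)   # close to 2.0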
def brent(func, args=(), brack=None, tol=1.48e-8, full_output=0, maxiter=500):
    """ Given a function of one variable and a possible bracketing interval,
    return the minimum of the function isolated to a fractional precision of
    tol.  A bracketing interval is a triple (a,b,c) where (a<b<c) and
    func(b) < func(a),func(c).  If bracket is two numbers then they are
    assumed to be a starting interval for a downhill bracket search.

    Uses inverse parabolic interpolation when possible to speed up convergence
    of the golden section method.

        xa,xb,xc,fa,fb,fc,funcalls = bracket(func, args=args)
    elif len(brack) == 2:
        xa,xb,xc,fa,fb,fc,funcalls = bracket(func, xa=brack[0], xb=brack[1], args=args)
    elif len(brack) == 3:

        if (xa > xc):  # swap so xa < xc can be assumed
            dum = xa; xa = xc; xc = dum
        assert ((xa < xb) and (xb < xc)), "Not a bracketing interval."
        fa = apply(func, (xa,)+args)
        fb = apply(func, (xb,)+args)
        fc = apply(func, (xc,)+args)
        assert ((fb < fa) and (fb < fc)), "Not a bracketing interval."

        raise ValueError, "Bracketing interval must be length 2 or 3 sequence."

    fw = fv = fx = apply(func, (x,)+args)

    while (iter < maxiter):
        tol1 = tol*abs(x) + _mintol

        if abs(x-xmid) < (tol2-0.5*(b-a)):   # check for convergence

        if (abs(deltax) <= tol1):
            if (x >= xmid): deltax = a-x     # do a golden section step

        else:                                # do a parabolic step
            tmp1 = (x-w)*(fx-fv)
            tmp2 = (x-v)*(fx-fw)
            p = (x-v)*tmp2 - (x-w)*tmp1
            tmp2 = 2.0*(tmp2-tmp1)
            if (tmp2 > 0.0): p = -p

            # check parabolic fit
            if ((p > tmp2*(a-x)) and (p < tmp2*(b-x)) and (abs(p) < abs(0.5*tmp2*dx_temp))):
                rat = p*1.0/tmp2             # if parabolic step is useful.
                if ((u-a) < tol2 or (b-u) < tol2):
                    if xmid-x >= 0: rat = tol1

                if (x >= xmid): deltax = a-x # if it's not, do a golden section step

        if (abs(rat) < tol1):                # update by at least tol1
            if rat >= 0: u = x + tol1

        fu = apply(func, (u,)+args)          # calculate new output value

        if (fu > fx):                        # if it's bigger than current
            if (fu <= fw) or (w == x):
                v = w; w = u; fv = fw; fw = fu
            elif (fu <= fv) or (v == x) or (v == w):

    return xmin, fval, iter, funcalls
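
# Usage sketch (illustrative): Brent's method on a shifted parabola, once with
# an explicit bracketing triple and once letting bracket() (defined below)
# build one from a two-point starting interval.
def _example_brent():
    f = lambda x: (x - 2.0)**2
    x_triple = brent(f, brack=(0.0, 1.0, 5.0))   # (a, b, c) with f(b) < f(a), f(c)
    x_pair = brent(f, brack=(0.0, 1.0))          # starting interval for bracket()
    return x_triple, x_pair                      # both should be close to 2.0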
def golden(func, args=(), brack=None, tol=_epsilon, full_output=0):
    """ Given a function of one variable and a possible bracketing interval,
    return the minimum of the function isolated to a fractional precision of
    tol.  A bracketing interval is a triple (a,b,c) where (a<b<c) and
    func(b) < func(a),func(c).  If bracket is two numbers then they are
    assumed to be a starting interval for a downhill bracket search.

    Uses an analog of the bisection method to decrease the bracketed interval.

        xa,xb,xc,fa,fb,fc,funcalls = bracket(func, args=args)
    elif len(brack) == 2:
        xa,xb,xc,fa,fb,fc,funcalls = bracket(func, xa=brack[0], xb=brack[1], args=args)
    elif len(brack) == 3:

        if (xa > xc):  # swap so xa < xc can be assumed
            dum = xa; xa = xc; xc = dum
        assert ((xa < xb) and (xb < xc)), "Not a bracketing interval."
        fa = apply(func, (xa,)+args)
        fb = apply(func, (xb,)+args)
        fc = apply(func, (xc,)+args)
        assert ((fb < fa) and (fb < fc)), "Not a bracketing interval."

        raise ValueError, "Bracketing interval must be length 2 or 3 sequence."

    if (abs(xc-xb) > abs(xb-xa)):
        x2 = xb + _gC*(xc-xb)

        x1 = xb - _gC*(xb-xa)

    f1 = apply(func, (x1,)+args)
    f2 = apply(func, (x2,)+args)

    while (abs(x3-x0) > tol*(abs(x1)+abs(x2))):
        x0 = x1; x1 = x2; x2 = _gR*x1 + _gC*x3
        f1 = f2; f2 = apply(func, (x2,)+args)

        x3 = x2; x2 = x1; x1 = _gR*x2 + _gC*x0
        f2 = f1; f1 = apply(func, (x1,)+args)

    return xmin, fval, funcalls
def bracket(func, xa=0.0, xb=1.0, args=(), grow_limit=110.0):
    """Given a function and distinct initial points, search in the downhill
    direction (as defined by the initial points) and return new points
    xa, xb, xc that bracket the minimum of the function:
    f(xa) > f(xb) < f(xc)

    _verysmall_num = 1e-21
    fa = apply(func, (xa,)+args)
    fb = apply(func, (xb,)+args)
    if (fa < fb):                      # Switch so fa > fb
        dum = xa; xa = xb; xb = dum
        dum = fa; fa = fb; fb = dum
    xc = xb + _gold*(xb-xa)
    fc = apply(func, (xc,)+args)

        tmp1 = (xb - xa)*(fb-fc)
        tmp2 = (xb - xc)*(fb-fa)

        if abs(val) < _verysmall_num:
            denom = 2.0*_verysmall_num

        w = xb - ((xb-xc)*tmp2-(xb-xa)*tmp1)/denom
        wlim = xb + grow_limit*(xc-xb)

            raise RuntimeError, "Too many iterations."

        if (w-xc)*(xb-w) > 0.0:
            fw = apply(func, (w,)+args)

                xa = xb; xb = w; fa = fb; fb = fw
                return xa, xb, xc, fa, fb, fc, funcalls

                return xa, xb, xc, fa, fb, fc, funcalls

            w = xc + _gold*(xc-xb)
            fw = apply(func, (w,)+args)

        elif (w-wlim)*(wlim-xc) >= 0.0:
            fw = apply(func, (w,)+args)

        elif (w-wlim)*(xc-w) > 0.0:
            fw = apply(func, (w,)+args)

                xb = xc; xc = w; w = xc + _gold*(xc-xb)
                fb = fc; fc = fw; fw = apply(func, (w,)+args)

        else:
            w = xc + _gold*(xc-xb)
            fw = apply(func, (w,)+args)

    return xa, xb, xc, fa, fb, fc, funcalls
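
# Usage sketch (illustrative): bracketing the minimum of a shifted parabola
# starting from two arbitrary points; the returned triple satisfies
# f(xa) > f(xb) < f(xc).
def _example_bracket():
    xa, xb, xc, fa, fb, fc, calls = bracket(lambda x: (x - 2.0)**2, xa=0.0, xb=1.0)
    return (xa, xb, xc)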
global _powell_funcalls

def _myfunc(alpha, func, x0, direc, args=()):
    funcargs = (x0 + alpha * direc,)+args
    return func(*funcargs)

def _linesearch_powell(func, p, xi, args=(), tol=1e-3):
    # line-search algorithm using brent
    # find the minimum of the function
    #    func(x0 + alpha*direc)
    global _powell_funcalls
    extra_args = (func, p, xi) + args
    alpha_min, fret, iter, num = brent(_myfunc, args=extra_args,
                                       full_output=1, tol=tol)

    _powell_funcalls += num
    return squeeze(fret), p+xi, xi
def fmin_powell(func, x0, args=(), xtol=1e-4, ftol=1e-4, maxiter=None,
                maxfun=None, full_output=0, disp=1, retall=0):
    """Minimize a function using modified Powell's method.

    Uses a modification of Powell's method to find the minimum of a function.

      func -- the Python function or method to be minimized.
      x0 -- the initial guess.
      args -- extra arguments for func.

    Outputs: (xopt, {fopt, direc, iter, funcalls, warnflag}, {allvecs})

      xopt -- minimizer of function
      fopt -- value of function at minimum: fopt = func(xopt)
      direc -- current direction set
      iter -- number of iterations
      funcalls -- number of function calls
      warnflag -- Integer warning flag:
                  1 : 'Maximum number of function evaluations.'
                  2 : 'Maximum number of iterations.'
      allvecs -- a list of solutions at each iteration

      xtol -- line-search error tolerance.
      ftol -- acceptable relative error in func(xopt) for convergence.
      maxiter -- the maximum number of iterations to perform.
      maxfun -- the maximum number of function evaluations.
      full_output -- non-zero if fval and warnflag outputs are desired.
      disp -- non-zero to print convergence messages.
      retall -- non-zero to return a list of the solution at each iteration
    global _powell_funcalls

    if not -1 < rank < 2:
        raise ValueError, "Initial guess must be a scalar or rank-1 sequence."

    direc = eye(N,typecode='d')
    fval = squeeze(apply(func, (x,)+args))
    _powell_funcalls = 1

            fval, x, direc1 = _linesearch_powell(func, x, direc1, args=args, tol=xtol*100)
            if (fx2 - fval) > delta:

        if (2.0*(fx - fval) <= ftol*(abs(fx)+abs(fval))+1e-20): break
        if _powell_funcalls >= maxfun: break
        if iter >= maxiter: break

        # Construct the extrapolated point
        fx2 = squeeze(apply(func, (x2,)+args))
        _powell_funcalls += 1

            t = 2.0*(fx+fx2-2.0*fval)
            temp = (fx-fval-delta)
            t -= delta*temp*temp

                fval, x, direc1 = _linesearch_powell(func, x, direc1, args=args, tol=xtol*100)
                direc[bigind] = direc[-1]

    if _powell_funcalls >= maxfun:
        print "Warning: Maximum number of function evaluations has been exceeded."
    elif iter >= maxiter:
        print "Warning: Maximum number of iterations has been exceeded"

    print "Optimization terminated successfully."
    print " Current function value: %f" % fval
    print " Iterations: %d" % iter
    print " Function evaluations: %d" % _powell_funcalls

    retlist = x, fval, direc, iter, _powell_funcalls, warnflag
    retlist += (allvecs,)

    retlist = (x, allvecs)
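
# Usage sketch (illustrative): Powell's direction-set method on the Rosenbrock
# function; like fmin it needs only function values, no derivatives.
def _example_fmin_powell():
    x0 = [1.3, 0.7, 0.8, 1.9, 1.2]
    return fmin_powell(rosen, x0, disp=0)   # expected to be close to all ones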
def _endprint(x, flag, fval, maxfun, xtol, disp):
        print "\nOptimization terminated successfully;\n the returned value" + \
              " satisfies the termination criteria\n (using xtol = ", xtol, ")"

        print "\nMaximum number of function evaluations exceeded --- increase maxfun argument.\n"
def brute(func, ranges, args=(), Ns=20, full_output=0, finish=fmin):
    """Minimize a function over a given range by brute force.

    That is, find the minimum of a function evaluated on a grid
    given by the tuple ranges.

      func -- Function to be optimized.
      ranges -- Tuple where each element is a tuple of parameters
                or a slice object to be handed to scipy.mgrid.
      args -- Extra arguments to function.
      Ns -- Default number of samples if not given.
      full_output -- Non-zero to return evaluation grid.

    Outputs: (x0, fval, {grid, Jout})

      x0 -- Value of arguments giving minimum over the grid.
      fval -- Function value at minimum.
      grid -- tuple with same length as x0 representing the evaluation grid.
      Jout -- Function values over grid: Jout = func(*grid)

        raise ValueError, "Brute Force not possible with more than 40 variables."

    lrange = list(ranges)
        if type(lrange[k]) is not type(slice(None)):
            if len(lrange[k]) < 3:
                lrange[k] = tuple(lrange[k]) + (complex(Ns),)
            lrange[k] = slice(*lrange[k])

    def _scalarfunc(*params):
        params = squeeze(asarray(params))
        return func(params,*args)

    vecfunc = vectorize(_scalarfunc)
    grid = mgrid[lrange]

    Jout = vecfunc(*grid)
    Nshape = shape(Jout)
    indx = argmin(Jout.flat)

    for k in range(N-1,-1,-1):
        Nindx[k] = indx % Nshape[k]

        xmin[k] = grid[k][tuple(Nindx)]

    Jmin = Jout[tuple(Nindx)]

    if callable(finish):
        vals = finish(func,xmin,args=args,full_output=1, disp=0)

            print "Warning: Final optimization did not succeed"

        return xmin, Jmin, grid, Jout
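
# Usage sketch (illustrative): a brute-force grid search over [-2, 2] x [-2, 2]
# for a simple bowl-shaped function, followed by the default fmin polish of the
# best grid point.
def _example_brute():
    f = lambda z: (z[0] - 1.0)**2 + (z[1] + 0.5)**2
    search_ranges = (slice(-2, 2, 0.25), slice(-2, 2, 0.25))
    return brute(f, search_ranges)   # expected to be close to (1.0, -0.5)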
if __name__ == "__main__":

    print "Nelder-Mead Simplex"
    print "==================="
    times.append(time.time() - start)
    algor.append('Nelder-Mead Simplex\t')

    print "Powell Direction Set Method"
    print "==========================="
    x = fmin_powell(rosen,x0)
    times.append(time.time() - start)
    algor.append('Powell Direction Set Method.')

    print "Nonlinear CG"
    print "============"
    x = fmin_cg(rosen, x0, fprime=rosen_der, maxiter=200)
    times.append(time.time() - start)
    algor.append('Nonlinear CG \t')

    print "BFGS Quasi-Newton"
    print "================="
    x = fmin_bfgs(rosen, x0, fprime=rosen_der, maxiter=80)
    times.append(time.time() - start)
    algor.append('BFGS Quasi-Newton\t')

    print "BFGS approximate gradient"
    print "========================="
    x = fmin_bfgs(rosen, x0, gtol=1e-4, maxiter=100)
    times.append(time.time() - start)
    algor.append('BFGS without gradient\t')

    print "Newton-CG with Hessian product"
    print "=============================="
    x = fmin_ncg(rosen, x0, rosen_der, fhess_p=rosen_hess_prod, maxiter=80)
    times.append(time.time() - start)
    algor.append('Newton-CG with Hessian product')

    print "Newton-CG with full Hessian"
    print "==========================="
    x = fmin_ncg(rosen, x0, rosen_der, fhess=rosen_hess, maxiter=80)
    times.append(time.time() - start)
    algor.append('Newton-CG with full Hessian')

    print "\nMinimizing the Rosenbrock function of order 3\n"
    print " Algorithm \t\t\t Seconds"
    print "===========\t\t\t ========="
    for k in range(len(algor)):
        print algor[k], "\t -- ", times[k]