~ubuntu-branches/ubuntu/saucy/yorick-yutils/saucy

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
/* 
 * lmfit.i --
 *
 *	Non-linear least-squares fit by Levenberg-Marquardt method.
 *
 * Copyright (c) 1997, Eric THIEBAUT (thiebaut@obs.univ-lyon1.fr, Centre de
 * Recherche Astrophysique de Lyon, 9 avenue Charles  Andre,  F-69561 Saint
 * Genis Laval Cedex).
 *
 * This program is free software; you can redistribute it and/or  modify it
 * under the terms of the GNU General Public License  as  published  by the
 * Free Software Foundation; either version 2 of the License,  or  (at your
 * option) any later version.
 *
 * This program is distributed in the hope  that  it  will  be  useful, but
 * WITHOUT  ANY   WARRANTY;   without   even   the   implied   warranty  of
 * MERCHANTABILITY or  FITNESS  FOR  A  PARTICULAR  PURPOSE.   See  the GNU
 * General Public License for more details (to receive a  copy  of  the GNU
 * General Public License, write to the Free Software Foundation, Inc., 675
 * Mass Ave, Cambridge, MA 02139, USA).
 *
 * History:
 *	$Id: lmfit.i,v 1.1 2007-12-11 23:55:12 frigaut Exp $
 *	$Log: lmfit.i,v $
 *	Revision 1.1  2007-12-11 23:55:12  frigaut
 *	Initial revision
 *
 *	Revision 1.4  2003/06/17 12:17:10  eric
 *	 - fix doc.
 *
 *	Revision 1.3  1998/09/08 15:31:06  eric
 *	 - Make sure that input parameters have floating-point values.
 *
 *	Revision 1.2  1997/07/28 08:26:25  eric
 *	 - Fix the doc.
 *
 *	Revision 1.1  1997/04/21 08:34:04  eric
 *	Initial revision
 *-----------------------------------------------------------------------------
 */

require, "random.i";

struct lmfit_result {
/* DOCUMENT lmfit_result -- structure returned by lmfit
 */
    long	neval;
    long	niter;
    long	nfit;
    long	nfree;
    long	monte_carlo;
    double	chi2_first;
    double	chi2_last;
    double	conv;
    double	sigma;
    double	lambda;
    pointer	stdev;
    pointer	stdev_monte_carlo;
    pointer	correl;
}

func lmfit(f, x, &a, y, w, fit=, correl=, stdev=, gain=, tol=, deriv=, itmax=,
	lambda=, eps=, monte_carlo=)
/* DOCUMENT lmfit -- Non-linear least-squares fit by Levenberg-Marquardt
                     method.

   DESCRIPTION:
     Implement Levenberg-Marquardt method  to  perform  a  non-linear least
     squares fit to a function of an arbitrary number of  parameters.   The
     function  may  be  any  non-linear  function.   If  available, partial
     derivatives can be calculated by the user function, else  this routine
     will  estimate  partial   derivatives   with   a   forward  difference
     approximation.

   CATEGORY:
     E2 - Curve and Surface Fitting.

   SYNTAX:
     result= lmfit(f, x, a, y, w, ...);

   INPUTS:
     F:  The model function  to  fit.   The  function  must  be  written as
         described under RESTRICTIONS, below.
     X:  Anything useful for the model function, for  instance: independent
         variables, a complex structure of  data  or  even  nothing!.   The
         LMFIT routine does not manipulate or use values  in  X,  it simply
         passes X to the user-written function F.
     A:  A vector that contains the initial estimate for each parameter.
     Y:  Array of dependent variables (i.e., the  data).   Y  can  have any
         geometry, but it must be the same as the result returned by F.
     W:  Optional weight,  must be conformable  with Y and all  values of W
         must be positive  or null (default = 1.0).   Data points with zero
	 weight are not fitted.	 Here are some examples:
           - For no weighting (lest square fit): W = 1.0
           - For instrumental weighting: W(i) = 1.0/Y(i)
           - Gaussian noise: W(i) = 1.0/Var(Y(i))

   OUTPUTS:
     A:  The vector of fitted parameters.
     Returns a structure lmfit_result with fields:
       NEVAL:  (long) number of model function evaluations.
       NITER:  (long) number of iteration, i.e. successful CHI2 reductions.
       NFIT:   (long) number of fitted parameters.
       NFREE:  (long) number of degrees of freedom  (i.e.,  number of valid
               data points minus number of fitted parameters).
       MONTE_CARLO: (long) number of Monte Carlo simulations.
       CHI2_FIRST: (double) starting error value: CHI2=sum(W*(F(X,A)-Y)^2).
       CHI2_LAST: (double) last best error value: CHI2=sum(W*(F(X,A)-Y)^2).
       CONV:   (double) relative variation of CHI2.
       SIGMA:  (double) estimated uniform standard deviation of data.  If a
               weight is provided, a  value  of  SIGMA  different  from one
               indicates  that,  if  the  model  is  correct,  W  should be
               multiplied    by     1/SIGMA^2.      Computed     so    that
               sum(W*(F(X,A)-Y)^2)/SIGMA^2=NFREE.
       LAMBDA: (double) last value of LAMBDA.
       STDEV:  (pointer) standard deviation vector of the parameters.
       STDEV_MONTE_CARLO: (pointer)  standard   deviation  vector  of  the
               parameters estimated by Monte Carlo simulations.
       CORREL: (pointer) correlation matrice of the parameters.

   KEYWORDS:
     FIT: List  of  indices  of  parameters  to  fit,  the  others  remaing
          constant.  The default is to tune all parameters.
     CORREL: If  set  to a  non  zero and  non-nil  value, the  correlation
          matrice of the parameters is stored into LMFIT result.
     STDEV: If set to a non zero and non-nil value, the standard deviation
          vector of the parameters is stored into LMFIT result.
     DERIV: When set to a non zero and non-nil  value,  indicates  that the
          model function F is able to compute its derivatives  with respect
          to  the  parameters   (see   RESTRICTIONS).    By   default,  the
          derivatives will be estimated by LMFIT using  forward difference.
          If analytical derivatives are  available  they  should  always be
          used.
     EPS: Small positive value  used  to  estimate  derivatives  by forward
          difference.  Must be such that 1.0+EPS  and  1.0  are numerically
          different  and   should   be   about  sqrt(machine_precision)/100
	  (default = 1e-6).
     TOL: Stop criteria for the convergence (default =  1e-7).   Should not
          be smaller  than  sqrt(machine_precision).   The  routine returns
          when  the  relative  decrease of  CHI2 is  less  than  TOL  in an
          interation.
     ITMAX: Maximum number of iterations. Default = 100.
     GAIN: Gain factor for tuning LAMBDA (default = 10.0).
     LAMBDA: Starting value for parameter LAMBDA (default = 1.0e-3).
     MONTE_CARLO: Number of Monte Carlo  simulations to perform to estimate
          standard  deviation  of  parameters  (by default  no Monte  Carlo
	  simulations are undergone).  May spend a lot of time if you use a
	  large number; but should not be too small!

   GLOBAL VARIABLES:
     None.

   SIDE EFFECTS:
     The values of the vector of parameters A are modified.

   PROCEDURE:
     The function to be fitted must be defined as follow:

       func F(x, a) {....}

     and returns a model with same shape as data Y.  If you want to provide
     analytic derivatives, F should be defined as:

       func F(x, a, &grad, deriv=)
       {
           y= ...;
	   if (deriv) {
	       grad= ...;
	   }
	   return y;
       }

     Where X are the independent variables (anything the function  needs to
     compute synthetic data except the model parameters), A  are  the model
     parameters, DERIV is  a  flag  set  to  non-nil  and  non-zero  if the
     gradient is needed and the output gradient GRAD  is  a  numberof(Y) by
     numberof(A) array: GRAD(i,j) = derivative of ith data point model with
     respect to jth parameter.

     LMFIT tune  parameters A so as to minimize:  CHI2=sum(W*(F(X,A)-Y)^2).
     The  Levenberg-Marquardt  method   consists  in  varying  between  the
     inverse-Hessian  method  and the  steepest  descent  method  where the
     quadratic  expansion of  CHI2  does  not  yield  a better  model.  The
     initial  guess of  the parameter  values  should  be as  close to  the
     actual values as possible or the solution may not converge or may give
     a wrong answer.

   RESTRICTIONS:
     Beware that  the result  does depend on your  initial guess A.  In the
     case of  numerous  local  minima,  the only  way to  get  the  correct
     solution is to start with A close enough to this solution.
     
     The estimates of  standard  deviation of the  parameters are  rescaled
     assuming that, for a correct model  and weights, the expected value of
     CHI2 should  be of the  order of NFREE=numberof(Y)-numberof(A)  (LMFIT
     actually compute NFREE from the number of valid data points and number
     of fitted parameters).   If you don't like this you'll have to rescale
     the returned  standard  deviation  to meet  your needs  (all necessary
     information are in the structure returned by LMFIT).

   EXAMPLE:
     This example is from ODRPACK (version 2.01).  The function to fit is
     of the form:
       f(x) = a1+a2*(exp(a3*x)-1.0)^2
     Starting guess:
       a= [1500.0,  -50.0,   -0.1];
     Independent variables:
       x= [   0.0,    0.0,    5.0,    7.0,    7.5,   10.0,
             16.0,   26.0,   30.0,   34.0,   34.5,  100.0];
     Data:
       y= [1265.0, 1263.6, 1258.0, 1254.0, 1253.0, 1249.8,
           1237.0, 1218.0, 1220.6, 1213.8, 1215.5, 1212.0];
     Function definition (without any optimization):
       func foo(x, a, &grad, deriv=)
       {
           if (deriv)
	       grad= [array(1.0, dimsof(x)),
	              (exp(a(3)*x)-1.0)^2,
	              2.0*a(2)*x*exp(a(3)*x)*(exp(a(3)*x)-1.0)];
           return a(1)+a(2)*(exp(a(3)*x)-1.0)^2;
       }
       
     Fitting this model by:
       r= lmfit(foo, x, a, y, 1., deriv=1, stdev=1, monte_carlo=500, correl=1)
     produces typically the following result:
        a                   = [1264.84, -54.9987, -0.0829835]
        r.neval             = 12
        r.niter             = 6
        r.nfit              = 3
        r.nfree             = 9
        r.monte_carlo       = 500
        r.chi2_first        = 40.4383
        r.chi2_last         = 40.4383
        r.conv              = 3.84967e-09
        r.sigma             = 0.471764
        r.lambda            = 1e-09
       *r.stdev             = [1.23727, 1.78309, 0.00575123]
       *r.stdev_monte_carlo = [1.20222, 1.76120, 0.00494790]
       *r.correl            = [[ 1.000, -0.418, -0.574],
                               [-0.418,  1.000, -0.340],
			       [-0.574, -0.340,  1.000]]

   HISTORY:
     - Basic ideas borrowed from "Numerical Recipes in C", CURVEFIT.PRO (an
       IDL version by DMS, RSI, of the routine "CURFIT: least squares fit to
       a non-linear function", Bevington, Data Reduction and Error Analysis
       for the Physical Sciences) and ODRPACK ("Software for Weigthed
       Orthogonal Distance Regression" freely available at: www.netlib.org).
     - Added: fitting of a subset of the parameters, Monte-Carlo
       simulations...
 */
{
    local grad;

    /* Maybe subset of parameters to fit. */
    if (structof(a)!=double) {
	a+= 0.0;
	if (structof(a)!=double)
	    error, "bad data type for parameters (complex unsupported)";
    }
    na= numberof(a);
    if (is_void(fit))
	fit= indgen(na);
    else if (dimsof(fit)(1) == 0)
	fit= [fit];
    nfit= numberof(fit);
    if (!nfit)
	error, "no parameters to fit.";
    
    /* Check weights. */
    if (is_void(w)) w= 1.0;
    else if (anyof(w < 0.0))
	error, "bad weights.";
    if (numberof(w) != numberof(y))
	w += array(0.0, dimsof(y));
    nfree= sum(w != 0.0) - nfit;	// Degrees of freedom
    if (nfree <= 0)
	error, "not enough data points.";

    /* Other settings. */
    diag= indgen(1:nfit^2:nfit+1);	// Subscripts of diagonal elements
    if (is_void(lambda)) lambda= 1e-3;
    if (is_void(gain)) gain= 10.0;
    if (is_void(itmax)) itmax= 100;
    if (is_void(eps)) eps= 1e-6;	// sqrt(machine_precision)/100
    if (1.0+eps <= 1.0)
	error, "bad value for EPS.";
    if (is_void(tol)) tol= 1e-7;
    monte_carlo= is_void(monte_carlo) ? 0 : long(monte_carlo);
    warn_zero= 0;
    warn= "*** Warning: LMFIT ";
    neval= 0;
    conv= 0.0;
    niter= 0;
    
    while (1) {
	if (deriv) {
	    m= f(x, a, grad, deriv=1);
	    neval++;
	    grad= nfit == na ? grad(*,) : grad(*,fit);
	} else {
	    if (!niter) {
		m= f(x, a);
		neval++;
	    }
	    inc= eps * abs(a(fit));
	    if (numberof((i= where(inc <= 0.0)))) inc(i)= eps;
	    grad= array(double, numberof(y), nfit);
	    for (i=1; i<=nfit; i++) {
		anew= a;	// Copy current parameters
		anew(fit(i)) += inc(i);
		grad(,i)= (f(x,anew)-m)(*)/inc(i);
	    }
	    neval += nfit;
	}
	beta= w * (chi2= y-m);
	if (niter) chi2= chi2new;
	else       chi2= chi2_first= sum(beta * chi2);
	beta= grad(+,) * beta(*)(+);
	alpha= ((w(*)(,-) * grad)(+,) * grad(+,));
	gamma= sqrt(alpha(diag));
	if (anyof(gamma <= 0.0)) {
	    /* Some derivatives are null (certainly because of rounding
	     * errors). */
	    if (!warn_zero) {
		write, warn+"founds zero derivatives.";
		warn_zero= 1;
	    }
	    gamma(where(gamma <= 0.0))= eps * max(gamma);
	    /* goto done; */
	}
	gamma= 1.0 / gamma;
	beta *= gamma;
	alpha *= gamma(,-) * gamma(-,);
	
	while (1) {
	    alpha(diag)= 1.0 + lambda;
	    anew= a;
	    anew(fit) += gamma * LUsolve(alpha, beta);
	    m= f(x, anew);
	    neval++;
	    d= y-m;
	    chi2new= sum(w*d*d);
	    if (chi2new < chi2)
		break;
	    lambda *= gain;
	    if (allof(anew == a)) {
		/* No change in parameters. */
		write, warn+"makes no progress.";
		goto done;
	    }
	}
	a= anew;
	lambda /= gain;
	niter++;
	conv= 2.0*(chi2-chi2new)/(chi2+chi2new);
	if (conv <= tol)
	    break;
	if (niter >= itmax) {
	    write, format=warn+"reached maximum number of iterations (%d).\n",
		itmax;
	    break;
	}
    }
    
done:
    sigma= sqrt(nfree/chi2);
    result= lmfit_result(neval=neval, niter=niter, nfree=nfree, nfit=nfit,
	    lambda=lambda, chi2_first=chi2_first, chi2_last=chi2, conv=conv,
	    sigma=sigma);
    if (correl || stdev) {
	/* Compute correlation matrice and/or standard deviation vector. */
	alpha(diag)= 1.0;
	alpha= LUsolve(alpha);
	if (anyof((tmp1= alpha(diag)) < 0.0))
	    write, format=warn+"%s\n", "found negative variance(s)";
	tmp1= sqrt(abs(tmp1));
	if (stdev) {
	    /* Standard deviation is rescaled assuming that statistically
	     * chi2 = nfree +/- sqrt(2*nfree). */
	    (tmp2= array(double,na))(fit)= gamma * tmp1 / sigma;
	    result.stdev= &tmp2;
	}
	if (correl) {
	    gamma= 1.0 / tmp1;
	    alpha *= gamma(-,) * gamma(,-);
	    if (nfit == na) {
		result.correl= &alpha;
	    } else {
		(tmp2= array(double, na, na))(fit,fit)= alpha;
		result.correl= &tmp2;
	    }
	}
    }
    alpha= beta= gamma= [];	// Free some memory.
    if (monte_carlo >= 1) {
	saa= 0.0*a;
	sig= (w > 0.0) /(sqrt(max(nfree/chi2*w, 0.0)) + (w == 0.0));
	for (i=1; i<=monte_carlo; i++) {
	    anew= a;
	    ynew= y + sig * random_n(dimsof(y));
	    lmfit, f, x, anew, ynew, w, fit=fit, gain=gain, tol=tol,
		deriv=deriv, itmax=itmax, lambda=lambda, eps=eps;
	    anew -= a;
	    saa += anew * anew;
	}
	result.monte_carlo= monte_carlo;
	result.stdev_monte_carlo= &sqrt(saa / monte_carlo);
    }
    return result;
}