1
{ **********************************************************************
2
* Program REGMULT.PAS *
4
* (c) J. Debord, August 2000 *
5
**********************************************************************
6
This program performs a weighted multiple linear least squares fit :
8
y = b0 + b1 * x1 + b2 * x2 + ...
10
The following parameters are passed on the command line :
12
1st parameter = name of input file (default extension = .DAT)
13
2nd parameter = 1 if the equation includes a constant term b0
15
Input files are ASCII files with the following structure :
17
Line 1 : Title of study
18
Line 2 : Number of variables (must be >= 2 here !)
19
Next lines : Names of variables x1, x2, ..., y
20
Next line : Number of observations (must be > number of variables !)
22
The next lines contain the coordinates (x1, x2, ..., y) of the
23
observations (1 observation by line). The coordinates must be
24
separated by spaces or tabulations.
26
The file INHIB.DAT is an example of data relating the inhibition of an
27
enzyme to the physico-chemical properties of the inhibitors (J. DEBORD,
28
P. N'DIAYE, J. C. BOLLINGER et al, J. Enzyme Inhib., 1997, 12, 13-26).
29
The program parameters are : INHIB 1
31
The program may be executed from Turbo Pascal's integrated environment,
32
in which case the parameters are entered through the "Parameters" option
33
of the menu, or from DOS (after compilation into an executable file),
34
in which case the parameters are entered on the command line (e.g.
36
********************************************************************** }
41
SysUtils,FMath, Matrices, Regress, Models, PaString,messages,dialogs,classes,define_types;
48
// TIVra = array [1..kMaxFact,1..kMaxObs] of integer;
50
mrix,mriy,mriz,fobx,foby,fobz: integer;
52
function MultipleRegression (lnObservations,lnFactors: integer; var X: PMatrix; var lImgIntensity: DoubleP0; var lOutT: DoubleP0): boolean;
53
function MultipleRegressionVec (lnObservations,lnFactors: integer; var X: PMatrix; var Y: PVector; var lOutT,lOutSlope: DoubleP0): boolean;
56
// gMRIFOBra: array [1..kMaxRA] of SpaceType;
57
// gCoregRA: array[1..3,0..3] of double; {MRIx,y,z, Offset,FOBx,FOBy,FOBz}
61
InFName : String; { Name of input file }
62
Title : String; { Title of study }
63
XName : PStrVector; { Names of independent variables }
64
YName : String; { Name of dependent variable }
65
N : Integer; { Number of observations }
66
X : PMatrix; { Matrix of independent variables }
67
Y : PVector; { Vector of dependent variable }
68
Z : PVector; { Vector of independent variable (not used here) }
69
Ycalc : PVector; { Expected Y values }
70
S : PVector; { Standard deviations of Y values }
71
CstPar : PVector; { Constant parameters }
72
B : PVector; { Regression parameters }
73
B_min, B_max : PVector; { Parameter bounds (not used, but must be
74
declared in order to use the WLSFit routine ) }
75
V : PMatrix; { Variance-covariance matrix of regression parameters }
76
Theta : PVector; { Variance parameters }
77
RegTest : TRegTest; { Regression tests }
78
gErrCode : Integer; { Error code }
81
(* procedure ReadCmdLine(var InFName : String; var CstPar : PVector);
82
{ ----------------------------------------------------------------------
83
Reads command line parameters. Stores constant parameters in CstPar,
86
CstPar^[0] = Number of independent variables
87
(this one is set by ReadInputFile)
88
CstPar^[1] = 1 to include a constant term (b0)
90
The contents of CstPar are defined in the unit FITMULT.PAS,
91
in the subdirectory REG of the TP Math units directory.
92
---------------------------------------------------------------------- }
98
{ Name of input file }
99
InFName := ParamStr(1);
100
if Pos('.', InFName) = 0 then InFName := InFName + '.dat';
102
{ Presence of constant term }
104
Val(ParamStr(2), I, gErrCode);
108
function ReadInputFile(InFName : String;
110
var XName : PStrVector;
115
CstPar : PVector) : Integer;
117
InF : Textfile; { Input file }
118
Nvar : Integer; { Nb of independent variables }
119
I, K : Integer; { Loop variables }
121
Assign(InF, InFName);
125
ReadLn(InF, Nvar); { Total number of variables }
128
showmessage('Data file must contain at least 2 variables !');
129
ReadInputFile := - 1;
133
showmessage('trap3x'+inttostr(NVar));
134
DimStrVector(XName, Nvar);{crashes here}
135
showmessage('trap4x'+inttostr(NVar));
136
for I := 1 to Nvar do begin
137
ReadLn(InF, XName^[I]);
138
showmessage(XName^[I]);
144
DimMatrix(X, Nvar, N);
149
for I := 1 to Nvar do
150
Read(InF, X^[I]^[K]);
159
procedure WriteOutputFile(InFName, Title : String;
163
Y, CstPar, Ycalc, S, B : PVector;
167
OutFName : String; { Name of output file }
168
OutF : TextFile; { Output file }
170
Line2 : String; { Separating lines }
171
Nvar : Integer; { Nb of independent variables }
172
Delta : Float; { Residual }
173
Sr : Float; { Residual error }
174
SB : PVector; { Standard deviations of parameters }
175
T : PVector; { Student's t }
176
Prob : PVector; { Probabilities }
177
I, K : Integer; { Loop variables }
179
Nvar := Round(CstPar^[0]);
181
DimVector(SB, LastParam);
182
DimVector(T, LastParam);
183
DimVector(Prob, LastParam);
185
K := Pos('.', InFName);
186
OutFName := Copy(InFName, 1, Pred(K)) + '.out';
187
Assign(OutF, OutFName);
190
Line1 := StrChar(73, '-');
191
Line2 := StrChar(73, '=');
193
WriteLn(OutF, Line2);
194
WriteLn(OutF, 'Data file : ', InFName);
195
WriteLn(OutF, 'Study name : ', Title);
196
for I := 1 to Nvar do
197
WriteLn(OutF, 'x', I:1, ' : ', XName^[I]);
198
WriteLn(OutF, 'y : ', YName);
199
WriteLn(OutF, 'Function : ', FuncName);
201
{ Perform tests on parameters }
202
ParamTest(B, V, N, FirstParam, LastParam, SB, T, Prob);
204
WriteLn(OutF, Line1);
205
WriteLn(OutF, 'Parameter Est.value Std.dev. t Student Prob(>|t|)');
206
WriteLn(OutF, Line1);
207
showmessage(inttostr(nVar)+':'+inttostr(FirstParam)+':'+inttostr(LastParam));
208
for I := FirstParam to LastParam do
210
WriteLn(OutF, ParamName(I):5, B^[I]:17:8, SB^[I]:17:8, T^[I]:17:2, Prob^[I]:17:4)
212
WriteLn(OutF, ParamName(I):5, B^[I]:17:8);
214
WriteLn(OutF, Line1);
215
WriteLn(OutF, 'Number of observations : n = ', N:5);
220
WriteLn(OutF, 'Residual error : s = ', Sr:10:8);
221
if (R2 >= 0.0) and (R2 <= 1.0) then
222
WriteLn(OutF, 'Coefficient of determination : r2 = ', R2:10:8);
223
if (R2a >= 0.0) and (R2a <= 1.0) then
224
WriteLn(OutF, 'Adjusted coeff. of determination : r2a = ', R2a:10:8);
225
Write(OutF, 'Variance ratio (explained/resid.) : F = ', F:10:4);
226
WriteLn(OutF, ' Prob(>F) = ', Prob:6:4);
229
WriteLn(OutF, Line1);
230
WriteLn(OutF, ' i Y obs. Y calc. Residual Std.dev. Std.res.');
231
WriteLn(OutF, Line1);
235
Delta := Y^[K] - Ycalc^[K];
236
WriteLn(OutF, K:3, Y^[K]:14:4, Ycalc^[K]:14:4, Delta:14:4, S^[K]:14:4, (Delta / S^[K]):14:4);
238
WriteLn(OutF, Line2);
241
Showmessage('Results written to file '+OutFName);
243
DelVector(SB, LastParam);
244
DelVector(T, LastParam);
245
DelVector(Prob, LastParam);
248
{ *************************** Main program ***************************** }
251
{ Read command line parameters }
252
//ReadCmdLine(InFName, CstPar);
253
InFName := 'C:\inhib.dat';
254
DimVector(CstPar, 1);
258
if ReadInputFile(InFName, Title, XName, YName, N, X, Y, CstPar) <> 0 then
260
showmessage('Error reading file '+ InFName);
263
{ Initialize regression and variance models.
264
See MODELS.PAS in the REG subdirectory for a list of available models }
266
VAR_CONST, { Here we use a constant variance }
269
{ Set the regression algorithm which must be GAUSS_JORDAN or SVD.
270
The default algorithm is SVD. Comment off the following line if
271
you wish to change the algorithm. }
273
{ SetRegAlgo(GAUSS_JORDAN); }
276
Note: the variance parameters Theta^[1]..Theta^[LastVarParam]
277
must be supplied if we use a non-constant variance model }
278
DimVector(Theta, LastVarParam);
279
DimVector(B, LastParam);
280
DimMatrix(V, LastParam, LastParam);
284
{ Perform regression. The numbers 1 and 0.1 denote the maximal number
285
of iterations and the tolerance on the parameters. They are purely
286
formal values here since the multiple linear regression does not use
287
an iterative minimization algorithm. }
288
gErrCode := WLSFit(Z, X, Y, N, True, 1, 0.1, Theta, B,
289
B_min, B_max, V, Ycalc, S, RegTest);
293
MAT_OK : WriteOutputFile(InFName, Title, XName, YName,
294
N, Y, CstPar, Ycalc, S, B, V, RegTest);
295
MAT_SINGUL : WriteLn('Singular matrix !');
296
MAT_NON_CONV : WriteLn('Non-convergence of SVD algorithm !');
301
//ComputeRegress(lnObservations,lnFactors, Y, CstPar, Ycalc, S, B, V, lRegTest);
302
procedure ComputeRegress (N,lnFactors : Integer;
303
var Y, CstPar, Ycalc, S, B : PVector;
305
var Test : TRegTest; var lOutT: DoubleP0);
308
SB : PVector; { Standard deviations of parameters }
309
T : PVector; { Student's t }
310
Prob : PVector; { Probabilities }
312
DimVector(SB, LastParam);
313
DimVector(T, LastParam);
314
DimVector(Prob, LastParam);
315
{ Perform tests on parameters }
316
ParamTest(B, V, N, FirstParam, LastParam, SB, T, Prob);
317
for I := 0 to (lnFactors-1) do
318
lOutT[I] := T^[FirstParam+I+1];//first parameter is global fit
320
lOutT[lnFactors] := T^[FirstParam];//global fit
322
//for I := FirstParam to LastParam do
323
// Showmessage(floattostr(T^[I]) );
324
DelVector(SB, LastParam);
325
DelVector(T, LastParam);
326
DelVector(Prob, LastParam);
330
(* procedure ScreenOutputFile(
332
N,ldimension : Integer;
333
var Y, CstPar, Ycalc, S, B : PVector;
336
var lDynStr: String);
338
lA,lB,lC,lD : String; { Name of output file }
339
Nvar : Integer; { Nb of independent variables }
340
Delta : Float; { Residual }
341
Sr : Float; { Residual error }
342
SB : PVector; { Standard deviations of parameters }
343
T : PVector; { Student's t }
344
Prob : PVector; { Probabilities }
345
I, K : Integer; { Loop variables }
347
Nvar := Round(CstPar^[0]);
349
DimVector(SB, LastParam);
350
DimVector(T, LastParam);
351
DimVector(Prob, LastParam);
352
{ Perform tests on parameters }
353
ParamTest(B, V, N, FirstParam, LastParam, SB, T, Prob);
354
lDynStr:=lDynStr+'|'+( 'Parameter Est.value Std.dev. t Student Prob(>|t|)');
355
//showmessage(inttostr(nVar)+':'+inttostr(FirstParam)+':'+inttostr(LastParam));
356
for I := FirstParam to LastParam do begin
357
if SB^[I] > 0.0 then begin
361
Str(Prob^[I]:17:4,lD);
362
lDynStr:=lDynStr+'|'+(ParamName(I)+lA+lB+'T='+lC+lD);
366
lDynStr:=lDynStr+'|'+(ParamName(I)+lA);
368
//gCoregRA[lDImension,I]:= B^[I];
370
DelVector(SB, LastParam);
371
DelVector(T, LastParam);
372
DelVector(Prob, LastParam);
376
//function PredictData(lnObservations: integer; var lStr: tstringlist): boolean;
377
function MultipleRegression (lnObservations,lnFactors: integer; var X: PMatrix; var lImgIntensity: DoubleP0; var lOutT: DoubleP0): boolean;
379
K : Integer; { Nb of independent variables }
380
//X : PMatrix; { Matrix of independent variables }
381
Y : PVector; { Vector of dependent variable }
382
Z : PVector; { Vector of independent variable (not used here) }
383
Ycalc : PVector; { Expected Y values }
384
S : PVector; { Standard deviations of Y values }
385
CstPar : PVector; { Constant parameters }
386
B : PVector; { Regression parameters }
387
B_min, B_max : PVector; { Parameter bounds (not used, but must be
388
declared in order to use the WLSFit routine ) }
389
V : PMatrix; { Variance-covariance matrix of regression parameters }
390
Theta : PVector; { Variance parameters }
391
lRegTest : TRegTest; { Regression tests }
392
gErrCode : Integer; { Error code }
395
if lnObservations < 5 then begin
396
showmessage('At least 5 samples required for 3D registration.');
399
DimVector(CstPar, 1);
400
DimVector(Y, lnObservations);
402
CstPar^[0] := lnFactors;
403
for K := 1 to lnObservations do
404
Y^[K] := lImgIntensity[K-1];
405
{ Initialize regression and variance models.}
406
InitModel(REG_MULT,VAR_CONST,{ Here we use a constant variance }CstPar);
407
{ Set the regression algorithm which must be GAUSS_JORDAN or SVD.
408
The default algorithm is SVD. Comment off the following line if
409
you wish to change the algorithm. }
410
{ SetRegAlgo(GAUSS_JORDAN); }
411
DimVector(Theta, LastVarParam);
412
DimVector(B, LastParam);
413
DimMatrix(V, LastParam, LastParam);
414
DimVector(Ycalc, lnObservations);
415
DimVector(S, lnObservations);
416
{ Perform regression. The numbers 1 and 0.1 denote the maximal number
417
of iterations and the tolerance on the parameters. They are purely
418
formal values here since the multiple linear regression does not use
419
an iterative minimization algorithm. }
420
gErrCode := WLSFit(Z, X, Y, lnObservations, True, 1, 0.1, Theta, B,B_min, B_max, V, Ycalc, S, lRegTest);
422
//showmessage(inttostr(xx));
425
//ScreenOutputFile({XName,}YName,lnObservations,lDim, Y, CstPar, Ycalc, S, B, V, lRegTest,lStr);
427
ComputeRegress(lnObservations,lnFactors, Y, CstPar, Ycalc, S, B, V, lRegTest,lOutT);
429
{ MAT_OK : WriteOutputFile(InFName, Title, XName, YName,
430
N, Y, CstPar, Ycalc, S, B, V, RegTest);
431
} MAT_SINGUL : Showmessage('Singular matrix !');
432
MAT_NON_CONV : Showmessage('Non-convergence of SVD algorithm !');
434
DelVector(CstPar, 1);
435
DelVector(Y, lnObservations);
436
//DelStrVector(XName,lnXFactors);
438
DelVector(Theta, LastVarParam);
439
DelVector(B, LastParam);
440
DelMatrix(V, LastParam, LastParam);
441
DelVector(Ycalc, lnObservations);
442
DelVector(S, lnObservations);
447
function MultipleRegressionVec (lnObservations,lnFactors: integer; var X: PMatrix; var Y: PVector; var lOutT,lOutSlope: DoubleP0): boolean;
449
K : Integer; { Nb of independent variables }
450
Z : PVector; { Vector of independent variable (not used here) }
451
Ycalc : PVector; { Expected Y values }
452
S : PVector; { Standard deviations of Y values }
453
CstPar : PVector; { Constant parameters }
454
B : PVector; { Regression parameters }
455
B_min, B_max : PVector; { Parameter bounds (not used, but must be
456
declared in order to use the WLSFit routine ) }
457
V : PMatrix; { Variance-covariance matrix of regression parameters }
458
Theta : PVector; { Variance parameters }
459
lRegTest : TRegTest; { Regression tests }
460
gErrCode : Integer; { Error code }
463
if lnObservations < 5 then begin
464
showmessage('At least 5 samples required for 3D registration.');
467
DimVector(CstPar, 1);
469
CstPar^[0] := lnFactors;
470
{ Initialize regression and variance models.}
471
InitModel(REG_MULT,VAR_CONST,{ Here we use a constant variance }CstPar);
472
{ Set the regression algorithm which must be GAUSS_JORDAN or SVD.
473
The default algorithm is SVD. Comment off the following line if
474
you wish to change the algorithm. }
475
{ SetRegAlgo(GAUSS_JORDAN); }
476
DimVector(Theta, LastVarParam);
477
DimVector(B, LastParam);
478
DimMatrix(V, LastParam, LastParam);
479
DimVector(Ycalc, lnObservations);
480
DimVector(S, lnObservations);
481
{ Perform regression. The numbers 1 and 0.1 denote the maximal number
482
of iterations and the tolerance on the parameters. They are purely
483
formal values here since the multiple linear regression does not use
484
an iterative minimization algorithm. }
485
gErrCode := WLSFit(Z, X, Y, lnObservations, True, 1, 0.1, Theta, B,B_min, B_max, V, Ycalc, S, lRegTest);
487
//showmessage(inttostr(xx));
490
//ScreenOutputFile({XName,}YName,lnObservations,lDim, Y, CstPar, Ycalc, S, B, V, lRegTest,lStr);
492
ComputeRegress(lnObservations,lnFactors, Y, CstPar, Ycalc, S, B, V, lRegTest,lOutT);
494
{ MAT_OK : WriteOutputFile(InFName, Title, XName, YName,
495
N, Y, CstPar, Ycalc, S, B, V, RegTest);
496
} MAT_SINGUL : Showmessage('Singular matrix !');
497
MAT_NON_CONV : Showmessage('Non-convergence of SVD algorithm !');
499
for K := 0 to (lnFactors-1) do
500
lOutSlope^[K] := B^[FirstParam+K+1];//first parameter is global fit
502
lOutSlope^[lnFactors] := B^[FirstParam];//global fit
504
DelVector(CstPar, 1);
505
//DelVector(Y, lnObservations);
506
//DelStrVector(XName,lnXFactors);
508
DelVector(Theta, LastVarParam);
509
DelVector(B, LastParam);
510
DelMatrix(V, LastParam, LastParam);
511
DelVector(Ycalc, lnObservations);
512
DelVector(S, lnObservations);