2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
18
* Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
21
package weka.classifiers.functions;
23
import weka.classifiers.Classifier;
24
import weka.classifiers.functions.pace.ChisqMixture;
25
import weka.classifiers.functions.pace.MixtureDistribution;
26
import weka.classifiers.functions.pace.NormalMixture;
27
import weka.classifiers.functions.pace.PaceMatrix;
28
import weka.core.Capabilities;
29
import weka.core.Instance;
30
import weka.core.Instances;
31
import weka.core.NoSupportForMissingValuesException;
32
import weka.core.Option;
33
import weka.core.OptionHandler;
34
import weka.core.SelectedTag;
36
import weka.core.TechnicalInformation;
37
import weka.core.TechnicalInformationHandler;
38
import weka.core.Utils;
39
import weka.core.WeightedInstancesHandler;
40
import weka.core.WekaException;
41
import weka.core.Capabilities.Capability;
42
import weka.core.TechnicalInformation.Field;
43
import weka.core.TechnicalInformation.Type;
44
import weka.core.matrix.DoubleVector;
45
import weka.core.matrix.IntVector;
47
import java.util.Enumeration;
48
import java.util.Vector;
51
<!-- globalinfo-start -->
52
* Class for building pace regression linear models and using them for prediction. <br/>
54
* Under regularity conditions, pace regression is provably optimal when the number of coefficients tends to infinity. It consists of a group of estimators that are either overall optimal or optimal under certain conditions.<br/>
56
* The current work of the pace regression theory, and therefore also this implementation, do not handle: <br/>
58
* - missing values <br/>
59
* - non-binary nominal attributes <br/>
60
* - the case that n - k is small where n is the number of instances and k is the number of coefficients (the threshold used in this implementation is 20)<br/>
62
* For more information see:<br/>
64
* Wang, Y (2000). A new approach to fitting linear models in high dimensional spaces. Hamilton, New Zealand.<br/>
66
* Wang, Y., Witten, I. H.: Modeling for optimal probability prediction. In: Proceedings of the Nineteenth International Conference in Machine Learning, Sydney, Australia, 650-657, 2002.
68
<!-- globalinfo-end -->
70
<!-- technical-bibtex-start -->
73
* @phdthesis{Wang2000,
74
* address = {Hamilton, New Zealand},
76
* school = {Department of Computer Science, University of Waikato},
77
* title = {A new approach to fitting linear models in high dimensional spaces},
81
* @inproceedings{Wang2002,
82
* address = {Sydney, Australia},
83
* author = {Wang, Y. and Witten, I. H.},
84
* booktitle = {Proceedings of the Nineteenth International Conference in Machine Learning},
86
* title = {Modeling for optimal probability prediction},
91
<!-- technical-bibtex-end -->
93
<!-- options-start -->
94
* Valid options are: <p/>
97
* Produce debugging output.
98
* (default no debugging output)</pre>
100
* <pre> -E <estimator>
101
* The estimator can be one of the following:
102
* eb -- Empirical Bayes estimator for normal mixture (default)
103
* nested -- Optimal nested model selector for normal mixture
104
* subset -- Optimal subset selector for normal mixture
105
* pace2 -- PACE2 for Chi-square mixture
106
* pace4 -- PACE4 for Chi-square mixture
107
* pace6 -- PACE6 for Chi-square mixture
109
* ols -- Ordinary least squares estimator
110
* aic -- AIC estimator
111
* bic -- BIC estimator
112
* ric -- RIC estimator
113
* olsc -- Ordinary least squares subset selector with a threshold</pre>
115
* <pre> -S <threshold value>
116
* Threshold value for the OLSC estimator</pre>
120
* @author Yong Wang (yongwang@cs.waikato.ac.nz)
121
* @author Gabi Schmidberger (gabi@cs.waikato.ac.nz)
122
* @version $Revision: 1.8 $
124
public class PaceRegression
126
implements OptionHandler, WeightedInstancesHandler, TechnicalInformationHandler {
128
/** for serialization */
129
static final long serialVersionUID = 7230266976059115435L;
131
/** The model used */
132
Instances m_Model = null;
134
/** Array for storing coefficients of linear regression. */
135
private double[] m_Coefficients;
137
/** The index of the class attribute */
138
private int m_ClassIndex;
140
/** True if debug output will be printed */
141
private boolean m_Debug;
143
/** estimator type: Ordinary least squares */
144
private static final int olsEstimator = 0;
145
/** estimator type: Empirical Bayes */
146
private static final int ebEstimator = 1;
147
/** estimator type: Nested model selector */
148
private static final int nestedEstimator = 2;
149
/** estimator type: Subset selector */
150
private static final int subsetEstimator = 3;
151
/** estimator type:PACE2 */
152
private static final int pace2Estimator = 4;
153
/** estimator type: PACE4 */
154
private static final int pace4Estimator = 5;
155
/** estimator type: PACE6 */
156
private static final int pace6Estimator = 6;
157
/** estimator type: Ordinary least squares selection */
158
private static final int olscEstimator = 7;
159
/** estimator type: AIC */
160
private static final int aicEstimator = 8;
161
/** estimator type: BIC */
162
private static final int bicEstimator = 9;
163
/** estimator type: RIC */
164
private static final int ricEstimator = 10;
165
/** estimator types */
166
public static final Tag [] TAGS_ESTIMATOR = {
167
new Tag(olsEstimator, "Ordinary least squares"),
168
new Tag(ebEstimator, "Empirical Bayes"),
169
new Tag(nestedEstimator, "Nested model selector"),
170
new Tag(subsetEstimator, "Subset selector"),
171
new Tag(pace2Estimator, "PACE2"),
172
new Tag(pace4Estimator, "PACE4"),
173
new Tag(pace6Estimator, "PACE6"),
174
new Tag(olscEstimator, "Ordinary least squares selection"),
175
new Tag(aicEstimator, "AIC"),
176
new Tag(bicEstimator, "BIC"),
177
new Tag(ricEstimator, "RIC")
181
private int paceEstimator = ebEstimator;
183
private double olscThreshold = 2; // AIC
186
* Returns a string describing this classifier
187
* @return a description of the classifier suitable for
188
* displaying in the explorer/experimenter gui
190
public String globalInfo() {
191
return "Class for building pace regression linear models and using them for "
193
+"Under regularity conditions, pace regression is provably optimal when "
194
+"the number of coefficients tends to infinity. It consists of a group of "
195
+"estimators that are either overall optimal or optimal under certain "
197
+"The current work of the pace regression theory, and therefore also this "
198
+"implementation, do not handle: \n\n"
199
+"- missing values \n"
200
+"- non-binary nominal attributes \n"
201
+"- the case that n - k is small where n is the number of instances and k is "
202
+"the number of coefficients (the threshold used in this implmentation is 20)\n\n"
203
+"For more information see:\n\n"
204
+ getTechnicalInformation().toString();
208
* Returns an instance of a TechnicalInformation object, containing
209
* detailed information about the technical background of this class,
210
* e.g., paper reference or book this class is based on.
212
* @return the technical information about this class
214
public TechnicalInformation getTechnicalInformation() {
215
TechnicalInformation result;
216
TechnicalInformation additional;
218
result = new TechnicalInformation(Type.PHDTHESIS);
219
result.setValue(Field.AUTHOR, "Wang, Y");
220
result.setValue(Field.YEAR, "2000");
221
result.setValue(Field.TITLE, "A new approach to fitting linear models in high dimensional spaces");
222
result.setValue(Field.SCHOOL, "Department of Computer Science, University of Waikato");
223
result.setValue(Field.ADDRESS, "Hamilton, New Zealand");
225
additional = result.add(Type.INPROCEEDINGS);
226
additional.setValue(Field.AUTHOR, "Wang, Y. and Witten, I. H.");
227
additional.setValue(Field.YEAR, "2002");
228
additional.setValue(Field.TITLE, "Modeling for optimal probability prediction");
229
additional.setValue(Field.BOOKTITLE, "Proceedings of the Nineteenth International Conference in Machine Learning");
230
additional.setValue(Field.YEAR, "2002");
231
additional.setValue(Field.PAGES, "650-657");
232
additional.setValue(Field.ADDRESS, "Sydney, Australia");
238
* Returns default capabilities of the classifier.
240
* @return the capabilities of this classifier
242
public Capabilities getCapabilities() {
243
Capabilities result = super.getCapabilities();
246
result.enable(Capability.BINARY_ATTRIBUTES);
247
result.enable(Capability.NUMERIC_ATTRIBUTES);
250
result.enable(Capability.NUMERIC_CLASS);
251
result.enable(Capability.DATE_CLASS);
252
result.enable(Capability.MISSING_CLASS_VALUES);
258
* Builds a pace regression model for the given data.
260
* @param data the training data to be used for generating the
261
* linear regression function
262
* @throws Exception if the classifier could not be built successfully
264
public void buildClassifier(Instances data) throws Exception {
266
// can classifier handle the data?
267
Capabilities cap = getCapabilities();
268
cap.setMinimumNumberInstances(20 + data.numAttributes());
269
cap.testWithFail(data);
271
// remove instances with missing class
272
data = new Instances(data);
273
data.deleteWithMissingClass();
276
* initialize the following
278
m_Model = new Instances(data, 0);
279
m_ClassIndex = data.classIndex();
280
double[][] transformedDataMatrix =
281
getTransformedDataMatrix(data, m_ClassIndex);
282
double[] classValueVector = data.attributeToDoubleArray(m_ClassIndex);
284
m_Coefficients = null;
287
* Perform pace regression
289
m_Coefficients = pace(transformedDataMatrix, classValueVector);
295
* @param matrix_X matrix with observations
296
* @param vector_Y vektor with class values
297
* @return vector with coefficients
299
private double [] pace(double[][] matrix_X, double [] vector_Y) {
301
PaceMatrix X = new PaceMatrix( matrix_X );
302
PaceMatrix Y = new PaceMatrix( vector_Y, vector_Y.length );
303
IntVector pvt = IntVector.seq(0, X.getColumnDimension()-1);
304
int n = X.getRowDimension();
305
int kr = X.getColumnDimension();
307
X.lsqrSelection( Y, pvt, 1 );
308
X.positiveDiagonal( Y, pvt );
310
PaceMatrix sol = (PaceMatrix) Y.clone();
311
X.rsolve( sol, pvt, pvt.size() );
312
DoubleVector r = Y.getColumn( pvt.size(), n-1, 0);
313
double sde = Math.sqrt(r.sum2() / r.size());
315
DoubleVector aHat = Y.getColumn( 0, pvt.size()-1, 0).times( 1./sde );
317
DoubleVector aTilde = null;
318
switch( paceEstimator) {
320
case nestedEstimator:
321
case subsetEstimator:
322
NormalMixture d = new NormalMixture();
323
d.fit( aHat, MixtureDistribution.NNMMethod );
324
if( paceEstimator == ebEstimator )
325
aTilde = d.empiricalBayesEstimate( aHat );
326
else if( paceEstimator == ebEstimator )
327
aTilde = d.subsetEstimate( aHat );
328
else aTilde = d.nestedEstimate( aHat );
333
DoubleVector AHat = aHat.square();
334
ChisqMixture dc = new ChisqMixture();
335
dc.fit( AHat, MixtureDistribution.NNMMethod );
337
if( paceEstimator == pace6Estimator )
338
ATilde = dc.pace6( AHat );
339
else if( paceEstimator == pace2Estimator )
340
ATilde = dc.pace2( AHat );
341
else ATilde = dc.pace4( AHat );
342
aTilde = ATilde.sqrt().times( aHat.sign() );
345
aTilde = aHat.copy();
351
if(paceEstimator == aicEstimator) olscThreshold = 2;
352
else if(paceEstimator == bicEstimator) olscThreshold = Math.log( n );
353
else if(paceEstimator == ricEstimator) olscThreshold = 2*Math.log( kr );
354
aTilde = aHat.copy();
355
for( int i = 0; i < aTilde.size(); i++ )
356
if( Math.abs(aTilde.get(i)) < Math.sqrt(olscThreshold) )
359
PaceMatrix YTilde = new PaceMatrix((new PaceMatrix(aTilde)).times( sde ));
360
X.rsolve( YTilde, pvt, pvt.size() );
361
DoubleVector betaTilde = YTilde.getColumn(0).unpivoting( pvt, kr );
363
return betaTilde.getArrayCopy();
367
* Checks if an instance has a missing value.
368
* @param instance the instance
369
* @param model the data
370
* @return true if missing value is present
372
public boolean checkForMissing(Instance instance, Instances model) {
374
for (int j = 0; j < instance.numAttributes(); j++) {
375
if (j != model.classIndex()) {
376
if (instance.isMissing(j)) {
385
* Transforms dataset into a two-dimensional array.
387
* @param data dataset
388
* @param classIndex index of the class attribute
389
* @return the transformed data
391
private double [][] getTransformedDataMatrix(Instances data,
393
int numInstances = data.numInstances();
394
int numAttributes = data.numAttributes();
395
int middle = classIndex;
397
middle = numAttributes;
400
double[][] result = new double[numInstances]
402
for (int i = 0; i < numInstances; i++) {
403
Instance inst = data.instance(i);
407
// the class value (lies on index middle) is left out
408
for (int j = 0; j < middle; j++) {
409
result[i][j + 1] = inst.value(j);
411
for (int j = middle + 1; j < numAttributes; j++) {
412
result[i][j] = inst.value(j);
420
* Classifies the given instance using the linear regression function.
422
* @param instance the test instance
423
* @return the classification
424
* @throws Exception if classification can't be done successfully
426
public double classifyInstance(Instance instance) throws Exception {
428
if (m_Coefficients == null) {
429
throw new Exception("Pace Regression: No model built yet.");
432
// check for missing data and throw exception if some are found
433
if (checkForMissing(instance, m_Model)) {
434
throw new NoSupportForMissingValuesException("Can't handle missing values!");
437
// Calculate the dependent variable from the regression model
438
return regressionPrediction(instance,
443
* Outputs the linear regression model as a string.
445
* @return the model as string
447
public String toString() {
449
if (m_Coefficients == null) {
450
return "Pace Regression: No model built yet.";
453
StringBuffer text = new StringBuffer();
455
text.append("\nPace Regression Model\n\n");
457
text.append(m_Model.classAttribute().name()+" =\n\n");
460
text.append(Utils.doubleToString(m_Coefficients[0],
463
for (int i = 1; i < m_Coefficients.length; i++) {
465
// jump over the class attribute
466
if (index == m_ClassIndex) index++;
468
if (m_Coefficients[i] != 0.0) {
469
// output a coefficient if unequal zero
471
text.append(Utils.doubleToString(m_Coefficients[i], 12, 4)
473
text.append(m_Model.attribute(index).name());
478
return text.toString();
482
* Returns an enumeration describing the available options.
484
* @return an enumeration of all the available options.
486
public Enumeration listOptions() {
488
Vector newVector = new Vector(2);
489
newVector.addElement(new Option("\tProduce debugging output.\n"
490
+ "\t(default no debugging output)",
492
newVector.addElement(new Option("\tThe estimator can be one of the following:\n" +
493
"\t\teb -- Empirical Bayes estimator for noraml mixture (default)\n" +
494
"\t\tnested -- Optimal nested model selector for normal mixture\n" +
495
"\t\tsubset -- Optimal subset selector for normal mixture\n" +
496
"\t\tpace2 -- PACE2 for Chi-square mixture\n" +
497
"\t\tpace4 -- PACE4 for Chi-square mixture\n" +
498
"\t\tpace6 -- PACE6 for Chi-square mixture\n\n" +
499
"\t\tols -- Ordinary least squares estimator\n" +
500
"\t\taic -- AIC estimator\n" +
501
"\t\tbic -- BIC estimator\n" +
502
"\t\tric -- RIC estimator\n" +
503
"\t\tolsc -- Ordinary least squares subset selector with a threshold",
504
"E", 0, "-E <estimator>"));
505
newVector.addElement(new Option("\tThreshold value for the OLSC estimator",
506
"S", 0, "-S <threshold value>"));
507
return newVector.elements();
511
* Parses a given list of options. <p/>
513
<!-- options-start -->
514
* Valid options are: <p/>
517
* Produce debugging output.
518
* (default no debugging output)</pre>
520
* <pre> -E <estimator>
521
* The estimator can be one of the following:
522
* eb -- Empirical Bayes estimator for noraml mixture (default)
523
* nested -- Optimal nested model selector for normal mixture
524
* subset -- Optimal subset selector for normal mixture
525
* pace2 -- PACE2 for Chi-square mixture
526
* pace4 -- PACE4 for Chi-square mixture
527
* pace6 -- PACE6 for Chi-square mixture
529
* ols -- Ordinary least squares estimator
530
* aic -- AIC estimator
531
* bic -- BIC estimator
532
* ric -- RIC estimator
533
* olsc -- Ordinary least squares subset selector with a threshold</pre>
535
* <pre> -S <threshold value>
536
* Threshold value for the OLSC estimator</pre>
540
* @param options the list of options as an array of strings
541
* @throws Exception if an option is not supported
543
public void setOptions(String[] options) throws Exception {
545
setDebug(Utils.getFlag('D', options));
547
String estimator = Utils.getOption('E', options);
548
if ( estimator.equals("ols") ) paceEstimator = olsEstimator;
549
else if ( estimator.equals("olsc") ) paceEstimator = olscEstimator;
550
else if( estimator.equals("eb") || estimator.equals("") )
551
paceEstimator = ebEstimator;
552
else if ( estimator.equals("nested") ) paceEstimator = nestedEstimator;
553
else if ( estimator.equals("subset") ) paceEstimator = subsetEstimator;
554
else if ( estimator.equals("pace2") ) paceEstimator = pace2Estimator;
555
else if ( estimator.equals("pace4") ) paceEstimator = pace4Estimator;
556
else if ( estimator.equals("pace6") ) paceEstimator = pace6Estimator;
557
else if ( estimator.equals("aic") ) paceEstimator = aicEstimator;
558
else if ( estimator.equals("bic") ) paceEstimator = bicEstimator;
559
else if ( estimator.equals("ric") ) paceEstimator = ricEstimator;
560
else throw new WekaException("unknown estimator " + estimator +
563
String string = Utils.getOption('S', options);
564
if( ! string.equals("") ) olscThreshold = Double.parseDouble( string );
569
* Returns the coefficients for this linear model.
571
* @return the coefficients for this linear model
573
public double[] coefficients() {
575
double[] coefficients = new double[m_Coefficients.length];
576
for (int i = 0; i < coefficients.length; i++) {
577
coefficients[i] = m_Coefficients[i];
583
* Gets the current settings of the classifier.
585
* @return an array of strings suitable for passing to setOptions
587
public String [] getOptions() {
589
String [] options = new String [6];
593
options[current++] = "-D";
596
options[current++] = "-E";
597
switch (paceEstimator) {
598
case olsEstimator: options[current++] = "ols";
600
case olscEstimator: options[current++] = "olsc";
601
options[current++] = "-S";
602
options[current++] = "" + olscThreshold;
604
case ebEstimator: options[current++] = "eb";
606
case nestedEstimator: options[current++] = "nested";
608
case subsetEstimator: options[current++] = "subset";
610
case pace2Estimator: options[current++] = "pace2";
612
case pace4Estimator: options[current++] = "pace4";
614
case pace6Estimator: options[current++] = "pace6";
616
case aicEstimator: options[current++] = "aic";
618
case bicEstimator: options[current++] = "bic";
620
case ricEstimator: options[current++] = "ric";
624
while (current < options.length) {
625
options[current++] = "";
632
* Get the number of coefficients used in the model
634
* @return the number of coefficients
636
public int numParameters()
638
return m_Coefficients.length-1;
642
* Returns the tip text for this property
643
* @return tip text for this property suitable for
644
* displaying in the explorer/experimenter gui
646
public String debugTipText() {
647
return "Output debug information to the console.";
651
* Controls whether debugging output will be printed
653
* @param debug true if debugging output should be printed
655
public void setDebug(boolean debug) {
661
* Controls whether debugging output will be printed
663
* @return true if debugging output should be printed
665
public boolean getDebug() {
671
* Returns the tip text for this property
672
* @return tip text for this property suitable for
673
* displaying in the explorer/experimenter gui
675
public String estimatorTipText() {
676
return "The estimator to use.\n\n"
677
+"eb -- Empirical Bayes estimator for noraml mixture (default)\n"
678
+"nested -- Optimal nested model selector for normal mixture\n"
679
+"subset -- Optimal subset selector for normal mixture\n"
680
+"pace2 -- PACE2 for Chi-square mixture\n"
681
+"pace4 -- PACE4 for Chi-square mixture\n"
682
+"pace6 -- PACE6 for Chi-square mixture\n"
683
+"ols -- Ordinary least squares estimator\n"
684
+"aic -- AIC estimator\n"
685
+"bic -- BIC estimator\n"
686
+"ric -- RIC estimator\n"
687
+"olsc -- Ordinary least squares subset selector with a threshold";
693
* @return the estimator
695
public SelectedTag getEstimator() {
697
return new SelectedTag(paceEstimator, TAGS_ESTIMATOR);
701
* Sets the estimator.
703
* @param estimator the new estimator
705
public void setEstimator(SelectedTag estimator) {
707
if (estimator.getTags() == TAGS_ESTIMATOR) {
708
paceEstimator = estimator.getSelectedTag().getID();
713
* Returns the tip text for this property
714
* @return tip text for this property suitable for
715
* displaying in the explorer/experimenter gui
717
public String thresholdTipText() {
718
return "Threshold for the olsc estimator.";
722
* Set threshold for the olsc estimator
724
* @param newThreshold the threshold for the olsc estimator
726
public void setThreshold(double newThreshold) {
728
olscThreshold = newThreshold;
732
* Gets the threshold for olsc estimator
734
* @return the threshold
736
public double getThreshold() {
738
return olscThreshold;
743
* Calculate the dependent value for a given instance for a
744
* given regression model.
746
* @param transformedInstance the input instance
747
* @param coefficients an array of coefficients for the regression
749
* @return the regression value for the instance.
750
* @throws Exception if the class attribute of the input instance
753
private double regressionPrediction(Instance transformedInstance,
754
double [] coefficients)
758
double result = coefficients[column];
759
for (int j = 0; j < transformedInstance.numAttributes(); j++) {
760
if (m_ClassIndex != j) {
762
result += coefficients[column] * transformedInstance.value(j);
770
* Generates a linear regression function predictor.
772
* @param argv the options
774
public static void main(String argv[]) {
775
runClassifier(new PaceRegression(), argv);