/*
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2 of the License, or
 *    (at your option) any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 *    LearningRateResultProducer.java
 *    Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 *
 */
24
package weka.experiment;
26
import weka.core.AdditionalMeasureProducer;
27
import weka.core.Instances;
28
import weka.core.Option;
29
import weka.core.OptionHandler;
30
import weka.core.Utils;
32
import java.util.Enumeration;
33
import java.util.Random;
34
import java.util.Vector;
37
<!-- globalinfo-start -->
38
* Tells a sub-ResultProducer to reproduce the current run for varying sized subsamples of the dataset. Normally used with an AveragingResultProducer and CrossValidationResultProducer combo to generate learning curve results. For non-numeric result fields, the first value is used.
40
<!-- globalinfo-end -->
42
<!-- options-start -->
43
* Valid options are: <p/>
45
* <pre> -X <num steps>
46
* The number of steps in the learning rate curve.
49
* <pre> -W <class name>
50
* The full class name of a ResultProducer.
51
* eg: weka.experiment.CrossValidationResultProducer</pre>
54
* Options specific to result producer weka.experiment.AveragingResultProducer:
57
* <pre> -F <field name>
58
* The name of the field to average over.
59
* (default "Fold")</pre>
61
* <pre> -X <num results>
62
* The number of results expected per average.
66
* Calculate standard deviations.
67
* (default only averages)</pre>
69
* <pre> -W <class name>
70
* The full class name of a ResultProducer.
71
* eg: weka.experiment.CrossValidationResultProducer</pre>
74
* Options specific to result producer weka.experiment.CrossValidationResultProducer:
77
* <pre> -X <number of folds>
78
* The number of folds to use for the cross-validation.
82
* Save raw split evaluator output.</pre>
84
* <pre> -O <file/directory name/path>
85
* The filename where raw output will be stored.
86
* If a directory name is specified then then individual
87
* outputs will be gzipped, otherwise all output will be
88
* zipped to the named file. Use in conjuction with -D. (default splitEvalutorOut.zip)</pre>
90
* <pre> -W <class name>
91
* The full class name of a SplitEvaluator.
92
* eg: weka.experiment.ClassifierSplitEvaluator</pre>
95
* Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
98
* <pre> -W <class name>
99
* The full class name of the classifier.
100
* eg: weka.classifiers.bayes.NaiveBayes</pre>
102
* <pre> -C <index>
103
* The index of the class for which IR statistics
104
* are to be output. (default 1)</pre>
106
* <pre> -I <index>
107
* The index of an attribute to output in the
108
* results. This attribute should identify an
109
* instance in order to know which instances are
110
* in the test set of a cross validation. if 0
111
* no output (default 0).</pre>
114
* Add target and prediction columns to the result
115
* for each fold.</pre>
118
* Options specific to classifier weka.classifiers.rules.ZeroR:
122
* If set, classifier is run in debug mode and
123
* may output additional info to the console</pre>
127
* All options after -- will be passed to the result producer.
129
* @author Len Trigg (trigg@cs.waikato.ac.nz)
130
* @version $Revision: 1.8 $
132
public class LearningRateResultProducer
133
implements ResultListener, ResultProducer, OptionHandler,
134
AdditionalMeasureProducer {
136
/** for serialization */
137
static final long serialVersionUID = -3841159673490861331L;
139
/** The dataset of interest */
140
protected Instances m_Instances;
142
/** The ResultListener to send results to */
143
protected ResultListener m_ResultListener = new CSVResultListener();
145
/** The ResultProducer used to generate results */
146
protected ResultProducer m_ResultProducer
147
= new AveragingResultProducer();
149
/** The names of any additional measures to look for in SplitEvaluators */
150
protected String [] m_AdditionalMeasures = null;
153
* The minimum number of instances to use. If this is zero, the first
154
* step will contain m_StepSize instances
156
protected int m_LowerSize = 0;
159
* The maximum number of instances to use. -1 indicates no maximum
160
* (other than the total number of instances)
162
protected int m_UpperSize = -1;
164
/** The number of instances to add at each step */
165
protected int m_StepSize = 10;
167
/** The current dataset size during stepping */
168
protected int m_CurrentSize = 0;
170
/** The name of the key field containing the learning rate step number */
171
public static String STEP_FIELD_NAME = "Total_instances";
174
* Returns a string describing this result producer
175
* @return a description of the result producer suitable for
176
* displaying in the explorer/experimenter gui
178
public String globalInfo() {
179
return "Tells a sub-ResultProducer to reproduce the current run for "
180
+"varying sized subsamples of the dataset. Normally used with "
181
+"an AveragingResultProducer and CrossValidationResultProducer "
182
+"combo to generate learning curve results. For non-numeric "
183
+"result fields, the first value is used.";
188
* Determines if there are any constraints (imposed by the
189
* destination) on the result columns to be produced by
190
* resultProducers. Null should be returned if there are NO
191
* constraints, otherwise a list of column names should be
192
* returned as an array of Strings.
193
* @param rp the ResultProducer to which the constraints will apply
194
* @return an array of column names to which resutltProducer's
195
* results will be restricted.
196
* @throws Exception if constraints can't be determined
198
public String [] determineColumnConstraints(ResultProducer rp)
204
* Gets the keys for a specified run number. Different run
205
* numbers correspond to different randomizations of the data. Keys
206
* produced should be sent to the current ResultListener
208
* @param run the run number to get keys for.
209
* @throws Exception if a problem occurs while getting the keys
211
public void doRunKeys(int run) throws Exception {
213
if (m_ResultProducer == null) {
214
throw new Exception("No ResultProducer set");
216
if (m_ResultListener == null) {
217
throw new Exception("No ResultListener set");
219
if (m_Instances == null) {
220
throw new Exception("No Instances set");
223
// Tell the resultproducer to send results to us
224
m_ResultProducer.setResultListener(this);
225
m_ResultProducer.setInstances(m_Instances);
227
// For each subsample size
228
if (m_LowerSize == 0) {
229
m_CurrentSize = m_StepSize;
231
m_CurrentSize = m_LowerSize;
233
while (m_CurrentSize <= m_Instances.numInstances() &&
234
((m_UpperSize == -1) ||
235
(m_CurrentSize <= m_UpperSize))) {
236
m_ResultProducer.doRunKeys(run);
237
m_CurrentSize += m_StepSize;
243
* Gets the results for a specified run number. Different run
244
* numbers correspond to different randomizations of the data. Results
245
* produced should be sent to the current ResultListener
247
* @param run the run number to get results for.
248
* @throws Exception if a problem occurs while getting the results
250
public void doRun(int run) throws Exception {
252
if (m_ResultProducer == null) {
253
throw new Exception("No ResultProducer set");
255
if (m_ResultListener == null) {
256
throw new Exception("No ResultListener set");
258
if (m_Instances == null) {
259
throw new Exception("No Instances set");
262
// Randomize on a copy of the original dataset
263
Instances runInstances = new Instances(m_Instances);
264
runInstances.randomize(new Random(run));
265
if (runInstances.classAttribute().isNominal()) {
266
runInstances.stratify(m_StepSize);
269
// Tell the resultproducer to send results to us
270
m_ResultProducer.setResultListener(this);
272
// For each subsample size
273
if (m_LowerSize == 0) {
274
m_CurrentSize = m_StepSize;
276
m_CurrentSize = m_LowerSize;
278
while (m_CurrentSize <= m_Instances.numInstances() &&
279
((m_UpperSize == -1) ||
280
(m_CurrentSize <= m_UpperSize))) {
281
m_ResultProducer.setInstances(new Instances(runInstances, 0,
283
m_ResultProducer.doRun(run);
284
m_CurrentSize += m_StepSize;
291
* Prepare for the results to be received.
293
* @param rp the ResultProducer that will generate the results
294
* @throws Exception if an error occurs during preprocessing.
296
public void preProcess(ResultProducer rp) throws Exception {
298
if (m_ResultListener == null) {
299
throw new Exception("No ResultListener set");
301
m_ResultListener.preProcess(this);
305
* Prepare to generate results. The ResultProducer should call
306
* preProcess(this) on the ResultListener it is to send results to.
308
* @throws Exception if an error occurs during preprocessing.
310
public void preProcess() throws Exception {
312
if (m_ResultProducer == null) {
313
throw new Exception("No ResultProducer set");
315
// Tell the resultproducer to send results to us
316
m_ResultProducer.setResultListener(this);
317
m_ResultProducer.preProcess();
321
* When this method is called, it indicates that no more results
322
* will be sent that need to be grouped together in any way.
324
* @param rp the ResultProducer that generated the results
325
* @throws Exception if an error occurs
327
public void postProcess(ResultProducer rp) throws Exception {
329
m_ResultListener.postProcess(this);
333
* When this method is called, it indicates that no more requests to
334
* generate results for the current experiment will be sent. The
335
* ResultProducer should call preProcess(this) on the
336
* ResultListener it is to send results to.
338
* @throws Exception if an error occurs
340
public void postProcess() throws Exception {
342
m_ResultProducer.postProcess();
346
* Accepts results from a ResultProducer.
348
* @param rp the ResultProducer that generated the results
349
* @param key an array of Objects (Strings or Doubles) that uniquely
350
* identify a result for a given ResultProducer with given compatibilityState
351
* @param result the results stored in an array. The objects stored in
352
* the array may be Strings, Doubles, or null (for the missing value).
353
* @throws Exception if the result could not be accepted.
355
public void acceptResult(ResultProducer rp, Object [] key, Object [] result)
358
if (m_ResultProducer != rp) {
359
throw new Error("Unrecognized ResultProducer sending results!!");
361
// Add in current step as key field
362
Object [] newKey = new Object [key.length + 1];
363
System.arraycopy(key, 0, newKey, 0, key.length);
364
newKey[key.length] = new String("" + m_CurrentSize);
365
// Pass on to result listener
366
m_ResultListener.acceptResult(this, newKey, result);
370
* Determines whether the results for a specified key must be
373
* @param rp the ResultProducer wanting to generate the results
374
* @param key an array of Objects (Strings or Doubles) that uniquely
375
* identify a result for a given ResultProducer with given compatibilityState
376
* @return true if the result should be generated
377
* @throws Exception if it could not be determined if the result
380
public boolean isResultRequired(ResultProducer rp, Object [] key)
383
if (m_ResultProducer != rp) {
384
throw new Error("Unrecognized ResultProducer sending results!!");
386
// Add in current step as key field
387
Object [] newKey = new Object [key.length + 1];
388
System.arraycopy(key, 0, newKey, 0, key.length);
389
newKey[key.length] = new String("" + m_CurrentSize);
390
// Pass on request to result listener
391
return m_ResultListener.isResultRequired(this, newKey);
395
* Gets the names of each of the columns produced for a single run.
397
* @return an array containing the name of each column
398
* @throws Exception if key names cannot be generated
400
public String [] getKeyNames() throws Exception {
402
String [] keyNames = m_ResultProducer.getKeyNames();
403
String [] newKeyNames = new String [keyNames.length + 1];
404
System.arraycopy(keyNames, 0, newKeyNames, 0, keyNames.length);
405
// Think of a better name for this key field
406
newKeyNames[keyNames.length] = STEP_FIELD_NAME;
411
* Gets the data types of each of the columns produced for a single run.
412
* This method should really be static.
414
* @return an array containing objects of the type of each column. The
415
* objects should be Strings, or Doubles.
416
* @throws Exception if the key types could not be determined (perhaps
417
* because of a problem from a nested sub-resultproducer)
419
public Object [] getKeyTypes() throws Exception {
421
Object [] keyTypes = m_ResultProducer.getKeyTypes();
422
Object [] newKeyTypes = new Object [keyTypes.length + 1];
423
System.arraycopy(keyTypes, 0, newKeyTypes, 0, keyTypes.length);
424
newKeyTypes[keyTypes.length] = "";
429
* Gets the names of each of the columns produced for a single run.
430
* A new result field is added for the number of results used to
431
* produce each average.
432
* If only averages are being produced the names are not altered, if
433
* standard deviations are produced then "Dev_" and "Avg_" are prepended
434
* to each result deviation and average field respectively.
436
* @return an array containing the name of each column
437
* @throws Exception if the result names could not be determined (perhaps
438
* because of a problem from a nested sub-resultproducer)
440
public String [] getResultNames() throws Exception {
442
return m_ResultProducer.getResultNames();
446
* Gets the data types of each of the columns produced for a single run.
448
* @return an array containing objects of the type of each column. The
449
* objects should be Strings, or Doubles.
450
* @throws Exception if the result types could not be determined (perhaps
451
* because of a problem from a nested sub-resultproducer)
453
public Object [] getResultTypes() throws Exception {
455
return m_ResultProducer.getResultTypes();
459
* Gets a description of the internal settings of the result
460
* producer, sufficient for distinguishing a ResultProducer
461
* instance from another with different settings (ignoring
462
* those settings set through this interface). For example,
463
* a cross-validation ResultProducer may have a setting for the
464
* number of folds. For a given state, the results produced should
465
* be compatible. Typically if a ResultProducer is an OptionHandler,
466
* this string will represent the command line arguments required
467
* to set the ResultProducer to that state.
469
* @return the description of the ResultProducer state, or null
470
* if no state is defined
472
public String getCompatibilityState() {
475
// + "-F " + Utils.quote(getKeyFieldName())
476
// + " -X " + getStepSize() + " ";
477
if (m_ResultProducer == null) {
478
result += "<null ResultProducer>";
480
result += "-W " + m_ResultProducer.getClass().getName();
482
result += " -- " + m_ResultProducer.getCompatibilityState();
483
return result.trim();
488
* Returns an enumeration describing the available options..
490
* @return an enumeration of all the available options.
492
public Enumeration listOptions() {
494
Vector newVector = new Vector(2);
496
newVector.addElement(new Option(
497
"\tThe number of steps in the learning rate curve.\n"
501
newVector.addElement(new Option(
502
"\tThe full class name of a ResultProducer.\n"
503
+"\teg: weka.experiment.CrossValidationResultProducer",
507
if ((m_ResultProducer != null) &&
508
(m_ResultProducer instanceof OptionHandler)) {
509
newVector.addElement(new Option(
511
"", 0, "\nOptions specific to result producer "
512
+ m_ResultProducer.getClass().getName() + ":"));
513
Enumeration enu = ((OptionHandler)m_ResultProducer).listOptions();
514
while (enu.hasMoreElements()) {
515
newVector.addElement(enu.nextElement());
518
return newVector.elements();
522
* Parses a given list of options. <p/>
524
<!-- options-start -->
525
* Valid options are: <p/>
527
* <pre> -X <num steps>
528
* The number of steps in the learning rate curve.
531
* <pre> -W <class name>
532
* The full class name of a ResultProducer.
533
* eg: weka.experiment.CrossValidationResultProducer</pre>
536
* Options specific to result producer weka.experiment.AveragingResultProducer:
539
* <pre> -F <field name>
540
* The name of the field to average over.
541
* (default "Fold")</pre>
543
* <pre> -X <num results>
544
* The number of results expected per average.
548
* Calculate standard deviations.
549
* (default only averages)</pre>
551
* <pre> -W <class name>
552
* The full class name of a ResultProducer.
553
* eg: weka.experiment.CrossValidationResultProducer</pre>
556
* Options specific to result producer weka.experiment.CrossValidationResultProducer:
559
* <pre> -X <number of folds>
560
* The number of folds to use for the cross-validation.
564
* Save raw split evaluator output.</pre>
566
* <pre> -O <file/directory name/path>
567
* The filename where raw output will be stored.
568
* If a directory name is specified then then individual
569
* outputs will be gzipped, otherwise all output will be
570
* zipped to the named file. Use in conjuction with -D. (default splitEvalutorOut.zip)</pre>
572
* <pre> -W <class name>
573
* The full class name of a SplitEvaluator.
574
* eg: weka.experiment.ClassifierSplitEvaluator</pre>
577
* Options specific to split evaluator weka.experiment.ClassifierSplitEvaluator:
580
* <pre> -W <class name>
581
* The full class name of the classifier.
582
* eg: weka.classifiers.bayes.NaiveBayes</pre>
584
* <pre> -C <index>
585
* The index of the class for which IR statistics
586
* are to be output. (default 1)</pre>
588
* <pre> -I <index>
589
* The index of an attribute to output in the
590
* results. This attribute should identify an
591
* instance in order to know which instances are
592
* in the test set of a cross validation. if 0
593
* no output (default 0).</pre>
596
* Add target and prediction columns to the result
597
* for each fold.</pre>
600
* Options specific to classifier weka.classifiers.rules.ZeroR:
604
* If set, classifier is run in debug mode and
605
* may output additional info to the console</pre>
609
* All options after -- will be passed to the result producer.
611
* @param options the list of options as an array of strings
612
* @throws Exception if an option is not supported
614
public void setOptions(String[] options) throws Exception {
616
String stepSize = Utils.getOption('S', options);
617
if (stepSize.length() != 0) {
618
setStepSize(Integer.parseInt(stepSize));
623
String lowerSize = Utils.getOption('L', options);
624
if (lowerSize.length() != 0) {
625
setLowerSize(Integer.parseInt(lowerSize));
630
String upperSize = Utils.getOption('U', options);
631
if (upperSize.length() != 0) {
632
setUpperSize(Integer.parseInt(upperSize));
637
String rpName = Utils.getOption('W', options);
638
if (rpName.length() == 0) {
639
throw new Exception("A ResultProducer must be specified with"
640
+ " the -W option.");
642
// Do it first without options, so if an exception is thrown during
643
// the option setting, listOptions will contain options for the actual
645
setResultProducer((ResultProducer)Utils.forName(
646
ResultProducer.class,
649
if (getResultProducer() instanceof OptionHandler) {
650
((OptionHandler) getResultProducer())
651
.setOptions(Utils.partitionOptions(options));
656
* Gets the current settings of the result producer.
658
* @return an array of strings suitable for passing to setOptions
660
public String [] getOptions() {
662
String [] seOptions = new String [0];
663
if ((m_ResultProducer != null) &&
664
(m_ResultProducer instanceof OptionHandler)) {
665
seOptions = ((OptionHandler)m_ResultProducer).getOptions();
668
String [] options = new String [seOptions.length + 9];
671
options[current++] = "-S";
672
options[current++] = "" + getStepSize();
673
options[current++] = "-L";
674
options[current++] = "" + getLowerSize();
675
options[current++] = "-U";
676
options[current++] = "" + getUpperSize();
677
if (getResultProducer() != null) {
678
options[current++] = "-W";
679
options[current++] = getResultProducer().getClass().getName();
681
options[current++] = "--";
683
System.arraycopy(seOptions, 0, options, current,
685
current += seOptions.length;
686
while (current < options.length) {
687
options[current++] = "";
693
* Set a list of method names for additional measures to look for
694
* in SplitEvaluators. This could contain many measures (of which only a
695
* subset may be produceable by the current resultProducer) if an experiment
696
* is the type that iterates over a set of properties.
697
* @param additionalMeasures an array of measure names, null if none
699
public void setAdditionalMeasures(String [] additionalMeasures) {
700
m_AdditionalMeasures = additionalMeasures;
702
if (m_ResultProducer != null) {
703
System.err.println("LearningRateResultProducer: setting additional "
706
m_ResultProducer.setAdditionalMeasures(m_AdditionalMeasures);
711
* Returns an enumeration of any additional measure names that might be
712
* in the result producer
713
* @return an enumeration of the measure names
715
public Enumeration enumerateMeasures() {
716
Vector newVector = new Vector();
717
if (m_ResultProducer instanceof AdditionalMeasureProducer) {
718
Enumeration en = ((AdditionalMeasureProducer)m_ResultProducer).
720
while (en.hasMoreElements()) {
721
String mname = (String)en.nextElement();
722
newVector.addElement(mname);
725
return newVector.elements();
729
* Returns the value of the named measure
730
* @param additionalMeasureName the name of the measure to query for its value
731
* @return the value of the named measure
732
* @throws IllegalArgumentException if the named measure is not supported
734
public double getMeasure(String additionalMeasureName) {
735
if (m_ResultProducer instanceof AdditionalMeasureProducer) {
736
return ((AdditionalMeasureProducer)m_ResultProducer).
737
getMeasure(additionalMeasureName);
739
throw new IllegalArgumentException("LearningRateResultProducer: "
740
+"Can't return value for : "+additionalMeasureName
741
+". "+m_ResultProducer.getClass().getName()+" "
742
+"is not an AdditionalMeasureProducer");
747
* Sets the dataset that results will be obtained for.
749
* @param instances a value of type 'Instances'.
751
public void setInstances(Instances instances) {
753
m_Instances = instances;
758
* Returns the tip text for this property
759
* @return tip text for this property suitable for
760
* displaying in the explorer/experimenter gui
762
public String lowerSizeTipText() {
763
return "Set the minmum number of instances in a dataset. Setting zero "
764
+ "here will actually use <stepSize> number of instances at the first "
765
+ "step (since it makes no sense to use zero instances :-))";
769
* Get the value of LowerSize.
771
* @return Value of LowerSize.
773
public int getLowerSize() {
779
* Set the value of LowerSize.
781
* @param newLowerSize Value to assign to
784
public void setLowerSize(int newLowerSize) {
786
m_LowerSize = newLowerSize;
790
* Returns the tip text for this property
791
* @return tip text for this property suitable for
792
* displaying in the explorer/experimenter gui
794
public String upperSizeTipText() {
795
return "Set the maximum number of instances in a dataset. Setting -1 "
796
+ "sets no upper limit (other than the total number of instances "
797
+ "in the full dataset)";
801
* Get the value of UpperSize.
803
* @return Value of UpperSize.
805
public int getUpperSize() {
811
* Set the value of UpperSize.
813
* @param newUpperSize Value to assign to
816
public void setUpperSize(int newUpperSize) {
818
m_UpperSize = newUpperSize;
823
* Returns the tip text for this property
824
* @return tip text for this property suitable for
825
* displaying in the explorer/experimenter gui
827
public String stepSizeTipText() {
828
return "Set the number of instances to add at each step.";
832
* Get the value of StepSize.
834
* @return Value of StepSize.
836
public int getStepSize() {
842
* Set the value of StepSize.
844
* @param newStepSize Value to assign to
847
public void setStepSize(int newStepSize) {
849
m_StepSize = newStepSize;
853
* Sets the object to send results of each run to.
855
* @param listener a value of type 'ResultListener'
857
public void setResultListener(ResultListener listener) {
859
m_ResultListener = listener;
863
* Returns the tip text for this property
864
* @return tip text for this property suitable for
865
* displaying in the explorer/experimenter gui
867
public String resultProducerTipText() {
868
return "Set the resultProducer for which learning rate results should be "
873
* Get the ResultProducer.
875
* @return the ResultProducer.
877
public ResultProducer getResultProducer() {
879
return m_ResultProducer;
883
* Set the ResultProducer.
885
* @param newResultProducer new ResultProducer to use.
887
public void setResultProducer(ResultProducer newResultProducer) {
889
m_ResultProducer = newResultProducer;
890
m_ResultProducer.setResultListener(this);
894
* Gets a text descrption of the result producer.
896
* @return a text description of the result producer.
898
public String toString() {
900
String result = "LearningRateResultProducer: ";
901
result += getCompatibilityState();
902
if (m_Instances == null) {
903
result += ": <null Instances>";
905
result += ": " + Utils.backQuoteChars(m_Instances.relationName());
910
} // LearningRateResultProducer