2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
23
package weka.estimators;
25
import weka.core.Attribute;
26
import weka.core.FastVector;
27
import weka.core.Instance;
28
import weka.core.Instances;
29
import weka.core.Option;
30
import weka.core.OptionHandler;
31
import weka.core.TestInstances;
32
import weka.core.Utils;
33
import weka.core.WeightedInstancesHandler;
35
import java.util.Enumeration;
36
import java.util.Random;
37
import java.util.Vector;
40
* Class for examining the capabilities and finding problems with
41
* estimators. If you implement an estimator using the WEKA libraries,
42
* you should run the checks on it to ensure robustness and correct
43
* operation. Passing all the tests of this object does not mean
44
* bugs in the estimator don't exist, but this will help find some
48
* <code>java weka.estimators.CheckEstimator -W estimator_name
49
* estimator_options </code><p/>
51
* This class uses code from the CheckEstimatorClass
52
* ATTENTION! Current estimators can only
53
* 1. split on a nominal class attribute
54
* 2. build estimators for nominal and numeric attributes
55
* 3. build estimators independently of the class type
56
* The functionality to test on other class and attribute types
57
* is left in big parts in the code.
59
* CheckEstimator reports on the following:
61
* <li> Estimator abilities
63
* <li> Possible command line options to the estimator </li>
64
* <li> Whether the estimator can predict nominal, numeric, string,
65
* date or relational class attributes. Warnings will be displayed if
66
* performance is worse than ZeroR </li>
67
* <li> Whether the estimator can be trained incrementally </li>
68
* <li> Whether the estimator can build estimates for numeric attributes </li>
69
* <li> Whether the estimator can handle nominal attributes </li>
70
* <li> Whether the estimator can handle string attributes </li>
71
* <li> Whether the estimator can handle date attributes </li>
72
* <li> Whether the estimator can handle relational attributes </li>
73
* <li> Whether the estimator can build estimates for multi-instance data </li>
74
* <li> Whether the estimator can handle missing attribute values </li>
75
* <li> Whether the estimator can handle missing class values </li>
76
* <li> Whether a nominal estimator only handles 2 class problems </li>
77
* <li> Whether the estimator can handle instance weights </li>
80
* <li> Correct functioning
82
* <li> Correct initialisation during addValues (i.e. no result
83
* changes when addValues called repeatedly) </li>
84
* <li> Whether incremental training produces the same results
85
* as during non-incremental training (which may or may not
87
* <li> Whether the estimator alters the data passed to it
88
* (number of instances, instance order, instance weights, etc) </li>
91
* <li> Degenerate cases
93
* <li> building estimator with zero training instances </li>
94
* <li> all but one of the attribute values missing </li>
95
* <li> all attribute values missing </li>
96
* <li> all but one class values missing </li>
97
* <li> all class values missing </li>
101
* Running CheckEstimator with the debug option set will output the
102
* training and test datasets for any failed tests.<p/>
104
* The <code>weka.estimators.AbstractEstimatorTest</code> uses this
105
* class to test all the estimators. Any changes here, have to be
106
* checked in that abstract test class, too. <p/>
108
<!-- options-start -->
109
* Valid options are: <p/>
112
* Turn on debugging output.</pre>
115
* Silent mode - prints nothing to stdout.</pre>
117
* <pre> -N &lt;num&gt;
118
* The number of instances in the datasets (default 100).</pre>
121
* Full name of the estimator analysed.
122
* eg: weka.estimators.NormalEstimator</pre>
125
* Options specific to estimator weka.estimators.NormalEstimator:
129
* If set, estimator is run in debug mode and
130
* may output additional info to the console</pre>
134
* Options after -- are passed to the designated estimator.<p/>
136
* @author Len Trigg (trigg@cs.waikato.ac.nz)
137
* @author FracPete (fracpete at waikato dot ac dot nz)
138
* @version $Revision: 1.3 $
141
public class CheckEstimator implements OptionHandler {
144
* Note about test methods:
145
* - methods return array of booleans
146
* - first index: success or not
147
* - second index: acceptable or not (e.g., Exception is OK)
148
* - in case the performance is worse than that of ZeroR both indices are true
150
* FracPete (fracpete at waikato dot ac dot nz)
153
/** a class for postprocessing the test-data
155
public class PostProcessor {
157
* Provides a hook for derived classes to further modify the data. Currently,
158
* the data is just passed through.
160
* @param data the data to process
161
* @return the processed data
163
protected Instances process(Instances data) {
168
/** The estimator to be examined */
169
protected Estimator m_Estimator = (Estimator) new weka.estimators.NormalEstimator(0.000001);
171
/** The options to be passed to the base estimator. */
172
protected String[] m_EstimatorOptions;
174
/** The results of the analysis as a string */
175
protected String m_AnalysisResults;
177
/** Debugging mode, gives extra output if true */
178
protected boolean m_Debug = false;
180
/** Silent mode, for no output at all to stdout */
181
protected boolean m_Silent = false;
183
/** The number of instances in the datasets */
184
protected int m_NumInstances = 100;
186
/** for post-processing the data even further */
187
protected PostProcessor m_PostProcessor = null;
189
/** whether classpath problems occurred */
190
protected boolean m_ClasspathProblems = false;
193
* class that contains info about the attribute types the estimator can estimate
194
* (estimators work on one attribute only)
196
public static class AttrTypes{
197
boolean nominal = false;
198
boolean numeric = false;
199
boolean string = false;
200
boolean date = false;
201
boolean relational = false;
206
AttrTypes (AttrTypes newTypes) {
207
nominal = newTypes.nominal;
208
numeric = newTypes.numeric;
209
string = newTypes.string;
210
date = newTypes.date;
211
relational = newTypes.relational;
214
AttrTypes (int type) {
215
if (type == Attribute.NOMINAL) nominal = true;
216
if (type == Attribute.NUMERIC) numeric = true;
217
if (type == Attribute.STRING) string = true;
218
if (type == Attribute.DATE) date = true;
219
if (type == Attribute.RELATIONAL) relational = true;
222
int getSetType() throws Exception {
225
if (nominal) { sum ++; type = Attribute.NOMINAL; }
226
if (numeric) { sum ++; type = Attribute.NUMERIC; }
227
if (string) { sum ++; type = Attribute.STRING; }
228
if (date) { sum ++; type = Attribute.DATE; }
229
if (relational) { sum ++; type = Attribute.RELATIONAL; }
231
throw new Exception("Expected to have only one type set used wrongly.");
233
throw new Exception("No type set.");
238
return (nominal || numeric || string || date || relational);
241
public Vector getVectorOfAttrTypes() {
242
Vector attrs = new Vector();
243
if (nominal) attrs.add(new Integer(Attribute.NOMINAL));
244
if (numeric) attrs.add(new Integer(Attribute.NUMERIC));
245
if (string) attrs.add(new Integer(Attribute.STRING));
246
if (date) attrs.add(new Integer(Attribute.DATE));
247
if (relational) attrs.add(new Integer(Attribute.RELATIONAL));
253
* public class that contains info about the kind of estimator (incremental, weighted, supervised)
254
* (estimators work on one attribute only)
256
public static class EstTypes {
257
boolean incremental = false;
258
boolean weighted = false;
259
boolean supervised = false;
270
public EstTypes (boolean i, boolean w, boolean s) {
278
* Returns an enumeration describing the available options.
280
* @return an enumeration of all the available options.
282
public Enumeration listOptions() {
284
Vector newVector = new Vector(2);
286
newVector.addElement(new Option(
287
"\tTurn on debugging output.",
290
newVector.addElement(new Option(
291
"\tSilent mode - prints nothing to stdout.",
294
newVector.addElement(new Option(
295
"\tThe number of instances in the datasets (default 100).",
296
"N", 1, "-N <num>"));
298
newVector.addElement(new Option(
299
"\tFull name of the estimator analysed.\n"
300
+"\teg: weka.estimators.NormalEstimator",
303
if ((m_Estimator != null)
304
&& (m_Estimator instanceof OptionHandler)) {
305
newVector.addElement(new Option("", "", 0,
306
"\nOptions specific to estimator "
307
+ m_Estimator.getClass().getName()
309
Enumeration enu = ((OptionHandler)m_Estimator).listOptions();
310
while (enu.hasMoreElements())
311
newVector.addElement(enu.nextElement());
314
return newVector.elements();
318
* Parses a given list of options.
320
<!-- options-start -->
321
* Valid options are: <p/>
324
* Turn on debugging output.</pre>
327
* Silent mode - prints nothing to stdout.</pre>
329
* <pre> -N &lt;num&gt;
330
* The number of instances in the datasets (default 100).</pre>
333
* Full name of the estimator analysed.
334
* eg: weka.estimators.NormalEstimator</pre>
337
* Options specific to estimator weka.estimators.NormalEstimator:
341
* If set, estimator is run in debug mode and
342
* may output additional info to the console</pre>
346
* @param options the list of options as an array of strings
347
* @throws Exception if an option is not supported
349
public void setOptions(String[] options) throws Exception {
352
setDebug(Utils.getFlag('D', options));
354
setSilent(Utils.getFlag('S', options));
356
tmpStr = Utils.getOption('N', options);
357
if (tmpStr.length() != 0)
358
setNumInstances(Integer.parseInt(tmpStr));
360
setNumInstances(100);
362
tmpStr = Utils.getOption('W', options);
363
if (tmpStr.length() == 0)
364
throw new Exception("A estimator must be specified with the -W option.");
365
setEstimator(Estimator.forName(tmpStr, Utils.partitionOptions(options)));
369
* Gets the current settings of the CheckEstimator.
371
* @return an array of strings suitable for passing to setOptions
373
public String[] getOptions() {
378
result = new Vector();
387
result.add("" + getNumInstances());
389
if (getEstimator() != null) {
391
result.add(getEstimator().getClass().getName());
394
if ((m_Estimator != null) && (m_Estimator instanceof OptionHandler))
395
options = ((OptionHandler) m_Estimator).getOptions();
397
options = new String[0];
399
if (options.length > 0) {
401
for (i = 0; i < options.length; i++)
402
result.add(options[i]);
405
return (String[]) result.toArray(new String[result.size()]);
409
* sets the PostProcessor to use
411
* @param value the new PostProcessor
412
* @see #m_PostProcessor
414
public void setPostProcessor(PostProcessor value) {
415
m_PostProcessor = value;
419
* returns the current PostProcessor, can be null
421
* @return the current PostProcessor
423
public PostProcessor getPostProcessor() {
424
return m_PostProcessor;
428
* returns TRUE if the estimator returned a "not in classpath" Exception
430
* @return true if CLASSPATH problems occurred
432
public boolean hasClasspathProblems() {
433
return m_ClasspathProblems;
437
* Begin the tests, reporting results to System.out
439
public void doTests() {
441
if (getEstimator() == null) {
442
println("\n=== No estimator set ===");
445
println("\n=== Check on Estimator: "
446
+ getEstimator().getClass().getName()
449
m_ClasspathProblems = false;
451
// Start tests with test for options
454
// test what type of estimator it is
455
EstTypes estTypes = new EstTypes();
456
estTypes.incremental = incrementalEstimator()[0];
457
estTypes.weighted = weightedInstancesHandler()[0];
458
estTypes.supervised = supervisedEstimator()[0];
460
// in none of the estimators yet the functionality is depending on the class type
461
// since this could change the basic structure taken from checkclassifiers is kept here
462
int classType = Attribute.NOMINAL;
463
AttrTypes attrTypes = testsPerClassType(classType, estTypes);
466
// only nominal class can be split up so far
467
canSplitUpClass(attrTypes, classType);
474
* @param debug true if debug output should be printed
476
public void setDebug(boolean debug) {
479
// disable silent mode, if necessary
485
* Get whether debugging is turned on
487
* @return true if debugging output is on
489
public boolean getDebug() {
494
* Set silent mode, i.e., no output at all to stdout
496
* @param value whether silent mode is active or not
498
public void setSilent(boolean value) {
503
* Get whether silent mode is turned on
505
* @return true if silent mode is on
507
public boolean getSilent() {
512
* Sets the number of instances to use in the datasets (some estimators
513
* might require more instances).
515
* @param value the number of instances to use
517
public void setNumInstances(int value) {
518
m_NumInstances = value;
522
* Gets the current number of instances to use for the datasets.
524
* @return the number of instances
526
public int getNumInstances() {
527
return m_NumInstances;
531
* Set the estimator to be examined.
533
* @param newEstimator the Estimator to use.
535
public void setEstimator(Estimator newEstimator) {
536
m_Estimator = newEstimator;
540
* Get the estimator that is being examined
542
* @return the estimator that is being examined
544
public Estimator getEstimator() {
549
* prints the given message to stdout, if not silent mode
551
* @param msg the text to print to stdout
553
protected void print(Object msg) {
555
System.out.print(msg);
559
* prints the given message (+ LF) to stdout, if not silent mode
561
* @param msg the message to println to stdout
563
protected void println(Object msg) {
568
* prints a LF to stdout, if not silent mode
570
protected void println() {
575
* Run a battery of tests for a given class attribute type
577
* @param classType the class type (NOMINAL, NUMERIC, etc.)
578
* @param estTypes types the estimator is, like incremental, weighted, supervised etc
579
* @return attribute types estimator can work with
581
protected AttrTypes testsPerClassType(int classType, EstTypes estTypes) {
583
// in none of the estimators yet is the estimation depending on the class type
584
// since this could change the basic structure taken from checkclassifiers is kept here
586
// test A: simple test - if can estimate
587
AttrTypes attrTypes = new AttrTypes();
588
AttrTypes at = new AttrTypes(Attribute.NOMINAL);
589
attrTypes.nominal = canEstimate(at, estTypes.supervised, classType)[0];
590
at = new AttrTypes(Attribute.NUMERIC);
591
attrTypes.numeric = canEstimate(at, estTypes.supervised, classType)[0];
592
attrTypes.string = false;
593
attrTypes.date = false;
594
attrTypes.relational = false;
596
// if (!multiInstance)
597
// PRel = canEstimate(false, false, false, false, true, classType)[0];
601
// one of the attribute types succeeded
603
if (attrTypes.oneIsSet()) {
604
Vector attributesSet = attrTypes.getVectorOfAttrTypes();
606
// make tests for each attribute
607
for (int i = 0; i < attributesSet.size(); i++) {
608
AttrTypes workAttrTypes = new AttrTypes(((Integer) attributesSet.elementAt(i)).intValue());
610
// test B: weights change estimate or not
611
if (estTypes.weighted)
612
instanceWeights(workAttrTypes, classType);
614
if (classType == Attribute.NOMINAL) {
616
canHandleNClasses(workAttrTypes, numClasses);
619
// tests with class not the last attribute and the attribute not the first
621
// if (!multiInstance) {
624
canHandleClassAsNthAttribute(workAttrTypes, numAtt, 0, classType, 1);
626
//TODOTODOcanHandleAttrAsNthAttribute(workAttrTypes, numAtt, 2, classType);
629
canHandleZeroTraining(workAttrTypes, classType);
630
boolean handleMissingAttributes = canHandleMissing(workAttrTypes,
631
classType, true, false, 20)[0];
632
if (handleMissingAttributes)
633
canHandleMissing(workAttrTypes, classType, true, false, 100);
635
boolean handleMissingClass = canHandleMissing(workAttrTypes,
638
if (handleMissingClass)
639
canHandleMissing(workAttrTypes, classType, false, true, 100);
641
correctBuildInitialisation(workAttrTypes, classType);
642
datasetIntegrity(workAttrTypes, classType,
643
handleMissingAttributes, handleMissingClass);
645
if (estTypes.incremental)
646
incrementingEquality(workAttrTypes, classType);
653
* Checks whether the scheme can take command line options.
655
* @return index 0 is true if the estimator can take options
657
protected boolean[] canTakeOptions() {
659
boolean[] result = new boolean[2];
662
if (m_Estimator instanceof OptionHandler) {
665
println("\n=== Full report ===");
666
Enumeration enu = ((OptionHandler)m_Estimator).listOptions();
667
while (enu.hasMoreElements()) {
668
Option option = (Option) enu.nextElement();
669
print(option.synopsis() + "\n"
670
+ option.description() + "\n");
685
* Checks whether the scheme can build models incrementally.
687
* @return index 0 is true if the estimator can train incrementally
689
protected boolean[] incrementalEstimator() {
691
boolean[] result = new boolean[2];
693
print("incremental estimator...");
694
if (m_Estimator instanceof IncrementalEstimator) {
707
* Checks whether the scheme says it can handle instance weights.
709
* @return index 0 is true if the estimator handles instance weights
711
protected boolean[] weightedInstancesHandler() {
713
boolean[] result = new boolean[2];
715
print("weighted instances estimator...");
716
if (m_Estimator instanceof WeightedInstancesHandler) {
729
* Checks whether the estimator is supervised.
731
* @return index 0 is true if the estimator is supervised
733
protected boolean[] supervisedEstimator() {
734
boolean[] result = new boolean[2];
740
* Checks basic estimation of one attribute of the scheme, for simple non-troublesome
743
* @param attrTypes the types the estimator can work with
744
* @param classType the class type (NOMINAL, NUMERIC, etc.)
745
* @return index 0 is true if the test was passed, index 1 is true if test
748
protected boolean[] canEstimate(AttrTypes attrTypes, boolean supervised, int classType) {
750
// supervised is ignored, no supervised estimators used yet
752
print("basic estimation");
753
printAttributeSummary(attrTypes, classType);
755
FastVector accepts = new FastVector();
756
accepts.addElement("nominal");
757
accepts.addElement("numeric");
758
accepts.addElement("string");
759
accepts.addElement("date");
760
accepts.addElement("relational");
761
accepts.addElement("not in classpath");
762
int numTrain = getNumInstances(), numTest = getNumInstances(),
763
numClasses = 2, missingLevel = 0;
764
boolean attributeMissing = false, classMissing = false;
765
int numAtts = 1, attrIndex = 0;
767
return runBasicTest(attrTypes, numAtts, attrIndex,
769
missingLevel, attributeMissing, classMissing,
770
numTrain, numTest, numClasses,
775
* Checks basic estimation of one attribute of the scheme, for simple non-troublesome
778
* @param attrTypes the types the estimator can work with
779
* @param classType the class type (NOMINAL, NUMERIC, etc.)
781
protected void canSplitUpClass(AttrTypes attrTypes, int classType) {
783
if (attrTypes.nominal)
784
canSplitUpClass(Attribute.NOMINAL, classType);
785
if (attrTypes.numeric)
786
canSplitUpClass(Attribute.NUMERIC, classType);
790
* Checks basic estimation of one attribute of the scheme, for simple non-troublesome
793
* @param attrType the type of the estimator
794
* @param classType the class type (NOMINAL, NUMERIC, etc.)
795
* @return index 0 is true if the test was passed, index 1 is true if test
798
protected boolean[] canSplitUpClass(int attrType, int classType) {
800
boolean[] result = new boolean[2];
802
FastVector accepts = new FastVector();
803
accepts.addElement("not in classpath");
805
// supervised is ignored, no supervised estimators used yet
806
print("split per class type ");
807
printAttributeSummary(attrType, Attribute.NOMINAL);
810
int numTrain = getNumInstances(), numTest = getNumInstances(),
812
boolean attributeMissing = false, classMissing = false;
813
int numAtts = 3, attrIndex = 0, classIndex = 1;
814
Instances train = null;
816
Estimator estimator = null;
817
boolean built = false;
820
AttrTypes at = new AttrTypes(attrType);
821
train = makeTestDataset(42, numTrain, numAtts, at,
822
numClasses, classType, classIndex);
824
// prepare training data set and test value list
825
test = makeTestValueList(24, numTest, train, attrIndex,
828
estimator = Estimator.makeCopies(getEstimator(), 1)[0];
829
} catch (Exception ex) {
830
ex.printStackTrace();
831
throw new Error("Error setting up for tests: " + ex.getMessage());
834
estimator.addValues(train, attrIndex, classType, classIndex);
837
testWithTestValues(estimator, test);
842
catch (Exception ex) {
843
boolean acceptable = false;
845
if (ex.getMessage() == null)
848
msg = ex.getMessage().toLowerCase();
849
if (msg.indexOf("not in classpath") > -1)
850
m_ClasspathProblems = true;
852
for (int i = 0; i < accepts.size(); i++) {
853
if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {
858
println("no" + (acceptable ? " (OK error message)" : ""));
859
result[1] = acceptable;
863
println("\n=== Full Report ===");
864
print("Problem during");
870
println(": " + ex.getMessage() + "\n");
872
if (accepts.size() > 0) {
873
print("Error message doesn't mention ");
874
for (int i = 0; i < accepts.size(); i++) {
878
print('"' + (String)accepts.elementAt(i) + '"');
881
println("here are the datasets:\n");
882
println("=== Train Dataset ===\n"
883
+ train.toString() + "\n");
884
println("=== Test Dataset ===\n"
885
+ test.toString() + "\n\n");
894
* Checks whether nominal schemes can handle more than two classes.
895
* If a scheme is only designed for two-class problems it should
896
* throw an appropriate exception for multi-class problems.
898
* @param attrTypes attribute types the estimator accepts
899
* @param numClasses the number of classes to test
900
* @return index 0 is true if the test was passed, index 1 is true if test
903
protected boolean[] canHandleNClasses(AttrTypes attrTypes, int numClasses) {
905
print("more than two class problems");
906
printAttributeSummary(attrTypes, Attribute.NOMINAL);
909
FastVector accepts = new FastVector();
910
accepts.addElement("number");
911
accepts.addElement("class");
913
int numTrain = getNumInstances(), numTest = getNumInstances(),
915
boolean attributeMissing = false, classMissing = false;
916
int numAttr = 1, attrIndex = 0;
918
return runBasicTest(attrTypes,
921
missingLevel, attributeMissing, classMissing,
922
numTrain, numTest, numClasses,
927
* Checks whether the scheme can handle class attributes as Nth attribute.
929
* @param attrTypes the attribute types the estimator accepts
930
* @param numAtts the number of attributes
931
* @param attrIndex the index of the attribute
932
* @param classType the class type (NUMERIC, NOMINAL, etc.)
933
* @param classIndex the index of the class attribute (0-based, -1 means last attribute)
934
* @return index 0 is true if the test was passed, index 1 is true if test
936
* @see TestInstances#CLASS_IS_LAST
938
protected boolean[] canHandleClassAsNthAttribute(AttrTypes attrTypes,
944
if (classIndex == TestInstances.CLASS_IS_LAST)
945
print("class attribute as last attribute");
947
print("class attribute as " + (classIndex + 1) + ". attribute");
948
printAttributeSummary(attrTypes, classType);
950
FastVector accepts = new FastVector();
951
int numTrain = getNumInstances(), numTest = getNumInstances(), numClasses = 2,
953
boolean attributeMissing = false, classMissing = false;
955
return runBasicTest(attrTypes,
957
classType, classIndex,
958
missingLevel, attributeMissing, classMissing,
959
numTrain, numTest, numClasses,
964
* Checks whether the scheme can handle zero training instances.
966
* @param attrTypes attribute types that can be estimated
967
* @param classType the class type (NUMERIC, NOMINAL, etc.)
968
* @return index 0 is true if the test was passed, index 1 is true if test
971
protected boolean[] canHandleZeroTraining(AttrTypes attrTypes, int classType) {
973
print("handle zero training instances");
974
printAttributeSummary(attrTypes, classType);
977
FastVector accepts = new FastVector();
978
accepts.addElement("train");
979
accepts.addElement("value");
980
int numTrain = 0, numTest = getNumInstances(), numClasses = 2,
982
boolean attributeMissing = false, classMissing = false;
986
attrTypes, numAtts, attrIndex,
988
missingLevel, attributeMissing, classMissing,
989
numTrain, numTest, numClasses,
994
* Checks whether the scheme correctly initialises models when
995
* buildEstimator is called. This test calls buildEstimator with
996
* one training dataset and records performance on a test set.
997
* buildEstimator is then called on a training set with different
998
* structure, and then again with the original training set. The
999
* performance on the test set is compared with the original results
1000
* and any performance difference noted as incorrect build initialisation.
1002
* @param attrTypes attribute types that can be estimated
1003
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1004
* @return index 0 is true if the test was passed, index 1 is true if the
1005
* scheme performs worse than ZeroR, but without error (index 0 is
1008
protected boolean[] correctBuildInitialisation(AttrTypes attrTypes,
1011
boolean[] result = new boolean[2];
1013
print("correct initialisation during buildEstimator");
1014
printAttributeSummary(attrTypes, classType);
1017
int numTrain = getNumInstances(), numTest = getNumInstances(),
1018
numClasses = 2, missingLevel = 0;
1019
boolean attributeMissing = false, classMissing = false;
1021
Instances train1 = null;
1022
Instances test1 = null;
1023
Instances train2 = null;
1024
Instances test2 = null;
1025
Estimator estimator = null;
1026
Estimator estimator1 = null;
1028
boolean built = false;
1035
// Make two sets of train/test splits with different
1036
// numbers of attributes
1037
train1 = makeTestDataset(42, numTrain, 2, attrTypes,
1040
train2 = makeTestDataset(84, numTrain, 3, attrTypes,
1043
if (missingLevel > 0) {
1044
addMissing(train1, missingLevel, attributeMissing, classMissing, attrIndex1);
1045
addMissing(train2, missingLevel, attributeMissing, classMissing, attrIndex2);
1048
estimator = Estimator.makeCopies(getEstimator(), 1)[0];
1049
} catch (Exception ex) {
1050
throw new Error("Error setting up for tests: " + ex.getMessage());
1055
estimator.addValues(train1, attrIndex1);
1058
estimator1 = estimator.makeCopies(getEstimator(), 1)[0];
1062
estimator.addValues(train2, attrIndex2);
1067
estimator.addValues(train1, attrIndex1);
1071
if (!estimator.equals(estimator1)) {
1073
println("\n=== Full report ===\n"
1074
+ "\nFirst build estimator\n"+
1075
estimator.toString() + "\n\n");
1076
println("\nSecond build estimator\n"+
1077
estimator.toString() + "\n\n");
1079
throw new Exception("Results differ between buildEstimator calls");
1084
if (false && m_Debug) {
1085
println("\n=== Full report ===\n"
1086
+ "\nFirst buildEstimator()"
1088
println("\nSecond buildEstimator()"
1092
catch (Exception ex) {
1093
String msg = ex.getMessage().toLowerCase();
1094
if (msg.indexOf("worse than zeror") >= 0) {
1095
println("warning: performs worse than ZeroR");
1103
println("\n=== Full Report ===");
1104
print("Problem during");
1112
print(" of dataset 1");
1115
print(" of dataset 2");
1118
print(" of dataset 1 (2nd build)");
1121
print(", comparing results from builds of dataset 1");
1124
println(": " + ex.getMessage() + "\n");
1125
println("here are the datasets:\n");
1126
println("=== Train1 Dataset ===\n"
1127
+ train1.toString() + "\n");
1128
println("=== Test1 Dataset ===\n"
1129
+ test1.toString() + "\n\n");
1130
println("=== Train2 Dataset ===\n"
1131
+ train2.toString() + "\n");
1132
println("=== Test2 Dataset ===\n"
1133
+ test2.toString() + "\n\n");
1141
* Checks basic missing value handling of the scheme. If the missing
1142
* values cause an exception to be thrown by the scheme, this will be
1145
* @param attrTypes attribute types that can be estimated
1146
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1147
* @param attributeMissing true if the missing values may be in
1149
* @param classMissing true if the missing values may be in the class
1150
* @param missingLevel the percentage of missing values
1151
* @return index 0 is true if the test was passed, index 1 is true if test
1154
protected boolean[] canHandleMissing(AttrTypes attrTypes,
1156
boolean attributeMissing,
1157
boolean classMissing,
1160
if (missingLevel == 100)
1163
if (attributeMissing) {
1164
print(" attribute");
1171
printAttributeSummary(attrTypes, classType);
1174
FastVector accepts = new FastVector();
1175
accepts.addElement("missing");
1176
accepts.addElement("value");
1177
accepts.addElement("train");
1178
int numTrain = getNumInstances(), numTest = getNumInstances(),
1181
int numAtts = 1, attrIndex = 0;
1182
return runBasicTest(attrTypes,
1185
missingLevel, attributeMissing, classMissing,
1186
numTrain, numTest, numClasses,
1191
* Checks whether an incremental scheme produces the same model when
1192
* trained incrementally as when batch trained. The model itself
1193
* cannot be compared, so we compare the evaluation on test data
1194
* for both models. It is possible to get a false positive on this
1195
* test (likelihood depends on the estimator).
1197
* @param attrTypes attribute types that can be estimated
1198
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1199
* @return index 0 is true if the test was passed
1201
protected boolean[] incrementingEquality(AttrTypes attrTypes,
1204
print("incremental training produces the same results"
1205
+ " as batch training");
1206
printAttributeSummary(attrTypes, classType);
1209
int numTrain = getNumInstances(), numTest = getNumInstances(),
1210
numClasses = 2, missingLevel = 0;
1211
boolean attributeMissing = false, classMissing = false;
1213
boolean[] result = new boolean[2];
1214
Instances train = null;
1215
Estimator [] estimators = null;
1216
boolean built = false;
1220
train = makeTestDataset(42, numTrain, 1, attrTypes,
1225
// prepare training data set and test value list
1226
test = makeTestValueList(24, numTest, train, attrIndex,
1227
attrTypes.getSetType());
1229
if (missingLevel > 0) {
1230
addMissing(train, missingLevel, attributeMissing, classMissing, attrIndex);
1232
estimators = Estimator.makeCopies(getEstimator(), 2);
1233
estimators[0].addValues(train, attrIndex);
1234
} catch (Exception ex) {
1235
throw new Error("Error setting up for tests: " + ex.getMessage());
1238
for (int i = 0; i < train.numInstances(); i++) {
1239
((IncrementalEstimator)estimators[1]).addValue(train.instance(i).value(attrIndex), 1.0);
1242
if (!estimators[0].equals(estimators[1])) {
1247
println("\n=== Full Report ===");
1248
println("Results differ between batch and "
1249
+ "incrementally built models.\n"
1250
+ "Depending on the estimator, this may be OK");
1251
println("Here are the results:\n");
1252
println("batch built results\n" + estimators[0].toString());
1253
println("incrementally built results\n" + estimators[1].toString());
1254
println("Here are the datasets:\n");
1255
println("=== Train Dataset ===\n"
1256
+ train.toString() + "\n");
1257
println("=== Test Dataset ===\n"
1258
+ test.toString() + "\n\n");
1265
} catch (Exception ex) {
1268
print("Problem during");
1273
println(": " + ex.getMessage() + "\n");
1281
* Checks whether the estimator can handle instance weights.
1282
* This test compares the estimator performance on two datasets
1283
* that are identical except for the training weights. If the
1284
* results change, then the estimator must be using the weights. It
1285
* may be possible to get a false positive from this test if the
1286
* weight changes aren't significant enough to induce a change
1287
* in estimator performance (but the weights are chosen to minimize
1288
* the likelihood of this).
1290
* @param attrTypes attribute types that can be estimated
1291
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1292
* @return index 0 true if the test was passed
1294
// Trains one estimator copy on the generated data as-is and a second copy on
// the same data after its instance weights were changed, then compares the
// probabilities both copies produce for the same list of test values.
// NOTE(review): this listing carries interleaved line numbers and has lost
// some short lines; the comments below describe only the statements shown.
protected boolean[] instanceWeights(AttrTypes attrTypes,
1297
print("estimator uses instance weights");
1298
printAttributeSummary(attrTypes, classType);
1302
// Twice the configured instance count is used for training; missingLevel
// starts at 0.
int numTrain = 2 * getNumInstances(), numTest = getNumInstances(),
1303
numClasses = 2, missingLevel = 0;
1304
boolean attributeMissing = false, classMissing = false;
1306
boolean[] result = new boolean[2];
1307
Instances train = null;
1309
Estimator [] estimators = null;
1311
// Probabilities from the estimator trained on the original weights (O) and
// on the modified weights (W).
Vector resultProbsO = null;
1312
Vector resultProbsW = null;
1313
boolean built = false;
1314
boolean evalFail = false;
1317
// Fixed seeds (42 for the data, 24 for the test values) are passed to the
// generators.
train = makeTestDataset(42, numTrain, 1,
1318
attrTypes, numClasses,
1321
// prepare training data set and test value list
1322
test = makeTestValueList(24, numTest, train, attrIndex,
1323
attrTypes.getSetType());
1325
// With missingLevel initialised to 0 above, this branch is not taken unless
// the value is reassigned on a line not shown here.
if (missingLevel > 0) {
1326
addMissing(train, missingLevel, attributeMissing, classMissing, attrIndex);
1329
estimators = Estimator.makeCopies(getEstimator(), 2);
1331
// Baseline: the first copy sees the data before any weight is changed.
estimators[0].addValues(train, attrIndex);
1332
resultProbsO = testWithTestValues(estimators[0], test);
1334
} catch (Exception ex) {
1335
// Setup failures are rethrown as Error; only the cause's message is kept.
throw new Error("Error setting up for tests: " + ex.getMessage());
1339
// Now modify instance weights and rebuild
1340
// Step 1: reset every weight to 0.
for (int i = 0; i < train.numInstances(); i++) {
1341
train.instance(i).setWeight(0);
1343
// Step 2: with a fixed seed, make numInstances/2 draws; each draw picks an
// instance index and assigns it a weight of abs(nextInt()) % 10 + 1. Draws
// may repeat an index, and instances never picked keep weight 0.
Random random = new Random(1);
1344
for (int i = 0; i < train.numInstances() / 2; i++) {
1345
int inst = Math.abs(random.nextInt()) % train.numInstances();
1346
int weight = Math.abs(random.nextInt()) % 10 + 1;
1347
train.instance(inst).setWeight(weight);
1349
// The second copy is trained on the re-weighted data and queried with the
// same test values.
estimators[1].addValues(train, attrIndex);
1350
resultProbsW = testWithTestValues(estimators[1], test);
1353
// Equal probability vectors are treated as a failure: the weights made no
// difference.
if (resultProbsO.equals(resultProbsW)) {
1356
// Raised so that the catch block below produces the report.
throw new Exception("evalFail");
1361
} catch (Exception ex) {
1366
println("\n=== Full Report ===");
1369
println("Results don't differ between non-weighted and "
1370
+ "weighted instance models.");
1371
println("Here are the results:\n");
1372
// Only the non-weighted probabilities are printed here.
println(probsToString(resultProbsO));
1374
print("Problem during");
1380
println(": " + ex.getMessage() + "\n");
1382
println("Here are the datasets:\n");
1383
println("=== Train Dataset ===\n"
1384
+ train.toString() + "\n");
1385
println("=== Train Weights ===\n");
1386
// One line per instance: 1-based position followed by its current weight.
for (int i = 0; i < train.numInstances(); i++) {
1387
println(" " + (i + 1)
1388
+ " " + train.instance(i).weight());
1390
println("=== Test Dataset ===\n"
1391
+ test.toString() + "\n\n");
1392
println("(test weights all 1.0\n");
1400
* Checks whether the scheme alters the training dataset during
1401
* training. If the scheme needs to modify the training
1402
* data it should take a copy of the training data. Currently checks
1403
* for changes to header structure, number of instances, order of
1404
* instances, instance weights.
1406
* @param attrTypes attribute types that can be estimated
1407
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1408
* @param attributeMissing true if we know the estimator can handle
1409
* (at least) moderate missing attribute values
1410
* @param classMissing true if we know the estimator can handle
1411
* (at least) moderate missing class values
1412
* @return index 0 is true if the test was passed
1414
// Trains the estimator on a copy of the generated training data and then
// compares that copy with the original through compareDatasets; a difference
// surfaces as the exception compareDatasets throws.
// NOTE(review): this listing carries interleaved line numbers and has lost
// some short lines; the comments below describe only the statements shown.
protected boolean[] datasetIntegrity(AttrTypes attrTypes,
1416
boolean attributeMissing,
1417
boolean classMissing) {
1419
Estimator estimator = null;
1420
print("estimator doesn't alter original datasets");
1421
printAttributeSummary(attrTypes, classType);
1423
// missingLevel = 100 is the level later handed to addMissing.
int numTrain = getNumInstances(), numTest = getNumInstances(),
1424
numClasses = 2, missingLevel = 100;
1426
boolean[] result = new boolean[2];
1427
Instances train = null;
1428
boolean built = false;
1430
// Fixed seed 42 and a single attribute per enabled type.
train = makeTestDataset(42, numTrain, 1, attrTypes,
1435
if (missingLevel > 0) {
1436
addMissing(train, missingLevel, attributeMissing, classMissing, attrIndex);
1438
estimator = Estimator.makeCopies(getEstimator(), 1)[0];
1439
} catch (Exception ex) {
1440
// Setup failures are rethrown as Error; only the cause's message is kept.
throw new Error("Error setting up for tests: " + ex.getMessage());
1443
// trainCopy is what the estimator gets to see; train is kept aside as the
// reference for the comparison.
Instances trainCopy = new Instances(train);
1445
estimator.addValues(trainCopy, attrIndex);
1446
compareDatasets(train, trainCopy);
1451
} catch (Exception ex) {
1456
println("\n=== Full Report ===");
1457
print("Problem during");
1463
println(": " + ex.getMessage() + "\n");
1464
println("Here are the datasets:\n");
1465
println("=== Train Dataset ===\n"
1466
+ train.toString() + "\n");
1474
* Runs a test on the datasets with the given characteristics.
1476
* @param attrTypes attribute types that can be estimated
1477
* @param numAtts number of attributes
1478
* @param attrIndex attribute index
1479
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1480
* @param missingLevel the percentage of missing values
1481
* @param attributeMissing true if the missing values may be in
1483
* @param classMissing true if the missing values may be in the class
1484
* @param numTrain the number of instances in the training set
1485
* @param numTest the number of instances in the test set
1486
* @param numClasses the number of classes
1487
* @param accepts the acceptable string in an exception
1488
* @return index 0 is true if the test was passed, index 1 is true if test
1491
// Convenience overload: forwards to the runBasicTest overload that takes a
// class index, passing TestInstances.CLASS_IS_LAST for it.
// NOTE(review): several parameter and argument lines are missing from this
// listing; only the lines shown are described.
protected boolean[] runBasicTest(AttrTypes attrTypes,
1496
boolean attributeMissing,
1497
boolean classMissing,
1501
FastVector accepts) {
1503
return runBasicTest(attrTypes,
1507
TestInstances.CLASS_IS_LAST,
1518
* Runs a test on the datasets with the given characteristics.
1520
* @param attrTypes attribute types that can be estimated
1521
* @param numAtts number of attributes
1522
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1523
* @param classIndex the attribute index of the class
1524
* @param missingLevel the percentage of missing values
1525
* @param attributeMissing true if the missing values may be in
1527
* @param classMissing true if the missing values may be in the class
1528
* @param numTrain the number of instances in the training set
1529
* @param numTest the number of instances in the test set
1530
* @param numClasses the number of classes
1531
* @param accepts the acceptable string in an exception
1532
* @return index 0 is true if the test was passed, index 1 is true if test
1535
// Generates a training set and a list of test values, trains one estimator
// copy on the attribute at attrIndex and queries it with the test values.
// An exception raised while doing so is matched against the accepted
// message fragments in accepts.
// NOTE(review): this listing carries interleaved line numbers and has lost
// some short lines; the comments below describe only the statements shown.
protected boolean[] runBasicTest(AttrTypes attrTypes,
1541
boolean attributeMissing,
1542
boolean classMissing,
1546
FastVector accepts) {
1548
boolean[] result = new boolean[2];
1549
Instances train = null;
1551
Estimator estimator = null;
1552
boolean built = false;
1555
// Fixed seeds (42 for the data, 24 for the test values) are passed to the
// generators.
train = makeTestDataset(42, numTrain, numAtts, attrTypes,
1560
// prepare training data set and test value list
1562
test = makeTestValueList(24, numTest, train, attrIndex,
1563
attrTypes.getSetType());
1568
// NOTE(review): second assignment to test, this time from an explicit
// min/max; the condition choosing between the two calls is not visible here.
test = makeTestValueList(24, numTest, min, max,
1569
attrTypes.getSetType());
1572
if (missingLevel > 0) {
1573
addMissing(train, missingLevel, attributeMissing, classMissing, attrIndex);
1575
estimator = Estimator.makeCopies(getEstimator(), 1)[0];
1576
} catch (Exception ex) {
1577
ex.printStackTrace();
1578
// Setup failures are rethrown as Error; only the cause's message is kept.
throw new Error("Error setting up for tests: " + ex.getMessage());
1581
estimator.addValues(train, attrIndex);
1584
// The returned probabilities are not used on this line; the call is made
// for the exceptions it may raise.
testWithTestValues(estimator, test);
1589
catch (Exception ex) {
1590
boolean acceptable = false;
1592
if (ex.getMessage() == null)
1595
// The message is lower-cased before matching, so entries of accepts have to
// be lower case to be found by indexOf below.
msg = ex.getMessage().toLowerCase();
1596
if (msg.indexOf("not in classpath") > -1)
1597
m_ClasspathProblems = true;
1599
for (int i = 0; i < accepts.size(); i++) {
1600
if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {
1605
println("no" + (acceptable ? " (OK error message)" : ""));
1606
// Index 1 records whether the failure carried an accepted message.
result[1] = acceptable;
1610
println("\n=== Full Report ===");
1611
print("Problem during");
1617
println(": " + ex.getMessage() + "\n");
1619
// List the fragments that were expected in the message, each in quotes.
if (accepts.size() > 0) {
1620
print("Error message doesn't mention ");
1621
for (int i = 0; i < accepts.size(); i++) {
1625
print('"' + (String)accepts.elementAt(i) + '"');
1628
println("here are the datasets:\n");
1629
println("=== Train Dataset ===\n"
1630
+ train.toString() + "\n");
1631
println("=== Test Dataset ===\n"
1632
+ test.toString() + "\n\n");
1641
* Compare two datasets to see if they differ.
1643
* @param data1 one set of instances
1644
* @param data2 the other set of instances
1645
* @throws Exception if the datasets differ
1647
// Throws on the first difference found. The checks run in this order:
// header, number of instances, then per instance the attribute values and
// finally the weight.
// NOTE(review): closing braces and the throws clause are missing from this
// listing; only the lines shown are described.
protected void compareDatasets(Instances data1, Instances data2)
1649
if (!data2.equalHeaders(data1)) {
1650
throw new Exception("header has been modified");
1652
if (!(data2.numInstances() == data1.numInstances())) {
1653
throw new Exception("number of instances has changed");
1655
// Instances are paired by position, so a changed order is reported as
// changed instances.
for (int i = 0; i < data2.numInstances(); i++) {
1656
Instance orig = data1.instance(i);
1657
Instance copy = data2.instance(i);
1658
for (int j = 0; j < orig.numAttributes(); j++) {
1659
// A missing value only matches another missing value; all other values are
// compared with !=.
if (orig.isMissing(j)) {
1660
if (!copy.isMissing(j)) {
1661
throw new Exception("instances have changed");
1663
} else if (orig.value(j) != copy.value(j)) {
1664
throw new Exception("instances have changed");
1666
if (orig.weight() != copy.weight()) {
1667
throw new Exception("instance weights have changed");
1674
* Add missing values to a dataset.
1676
* @param data the instances to add missing values to
1677
* @param level the level of missing values to add (if positive, this
1678
* is the probability that a value will be set to missing, if negative
1679
* all but one value will be set to missing (not yet implemented))
1680
* @param attributeMissing if true, attributes will be modified
1681
* @param classMissing if true, the class attribute will be modified
1682
* @param attrIndex index of the attribute
1684
// Sets values to missing in the class column (when classMissing is true) and
// in the column attrIndex (when attributeMissing is true); no other column
// is touched.
// NOTE(review): the last parameter line and the closing braces are missing
// from this listing; only the lines shown are described.
protected void addMissing(Instances data, int level,
1685
boolean attributeMissing, boolean classMissing,
1688
int classIndex = data.classIndex();
1689
// Fixed seed: the sequence of random draws is the same on every call.
Random random = new Random(1);
1690
for (int i = 0; i < data.numInstances(); i++) {
1691
Instance current = data.instance(i);
1693
for (int j = 0; j < data.numAttributes(); j++) {
1694
if (((j == classIndex) && classMissing) ||
1695
((j == attrIndex) && attributeMissing)) {
1696
// level acts as a percentage: the draw is reduced modulo 100 and the value
// is set to missing when the remainder is below level.
if (Math.abs(random.nextInt()) % 100 < level)
1697
current.setMissing(j);
1704
* Make a simple set of instances, which can later be modified
1705
* for use in specific tests.
1707
* @param seed the random number seed
1708
* @param numInstances the number of instances to generate
1709
* @param numAttr the number of attributes
1710
* @param attrTypes the attribute types
1711
* @param numClasses the number of classes (if nominal class)
1712
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1713
* @return the test dataset
1714
* @throws Exception if the dataset couldn't be generated
1715
* @see #process(Instances)
1717
// Convenience overload: forwards to the makeTestDataset overload that takes
// a class index, passing TestInstances.CLASS_IS_LAST for it.
// NOTE(review): several parameter and argument lines are missing from this
// listing; only the lines shown are described.
protected Instances makeTestDataset(int seed,
1720
AttrTypes attrTypes,
1725
return makeTestDataset(
1732
TestInstances.CLASS_IS_LAST);
1737
* Make a simple set of instances with variable position of the class
1738
* attribute, which can later be modified for use in specific tests.
1740
* @param seed the random number seed
1741
* @param numInstances the number of instances to generate
1742
* @param numAttr the number of attributes to generate
1743
* @param attrTypes the types of attribute that are accepted
1744
* @param numClasses the number of classes (if nominal class)
1745
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1746
* @param classIndex the index of the class (0-based, -1 as last)
1747
* @return the test dataset
1748
* @throws Exception if the dataset couldn't be generated
1749
* @see TestInstances#CLASS_IS_LAST
1750
* @see #process(Instances)
1752
// Configures a TestInstances generator from the arguments, generates the
// data and passes the result through process().
// NOTE(review): the last parameter line and the throws clause are missing
// from this listing; only the lines shown are described.
protected Instances makeTestDataset(int seed, int numInstances,
1753
int numAttr, AttrTypes attrTypes,
1754
int numClasses, int classType,
1758
TestInstances dataset = new TestInstances();
1760
dataset.setSeed(seed);
1761
dataset.setNumInstances(numInstances);
1762
// Every attribute type enabled in attrTypes is given numAttr attributes;
// disabled types get 0.
dataset.setNumNominal (attrTypes.nominal ? numAttr : 0);
1763
dataset.setNumNumeric (attrTypes.numeric ? numAttr : 0);
1764
dataset.setNumString (attrTypes.string ? numAttr : 0);
1765
dataset.setNumDate (attrTypes.date ? numAttr : 0);
1766
dataset.setNumRelational(attrTypes.relational ? numAttr : 0);
1767
dataset.setNumClasses(numClasses);
1768
dataset.setClassType(classType);
1769
dataset.setClassIndex(classIndex);
1771
// Derived classes can alter the generated data through process().
return process(dataset.generate());
1775
* Make a simple set of values. Only one of the num'type' parameters should be larger than 0.
1776
* (just to make parameter similar to the makeTestDataset parameters)
1778
* @param seed the random number seed
1779
* @param numValues the number of values to generate
1780
* @param data the dataset to make test examples for
1781
* @param attrIndex index of the attribute
1782
* @param attrType the attribute type (NUMERIC, NOMINAL, etc.)
1783
* @throws Exception if the dataset couldn't be generated
1784
* @see #process(Instances)
1786
// Draws numValues random values from the range spanned by the minimum and
// maximum found for attribute attrIndex in data.
// NOTE(review): this listing carries interleaved line numbers and has lost
// some short lines; the comments below describe only the statements shown.
protected Vector makeTestValueList(int seed, int numValues,
1787
Instances data, int attrIndex, int attrType)
1791
double []minMax = getMinimumMaximum(data, attrIndex);
1792
double minValue = minMax[0];
1793
double maxValue = minMax[1];
1795
// make value list and put into a VECTOR
1796
double range = maxValue - minValue;
1797
Vector values = new Vector(numValues);
1798
Random random = new Random(seed);
1800
if (attrType == Attribute.NOMINAL) {
1801
for (int i = 0; i < numValues; i++) {
1802
// Integer-valued: (int)minValue plus the draw modulo (int)range, so
// (int)minValue + (int)range itself is never produced.
// NOTE(review): when (int)range is 0 (e.g. minValue == maxValue) the modulo
// throws ArithmeticException.
Double v = new Double((Math.abs(random.nextInt()) % (int)range)+ (int)minValue);
1806
if (attrType == Attribute.NUMERIC) {
1807
for (int i = 0; i < numValues; i++) {
1808
// nextDouble() scaled by range and shifted by minValue.
Double v = new Double(random.nextDouble() * range + minValue);
1816
* Make a simple set of values. Only one of the num'type' parameters should be larger than 0.
1817
* (just to make parameter similar to the makeTestDataset parameters)
1819
* @param seed the random number seed
1820
* @param numValues the number of values to generate
1821
* @param minValue the minimal data value
1822
* @param maxValue the maximal data value
1823
* @param attrType the attribute type (NUMERIC, NOMINAL, etc.)
1824
* @throws Exception if the dataset couldn't be generated
1825
* @see #process(Instances)
1827
// Draws numValues random values from the range spanned by the given
// minValue and maxValue.
// NOTE(review): this listing carries interleaved line numbers and has lost
// some short lines; the comments below describe only the statements shown.
protected Vector makeTestValueList(int seed, int numValues,
1828
double minValue, double maxValue, int attrType)
1832
// make value list and put into a VECTOR
1833
double range = maxValue - minValue;
1834
Vector values = new Vector(numValues);
1835
Random random = new Random(seed);
1837
if (attrType == Attribute.NOMINAL) {
1838
for (int i = 0; i < numValues; i++) {
1839
// Integer-valued: (int)minValue plus the draw modulo (int)range, so
// (int)minValue + (int)range itself is never produced.
// NOTE(review): when (int)range is 0 (e.g. minValue == maxValue) the modulo
// throws ArithmeticException.
Double v = new Double((Math.abs(random.nextInt()) % (int)range)+ (int)minValue);
1843
if (attrType == Attribute.NUMERIC) {
1844
for (int i = 0; i < numValues; i++) {
1845
// nextDouble() scaled by range and shifted by minValue.
Double v = new Double(random.nextDouble() * range + minValue);
1853
* Test with test values.
1855
* @param est estimator to be tested
1856
* @param test vector with test values
1859
// Asks the estimator for the probability of each test value, in list order.
// NOTE(review): the lines that store and return the results are missing from
// this listing; only the lines shown are described.
protected Vector testWithTestValues(Estimator est, Vector test) {
1861
Vector results = new Vector();
1862
for (int i = 0; i < test.size(); i++) {
1863
// Elements of test are cast to Double, so the list must hold Double objects.
double testValue = ((Double)(test.elementAt(i))).doubleValue();
1864
double prob = est.getProbability(testValue);
1865
Double p = new Double(prob);
1872
* Gets the minimum and maximum of the values of the given attribute
1873
* of the given data set
1875
* @param inst the instances
1876
* @param attrIndex the index of the attribute to find min and max
1877
* @return the array with the minimum value on index 0 and the max on index 1
1880
// Wrapper around getMinMax that allocates the two-element array the result
// is written into.
// NOTE(review): the try line, the return and the closing braces are missing
// from this listing; only the lines shown are described.
protected double[] getMinimumMaximum(Instances inst, int attrIndex) {
1881
double []minMax = new double[2];
1884
// NOTE(review): num is not read in the lines visible here.
int num = getMinMax(inst, attrIndex, minMax);
1885
} catch (Exception ex) {
1886
// In the lines visible here a failure of getMinMax is only printed (stack
// trace plus message), not rethrown.
ex.printStackTrace();
1887
System.out.println(ex.getMessage());
1890
// double minValue = minMax[0];
1891
// double maxValue = minMax[1];
1895
* Find the minimum and the maximum of the attribute and return it in
1896
* the last parameter.
1897
* @param inst instances used to build the estimator
1898
* @param attrIndex index of the attribute
1899
* @param minMax the array to return minimum and maximum in
1900
* @return number of not missing values
1901
* @exception Exception if parameter minMax wasn't initialized properly
1903
// Walks over the instances once, ignoring those whose value for attrIndex is
// missing, and tracks the smallest and largest value seen.
// NOTE(review): this listing carries interleaved line numbers and has lost
// some short lines (e.g. the counter increments and the writes into minMax);
// the comments below describe only the statements shown.
public static int getMinMax(Instances inst, int attrIndex, double [] minMax)
1905
// Both start as NaN and are only assigned once a non-missing value is found.
double min = Double.NaN;
1906
double max = Double.NaN;
1907
Instance instance = null;
1908
int numNotMissing = 0;
1909
// The caller has to supply an array with room for two values.
if ((minMax == null) || (minMax.length < 2)) {
1910
throw new Exception("Error in Program, privat method getMinMax");
1913
Enumeration enumInst = inst.enumerateInstances();
1914
if (enumInst.hasMoreElements()) {
1916
// Advance to the first instance whose value is not missing, or to the last
// instance if all of them are missing.
instance = (Instance) enumInst.nextElement();
1917
} while (instance.isMissing(attrIndex) && (enumInst.hasMoreElements()));
1919
// add values if not missing
1920
if (!instance.isMissing(attrIndex)) {
1922
// The first non-missing value seeds both extremes.
min = instance.value(attrIndex);
1923
max = instance.value(attrIndex);
1925
// Remaining instances: widen the range whenever a value falls outside it.
while (enumInst.hasMoreElements()) {
1926
instance = (Instance) enumInst.nextElement();
1927
if (!instance.isMissing(attrIndex)) {
1929
if (instance.value(attrIndex) < min) {
1930
min = (instance.value(attrIndex));
1932
if (instance.value(attrIndex) > max) {
1933
max = (instance.value(attrIndex));
1941
return numNotMissing;
1945
* Print the probabilities after testing
1946
* @param probs vector with probability values
1947
* @return string with probability values printed
1949
// Renders the probabilities as a single line: a leading blank, then each
// value followed by a blank.
private String probsToString(Vector probs) {
1950
StringBuffer txt = new StringBuffer (" ");
1951
for (int i = 0; i < probs.size(); i++) {
1952
// Elements are cast to Double, so the vector must hold Double objects.
txt.append("" + ((Double)(probs.elementAt(i))).doubleValue() + " ");
1954
return txt.toString();
1958
* Provides a hook for derived classes to further modify the data.
1960
* @param data the data to process
1961
* @return the processed data
1962
* @see #m_PostProcessor
1964
// Hands the data to the configured post-processor and returns what it
// produces.
// NOTE(review): the statement executed when getPostProcessor() is null is
// missing from this listing.
protected Instances process(Instances data) {
1965
if (getPostProcessor() == null)
1968
return getPostProcessor().process(data);
1972
* Print out a short summary string for the dataset characteristics
1974
* @param attrTypes the attribute types used (NUMERIC, NOMINAL, etc.)
1975
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1977
// Builds in str a text of the form
// " (<class type> class, <attribute types> attributes)".
// NOTE(review): the declaration of str, most of the appends, the break
// statements and the final output call are missing from this listing; the
// comments below describe only the statements shown.
protected void printAttributeSummary(AttrTypes attrTypes, int classType) {
1981
// One section per attribute type flag; the str.length() checks test whether
// an earlier type has already been added.
if (attrTypes.numeric)
1984
if (attrTypes.nominal) {
1985
if (str.length() > 0)
1990
if (attrTypes.string) {
1991
if (str.length() > 0)
1996
if (attrTypes.date) {
1997
if (str.length() > 0)
2002
if (attrTypes.relational) {
2003
if (str.length() > 0)
2005
str += " relational";
2008
str += " attributes)";
2010
// The class-type label is prepended, so it ends up in front of the
// attribute list.
switch (classType) {
2011
case Attribute.NUMERIC:
2012
str = " (numeric class," + str;
2014
case Attribute.NOMINAL:
2015
str = " (nominal class," + str;
2017
case Attribute.STRING:
2018
str = " (string class," + str;
2020
case Attribute.DATE:
2021
str = " (date class," + str;
2023
case Attribute.RELATIONAL:
2024
str = " (relational class," + str;
2032
* Print out a short summary string for the dataset characteristics
2034
* @param attrType the attribute type (NUMERIC, NOMINAL, etc.)
2035
* @param classType the class type (NUMERIC, NOMINAL, etc.)
2037
// Builds in str a text of the form
// " (<class type> class, <attribute type> attribute(s))" for a single
// attribute type.
// NOTE(review): the declaration of str, the first switch header, the break
// statements and the final output call are missing from this listing; the
// comments below describe only the statements shown.
protected void printAttributeSummary(int attrType, int classType) {
2042
// Attribute-type label; each case prepends its label to str.
case Attribute.NUMERIC:
2043
str = " numeric" + str;
2045
case Attribute.NOMINAL:
2046
str = " nominal" + str;
2048
case Attribute.STRING:
2049
str = " string" + str;
2051
case Attribute.DATE:
2052
str = " date" + str;
2054
case Attribute.RELATIONAL:
2055
str = " relational" + str;
2058
str += " attribute(s))";
2060
// The class-type label is prepended, so it ends up in front of the
// attribute label.
switch (classType) {
2061
case Attribute.NUMERIC:
2062
str = " (numeric class," + str;
2064
case Attribute.NOMINAL:
2065
str = " (nominal class," + str;
2067
case Attribute.STRING:
2068
str = " (string class," + str;
2070
case Attribute.DATE:
2071
str = " (date class," + str;
2073
case Attribute.RELATIONAL:
2074
str = " (relational class," + str;
2082
* Test method for this class
2084
* @param args the commandline parameters
2086
// Command-line entry point: creates a CheckEstimator and applies the
// command-line options to it. When the options are rejected, a usage text
// listing every option is built and thrown as a new Exception.
// NOTE(review): the try lines, the call that runs the checks and the closing
// braces are missing from this listing; only the lines shown are described.
public static void main(String [] args) {
2088
CheckEstimator check = new CheckEstimator();
2091
check.setOptions(args);
2092
Utils.checkForRemainingOptions(args);
2093
} catch (Exception ex) {
2094
// The usage header uses the class name with its package prefix stripped.
String result = ex.getMessage() + "\n\n" + check.getClass().getName().replaceAll(".*\\.", "") + " Options:\n\n";
2095
Enumeration enu = check.listOptions();
2096
// Append synopsis and description of every option.
while (enu.hasMoreElements()) {
2097
Option option = (Option) enu.nextElement();
2098
result += option.synopsis() + "\n" + option.description() + "\n";
2100
throw new Exception(result);
2104
} catch (Exception ex) {
2105
// Only the message is printed, to standard error.
System.err.println(ex.getMessage());