2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
23
package weka.classifiers.functions.supportVector;
25
import weka.core.Attribute;
26
import weka.core.CheckScheme;
27
import weka.core.FastVector;
28
import weka.core.Instances;
29
import weka.core.MultiInstanceCapabilitiesHandler;
30
import weka.core.Option;
31
import weka.core.OptionHandler;
32
import weka.core.SerializationHelper;
33
import weka.core.TestInstances;
34
import weka.core.Utils;
35
import weka.core.WeightedInstancesHandler;
37
import java.util.Enumeration;
38
import java.util.Random;
39
import java.util.Vector;
42
* Class for examining the capabilities and finding problems with
43
* kernels. If you implement a kernel using the WEKA libraries,
44
* you should run the checks on it to ensure robustness and correct
45
* operation. Passing all the tests of this object does not mean
46
* bugs in the kernels don't exist, but this will help find some
50
* <code>java weka.classifiers.functions.supportVector.CheckKernel -W kernel_name
51
* -- kernel_options </code><p/>
53
* CheckKernel reports on the following:
55
* <li> Kernel abilities
57
* <li> Possible command line options to the kernels </li>
58
* <li> Whether the kernels can predict nominal, numeric, string,
59
* date or relational class attributes. </li>
60
* <li> Whether the kernels can handle numeric predictor attributes </li>
61
* <li> Whether the kernels can handle nominal predictor attributes </li>
62
* <li> Whether the kernels can handle string predictor attributes </li>
63
* <li> Whether the kernels can handle date predictor attributes </li>
64
* <li> Whether the kernels can handle relational predictor attributes </li>
65
* <li> Whether the kernels can handle multi-instance data </li>
66
* <li> Whether the kernels can handle missing predictor values </li>
67
* <li> Whether the kernels can handle missing class values </li>
68
* <li> Whether a nominal kernel only handles 2-class problems </li>
69
* <li> Whether the kernels can handle instance weights </li>
72
* <li> Correct functioning
74
* <li> Correct initialisation during buildKernel (i.e. no result
75
* changes when buildKernel called repeatedly) </li>
76
* <li> Whether the kernel alters the data passed to it
77
* (number of instances, instance order, instance weights, etc) </li>
80
* <li> Degenerate cases
82
* <li> building kernels with zero training instances </li>
83
* <li> all but one predictor attribute values missing </li>
84
* <li> all predictor attribute values missing </li>
85
* <li> all but one class values missing </li>
86
* <li> all class values missing </li>
90
* Running CheckKernel with the debug option set will output the
91
* training and test datasets for any failed tests.<p/>
93
* The <code>weka.classifiers.AbstractKernelTest</code> uses this
94
* class to test all the kernels. Any changes here have to be
95
* checked in that abstract test class, too. <p/>
97
<!-- options-start -->
98
* Valid options are: <p/>
101
* Turn on debugging output.</pre>
104
* Silent mode - prints nothing to stdout.</pre>
106
* <pre> -N <num>
107
* The number of instances in the datasets (default 20).</pre>
109
* <pre> -nominal <num>
110
* The number of nominal attributes (default 2).</pre>
112
* <pre> -nominal-values <num>
113
* The number of values for nominal attributes (default 1).</pre>
115
* <pre> -numeric <num>
116
* The number of numeric attributes (default 1).</pre>
118
* <pre> -string <num>
119
* The number of string attributes (default 1).</pre>
121
* <pre> -date <num>
122
* The number of date attributes (default 1).</pre>
124
* <pre> -relational <num>
125
* The number of relational attributes (default 1).</pre>
127
* <pre> -num-instances-relational <num>
128
* The number of instances in relational/bag attributes (default 10).</pre>
130
* <pre> -words <comma-separated-list>
131
* The words to use in string attributes.</pre>
133
* <pre> -word-separators <chars>
134
* The word separators to use in string attributes.</pre>
137
* Full name of the kernel analysed.
138
* eg: weka.classifiers.functions.supportVector.RBFKernel
139
* (default weka.classifiers.functions.supportVector.RBFKernel)</pre>
142
* Options specific to kernel weka.classifiers.functions.supportVector.RBFKernel:
146
* Enables debugging output (if available) to be printed.
147
* (default: off)</pre>
150
* Turns off all checks - use with caution!
151
* (default: checks on)</pre>
153
* <pre> -C <num>
154
* The size of the cache (a prime number), 0 for full cache and
156
* (default: 250007)</pre>
158
* <pre> -G <num>
159
* The Gamma parameter.
160
* (default: 0.01)</pre>
164
* Options after -- are passed to the designated kernel.<p/>
166
* @author Len Trigg (trigg@cs.waikato.ac.nz)
167
* @author FracPete (fracpete at waikato dot ac dot nz)
168
* @version $Revision: 1.2 $
171
public class CheckKernel
172
extends CheckScheme {
175
* Note about test methods:
176
* - methods return array of booleans
177
* - first index: success or not
178
* - second index: acceptable or not (e.g., Exception is OK)
180
* FracPete (fracpete at waikato dot ac dot nz)
183
/** The kernel to be examined */
184
protected Kernel m_Kernel = new weka.classifiers.functions.supportVector.RBFKernel();
187
* Returns an enumeration describing the available options.
189
* @return an enumeration of all the available options.
191
public Enumeration listOptions() {
192
Vector result = new Vector();
194
Enumeration en = super.listOptions();
195
while (en.hasMoreElements())
196
result.addElement(en.nextElement());
198
result.addElement(new Option(
199
"\tFull name of the kernel analysed.\n"
200
+"\teg: weka.classifiers.functions.supportVector.RBFKernel\n"
201
+ "\t(default weka.classifiers.functions.supportVector.RBFKernel)",
204
if ((m_Kernel != null)
205
&& (m_Kernel instanceof OptionHandler)) {
206
result.addElement(new Option("", "", 0,
207
"\nOptions specific to kernel "
208
+ m_Kernel.getClass().getName()
210
Enumeration enu = ((OptionHandler)m_Kernel).listOptions();
211
while (enu.hasMoreElements())
212
result.addElement(enu.nextElement());
215
return result.elements();
219
* Parses a given list of options.
221
<!-- options-start -->
222
* Valid options are: <p/>
225
* Turn on debugging output.</pre>
228
* Silent mode - prints nothing to stdout.</pre>
230
* <pre> -N <num>
231
* The number of instances in the datasets (default 20).</pre>
233
* <pre> -nominal <num>
234
* The number of nominal attributes (default 2).</pre>
236
* <pre> -nominal-values <num>
237
* The number of values for nominal attributes (default 1).</pre>
239
* <pre> -numeric <num>
240
* The number of numeric attributes (default 1).</pre>
242
* <pre> -string <num>
243
* The number of string attributes (default 1).</pre>
245
* <pre> -date <num>
246
* The number of date attributes (default 1).</pre>
248
* <pre> -relational <num>
249
* The number of relational attributes (default 1).</pre>
251
* <pre> -num-instances-relational <num>
252
* The number of instances in relational/bag attributes (default 10).</pre>
254
* <pre> -words <comma-separated-list>
255
* The words to use in string attributes.</pre>
257
* <pre> -word-separators <chars>
258
* The word separators to use in string attributes.</pre>
261
* Full name of the kernel analysed.
262
* eg: weka.classifiers.functions.supportVector.RBFKernel
263
* (default weka.classifiers.functions.supportVector.RBFKernel)</pre>
266
* Options specific to kernel weka.classifiers.functions.supportVector.RBFKernel:
270
* Enables debugging output (if available) to be printed.
271
* (default: off)</pre>
274
* Turns off all checks - use with caution!
275
* (default: checks on)</pre>
277
* <pre> -C <num>
278
* The size of the cache (a prime number), 0 for full cache and
280
* (default: 250007)</pre>
282
* <pre> -G <num>
283
* The Gamma parameter.
284
* (default: 0.01)</pre>
288
* @param options the list of options as an array of strings
289
* @throws Exception if an option is not supported
291
public void setOptions(String[] options) throws Exception {
294
super.setOptions(options);
296
tmpStr = Utils.getOption('W', options);
297
if (tmpStr.length() == 0)
298
tmpStr = weka.classifiers.functions.supportVector.RBFKernel.class.getName();
301
"weka.classifiers.functions.supportVector",
304
Utils.partitionOptions(options)));
308
* Gets the current settings of the CheckKernel.
310
* @return an array of strings suitable for passing to setOptions
312
public String[] getOptions() {
317
result = new Vector();
319
options = super.getOptions();
320
for (i = 0; i < options.length; i++)
321
result.add(options[i]);
323
if (getKernel() != null) {
325
result.add(getKernel().getClass().getName());
328
if ((m_Kernel != null) && (m_Kernel instanceof OptionHandler))
329
options = ((OptionHandler) m_Kernel).getOptions();
331
options = new String[0];
333
if (options.length > 0) {
335
for (i = 0; i < options.length; i++)
336
result.add(options[i]);
339
return (String[]) result.toArray(new String[result.size()]);
343
* Begin the tests, reporting results to System.out
345
public void doTests() {
347
if (getKernel() == null) {
348
println("\n=== No kernel set ===");
351
println("\n=== Check on kernel: "
352
+ getKernel().getClass().getName()
356
m_ClasspathProblems = false;
357
println("--> Checking for interfaces");
359
boolean weightedInstancesHandler = weightedInstancesHandler()[0];
360
boolean multiInstanceHandler = multiInstanceHandler()[0];
361
println("--> Kernel tests");
362
declaresSerialVersionUID();
363
testsPerClassType(Attribute.NOMINAL, weightedInstancesHandler, multiInstanceHandler);
364
testsPerClassType(Attribute.NUMERIC, weightedInstancesHandler, multiInstanceHandler);
365
testsPerClassType(Attribute.DATE, weightedInstancesHandler, multiInstanceHandler);
366
testsPerClassType(Attribute.STRING, weightedInstancesHandler, multiInstanceHandler);
367
testsPerClassType(Attribute.RELATIONAL, weightedInstancesHandler, multiInstanceHandler);
371
* Set the kernel to test.
373
* @param value the kernel to use.
375
public void setKernel(Kernel value) {
380
* Get the kernel being tested
382
* @return the kernel being tested
384
public Kernel getKernel() {
389
* Run a battery of tests for a given class attribute type
391
* @param classType the class type (NOMINAL, NUMERIC, etc.)
392
* @param weighted true if the kernel says it handles weights
393
* @param multiInstance true if the kernel is a multi-instance kernel
395
protected void testsPerClassType(int classType,
397
boolean multiInstance) {
399
boolean PNom = canPredict(true, false, false, false, false, multiInstance, classType)[0];
400
boolean PNum = canPredict(false, true, false, false, false, multiInstance, classType)[0];
401
boolean PStr = canPredict(false, false, true, false, false, multiInstance, classType)[0];
402
boolean PDat = canPredict(false, false, false, true, false, multiInstance, classType)[0];
405
PRel = canPredict(false, false, false, false, true, multiInstance, classType)[0];
409
if (PNom || PNum || PStr || PDat || PRel) {
411
instanceWeights(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
413
if (classType == Attribute.NOMINAL)
414
canHandleNClasses(PNom, PNum, PStr, PDat, PRel, multiInstance, 4);
416
if (!multiInstance) {
417
canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 0);
418
canHandleClassAsNthAttribute(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, 1);
421
canHandleZeroTraining(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
422
boolean handleMissingPredictors = canHandleMissing(PNom, PNum, PStr, PDat, PRel,
423
multiInstance, classType,
425
if (handleMissingPredictors)
426
canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, true, false, 100);
428
boolean handleMissingClass = canHandleMissing(PNom, PNum, PStr, PDat, PRel,
429
multiInstance, classType,
431
if (handleMissingClass)
432
canHandleMissing(PNom, PNum, PStr, PDat, PRel, multiInstance, classType, false, true, 100);
434
correctBuildInitialisation(PNom, PNum, PStr, PDat, PRel, multiInstance, classType);
435
datasetIntegrity(PNom, PNum, PStr, PDat, PRel, multiInstance, classType,
436
handleMissingPredictors, handleMissingClass);
441
* Checks whether the scheme can take command line options.
443
* @return index 0 is true if the kernel can take options
445
protected boolean[] canTakeOptions() {
447
boolean[] result = new boolean[2];
450
if (m_Kernel instanceof OptionHandler) {
453
println("\n=== Full report ===");
454
Enumeration enu = ((OptionHandler)m_Kernel).listOptions();
455
while (enu.hasMoreElements()) {
456
Option option = (Option) enu.nextElement();
457
print(option.synopsis() + "\n"
458
+ option.description() + "\n");
473
* Checks whether the scheme says it can handle instance weights.
475
* @return index 0 is true if the kernel handles instance weights
477
protected boolean[] weightedInstancesHandler() {
479
boolean[] result = new boolean[2];
481
print("weighted instances kernel...");
482
if (m_Kernel instanceof WeightedInstancesHandler) {
495
* Checks whether the scheme handles multi-instance data.
497
* @return index 0 is true if the kernel handles multi-instance data
499
protected boolean[] multiInstanceHandler() {
500
boolean[] result = new boolean[2];
502
print("multi-instance kernel...");
503
if (m_Kernel instanceof MultiInstanceCapabilitiesHandler) {
516
* tests for a serialVersionUID. Fails in case the scheme doesn't declare
519
* @return index 0 is true if the scheme declares a UID
521
protected boolean[] declaresSerialVersionUID() {
522
boolean[] result = new boolean[2];
524
print("serialVersionUID...");
526
result[0] = !SerializationHelper.needsUID(m_Kernel.getClass());
537
* Checks basic prediction of the scheme, for simple non-troublesome
540
* @param nominalPredictor if true use nominal predictor attributes
541
* @param numericPredictor if true use numeric predictor attributes
542
* @param stringPredictor if true use string predictor attributes
543
* @param datePredictor if true use date predictor attributes
544
* @param relationalPredictor if true use relational predictor attributes
545
* @param multiInstance whether multi-instance is needed
546
* @param classType the class type (NOMINAL, NUMERIC, etc.)
547
* @return index 0 is true if the test was passed, index 1 is true if test
550
protected boolean[] canPredict(
551
boolean nominalPredictor,
552
boolean numericPredictor,
553
boolean stringPredictor,
554
boolean datePredictor,
555
boolean relationalPredictor,
556
boolean multiInstance,
559
print("basic predict");
560
printAttributeSummary(
561
nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
563
FastVector accepts = new FastVector();
564
accepts.addElement("unary");
565
accepts.addElement("binary");
566
accepts.addElement("nominal");
567
accepts.addElement("numeric");
568
accepts.addElement("string");
569
accepts.addElement("date");
570
accepts.addElement("relational");
571
accepts.addElement("multi-instance");
572
accepts.addElement("not in classpath");
573
int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
574
boolean predictorMissing = false, classMissing = false;
576
return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
577
datePredictor, relationalPredictor,
580
missingLevel, predictorMissing, classMissing,
581
numTrain, numClasses,
586
* Checks whether nominal schemes can handle more than two classes.
587
* If a scheme is only designed for two-class problems it should
588
* throw an appropriate exception for multi-class problems.
590
* @param nominalPredictor if true use nominal predictor attributes
591
* @param numericPredictor if true use numeric predictor attributes
592
* @param stringPredictor if true use string predictor attributes
593
* @param datePredictor if true use date predictor attributes
594
* @param relationalPredictor if true use relational predictor attributes
595
* @param multiInstance whether multi-instance is needed
596
* @param numClasses the number of classes to test
597
* @return index 0 is true if the test was passed, index 1 is true if test
600
protected boolean[] canHandleNClasses(
601
boolean nominalPredictor,
602
boolean numericPredictor,
603
boolean stringPredictor,
604
boolean datePredictor,
605
boolean relationalPredictor,
606
boolean multiInstance,
609
print("more than two class problems");
610
printAttributeSummary(
611
nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, Attribute.NOMINAL);
613
FastVector accepts = new FastVector();
614
accepts.addElement("number");
615
accepts.addElement("class");
616
int numTrain = getNumInstances(), missingLevel = 0;
617
boolean predictorMissing = false, classMissing = false;
619
return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
620
datePredictor, relationalPredictor,
623
missingLevel, predictorMissing, classMissing,
624
numTrain, numClasses,
629
* Checks whether the scheme can handle class attributes as Nth attribute.
631
* @param nominalPredictor if true use nominal predictor attributes
632
* @param numericPredictor if true use numeric predictor attributes
633
* @param stringPredictor if true use string predictor attributes
634
* @param datePredictor if true use date predictor attributes
635
* @param relationalPredictor if true use relational predictor attributes
636
* @param multiInstance whether multi-instance is needed
637
* @param classType the class type (NUMERIC, NOMINAL, etc.)
638
* @param classIndex the index of the class attribute (0-based, -1 means last attribute)
639
* @return index 0 is true if the test was passed, index 1 is true if test
641
* @see TestInstances#CLASS_IS_LAST
643
protected boolean[] canHandleClassAsNthAttribute(
644
boolean nominalPredictor,
645
boolean numericPredictor,
646
boolean stringPredictor,
647
boolean datePredictor,
648
boolean relationalPredictor,
649
boolean multiInstance,
653
if (classIndex == TestInstances.CLASS_IS_LAST)
654
print("class attribute as last attribute");
656
print("class attribute as " + (classIndex + 1) + ". attribute");
657
printAttributeSummary(
658
nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
660
FastVector accepts = new FastVector();
661
int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
662
boolean predictorMissing = false, classMissing = false;
664
return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
665
datePredictor, relationalPredictor,
669
missingLevel, predictorMissing, classMissing,
670
numTrain, numClasses,
675
* Checks whether the scheme can handle zero training instances.
677
* @param nominalPredictor if true use nominal predictor attributes
678
* @param numericPredictor if true use numeric predictor attributes
679
* @param stringPredictor if true use string predictor attributes
680
* @param datePredictor if true use date predictor attributes
681
* @param relationalPredictor if true use relational predictor attributes
682
* @param multiInstance whether multi-instance is needed
683
* @param classType the class type (NUMERIC, NOMINAL, etc.)
684
* @return index 0 is true if the test was passed, index 1 is true if test
687
protected boolean[] canHandleZeroTraining(
688
boolean nominalPredictor,
689
boolean numericPredictor,
690
boolean stringPredictor,
691
boolean datePredictor,
692
boolean relationalPredictor,
693
boolean multiInstance,
696
print("handle zero training instances");
697
printAttributeSummary(
698
nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
700
FastVector accepts = new FastVector();
701
accepts.addElement("train");
702
accepts.addElement("value");
703
int numTrain = 0, numClasses = 2, missingLevel = 0;
704
boolean predictorMissing = false, classMissing = false;
707
nominalPredictor, numericPredictor, stringPredictor,
708
datePredictor, relationalPredictor,
711
missingLevel, predictorMissing, classMissing,
712
numTrain, numClasses,
717
* Checks whether the scheme correctly initialises models when
718
* buildKernel is called. This test calls buildKernel with
719
* one training dataset. buildKernel is then called on a training
720
* set with different structure, and then again with the original training
721
* set. If the equals method of the KernelEvaluation class returns
722
* false, this is noted as incorrect build initialisation.
724
* @param nominalPredictor if true use nominal predictor attributes
725
* @param numericPredictor if true use numeric predictor attributes
726
* @param stringPredictor if true use string predictor attributes
727
* @param datePredictor if true use date predictor attributes
728
* @param relationalPredictor if true use relational predictor attributes
729
* @param multiInstance whether multi-instance is needed
730
* @param classType the class type (NUMERIC, NOMINAL, etc.)
731
* @return index 0 is true if the test was passed
733
protected boolean[] correctBuildInitialisation(
734
boolean nominalPredictor,
735
boolean numericPredictor,
736
boolean stringPredictor,
737
boolean datePredictor,
738
boolean relationalPredictor,
739
boolean multiInstance,
742
boolean[] result = new boolean[2];
744
print("correct initialisation during buildKernel");
745
printAttributeSummary(
746
nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
748
int numTrain = getNumInstances(),
749
numClasses = 2, missingLevel = 0;
750
boolean predictorMissing = false, classMissing = false;
752
Instances train1 = null;
753
Instances train2 = null;
754
Kernel kernel = null;
755
KernelEvaluation evaluation1A = null;
756
KernelEvaluation evaluation1B = null;
757
KernelEvaluation evaluation2 = null;
761
// Make two sets of train/test splits with different
762
// numbers of attributes
763
train1 = makeTestDataset(42, numTrain,
764
nominalPredictor ? getNumNominal() : 0,
765
numericPredictor ? getNumNumeric() : 0,
766
stringPredictor ? getNumString() : 0,
767
datePredictor ? getNumDate() : 0,
768
relationalPredictor ? getNumRelational() : 0,
772
train2 = makeTestDataset(84, numTrain,
773
nominalPredictor ? getNumNominal() + 1 : 0,
774
numericPredictor ? getNumNumeric() + 1 : 0,
775
stringPredictor ? getNumString() + 1 : 0,
776
datePredictor ? getNumDate() + 1 : 0,
777
relationalPredictor ? getNumRelational() + 1 : 0,
781
if (missingLevel > 0) {
782
addMissing(train1, missingLevel, predictorMissing, classMissing);
783
addMissing(train2, missingLevel, predictorMissing, classMissing);
786
kernel = Kernel.makeCopy(getKernel());
787
evaluation1A = new KernelEvaluation();
788
evaluation1B = new KernelEvaluation();
789
evaluation2 = new KernelEvaluation();
790
} catch (Exception ex) {
791
throw new Error("Error setting up for tests: " + ex.getMessage());
795
evaluation1A.evaluate(kernel, train1);
798
evaluation2.evaluate(kernel, train2);
801
evaluation1B.evaluate(kernel, train1);
804
if (!evaluation1A.equals(evaluation1B)) {
806
println("\n=== Full report ===\n"
807
+ evaluation1A.toSummaryString("\nFirst buildKernel()")
810
evaluation1B.toSummaryString("\nSecond buildKernel()")
813
throw new Exception("Results differ between buildKernel calls");
818
if (false && m_Debug) {
819
println("\n=== Full report ===\n"
820
+ evaluation1A.toSummaryString("\nFirst buildKernel()")
823
evaluation1B.toSummaryString("\nSecond buildKernel()")
827
catch (Exception ex) {
832
println("\n=== Full Report ===");
833
print("Problem during building");
836
print(" of dataset 1");
839
print(" of dataset 2");
842
print(" of dataset 1 (2nd build)");
845
print(", comparing results from builds of dataset 1");
848
println(": " + ex.getMessage() + "\n");
849
println("here are the datasets:\n");
850
println("=== Train1 Dataset ===\n"
851
+ train1.toString() + "\n");
852
println("=== Train2 Dataset ===\n"
853
+ train2.toString() + "\n");
861
* Checks basic missing value handling of the scheme. If the missing
862
* values cause an exception to be thrown by the scheme, this will be
865
* @param nominalPredictor if true use nominal predictor attributes
866
* @param numericPredictor if true use numeric predictor attributes
867
* @param stringPredictor if true use string predictor attributes
868
* @param datePredictor if true use date predictor attributes
869
* @param relationalPredictor if true use relational predictor attributes
870
* @param multiInstance whether multi-instance is needed
871
* @param classType the class type (NUMERIC, NOMINAL, etc.)
872
* @param predictorMissing true if the missing values may be in
874
* @param classMissing true if the missing values may be in the class
875
* @param missingLevel the percentage of missing values
876
* @return index 0 is true if the test was passed, index 1 is true if test
879
protected boolean[] canHandleMissing(
880
boolean nominalPredictor,
881
boolean numericPredictor,
882
boolean stringPredictor,
883
boolean datePredictor,
884
boolean relationalPredictor,
885
boolean multiInstance,
887
boolean predictorMissing,
888
boolean classMissing,
891
if (missingLevel == 100)
894
if (predictorMissing) {
902
printAttributeSummary(
903
nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
905
FastVector accepts = new FastVector();
906
accepts.addElement("missing");
907
accepts.addElement("value");
908
accepts.addElement("train");
909
int numTrain = getNumInstances(), numClasses = 2;
911
return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
912
datePredictor, relationalPredictor,
915
missingLevel, predictorMissing, classMissing,
916
numTrain, numClasses,
921
* Checks whether the kernel can handle instance weights.
922
* This test compares the kernel performance on two datasets
923
* that are identical except for the training weights. If the
924
* results change, then the kernel must be using the weights. It
925
* may be possible to get a false positive from this test if the
926
* weight changes aren't significant enough to induce a change
927
* in kernel performance (but the weights are chosen to minimize
928
* the likelihood of this).
930
* @param nominalPredictor if true use nominal predictor attributes
931
* @param numericPredictor if true use numeric predictor attributes
932
* @param stringPredictor if true use string predictor attributes
933
* @param datePredictor if true use date predictor attributes
934
* @param relationalPredictor if true use relational predictor attributes
935
* @param multiInstance whether multi-instance is needed
936
* @param classType the class type (NUMERIC, NOMINAL, etc.)
937
* @return index 0 is true if the test was passed
939
protected boolean[] instanceWeights(
940
boolean nominalPredictor,
941
boolean numericPredictor,
942
boolean stringPredictor,
943
boolean datePredictor,
944
boolean relationalPredictor,
945
boolean multiInstance,
948
print("kernel uses instance weights");
949
printAttributeSummary(
950
nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
952
int numTrain = 2*getNumInstances(),
953
numClasses = 2, missingLevel = 0;
954
boolean predictorMissing = false, classMissing = false;
956
boolean[] result = new boolean[2];
957
Instances train = null;
958
Kernel[] kernels = null;
959
KernelEvaluation evaluationB = null;
960
KernelEvaluation evaluationI = null;
961
boolean evalFail = false;
963
train = makeTestDataset(42, numTrain,
964
nominalPredictor ? getNumNominal() + 1 : 0,
965
numericPredictor ? getNumNumeric() + 1 : 0,
966
stringPredictor ? getNumString() : 0,
967
datePredictor ? getNumDate() : 0,
968
relationalPredictor ? getNumRelational() : 0,
972
if (missingLevel > 0)
973
addMissing(train, missingLevel, predictorMissing, classMissing);
974
kernels = Kernel.makeCopies(getKernel(), 2);
975
evaluationB = new KernelEvaluation();
976
evaluationI = new KernelEvaluation();
977
evaluationB.evaluate(kernels[0], train);
978
} catch (Exception ex) {
979
throw new Error("Error setting up for tests: " + ex.getMessage());
983
// Now modify instance weights and re-build/test
984
for (int i = 0; i < train.numInstances(); i++) {
985
train.instance(i).setWeight(0);
987
Random random = new Random(1);
988
for (int i = 0; i < train.numInstances() / 2; i++) {
989
int inst = Math.abs(random.nextInt()) % train.numInstances();
990
int weight = Math.abs(random.nextInt()) % 10 + 1;
991
train.instance(inst).setWeight(weight);
993
evaluationI.evaluate(kernels[1], train);
994
if (evaluationB.equals(evaluationI)) {
997
throw new Exception("evalFail");
1002
} catch (Exception ex) {
1007
println("\n=== Full Report ===");
1010
println("Results don't differ between non-weighted and "
1011
+ "weighted instance models.");
1012
println("Here are the results:\n");
1013
println(evaluationB.toSummaryString("\nboth methods\n"));
1015
print("Problem during building");
1016
println(": " + ex.getMessage() + "\n");
1018
println("Here is the dataset:\n");
1019
println("=== Train Dataset ===\n"
1020
+ train.toString() + "\n");
1021
println("=== Train Weights ===\n");
1022
for (int i = 0; i < train.numInstances(); i++) {
1023
println(" " + (i + 1)
1024
+ " " + train.instance(i).weight());
1033
* Checks whether the scheme alters the training dataset during
1034
* building. If the scheme needs to modify the data it should take
1035
* a copy of the training data. Currently checks for changes to header
1036
* structure, number of instances, order of instances, instance weights.
1038
* @param nominalPredictor if true use nominal predictor attributes
1039
* @param numericPredictor if true use numeric predictor attributes
1040
* @param stringPredictor if true use string predictor attributes
1041
* @param datePredictor if true use date predictor attributes
1042
* @param relationalPredictor if true use relational predictor attributes
1043
* @param multiInstance whether multi-instance is needed
1044
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1045
* @param predictorMissing true if we know the kernel can handle
1046
* (at least) moderate missing predictor values
1047
* @param classMissing true if we know the kernel can handle
1048
* (at least) moderate missing class values
1049
* @return index 0 is true if the test was passed
1051
protected boolean[] datasetIntegrity(
1052
boolean nominalPredictor,
1053
boolean numericPredictor,
1054
boolean stringPredictor,
1055
boolean datePredictor,
1056
boolean relationalPredictor,
1057
boolean multiInstance,
1059
boolean predictorMissing,
1060
boolean classMissing) {
1062
print("kernel doesn't alter original datasets");
1063
printAttributeSummary(
1064
nominalPredictor, numericPredictor, stringPredictor, datePredictor, relationalPredictor, multiInstance, classType);
1066
int numTrain = getNumInstances(),
1067
numClasses = 2, missingLevel = 20;
1069
boolean[] result = new boolean[2];
1070
Instances train = null;
1071
Kernel kernel = null;
1073
train = makeTestDataset(42, numTrain,
1074
nominalPredictor ? getNumNominal() : 0,
1075
numericPredictor ? getNumNumeric() : 0,
1076
stringPredictor ? getNumString() : 0,
1077
datePredictor ? getNumDate() : 0,
1078
relationalPredictor ? getNumRelational() : 0,
1082
if (missingLevel > 0)
1083
addMissing(train, missingLevel, predictorMissing, classMissing);
1084
kernel = Kernel.makeCopies(getKernel(), 1)[0];
1085
} catch (Exception ex) {
1086
throw new Error("Error setting up for tests: " + ex.getMessage());
1089
Instances trainCopy = new Instances(train);
1090
kernel.buildKernel(trainCopy);
1091
compareDatasets(train, trainCopy);
1095
} catch (Exception ex) {
1100
println("\n=== Full Report ===");
1101
print("Problem during building");
1102
println(": " + ex.getMessage() + "\n");
1103
println("Here is the dataset:\n");
1104
println("=== Train Dataset ===\n"
1105
+ train.toString() + "\n");
1113
* Runs a text on the datasets with the given characteristics.
1115
* @param nominalPredictor if true use nominal predictor attributes
1116
* @param numericPredictor if true use numeric predictor attributes
1117
* @param stringPredictor if true use string predictor attributes
1118
* @param datePredictor if true use date predictor attributes
1119
* @param relationalPredictor if true use relational predictor attributes
1120
* @param multiInstance whether multi-instance is needed
1121
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1122
* @param missingLevel the percentage of missing values
1123
* @param predictorMissing true if the missing values may be in
1125
* @param classMissing true if the missing values may be in the class
1126
* @param numTrain the number of instances in the training set
1127
* @param numClasses the number of classes
1128
* @param accepts the acceptable string in an exception
1129
* @return index 0 is true if the test was passed, index 1 is true if test
1132
protected boolean[] runBasicTest(boolean nominalPredictor,
1133
boolean numericPredictor,
1134
boolean stringPredictor,
1135
boolean datePredictor,
1136
boolean relationalPredictor,
1137
boolean multiInstance,
1140
boolean predictorMissing,
1141
boolean classMissing,
1144
FastVector accepts) {
1146
return runBasicTest(
1151
relationalPredictor,
1154
TestInstances.CLASS_IS_LAST,
1164
* Runs a text on the datasets with the given characteristics.
1166
* @param nominalPredictor if true use nominal predictor attributes
1167
* @param numericPredictor if true use numeric predictor attributes
1168
* @param stringPredictor if true use string predictor attributes
1169
* @param datePredictor if true use date predictor attributes
1170
* @param relationalPredictor if true use relational predictor attributes
1171
* @param multiInstance whether multi-instance is needed
1172
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1173
* @param classIndex the attribute index of the class
1174
* @param missingLevel the percentage of missing values
1175
* @param predictorMissing true if the missing values may be in
1177
* @param classMissing true if the missing values may be in the class
1178
* @param numTrain the number of instances in the training set
1179
* @param numClasses the number of classes
1180
* @param accepts the acceptable string in an exception
1181
* @return index 0 is true if the test was passed, index 1 is true if test
1184
protected boolean[] runBasicTest(boolean nominalPredictor,
1185
boolean numericPredictor,
1186
boolean stringPredictor,
1187
boolean datePredictor,
1188
boolean relationalPredictor,
1189
boolean multiInstance,
1193
boolean predictorMissing,
1194
boolean classMissing,
1197
FastVector accepts) {
1199
boolean[] result = new boolean[2];
1200
Instances train = null;
1201
Kernel kernel = null;
1203
train = makeTestDataset(42, numTrain,
1204
nominalPredictor ? getNumNominal() : 0,
1205
numericPredictor ? getNumNumeric() : 0,
1206
stringPredictor ? getNumString() : 0,
1207
datePredictor ? getNumDate() : 0,
1208
relationalPredictor ? getNumRelational() : 0,
1213
if (missingLevel > 0)
1214
addMissing(train, missingLevel, predictorMissing, classMissing);
1215
kernel = Kernel.makeCopies(getKernel(), 1)[0];
1216
} catch (Exception ex) {
1217
ex.printStackTrace();
1218
throw new Error("Error setting up for tests: " + ex.getMessage());
1221
kernel.buildKernel(train);
1225
catch (Exception ex) {
1226
boolean acceptable = false;
1228
if (ex.getMessage() == null)
1231
msg = ex.getMessage().toLowerCase();
1232
if (msg.indexOf("not in classpath") > -1)
1233
m_ClasspathProblems = true;
1235
for (int i = 0; i < accepts.size(); i++) {
1236
if (msg.indexOf((String)accepts.elementAt(i)) >= 0) {
1241
println("no" + (acceptable ? " (OK error message)" : ""));
1242
result[1] = acceptable;
1245
println("\n=== Full Report ===");
1246
print("Problem during building");
1247
println(": " + ex.getMessage() + "\n");
1249
if (accepts.size() > 0) {
1250
print("Error message doesn't mention ");
1251
for (int i = 0; i < accepts.size(); i++) {
1255
print('"' + (String)accepts.elementAt(i) + '"');
1258
println("here is the dataset:\n");
1259
println("=== Train Dataset ===\n"
1260
+ train.toString() + "\n");
1269
* Make a simple set of instances, which can later be modified
1270
* for use in specific tests.
1272
* @param seed the random number seed
1273
* @param numInstances the number of instances to generate
1274
* @param numNominal the number of nominal attributes
1275
* @param numNumeric the number of numeric attributes
1276
* @param numString the number of string attributes
1277
* @param numDate the number of date attributes
1278
* @param numRelational the number of relational attributes
1279
* @param numClasses the number of classes (if nominal class)
1280
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1281
* @param multiInstance whether the dataset should a multi-instance dataset
1282
* @return the test dataset
1283
* @throws Exception if the dataset couldn't be generated
1284
* @see #process(Instances)
1286
protected Instances makeTestDataset(int seed, int numInstances,
1287
int numNominal, int numNumeric,
1288
int numString, int numDate,
1290
int numClasses, int classType,
1291
boolean multiInstance)
1294
return makeTestDataset(
1304
TestInstances.CLASS_IS_LAST,
1309
* Make a simple set of instances with variable position of the class
1310
* attribute, which can later be modified for use in specific tests.
1312
* @param seed the random number seed
1313
* @param numInstances the number of instances to generate
1314
* @param numNominal the number of nominal attributes
1315
* @param numNumeric the number of numeric attributes
1316
* @param numString the number of string attributes
1317
* @param numDate the number of date attributes
1318
* @param numRelational the number of relational attributes
1319
* @param numClasses the number of classes (if nominal class)
1320
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1321
* @param classIndex the index of the class (0-based, -1 as last)
1322
* @param multiInstance whether the dataset should a multi-instance dataset
1323
* @return the test dataset
1324
* @throws Exception if the dataset couldn't be generated
1325
* @see TestInstances#CLASS_IS_LAST
1326
* @see #process(Instances)
1328
protected Instances makeTestDataset(int seed, int numInstances,
1329
int numNominal, int numNumeric,
1330
int numString, int numDate,
1332
int numClasses, int classType,
1334
boolean multiInstance)
1337
TestInstances dataset = new TestInstances();
1339
dataset.setSeed(seed);
1340
dataset.setNumInstances(numInstances);
1341
dataset.setNumNominal(numNominal);
1342
dataset.setNumNumeric(numNumeric);
1343
dataset.setNumString(numString);
1344
dataset.setNumDate(numDate);
1345
dataset.setNumRelational(numRelational);
1346
dataset.setNumClasses(numClasses);
1347
dataset.setClassType(classType);
1348
dataset.setClassIndex(classIndex);
1349
dataset.setNumClasses(numClasses);
1350
dataset.setMultiInstance(multiInstance);
1351
dataset.setWords(getWords());
1352
dataset.setWordSeparators(getWordSeparators());
1354
return process(dataset.generate());
1358
* Print out a short summary string for the dataset characteristics
1360
* @param nominalPredictor true if nominal predictor attributes are present
1361
* @param numericPredictor true if numeric predictor attributes are present
1362
* @param stringPredictor true if string predictor attributes are present
1363
* @param datePredictor true if date predictor attributes are present
1364
* @param relationalPredictor true if relational predictor attributes are present
1365
* @param multiInstance whether multi-instance is needed
1366
* @param classType the class type (NUMERIC, NOMINAL, etc.)
1368
protected void printAttributeSummary(boolean nominalPredictor,
1369
boolean numericPredictor,
1370
boolean stringPredictor,
1371
boolean datePredictor,
1372
boolean relationalPredictor,
1373
boolean multiInstance,
1378
if (numericPredictor)
1381
if (nominalPredictor) {
1382
if (str.length() > 0)
1387
if (stringPredictor) {
1388
if (str.length() > 0)
1393
if (datePredictor) {
1394
if (str.length() > 0)
1399
if (relationalPredictor) {
1400
if (str.length() > 0)
1402
str += " relational";
1405
str += " predictors)";
1407
switch (classType) {
1408
case Attribute.NUMERIC:
1409
str = " (numeric class," + str;
1411
case Attribute.NOMINAL:
1412
str = " (nominal class," + str;
1414
case Attribute.STRING:
1415
str = " (string class," + str;
1417
case Attribute.DATE:
1418
str = " (date class," + str;
1420
case Attribute.RELATIONAL:
1421
str = " (relational class," + str;
1429
* Test method for this class
1431
* @param args the commandline parameters
1433
public static void main(String [] args) {
1434
runCheck(new CheckKernel(), args);