2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 2006 University of Waikato, Hamilton, New Zealand
25
import java.util.Enumeration;
26
import java.util.Random;
27
import java.util.StringTokenizer;
28
import java.util.Vector;
31
* Abstract general class for testing schemes in Weka. Derived classes are
32
* also used for JUnit tests.
34
* @author FracPete (fracpete at waikato dot ac dot nz)
35
* @version $Revision: 1.3 $
38
public abstract class CheckScheme
41
/** a class for postprocessing the test-data */
42
public static class PostProcessor {
44
* Provides a hook for derived classes to further modify the data. Currently,
45
* the data is just passed through.
47
* @param data the data to process
48
* @return the processed data
50
public Instances process(Instances data) {
55
/** The number of instances in the datasets */
56
protected int m_NumInstances = 20;
58
/** the number of nominal attributes */
59
protected int m_NumNominal = 2;
61
/** the number of numeric attributes */
62
protected int m_NumNumeric = 1;
64
/** the number of string attributes */
65
protected int m_NumString = 1;
67
/** the number of date attributes */
68
protected int m_NumDate = 1;
70
/** the number of relational attributes */
71
protected int m_NumRelational = 1;
73
/** the number of instances in relational attributes (applies also for bags
74
* in multi-instance) */
75
protected int m_NumInstancesRelational = 10;
77
/** for generating String attributes/classes */
78
protected String[] m_Words = TestInstances.DEFAULT_WORDS;
80
/** for generating String attributes/classes */
81
protected String m_WordSeparators = TestInstances.DEFAULT_SEPARATORS;
83
/** for post-processing the data even further */
84
protected PostProcessor m_PostProcessor = null;
86
/** whether classpath problems occurred */
87
protected boolean m_ClasspathProblems = false;
90
* Returns an enumeration describing the available options.
92
* @return an enumeration of all the available options.
94
public Enumeration listOptions() {
95
Vector result = new Vector();
97
Enumeration en = super.listOptions();
98
while (en.hasMoreElements())
99
result.addElement(en.nextElement());
101
result.addElement(new Option(
102
"\tThe number of instances in the datasets (default 20).",
103
"N", 1, "-N <num>"));
105
result.addElement(new Option(
106
"\tThe number of nominal attributes (default 2).",
107
"nominal", 1, "-nominal <num>"));
109
result.addElement(new Option(
110
"\tThe number of values for nominal attributes (default 1).",
111
"nominal-values", 1, "-nominal-values <num>"));
113
result.addElement(new Option(
114
"\tThe number of numeric attributes (default 1).",
115
"numeric", 1, "-numeric <num>"));
117
result.addElement(new Option(
118
"\tThe number of string attributes (default 1).",
119
"string", 1, "-string <num>"));
121
result.addElement(new Option(
122
"\tThe number of date attributes (default 1).",
123
"date", 1, "-date <num>"));
125
result.addElement(new Option(
126
"\tThe number of relational attributes (default 1).",
127
"relational", 1, "-relational <num>"));
129
result.addElement(new Option(
130
"\tThe number of instances in relational/bag attributes (default 10).",
131
"num-instances-relational", 1, "-num-instances-relational <num>"));
133
result.addElement(new Option(
134
"\tThe words to use in string attributes.",
135
"words", 1, "-words <comma-separated-list>"));
137
result.addElement(new Option(
138
"\tThe word separators to use in string attributes.",
139
"word-separators", 1, "-word-separators <chars>"));
141
return result.elements();
145
* Parses a given list of options.
147
* @param options the list of options as an array of strings
148
* @throws Exception if an option is not supported
150
public void setOptions(String[] options) throws Exception {
153
super.setOptions(options);
155
tmpStr = Utils.getOption('N', options);
156
if (tmpStr.length() != 0)
157
setNumInstances(Integer.parseInt(tmpStr));
161
tmpStr = Utils.getOption("nominal", options);
162
if (tmpStr.length() != 0)
163
setNumNominal(Integer.parseInt(tmpStr));
167
tmpStr = Utils.getOption("numeric", options);
168
if (tmpStr.length() != 0)
169
setNumNumeric(Integer.parseInt(tmpStr));
173
tmpStr = Utils.getOption("string", options);
174
if (tmpStr.length() != 0)
175
setNumString(Integer.parseInt(tmpStr));
179
tmpStr = Utils.getOption("date", options);
180
if (tmpStr.length() != 0)
181
setNumDate(Integer.parseInt(tmpStr));
185
tmpStr = Utils.getOption("relational", options);
186
if (tmpStr.length() != 0)
187
setNumRelational(Integer.parseInt(tmpStr));
191
tmpStr = Utils.getOption("num-instances-relational", options);
192
if (tmpStr.length() != 0)
193
setNumInstancesRelational(Integer.parseInt(tmpStr));
195
setNumInstancesRelational(10);
197
tmpStr = Utils.getOption("words", options);
198
if (tmpStr.length() != 0)
201
setWords(new TestInstances().getWords());
203
if (Utils.getOptionPos("word-separators", options) > -1) {
204
tmpStr = Utils.getOption("word-separators", options);
205
setWordSeparators(tmpStr);
208
setWordSeparators(TestInstances.DEFAULT_SEPARATORS);
213
* Gets the current settings of the CheckClassifier.
215
* @return an array of strings suitable for passing to setOptions
217
public String[] getOptions() {
222
result = new Vector();
224
options = super.getOptions();
225
for (i = 0; i < options.length; i++)
226
result.add(options[i]);
229
result.add("" + getNumInstances());
231
result.add("-nominal");
232
result.add("" + getNumNominal());
234
result.add("-numeric");
235
result.add("" + getNumNumeric());
237
result.add("-string");
238
result.add("" + getNumString());
241
result.add("" + getNumDate());
243
result.add("-relational");
244
result.add("" + getNumRelational());
246
result.add("-words");
247
result.add("" + getWords());
249
result.add("-word-separators");
250
result.add("" + getWordSeparators());
252
return (String[]) result.toArray(new String[result.size()]);
256
* sets the PostProcessor to use
258
* @param value the new PostProcessor
259
* @see #m_PostProcessor
261
public void setPostProcessor(PostProcessor value) {
262
m_PostProcessor = value;
266
* returns the current PostProcessor, can be null
268
* @return the current PostProcessor
270
public PostProcessor getPostProcessor() {
271
return m_PostProcessor;
275
* returns TRUE if the classifier returned a "not in classpath" Exception
277
* @return true if CLASSPATH problems occurred
279
public boolean hasClasspathProblems() {
280
return m_ClasspathProblems;
284
* Begin the tests, reporting results to System.out
286
public abstract void doTests();
289
* Sets the number of instances to use in the datasets (some classifiers
290
* might require more instances).
292
* @param value the number of instances to use
294
public void setNumInstances(int value) {
295
m_NumInstances = value;
299
* Gets the current number of instances to use for the datasets.
301
* @return the number of instances
303
public int getNumInstances() {
304
return m_NumInstances;
308
* sets the number of nominal attributes
310
* @param value the number of nominal attributes
312
public void setNumNominal(int value) {
313
m_NumNominal = value;
317
* returns the current number of nominal attributes
319
* @return the number of nominal attributes
321
public int getNumNominal() {
326
* sets the number of numeric attributes
328
* @param value the number of numeric attributes
330
public void setNumNumeric(int value) {
331
m_NumNumeric = value;
335
* returns the current number of numeric attributes
337
* @return the number of numeric attributes
339
public int getNumNumeric() {
344
* sets the number of string attributes
346
* @param value the number of string attributes
348
public void setNumString(int value) {
353
* returns the current number of string attributes
355
* @return the number of string attributes
357
public int getNumString() {
362
* sets the number of data attributes
364
* @param value the number of date attributes
366
public void setNumDate(int value) {
371
* returns the current number of date attributes
373
* @return the number of date attributes
375
public int getNumDate() {
380
* sets the number of relational attributes
382
* @param value the number of relational attributes
384
public void setNumRelational(int value) {
385
m_NumRelational = value;
389
* returns the current number of relational attributes
391
* @return the number of relational attributes
393
public int getNumRelational() {
394
return m_NumRelational;
398
* sets the number of instances in relational/bag attributes to produce
400
* @param value the number of instances
402
public void setNumInstancesRelational(int value) {
403
m_NumInstancesRelational = value;
407
* returns the current number of instances in relational/bag attributes to produce
409
* @return the number of instances
411
public int getNumInstancesRelational() {
412
return m_NumInstancesRelational;
416
* turns the comma-separated list into an array
418
* @param value the list to process
419
* @return the list as array
421
protected static String[] listToArray(String value) {
426
tok = new StringTokenizer(value, ",");
427
while (tok.hasMoreTokens())
428
list.add(tok.nextToken());
430
return (String[]) list.toArray(new String[list.size()]);
434
* turns the array into a comma-separated list
436
* @param value the array to process
437
* @return the array as list
439
protected static String arrayToList(String[] value) {
445
for (i = 0; i < value.length; i++) {
455
* returns a string representation of the attribute type
457
* @param type the attribute type to get a string rerpresentation for
458
* @return the string representation
460
public static String attributeTypeToString(int type) {
464
case Attribute.NUMERIC:
468
case Attribute.NOMINAL:
472
case Attribute.STRING:
480
case Attribute.RELATIONAL:
481
result = "relational";
492
* Sets the comma-separated list of words to use for generating strings. The
493
* list must contain at least 2 words, otherwise an exception will be thrown.
495
* @param value the list of words
496
* @throws IllegalArgumentException if not at least 2 words are provided
498
public void setWords(String value) {
499
if (listToArray(value).length < 2)
500
throw new IllegalArgumentException("At least 2 words must be provided!");
502
m_Words = listToArray(value);
506
* returns the words used for assembling strings in a comma-separated list.
508
* @return the words as comma-separated list
510
public String getWords() {
511
return arrayToList(m_Words);
515
* sets the word separators (chars) to use for assembling strings.
517
* @param value the characters to use as separators
519
public void setWordSeparators(String value) {
520
m_WordSeparators = value;
524
* returns the word separators (chars) to use for assembling strings.
526
* @return the current separators
528
public String getWordSeparators() {
529
return m_WordSeparators;
533
* Compare two datasets to see if they differ.
535
* @param data1 one set of instances
536
* @param data2 the other set of instances
537
* @throws Exception if the datasets differ
539
protected void compareDatasets(Instances data1, Instances data2)
542
if (!data2.equalHeaders(data1)) {
543
throw new Exception("header has been modified");
545
if (!(data2.numInstances() == data1.numInstances())) {
546
throw new Exception("number of instances has changed");
548
for (int i = 0; i < data2.numInstances(); i++) {
549
Instance orig = data1.instance(i);
550
Instance copy = data2.instance(i);
551
for (int j = 0; j < orig.numAttributes(); j++) {
552
if (orig.isMissing(j)) {
553
if (!copy.isMissing(j)) {
554
throw new Exception("instances have changed");
556
} else if (orig.value(j) != copy.value(j)) {
557
throw new Exception("instances have changed");
559
if (orig.weight() != copy.weight()) {
560
throw new Exception("instance weights have changed");
567
* Add missing values to a dataset.
569
* @param data the instances to add missing values to
570
* @param level the level of missing values to add (if positive, this
571
* is the probability that a value will be set to missing, if negative
572
* all but one value will be set to missing (not yet implemented))
573
* @param predictorMissing if true, predictor attributes will be modified
574
* @param classMissing if true, the class attribute will be modified
576
protected void addMissing(Instances data, int level,
577
boolean predictorMissing, boolean classMissing) {
579
int classIndex = data.classIndex();
580
Random random = new Random(1);
581
for (int i = 0; i < data.numInstances(); i++) {
582
Instance current = data.instance(i);
583
for (int j = 0; j < data.numAttributes(); j++) {
584
if (((j == classIndex) && classMissing) ||
585
((j != classIndex) && predictorMissing)) {
586
if (Math.abs(random.nextInt()) % 100 < level)
587
current.setMissing(j);
594
* Provides a hook for derived classes to further modify the data.
596
* @param data the data to process
597
* @return the processed data
598
* @see #m_PostProcessor
600
protected Instances process(Instances data) {
601
if (getPostProcessor() == null)
604
return getPostProcessor().process(data);