/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * END.java
 * Copyright (C) 2004-2005 University of Waikato, Hamilton, New Zealand
 */
23
package weka.classifiers.meta;
25
import weka.classifiers.Classifier;
26
import weka.classifiers.RandomizableIteratedSingleClassifierEnhancer;
27
import weka.core.Capabilities;
28
import weka.core.Instance;
29
import weka.core.Instances;
30
import weka.core.Randomizable;
31
import weka.core.TechnicalInformation;
32
import weka.core.TechnicalInformationHandler;
33
import weka.core.Utils;
34
import weka.core.TechnicalInformation.Field;
35
import weka.core.TechnicalInformation.Type;
37
import java.util.Hashtable;
38
import java.util.Random;
41
<!-- globalinfo-start -->
42
* A meta classifier for handling multi-class datasets with 2-class classifiers by building an ensemble of nested dichotomies.<br/>
44
* For more info, check<br/>
46
* Lin Dong, Eibe Frank, Stefan Kramer: Ensembles of Balanced Nested Dichotomies for Multi-class Problems. In: PKDD, 84-95, 2005.<br/>
48
* Eibe Frank, Stefan Kramer: Ensembles of nested dichotomies for multi-class problems. In: Twenty-first International Conference on Machine Learning, 2004.
50
<!-- globalinfo-end -->
52
<!-- technical-bibtex-start -->
55
* @inproceedings{Dong2005,
56
* author = {Lin Dong and Eibe Frank and Stefan Kramer},
59
* publisher = {Springer},
60
* title = {Ensembles of Balanced Nested Dichotomies for Multi-class Problems},
64
* @inproceedings{Frank2004,
65
* author = {Eibe Frank and Stefan Kramer},
66
* booktitle = {Twenty-first International Conference on Machine Learning},
68
* title = {Ensembles of nested dichotomies for multi-class problems},
73
<!-- technical-bibtex-end -->
75
<!-- options-start -->
76
* Valid options are: <p/>
78
* <pre> -S <num>
82
* <pre> -I <num>
83
* Number of iterations.
87
* If set, classifier is run in debug mode and
88
* may output additional info to the console</pre>
91
* Full name of base classifier.
92
* (default: weka.classifiers.meta.nestedDichotomies.ND)</pre>
95
* Options specific to classifier weka.classifiers.meta.nestedDichotomies.ND:
98
* <pre> -S <num>
103
* If set, classifier is run in debug mode and
104
* may output additional info to the console</pre>
107
* Full name of base classifier.
108
* (default: weka.classifiers.trees.J48)</pre>
111
* Options specific to classifier weka.classifiers.trees.J48:
115
* Use unpruned tree.</pre>
117
* <pre> -C <pruning confidence>
118
* Set confidence threshold for pruning.
119
* (default 0.25)</pre>
121
* <pre> -M <minimum number of instances>
122
* Set minimum number of instances per leaf.
126
* Use reduced error pruning.</pre>
128
* <pre> -N <number of folds>
129
* Set number of folds for reduced error
130
* pruning. One fold is used as pruning set.
134
* Use binary splits only.</pre>
137
* Don't perform subtree raising.</pre>
140
* Do not clean up after the tree has been built.</pre>
143
* Laplace smoothing for predicted probabilities.</pre>
145
* <pre> -Q <seed>
146
* Seed for random data shuffling (default 1).</pre>
150
* Options after -- are passed to the designated classifier.<p>
154
* @version $Revision: 1.7 $
157
extends RandomizableIteratedSingleClassifierEnhancer
158
implements TechnicalInformationHandler {
160
/** for serialization */
161
static final long serialVersionUID = -4143242362912214956L;
164
* The hashtable containing the classifiers for the END.
166
protected Hashtable m_hashtable = null;
/**
 * Constructor: installs the default base classifier
 * (weka.classifiers.meta.nestedDichotomies.ND).
 */
public END() {
  m_Classifier = new weka.classifiers.meta.nestedDichotomies.ND();
}
177
* String describing default classifier.
179
* @return the default classifier classname
181
protected String defaultClassifierString() {
183
return "weka.classifiers.meta.nestedDichotomies.ND";
187
* Returns a string describing classifier
188
* @return a description suitable for
189
* displaying in the explorer/experimenter gui
191
public String globalInfo() {
193
return "A meta classifier for handling multi-class datasets with 2-class "
194
+ "classifiers by building an ensemble of nested dichotomies.\n\n"
195
+ "For more info, check\n\n"
196
+ getTechnicalInformation().toString();
200
* Returns an instance of a TechnicalInformation object, containing
201
* detailed information about the technical background of this class,
202
* e.g., paper reference or book this class is based on.
204
* @return the technical information about this class
206
public TechnicalInformation getTechnicalInformation() {
207
TechnicalInformation result;
208
TechnicalInformation additional;
210
result = new TechnicalInformation(Type.INPROCEEDINGS);
211
result.setValue(Field.AUTHOR, "Lin Dong and Eibe Frank and Stefan Kramer");
212
result.setValue(Field.TITLE, "Ensembles of Balanced Nested Dichotomies for Multi-class Problems");
213
result.setValue(Field.BOOKTITLE, "PKDD");
214
result.setValue(Field.YEAR, "2005");
215
result.setValue(Field.PAGES, "84-95");
216
result.setValue(Field.PUBLISHER, "Springer");
218
additional = result.add(Type.INPROCEEDINGS);
219
additional.setValue(Field.AUTHOR, "Eibe Frank and Stefan Kramer");
220
additional.setValue(Field.TITLE, "Ensembles of nested dichotomies for multi-class problems");
221
additional.setValue(Field.BOOKTITLE, "Twenty-first International Conference on Machine Learning");
222
additional.setValue(Field.YEAR, "2004");
223
additional.setValue(Field.PUBLISHER, "ACM");
229
* Returns default capabilities of the classifier.
231
* @return the capabilities of this classifier
233
public Capabilities getCapabilities() {
234
Capabilities result = super.getCapabilities();
237
result.setMinimumNumberInstances(1); // at least 1 for the RandomNumberGenerator!
243
* Builds the committee of randomizable classifiers.
245
* @param data the training data to be used for generating the
247
* @throws Exception if the classifier could not be built successfully
249
public void buildClassifier(Instances data) throws Exception {
251
// can classifier handle the data?
252
getCapabilities().testWithFail(data);
254
// remove instances with missing class
255
data = new Instances(data);
256
data.deleteWithMissingClass();
258
if (!(m_Classifier instanceof weka.classifiers.meta.nestedDichotomies.ND) &&
259
!(m_Classifier instanceof weka.classifiers.meta.nestedDichotomies.ClassBalancedND) &&
260
!(m_Classifier instanceof weka.classifiers.meta.nestedDichotomies.DataNearBalancedND)) {
261
throw new IllegalArgumentException("END only works with ND, ClassBalancedND " +
262
"or DataNearBalancedND classifier");
265
m_hashtable = new Hashtable();
267
m_Classifiers = Classifier.makeCopies(m_Classifier, m_NumIterations);
269
Random random = data.getRandomNumberGenerator(m_Seed);
270
for (int j = 0; j < m_Classifiers.length; j++) {
272
// Set the random number seed for the current classifier.
273
((Randomizable) m_Classifiers[j]).setSeed(random.nextInt());
276
if (m_Classifier instanceof weka.classifiers.meta.nestedDichotomies.ND)
277
((weka.classifiers.meta.nestedDichotomies.ND)m_Classifiers[j]).setHashtable(m_hashtable);
278
else if (m_Classifier instanceof weka.classifiers.meta.nestedDichotomies.ClassBalancedND)
279
((weka.classifiers.meta.nestedDichotomies.ClassBalancedND)m_Classifiers[j]).setHashtable(m_hashtable);
280
else if (m_Classifier instanceof weka.classifiers.meta.nestedDichotomies.DataNearBalancedND)
281
((weka.classifiers.meta.nestedDichotomies.DataNearBalancedND)m_Classifiers[j]).
282
setHashtable(m_hashtable);
284
// Build the classifier.
285
m_Classifiers[j].buildClassifier(data);
290
* Calculates the class membership probabilities for the given test
293
* @param instance the instance to be classified
294
* @return preedicted class probability distribution
295
* @throws Exception if distribution can't be computed successfully
297
public double[] distributionForInstance(Instance instance) throws Exception {
299
double [] sums = new double [instance.numClasses()], newProbs;
301
for (int i = 0; i < m_NumIterations; i++) {
302
if (instance.classAttribute().isNumeric() == true) {
303
sums[0] += m_Classifiers[i].classifyInstance(instance);
305
newProbs = m_Classifiers[i].distributionForInstance(instance);
306
for (int j = 0; j < newProbs.length; j++)
307
sums[j] += newProbs[j];
310
if (instance.classAttribute().isNumeric() == true) {
311
sums[0] /= (double)m_NumIterations;
313
} else if (Utils.eq(Utils.sum(sums), 0)) {
316
Utils.normalize(sums);
322
* Returns description of the committee.
324
* @return description of the committee as a string
326
public String toString() {
328
if (m_Classifiers == null) {
329
return "END: No model built yet.";
331
StringBuffer text = new StringBuffer();
332
text.append("All the base classifiers: \n\n");
333
for (int i = 0; i < m_Classifiers.length; i++)
334
text.append(m_Classifiers[i].toString() + "\n\n");
336
return text.toString();
340
* Main method for testing this class.
342
* @param argv the options
344
public static void main(String [] argv) {
345
runClassifier(new END(), argv);