/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * Copyright (C) 2007 Geoff Webb & Janice Boughton
 * (adapted from code written by Eibe Frank).
 */
23
package weka.classifiers.trees;
25
import weka.classifiers.Classifier;
26
import weka.classifiers.Sourcable;
27
import weka.classifiers.trees.j48.BinC45ModelSelection;
28
import weka.classifiers.trees.j48.C45ModelSelection;
29
import weka.classifiers.trees.j48.C45PruneableClassifierTreeG;
30
import weka.classifiers.trees.j48.ClassifierTree;
31
import weka.classifiers.trees.j48.ModelSelection;
32
import weka.core.AdditionalMeasureProducer;
33
import weka.core.Capabilities;
34
import weka.core.Drawable;
35
import weka.core.Instance;
36
import weka.core.Instances;
37
import weka.core.Matchable;
38
import weka.core.Option;
39
import weka.core.OptionHandler;
40
import weka.core.Summarizable;
41
import weka.core.TechnicalInformation;
42
import weka.core.TechnicalInformationHandler;
43
import weka.core.Utils;
44
import weka.core.WeightedInstancesHandler;
45
import weka.core.TechnicalInformation.Field;
46
import weka.core.TechnicalInformation.Type;
48
import java.util.Enumeration;
49
import java.util.Vector;
52
<!-- globalinfo-start -->
53
* Class for generating a grafted (pruned or unpruned) C4.5 decision tree. For more information, see:<br/>
55
* Geoff Webb (1999). Decision Tree Grafting From the All-Tests-But-One Partition. Morgan Kaufmann, San Francisco, CA.
58
* Webb, G. I. (1996). Further Experimental Evidence Against The Utility Of Occams Razor. Journal of Artificial Intelligence Research 4. Menlo Park, CA: AAAI Press, pages 397-417.
60
<!-- globalinfo-end -->
62
<!-- technical-bibtex-start -->
65
* @INPROCEEDINGS{Webb99,
67
* title = {Decision Tree Grafting From The All Tests But One Partition},
68
* booktitle = {Proceedings of the Sixteenth International Joint Conference on Artificial Intelligence (IJCAI 99)},
69
* publisher = {Morgan Kaufmann},
71
* address = {San Francisco},
72
* author = {G. I. Webb},
73
* location = {Stockholm, Sweden},
76
 * @article{Webb96b,
78
* title = {Further Experimental Evidence Against The Utility Of Occams Razor},
79
* journal = {Journal of Artificial Intelligence Research},
82
* publisher = {AAAI Press},
83
* address = {Menlo Park, CA},
84
* author = {G. I. Webb}
88
<!-- technical-bibtex-end -->
90
<!-- options-start -->
91
* Valid options are: <p/>
94
* Use unpruned tree.</pre>
96
* <pre> -C <pruning confidence>
97
* Set confidence threshold for pruning.
98
* (default 0.25)</pre>
100
* <pre> -M <minimum number of instances>
101
* Set minimum number of instances per leaf.
105
* Use binary splits only.</pre>
108
* Don't perform subtree raising.</pre>
111
* Do not clean up after the tree has been built.</pre>
114
* Laplace smoothing for predicted probabilities.
115
* (note: this option only affects initial tree; grafting process always uses laplace).</pre>
118
* Option to allow relabeling during grafting.</pre>
122
* @author Janice Boughton (jrbought@csse.monash.edu.au)
123
* (based on J48.java written by Eibe Frank)
124
* @version $Revision: 1.1 $
126
public class J48graft extends Classifier implements OptionHandler,
127
Drawable, Matchable, Sourcable, WeightedInstancesHandler, Summarizable,
128
AdditionalMeasureProducer, TechnicalInformationHandler {
130
/** for serialization */
131
static final long serialVersionUID = 8823716098042427799L;
133
/** The decision tree */
134
private ClassifierTree m_root;
136
/** Unpruned tree? */
137
private boolean m_unpruned = false;
139
/** Confidence level */
140
private float m_CF = 0.25f;
142
/** Minimum number of instances */
143
private int m_minNumObj = 2;
145
/** Determines whether probabilities are smoothed using
146
Laplace correction when predictions are generated */
147
private boolean m_useLaplace = false;
149
/** Number of folds for reduced error pruning. */
150
private int m_numFolds = 3;
152
/** Binary splits on nominal attributes? */
153
private boolean m_binarySplits = false;
155
/** Subtree raising to be performed? */
156
private boolean m_subtreeRaising = true;
158
/** Cleanup after the tree has been built. */
159
private boolean m_noCleanup = false;
161
/** relabel instances when grafting */
162
private boolean m_relabel = false;
165
* Returns a string describing classifier
166
* @return a description suitable for
167
* displaying in the explorer/experimenter gui
169
public String globalInfo() {
170
return "Class for generating a grafted (pruned or unpruned) C4.5 "
171
+ "decision tree. For more information, see\n\n"
172
+ getTechnicalInformation().toString();
176
* Returns an instance of a TechnicalInformation object, containing
177
* detailed information about the technical background of this class,
178
* e.g., paper reference or book this class is based on.
180
* @return the technical information about this class
182
public TechnicalInformation getTechnicalInformation() {
183
TechnicalInformation result;
185
result = new TechnicalInformation(Type.INPROCEEDINGS);
186
result.setValue(Field.AUTHOR, "Geoff Webb");
187
result.setValue(Field.YEAR, "1999");
188
result.setValue(Field.TITLE, "Decision Tree Grafting From the All-Tests-But-One Partition");
189
result.setValue(Field.PUBLISHER, "Morgan Kaufmann");
190
result.setValue(Field.ADDRESS, "San Francisco, CA");
196
* Returns default capabilities of the classifier.
198
* @return the capabilities of this classifier
200
public Capabilities getCapabilities() {
204
result = new C45PruneableClassifierTreeG(null, !m_unpruned, m_CF, m_subtreeRaising, m_relabel, !m_noCleanup).getCapabilities();
206
catch (Exception e) {
207
result = new Capabilities(this);
210
result.setOwner(this);
216
* Generates the classifier.
218
* @param instances the data to train the classifier with
219
* @throws Exception if classifier can't be built successfully
221
public void buildClassifier(Instances instances)
224
ModelSelection modSelection;
227
modSelection = new BinC45ModelSelection(m_minNumObj, instances);
229
modSelection = new C45ModelSelection(m_minNumObj, instances);
230
m_root = new C45PruneableClassifierTreeG(modSelection,
231
!m_unpruned, m_CF, m_subtreeRaising,
232
m_relabel, !m_noCleanup);
233
m_root.buildClassifier(instances);
235
if (m_binarySplits) {
236
((BinC45ModelSelection)modSelection).cleanup();
238
((C45ModelSelection)modSelection).cleanup();
243
* Classifies an instance.
245
* @param instance the instance to classify
246
* @return the classification for the instance
247
* @throws Exception if instance can't be classified successfully
249
public double classifyInstance(Instance instance) throws Exception {
251
return m_root.classifyInstance(instance);
255
* Returns class probabilities for an instance.
257
* @param instance the instance to calculate the class probabilities for
258
* @return the class probabilities
259
* @throws Exception if distribution can't be computed successfully
261
public final double [] distributionForInstance(Instance instance)
264
return m_root.distributionForInstance(instance, m_useLaplace);
268
* Returns the type of graph this classifier
270
* @return Drawable.TREE
272
public int graphType() {
273
return Drawable.TREE;
277
* Returns graph describing the tree.
279
* @return the graph describing the tree
280
* @throws Exception if graph can't be computed
282
public String graph() throws Exception {
284
return m_root.graph();
288
* Returns tree in prefix order.
290
* @return the tree in prefix order
291
* @throws Exception if something goes wrong
293
public String prefix() throws Exception {
295
return m_root.prefix();
300
* Returns tree as an if-then statement.
302
* @param className the name of the Java class
303
* @return the tree as a Java if-then type statement
304
* @throws Exception if something goes wrong
306
public String toSource(String className) throws Exception {
308
StringBuffer [] source = m_root.toSource(className);
310
"class " + className + " {\n\n"
311
+" public static double classify(Object [] i)\n"
312
+" throws Exception {\n\n"
313
+" double p = Double.NaN;\n"
314
+ source[0] // Assignment code
317
+ source[1] // Support code
322
* Returns an enumeration describing the available options.
324
* Valid options are: <p>
327
* Use unpruned tree.<p>
330
* Set confidence threshold for pruning. (Default: 0.25) <p>
333
* Set minimum number of instances per leaf. (Default: 2) <p>
336
* Use binary splits for nominal attributes. <p>
339
* Don't perform subtree raising. <p>
342
* Do not clean up after the tree has been built.
345
* If set, Laplace smoothing is used for predicted probabilites.
346
* (note: this option only affects initial tree; grafting process always uses laplace). <p>
349
* Allow relabelling when grafting. <p>
351
* @return an enumeration of all the available options.
353
public Enumeration listOptions() {
355
Vector newVector = new Vector(9);
358
addElement(new Option("\tUse unpruned tree.",
361
addElement(new Option("\tSet confidence threshold for pruning.\n" +
363
"C", 1, "-C <pruning confidence>"));
365
addElement(new Option("\tSet minimum number of instances per leaf.\n" +
367
"M", 1, "-M <minimum number of instances>"));
369
addElement(new Option("\tUse binary splits only.",
372
addElement(new Option("\tDon't perform subtree raising.",
375
addElement(new Option("\tDo not clean up after the tree has been built.",
378
addElement(new Option("\tLaplace smoothing for predicted probabilities. (note: this option only affects initial tree; grafting process always uses laplace).",
382
addElement(new Option("\tRelabel when grafting.",
384
return newVector.elements();
388
* Parses a given list of options.
390
<!-- options-start -->
391
* Valid options are: <p/>
394
* Use unpruned tree.</pre>
396
* <pre> -C <pruning confidence>
397
* Set confidence threshold for pruning.
398
* (default 0.25)</pre>
400
* <pre> -M <minimum number of instances>
401
* Set minimum number of instances per leaf.
405
* Use binary splits only.</pre>
408
* Don't perform subtree raising.</pre>
411
* Do not clean up after the tree has been built.</pre>
414
* Laplace smoothing for predicted probabilities.
415
* (note: this option only affects initial tree; grafting process always uses laplace). </pre>
418
* Allow relabelling when performing grafting.</pre>
422
* @param options the list of options as an array of strings
423
* @throws Exception if an option is not supported
425
public void setOptions(String[] options) throws Exception {
428
String minNumString = Utils.getOption('M', options);
429
if (minNumString.length() != 0) {
430
m_minNumObj = Integer.parseInt(minNumString);
434
m_binarySplits = Utils.getFlag('B', options);
435
m_useLaplace = Utils.getFlag('A', options);
438
m_unpruned = Utils.getFlag('U', options);
439
m_subtreeRaising = !Utils.getFlag('S', options);
440
m_noCleanup = Utils.getFlag('L', options);
441
if ((m_unpruned) && (!m_subtreeRaising)) {
442
throw new Exception("Subtree raising doesn't need to be unset for unpruned tree!");
444
m_relabel = Utils.getFlag('E', options);
445
String confidenceString = Utils.getOption('C', options);
446
if (confidenceString.length() != 0) {
448
throw new Exception("Doesn't make sense to change confidence for unpruned "
451
m_CF = (new Float(confidenceString)).floatValue();
452
if ((m_CF <= 0) || (m_CF >= 1)) {
453
throw new Exception("Confidence has to be greater than zero and smaller " +
463
* Gets the current settings of the Classifier.
465
* @return an array of strings suitable for passing to setOptions
467
public String [] getOptions() {
469
String [] options = new String [10];
473
options[current++] = "-L";
476
options[current++] = "-U";
478
if (!m_subtreeRaising) {
479
options[current++] = "-S";
481
options[current++] = "-C"; options[current++] = "" + m_CF;
483
if (m_binarySplits) {
484
options[current++] = "-B";
486
options[current++] = "-M"; options[current++] = "" + m_minNumObj;
488
options[current++] = "-A";
492
options[current++] = "-E";
495
while (current < options.length) {
496
options[current++] = "";
502
* Returns the tip text for this property
503
* @return tip text for this property suitable for
504
* displaying in the explorer/experimenter gui
506
public String useLaplaceTipText() {
507
return "Whether counts at leaves are smoothed based on Laplace.";
511
* Get the value of useLaplace.
513
* @return Value of useLaplace.
515
public boolean getUseLaplace() {
521
* Set the value of useLaplace.
523
* @param newuseLaplace Value to assign to useLaplace.
525
public void setUseLaplace(boolean newuseLaplace) {
527
m_useLaplace = newuseLaplace;
531
* Returns a description of the classifier.
533
* @return a description of the classifier
535
public String toString() {
537
if (m_root == null) {
538
return "No classifier built";
541
return "J48graft unpruned tree\n------------------\n" + m_root.toString();
543
return "J48graft pruned tree\n------------------\n" + m_root.toString();
547
* Returns a superconcise version of the model
549
* @return a summary of the model
551
public String toSummaryString() {
553
return "Number of leaves: " + m_root.numLeaves() + "\n"
554
+ "Size of the tree: " + m_root.numNodes() + "\n";
558
* Returns the size of the tree
559
* @return the size of the tree
561
public double measureTreeSize() {
562
return m_root.numNodes();
566
* Returns the number of leaves
567
* @return the number of leaves
569
public double measureNumLeaves() {
570
return m_root.numLeaves();
574
* Returns the number of rules (same as number of leaves)
575
* @return the number of rules
577
public double measureNumRules() {
578
return m_root.numLeaves();
582
* Returns an enumeration of the additional measure names
583
* @return an enumeration of the measure names
585
public Enumeration enumerateMeasures() {
586
Vector newVector = new Vector(3);
587
newVector.addElement("measureTreeSize");
588
newVector.addElement("measureNumLeaves");
589
newVector.addElement("measureNumRules");
590
return newVector.elements();
594
* Returns the value of the named measure
595
* @param additionalMeasureName the name of the measure to query for its value
596
* @return the value of the named measure
597
* @throws IllegalArgumentException if the named measure is not supported
599
public double getMeasure(String additionalMeasureName) {
600
if (additionalMeasureName.compareTo("measureNumRules") == 0) {
601
return measureNumRules();
602
} else if (additionalMeasureName.compareTo("measureTreeSize") == 0) {
603
return measureTreeSize();
604
} else if (additionalMeasureName.compareTo("measureNumLeaves") == 0) {
605
return measureNumLeaves();
607
throw new IllegalArgumentException(additionalMeasureName
608
+ " not supported (j48)");
613
* Returns the tip text for this property
614
* @return tip text for this property suitable for
615
* displaying in the explorer/experimenter gui
617
public String unprunedTipText() {
618
return "Whether pruning is performed.";
622
* Get the value of unpruned.
624
* @return Value of unpruned.
626
public boolean getUnpruned() {
632
* Set the value of unpruned.
633
* @param v Value to assign to unpruned.
635
public void setUnpruned(boolean v) {
641
* Returns the tip text for this property
642
* @return tip text for this property suitable for
643
* displaying in the explorer/experimenter gui
645
public String relabelTipText() {
646
return "Whether relabelling is allowed during grafting.";
650
* Get the value of relabelling
652
* @return Value of relabelling.
654
public boolean getRelabel() {
660
* Set the value of relabelling.
662
* @param v Value to assign to relabelling flag.
664
public void setRelabel(boolean v) {
669
* Returns the tip text for this property
670
* @return tip text for this property suitable for
671
* displaying in the explorer/experimenter gui
673
public String confidenceFactorTipText() {
674
return "The confidence factor used for pruning (smaller values incur "
679
* Get the value of CF.
681
* @return Value of CF.
683
public float getConfidenceFactor() {
689
* Set the value of CF.
691
* @param v Value to assign to CF.
693
public void setConfidenceFactor(float v) {
699
* Returns the tip text for this property
700
* @return tip text for this property suitable for
701
* displaying in the explorer/experimenter gui
703
public String minNumObjTipText() {
704
return "The minimum number of instances per leaf.";
708
* Get the value of minNumObj.
710
* @return Value of minNumObj.
712
public int getMinNumObj() {
718
* Set the value of minNumObj.
720
* @param v Value to assign to minNumObj.
722
public void setMinNumObj(int v) {
728
* Returns the tip text for this property
729
* @return tip text for this property suitable for
730
* displaying in the explorer/experimenter gui
732
public String binarySplitsTipText() {
733
return "Whether to use binary splits on nominal attributes when "
734
+ "building the trees.";
738
* Get the value of binarySplits.
740
* @return Value of binarySplits.
742
public boolean getBinarySplits() {
744
return m_binarySplits;
748
* Set the value of binarySplits.
750
* @param v Value to assign to binarySplits.
752
public void setBinarySplits(boolean v) {
758
* Returns the tip text for this property
759
* @return tip text for this property suitable for
760
* displaying in the explorer/experimenter gui
762
public String subtreeRaisingTipText() {
763
return "Whether to consider the subtree raising operation when pruning.";
767
* Get the value of subtreeRaising.
769
* @return Value of subtreeRaising.
771
public boolean getSubtreeRaising() {
773
return m_subtreeRaising;
777
* Set the value of subtreeRaising.
779
* @param v Value to assign to subtreeRaising.
781
public void setSubtreeRaising(boolean v) {
783
m_subtreeRaising = v;
787
* Returns the tip text for this property
788
* @return tip text for this property suitable for
789
* displaying in the explorer/experimenter gui
791
public String saveInstanceDataTipText() {
792
return "Whether to save the training data for visualization.";
796
* Check whether instance data is to be saved.
798
* @return true if instance data is saved
800
public boolean getSaveInstanceData() {
806
* Set whether instance data is to be saved.
807
* @param v true if instance data is to be saved
809
public void setSaveInstanceData(boolean v) {
815
* Main method for testing this class
817
* @param argv the commandline options
819
public static void main(String [] argv){
820
runClassifier(new J48graft(), argv);