/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/*
 * PruneableClassifierTree.java
 * Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
 */
23
package weka.classifiers.trees.j48;
25
import weka.core.Capabilities;
26
import weka.core.Instances;
27
import weka.core.Utils;
28
import weka.core.Capabilities.Capability;
30
import java.util.Random;
33
* Class for handling a tree structure that can
34
* be pruned using a pruning set.
36
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
37
* @version $Revision: 1.11 $
39
public class PruneableClassifierTree
40
extends ClassifierTree {
42
/** for serialization */
43
static final long serialVersionUID = -555775736857600201L;
45
/** True if the tree is to be pruned. */
46
private boolean pruneTheTree = false;
48
/** How many subsets of equal size? One used for pruning, the rest for training. */
49
private int numSets = 3;
51
/** Cleanup after the tree has been built. */
52
private boolean m_cleanup = true;
54
/** The random number seed. */
55
private int m_seed = 1;
58
* Constructor for pruneable tree structure. Stores reference
59
* to associated training data at each node.
61
* @param toSelectLocModel selection method for local splitting model
62
* @param pruneTree true if the tree is to be pruned
63
* @param num number of subsets of equal size
65
* @param seed the seed value to use
66
* @throws Exception if something goes wrong
68
public PruneableClassifierTree(ModelSelection toSelectLocModel,
69
boolean pruneTree, int num, boolean cleanup,
73
super(toSelectLocModel);
75
pruneTheTree = pruneTree;
82
* Returns default capabilities of the classifier tree.
84
* @return the capabilities of this classifier tree
86
public Capabilities getCapabilities() {
87
Capabilities result = super.getCapabilities();
90
result.enable(Capability.NOMINAL_ATTRIBUTES);
91
result.enable(Capability.NUMERIC_ATTRIBUTES);
92
result.enable(Capability.DATE_ATTRIBUTES);
93
result.enable(Capability.MISSING_VALUES);
96
result.enable(Capability.NOMINAL_CLASS);
97
result.enable(Capability.MISSING_CLASS_VALUES);
100
result.setMinimumNumberInstances(0);
106
* Method for building a pruneable classifier tree.
108
* @param data the data to build the tree from
109
* @throws Exception if tree can't be built successfully
111
public void buildClassifier(Instances data)
114
// can classifier tree handle the data?
115
getCapabilities().testWithFail(data);
117
// remove instances with missing class
118
data = new Instances(data);
119
data.deleteWithMissingClass();
121
Random random = new Random(m_seed);
122
data.stratify(numSets);
123
buildTree(data.trainCV(numSets, numSets - 1, random),
124
data.testCV(numSets, numSets - 1), false);
129
cleanup(new Instances(data, 0));
136
* @throws Exception if tree can't be pruned successfully
138
public void prune() throws Exception {
142
// Prune all subtrees.
143
for (int i = 0; i < m_sons.length; i++)
146
// Decide if leaf is best choice.
147
if (Utils.smOrEq(errorsForLeaf(),errorsForTree())) {
153
// Get NoSplit Model for node.
154
m_localModel = new NoSplit(localModel().distribution());
160
* Returns a newly created tree.
162
* @param train the training data
163
* @param test the test data
164
* @return the generated tree
165
* @throws Exception if something goes wrong
167
protected ClassifierTree getNewTree(Instances train, Instances test)
170
PruneableClassifierTree newTree =
171
new PruneableClassifierTree(m_toSelectModel, pruneTheTree, numSets, m_cleanup,
173
newTree.buildTree(train, test, false);
178
* Computes estimated errors for tree.
180
* @return the estimated errors
181
* @throws Exception if error estimate can't be computed
183
private double errorsForTree() throws Exception {
188
return errorsForLeaf();
190
for (int i = 0; i < m_sons.length; i++)
191
if (Utils.eq(localModel().distribution().perBag(i), 0)) {
192
errors += m_test.perBag(i)-
193
m_test.perClassPerBag(i,localModel().distribution().
196
errors += son(i).errorsForTree();
203
* Computes estimated errors for leaf.
205
* @return the estimated errors
206
* @throws Exception if error estimate can't be computed
208
private double errorsForLeaf() throws Exception {
210
return m_test.total()-
211
m_test.perClass(localModel().distribution().maxClass());
215
* Method just exists to make program easier to read.
217
private ClassifierSplitModel localModel() {
219
return (ClassifierSplitModel)m_localModel;
223
* Method just exists to make program easier to read.
225
private PruneableClassifierTree son(int index) {
227
return (PruneableClassifierTree)m_sons[index];