2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
* ClassifierDecList.java
19
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
23
package weka.classifiers.rules.part;
25
import weka.classifiers.trees.j48.ClassifierSplitModel;
26
import weka.classifiers.trees.j48.Distribution;
27
import weka.classifiers.trees.j48.EntropySplitCrit;
28
import weka.classifiers.trees.j48.ModelSelection;
29
import weka.classifiers.trees.j48.NoSplit;
30
import weka.core.Instance;
31
import weka.core.Instances;
32
import weka.core.Utils;
34
import java.io.Serializable;
37
* Class for handling a rule (partial tree) for a decision list.
39
* @author Eibe Frank (eibe@cs.waikato.ac.nz)
40
* @version $Revision: 1.12 $
42
public class ClassifierDecList
43
implements Serializable {
45
/** for serialization */
46
private static final long serialVersionUID = 7284358349711992497L;
48
/** Minimum number of objects */
49
protected int m_minNumObj;
51
/** To compute the entropy. */
52
protected static EntropySplitCrit m_splitCrit = new EntropySplitCrit();
54
/** The model selection method. */
55
protected ModelSelection m_toSelectModel;
57
/** Local model at node. */
58
protected ClassifierSplitModel m_localModel;
60
/** References to sons. */
61
protected ClassifierDecList [] m_sons;
63
/** True if node is leaf. */
64
protected boolean m_isLeaf;
66
/** True if node is empty. */
67
protected boolean m_isEmpty;
69
/** The training instances. */
70
protected Instances m_train;
72
/** The pruning instances. */
73
protected Distribution m_test;
75
/** Which son to expand? */
79
* Constructor - just calls constructor of class DecList.
81
public ClassifierDecList(ModelSelection toSelectLocModel, int minNum){
83
m_toSelectModel = toSelectLocModel;
88
* Method for building a pruned partial tree.
90
* @exception Exception if something goes wrong
92
public void buildRule(Instances data) throws Exception {
94
buildDecList(data, false);
96
cleanup(new Instances(data, 0));
100
* Builds the partial tree without hold out set.
102
* @exception Exception if something goes wrong
104
public void buildDecList(Instances data, boolean leaf) throws Exception {
106
Instances [] localInstances,localPruneInstances;
118
sumOfWeights = data.sumOfWeights();
119
noSplit = new NoSplit (new Distribution((Instances)data));
121
m_localModel = noSplit;
123
m_localModel = m_toSelectModel.selectModel(data);
124
if (m_localModel.numSubsets() > 1) {
125
localInstances = m_localModel.split(data);
127
m_sons = new ClassifierDecList [m_localModel.numSubsets()];
133
for (j = 0; j < m_sons.length; j++)
134
if (m_sons[j] == null)
135
m_sons[j] = getNewDecList(localInstances[j],true);
137
m_localModel = noSplit;
140
if (Utils.eq(sumOfWeights,0))
147
m_sons[ind] = getNewDecList(localInstances[ind],false);
148
} while ((i < m_sons.length) && (m_sons[ind].m_isLeaf));
151
indeX = chooseLastIndex();
154
if (Utils.eq(sumOfWeights, 0))
160
* Classifies an instance.
162
* @exception Exception if something goes wrong
164
public double classifyInstance(Instance instance)
172
for (j = 0; j < instance.numClasses();
174
currentProb = getProbs(j,instance,1);
175
if (Utils.gr(currentProb,maxProb)){
177
maxProb = currentProb;
180
if (Utils.eq(maxProb,0))
183
return (double)maxIndex;
187
* Returns class probabilities for a weighted instance.
189
* @exception Exception if something goes wrong
191
public final double [] distributionForInstance(Instance instance)
196
new double[instance.numClasses()];
198
for (int i = 0; i < doubles.length; i++)
199
doubles[i] = getProbs(i,instance,1);
205
* Returns the weight a rule assigns to an instance.
207
* @exception Exception if something goes wrong
209
public double weight(Instance instance) throws Exception {
215
subset = m_localModel.whichSubset(instance);
217
return (m_localModel.weights(instance))[indeX]*
218
m_sons[indeX].weight(instance);
220
return m_sons[indeX].weight(instance);
225
* Cleanup in order to save memory.
227
public final void cleanup(Instances justHeaderInfo) {
229
m_train = justHeaderInfo;
232
for (int i = 0; i < m_sons.length; i++)
233
if (m_sons[i] != null)
234
m_sons[i].cleanup(justHeaderInfo);
240
public String toString(){
245
text = new StringBuffer();
248
text.append(m_localModel.dumpLabel(0,m_train)+"\n");
253
return text.toString();
254
} catch (Exception e) {
255
return "Can't print rule.";
260
* Returns a newly created tree.
262
* @exception Exception if something goes wrong
264
protected ClassifierDecList getNewDecList(Instances train, boolean leaf)
267
ClassifierDecList newDecList = new ClassifierDecList(m_toSelectModel,
269
newDecList.buildDecList(train,leaf);
275
* Method for choosing a subset to expand.
277
public final int chooseIndex() {
280
double estimated, min = Double.MAX_VALUE;
283
for (i = 0; i < m_sons.length; i++)
284
if (son(i) == null) {
285
if (Utils.sm(localModel().distribution().perBag(i),
286
(double)m_minNumObj))
287
estimated = Double.MAX_VALUE;
290
for (j = 0; j < localModel().distribution().numClasses(); j++)
291
estimated -= m_splitCrit.logFunc(localModel().distribution().
292
perClassPerBag(i,j));
293
estimated += m_splitCrit.logFunc(localModel().distribution().
295
estimated /= localModel().distribution().perBag(i);
297
if (Utils.smOrEq(estimated,0))
299
if (Utils.sm(estimated,min)) {
309
* Choose last index (ie. choose rule).
311
public final int chooseLastIndex() {
314
double estimated, min = Double.MAX_VALUE;
317
for (int i = 0; i < m_sons.length; i++)
318
if (son(i) != null) {
319
if (Utils.grOrEq(localModel().distribution().perBag(i),
320
(double)m_minNumObj)) {
321
estimated = son(i).getSizeOfBranch();
322
if (Utils.sm(estimated,min)) {
333
* Returns the number of instances covered by a branch
335
protected double getSizeOfBranch() {
338
return -localModel().distribution().total();
340
return son(indeX).getSizeOfBranch();
344
* Help method for printing tree structure.
346
private void dumpDecList(StringBuffer text) throws Exception {
348
text.append(m_localModel.leftSide(m_train));
349
text.append(m_localModel.rightSide(indeX, m_train));
350
if (m_sons[indeX].m_isLeaf){
352
text.append(m_localModel.dumpLabel(indeX,m_train)+"\n");
354
text.append(" AND\n");
355
m_sons[indeX].dumpDecList(text);
360
* Dumps the partial tree (only used for debugging)
362
* @exception Exception Exception if something goes wrong
364
private void dumpTree(int depth,StringBuffer text)
369
for (i=0;i<m_sons.length;i++){
371
for (j=0;j<depth;j++)
373
text.append(m_localModel.leftSide(m_train));
374
text.append(m_localModel.rightSide(i, m_train));
375
if (m_sons[i] == null)
377
else if (m_sons[i].m_isLeaf){
379
text.append(m_localModel.dumpLabel(i,m_train));
381
m_sons[i].dumpTree(depth+1,text);
386
* Help method for computing class probabilities of
389
* @exception Exception Exception if something goes wrong
391
private double getProbs(int classIndex,Instance instance,
392
double weight) throws Exception {
398
return weight * localModel().classProb(classIndex, instance, -1);
400
treeIndex = localModel().whichSubset(instance);
401
if (treeIndex == -1) {
402
weights = localModel().weights(instance);
403
return son(indeX).getProbs(classIndex, instance,
404
weights[indeX] * weight);
406
if (treeIndex == indeX) {
407
return son(indeX).getProbs(classIndex, instance, weight);
416
* Method just exists to make program easier to read.
418
protected ClassifierSplitModel localModel(){
420
return (ClassifierSplitModel)m_localModel;
424
* Method just exists to make program easier to read.
426
protected ClassifierDecList son(int index){
428
return m_sons[index];