2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
* NBTreeModelSelection.java
19
* Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
23
package weka.classifiers.trees.j48;
25
import weka.core.Attribute;
26
import weka.core.Instances;
27
import weka.core.Utils;
29
import java.util.Enumeration;
32
* Class for selecting an NB tree split.
34
* @author Mark Hall (mhall@cs.waikato.ac.nz)
35
* @version $Revision: 1.4 $
37
public class NBTreeModelSelection
38
extends ModelSelection {
40
/** Version identifier used for serialization. */
41
private static final long serialVersionUID = 990097748931976704L;
43
/**
 * Minimum number of objects in interval. Set by the constructor; in
 * selectModel it is compared against checkDistribution.total() and passed
 * to every NBTreeSplit candidate that is created.
 */
44
private int m_minNoObj;
46
/**
 * All the training data. When non-null, selectModel uses its instance count
 * (scaled by 0.3) as the threshold in the per-attribute numValues() check.
 */
47
private Instances m_allData; //
50
* Initializes the split selection method with the given parameters.
52
* @param minNoObj minimum number of instances that have to occur in at least two
53
* subsets induced by split
54
* @param allData FULL training dataset (necessary for
55
* selection of split points).
57
// Constructor: records the minimum-object threshold in m_minNoObj.
// NOTE(review): no statement using allData is visible in this extract;
// confirm against the complete file that it is stored in m_allData.
public NBTreeModelSelection(int minNoObj, Instances allData) {
58
m_minNoObj = minNoObj;
63
* Sets reference to training data to null.
65
// NOTE(review): the body of this method is not visible in this extract.
// The description preceding it says it sets the reference to the training
// data to null; confirm against the complete file.
public void cleanup() {
71
* Selects NBTree-type split for the given dataset.
73
// Selects a split model for the given data.
//
// Visible steps: build an NBTreeNoSplit model on data and read its errors;
// run a series of guard checks; build one NBTreeSplit candidate per
// non-class attribute; keep in bestModel the candidate whose getErrors()
// is lowest and strictly below the no-split errors; finally test the
// relative error reduction against 0.05.
//
// @param data the instances the no-split model and all candidates are built on
//
// NOTE(review): this extract is missing lines. The bodies of the guard
// branches, every return statement, and the declarations of minResult,
// validModels, attribute, sumOfWeights and i are not visible here, so what
// each guard returns cannot be confirmed from this view.
public final ClassifierSplitModel selectModel(Instances data){
75
double globalErrors = 0;
79
NBTreeSplit [] currentModel;
80
NBTreeSplit bestModel = null;
81
NBTreeNoSplit noSplitModel = null;
83
// NOTE(review): no visible line reads or updates multiVal after this
// initialization; any use must be in lines missing from this extract.
boolean multiVal = true;
84
Distribution checkDistribution;
90
// build the global model at this node
91
noSplitModel = new NBTreeNoSplit();
92
noSplitModel.buildClassifier(data);
93
// Guard for datasets with fewer than 5 instances (branch body not visible).
if (data.numInstances() < 5) {
98
// Errors of the no-split model; used below as the baseline that split
// candidates have to beat.
globalErrors = noSplitModel.getErrors();
99
// Guard for a no-split model that already has zero errors (branch body
// not visible). Being placed before the division by globalErrors further
// down, it presumably also keeps that division away from zero; confirm.
if (globalErrors == 0) {
103
// Check if all Instances belong to one class or if not
104
// enough Instances to split.
105
checkDistribution = new Distribution(data);
106
// First operand: total() compared with m_minNoObj via Utils.sm.
// Second operand: total() compared via Utils.eq with the perClass() value
// of maxClass().
// NOTE(review): Utils.sm / Utils.eq are presumably "smaller than" /
// "equal to" comparisons; their exact semantics are defined outside this file.
if (Utils.sm(checkDistribution.total(), m_minNoObj) ||
107
Utils.eq(checkDistribution.total(),
108
checkDistribution.perClass(checkDistribution.maxClass()))) {
112
// Check if all attributes are nominal and have a
// (the rest of the original comment is missing from this extract).
// The loop below looks for an attribute that is numeric, or whose
// numValues() compares as smaller than 0.3 * m_allData.numInstances().
114
if (m_allData != null) {
115
Enumeration enu = data.enumerateAttributes();
116
while (enu.hasMoreElements()) {
117
attribute = (Attribute) enu.nextElement();
118
if ((attribute.isNumeric()) ||
119
(Utils.sm((double)attribute.numValues(),
120
(0.3*(double)m_allData.numInstances())))){
127
// One slot per attribute index, so candidates can be addressed by index.
currentModel = new NBTreeSplit[data.numAttributes()];
128
sumOfWeights = data.sumOfWeights();
130
// For each attribute.
131
for (i = 0; i < data.numAttributes(); i++){
133
// Apart from class attribute.
134
if (i != (data).classIndex()){
136
// Get models for current attribute.
137
currentModel[i] = new NBTreeSplit(i,m_minNoObj,sumOfWeights);
138
// The candidate is given the no-split model before it is built.
currentModel[i].setGlobalModel(noSplitModel);
139
currentModel[i].buildClassifier(data);
141
// Check if useful split for current attribute
142
// exists and check for enumerated attributes with
144
if (currentModel[i].checkModel()){
148
// NOTE(review): presumably the else-branch that clears the slot of the
// class attribute; the enclosing else is not visible in this extract.
currentModel[i] = null;
152
// Check if any useful split was found.
153
if (validModels == 0) {
157
// Find "best" attribute to split on.
158
// Start from the no-split errors, so only a candidate with strictly fewer
// errors can become bestModel.
minResult = globalErrors;
159
for (i=0;i<data.numAttributes();i++){
160
// The class-index test comes first, so && short-circuits before the entry
// at the class index is dereferenced.
if ((i != (data).classIndex()) &&
161
(currentModel[i].checkModel())) {
162
/* System.err.println("Errors for "+data.attribute(i).name()+" "+
163
currentModel[i].getErrors()); */
164
if (currentModel[i].getErrors() < minResult) {
165
bestModel = currentModel[i];
166
minResult = currentModel[i].getErrors();
171
// Check if useful split was found.
174
// Relative error reduction of the best candidate against the no-split
// model, tested against 0.05 (branch body not visible in this extract).
if (((globalErrors - minResult) / globalErrors) < 0.05) {
178
/* if (bestModel == null) {
179
System.err.println("This shouldn't happen! glob : "+globalErrors+
180
" minRes : "+minResult);
183
// Set the global model for the best split
184
// bestModel.setGlobalModel(noSplitModel);
194
* Selects NBTree-type split for the given dataset.
196
// Two-dataset overload: delegates to selectModel(train). The visible lines
// do not use the test argument.
public final ClassifierSplitModel selectModel(Instances train, Instances test) {
198
return selectModel(train);