2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 2002 University of Waikato, Hamilton, New Zealand
23
package weka.classifiers.misc;
25
import weka.classifiers.Classifier;
26
import weka.core.Attribute;
27
import weka.core.Capabilities;
28
import weka.core.Instance;
29
import weka.core.Instances;
30
import weka.core.UnsupportedAttributeTypeException;
31
import weka.core.Utils;
32
import weka.core.Capabilities.Capability;
34
import java.io.Serializable;
37
<!-- globalinfo-start -->
38
* Class implementing a HyperPipe classifier. For each category a HyperPipe is constructed that contains all points of that category (essentially records the attribute bounds observed for each category). Test instances are classified according to the category that "most contains the instance".<br/>
39
* Does not handle numeric class, or missing values in test cases. Extremely simple algorithm, but has the advantage of being extremely fast, and works quite well when you have "smegloads" of attributes.
41
<!-- globalinfo-end -->
43
<!-- options-start -->
44
* Valid options are: <p/>
47
* If set, classifier is run in debug mode and
48
* may output additional info to the console</pre>
52
* @author Lucio de Souza Coelho (lucio@intelligenesis.net)
53
* @author Len Trigg (len@reeltwo.com)
54
* @version $Revision: 1.20 $
56
public class HyperPipes
59
/** for serialization */
60
static final long serialVersionUID = -7527596632268975274L;
62
/** The index of the class attribute */
63
protected int m_ClassIndex;
65
/** The structure of the training data */
66
protected Instances m_Instances;
68
/** Stores the HyperPipe for each class */
69
protected HyperPipe [] m_HyperPipes;
71
/** a ZeroR model in case no model can be built from the data */
72
protected Classifier m_ZeroR;
75
* Returns a string describing classifier
76
* @return a description suitable for
77
* displaying in the explorer/experimenter gui
79
public String globalInfo() {
81
return "Class implementing a HyperPipe classifier. For each category a "
82
+ "HyperPipe is constructed that contains all points of that category "
83
+ "(essentially records the attribute bounds observed for each category). "
84
+ "Test instances are classified according to the category that \"most "
85
+ "contains the instance\".\n"
86
+ "Does not handle numeric class, or missing values in test cases. Extremely "
87
+ "simple algorithm, but has the advantage of being extremely fast, and "
88
+ "works quite well when you have \"smegloads\" of attributes.";
92
* Represents an n-dimensional structure that bounds all instances
93
* passed to it (generally all of a given class value).
96
implements Serializable {
98
/** for serialization */
99
static final long serialVersionUID = 3972254260367902025L;
101
/** Contains the numeric bounds of all instances in the HyperPipe */
102
protected double [][] m_NumericBounds;
104
/** Contains the nominal bounds of all instances in the HyperPipe */
105
protected boolean [][] m_NominalBounds;
108
* Creates the HyperPipe as the n-dimensional parallel-piped
109
* with minimum volume containing all the points in
112
* @param instances all instances belonging to the same class
113
* @throws Exception if missing values are found
115
public HyperPipe(Instances instances) throws Exception {
117
m_NumericBounds = new double [instances.numAttributes()][];
118
m_NominalBounds = new boolean [instances.numAttributes()][];
120
for (int i = 0; i < instances.numAttributes(); i++) {
121
switch (instances.attribute(i).type()) {
122
case Attribute.NUMERIC:
123
m_NumericBounds[i] = new double [2];
124
m_NumericBounds[i][0] = Double.POSITIVE_INFINITY;
125
m_NumericBounds[i][1] = Double.NEGATIVE_INFINITY;
127
case Attribute.NOMINAL:
128
m_NominalBounds[i] = new boolean [instances.attribute(i).numValues()];
131
throw new UnsupportedAttributeTypeException("Cannot process string attributes!");
135
for (int i = 0; i < instances.numInstances(); i++) {
136
addInstance(instances.instance(i));
142
* Updates the bounds arrays with a single instance. Missing values
143
* are ignored (i.e. they don't change the bounds for that attribute)
145
* @param instance the instance
146
* @throws Exception if any missing values are encountered
148
public void addInstance(Instance instance) throws Exception {
150
for (int j = 0; j < instance.numAttributes(); j++) {
151
if ((j != m_ClassIndex) && (!instance.isMissing(j))) {
153
double current = instance.value(j);
155
if (m_NumericBounds[j] != null) { // i.e. a numeric attribute
156
if (current < m_NumericBounds[j][0])
157
m_NumericBounds[j][0] = current;
158
if (current > m_NumericBounds[j][1])
159
m_NumericBounds[j][1] = current;
161
} else { // i.e. a nominal attribute
162
m_NominalBounds[j][(int) current] = true;
170
* Returns the fraction of the dimensions of a given instance with
171
* values lying within the corresponding bounds of the HyperPipe.
173
* @param instance the instance
174
* @return the fraction of dimensions
175
* @throws Exception if any missing values are encountered
177
public double partialContains(Instance instance) throws Exception {
180
for (int i = 0; i < instance.numAttributes(); i++) {
182
if (i == m_ClassIndex) {
185
if (instance.isMissing(i)) {
189
double current = instance.value(i);
191
if (m_NumericBounds[i] != null) { // i.e. a numeric attribute
192
if ((current >= m_NumericBounds[i][0])
193
&& (current <= m_NumericBounds[i][1])) {
196
} else { // i.e. a nominal attribute
197
if (m_NominalBounds[i][(int) current]) {
203
return ((double)count) / (instance.numAttributes() - 1);
209
* Returns default capabilities of the classifier.
211
* @return the capabilities of this classifier
213
public Capabilities getCapabilities() {
214
Capabilities result = super.getCapabilities();
217
result.enable(Capability.NOMINAL_ATTRIBUTES);
218
result.enable(Capability.NUMERIC_ATTRIBUTES);
219
result.enable(Capability.DATE_ATTRIBUTES);
220
result.enable(Capability.MISSING_VALUES);
223
result.enable(Capability.NOMINAL_CLASS);
224
result.enable(Capability.MISSING_CLASS_VALUES);
227
result.setMinimumNumberInstances(0);
233
* Generates the classifier.
235
* @param instances set of instances serving as training data
236
* @throws Exception if the classifier has not been generated successfully
238
public void buildClassifier(Instances instances) throws Exception {
240
// can classifier handle the data?
241
getCapabilities().testWithFail(instances);
243
// remove instances with missing class
244
instances = new Instances(instances);
245
instances.deleteWithMissingClass();
247
// only class? -> build ZeroR model
248
if (instances.numAttributes() == 1) {
250
"Cannot build model (only class attribute present in data!), "
251
+ "using ZeroR model instead!");
252
m_ZeroR = new weka.classifiers.rules.ZeroR();
253
m_ZeroR.buildClassifier(instances);
260
m_ClassIndex = instances.classIndex();
261
m_Instances = new Instances(instances, 0); // Copy the structure for ref
263
// Create the HyperPipe for each class
264
m_HyperPipes = new HyperPipe [instances.numClasses()];
265
for (int i = 0; i < m_HyperPipes.length; i++) {
266
m_HyperPipes[i] = new HyperPipe(new Instances(instances, 0));
270
for (int i = 0; i < instances.numInstances(); i++) {
271
updateClassifier(instances.instance(i));
277
* Updates the classifier.
279
* @param instance the instance to be put into the classifier
280
* @throws Exception if the instance could not be included successfully
282
public void updateClassifier(Instance instance) throws Exception {
284
if (instance.classIsMissing()) {
287
m_HyperPipes[(int) instance.classValue()].addInstance(instance);
292
* Classifies the given test instance.
294
* @param instance the instance to be classified
295
* @return the predicted class for the instance
296
* @throws Exception if the instance can't be classified
298
public double [] distributionForInstance(Instance instance) throws Exception {
301
if (m_ZeroR != null) {
302
return m_ZeroR.distributionForInstance(instance);
305
double [] dist = new double[m_HyperPipes.length];
307
for (int j = 0; j < m_HyperPipes.length; j++) {
308
dist[j] = m_HyperPipes[j].partialContains(instance);
311
double sum = Utils.sum(dist);
313
for (int j = 0; j < dist.length; j++) {
314
dist[j] = 1.0 / (double)dist.length;
318
Utils.normalize(dist, sum);
325
* Returns a description of this classifier.
327
* @return a description of this classifier as a string.
329
public String toString() {
332
if (m_ZeroR != null) {
333
StringBuffer buf = new StringBuffer();
334
buf.append(this.getClass().getName().replaceAll(".*\\.", "") + "\n");
335
buf.append(this.getClass().getName().replaceAll(".*\\.", "").replaceAll(".", "=") + "\n\n");
336
buf.append("Warning: No model could be built, hence ZeroR model is used:\n\n");
337
buf.append(m_ZeroR.toString());
338
return buf.toString();
341
if (m_HyperPipes == null) {
342
return ("HyperPipes classifier");
345
StringBuffer text = new StringBuffer("HyperPipes classifier\n");
347
/* Perhaps print out the bounds for each HyperPipe.
348
for (int i = 0; i < m_HyperPipes.length; i++) {
349
text.append("HyperPipe for class: "
350
+ m_Instances.attribute(m_ClassIndex).value(i) + "\n");
351
text.append(m_HyperPipes[i] + "\n\n");
355
return text.toString();
360
* Main method for testing this class.
362
* @param argv should contain command line arguments for evaluation
365
public static void main(String [] argv) {
366
runClassifier(new HyperPipes(), argv);