2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 1999 University of Waikato, Hamilton, New Zealand
23
package weka.classifiers.lazy;
25
import weka.classifiers.Classifier;
26
import weka.classifiers.UpdateableClassifier;
27
import weka.core.Capabilities;
28
import weka.core.Instance;
29
import weka.core.Instances;
30
import weka.core.TechnicalInformation;
31
import weka.core.TechnicalInformationHandler;
32
import weka.core.Utils;
33
import weka.core.Capabilities.Capability;
34
import weka.core.TechnicalInformation.Field;
35
import weka.core.TechnicalInformation.Type;
37
import java.util.Enumeration;
40
/**
 <!-- globalinfo-start -->
 * Nearest-neighbour classifier. Uses normalized Euclidean distance to find the training instance closest to the given test instance, and predicts the same class as this training instance. If multiple instances have the same (smallest) distance to the test instance, the first one found is used.<br/>
 * <br/>
 * For more information, see <br/>
 * <br/>
 * D. Aha, D. Kibler (1991). Instance-based learning algorithms. Machine Learning. 6:37-66.
 * <p/>
 <!-- globalinfo-end -->
 *
 <!-- technical-bibtex-start -->
 * BibTeX:
 * <pre>
 * &#64;article{Aha1991,
 *    author = {D. Aha and D. Kibler},
 *    journal = {Machine Learning},
 *    pages = {37-66},
 *    title = {Instance-based learning algorithms},
 *    volume = {6},
 *    year = {1991}
 * }
 * </pre>
 * <p/>
 <!-- technical-bibtex-end -->
 *
 <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 <!-- options-end -->
 *
 * @author Stuart Inglis (singlis@cs.waikato.ac.nz)
 * @author Len Trigg (trigg@cs.waikato.ac.nz)
 * @author Eibe Frank (eibe@cs.waikato.ac.nz)
 * @version $Revision: 1.19 $
 */
80
implements UpdateableClassifier, TechnicalInformationHandler {
82
/** for serialization */
83
static final long serialVersionUID = -6152184127304895851L;
85
/** The training instances used for classification. */
86
private Instances m_Train;
88
/** The minimum values for numeric attributes. */
89
private double [] m_MinArray;
91
/** The maximum values for numeric attributes. */
92
private double [] m_MaxArray;
95
* Returns a string describing classifier
96
* @return a description suitable for
97
* displaying in the explorer/experimenter gui
99
public String globalInfo() {
101
return "Nearest-neighbour classifier. Uses normalized Euclidean distance to "
102
+ "find the training instance closest to the given test instance, and predicts "
103
+ "the same class as this training instance. If multiple instances have "
104
+ "the same (smallest) distance to the test instance, the first one found is "
106
+ "For more information, see \n\n"
107
+ getTechnicalInformation().toString();
111
* Returns an instance of a TechnicalInformation object, containing
112
* detailed information about the technical background of this class,
113
* e.g., paper reference or book this class is based on.
115
* @return the technical information about this class
117
public TechnicalInformation getTechnicalInformation() {
118
TechnicalInformation result;
120
result = new TechnicalInformation(Type.ARTICLE);
121
result.setValue(Field.AUTHOR, "D. Aha and D. Kibler");
122
result.setValue(Field.YEAR, "1991");
123
result.setValue(Field.TITLE, "Instance-based learning algorithms");
124
result.setValue(Field.JOURNAL, "Machine Learning");
125
result.setValue(Field.VOLUME, "6");
126
result.setValue(Field.PAGES, "37-66");
132
* Returns default capabilities of the classifier.
134
* @return the capabilities of this classifier
136
public Capabilities getCapabilities() {
137
Capabilities result = super.getCapabilities();
140
result.enable(Capability.NOMINAL_ATTRIBUTES);
141
result.enable(Capability.NUMERIC_ATTRIBUTES);
142
result.enable(Capability.DATE_ATTRIBUTES);
143
result.enable(Capability.MISSING_VALUES);
146
result.enable(Capability.NOMINAL_CLASS);
147
result.enable(Capability.MISSING_CLASS_VALUES);
150
result.setMinimumNumberInstances(0);
156
* Generates the classifier.
158
* @param instances set of instances serving as training data
159
* @throws Exception if the classifier has not been generated successfully
161
public void buildClassifier(Instances instances) throws Exception {
163
// can classifier handle the data?
164
getCapabilities().testWithFail(instances);
166
// remove instances with missing class
167
instances = new Instances(instances);
168
instances.deleteWithMissingClass();
170
m_Train = new Instances(instances, 0, instances.numInstances());
172
m_MinArray = new double [m_Train.numAttributes()];
173
m_MaxArray = new double [m_Train.numAttributes()];
174
for (int i = 0; i < m_Train.numAttributes(); i++) {
175
m_MinArray[i] = m_MaxArray[i] = Double.NaN;
177
Enumeration enu = m_Train.enumerateInstances();
178
while (enu.hasMoreElements()) {
179
updateMinMax((Instance) enu.nextElement());
184
* Updates the classifier.
186
* @param instance the instance to be put into the classifier
187
* @throws Exception if the instance could not be included successfully
189
public void updateClassifier(Instance instance) throws Exception {
191
if (m_Train.equalHeaders(instance.dataset()) == false) {
192
throw new Exception("Incompatible instance types");
194
if (instance.classIsMissing()) {
197
m_Train.add(instance);
198
updateMinMax(instance);
202
* Classifies the given test instance.
204
* @param instance the instance to be classified
205
* @return the predicted class for the instance
206
* @throws Exception if the instance can't be classified
208
public double classifyInstance(Instance instance) throws Exception {
210
if (m_Train.numInstances() == 0) {
211
throw new Exception("No training instances!");
214
double distance, minDistance = Double.MAX_VALUE, classValue = 0;
215
updateMinMax(instance);
216
Enumeration enu = m_Train.enumerateInstances();
217
while (enu.hasMoreElements()) {
218
Instance trainInstance = (Instance) enu.nextElement();
219
if (!trainInstance.classIsMissing()) {
220
distance = distance(instance, trainInstance);
221
if (distance < minDistance) {
222
minDistance = distance;
223
classValue = trainInstance.classValue();
232
* Returns a description of this classifier.
234
* @return a description of this classifier as a string.
236
public String toString() {
238
return ("IB1 classifier");
242
* Calculates the distance between two instances
244
* @param first the first instance
245
* @param second the second instance
246
* @return the distance between the two given instances
248
private double distance(Instance first, Instance second) {
250
double diff, distance = 0;
252
for(int i = 0; i < m_Train.numAttributes(); i++) {
253
if (i == m_Train.classIndex()) {
256
if (m_Train.attribute(i).isNominal()) {
258
// If attribute is nominal
259
if (first.isMissing(i) || second.isMissing(i) ||
260
((int)first.value(i) != (int)second.value(i))) {
265
// If attribute is numeric
266
if (first.isMissing(i) || second.isMissing(i)){
267
if (first.isMissing(i) && second.isMissing(i)) {
270
if (second.isMissing(i)) {
271
diff = norm(first.value(i), i);
273
diff = norm(second.value(i), i);
280
diff = norm(first.value(i), i) - norm(second.value(i), i);
282
distance += diff * diff;
290
* Normalizes a given value of a numeric attribute.
292
* @param x the value to be normalized
293
* @param i the attribute's index
294
* @return the normalized value
296
private double norm(double x,int i) {
298
if (Double.isNaN(m_MinArray[i])
299
|| Utils.eq(m_MaxArray[i], m_MinArray[i])) {
302
return (x - m_MinArray[i]) / (m_MaxArray[i] - m_MinArray[i]);
307
* Updates the minimum and maximum values for all the attributes
308
* based on a new instance.
310
* @param instance the new instance
312
private void updateMinMax(Instance instance) {
314
for (int j = 0;j < m_Train.numAttributes(); j++) {
315
if ((m_Train.attribute(j).isNumeric()) && (!instance.isMissing(j))) {
316
if (Double.isNaN(m_MinArray[j])) {
317
m_MinArray[j] = instance.value(j);
318
m_MaxArray[j] = instance.value(j);
320
if (instance.value(j) < m_MinArray[j]) {
321
m_MinArray[j] = instance.value(j);
323
if (instance.value(j) > m_MaxArray[j]) {
324
m_MaxArray[j] = instance.value(j);
333
* Main method for testing this class.
335
* @param argv should contain command line arguments for evaluation
338
public static void main(String [] argv) {
339
runClassifier(new IB1(), argv);