2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
* Copyright (C) 1999-2007 University of Waikato
22
package weka.core.neighboursearch;
24
import weka.core.Instance;
25
import weka.core.Instances;
26
import weka.core.Option;
27
import weka.core.Utils;
29
import java.util.Enumeration;
30
import java.util.Vector;
33
<!-- globalinfo-start -->
34
* Class implementing the brute force search algorithm for nearest neighbour search.
36
<!-- globalinfo-end -->
38
<!-- options-start -->
39
* Valid options are: <p/>
42
* Skip identical instances (distances equal to zero).
47
* @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
48
* @version $Revision: 1.1 $
50
public class LinearNNSearch
51
extends NearestNeighbourSearch {
53
/** for serialization. */
54
private static final long serialVersionUID = 1915484723703917241L;
56
/** Array holding the distances of the nearest neighbours. It is filled up
57
* both by nearestNeighbour() and kNearestNeighbours().
59
protected double[] m_Distances;
61
/** Whether to skip instances from the neighbours that are identical to the query instance. */
62
protected boolean m_SkipIdentical = false;
65
* Constructor. Needs setInstances(Instances)
66
* to be called before the class is usable.
68
public LinearNNSearch() {
73
* Constructor that uses the supplied set of
76
* @param insts the instances to use
78
public LinearNNSearch(Instances insts) {
80
m_DistanceFunction.setInstances(insts);
84
* Returns a string describing this nearest neighbour search algorithm.
86
* @return a description of the algorithm for displaying in the
87
* explorer/experimenter gui
89
public String globalInfo() {
91
"Class implementing the brute force search algorithm for nearest "
92
+ "neighbour search.";
96
* Returns an enumeration describing the available options.
98
* @return an enumeration of all the available options.
100
public Enumeration listOptions() {
101
Vector result = new Vector();
103
result.add(new Option(
104
"\tSkip identical instances (distances equal to zero).\n",
107
return result.elements();
111
* Parses a given list of options. <p/>
113
<!-- options-start -->
114
* Valid options are: <p/>
117
* Skip identical instances (distances equal to zero).
122
* @param options the list of options as an array of strings
123
* @throws Exception if an option is not supported
125
public void setOptions(String[] options) throws Exception {
126
super.setOptions(options);
128
setSkipIdentical(Utils.getFlag('S', options));
132
* Gets the current settings.
134
* @return an array of strings suitable for passing to setOptions()
136
public String[] getOptions() {
137
Vector<String> result;
141
result = new Vector<String>();
143
options = super.getOptions();
144
for (i = 0; i < options.length; i++)
145
result.add(options[i]);
147
if (getSkipIdentical())
150
return result.toArray(new String[result.size()]);
154
* Returns the tip text for this property.
156
* @return tip text for this property suitable for
157
* displaying in the explorer/experimenter gui
159
public String skipIdenticalTipText() {
160
return "Whether to skip identical instances (with distance 0 to the target)";
164
* Sets the property to skip identical instances (with distance zero from
165
* the target) from the set of neighbours returned.
167
* @param skip if true, identical intances are skipped
169
public void setSkipIdentical(boolean skip) {
170
m_SkipIdentical = skip;
174
* Gets whether if identical instances are skipped from the neighbourhood.
176
* @return true if identical instances are skipped
178
public boolean getSkipIdentical() {
179
return m_SkipIdentical;
184
* Returns the nearest instance in the current neighbourhood to the supplied
187
* @param target The instance to find the nearest neighbour for.
188
* @return the nearest instance
189
* @throws Exception if the nearest neighbour could not be found.
191
public Instance nearestNeighbour(Instance target) throws Exception {
192
return (kNearestNeighbours(target, 1)).instance(0);
196
* Returns k nearest instances in the current neighbourhood to the supplied
199
* @param target The instance to find the k nearest neighbours for.
200
* @param kNN The number of nearest neighbours to find.
201
* @return the k nearest neighbors
202
* @throws Exception if the neighbours could not be found.
204
public Instances kNearestNeighbours(Instance target, int kNN) throws Exception {
210
m_Stats.searchStart();
212
MyHeap heap = new MyHeap(kNN);
213
double distance; int firstkNN=0;
214
for(int i=0; i<m_Instances.numInstances(); i++) {
215
if(target == m_Instances.instance(i)) //for hold-one-out cross-validation
218
m_Stats.incrPointCount();
221
System.out.println("K(a): "+(heap.size()+heap.noOfKthNearest()));
222
distance = m_DistanceFunction.distance(target, m_Instances.instance(i), Double.POSITIVE_INFINITY, m_Stats);
223
if(distance == 0.0 && m_SkipIdentical)
224
if(i<m_Instances.numInstances()-1)
227
heap.put(i, distance);
228
heap.put(i, distance);
232
MyHeapElement temp = heap.peek();
234
System.out.println("K(b): "+(heap.size()+heap.noOfKthNearest()));
235
distance = m_DistanceFunction.distance(target, m_Instances.instance(i), temp.distance, m_Stats);
236
if(distance == 0.0 && m_SkipIdentical)
238
if(distance < temp.distance) {
239
heap.putBySubstitute(i, distance);
241
else if(distance == temp.distance) {
242
heap.putKthNearest(i, distance);
248
Instances neighbours = new Instances(m_Instances, (heap.size()+heap.noOfKthNearest()));
249
m_Distances = new double[heap.size()+heap.noOfKthNearest()];
250
int [] indices = new int[heap.size()+heap.noOfKthNearest()];
251
int i=1; MyHeapElement h;
252
while(heap.noOfKthNearest()>0) {
253
h = heap.getKthNearest();
254
indices[indices.length-i] = h.index;
255
m_Distances[indices.length-i] = h.distance;
258
while(heap.size()>0) {
260
indices[indices.length-i] = h.index;
261
m_Distances[indices.length-i] = h.distance;
265
m_DistanceFunction.postProcessDistances(m_Distances);
267
for(int k=0; k<indices.length; k++) {
268
neighbours.add(m_Instances.instance(indices[k]));
272
m_Stats.searchFinish();
278
* Returns the distances of the k nearest neighbours. The kNearestNeighbours
279
* or nearestNeighbour must always be called before calling this function. If
280
* this function is called before calling either the kNearestNeighbours or
281
* the nearestNeighbour, then it throws an exception. If, however, if either
282
* of the nearestNeighbour functions are called at any point in the
283
* past then no exception is thrown and the distances of the training set from
284
* the last supplied target instance (to either one of the nearestNeighbour
285
* functions) is/are returned.
287
* @return array containing the distances of the
288
* nearestNeighbours. The length and ordering of the
289
* array is the same as that of the instances returned
290
* by nearestNeighbour functions.
291
* @throws Exception if called before calling kNearestNeighbours
292
* or nearestNeighbours.
294
public double[] getDistances() throws Exception {
295
if(m_Distances==null)
296
throw new Exception("No distances available. Please call either "+
297
"kNearestNeighbours or nearestNeighbours first.");
302
* Sets the instances comprising the current neighbourhood.
304
* @param insts The set of instances on which the nearest neighbour
305
* search is carried out. Usually this set is the
307
* @throws Exception if setting of instances fails
309
public void setInstances(Instances insts) throws Exception {
311
m_DistanceFunction.setInstances(insts);
315
* Updates the LinearNNSearch to cater for the new added instance. This
316
* implementation only updates the ranges of the DistanceFunction class,
317
* since our set of instances is passed by reference and should already have
318
* the newly added instance.
320
* @param ins The instance to add. Usually this is the instance that
321
* is added to our neighbourhood i.e. the training
323
* @throws Exception if the given instances are null
325
public void update(Instance ins) throws Exception {
326
if(m_Instances==null)
327
throw new Exception("No instances supplied yet. Cannot update without"+
328
"supplying a set of instances first.");
329
m_DistanceFunction.update(ins);
333
* Adds the given instance info. This implementation updates the range
334
* datastructures of the DistanceFunction class.
336
* @param ins The instance to add the information of. Usually this is
337
* the test instance supplied to update the range of
338
* attributes in the distance function.
340
public void addInstanceInfo(Instance ins) {
341
if(m_Instances!=null)
343
catch(Exception ex) { ex.printStackTrace(); }