2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
* MedianDistanceFromArbitraryPoint.java
19
* Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
22
package weka.core.neighboursearch.balltrees;
24
import weka.core.EuclideanDistance;
25
import weka.core.Instance;
26
import weka.core.Instances;
27
import weka.core.Option;
28
import weka.core.TechnicalInformation;
29
import weka.core.TechnicalInformationHandler;
30
import weka.core.Utils;
31
import weka.core.TechnicalInformation.Field;
32
import weka.core.TechnicalInformation.Type;
34
import java.util.Enumeration;
35
import java.util.Random;
36
import java.util.Vector;
39
<!-- globalinfo-start -->
40
* Class that splits a BallNode of a ball tree using Uhlmann's described method.<br/>
42
* For information see:<br/>
44
* Jeffrey K. Uhlmann (1991). Satisfying general proximity/similarity queries with metric trees. Information Processing Letters. 40(4):175-179.<br/>
46
* Ashraf Masood Kibriya (2007). Fast Algorithms for Nearest Neighbour Search. Hamilton, New Zealand.
48
<!-- globalinfo-end -->
50
<!-- technical-bibtex-start -->
53
* @article{Uhlmann1991,
54
* author = {Jeffrey K. Uhlmann},
55
* journal = {Information Processing Letters},
59
* title = {Satisfying general proximity/similarity queries with metric trees},
64
* @mastersthesis{Kibriya2007,
65
* address = {Hamilton, New Zealand},
66
* author = {Ashraf Masood Kibriya},
67
* school = {Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato},
68
* title = {Fast Algorithms for Nearest Neighbour Search},
73
<!-- technical-bibtex-end -->
75
<!-- options-start -->
76
* Valid options are: <p/>
78
* <pre> -S <num>
79
* The seed value for the random number generator.
84
* @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)
85
* @version $Revision: 1.1 $
87
public class MedianDistanceFromArbitraryPoint
89
implements TechnicalInformationHandler {
91
/** for serialization. */
92
private static final long serialVersionUID = 5617378551363700558L;
94
/** Seed for random number generator. */
95
protected int m_RandSeed = 17;
98
* Random number generator for selecting
99
* an abitrary (random) point.
101
protected Random m_Rand;
104
public MedianDistanceFromArbitraryPoint() {
109
* @param instList The master index array.
110
* @param insts The instances on which the tree
111
* is (or is to be) built.
112
* @param e The Euclidean distance function to
115
public MedianDistanceFromArbitraryPoint(int[] instList, Instances insts, EuclideanDistance e) {
116
super(instList, insts, e);
120
* Returns a string describing this nearest neighbour search algorithm.
122
* @return a description of the algorithm for displaying in the
123
* explorer/experimenter gui
125
public String globalInfo() {
127
"Class that splits a BallNode of a ball tree using Uhlmann's "
128
+ "described method.\n\n"
129
+ "For information see:\n\n"
130
+ getTechnicalInformation().toString();
134
* Returns an instance of a TechnicalInformation object, containing detailed
135
* information about the technical background of this class, e.g., paper
136
* reference or book this class is based on.
138
* @return the technical information about this class
140
public TechnicalInformation getTechnicalInformation() {
141
TechnicalInformation result;
142
TechnicalInformation additional;
144
result = new TechnicalInformation(Type.ARTICLE);
145
result.setValue(Field.AUTHOR, "Jeffrey K. Uhlmann");
146
result.setValue(Field.TITLE, "Satisfying general proximity/similarity queries with metric trees");
147
result.setValue(Field.JOURNAL, "Information Processing Letters");
148
result.setValue(Field.MONTH, "November");
149
result.setValue(Field.YEAR, "1991");
150
result.setValue(Field.NUMBER, "4");
151
result.setValue(Field.VOLUME, "40");
152
result.setValue(Field.PAGES, "175-179");
154
additional = result.add(Type.MASTERSTHESIS);
155
additional.setValue(Field.AUTHOR, "Ashraf Masood Kibriya");
156
additional.setValue(Field.TITLE, "Fast Algorithms for Nearest Neighbour Search");
157
additional.setValue(Field.YEAR, "2007");
158
additional.setValue(Field.SCHOOL, "Department of Computer Science, School of Computing and Mathematical Sciences, University of Waikato");
159
additional.setValue(Field.ADDRESS, "Hamilton, New Zealand");
165
* Returns an enumeration describing the available options.
167
* @return an enumeration of all the available options.
169
public Enumeration listOptions() {
170
Vector result = new Vector();
172
Enumeration enm = super.listOptions();
173
while (enm.hasMoreElements())
174
result.addElement(enm.nextElement());
176
result.addElement(new Option(
177
"\tThe seed value for the random number generator.\n"
179
"S", 1, "-S <num>"));
181
return result.elements();
185
* Parses a given list of options.
187
<!-- options-start -->
188
* Valid options are: <p/>
190
* <pre> -S <num>
191
* The seed value for the random number generator.
192
* (default: 17)</pre>
196
* @param options the list of options as an array of strings
197
* @throws Exception if an option is not supported
199
public void setOptions(String[] options) throws Exception {
202
super.setOptions(options);
204
tmpStr = Utils.getOption('S', options);
205
if (tmpStr.length() > 0)
206
setRandomSeed(Integer.parseInt(tmpStr));
212
* Gets the current settings of the object.
214
* @return an array of strings suitable for passing to setOptions
216
public String[] getOptions() {
217
Vector<String> result;
221
result = new Vector<String>();
223
options = super.getOptions();
224
for (i = 0; i < options.length; i++)
225
result.add(options[i]);
228
result.add("" + getRandomSeed());
230
return result.toArray(new String[result.size()]);
234
* Sets the seed for random number generator.
235
* @param seed The seed value to set.
237
public void setRandomSeed(int seed) {
242
* Returns the seed value of random
244
* @return The random seed currently in use.
246
public int getRandomSeed() {
251
* Returns the tip text for this property.
253
* @return tip text for this property suitable for
254
* displaying in the explorer/experimenter gui.
256
public String randomSeedTipText() {
257
return "The seed value for the random number generator.";
261
* Splits a ball into two.
262
* @param node The node to split.
263
* @param numNodesCreated The number of nodes that so far have been
264
* created for the tree, so that the newly created nodes are
265
* assigned correct/meaningful node numbers/ids.
266
* @throws Exception If there is some problem in splitting the
269
public void splitNode(BallNode node, int numNodesCreated) throws Exception {
270
correctlyInitialized();
272
m_Rand = new Random(m_RandSeed);
274
int ridx = node.m_Start+m_Rand.nextInt(node.m_NumInstances);
275
Instance randomInst = (Instance)
276
m_Instances.instance( m_Instlist[ridx] ).copy();
277
double [] distList = new double[node.m_NumInstances-1];
279
for(int i=node.m_Start, j=0; i<node.m_End; i++, j++) {
280
temp = m_Instances.instance( m_Instlist[i] );
281
distList[j] = m_DistanceFunction.distance(randomInst, temp,
282
Double.POSITIVE_INFINITY);
285
int medianIdx = select(distList, m_Instlist, 0, distList.length-1,
286
node.m_Start, (node.m_End-node.m_Start)/2+1) +
290
node.m_Left = new BallNode(node.m_Start, medianIdx, numNodesCreated+1,
291
(pivot=BallNode.calcCentroidPivot(node.m_Start,
292
medianIdx, m_Instlist,
294
BallNode.calcRadius(node.m_Start, medianIdx,
295
m_Instlist, m_Instances,
296
pivot, m_DistanceFunction)
299
node.m_Right = new BallNode(medianIdx+1, node.m_End, numNodesCreated+2,
300
(pivot=BallNode.calcCentroidPivot(medianIdx+1,
301
node.m_End, m_Instlist,
303
BallNode.calcRadius(medianIdx+1, node.m_End,
304
m_Instlist, m_Instances,
305
pivot, m_DistanceFunction)
310
* Partitions the instances around a pivot. Used by quicksort and
313
* @param array The array of distances of the points to the
314
* arbitrarily selected point.
315
* @param index The master index array containing indices of the
317
* @param l The relative begining index of the portion of master
318
* index array that should be partitioned.
319
* @param r The relative end index of the portion of master index
320
* array that should be partitioned.
321
* @param indexStart The absolute begining index of the portion
322
* of master index array that should be partitioned.
323
* @return the index of the middle element (in the master
324
* index array, i.e. index of the index of middle element).
326
protected int partition(double[] array, int[] index, int l, int r,
327
final int indexStart) {
329
double pivot = array[(l + r) / 2];
333
while ((array[l] < pivot) && (l < r)) {
336
while ((array[r] > pivot) && (l < r)) {
340
help = index[indexStart+l];
341
index[indexStart+l] = index[indexStart+r];
342
index[indexStart+r] = help;
347
if ((l == r) && (array[r] > pivot)) {
355
* Implements computation of the kth-smallest element according
356
* to Manber's "Introduction to Algorithms".
358
* @param array Array containing the distances of points from
359
* the arbitrarily selected.
360
* @param indices The master index array containing indices of
362
* @param left The relative begining index of the portion of the
363
* master index array in which to find the kth-smallest element.
364
* @param right The relative end index of the portion of the
365
* master index array in which to find the kth-smallest element.
366
* @param indexStart The absolute begining index of the portion
367
* of the master index array in which to find the kth-smallest
369
* @param k The value of k
370
* @return The index of the kth-smallest element
372
protected int select(double[] array, int[] indices,
373
int left, int right, final int indexStart, int k) {
378
int middle = partition(array, indices, left, right, indexStart);
379
if ((middle - left + 1) >= k) {
380
return select(array, indices, left, middle, indexStart, k);
382
return select(array, indices, middle + 1, right,
383
indexStart, k - (middle - left + 1));