2
* This program is free software; you can redistribute it and/or modify
3
* it under the terms of the GNU General Public License as published by
4
* the Free Software Foundation; either version 2 of the License, or
5
* (at your option) any later version.
7
* This program is distributed in the hope that it will be useful,
8
* but WITHOUT ANY WARRANTY; without even the implied warranty of
9
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
* GNU General Public License for more details.
12
* You should have received a copy of the GNU General Public License
13
* along with this program; if not, write to the Free Software
14
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
* SlidingMidPointOfWidestSide.java
19
* Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
22
package weka.core.neighboursearch.kdtrees;
24
import weka.core.TechnicalInformation;
25
import weka.core.TechnicalInformationHandler;
26
import weka.core.TechnicalInformation.Field;
27
import weka.core.TechnicalInformation.Type;
30
<!-- globalinfo-start -->
31
* The class that splits a node into two based on the midpoint value of the dimension in which the node's rectangle is widest. If after splitting one side is empty then it is slided towards the non-empty side until there is at least one point on the empty side.<br/>
33
* For more information see also:<br/>
35
* David M. Mount (2006). ANN Programming Manual. College Park, MD, USA.
37
<!-- globalinfo-end -->
39
<!-- technical-bibtex-start -->
42
* @manual{Mount2006,
43
* address = {College Park, MD, USA},
44
* author = {David M. Mount},
45
* organization = {Department of Computer Science, University of Maryland},
46
* title = {ANN Programming Manual},
48
* HTTP = {Available from http://www.cs.umd.edu/\~mount/ANN/}
52
<!-- technical-bibtex-end -->
54
<!-- options-start -->
57
* @author Ashraf M. Kibriya (amk14@waikato.ac.nz)
58
* @version $Revision: 1.2 $
60
public class SlidingMidPointOfWidestSide
61
extends KDTreeNodeSplitter
62
implements TechnicalInformationHandler {
64
/** for serialization. */
65
private static final long serialVersionUID = 852857628205680562L;
67
/** The floating point error to tolerate in finding the widest
68
* rectangular side. */
69
protected static double ERR = 0.001;
72
* Returns a string describing this nearest neighbour search algorithm.
74
* @return a description of the algorithm for displaying in the
75
* explorer/experimenter gui
77
public String globalInfo() {
79
"The class that splits a node into two based on the midpoint value of "
80
+ "the dimension in which the node's rectangle is widest. If after "
81
+ "splitting one side is empty then it is slided towards the non-empty "
82
+ "side until there is at least one point on the empty side.\n\n"
83
+ "For more information see also:\n\n"
84
+ getTechnicalInformation().toString();
88
* Returns an instance of a TechnicalInformation object, containing detailed
89
* information about the technical background of this class, e.g., paper
90
* reference or book this class is based on.
92
* @return the technical information about this class
94
public TechnicalInformation getTechnicalInformation() {
95
TechnicalInformation result;
97
result = new TechnicalInformation(Type.MANUAL);
98
result.setValue(Field.AUTHOR, "David M. Mount");
99
result.setValue(Field.YEAR, "2006");
100
result.setValue(Field.TITLE, "ANN Programming Manual");
101
result.setValue(Field.ORGANIZATION, "Department of Computer Science, University of Maryland");
102
result.setValue(Field.ADDRESS,
103
"College Park, MD, USA");
104
result.setValue(Field.HTTP,
105
"Available from http://www.cs.umd.edu/~mount/ANN/");
111
* Splits a node into two based on the midpoint value of the dimension
112
* in which the node's rectangle is widest. If after splitting one side
113
* is empty then it is slided towards the non-empty side until there is
114
* at least one point on the empty side. The two nodes created after the
115
* whole splitting are correctly initialised. And, node.left and
116
* node.right are set appropriately.
117
* @param node The node to split.
118
* @param numNodesCreated The number of nodes that so far have been
119
* created for the tree, so that the newly created nodes are
120
* assigned correct/meaningful node numbers/ids.
121
* @param nodeRanges The attributes' range for the points inside
122
* the node that is to be split.
123
* @param universe The attributes' range for the whole
125
* @throws Exception If there is some problem in splitting the
128
public void splitNode(KDTreeNode node, int numNodesCreated,
129
double[][] nodeRanges, double[][] universe) throws Exception {
131
correctlyInitialized();
133
if (node.m_NodesRectBounds == null) {
134
node.m_NodesRectBounds = new double[2][node.m_NodeRanges.length];
135
for (int i = 0; i < node.m_NodeRanges.length; i++) {
136
node.m_NodesRectBounds[MIN][i] = node.m_NodeRanges[i][MIN];
137
node.m_NodesRectBounds[MAX][i] = node.m_NodeRanges[i][MAX];
141
// finding widest side of the hyper rectangle
142
double maxRectWidth = Double.NEGATIVE_INFINITY, maxPtWidth = Double.NEGATIVE_INFINITY, tempval;
143
int splitDim = -1, classIdx = m_Instances.classIndex();
145
for (int i = 0; i < node.m_NodesRectBounds[0].length; i++) {
148
tempval = node.m_NodesRectBounds[MAX][i] - node.m_NodesRectBounds[MIN][i];
149
if (m_NormalizeNodeWidth) {
150
tempval = tempval / universe[i][WIDTH];
152
if (tempval > maxRectWidth && node.m_NodeRanges[i][WIDTH] > 0.0)
153
maxRectWidth = tempval;
156
for (int i = 0; i < node.m_NodesRectBounds[0].length; i++) {
159
tempval = node.m_NodesRectBounds[MAX][i] - node.m_NodesRectBounds[MIN][i];
160
if (m_NormalizeNodeWidth) {
161
tempval = tempval / universe[i][WIDTH];
163
if (tempval >= maxRectWidth * (1 - ERR)
164
&& node.m_NodeRanges[i][WIDTH] > 0.0) {
165
if (node.m_NodeRanges[i][WIDTH] > maxPtWidth) {
166
maxPtWidth = node.m_NodeRanges[i][WIDTH];
167
if (m_NormalizeNodeWidth)
168
maxPtWidth = maxPtWidth / universe[i][WIDTH];
174
double splitVal = node.m_NodesRectBounds[MIN][splitDim]
175
+ (node.m_NodesRectBounds[MAX][splitDim] - node.m_NodesRectBounds[MIN][splitDim])
177
// might want to try to slide it further to contain more than one point on
179
// side that is resulting empty
180
if (splitVal < node.m_NodeRanges[splitDim][MIN])
181
splitVal = node.m_NodeRanges[splitDim][MIN];
182
else if (splitVal >= node.m_NodeRanges[splitDim][MAX])
183
splitVal = node.m_NodeRanges[splitDim][MAX]
184
- node.m_NodeRanges[splitDim][WIDTH] * 0.001;
186
int rightStart = rearrangePoints(m_InstList, node.m_Start, node.m_End,
189
if (rightStart == node.m_Start || rightStart > node.m_End) {
190
if (rightStart == node.m_Start)
191
throw new Exception("Left child is empty in node " + node.m_NodeNumber
192
+ ". Not possible with "
193
+ "SlidingMidPointofWidestSide splitting method. Please "
196
throw new Exception("Right child is empty in node " + node.m_NodeNumber
197
+ ". Not possible with "
198
+ "SlidingMidPointofWidestSide splitting method. Please "
202
node.m_SplitDim = splitDim;
203
node.m_SplitValue = splitVal;
205
double[][] widths = new double[2][node.m_NodesRectBounds[0].length];
207
System.arraycopy(node.m_NodesRectBounds[MIN], 0, widths[MIN], 0,
208
node.m_NodesRectBounds[MIN].length);
209
System.arraycopy(node.m_NodesRectBounds[MAX], 0, widths[MAX], 0,
210
node.m_NodesRectBounds[MAX].length);
211
widths[MAX][splitDim] = splitVal;
213
node.m_Left = new KDTreeNode(numNodesCreated + 1, node.m_Start,
214
rightStart - 1, m_EuclideanDistance.initializeRanges(m_InstList,
215
node.m_Start, rightStart - 1), widths);
217
widths = new double[2][node.m_NodesRectBounds[0].length];
218
System.arraycopy(node.m_NodesRectBounds[MIN], 0, widths[MIN], 0,
219
node.m_NodesRectBounds[MIN].length);
220
System.arraycopy(node.m_NodesRectBounds[MAX], 0, widths[MAX], 0,
221
node.m_NodesRectBounds[MAX].length);
222
widths[MIN][splitDim] = splitVal;
224
node.m_Right = new KDTreeNode(numNodesCreated + 2, rightStart, node.m_End,
225
m_EuclideanDistance.initializeRanges(m_InstList, rightStart, node.m_End), widths);
229
* Re-arranges the indices array such that the points <= to the splitVal
230
* are on the left of the array and those > the splitVal are on the right.
232
* @param indices The master index array.
233
* @param startidx The begining index of portion of indices that needs
235
* @param endidx The end index of portion of indices that needs
237
* @param splitDim The split dimension/attribute.
238
* @param splitVal The split value.
239
* @return The startIdx of the points > the splitVal (the points
240
* belonging to the right child of the node).
242
protected int rearrangePoints(int[] indices, final int startidx,
243
final int endidx, final int splitDim, final double splitVal) {
245
int tmp, left = startidx - 1;
246
for (int i = startidx; i <= endidx; i++) {
247
if (m_EuclideanDistance.valueIsSmallerEqual(m_Instances
248
.instance(indices[i]), splitDim, splitVal)) {
251
indices[left] = indices[i];
253
}// end valueIsSmallerEqual