~ubuntu-branches/ubuntu/precise/weka/precise

* Jerome H. Friedman, Jon Luis Bentley, Raphael Ari Finkel (1977). An Algorithm for Finding Best Matches in Logarithmic Expected Time. ACM Transactions on Mathematics Software. 3(3):209-226.

*

* BibTeX:

* <pre>

* @article{Friedman1977,

* author = {Jerome H. Friedman and Jon Luis Bentley and Raphael Ari Finkel},

* journal = {ACM Transactions on Mathematics Software},

* month = {September},

* number = {3},

* pages = {209-226},

* title = {An Algorithm for Finding Best Matches in Logarithmic Expected Time},

* volume = {3},

* year = {1977}

* }

* </pre>

*

* @author Ashraf M. Kibriya (amk14[at-the-rate]cs[dot]waikato[dot]ac[dot]nz)

* @version $Revision: 1.1 $

public class MedianOfWidestDimension

extends KDTreeNodeSplitter

implements TechnicalInformationHandler {

/** for serialization. */

private static final long serialVersionUID = 1383443320160540663L;

/**

* Returns a string describing this nearest neighbour search algorithm.

* @return a description of the algorithm for displaying in the

* explorer/experimenter gui

public String globalInfo() {

return

"The class that splits a KDTree node based on the median value of "

+ "a dimension in which the node's points have the widest spread.\n\n"

+ "For more information see also:\n\n"

+ getTechnicalInformation().toString();

}

/**

* Returns an instance of a TechnicalInformation object, containing detailed

* information about the technical background of this class, e.g., paper

* reference or book this class is based on.

* @return the technical information about this class

public TechnicalInformation getTechnicalInformation() {

TechnicalInformation result;

result = new TechnicalInformation(Type.ARTICLE);

result.setValue(Field.AUTHOR, "Jerome H. Friedman and Jon Luis Bentley and Raphael Ari Finkel");

result.setValue(Field.YEAR, "1977");

result.setValue(Field.TITLE, "An Algorithm for Finding Best Matches in Logarithmic Expected Time");

result.setValue(Field.JOURNAL, "ACM Transactions on Mathematics Software");

result.setValue(Field.PAGES, "209-226");

result.setValue(Field.MONTH, "September");

100

result.setValue(Field.VOLUME, "3");

101

result.setValue(Field.NUMBER, "3");

102

103

return result;

104

}

105

106

/**

107

* Splits a node into two based on the median value of the dimension

108

* in which the points have the widest spread. After splitting two

109

* new nodes are created and correctly initialised. And, node.left

110

* and node.right are set appropriately.

111

112

* @param node The node to split.

113

* @param numNodesCreated The number of nodes that so far have been

114

* created for the tree, so that the newly created nodes are

115

* assigned correct/meaningful node numbers/ids.

116

* @param nodeRanges The attributes' range for the points inside

117

* the node that is to be split.

118

* @param universe The attributes' range for the whole

119

* point-space.

120

* @throws Exception If there is some problem in splitting the

121

* given node.

122

123

public void splitNode(KDTreeNode node, int numNodesCreated,

124

double[][] nodeRanges, double[][] universe) throws Exception {

125

126

correctlyInitialized();

127

128

int splitDim = widestDim(nodeRanges, universe);

129

130

//In this case median is defined to be either the middle value (in case of

131

//odd number of values) or the left of the two middle values (in case of

132

//even number of values).

133

int medianIdxIdx = node.m_Start + (node.m_End-node.m_Start)/2;

134

//the following finds the median and also re-arranges the array so all

135

//elements to the left are < median and those to the right are > median.

136

int medianIdx = select(splitDim, m_InstList, node.m_Start, node.m_End, (node.m_End-node.m_Start)/2+1);

137

138

139

node.m_SplitDim = splitDim;

140

node.m_SplitValue = m_Instances.instance(m_InstList[medianIdx]).value(splitDim);

141

142

node.m_Left = new KDTreeNode(numNodesCreated+1, node.m_Start, medianIdxIdx,

143

m_EuclideanDistance.initializeRanges(m_InstList, node.m_Start, medianIdxIdx));

144

node.m_Right = new KDTreeNode(numNodesCreated+2, medianIdxIdx+1, node.m_End,

145

m_EuclideanDistance.initializeRanges(m_InstList, medianIdxIdx+1, node.m_End));

146

}

147

148

/**

149

* Partitions the instances around a pivot. Used by quicksort and

150

* kthSmallestValue.

151

152

* @param attIdx The attribution/dimension based on which the

153

* instances should be partitioned.

154

* @param index The master index array containing indices of the

155

* instances.

156

* @param l The begining index of the portion of master index

157

* array that should be partitioned.

158

* @param r The end index of the portion of master index array

159

* that should be partitioned.

160

* @return the index of the middle element

161

162

protected int partition(int attIdx, int[] index, int l, int r) {

163

164

double pivot = m_Instances.instance(index[(l + r) / 2]).value(attIdx);

165

int help;

166

167

while (l < r) {

168

while ((m_Instances.instance(index[l]).value(attIdx) < pivot) && (l < r)) {

169

l++;

170

}

171

while ((m_Instances.instance(index[r]).value(attIdx) > pivot) && (l < r)) {

172

r--;

173

}

174

if (l < r) {

175

help = index[l];

176

index[l] = index[r];

177

index[r] = help;

178

l++;

179

r--;

180

}

181

}

182

if ((l == r) && (m_Instances.instance(index[r]).value(attIdx) > pivot)) {

183

r--;

184

}

185

186

return r;

187

}

188

189

/**

190

* Implements computation of the kth-smallest element according

191

* to Manber's "Introduction to Algorithms".

192

193

* @param attIdx The dimension/attribute of the instances in

194

* which to find the kth-smallest element.

195

* @param indices The master index array containing indices of

196

* the instances.

197

* @param left The begining index of the portion of the master

198

* index array in which to find the kth-smallest element.

199

* @param right The end index of the portion of the master index

200

* array in which to find the kth-smallest element.

201

* @param k The value of k

202

* @return The index of the kth-smallest element

203

204

public int select(int attIdx, int[] indices, int left, int right, int k) {

205

206

if (left == right) {

207

return left;

208

} else {

209

int middle = partition(attIdx, indices, left, right);

210

if ((middle - left + 1) >= k) {

211

return select(attIdx, indices, left, middle, k);

212

} else {

213

return select(attIdx, indices, middle + 1, right, k - (middle - left + 1));

214

}

215

}

216

}

217

}

Older »