1
/***********************************************************************
2
* Software License Agreement (BSD License)
4
* Copyright 2008-2009 Marius Muja (mariusm@cs.ubc.ca). All rights reserved.
5
* Copyright 2008-2009 David G. Lowe (lowe@cs.ubc.ca). All rights reserved.
9
* Redistribution and use in source and binary forms, with or without
10
* modification, are permitted provided that the following conditions
13
* 1. Redistributions of source code must retain the above copyright
14
* notice, this list of conditions and the following disclaimer.
15
* 2. Redistributions in binary form must reproduce the above copyright
16
* notice, this list of conditions and the following disclaimer in the
17
* documentation and/or other materials provided with the distribution.
19
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
*************************************************************************/
31
/***********************************************************************
32
* Author: Vincent Rabaud
33
*************************************************************************/
35
#ifndef OPENCV_FLANN_LSH_INDEX_H_
36
#define OPENCV_FLANN_LSH_INDEX_H_
47
#include "result_set.h"
49
#include "lsh_table.h"
50
#include "allocator.h"
57
struct LshIndexParams : public IndexParams
59
LshIndexParams(unsigned int table_number = 12, unsigned int key_size = 20, unsigned int multi_probe_level = 2)
61
(* this)["algorithm"] = FLANN_INDEX_LSH;
62
// The number of hash tables to use
63
(*this)["table_number"] = table_number;
64
// The length of the key in the hash tables
65
(*this)["key_size"] = key_size;
66
// Number of levels to use in multi-probe (0 for standard LSH)
67
(*this)["multi_probe_level"] = multi_probe_level;
72
* Randomized kd-tree index
74
* Contains the k-d trees and other information for indexing a set of points
75
* for nearest-neighbor matching.
77
template<typename Distance>
78
class LshIndex : public NNIndex<Distance>
81
typedef typename Distance::ElementType ElementType;
82
typedef typename Distance::ResultType DistanceType;
85
* @param input_data dataset with the input features
86
* @param params parameters passed to the LSH algorithm
87
* @param d the distance used
89
LshIndex(const Matrix<ElementType>& input_data, const IndexParams& params = LshIndexParams(),
90
Distance d = Distance()) :
91
dataset_(input_data), index_params_(params), distance_(d)
93
// cv::flann::IndexParams sets integer params as 'int', so it is used with get_param
94
// in place of 'unsigned int'
95
table_number_ = (unsigned int)get_param<int>(index_params_,"table_number",12);
96
key_size_ = (unsigned int)get_param<int>(index_params_,"key_size",20);
97
multi_probe_level_ = (unsigned int)get_param<int>(index_params_,"multi_probe_level",2);
99
feature_size_ = (unsigned)dataset_.cols;
100
fill_xor_mask(0, key_size_, multi_probe_level_, xor_masks_);
104
LshIndex(const LshIndex&);
105
LshIndex& operator=(const LshIndex&);
112
tables_.resize(table_number_);
113
for (unsigned int i = 0; i < table_number_; ++i) {
114
lsh::LshTable<ElementType>& table = tables_[i];
115
table = lsh::LshTable<ElementType>(feature_size_, key_size_);
117
// Add the features to the table
122
flann_algorithm_t getType() const
124
return FLANN_INDEX_LSH;
128
void saveIndex(FILE* stream)
130
save_value(stream,table_number_);
131
save_value(stream,key_size_);
132
save_value(stream,multi_probe_level_);
133
save_value(stream, dataset_);
136
void loadIndex(FILE* stream)
138
load_value(stream, table_number_);
139
load_value(stream, key_size_);
140
load_value(stream, multi_probe_level_);
141
load_value(stream, dataset_);
142
// Building the index is so fast we can afford not storing it
145
index_params_["algorithm"] = getType();
146
index_params_["table_number"] = table_number_;
147
index_params_["key_size"] = key_size_;
148
index_params_["multi_probe_level"] = multi_probe_level_;
152
* Returns size of index.
156
return dataset_.rows;
160
* Returns the length of an index feature.
162
size_t veclen() const
164
return feature_size_;
168
* Computes the index memory usage
169
* Returns: memory used by the index
171
int usedMemory() const
173
return (int)(dataset_.rows * sizeof(int));
177
IndexParams getParameters() const
179
return index_params_;
183
* \brief Perform k-nearest neighbor search
184
* \param[in] queries The query points for which to find the nearest neighbors
185
* \param[out] indices The indices of the nearest neighbors found
186
* \param[out] dists Distances to the nearest neighbors found
187
* \param[in] knn Number of nearest neighbors to return
188
* \param[in] params Search parameters
190
virtual void knnSearch(const Matrix<ElementType>& queries, Matrix<int>& indices, Matrix<DistanceType>& dists, int knn, const SearchParams& params)
192
assert(queries.cols == veclen());
193
assert(indices.rows >= queries.rows);
194
assert(dists.rows >= queries.rows);
195
assert(int(indices.cols) >= knn);
196
assert(int(dists.cols) >= knn);
199
KNNUniqueResultSet<DistanceType> resultSet(knn);
200
for (size_t i = 0; i < queries.rows; i++) {
202
std::fill_n(indices[i], knn, -1);
203
std::fill_n(dists[i], knn, std::numeric_limits<DistanceType>::max());
204
findNeighbors(resultSet, queries[i], params);
205
if (get_param(params,"sorted",true)) resultSet.sortAndCopy(indices[i], dists[i], knn);
206
else resultSet.copy(indices[i], dists[i], knn);
212
* Find set of nearest neighbors to vec. Their indices are stored inside
216
* result = the result object in which the indices of the nearest-neighbors are stored
217
* vec = the vector for which to search the nearest neighbors
218
* maxCheck = the maximum number of restarts (in a best-bin-first manner)
220
void findNeighbors(ResultSet<DistanceType>& result, const ElementType* vec, const SearchParams& /*searchParams*/)
222
getNeighbors(vec, result);
226
/** Defines the comparator on score and index
228
typedef std::pair<float, unsigned int> ScoreIndexPair;
229
struct SortScoreIndexPairOnSecond
231
bool operator()(const ScoreIndexPair& left, const ScoreIndexPair& right) const
233
return left.second < right.second;
237
/** Fills the different xor masks to use when getting the neighbors in multi-probe LSH
238
* @param key the key we build neighbors from
239
* @param lowest_index the lowest index of the bit set
240
* @param level the multi-probe level we are at
241
* @param xor_masks all the xor mask
243
void fill_xor_mask(lsh::BucketKey key, int lowest_index, unsigned int level,
244
std::vector<lsh::BucketKey>& xor_masks)
246
xor_masks.push_back(key);
247
if (level == 0) return;
248
for (int index = lowest_index - 1; index >= 0; --index) {
250
lsh::BucketKey new_key = key | (1 << index);
251
fill_xor_mask(new_key, index, level - 1, xor_masks);
255
/** Performs the approximate nearest-neighbor search.
256
* @param vec the feature to analyze
257
* @param do_radius flag indicating if we check the radius too
258
* @param radius the radius if it is a radius search
259
* @param do_k flag indicating if we limit the number of nn
260
* @param k_nn the number of nearest neighbors
261
* @param checked_average used for debugging
263
void getNeighbors(const ElementType* vec, bool /*do_radius*/, float radius, bool do_k, unsigned int k_nn,
264
float& /*checked_average*/)
266
static std::vector<ScoreIndexPair> score_index_heap;
269
unsigned int worst_score = std::numeric_limits<unsigned int>::max();
270
typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin();
271
typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end();
272
for (; table != table_end; ++table) {
273
size_t key = table->getKey(vec);
274
std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin();
275
std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end();
276
for (; xor_mask != xor_mask_end; ++xor_mask) {
277
size_t sub_key = key ^ (*xor_mask);
278
const lsh::Bucket* bucket = table->getBucketFromKey(sub_key);
279
if (bucket == 0) continue;
281
// Go over each descriptor index
282
std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin();
283
std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end();
284
DistanceType hamming_distance;
286
// Process the rest of the candidates
287
for (; training_index < last_training_index; ++training_index) {
288
hamming_distance = distance_(vec, dataset_[*training_index], dataset_.cols);
290
if (hamming_distance < worst_score) {
291
// Insert the new element
292
score_index_heap.push_back(ScoreIndexPair(hamming_distance, training_index));
293
std::push_heap(score_index_heap.begin(), score_index_heap.end());
295
if (score_index_heap.size() > (unsigned int)k_nn) {
296
// Remove the highest distance value as we have too many elements
297
std::pop_heap(score_index_heap.begin(), score_index_heap.end());
298
score_index_heap.pop_back();
299
// Keep track of the worst score
300
worst_score = score_index_heap.front().first;
308
typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin();
309
typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end();
310
for (; table != table_end; ++table) {
311
size_t key = table->getKey(vec);
312
std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin();
313
std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end();
314
for (; xor_mask != xor_mask_end; ++xor_mask) {
315
size_t sub_key = key ^ (*xor_mask);
316
const lsh::Bucket* bucket = table->getBucketFromKey(sub_key);
317
if (bucket == 0) continue;
319
// Go over each descriptor index
320
std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin();
321
std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end();
322
DistanceType hamming_distance;
324
// Process the rest of the candidates
325
for (; training_index < last_training_index; ++training_index) {
326
// Compute the Hamming distance
327
hamming_distance = distance_(vec, dataset_[*training_index], dataset_.cols);
328
if (hamming_distance < radius) score_index_heap.push_back(ScoreIndexPair(hamming_distance, training_index));
335
/** Performs the approximate nearest-neighbor search.
336
* This is a slower version than the above as it uses the ResultSet
337
* @param vec the feature to analyze
339
void getNeighbors(const ElementType* vec, ResultSet<DistanceType>& result)
341
typename std::vector<lsh::LshTable<ElementType> >::const_iterator table = tables_.begin();
342
typename std::vector<lsh::LshTable<ElementType> >::const_iterator table_end = tables_.end();
343
for (; table != table_end; ++table) {
344
size_t key = table->getKey(vec);
345
std::vector<lsh::BucketKey>::const_iterator xor_mask = xor_masks_.begin();
346
std::vector<lsh::BucketKey>::const_iterator xor_mask_end = xor_masks_.end();
347
for (; xor_mask != xor_mask_end; ++xor_mask) {
348
size_t sub_key = key ^ (*xor_mask);
349
const lsh::Bucket* bucket = table->getBucketFromKey((lsh::BucketKey)sub_key);
350
if (bucket == 0) continue;
352
// Go over each descriptor index
353
std::vector<lsh::FeatureIndex>::const_iterator training_index = bucket->begin();
354
std::vector<lsh::FeatureIndex>::const_iterator last_training_index = bucket->end();
355
DistanceType hamming_distance;
357
// Process the rest of the candidates
358
for (; training_index < last_training_index; ++training_index) {
359
// Compute the Hamming distance
360
hamming_distance = distance_(vec, dataset_[*training_index], (int)dataset_.cols);
361
result.addPoint(hamming_distance, *training_index);
367
/** The different hash tables */
368
std::vector<lsh::LshTable<ElementType> > tables_;
370
/** The data the LSH tables where built from */
371
Matrix<ElementType> dataset_;
373
/** The size of the features (as ElementType[]) */
374
unsigned int feature_size_;
376
IndexParams index_params_;
379
unsigned int table_number_;
381
unsigned int key_size_;
382
/** How far should we look for neighbors in multi-probe LSH */
383
unsigned int multi_probe_level_;
385
/** The XOR masks to apply to a key to get the neighboring buckets */
386
std::vector<lsh::BucketKey> xor_masks_;
392
#endif //OPENCV_FLANN_LSH_INDEX_H_