1
/*M///////////////////////////////////////////////////////////////////////////////////////
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
11
// For Open Source Computer Vision Library
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15
// Third party copyrights are property of their respective owners.
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
20
// * Redistribution's of source code must retain the above copyright notice,
21
// this list of conditions and the following disclaimer.
23
// * Redistribution's in binary form must reproduce the above copyright notice,
24
// this list of conditions and the following disclaimer in the documentation
25
// and/or other materials provided with the distribution.
27
// * The name of the copyright holders may not be used to endorse or promote products
28
// derived from this software without specific prior written permission.
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
43
#include "precomp.hpp"
46
using namespace cv::cuda;
48
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
50
Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int) { throw_no_cuda(); return Ptr<cv::cuda::DescriptorMatcher>(); }
52
#else /* !defined (HAVE_CUDA) */
54
// Forward declarations of the CUDA kernel launchers implemented in the .cu files
// (bf_match, bf_knnmatch, bf_radius_match). Each family comes in three distance
// flavors (L1 / L2 / Hamming) and in single-train / train-collection variants.
// NOTE(review): this chunk appears to have dropped lines during extraction
// (bare line numbers are interleaved and several declarations are truncated
// before their trailing "cudaStream_t stream);" — confirm against the original file).
namespace cv { namespace cuda { namespace device
58
// 1-NN match against a single train descriptor matrix.
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
59
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
61
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
62
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
64
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
65
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
68
// 1-NN match against a collection of train matrices; also reports the image index.
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
69
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
71
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
72
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
74
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
75
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
81
// k-NN match against a single train matrix; allDist is scratch space for distances.
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
82
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
84
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
85
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
87
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
88
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
91
// 2-NN ("match2") variants over a train collection — used by the k==2 knnMatchAsync path.
template <typename T> void match2L1_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
92
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
94
template <typename T> void match2L2_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
95
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
97
template <typename T> void match2Hamming_gpu(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
98
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
102
// Radius match: return every train descriptor within maxDistance; nMatches
// receives the per-query count of matches written.
namespace bf_radius_match
104
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
105
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
106
cudaStream_t stream);
107
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
108
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
109
cudaStream_t stream);
110
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
111
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
112
cudaStream_t stream);
114
// Radius match against an array of n train matrices (with optional per-image masks).
template <typename T> void matchL1_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
115
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
116
cudaStream_t stream);
118
template <typename T> void matchL2_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
119
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
120
cudaStream_t stream);
122
template <typename T> void matchHamming_gpu(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
123
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
124
cudaStream_t stream);
130
// Packs a vector of per-image train descriptor matrices (and optional masks)
// into single GpuMat "collections" of device pointers (PtrStepSzb / PtrStepb
// headers), so a kernel can iterate all images in one launch.
// When masks is empty, only the train collection is built and maskCollection is released.
// NOTE(review): lines appear to be missing from this chunk (braces/else branch
// not visible) — comments describe only the visible statements.
static void makeGpuCollection(const std::vector<GpuMat>& trainDescCollection,
131
const std::vector<GpuMat>& masks,
132
GpuMat& trainCollection,
133
GpuMat& maskCollection)
135
if (trainDescCollection.empty())
140
// Host-side staging row: one PtrStepSzb header per train image.
Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
142
PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
144
for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr)
145
*trainCollectionCPU_ptr = trainDescCollection[i];
147
trainCollection.upload(trainCollectionCPU);
148
maskCollection.release();
152
// Masked path: one mask per train image is required.
CV_Assert( masks.size() == trainDescCollection.size() );
154
Mat trainCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepSzb)));
155
Mat maskCollectionCPU(1, static_cast<int>(trainDescCollection.size()), CV_8UC(sizeof(PtrStepb)));
157
PtrStepSzb* trainCollectionCPU_ptr = trainCollectionCPU.ptr<PtrStepSzb>();
158
PtrStepb* maskCollectionCPU_ptr = maskCollectionCPU.ptr<PtrStepb>();
160
for (size_t i = 0, size = trainDescCollection.size(); i < size; ++i, ++trainCollectionCPU_ptr, ++maskCollectionCPU_ptr)
162
const GpuMat& train = trainDescCollection[i];
163
const GpuMat& mask = masks[i];
165
// Each mask (if present) must be 8UC1 with one column per train descriptor row.
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.cols == train.rows) );
167
*trainCollectionCPU_ptr = train;
168
*maskCollectionCPU_ptr = mask;
171
trainCollection.upload(trainCollectionCPU);
172
maskCollection.upload(maskCollectionCPU);
176
// Brute-force descriptor matcher backed by CUDA kernels.
// Supports NORM_L1, NORM_L2 and NORM_HAMMING (enforced in the constructor).
// Train descriptors added via add() are accumulated in trainDescCollection_.
// Each sync operation (match / knnMatch / radiusMatch) delegates to its Async
// counterpart plus a *Convert step that downloads and decodes the GPU results.
// NOTE(review): access specifiers and several closing braces are not visible in
// this garbled chunk — declaration order is preserved as found.
class BFMatcher_Impl : public cv::cuda::DescriptorMatcher
179
explicit BFMatcher_Impl(int norm) : norm_(norm)
181
CV_Assert( norm == NORM_L1 || norm == NORM_L2 || norm == NORM_HAMMING );
184
// Per-descriptor masks are supported by all matching paths.
virtual bool isMaskSupported() const { return true; }
186
virtual void add(const std::vector<GpuMat>& descriptors)
188
trainDescCollection_.insert(trainDescCollection_.end(), descriptors.begin(), descriptors.end());
191
virtual const std::vector<GpuMat>& getTrainDescriptors() const
193
return trainDescCollection_;
198
trainDescCollection_.clear();
201
virtual bool empty() const
203
return trainDescCollection_.empty();
210
// --- 1-NN matching ---
virtual void match(InputArray queryDescriptors, InputArray trainDescriptors,
211
std::vector<DMatch>& matches,
212
InputArray mask = noArray());
214
virtual void match(InputArray queryDescriptors,
215
std::vector<DMatch>& matches,
216
const std::vector<GpuMat>& masks = std::vector<GpuMat>());
218
virtual void matchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
220
InputArray mask = noArray(),
221
Stream& stream = Stream::Null());
223
virtual void matchAsync(InputArray queryDescriptors,
225
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
226
Stream& stream = Stream::Null());
228
virtual void matchConvert(InputArray gpu_matches,
229
std::vector<DMatch>& matches);
231
// --- k-NN matching ---
virtual void knnMatch(InputArray queryDescriptors, InputArray trainDescriptors,
232
std::vector<std::vector<DMatch> >& matches,
234
InputArray mask = noArray(),
235
bool compactResult = false);
237
virtual void knnMatch(InputArray queryDescriptors,
238
std::vector<std::vector<DMatch> >& matches,
240
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
241
bool compactResult = false);
243
virtual void knnMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
246
InputArray mask = noArray(),
247
Stream& stream = Stream::Null());
249
virtual void knnMatchAsync(InputArray queryDescriptors,
252
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
253
Stream& stream = Stream::Null());
255
virtual void knnMatchConvert(InputArray gpu_matches,
256
std::vector< std::vector<DMatch> >& matches,
257
bool compactResult = false);
259
// --- radius matching ---
virtual void radiusMatch(InputArray queryDescriptors, InputArray trainDescriptors,
260
std::vector<std::vector<DMatch> >& matches,
262
InputArray mask = noArray(),
263
bool compactResult = false);
265
virtual void radiusMatch(InputArray queryDescriptors,
266
std::vector<std::vector<DMatch> >& matches,
268
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
269
bool compactResult = false);
271
virtual void radiusMatchAsync(InputArray queryDescriptors, InputArray trainDescriptors,
274
InputArray mask = noArray(),
275
Stream& stream = Stream::Null());
277
virtual void radiusMatchAsync(InputArray queryDescriptors,
280
const std::vector<GpuMat>& masks = std::vector<GpuMat>(),
281
Stream& stream = Stream::Null());
283
virtual void radiusMatchConvert(InputArray gpu_matches,
284
std::vector< std::vector<DMatch> >& matches,
285
bool compactResult = false);
289
// Accumulated train descriptor matrices (one GpuMat per train image).
std::vector<GpuMat> trainDescCollection_;
296
// Synchronous 1-NN match against an explicit train descriptor matrix:
// runs the async GPU match, then decodes the packed result into DMatch objects.
// NOTE(review): the declaration of d_matches is not visible in this chunk —
// presumably a local GpuMat on a missing line; confirm against the original file.
void BFMatcher_Impl::match(InputArray _queryDescriptors, InputArray _trainDescriptors,
297
std::vector<DMatch>& matches,
301
matchAsync(_queryDescriptors, _trainDescriptors, d_matches, _mask);
302
matchConvert(d_matches, matches);
305
// Synchronous 1-NN match against the stored train collection (added via add()),
// with one optional mask per train image.
// NOTE(review): d_matches is declared on a line missing from this chunk.
void BFMatcher_Impl::match(InputArray _queryDescriptors,
306
std::vector<DMatch>& matches,
307
const std::vector<GpuMat>& masks)
310
matchAsync(_queryDescriptors, d_matches, masks);
311
matchConvert(d_matches, matches);
314
// Asynchronous 1-NN match, single train matrix.
// Output layout: _matches is a 2 x nQuery CV_32SC1 GpuMat —
//   row 0 = best train index per query (int), row 1 = its distance (float bits).
// The kernel launcher is chosen by (norm_, query depth); a null entry in the
// caller table means that depth/norm combination is unsupported.
void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
315
OutputArray _matches,
319
using namespace cv::cuda::device::bf_match;
321
const GpuMat query = _queryDescriptors.getGpuMat();
322
const GpuMat train = _trainDescriptors.getGpuMat();
323
const GpuMat mask = _mask.getGpuMat();
325
// Nothing to do for empty inputs (early-out branch body not visible here).
if (query.empty() || train.empty())
331
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
332
CV_Assert( train.cols == query.cols && train.type() == query.type() );
333
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );
335
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, const PtrStepSzb& mask,
336
const PtrStepSzi& trainIdx, const PtrStepSzf& distance,
337
cudaStream_t stream);
339
// Caller tables indexed by query.depth(); 0 marks unsupported combinations.
static const caller_t callersL1[] =
341
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
342
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
343
matchL1_gpu<int>, matchL1_gpu<float>
345
static const caller_t callersL2[] =
347
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
348
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
349
0/*matchL2_gpu<int>*/, matchL2_gpu<float>
351
static const caller_t callersHamming[] =
353
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
354
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
355
matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
358
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
360
const caller_t func = callers[query.depth()];
363
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
366
const int nQuery = query.rows;
368
_matches.create(2, nQuery, CV_32SC1);
369
GpuMat matches = _matches.getGpuMat();
371
// Row views into the packed result buffer (no extra allocation).
GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
372
GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(1));
374
func(query, train, mask, trainIdx, distance, StreamAccessor::getStream(stream));
377
// Asynchronous 1-NN match against the stored train collection.
// Output layout: _matches is 3 x nQuery CV_32SC1 —
//   row 0 = train index, row 1 = image index, row 2 = distance (float bits).
void BFMatcher_Impl::matchAsync(InputArray _queryDescriptors,
378
OutputArray _matches,
379
const std::vector<GpuMat>& masks,
382
using namespace cv::cuda::device::bf_match;
384
const GpuMat query = _queryDescriptors.getGpuMat();
386
if (query.empty() || trainDescCollection_.empty())
392
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
394
// Flatten the per-image train/mask vectors into device pointer collections.
GpuMat trainCollection, maskCollection;
395
makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);
397
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
398
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance,
399
cudaStream_t stream);
401
// Caller tables indexed by query.depth(); 0 marks unsupported combinations.
static const caller_t callersL1[] =
403
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
404
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
405
matchL1_gpu<int>, matchL1_gpu<float>
407
static const caller_t callersL2[] =
409
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
410
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
411
0/*matchL2_gpu<int>*/, matchL2_gpu<float>
413
static const caller_t callersHamming[] =
415
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
416
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
417
matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
420
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
422
const caller_t func = callers[query.depth()];
425
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
428
const int nQuery = query.rows;
430
_matches.create(3, nQuery, CV_32SC1);
431
GpuMat matches = _matches.getGpuMat();
433
// Row views into the packed result buffer.
GpuMat trainIdx(1, nQuery, CV_32SC1, matches.ptr(0));
434
GpuMat imgIdx(1, nQuery, CV_32SC1, matches.ptr(1));
435
GpuMat distance(1, nQuery, CV_32FC1, matches.ptr(2));
437
func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
440
// Decodes the packed 1-NN result buffer (2 or 3 rows, see matchAsync) into a
// flat std::vector<DMatch>. Accepts either a GpuMat (downloaded first) or a
// host Mat. 2-row input means no image index (imgIdx defaults to 0).
// NOTE(review): the declaration of the local gpu_matches Mat is not visible in
// this chunk; also the skip of unmatched entries (trainIdx == -1) is presumably
// on a missing line near the loop body — confirm against the original file.
void BFMatcher_Impl::matchConvert(InputArray _gpu_matches,
441
std::vector<DMatch>& matches)
444
if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
446
_gpu_matches.getGpuMat().download(gpu_matches);
450
gpu_matches = _gpu_matches.getMat();
453
if (gpu_matches.empty())
459
CV_Assert( (gpu_matches.type() == CV_32SC1) && (gpu_matches.rows == 2 || gpu_matches.rows == 3) );
461
const int nQuery = gpu_matches.cols;
464
matches.reserve(nQuery);
466
const int* trainIdxPtr = NULL;
467
const int* imgIdxPtr = NULL;
468
const float* distancePtr = NULL;
470
// 2-row layout: [trainIdx; distance]; 3-row adds imgIdx in the middle.
if (gpu_matches.rows == 2)
472
trainIdxPtr = gpu_matches.ptr<int>(0);
473
distancePtr = gpu_matches.ptr<float>(1);
477
trainIdxPtr = gpu_matches.ptr<int>(0);
478
imgIdxPtr = gpu_matches.ptr<int>(1);
479
distancePtr = gpu_matches.ptr<float>(2);
482
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
484
const int trainIdx = trainIdxPtr[queryIdx];
488
const int imgIdx = imgIdxPtr ? imgIdxPtr[queryIdx] : 0;
489
const float distance = distancePtr[queryIdx];
491
DMatch m(queryIdx, trainIdx, imgIdx, distance);
493
matches.push_back(m);
501
// Synchronous k-NN match against an explicit train matrix: async GPU match,
// then decode into per-query vectors of DMatch.
// NOTE(review): d_matches is declared on a line missing from this chunk.
void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
502
std::vector<std::vector<DMatch> >& matches,
508
knnMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, k, _mask);
509
knnMatchConvert(d_matches, matches, compactResult);
512
// Synchronous k-NN match against the stored train collection.
// NOTE(review): this chunk appears to fuse two overloads — the k==2 fast path
// (delegating to knnMatchAsync, visible at the top) and a general-k fallback
// that matches each train image separately and merges the per-image results,
// keeping the best k per query. Several connecting lines are missing; comments
// describe only the visible statements.
void BFMatcher_Impl::knnMatch(InputArray _queryDescriptors,
513
std::vector<std::vector<DMatch> >& matches,
515
const std::vector<GpuMat>& masks,
521
knnMatchAsync(_queryDescriptors, d_matches, k, masks);
522
knnMatchConvert(d_matches, matches, compactResult);
526
const GpuMat query = _queryDescriptors.getGpuMat();
528
if (query.empty() || trainDescCollection_.empty())
534
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
536
std::vector< std::vector<DMatch> > curMatches;
537
std::vector<DMatch> temp;
540
matches.resize(query.rows);
541
for (size_t i = 0; i < matches.size(); ++i)
542
matches[i].reserve(k);
544
// Match against each stored train image in turn, tagging results with imgIdx.
for (size_t imgIdx = 0; imgIdx < trainDescCollection_.size(); ++imgIdx)
546
knnMatch(query, trainDescCollection_[imgIdx], curMatches, k, masks.empty() ? GpuMat() : masks[imgIdx]);
548
for (int queryIdx = 0; queryIdx < query.rows; ++queryIdx)
550
std::vector<DMatch>& localMatch = curMatches[queryIdx];
551
std::vector<DMatch>& globalMatch = matches[queryIdx];
553
for (size_t i = 0; i < localMatch.size(); ++i)
554
localMatch[i].imgIdx = imgIdx;
557
// Merge the sorted per-image results with the accumulated global results,
// then keep only the best k.
std::merge(globalMatch.begin(), globalMatch.end(), localMatch.begin(), localMatch.end(), std::back_inserter(temp));
560
const size_t count = std::min(static_cast<size_t>(k), temp.size());
561
std::copy(temp.begin(), temp.begin() + count, std::back_inserter(globalMatch));
567
// compactResult handling: drop queries that ended up with no matches.
std::vector< std::vector<DMatch> >::iterator new_end = std::remove_if(matches.begin(), matches.end(), std::mem_fun_ref(&std::vector<DMatch>::empty));
568
matches.erase(new_end, matches.end());
573
// Asynchronous k-NN match, single train matrix.
// Two output layouts (the branch condition is on a missing line — presumably
// k == 2 vs. general k; confirm against the original file):
//   compact: 2 x nQuery CV_32SC2 (paired trainIdx / distance per query), or
//   general: (2*nQuery) x k CV_32SC1, top half trainIdx, bottom half distance.
// allDist is per-launch scratch taken from the stream's BufferPool.
void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
574
OutputArray _matches,
579
using namespace cv::cuda::device::bf_knnmatch;
581
const GpuMat query = _queryDescriptors.getGpuMat();
582
const GpuMat train = _trainDescriptors.getGpuMat();
583
const GpuMat mask = _mask.getGpuMat();
585
if (query.empty() || train.empty())
591
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
592
CV_Assert( train.cols == query.cols && train.type() == query.type() );
593
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );
595
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, int k, const PtrStepSzb& mask,
596
const PtrStepSzb& trainIdx, const PtrStepSzb& distance, const PtrStepSzf& allDist,
597
cudaStream_t stream);
599
// Caller tables indexed by query.depth(); 0 marks unsupported combinations.
static const caller_t callersL1[] =
601
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
602
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
603
matchL1_gpu<int>, matchL1_gpu<float>
605
static const caller_t callersL2[] =
607
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
608
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
609
0/*matchL2_gpu<int>*/, matchL2_gpu<float>
611
static const caller_t callersHamming[] =
613
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
614
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
615
matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
618
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
620
const caller_t func = callers[query.depth()];
623
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
626
const int nQuery = query.rows;
627
const int nTrain = train.rows;
629
GpuMat trainIdx, distance, allDist;
632
_matches.create(2, nQuery, CV_32SC2);
633
GpuMat matches = _matches.getGpuMat();
635
trainIdx = GpuMat(1, nQuery, CV_32SC2, matches.ptr(0));
636
distance = GpuMat(1, nQuery, CV_32FC2, matches.ptr(1));
640
_matches.create(2 * nQuery, k, CV_32SC1);
641
GpuMat matches = _matches.getGpuMat();
643
trainIdx = GpuMat(nQuery, k, CV_32SC1, matches.ptr(0), matches.step);
644
distance = GpuMat(nQuery, k, CV_32FC1, matches.ptr(nQuery), matches.step);
646
// Scratch distance matrix reused from the stream-local buffer pool.
BufferPool pool(stream);
647
allDist = pool.getBuffer(nQuery, nTrain, CV_32FC1);
650
// -1 marks "no match" so the convert step can skip unmatched slots.
trainIdx.setTo(Scalar::all(-1), stream);
652
func(query, train, k, mask, trainIdx, distance, allDist, StreamAccessor::getStream(stream));
655
// Asynchronous k-NN match against the stored train collection.
// Only k == 2 is implemented (the guard condition is on a missing line; the
// visible CV_Error confirms other k values are rejected). Uses the match2*
// kernels, which pack two candidates per query into CV_32SC2 / CV_32FC2 rows.
// Output layout: 3 x nQuery CV_32SC2 — rows: trainIdx pairs, imgIdx pairs,
// distance pairs.
void BFMatcher_Impl::knnMatchAsync(InputArray _queryDescriptors,
656
OutputArray _matches,
658
const std::vector<GpuMat>& masks,
661
using namespace cv::cuda::device::bf_knnmatch;
665
CV_Error(Error::StsNotImplemented, "only k=2 mode is supported for now");
668
const GpuMat query = _queryDescriptors.getGpuMat();
670
if (query.empty() || trainDescCollection_.empty())
676
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
678
GpuMat trainCollection, maskCollection;
679
makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);
681
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& trains, const PtrStepSz<PtrStepb>& masks,
682
const PtrStepSzb& trainIdx, const PtrStepSzb& imgIdx, const PtrStepSzb& distance,
683
cudaStream_t stream);
685
// Caller tables indexed by query.depth(); 0 marks unsupported combinations.
static const caller_t callersL1[] =
687
match2L1_gpu<unsigned char>, 0/*match2L1_gpu<signed char>*/,
688
match2L1_gpu<unsigned short>, match2L1_gpu<short>,
689
match2L1_gpu<int>, match2L1_gpu<float>
691
static const caller_t callersL2[] =
693
0/*match2L2_gpu<unsigned char>*/, 0/*match2L2_gpu<signed char>*/,
694
0/*match2L2_gpu<unsigned short>*/, 0/*match2L2_gpu<short>*/,
695
0/*match2L2_gpu<int>*/, match2L2_gpu<float>
697
static const caller_t callersHamming[] =
699
match2Hamming_gpu<unsigned char>, 0/*match2Hamming_gpu<signed char>*/,
700
match2Hamming_gpu<unsigned short>, 0/*match2Hamming_gpu<short>*/,
701
match2Hamming_gpu<int>, 0/*match2Hamming_gpu<float>*/
704
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
706
const caller_t func = callers[query.depth()];
709
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
712
const int nQuery = query.rows;
714
_matches.create(3, nQuery, CV_32SC2);
715
GpuMat matches = _matches.getGpuMat();
717
GpuMat trainIdx(1, nQuery, CV_32SC2, matches.ptr(0));
718
GpuMat imgIdx(1, nQuery, CV_32SC2, matches.ptr(1));
719
GpuMat distance(1, nQuery, CV_32FC2, matches.ptr(2));
721
// -1 marks "no match" slots for the convert step.
trainIdx.setTo(Scalar::all(-1), stream);
723
func(query, trainCollection, maskCollection, trainIdx, imgIdx, distance, StreamAccessor::getStream(stream));
726
// Decodes either k-NN result layout (see knnMatchAsync) into per-query
// vectors of DMatch:
//   CV_32SC2: the compact k==2 layout, 2 or 3 rows (3 rows adds imgIdx);
//   CV_32SC1: the general layout, (2*nQuery) rows x k cols.
// NOTE(review): the local gpu_matches Mat declaration, the per-k pointer
// advances for the SC2 layout, and the trainIdx == -1 skip are on lines
// missing from this chunk — confirm against the original file.
void BFMatcher_Impl::knnMatchConvert(InputArray _gpu_matches,
727
std::vector< std::vector<DMatch> >& matches,
731
if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
733
_gpu_matches.getGpuMat().download(gpu_matches);
737
gpu_matches = _gpu_matches.getMat();
740
if (gpu_matches.empty())
746
CV_Assert( ((gpu_matches.type() == CV_32SC2) && (gpu_matches.rows == 2 || gpu_matches.rows == 3)) ||
747
(gpu_matches.type() == CV_32SC1) );
749
int nQuery = -1, k = -1;
751
const int* trainIdxPtr = NULL;
752
const int* imgIdxPtr = NULL;
753
const float* distancePtr = NULL;
755
if (gpu_matches.type() == CV_32SC2)
757
nQuery = gpu_matches.cols;
760
if (gpu_matches.rows == 2)
762
trainIdxPtr = gpu_matches.ptr<int>(0);
763
distancePtr = gpu_matches.ptr<float>(1);
767
trainIdxPtr = gpu_matches.ptr<int>(0);
768
imgIdxPtr = gpu_matches.ptr<int>(1);
769
distancePtr = gpu_matches.ptr<float>(2);
774
// General CV_32SC1 layout: top nQuery rows = trainIdx, bottom = distance.
nQuery = gpu_matches.rows / 2;
775
k = gpu_matches.cols;
777
trainIdxPtr = gpu_matches.ptr<int>(0);
778
distancePtr = gpu_matches.ptr<float>(nQuery);
782
matches.reserve(nQuery);
784
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
786
matches.push_back(std::vector<DMatch>());
787
std::vector<DMatch>& curMatches = matches.back();
788
curMatches.reserve(k);
790
for (int i = 0; i < k; ++i)
792
const int trainIdx = *trainIdxPtr;
796
const int imgIdx = imgIdxPtr ? *imgIdxPtr : 0;
797
const float distance = *distancePtr;
799
DMatch m(queryIdx, trainIdx, imgIdx, distance);
801
curMatches.push_back(m);
809
// compactResult: drop queries with no matches (removal body not visible here).
if (compactResult && curMatches.empty())
820
// Synchronous radius match against an explicit train matrix: async GPU match,
// then decode into per-query vectors of DMatch sorted by distance.
// NOTE(review): d_matches is declared on a line missing from this chunk.
void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors, InputArray _trainDescriptors,
821
std::vector<std::vector<DMatch> >& matches,
827
radiusMatchAsync(_queryDescriptors, _trainDescriptors, d_matches, maxDistance, _mask);
828
radiusMatchConvert(d_matches, matches, compactResult);
831
// Synchronous radius match against the stored train collection.
// NOTE(review): d_matches is declared on a line missing from this chunk.
void BFMatcher_Impl::radiusMatch(InputArray _queryDescriptors,
832
std::vector<std::vector<DMatch> >& matches,
834
const std::vector<GpuMat>& masks,
838
radiusMatchAsync(_queryDescriptors, d_matches, maxDistance, masks);
839
radiusMatchConvert(d_matches, matches, compactResult);
842
// Asynchronous radius match, single train matrix.
// Output layout: (2*nQuery + 1) x cols CV_32SC1 —
//   rows [0, nQuery)        : train indices per query,
//   rows [nQuery, 2*nQuery) : distances (float bits),
//   last row                : per-query match counts (written by the kernel).
// cols is a heuristic capacity: max(nTrain/100, nQuery) candidate slots per query.
void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors, InputArray _trainDescriptors,
843
OutputArray _matches,
848
using namespace cv::cuda::device::bf_radius_match;
850
const GpuMat query = _queryDescriptors.getGpuMat();
851
const GpuMat train = _trainDescriptors.getGpuMat();
852
const GpuMat mask = _mask.getGpuMat();
854
if (query.empty() || train.empty())
860
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
861
CV_Assert( train.cols == query.cols && train.type() == query.type() );
862
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.rows == query.rows && mask.cols == train.rows) );
864
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb& train, float maxDistance, const PtrStepSzb& mask,
865
const PtrStepSzi& trainIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
866
cudaStream_t stream);
868
// Caller tables indexed by query.depth(); 0 marks unsupported combinations.
static const caller_t callersL1[] =
870
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
871
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
872
matchL1_gpu<int>, matchL1_gpu<float>
874
static const caller_t callersL2[] =
876
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
877
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
878
0/*matchL2_gpu<int>*/, matchL2_gpu<float>
880
static const caller_t callersHamming[] =
882
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
883
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
884
matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
887
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
889
const caller_t func = callers[query.depth()];
892
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
895
const int nQuery = query.rows;
896
const int nTrain = train.rows;
898
// Heuristic per-query capacity for radius candidates.
const int cols = std::max((nTrain / 100), nQuery);
900
_matches.create(2 * nQuery + 1, cols, CV_32SC1);
901
GpuMat matches = _matches.getGpuMat();
903
GpuMat trainIdx(nQuery, cols, CV_32SC1, matches.ptr(0), matches.step);
904
GpuMat distance(nQuery, cols, CV_32FC1, matches.ptr(nQuery), matches.step);
905
GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(2 * nQuery));
907
// Counts must start at zero — the kernel increments them per match found.
nMatches.setTo(Scalar::all(0), stream);
909
func(query, train, maxDistance, mask, trainIdx, distance, nMatches, StreamAccessor::getStream(stream));
912
// Asynchronous radius match against the stored train collection.
// Output layout: (3*nQuery + 1) x nQuery —
//   rows [0, nQuery)           : train indices,
//   rows [nQuery, 2*nQuery)    : image indices,
//   rows [2*nQuery, 3*nQuery)  : distances,
//   last row                   : per-query match counts.
// Here the kernel receives raw arrays of PtrStepSzb headers (trains_/masks_)
// rather than an uploaded collection — makeGpuCollection is still called for
// its validation side effects on this path.
void BFMatcher_Impl::radiusMatchAsync(InputArray _queryDescriptors,
913
OutputArray _matches,
915
const std::vector<GpuMat>& masks,
918
using namespace cv::cuda::device::bf_radius_match;
920
const GpuMat query = _queryDescriptors.getGpuMat();
922
if (query.empty() || trainDescCollection_.empty())
928
CV_Assert( query.channels() == 1 && query.depth() < CV_64F );
930
GpuMat trainCollection, maskCollection;
931
makeGpuCollection(trainDescCollection_, masks, trainCollection, maskCollection);
933
typedef void (*caller_t)(const PtrStepSzb& query, const PtrStepSzb* trains, int n, float maxDistance, const PtrStepSzb* masks,
934
const PtrStepSzi& trainIdx, const PtrStepSzi& imgIdx, const PtrStepSzf& distance, const PtrStepSz<unsigned int>& nMatches,
935
cudaStream_t stream);
937
// Caller tables indexed by query.depth(); 0 marks unsupported combinations.
static const caller_t callersL1[] =
939
matchL1_gpu<unsigned char>, 0/*matchL1_gpu<signed char>*/,
940
matchL1_gpu<unsigned short>, matchL1_gpu<short>,
941
matchL1_gpu<int>, matchL1_gpu<float>
943
static const caller_t callersL2[] =
945
0/*matchL2_gpu<unsigned char>*/, 0/*matchL2_gpu<signed char>*/,
946
0/*matchL2_gpu<unsigned short>*/, 0/*matchL2_gpu<short>*/,
947
0/*matchL2_gpu<int>*/, matchL2_gpu<float>
949
static const caller_t callersHamming[] =
951
matchHamming_gpu<unsigned char>, 0/*matchHamming_gpu<signed char>*/,
952
matchHamming_gpu<unsigned short>, 0/*matchHamming_gpu<short>*/,
953
matchHamming_gpu<int>, 0/*matchHamming_gpu<float>*/
956
const caller_t* callers = norm_ == NORM_L1 ? callersL1 : norm_ == NORM_L2 ? callersL2 : callersHamming;
958
const caller_t func = callers[query.depth()];
961
CV_Error(Error::StsUnsupportedFormat, "unsupported combination of query.depth() and norm");
964
const int nQuery = query.rows;
966
_matches.create(3 * nQuery + 1, nQuery, CV_32FC1);
967
GpuMat matches = _matches.getGpuMat();
969
GpuMat trainIdx(nQuery, nQuery, CV_32SC1, matches.ptr(0), matches.step);
970
GpuMat imgIdx(nQuery, nQuery, CV_32SC1, matches.ptr(nQuery), matches.step);
971
GpuMat distance(nQuery, nQuery, CV_32FC1, matches.ptr(2 * nQuery), matches.step);
972
GpuMat nMatches(1, nQuery, CV_32SC1, matches.ptr(3 * nQuery));
974
nMatches.setTo(Scalar::all(0), stream);
976
// Device-pointer headers are passed by host array; GpuMat converts implicitly.
std::vector<PtrStepSzb> trains_(trainDescCollection_.begin(), trainDescCollection_.end());
977
std::vector<PtrStepSzb> masks_(masks.begin(), masks.end());
979
func(query, &trains_[0], static_cast<int>(trains_.size()), maxDistance, masks_.size() == 0 ? 0 : &masks_[0],
980
trainIdx, imgIdx, distance, nMatches, StreamAccessor::getStream(stream));
983
// Decodes a packed radius-match buffer (see radiusMatchAsync layouts) into
// per-query vectors of DMatch, sorted by distance. CV_32SC1 means the
// single-train layout ((rows-1)/2 queries, no imgIdx); otherwise the
// collection layout ((rows-1)/3 queries, with imgIdx).
// NOTE(review): the local gpu_matches Mat and nQuery declarations, the
// compactResult skip for empty queries, and the per-element curMatches[i]
// assignment are on lines missing from this chunk — confirm against the
// original file.
void BFMatcher_Impl::radiusMatchConvert(InputArray _gpu_matches,
984
std::vector< std::vector<DMatch> >& matches,
988
if (_gpu_matches.kind() == _InputArray::CUDA_GPU_MAT)
990
_gpu_matches.getGpuMat().download(gpu_matches);
994
gpu_matches = _gpu_matches.getMat();
997
if (gpu_matches.empty())
1003
CV_Assert( gpu_matches.type() == CV_32SC1 || gpu_matches.type() == CV_32FC1 );
1007
const int* trainIdxPtr = NULL;
1008
const int* imgIdxPtr = NULL;
1009
const float* distancePtr = NULL;
1010
const int* nMatchesPtr = NULL;
1012
if (gpu_matches.type() == CV_32SC1)
1014
// Single-train layout: [trainIdx rows; distance rows; counts row].
nQuery = (gpu_matches.rows - 1) / 2;
1016
trainIdxPtr = gpu_matches.ptr<int>(0);
1017
distancePtr = gpu_matches.ptr<float>(nQuery);
1018
nMatchesPtr = gpu_matches.ptr<int>(2 * nQuery);
1022
// Collection layout adds an imgIdx band between trainIdx and distance.
nQuery = (gpu_matches.rows - 1) / 3;
1024
trainIdxPtr = gpu_matches.ptr<int>(0);
1025
imgIdxPtr = gpu_matches.ptr<int>(nQuery);
1026
distancePtr = gpu_matches.ptr<float>(2 * nQuery);
1027
nMatchesPtr = gpu_matches.ptr<int>(3 * nQuery);
1031
matches.reserve(nQuery);
1033
for (int queryIdx = 0; queryIdx < nQuery; ++queryIdx)
1035
// Clamp to buffer capacity — the kernel may count more matches than fit.
const int nMatched = std::min(nMatchesPtr[queryIdx], gpu_matches.cols);
1041
matches.push_back(std::vector<DMatch>());
1046
matches.push_back(std::vector<DMatch>(nMatched));
1047
std::vector<DMatch>& curMatches = matches.back();
1049
for (int i = 0; i < nMatched; ++i)
1051
const int trainIdx = trainIdxPtr[i];
1053
const int imgIdx = imgIdxPtr ? imgIdxPtr[i] : 0;
1054
const float distance = distancePtr[i];
1056
DMatch m(queryIdx, trainIdx, imgIdx, distance);
1061
// Radius results come back unsorted; order by distance for callers.
std::sort(curMatches.begin(), curMatches.end());
1064
// Advance to the next query's row in each band.
trainIdxPtr += gpu_matches.cols;
1065
distancePtr += gpu_matches.cols;
1067
imgIdxPtr += gpu_matches.cols;
1072
// Public factory: creates the CUDA brute-force matcher for the given norm
// (NORM_L1, NORM_L2 or NORM_HAMMING — validated in the BFMatcher_Impl ctor).
Ptr<cv::cuda::DescriptorMatcher> cv::cuda::DescriptorMatcher::createBFMatcher(int norm)
1074
return makePtr<BFMatcher_Impl>(norm);
1077
#endif /* !defined (HAVE_CUDA) */