1
/*M///////////////////////////////////////////////////////////////////////////////////////
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
10
// Intel License Agreement
11
// For Open Source Computer Vision Library
13
// Copyright (C) 2000, Intel Corporation, all rights reserved.
14
// Third party copyrights are property of their respective owners.
16
// Redistribution and use in source and binary forms, with or without modification,
17
// are permitted provided that the following conditions are met:
19
// * Redistribution's of source code must retain the above copyright notice,
20
// this list of conditions and the following disclaimer.
22
// * Redistribution's in binary form must reproduce the above copyright notice,
23
// this list of conditions and the following disclaimer in the documentation
24
// and/or other materials provided with the distribution.
26
// * The name of Intel Corporation may not be used to endorse or promote products
27
// derived from this software without specific prior written permission.
29
// This software is provided by the copyright holders and contributors "as is" and
30
// any express or implied warranties, including, but not limited to, the implied
31
// warranties of merchantability and fitness for a particular purpose are disclaimed.
32
// In no event shall the Intel Corporation or contributors be liable for any direct,
33
// indirect, incidental, special, exemplary, or consequential damages
34
// (including, but not limited to, procurement of substitute goods or services;
35
// loss of use, data, or profits; or business interruption) however caused
36
// and on any theory of liability, whether in contract, strict liability,
37
// or tort (including negligence or otherwise) arising in any way out of
38
// the use of this software, even if advised of the possibility of such damage.
42
#include "precomp.hpp"
43
#include "opencl_kernels_imgproc.hpp"
45
////////////////////////////////////////////////// matchTemplate //////////////////////////////////////////////////////////
52
/////////////////////////////////////////////////// CCORR //////////////////////////////////////////////////////////////
59
static bool extractFirstChannel_32F(InputArray _image, OutputArray _result, int cn)
61
int depth = _image.depth();
63
ocl::Device dev = ocl::Device::getDefault();
64
int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
66
ocl::Kernel k("extractFirstChannel", ocl::imgproc::match_template_oclsrc, format("-D FIRST_CHANNEL -D T1=%s -D cn=%d -D PIX_PER_WI_Y=%d",
67
ocl::typeToStr(depth), cn, pxPerWIy));
71
UMat image = _image.getUMat();
72
UMat result = _result.getUMat();
75
size_t globalsize[2] = {(size_t)result.cols, ((size_t)result.rows+pxPerWIy-1)/pxPerWIy};
76
return k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::WriteOnly(result)).run( 2, globalsize, NULL, false);
79
static bool sumTemplate(InputArray _src, UMat & result)
81
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
82
int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
83
size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
86
while (wgs2_aligned < (int)wgs)
91
ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc,
92
format("-D CALC_SUM -D T=%s -D T1=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d",
93
ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), cn,
94
ocl::convertTypeStr(depth, wdepth, cn, cvt),
95
(int)wgs, wgs2_aligned));
99
UMat src = _src.getUMat();
100
result.create(1, 1, CV_32FC1);
102
ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
103
resarg = ocl::KernelArg::PtrWriteOnly(result);
105
k.args(srcarg, src.cols, (int)src.total(), resarg);
107
size_t globalsize = wgs;
108
return k.run(1, &globalsize, &wgs, false);
111
static bool useNaive(Size size)
114
return size.height < dft_size && size.width < dft_size;
121
Size user_block_size;
124
UMat image_spect, templ_spect, result_spect;
125
UMat image_block, templ_block, result_data;
127
void create(Size image_size, Size templ_size);
130
// Computes the tiling geometry used by the DFT-based correlation and allocates
// the work buffers that match it.
//
// image_size - size of the image that will be correlated.
// templ_size - size of the template.
//
// On return:
//   result_size - (image - templ + 1) in both dimensions;
//   dft_size    - size of one DFT tile, as returned by getOptimalDFTSize()
//                 (the width is forced to be at least 2);
//   block_size  - part of the result produced by one DFT tile, i.e.
//                 dft_size - templ_size + 1, clamped to result_size;
//   image_block / templ_block / result_data - dft_size, CV_32F;
//   image_spect / templ_spect / result_spect - dft_size.height rows by
//                 (dft_size.width / 2 + 1) columns, CV_32FC2.
//
// Raises CV_StsOutOfRange through CV_Error when no positive DFT size is found.
void ConvolveBuf::create(Size image_size, Size templ_size)
{
    result_size = Size(image_size.width - templ_size.width + 1,
                       image_size.height - templ_size.height + 1);

    const double blockScale = 4.5;
    const int minBlockSize = 256;

    // Initial tile guess: proportional to the template in BOTH dimensions.
    // Seeding the width from the result width would always be clamped back to
    // result_size.width by the std::min below, which disables horizontal tiling
    // and makes the DFT width grow with the image width.
    block_size.width = cvRound(templ_size.width*blockScale);
    block_size.width = std::max( block_size.width, minBlockSize - templ_size.width + 1 );
    block_size.width = std::min( block_size.width, result_size.width );
    block_size.height = cvRound(templ_size.height*blockScale);
    block_size.height = std::max( block_size.height, minBlockSize - templ_size.height + 1 );
    block_size.height = std::min( block_size.height, result_size.height );

    // A DFT tile has to hold one result block plus the template overlap.
    dft_size.width = std::max(getOptimalDFTSize(block_size.width + templ_size.width - 1), 2);
    dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1);
    if( dft_size.width <= 0 || dft_size.height <= 0 )
        CV_Error( CV_StsOutOfRange, "the input arrays are too big" );

    // Recompute the block size: use the maximum result block that fits the
    // chosen DFT size, never exceeding the result itself.
    block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
    block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);

    // Spatial-domain work buffers.
    image_block.create(dft_size, CV_32F);
    templ_block.create(dft_size, CV_32F);
    result_data.create(dft_size, CV_32F);

    // Frequency-domain buffers: two-channel, width / 2 + 1 columns.
    image_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
    templ_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
    result_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
}
169
static bool convolve_dft(InputArray _image, InputArray _templ, OutputArray _result)
172
CV_Assert(_image.type() == CV_32F);
173
CV_Assert(_templ.type() == CV_32F);
175
buf.create(_image.size(), _templ.size());
176
_result.create(buf.result_size, CV_32F);
178
UMat image = _image.getUMat();
179
UMat templ = _templ.getUMat();
181
UMat result = _result.getUMat();
183
Size& block_size = buf.block_size;
184
Size& dft_size = buf.dft_size;
186
UMat& image_block = buf.image_block;
187
UMat& templ_block = buf.templ_block;
188
UMat& result_data = buf.result_data;
190
UMat& image_spect = buf.image_spect;
191
UMat& templ_spect = buf.templ_spect;
192
UMat& result_spect = buf.result_spect;
194
UMat templ_roi = templ;
195
copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
196
templ_block.cols - templ_roi.cols, BORDER_ISOLATED);
198
dft(templ_block, templ_spect, 0, templ.rows);
200
// Process all blocks of the result matrix
201
for (int y = 0; y < result.rows; y += block_size.height)
203
for (int x = 0; x < result.cols; x += block_size.width)
205
Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
206
std::min(y + dft_size.height, image.rows) - y);
207
Rect roi0(x, y, image_roi_size.width, image_roi_size.height);
209
UMat image_roi(image, roi0);
211
copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
212
0, image_block.cols - image_roi.cols, BORDER_ISOLATED);
214
dft(image_block, image_spect, 0);
216
mulSpectrums(image_spect, templ_spect, result_spect, 0, true);
218
dft(result_spect, result_data, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
220
Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
221
std::min(y + block_size.height, result.rows) - y);
223
Rect roi1(x, y, result_roi_size.width, result_roi_size.height);
224
Rect roi2(0, 0, result_roi_size.width, result_roi_size.height);
226
UMat result_roi(result, roi1);
227
UMat result_block(result_data, roi2);
229
result_block.copyTo(result_roi);
235
static bool convolve_32F(InputArray _image, InputArray _templ, OutputArray _result)
237
_result.create(_image.rows() - _templ.rows() + 1, _image.cols() - _templ.cols() + 1, CV_32F);
239
if (_image.channels() == 1)
240
return(convolve_dft(_image, _templ, _result));
243
UMat image = _image.getUMat();
244
UMat templ = _templ.getUMat();
245
UMat result_(image.rows-templ.rows+1,(image.cols-templ.cols+1)*image.channels(), CV_32F);
246
bool ok = convolve_dft(image.reshape(1), templ.reshape(1), result_);
249
UMat result = _result.getUMat();
250
return (extractFirstChannel_32F(result_, _result, _image.channels()));
254
static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
256
int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
257
int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
259
ocl::Device dev = ocl::Device::getDefault();
260
int pxPerWIx = (cn==1 && dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
267
type = CV_MAKE_TYPE(depth, rated_cn);
268
wtype1 = CV_MAKE_TYPE(wdepth, rated_cn);
273
const char* convertToWT1 = ocl::convertTypeStr(depth, wdepth, cn, cvt);
274
const char* convertToWT = ocl::convertTypeStr(depth, wdepth, rated_cn, cvt1);
276
ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc,
277
format("-D CCORR -D T=%s -D T1=%s -D WT=%s -D WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D PIX_PER_WI_X=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype1), ocl::typeToStr(wtype),
278
convertToWT, convertToWT1, cn, pxPerWIx));
282
UMat image = _image.getUMat(), templ = _templ.getUMat();
283
_result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
284
UMat result = _result.getUMat();
286
k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
287
ocl::KernelArg::WriteOnly(result));
289
size_t globalsize[2] = { ((size_t)result.cols+pxPerWIx-1)/pxPerWIx, (size_t)result.rows};
290
return k.run(2, globalsize, NULL, false);
294
static bool matchTemplate_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
296
if (useNaive(_templ.size()))
297
return( matchTemplateNaive_CCORR(_image, _templ, _result));
300
if(_image.depth() == CV_8U)
303
UMat image = _image.getUMat();
304
UMat templ = _templ.getUMat();
305
image.convertTo(imagef, CV_32F);
306
templ.convertTo(templf, CV_32F);
307
return(convolve_32F(imagef, templf, _result));
311
return(convolve_32F(_image, _templ, _result));
316
static bool matchTemplate_CCORR_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
318
matchTemplate(_image, _templ, _result, CV_TM_CCORR);
320
int type = _image.type(), cn = CV_MAT_CN(type);
322
ocl::Kernel k("matchTemplate_CCORR_NORMED", ocl::imgproc::match_template_oclsrc,
323
format("-D CCORR_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type), cn));
327
UMat image = _image.getUMat(), templ = _templ.getUMat();
328
_result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
329
UMat result = _result.getUMat();
331
UMat image_sums, image_sqsums;
332
integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
335
if (!sumTemplate(templ, templ_sqsum))
338
k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
339
templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
341
size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
342
return k.run(2, globalsize, NULL, false);
345
////////////////////////////////////// SQDIFF //////////////////////////////////////////////////////////////
347
static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
349
int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
350
int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
353
ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc,
354
format("-D SQDIFF -D T=%s -D T1=%s -D WT=%s -D convertToWT=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth),
355
ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn));
359
UMat image = _image.getUMat(), templ = _templ.getUMat();
360
_result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
361
UMat result = _result.getUMat();
363
k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
364
ocl::KernelArg::WriteOnly(result));
366
size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
367
return k.run(2, globalsize, NULL, false);
370
static bool matchTemplate_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
372
if (useNaive(_templ.size()))
373
return( matchTemplateNaive_SQDIFF(_image, _templ, _result));
376
matchTemplate(_image, _templ, _result, CV_TM_CCORR);
378
int type = _image.type(), cn = CV_MAT_CN(type);
380
ocl::Kernel k("matchTemplate_Prepared_SQDIFF", ocl::imgproc::match_template_oclsrc,
381
format("-D SQDIFF_PREPARED -D T=%s -D cn=%d", ocl::typeToStr(type), cn));
385
UMat image = _image.getUMat(), templ = _templ.getUMat();
386
_result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
387
UMat result = _result.getUMat();
389
UMat image_sums, image_sqsums;
390
integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
393
if (!sumTemplate(_templ, templ_sqsum))
396
k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
397
templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
399
size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
401
return k.run(2, globalsize, NULL, false);
405
static bool matchTemplate_SQDIFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
407
matchTemplate(_image, _templ, _result, CV_TM_CCORR);
409
int type = _image.type(), cn = CV_MAT_CN(type);
411
ocl::Kernel k("matchTemplate_SQDIFF_NORMED", ocl::imgproc::match_template_oclsrc,
412
format("-D SQDIFF_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type), cn));
416
UMat image = _image.getUMat(), templ = _templ.getUMat();
417
_result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
418
UMat result = _result.getUMat();
420
UMat image_sums, image_sqsums;
421
integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
424
if (!sumTemplate(_templ, templ_sqsum))
427
k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
428
templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
430
size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
432
return k.run(2, globalsize, NULL, false);
435
///////////////////////////////////// CCOEFF /////////////////////////////////////////////////////////////////
437
static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
439
matchTemplate(_image, _templ, _result, CV_TM_CCORR);
441
UMat image_sums, temp;
442
integral(_image, image_sums, CV_32F);
444
int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
446
ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
447
format("-D CCOEFF -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
451
UMat templ = _templ.getUMat();
452
UMat result = _result.getUMat();
456
Scalar templMean = mean(templ);
457
float templ_sum = (float)templMean[0];
459
k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum);
463
Vec4f templ_sum = Vec4f::all(0);
464
templ_sum = (Vec4f)mean(templ);
466
k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum); }
468
size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
469
return k.run(2, globalsize, NULL, false);
472
static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
474
matchTemplate(_image, _templ, _result, CV_TM_CCORR);
476
UMat temp, image_sums, image_sqsums;
477
integral(_image, image_sums, image_sqsums, CV_32F, CV_32F);
479
int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
481
ocl::Kernel k("matchTemplate_CCOEFF_NORMED", ocl::imgproc::match_template_oclsrc,
482
format("-D CCOEFF_NORMED -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
486
UMat templ = _templ.getUMat();
487
Size size = _image.size(), tsize = templ.size();
488
_result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
489
UMat result = _result.getUMat();
491
float scale = 1.f / tsize.area();
495
float templ_sum = (float)sum(templ)[0];
497
multiply(templ, templ, temp, 1, CV_32F);
498
float templ_sqsum = (float)sum(temp)[0];
500
templ_sqsum -= scale * templ_sum * templ_sum;
503
if (templ_sqsum < DBL_EPSILON)
505
result = Scalar::all(1);
509
k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
510
ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum);
514
Vec4f templ_sum = Vec4f::all(0), templ_sqsum = Vec4f::all(0);
515
templ_sum = sum(templ);
517
multiply(templ, templ, temp, 1, CV_32F);
518
templ_sqsum = sum(temp);
520
float templ_sqsum_sum = 0;
521
for (int i = 0; i < cn; i ++)
522
templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
526
if (templ_sqsum_sum < DBL_EPSILON)
528
result = Scalar::all(1);
532
k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
533
ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale,
534
templ_sum, templ_sqsum_sum); }
536
size_t globalsize[2] = { (size_t)result.cols, (size_t)result.rows };
537
return k.run(2, globalsize, NULL, false);
540
///////////////////////////////////////////////////////////////////////////////////////////////////////////
542
static bool ocl_matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method)
544
int cn = _img.channels();
549
typedef bool (*Caller)(InputArray _img, InputArray _templ, OutputArray _result);
551
static const Caller callers[] =
553
matchTemplate_SQDIFF, matchTemplate_SQDIFF_NORMED, matchTemplate_CCORR,
554
matchTemplate_CCORR_NORMED, matchTemplate_CCOEFF, matchTemplate_CCOEFF_NORMED
556
const Caller caller = callers[method];
558
return caller(_img, _templ, _result);
565
typedef IppStatus (CV_STDCALL * ippimatchTemplate)(const void*, int, IppiSize, const void*, int, IppiSize, Ipp32f* , int , IppEnum , Ipp8u*);
567
static bool ipp_crossCorr(const Mat& src, const Mat& tpl, Mat& dst)
571
IppiSize srcRoiSize = {src.cols,src.rows};
572
IppiSize tplRoiSize = {tpl.cols,tpl.rows};
577
int depth = src.depth();
579
ippimatchTemplate ippFunc =
580
depth==CV_8U ? (ippimatchTemplate)ippiCrossCorrNorm_8u32f_C1R:
581
depth==CV_32F? (ippimatchTemplate)ippiCrossCorrNorm_32f_C1R: 0;
586
IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiNormNone | ippiROIValid);
588
status = ippiCrossCorrNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize);
592
pBuffer = ippsMalloc_8u( bufSize );
594
status = ippFunc(src.ptr(), (int)src.step, srcRoiSize, tpl.ptr(), (int)tpl.step, tplRoiSize, dst.ptr<Ipp32f>(), (int)dst.step, funCfg, pBuffer);
600
static bool ipp_sqrDistance(const Mat& src, const Mat& tpl, Mat& dst)
604
IppiSize srcRoiSize = {src.cols,src.rows};
605
IppiSize tplRoiSize = {tpl.cols,tpl.rows};
610
int depth = src.depth();
612
ippimatchTemplate ippFunc =
613
depth==CV_8U ? (ippimatchTemplate)ippiSqrDistanceNorm_8u32f_C1R:
614
depth==CV_32F? (ippimatchTemplate)ippiSqrDistanceNorm_32f_C1R: 0;
619
IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiNormNone | ippiROIValid);
621
status = ippiSqrDistanceNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize);
625
pBuffer = ippsMalloc_8u( bufSize );
627
status = ippFunc(src.ptr(), (int)src.step, srcRoiSize, tpl.ptr(), (int)tpl.step, tplRoiSize, dst.ptr<Ipp32f>(), (int)dst.step, funCfg, pBuffer);
635
void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
636
Size corrsize, int ctype,
637
Point anchor, double delta, int borderType )
639
const double blockScale = 4.5;
640
const int minBlockSize = 256;
641
std::vector<uchar> buf;
644
int depth = img.depth(), cn = img.channels();
645
int tdepth = templ.depth(), tcn = templ.channels();
646
int cdepth = CV_MAT_DEPTH(ctype), ccn = CV_MAT_CN(ctype);
648
CV_Assert( img.dims <= 2 && templ.dims <= 2 && corr.dims <= 2 );
650
if( depth != tdepth && tdepth != std::max(CV_32F, depth) )
652
_templ.convertTo(templ, std::max(CV_32F, depth));
653
tdepth = templ.depth();
656
CV_Assert( depth == tdepth || tdepth == CV_32F);
657
CV_Assert( corrsize.height <= img.rows + templ.rows - 1 &&
658
corrsize.width <= img.cols + templ.cols - 1 );
660
CV_Assert( ccn == 1 || delta == 0 );
662
corr.create(corrsize, ctype);
664
int maxDepth = depth > CV_8S ? CV_64F : std::max(std::max(CV_32F, tdepth), cdepth);
665
Size blocksize, dftsize;
667
blocksize.width = cvRound(templ.cols*blockScale);
668
blocksize.width = std::max( blocksize.width, minBlockSize - templ.cols + 1 );
669
blocksize.width = std::min( blocksize.width, corr.cols );
670
blocksize.height = cvRound(templ.rows*blockScale);
671
blocksize.height = std::max( blocksize.height, minBlockSize - templ.rows + 1 );
672
blocksize.height = std::min( blocksize.height, corr.rows );
674
dftsize.width = std::max(getOptimalDFTSize(blocksize.width + templ.cols - 1), 2);
675
dftsize.height = getOptimalDFTSize(blocksize.height + templ.rows - 1);
676
if( dftsize.width <= 0 || dftsize.height <= 0 )
677
CV_Error( CV_StsOutOfRange, "the input arrays are too big" );
679
// recompute block size
680
blocksize.width = dftsize.width - templ.cols + 1;
681
blocksize.width = MIN( blocksize.width, corr.cols );
682
blocksize.height = dftsize.height - templ.rows + 1;
683
blocksize.height = MIN( blocksize.height, corr.rows );
685
Mat dftTempl( dftsize.height*tcn, dftsize.width, maxDepth );
686
Mat dftImg( dftsize, maxDepth );
688
int i, k, bufSize = 0;
689
if( tcn > 1 && tdepth != maxDepth )
690
bufSize = templ.cols*templ.rows*CV_ELEM_SIZE(tdepth);
692
if( cn > 1 && depth != maxDepth )
693
bufSize = std::max( bufSize, (blocksize.width + templ.cols - 1)*
694
(blocksize.height + templ.rows - 1)*CV_ELEM_SIZE(depth));
696
if( (ccn > 1 || cn > 1) && cdepth != maxDepth )
697
bufSize = std::max( bufSize, blocksize.width*blocksize.height*CV_ELEM_SIZE(cdepth));
701
// compute DFT of each template plane
702
for( k = 0; k < tcn; k++ )
704
int yofs = k*dftsize.height;
706
Mat dst(dftTempl, Rect(0, yofs, dftsize.width, dftsize.height));
707
Mat dst1(dftTempl, Rect(0, yofs, templ.cols, templ.rows));
711
src = tdepth == maxDepth ? dst1 : Mat(templ.size(), tdepth, &buf[0]);
712
int pairs[] = {k, 0};
713
mixChannels(&templ, 1, &src, 1, pairs, 1);
716
if( dst1.data != src.data )
717
src.convertTo(dst1, dst1.depth());
719
if( dst.cols > templ.cols )
721
Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols));
722
part = Scalar::all(0);
724
dft(dst, dst, 0, templ.rows);
727
int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width;
728
int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height;
729
int tileCount = tileCountX * tileCountY;
731
Size wholeSize = img.size();
735
if( !(borderType & BORDER_ISOLATED) )
737
img.locateROI(wholeSize, roiofs);
738
img0.adjustROI(roiofs.y, wholeSize.height-img.rows-roiofs.y,
739
roiofs.x, wholeSize.width-img.cols-roiofs.x);
741
borderType |= BORDER_ISOLATED;
743
// calculate correlation by blocks
744
for( i = 0; i < tileCount; i++ )
746
int x = (i%tileCountX)*blocksize.width;
747
int y = (i/tileCountX)*blocksize.height;
749
Size bsz(std::min(blocksize.width, corr.cols - x),
750
std::min(blocksize.height, corr.rows - y));
751
Size dsz(bsz.width + templ.cols - 1, bsz.height + templ.rows - 1);
752
int x0 = x - anchor.x + roiofs.x, y0 = y - anchor.y + roiofs.y;
753
int x1 = std::max(0, x0), y1 = std::max(0, y0);
754
int x2 = std::min(img0.cols, x0 + dsz.width);
755
int y2 = std::min(img0.rows, y0 + dsz.height);
756
Mat src0(img0, Range(y1, y2), Range(x1, x2));
757
Mat dst(dftImg, Rect(0, 0, dsz.width, dsz.height));
758
Mat dst1(dftImg, Rect(x1-x0, y1-y0, x2-x1, y2-y1));
759
Mat cdst(corr, Rect(x, y, bsz.width, bsz.height));
761
for( k = 0; k < cn; k++ )
764
dftImg = Scalar::all(0);
768
src = depth == maxDepth ? dst1 : Mat(y2-y1, x2-x1, depth, &buf[0]);
769
int pairs[] = {k, 0};
770
mixChannels(&src0, 1, &src, 1, pairs, 1);
773
if( dst1.data != src.data )
774
src.convertTo(dst1, dst1.depth());
776
if( x2 - x1 < dsz.width || y2 - y1 < dsz.height )
777
copyMakeBorder(dst1, dst, y1-y0, dst.rows-dst1.rows-(y1-y0),
778
x1-x0, dst.cols-dst1.cols-(x1-x0), borderType);
780
dft( dftImg, dftImg, 0, dsz.height );
781
Mat dftTempl1(dftTempl, Rect(0, tcn > 1 ? k*dftsize.height : 0,
782
dftsize.width, dftsize.height));
783
mulSpectrums(dftImg, dftTempl1, dftImg, 0, true);
784
dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height );
786
src = dftImg(Rect(0, 0, bsz.width, bsz.height));
790
if( cdepth != maxDepth )
792
Mat plane(bsz, cdepth, &buf[0]);
793
src.convertTo(plane, cdepth, 1, delta);
796
int pairs[] = {0, k};
797
mixChannels(&src, 1, &cdst, 1, pairs, 1);
802
src.convertTo(cdst, cdepth, 1, delta);
805
if( maxDepth != cdepth )
807
Mat plane(bsz, cdepth, &buf[0]);
808
src.convertTo(plane, cdepth);
811
add(src, cdst, cdst);
818
static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
820
int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
821
CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
822
CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );
824
Mat img = _img.getMat(), templ = _templ.getMat(), mask = _mask.getMat();
825
int ttype = templ.type(), tdepth = CV_MAT_DEPTH(ttype), tcn = CV_MAT_CN(ttype);
826
int mtype = img.type(), mdepth = CV_MAT_DEPTH(type), mcn = CV_MAT_CN(mtype);
831
type = CV_MAKETYPE(CV_32F, cn);
832
img.convertTo(img, type, 1.0 / 255);
838
ttype = CV_MAKETYPE(CV_32F, tcn);
839
templ.convertTo(templ, ttype, 1.0 / 255);
845
mtype = CV_MAKETYPE(CV_32F, mcn);
846
compare(mask, Scalar::all(0), mask, CMP_NE);
847
mask.convertTo(mask, mtype, 1.0 / 255);
850
Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
851
_result.create(corrSize, CV_32F);
852
Mat result = _result.getMat();
854
Mat img2 = img.mul(img);
855
Mat mask2 = mask.mul(mask);
856
Mat mask_templ = templ.mul(mask);
857
Scalar templMean, templSdv;
859
double templSum2 = 0;
860
meanStdDev( mask_templ, templMean, templSdv );
862
templSum2 = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
863
templSum2 += templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
864
templSum2 *= ((double)templ.rows * templ.cols);
866
if (method == CV_TM_SQDIFF)
868
Mat mask2_templ = templ.mul(mask2);
870
Mat corr(corrSize, CV_32F);
871
crossCorr( img, mask2_templ, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
872
crossCorr( img2, mask, result, result.size(), result.type(), Point(0,0), 0, 0 );
877
else if (method == CV_TM_CCORR_NORMED)
879
if (templSum2 < DBL_EPSILON)
881
result = Scalar::all(1);
885
Mat corr(corrSize, CV_32F);
886
crossCorr( img2, mask2, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
887
crossCorr( img, mask_templ, result, result.size(), result.type(), Point(0,0), 0, 0 );
890
result = result.mul(1/corr);
891
result /= std::sqrt(templSum2);
894
CV_Error(Error::StsNotImplemented, "");
901
static void common_matchTemplate( Mat& img, Mat& templ, Mat& result, int method, int cn )
903
if( method == CV_TM_CCORR )
906
int numType = method == CV_TM_CCORR || method == CV_TM_CCORR_NORMED ? 0 :
907
method == CV_TM_CCOEFF || method == CV_TM_CCOEFF_NORMED ? 1 : 2;
908
bool isNormed = method == CV_TM_CCORR_NORMED ||
909
method == CV_TM_SQDIFF_NORMED ||
910
method == CV_TM_CCOEFF_NORMED;
912
double invArea = 1./((double)templ.rows * templ.cols);
915
Scalar templMean, templSdv;
916
double *q0 = 0, *q1 = 0, *q2 = 0, *q3 = 0;
917
double templNorm = 0, templSum2 = 0;
919
if( method == CV_TM_CCOEFF )
921
integral(img, sum, CV_64F);
922
templMean = mean(templ);
926
integral(img, sum, sqsum, CV_64F);
927
meanStdDev( templ, templMean, templSdv );
929
templNorm = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
931
if( templNorm < DBL_EPSILON && method == CV_TM_CCOEFF_NORMED )
933
result = Scalar::all(1);
937
templSum2 = templNorm + templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
941
templMean = Scalar::all(0);
942
templNorm = templSum2;
945
templSum2 /= invArea;
946
templNorm = std::sqrt(templNorm);
947
templNorm /= std::sqrt(invArea); // care of accuracy here
949
q0 = (double*)sqsum.data;
950
q1 = q0 + templ.cols*cn;
951
q2 = (double*)(sqsum.data + templ.rows*sqsum.step);
952
q3 = q2 + templ.cols*cn;
955
double* p0 = (double*)sum.data;
956
double* p1 = p0 + templ.cols*cn;
957
double* p2 = (double*)(sum.data + templ.rows*sum.step);
958
double* p3 = p2 + templ.cols*cn;
960
int sumstep = sum.data ? (int)(sum.step / sizeof(double)) : 0;
961
int sqstep = sqsum.data ? (int)(sqsum.step / sizeof(double)) : 0;
965
for( i = 0; i < result.rows; i++ )
967
float* rrow = result.ptr<float>(i);
968
int idx = i * sumstep;
969
int idx2 = i * sqstep;
971
for( j = 0; j < result.cols; j++, idx += cn, idx2 += cn )
973
double num = rrow[j], t;
974
double wndMean2 = 0, wndSum2 = 0;
978
for( k = 0; k < cn; k++ )
980
t = p0[idx+k] - p1[idx+k] - p2[idx+k] + p3[idx+k];
982
num -= t*templMean[k];
988
if( isNormed || numType == 2 )
990
for( k = 0; k < cn; k++ )
992
t = q0[idx2+k] - q1[idx2+k] - q2[idx2+k] + q3[idx2+k];
998
num = wndSum2 - 2*num + templSum2;
1005
t = std::sqrt(MAX(wndSum2 - wndMean2,0))*templNorm;
1008
else if( fabs(num) < t*1.125 )
1009
num = num > 0 ? 1 : -1;
1011
num = method != CV_TM_SQDIFF_NORMED ? 0 : 1;
1014
rrow[j] = (float)num;
1021
#if defined HAVE_IPP
1024
static bool ipp_matchTemplate( Mat& img, Mat& templ, Mat& result, int method, int cn )
1026
bool useIppMT = (templ.rows < img.rows/2 && templ.cols < img.cols/2);
1028
if(cn == 1 && useIppMT)
1030
if(method == CV_TM_SQDIFF)
1032
if (ipp_sqrDistance(img, templ, result))
1037
if(ipp_crossCorr(img, templ, result))
1039
common_matchTemplate(img, templ, result, method, cn);
1050
////////////////////////////////////////////////////////////////////////////////////////////////////////
1052
void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
1056
cv::matchTemplateMask(_img, _templ, _result, method, _mask);
1060
int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
1061
CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
1062
CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );
1064
bool needswap = _img.size().height < _templ.size().height || _img.size().width < _templ.size().width;
1067
CV_Assert(_img.size().height <= _templ.size().height && _img.size().width <= _templ.size().width);
1070
CV_OCL_RUN(_img.dims() <= 2 && _result.isUMat(),
1071
(!needswap ? ocl_matchTemplate(_img, _templ, _result, method) : ocl_matchTemplate(_templ, _img, _result, method)))
1073
Mat img = _img.getMat(), templ = _templ.getMat();
1075
std::swap(img, templ);
1077
Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
1078
_result.create(corrSize, CV_32F);
1079
Mat result = _result.getMat();
1081
#ifdef HAVE_TEGRA_OPTIMIZATION
1082
if (tegra::useTegra() && tegra::matchTemplate(img, templ, result, method))
1086
CV_IPP_RUN(true, ipp_matchTemplate(img, templ, result, method, cn))
1088
crossCorr( img, templ, result, result.size(), result.type(), Point(0,0), 0, 0);
1090
common_matchTemplate(img, templ, result, method, cn);
1094
cvMatchTemplate( const CvArr* _img, const CvArr* _templ, CvArr* _result, int method )
1096
cv::Mat img = cv::cvarrToMat(_img), templ = cv::cvarrToMat(_templ),
1097
result = cv::cvarrToMat(_result);
1098
CV_Assert( result.size() == cv::Size(std::abs(img.cols - templ.cols) + 1,
1099
std::abs(img.rows - templ.rows) + 1) &&
1100
result.type() == CV_32F );
1101
matchTemplate(img, templ, result, method);