/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencl_kernels_imgproc.hpp"

namespace cv
{

static void
thresh_8u( const Mat& _src, Mat& _dst, uchar thresh, uchar maxval, int type )
{
    int i, j, j_scalar = 0;
    uchar tab[256];
    Size roi = _src.size();
    roi.width *= _src.channels();
    size_t src_step = _src.step;
    size_t dst_step = _dst.step;

    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
        roi.height = 1;
        src_step = dst_step = roi.width;
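
        // with continuous storage the whole image is processed as one long
        // row, so the vectorized loops below run over a single span instead
        // of restarting at every row boundary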
    }

#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::useTegra() && tegra::thresh_8u(_src, _dst, roi.width, roi.height, thresh, maxval, type))
        return;
#endif

#if defined(HAVE_IPP)
    CV_IPP_CHECK()
    {
        IppiSize sz = { roi.width, roi.height };
        CV_SUPPRESS_DEPRECATED_START
        switch( type )
        {
        case THRESH_TRUNC:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_GT_8u_C1IR(_dst.ptr(), (int)dst_step, sz, thresh) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_GT_8u_C1R(_src.ptr(), (int)src_step, _dst.ptr(), (int)dst_step, sz, thresh) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_LTVal_8u_C1IR(_dst.ptr(), (int)dst_step, sz, thresh+1, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_LTVal_8u_C1R(_src.ptr(), (int)src_step, _dst.ptr(), (int)dst_step, sz, thresh+1, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO_INV:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_GTVal_8u_C1IR(_dst.ptr(), (int)dst_step, sz, thresh, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_GTVal_8u_C1R(_src.ptr(), (int)src_step, _dst.ptr(), (int)dst_step, sz, thresh, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        }
        CV_SUPPRESS_DEPRECATED_END
    }
#endif

    switch( type )
    {
    case THRESH_BINARY:
        for( i = 0; i <= thresh; i++ )
            tab[i] = 0;
        for( ; i < 256; i++ )
            tab[i] = maxval;
        break;
    case THRESH_BINARY_INV:
        for( i = 0; i <= thresh; i++ )
            tab[i] = maxval;
        for( ; i < 256; i++ )
            tab[i] = 0;
        break;
    case THRESH_TRUNC:
        for( i = 0; i <= thresh; i++ )
            tab[i] = (uchar)i;
        for( ; i < 256; i++ )
            tab[i] = thresh;
        break;
    case THRESH_TOZERO:
        for( i = 0; i <= thresh; i++ )
            tab[i] = 0;
        for( ; i < 256; i++ )
            tab[i] = (uchar)i;
        break;
    case THRESH_TOZERO_INV:
        for( i = 0; i <= thresh; i++ )
            tab[i] = (uchar)i;
        for( ; i < 256; i++ )
            tab[i] = 0;
        break;
    default:
        CV_Error( CV_StsBadArg, "Unknown threshold type" );
    }

#if CV_SSE2
    if( checkHardwareSupport(CV_CPU_SSE2) )
    {
        __m128i _x80 = _mm_set1_epi8('\x80');
        __m128i thresh_u = _mm_set1_epi8(thresh);
        __m128i thresh_s = _mm_set1_epi8(thresh ^ 0x80);
        __m128i maxval_ = _mm_set1_epi8(maxval);
        j_scalar = roi.width & -8;
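
        // _mm_cmpgt_epi8 compares *signed* bytes, so both the data and the
        // threshold are biased by XOR'ing with 0x80, which maps [0,255] onto
        // [-128,127] while preserving order; thresh_s holds the biased
        // threshold. j_scalar = roi.width & -8 is the number of columns the
        // SIMD loops cover; the scalar LUT loop below finishes the tail.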

        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = _src.ptr() + src_step*i;
            uchar* dst = _dst.ptr() + dst_step*i;

            switch( type )
            {
            case THRESH_BINARY:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
                    v0 = _mm_and_si128( v0, maxval_ );
                    v1 = _mm_and_si128( v1, maxval_ );
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v0 = _mm_and_si128( v0, maxval_ );
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
                break;

            case THRESH_BINARY_INV:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v1 = _mm_cmpgt_epi8( _mm_xor_si128(v1, _x80), thresh_s );
                    v0 = _mm_andnot_si128( v0, maxval_ );
                    v1 = _mm_andnot_si128( v1, maxval_ );
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_cmpgt_epi8( _mm_xor_si128(v0, _x80), thresh_s );
                    v0 = _mm_andnot_si128( v0, maxval_ );
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
                break;

            case THRESH_TRUNC:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_subs_epu8( v0, _mm_subs_epu8( v0, thresh_u ));
                    v1 = _mm_subs_epu8( v1, _mm_subs_epu8( v1, thresh_u ));
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_subs_epu8( v0, _mm_subs_epu8( v0, thresh_u ));
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
                break;
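
            // Note on THRESH_TRUNC above: _mm_subs_epu8(v, thresh_u) is the
            // saturating max(v - thresh, 0), so v - max(v - thresh, 0) equals
            // min(v, thresh) -- truncation without any compare instruction.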

            case THRESH_TOZERO:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_and_si128( v0, _mm_cmpgt_epi8(_mm_xor_si128(v0, _x80), thresh_s ));
                    v1 = _mm_and_si128( v1, _mm_cmpgt_epi8(_mm_xor_si128(v1, _x80), thresh_s ));
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_and_si128( v0, _mm_cmpgt_epi8(_mm_xor_si128(v0, _x80), thresh_s ));
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
                break;

            case THRESH_TOZERO_INV:
                for( j = 0; j <= roi.width - 32; j += 32 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 16) );
                    v0 = _mm_andnot_si128( _mm_cmpgt_epi8(_mm_xor_si128(v0, _x80), thresh_s ), v0 );
                    v1 = _mm_andnot_si128( _mm_cmpgt_epi8(_mm_xor_si128(v1, _x80), thresh_s ), v1 );
                    _mm_storeu_si128( (__m128i*)(dst + j), v0 );
                    _mm_storeu_si128( (__m128i*)(dst + j + 16), v1 );
                }

                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128i v0 = _mm_loadl_epi64( (const __m128i*)(src + j) );
                    v0 = _mm_andnot_si128( _mm_cmpgt_epi8(_mm_xor_si128(v0, _x80), thresh_s ), v0 );
                    _mm_storel_epi64( (__m128i*)(dst + j), v0 );
                }
                break;
            }
        }
    }
#elif CV_NEON
    if( checkHardwareSupport(CV_CPU_NEON) )
    {
        uint8x16_t v_thresh = vdupq_n_u8(thresh), v_maxval = vdupq_n_u8(maxval);

        switch( type )
        {
        case THRESH_BINARY:
            for( i = 0; i < roi.height; i++ )
            {
                const uchar* src = _src.ptr() + src_step*i;
                uchar* dst = _dst.ptr() + dst_step*i;

                for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
                    vst1q_u8(dst + j_scalar, vandq_u8(vcgtq_u8(vld1q_u8(src + j_scalar), v_thresh), v_maxval));
            }
            break;
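
            // unlike the SSE2 path, NEON has native unsigned byte compares
            // (vcgtq_u8 / vcleq_u8), so no sign-bias trick is needed: the
            // compare yields a 0x00/0xFF lane mask that is AND'ed with maxval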

        case THRESH_BINARY_INV:
            for( i = 0; i < roi.height; i++ )
            {
                const uchar* src = _src.ptr() + src_step*i;
                uchar* dst = _dst.ptr() + dst_step*i;

                for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
                    vst1q_u8(dst + j_scalar, vandq_u8(vcleq_u8(vld1q_u8(src + j_scalar), v_thresh), v_maxval));
            }
            break;

        case THRESH_TRUNC:
            for( i = 0; i < roi.height; i++ )
            {
                const uchar* src = _src.ptr() + src_step*i;
                uchar* dst = _dst.ptr() + dst_step*i;

                for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
                    vst1q_u8(dst + j_scalar, vminq_u8(vld1q_u8(src + j_scalar), v_thresh));
            }
            break;

        case THRESH_TOZERO:
            for( i = 0; i < roi.height; i++ )
            {
                const uchar* src = _src.ptr() + src_step*i;
                uchar* dst = _dst.ptr() + dst_step*i;

                for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
                {
                    uint8x16_t v_src = vld1q_u8(src + j_scalar), v_mask = vcgtq_u8(v_src, v_thresh);
                    vst1q_u8(dst + j_scalar, vandq_u8(v_mask, v_src));
                }
            }
            break;

        case THRESH_TOZERO_INV:
            for( i = 0; i < roi.height; i++ )
            {
                const uchar* src = _src.ptr() + src_step*i;
                uchar* dst = _dst.ptr() + dst_step*i;

                for ( j_scalar = 0; j_scalar <= roi.width - 16; j_scalar += 16)
                {
                    uint8x16_t v_src = vld1q_u8(src + j_scalar), v_mask = vcleq_u8(v_src, v_thresh);
                    vst1q_u8(dst + j_scalar, vandq_u8(v_mask, v_src));
                }
            }
            break;

        default:
            return CV_Error( CV_StsBadArg, "Unknown threshold type" );
        }
    }
#endif

    if( j_scalar < roi.width )
    {
        for( i = 0; i < roi.height; i++ )
        {
            const uchar* src = _src.ptr() + src_step*i;
            uchar* dst = _dst.ptr() + dst_step*i;
            j = j_scalar;
#if CV_ENABLE_UNROLLED
            for( ; j <= roi.width - 4; j += 4 )
            {
                uchar t0 = tab[src[j]];
                uchar t1 = tab[src[j+1]];

                dst[j] = t0;
                dst[j+1] = t1;

                t0 = tab[src[j+2]];
                t1 = tab[src[j+3]];

                dst[j+2] = t0;
                dst[j+3] = t1;
            }
#endif
            for( ; j < roi.width; j++ )
                dst[j] = tab[src[j]];
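
            // every 8-bit variant funnels through the 256-entry table built
            // above; e.g. for THRESH_BINARY, tab[i] == (i > thresh ? maxval : 0),
            // so the scalar tail costs one indexed load per pixel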
        }
    }
}


static void
thresh_16s( const Mat& _src, Mat& _dst, short thresh, short maxval, int type )
{
    int i, j;
    Size roi = _src.size();
    roi.width *= _src.channels();
    const short* src = _src.ptr<short>();
    short* dst = _dst.ptr<short>();
    size_t src_step = _src.step/sizeof(src[0]);
    size_t dst_step = _dst.step/sizeof(dst[0]);

#if CV_SSE2
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE);
#endif

    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
        roi.height = 1;
        src_step = dst_step = roi.width;
    }

#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::useTegra() && tegra::thresh_16s(_src, _dst, roi.width, roi.height, thresh, maxval, type))
        return;
#endif

#if defined(HAVE_IPP)
    CV_IPP_CHECK()
    {
        IppiSize sz = { roi.width, roi.height };
        CV_SUPPRESS_DEPRECATED_START
        switch( type )
        {
        case THRESH_TRUNC:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_GT_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_GT_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_LTVal_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh + 1, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_LTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+1, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO_INV:
#ifndef HAVE_IPP_ICV_ONLY
            if (_src.data == _dst.data && ippiThreshold_GTVal_16s_C1IR(dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
#endif
            if (ippiThreshold_GTVal_16s_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0) >= 0)
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        }
        CV_SUPPRESS_DEPRECATED_END
    }
#endif

    switch( type )
    {
    case THRESH_BINARY:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_cmpgt_epi16( v0, thresh8 );
                    v1 = _mm_cmpgt_epi16( v1, thresh8 );
                    v0 = _mm_and_si128( v0, maxval8 );
                    v1 = _mm_and_si128( v1, maxval8 );
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval);

            for( ; j <= roi.width - 8; j += 8 )
            {
                uint16x8_t v_mask = vcgtq_s16(vld1q_s16(src + j), v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval));
            }
#endif

            for( ; j < roi.width; j++ )
                dst[j] = src[j] > thresh ? maxval : 0;
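
            // 16-bit data is compared in the native signed domain, so
            // _mm_cmpgt_epi16 / vcgtq_s16 apply directly and the sign-bias
            // trick from the 8-bit kernel is unnecessary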
        }
        break;

    case THRESH_BINARY_INV:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh), maxval8 = _mm_set1_epi16(maxval);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_cmpgt_epi16( v0, thresh8 );
                    v1 = _mm_cmpgt_epi16( v1, thresh8 );
                    v0 = _mm_andnot_si128( v0, maxval8 );
                    v1 = _mm_andnot_si128( v1, maxval8 );
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh), v_maxval = vdupq_n_s16(maxval);

            for( ; j <= roi.width - 8; j += 8 )
            {
                uint16x8_t v_mask = vcleq_s16(vld1q_s16(src + j), v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_maxval));
            }
#endif

            for( ; j < roi.width; j++ )
                dst[j] = src[j] <= thresh ? maxval : 0;
        }
        break;

    case THRESH_TRUNC:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_min_epi16( v0, thresh8 );
                    v1 = _mm_min_epi16( v1, thresh8 );
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh);

            for( ; j <= roi.width - 8; j += 8 )
                vst1q_s16(dst + j, vminq_s16(vld1q_s16(src + j), v_thresh));
#endif

            for( ; j < roi.width; j++ )
                dst[j] = std::min(src[j], thresh);
        }
        break;

    case THRESH_TOZERO:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_and_si128(v0, _mm_cmpgt_epi16(v0, thresh8));
                    v1 = _mm_and_si128(v1, _mm_cmpgt_epi16(v1, thresh8));
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh);

            for( ; j <= roi.width - 8; j += 8 )
            {
                int16x8_t v_src = vld1q_s16(src + j);
                uint16x8_t v_mask = vcgtq_s16(v_src, v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src));
            }
#endif

            for( ; j < roi.width; j++ )
            {
                short v = src[j];
                dst[j] = v > thresh ? v : 0;
            }
        }
        break;

    case THRESH_TOZERO_INV:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128i thresh8 = _mm_set1_epi16(thresh);
                for( ; j <= roi.width - 16; j += 16 )
                {
                    __m128i v0, v1;
                    v0 = _mm_loadu_si128( (const __m128i*)(src + j) );
                    v1 = _mm_loadu_si128( (const __m128i*)(src + j + 8) );
                    v0 = _mm_andnot_si128(_mm_cmpgt_epi16(v0, thresh8), v0);
                    v1 = _mm_andnot_si128(_mm_cmpgt_epi16(v1, thresh8), v1);
                    _mm_storeu_si128((__m128i*)(dst + j), v0 );
                    _mm_storeu_si128((__m128i*)(dst + j + 8), v1 );
                }
            }
#elif CV_NEON
            int16x8_t v_thresh = vdupq_n_s16(thresh);

            for( ; j <= roi.width - 8; j += 8 )
            {
                int16x8_t v_src = vld1q_s16(src + j);
                uint16x8_t v_mask = vcleq_s16(v_src, v_thresh);
                vst1q_s16(dst + j, vandq_s16(vreinterpretq_s16_u16(v_mask), v_src));
            }
#endif

            for( ; j < roi.width; j++ )
            {
                short v = src[j];
                dst[j] = v <= thresh ? v : 0;
            }
        }
        break;

    default:
        return CV_Error( CV_StsBadArg, "Unknown threshold type" );
    }
}


static void
thresh_32f( const Mat& _src, Mat& _dst, float thresh, float maxval, int type )
{
    int i, j;
    Size roi = _src.size();
    roi.width *= _src.channels();
    const float* src = _src.ptr<float>();
    float* dst = _dst.ptr<float>();
    size_t src_step = _src.step/sizeof(src[0]);
    size_t dst_step = _dst.step/sizeof(dst[0]);

#if CV_SSE2
    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE);
#endif

    if( _src.isContinuous() && _dst.isContinuous() )
    {
        roi.width *= roi.height;
        roi.height = 1;
    }

#ifdef HAVE_TEGRA_OPTIMIZATION
    if (tegra::useTegra() && tegra::thresh_32f(_src, _dst, roi.width, roi.height, thresh, maxval, type))
        return;
#endif

#if defined(HAVE_IPP)
    CV_IPP_CHECK()
    {
        IppiSize sz = { roi.width, roi.height };
        switch( type )
        {
        case THRESH_TRUNC:
            if (0 <= ippiThreshold_GT_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh))
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO:
            if (0 <= ippiThreshold_LTVal_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh+FLT_EPSILON, 0))
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        case THRESH_TOZERO_INV:
            if (0 <= ippiThreshold_GTVal_32f_C1R(src, (int)src_step*sizeof(src[0]), dst, (int)dst_step*sizeof(dst[0]), sz, thresh, 0))
            {
                CV_IMPL_ADD(CV_IMPL_IPP);
                return;
            }
            setIppErrorStatus();
            break;
        }
    }
#endif

    switch( type )
    {
    case THRESH_BINARY:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval);
                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128 v0, v1;
                    v0 = _mm_loadu_ps( src + j );
                    v1 = _mm_loadu_ps( src + j + 4 );
                    v0 = _mm_cmpgt_ps( v0, thresh4 );
                    v1 = _mm_cmpgt_ps( v1, thresh4 );
                    v0 = _mm_and_ps( v0, maxval4 );
                    v1 = _mm_and_ps( v1, maxval4 );
                    _mm_storeu_ps( dst + j, v0 );
                    _mm_storeu_ps( dst + j + 4, v1 );
                }
            }
#elif CV_NEON
            float32x4_t v_thresh = vdupq_n_f32(thresh);
            uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval));

            for( ; j <= roi.width - 4; j += 4 )
            {
                float32x4_t v_src = vld1q_f32(src + j);
                uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh), v_maxval);
                vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
            }
#endif

            for( ; j < roi.width; j++ )
                dst[j] = src[j] > thresh ? maxval : 0;
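
            // the float compares return an all-ones / all-zero lane mask that
            // is AND'ed with the bit pattern of maxval; these are ordered
            // IEEE compares, so a NaN source lane compares false and maps to 0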
        }
        break;

    case THRESH_BINARY_INV:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128 thresh4 = _mm_set1_ps(thresh), maxval4 = _mm_set1_ps(maxval);
                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128 v0, v1;
                    v0 = _mm_loadu_ps( src + j );
                    v1 = _mm_loadu_ps( src + j + 4 );
                    v0 = _mm_cmple_ps( v0, thresh4 );
                    v1 = _mm_cmple_ps( v1, thresh4 );
                    v0 = _mm_and_ps( v0, maxval4 );
                    v1 = _mm_and_ps( v1, maxval4 );
                    _mm_storeu_ps( dst + j, v0 );
                    _mm_storeu_ps( dst + j + 4, v1 );
                }
            }
#elif CV_NEON
            float32x4_t v_thresh = vdupq_n_f32(thresh);
            uint32x4_t v_maxval = vreinterpretq_u32_f32(vdupq_n_f32(maxval));

            for( ; j <= roi.width - 4; j += 4 )
            {
                float32x4_t v_src = vld1q_f32(src + j);
                uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh), v_maxval);
                vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
            }
#endif

            for( ; j < roi.width; j++ )
                dst[j] = src[j] <= thresh ? maxval : 0;
        }
        break;

    case THRESH_TRUNC:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128 thresh4 = _mm_set1_ps(thresh);
                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128 v0, v1;
                    v0 = _mm_loadu_ps( src + j );
                    v1 = _mm_loadu_ps( src + j + 4 );
                    v0 = _mm_min_ps( v0, thresh4 );
                    v1 = _mm_min_ps( v1, thresh4 );
                    _mm_storeu_ps( dst + j, v0 );
                    _mm_storeu_ps( dst + j + 4, v1 );
                }
            }
#elif CV_NEON
            float32x4_t v_thresh = vdupq_n_f32(thresh);

            for( ; j <= roi.width - 4; j += 4 )
                vst1q_f32(dst + j, vminq_f32(vld1q_f32(src + j), v_thresh));
#endif

            for( ; j < roi.width; j++ )
                dst[j] = std::min(src[j], thresh);
        }
        break;

    case THRESH_TOZERO:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128 thresh4 = _mm_set1_ps(thresh);
                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128 v0, v1;
                    v0 = _mm_loadu_ps( src + j );
                    v1 = _mm_loadu_ps( src + j + 4 );
                    v0 = _mm_and_ps(v0, _mm_cmpgt_ps(v0, thresh4));
                    v1 = _mm_and_ps(v1, _mm_cmpgt_ps(v1, thresh4));
                    _mm_storeu_ps( dst + j, v0 );
                    _mm_storeu_ps( dst + j + 4, v1 );
                }
            }
#elif CV_NEON
            float32x4_t v_thresh = vdupq_n_f32(thresh);

            for( ; j <= roi.width - 4; j += 4 )
            {
                float32x4_t v_src = vld1q_f32(src + j);
                uint32x4_t v_dst = vandq_u32(vcgtq_f32(v_src, v_thresh),
                                             vreinterpretq_u32_f32(v_src));
                vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
            }
#endif

            for( ; j < roi.width; j++ )
            {
                float v = src[j];
                dst[j] = v > thresh ? v : 0;
            }
        }
        break;

    case THRESH_TOZERO_INV:
        for( i = 0; i < roi.height; i++, src += src_step, dst += dst_step )
        {
            j = 0;
#if CV_SSE2
            if( useSIMD )
            {
                __m128 thresh4 = _mm_set1_ps(thresh);
                for( ; j <= roi.width - 8; j += 8 )
                {
                    __m128 v0, v1;
                    v0 = _mm_loadu_ps( src + j );
                    v1 = _mm_loadu_ps( src + j + 4 );
                    v0 = _mm_and_ps(v0, _mm_cmple_ps(v0, thresh4));
                    v1 = _mm_and_ps(v1, _mm_cmple_ps(v1, thresh4));
                    _mm_storeu_ps( dst + j, v0 );
                    _mm_storeu_ps( dst + j + 4, v1 );
                }
            }
#elif CV_NEON
            float32x4_t v_thresh = vdupq_n_f32(thresh);

            for( ; j <= roi.width - 4; j += 4 )
            {
                float32x4_t v_src = vld1q_f32(src + j);
                uint32x4_t v_dst = vandq_u32(vcleq_f32(v_src, v_thresh),
                                             vreinterpretq_u32_f32(v_src));
                vst1q_f32(dst + j, vreinterpretq_f32_u32(v_dst));
            }
#endif

            for( ; j < roi.width; j++ )
            {
                float v = src[j];
                dst[j] = v <= thresh ? v : 0;
            }
        }
        break;

    default:
        return CV_Error( CV_StsBadArg, "Unknown threshold type" );
    }
}

#ifdef HAVE_IPP
static bool ipp_getThreshVal_Otsu_8u( const unsigned char* _src, int step, Size size, unsigned char &thresh)
{
#if IPP_VERSION_X100 >= 810 && !HAVE_ICV
    IppStatus ippStatus;
    IppiSize srcSize = { size.width, size.height };
    CV_SUPPRESS_DEPRECATED_START
    ippStatus = ippiComputeThreshold_Otsu_8u_C1R(_src, step, srcSize, &thresh);
    CV_SUPPRESS_DEPRECATED_END
    if (ippStatus >= 0)
        return true;
#else
    CV_UNUSED(_src); CV_UNUSED(step); CV_UNUSED(size); CV_UNUSED(thresh);
#endif
    return false;
}
#endif

static double
getThreshVal_Otsu_8u( const Mat& _src )
{
    Size size = _src.size();
    int step = (int) _src.step;
    if( _src.isContinuous() )
    {
        size.width *= size.height;
        size.height = 1;
        step = size.width;
    }

#ifdef HAVE_IPP
    unsigned char thresh;
    CV_IPP_RUN(IPP_VERSION_X100 >= 810 && !HAVE_ICV, ipp_getThreshVal_Otsu_8u(_src.ptr(), step, size, thresh), thresh);
#endif

    const int N = 256;
    int i, j, h[N] = {0};
    for( i = 0; i < size.height; i++ )
    {
        const uchar* src = _src.ptr() + step*i;
        j = 0;
#if CV_ENABLE_UNROLLED
        for( ; j <= size.width - 4; j += 4 )
        {
            int v0 = src[j], v1 = src[j+1];
            h[v0]++; h[v1]++;
            v0 = src[j+2]; v1 = src[j+3];
            h[v0]++; h[v1]++;
        }
#endif
        for( ; j < size.width; j++ )
            h[src[j]]++;
    }

    double mu = 0, scale = 1./(size.width*size.height);
    for( i = 0; i < N; i++ )
        mu += i*(double)h[i];

    mu *= scale;
    double mu1 = 0, q1 = 0;
    double max_sigma = 0, max_val = 0;
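
    // Otsu's method: sweep the threshold i, splitting the histogram into two
    // classes with weights q1 and q2 = 1 - q1 and means mu1 and mu2, and pick
    // the i that maximizes the between-class variance
    //     sigma_b^2(i) = q1*q2*(mu1 - mu2)^2,
    // which is equivalent to minimizing the within-class variance.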

    for( i = 0; i < N; i++ )
    {
        double p_i, q2, mu2, sigma;

        p_i = h[i]*scale;
        mu1 *= q1;
        q1 += p_i;
        q2 = 1. - q1;

        if( std::min(q1,q2) < FLT_EPSILON || std::max(q1,q2) > 1. - FLT_EPSILON )
            continue;

        mu1 = (mu1 + i*p_i)/q1;
        mu2 = (mu - q1*mu1)/q2;
        sigma = q1*q2*(mu1 - mu2)*(mu1 - mu2);
        if( sigma > max_sigma )
        {
            max_sigma = sigma;
            max_val = i;
        }
    }

    return max_val;
}

static double
getThreshVal_Triangle_8u( const Mat& _src )
{
    Size size = _src.size();
    int step = (int) _src.step;
    if( _src.isContinuous() )
    {
        size.width *= size.height;
        size.height = 1;
        step = size.width;
    }

    const int N = 256;
    int i, j, h[N] = {0};
    for( i = 0; i < size.height; i++ )
    {
        const uchar* src = _src.ptr() + step*i;
        j = 0;
#if CV_ENABLE_UNROLLED
        for( ; j <= size.width - 4; j += 4 )
        {
            int v0 = src[j], v1 = src[j+1];
            h[v0]++; h[v1]++;
            v0 = src[j+2]; v1 = src[j+3];
            h[v0]++; h[v1]++;
        }
#endif
        for( ; j < size.width; j++ )
            h[src[j]]++;
    }

    int left_bound = 0, right_bound = 0, max_ind = 0, max = 0;
    int temp;
    bool isflipped = false;

    for( i = 0; i < N; i++ )
    {
        if( h[i] > 0 )
        {
            left_bound = i;
            break;
        }
    }
    if( left_bound > 0 )
        left_bound--;

    for( i = N-1; i > 0; i-- )
    {
        if( h[i] > 0 )
        {
            right_bound = i;
            break;
        }
    }
    if( right_bound < N-1 )
        right_bound++;

    for( i = 0; i < N; i++ )
    {
        if( h[i] > max)
        {
            max = h[i];
            max_ind = i;
        }
    }

    if( max_ind-left_bound < right_bound-max_ind)
    {
        isflipped = true;
        i = 0, j = N-1;
        while( i < j )
        {
            temp = h[i]; h[i] = h[j]; h[j] = temp;
            i++; j--;
        }
        left_bound = N-1-right_bound;
        max_ind = N-1-max_ind;
    }

    double thresh = left_bound;
    double a, b, dist = 0, tempdist;

    /*
     * We do not need to compute precise distance here. Distance is maximized, so some constants can
     * be omitted. This speeds up a computation a bit.
     */
    a = max; b = left_bound-max_ind;
    for( i = left_bound+1; i <= max_ind; i++ )
    {
        tempdist = a*i + b*h[i];
        if( tempdist > dist)
        {
            dist = tempdist;
            thresh = i;
        }
    }
    thresh--;

    if( isflipped )
        thresh = N-1-thresh;
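        // (the histogram was mirrored above when its long tail lay on the
        // left, so the threshold found on the mirrored data is mapped back
        // into the original orientation here)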

    return thresh;
}

class ThresholdRunner : public ParallelLoopBody
{
public:
    ThresholdRunner(Mat _src, Mat _dst, double _thresh, double _maxval, int _thresholdType)
    {
        src = _src;
        dst = _dst;

        thresh = _thresh;
        maxval = _maxval;
        thresholdType = _thresholdType;
    }

    void operator () ( const Range& range ) const
    {
        int row0 = range.start;
        int row1 = range.end;

        Mat srcStripe = src.rowRange(row0, row1);
        Mat dstStripe = dst.rowRange(row0, row1);

        if (srcStripe.depth() == CV_8U)
        {
            thresh_8u( srcStripe, dstStripe, (uchar)thresh, (uchar)maxval, thresholdType );
        }
        else if( srcStripe.depth() == CV_16S )
        {
            thresh_16s( srcStripe, dstStripe, (short)thresh, (short)maxval, thresholdType );
        }
        else if( srcStripe.depth() == CV_32F )
        {
            thresh_32f( srcStripe, dstStripe, (float)thresh, (float)maxval, thresholdType );
        }
    }

private:
    Mat src;
    Mat dst;

    double thresh;
    double maxval;
    int thresholdType;
};
1145
static bool ocl_threshold( InputArray _src, OutputArray _dst, double & thresh, double maxval, int thresh_type )
1147
int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
1148
kercn = ocl::predictOptimalVectorWidth(_src, _dst), ktype = CV_MAKE_TYPE(depth, kercn);
1149
bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
1151
if ( !(thresh_type == THRESH_BINARY || thresh_type == THRESH_BINARY_INV || thresh_type == THRESH_TRUNC ||
1152
thresh_type == THRESH_TOZERO || thresh_type == THRESH_TOZERO_INV) ||
1153
(!doubleSupport && depth == CV_64F))
1156
const char * const thresholdMap[] = { "THRESH_BINARY", "THRESH_BINARY_INV", "THRESH_TRUNC",
1157
"THRESH_TOZERO", "THRESH_TOZERO_INV" };
1158
ocl::Device dev = ocl::Device::getDefault();
1159
int stride_size = dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU) ? 4 : 1;
1161
ocl::Kernel k("threshold", ocl::imgproc::threshold_oclsrc,
1162
format("-D %s -D T=%s -D T1=%s -D STRIDE_SIZE=%d%s", thresholdMap[thresh_type],
1163
ocl::typeToStr(ktype), ocl::typeToStr(depth), stride_size,
1164
doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
1168
UMat src = _src.getUMat();
1169
_dst.create(src.size(), type);
1170
UMat dst = _dst.getUMat();
1172
if (depth <= CV_32S)
1173
thresh = cvFloor(thresh);
1175
const double min_vals[] = { 0, CHAR_MIN, 0, SHRT_MIN, INT_MIN, -FLT_MAX, -DBL_MAX, 0 };
1176
double min_val = min_vals[depth];
1178
k.args(ocl::KernelArg::ReadOnlyNoSize(src), ocl::KernelArg::WriteOnly(dst, cn, kercn),
1179
ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(thresh))),
1180
ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(maxval))),
1181
ocl::KernelArg::Constant(Mat(1, 1, depth, Scalar::all(min_val))));
1183
size_t globalsize[2] = { (size_t)dst.cols * cn / kercn, (size_t)dst.rows };
1184
globalsize[1] = (globalsize[1] + stride_size - 1) / stride_size;
1185
return k.run(2, globalsize, NULL, false);

}

double cv::threshold( InputArray _src, OutputArray _dst, double thresh, double maxval, int type )
{
    CV_OCL_RUN_(_src.dims() <= 2 && _dst.isUMat(),
                ocl_threshold(_src, _dst, thresh, maxval, type), thresh)

    Mat src = _src.getMat();
    int automatic_thresh = (type & ~CV_THRESH_MASK);
    type &= THRESH_MASK;

    CV_Assert( automatic_thresh != (CV_THRESH_OTSU | CV_THRESH_TRIANGLE) );
    if( automatic_thresh == CV_THRESH_OTSU )
    {
        CV_Assert( src.type() == CV_8UC1 );
        thresh = getThreshVal_Otsu_8u( src );
    }
    else if( automatic_thresh == CV_THRESH_TRIANGLE )
    {
        CV_Assert( src.type() == CV_8UC1 );
        thresh = getThreshVal_Triangle_8u( src );
    }

    _dst.create( src.size(), src.type() );
    Mat dst = _dst.getMat();

    if( src.depth() == CV_8U )
    {
        int ithresh = cvFloor(thresh);
        thresh = ithresh;
        int imaxval = cvRound(maxval);
        if( type == THRESH_TRUNC )
            imaxval = ithresh;
        imaxval = saturate_cast<uchar>(imaxval);

        if( ithresh < 0 || ithresh >= 255 )
        {
            if( type == THRESH_BINARY || type == THRESH_BINARY_INV ||
                ((type == THRESH_TRUNC || type == THRESH_TOZERO_INV) && ithresh < 0) ||
                (type == THRESH_TOZERO && ithresh >= 255) )
            {
                int v = type == THRESH_BINARY ? (ithresh >= 255 ? 0 : imaxval) :
                        type == THRESH_BINARY_INV ? (ithresh >= 255 ? imaxval : 0) :
                        /*type == THRESH_TRUNC ? imaxval :*/ 0;
                dst.setTo(v);
            }
            else
                src.copyTo(dst);
            return thresh;
        }
        thresh = ithresh;
        maxval = imaxval;
    }
    else if( src.depth() == CV_16S )
    {
        int ithresh = cvFloor(thresh);
        thresh = ithresh;
        int imaxval = cvRound(maxval);
        if( type == THRESH_TRUNC )
            imaxval = ithresh;
        imaxval = saturate_cast<short>(imaxval);

        if( ithresh < SHRT_MIN || ithresh >= SHRT_MAX )
        {
            if( type == THRESH_BINARY || type == THRESH_BINARY_INV ||
                ((type == THRESH_TRUNC || type == THRESH_TOZERO_INV) && ithresh < SHRT_MIN) ||
                (type == THRESH_TOZERO && ithresh >= SHRT_MAX) )
            {
                int v = type == THRESH_BINARY ? (ithresh >= SHRT_MAX ? 0 : imaxval) :
                        type == THRESH_BINARY_INV ? (ithresh >= SHRT_MAX ? imaxval : 0) :
                        /*type == THRESH_TRUNC ? imaxval :*/ 0;
                dst.setTo(v);
            }
            else
                src.copyTo(dst);
            return thresh;
        }
        thresh = ithresh;
        maxval = imaxval;
    }
    else if( src.depth() == CV_32F )
        ;
    else
        CV_Error( CV_StsUnsupportedFormat, "Unsupported image depth: only CV_8U, CV_16S and CV_32F are handled" );

    parallel_for_(Range(0, dst.rows),
                  ThresholdRunner(src, dst, thresh, maxval, type),
                  dst.total()/(double)(1<<16));
    return thresh;
}
1282
void cv::adaptiveThreshold( InputArray _src, OutputArray _dst, double maxValue,
1283
int method, int type, int blockSize, double delta )
1285
Mat src = _src.getMat();
1286
CV_Assert( src.type() == CV_8UC1 );
1287
CV_Assert( blockSize % 2 == 1 && blockSize > 1 );
1288
Size size = src.size();
1290
_dst.create( size, src.type() );
1291
Mat dst = _dst.getMat();
1301
if( src.data != dst.data )
1304
if (method == ADAPTIVE_THRESH_MEAN_C)
1305
boxFilter( src, mean, src.type(), Size(blockSize, blockSize),
1306
Point(-1,-1), true, BORDER_REPLICATE );
1307
else if (method == ADAPTIVE_THRESH_GAUSSIAN_C)
1309
Mat srcfloat,meanfloat;
1310
src.convertTo(srcfloat,CV_32F);
1312
GaussianBlur(srcfloat, meanfloat, Size(blockSize, blockSize), 0, 0, BORDER_REPLICATE);
1313
meanfloat.convertTo(mean, src.type());
1316
CV_Error( CV_StsBadFlag, "Unknown/unsupported adaptive threshold method" );
1319
uchar imaxval = saturate_cast<uchar>(maxValue);
1320
int idelta = type == THRESH_BINARY ? cvCeil(delta) : cvFloor(delta);
1323
if( type == CV_THRESH_BINARY )
1324
for( i = 0; i < 768; i++ )
1325
tab[i] = (uchar)(i - 255 > -idelta ? imaxval : 0);
1326
else if( type == CV_THRESH_BINARY_INV )
1327
for( i = 0; i < 768; i++ )
1328
tab[i] = (uchar)(i - 255 <= -idelta ? imaxval : 0);
1330
CV_Error( CV_StsBadFlag, "Unknown/unsupported threshold type" );
1332
if( src.isContinuous() && mean.isContinuous() && dst.isContinuous() )
1334
size.width *= size.height;
1338
for( i = 0; i < size.height; i++ )
1340
const uchar* sdata = src.ptr(i);
1341
const uchar* mdata = mean.ptr(i);
1342
uchar* ddata = dst.ptr(i);
1344
for( j = 0; j < size.width; j++ )
1345
ddata[j] = tab[sdata[j] - mdata[j] + 255];
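
        // (sdata[j] - mdata[j]) + 255 ranges over [0, 510], so the 768-entry
        // table covers every possible difference of two bytes and evaluates
        // the comparison against -idelta with a single lookup per pixel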
    }
}

CV_IMPL double
cvThreshold( const void* srcarr, void* dstarr, double thresh, double maxval, int type )
{
    cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr), dst0 = dst;

    CV_Assert( src.size == dst.size && src.channels() == dst.channels() &&
        (src.depth() == dst.depth() || dst.depth() == CV_8U));

    thresh = cv::threshold( src, dst, thresh, maxval, type );
    if( dst0.data != dst.data )
        dst.convertTo( dst0, dst0.depth() );

    return thresh;
}


CV_IMPL void
cvAdaptiveThreshold( const void *srcIm, void *dstIm, double maxValue,
                     int method, int type, int blockSize, double delta )
{
    cv::Mat src = cv::cvarrToMat(srcIm), dst = cv::cvarrToMat(dstIm);
    CV_Assert( src.size == dst.size && src.type() == dst.type() );
    cv::adaptiveThreshold( src, dst, maxValue, method, type, blockSize, delta );
}

/* End of file. */