1
// This file is part of OpenCV project.
2
// It is subject to the license terms in the LICENSE file found in the top-level directory
3
// of this distribution and at http://opencv.org/license.html.
5
// Copyright (C) 2014, Itseez, Inc., all rights reserved.
6
// Third party copyrights are property of their respective owners.
10
// Enable double-precision support through one of two extensions:
// cl_amd_fp64 in the first branch, cl_khr_fp64 when that macro is defined.
// NOTE(review): the opening conditional and the closing #endif of this chain
// are not visible in this view.
#pragma OPENCL EXTENSION cl_amd_fp64:enable
11
#elif defined (cl_khr_fp64)
12
#pragma OPENCL EXTENSION cl_khr_fp64:enable
16
// Rounds pos up to a multiple of MINMAX_STRUCT_ALIGNMENT by adding
// (alignment - 1) and masking off the low bits.
// NOTE(review): the mask is only exact when MINMAX_STRUCT_ALIGNMENT is a
// power of two -- TODO confirm against the build option that defines it.
static inline int align(int pos)
18
return (pos + (MINMAX_STRUCT_ALIGNMENT - 1)) & (~(MINMAX_STRUCT_ALIGNMENT - 1));
23
// Extreme representable values, one MIN_VAL/MAX_VAL pair per element type.
// The kernel seeds its running minimum with MAX_VAL and its running maxima
// with MIN_VAL, so the first real element always replaces the seed.
// NOTE(review): the conditionals that pick exactly one pair are not visible
// in this view, and the unsigned variants show no MIN_VAL line here.
#define MAX_VAL UCHAR_MAX
25
#define MIN_VAL SCHAR_MIN
26
#define MAX_VAL SCHAR_MAX
29
#define MAX_VAL USHRT_MAX
31
#define MIN_VAL SHRT_MIN
32
#define MAX_VAL SHRT_MAX
34
#define MIN_VAL INT_MIN
35
#define MAX_VAL INT_MAX
37
#define MIN_VAL (-FLT_MAX)
38
#define MAX_VAL FLT_MAX
40
#define MIN_VAL (-DBL_MAX)
41
#define MAX_VAL DBL_MAX
45
// Seed value for the minloc/maxloc location trackers in the kernel.
#define INDEX_MAX UINT_MAX
48
// Arithmetic helpers used by the kernel, given in two alternative sets:
//   - first set: abs / abs_diff wrapped in convertFromU, plus min / max;
//   - second set: fabs, fabs of a difference, fmin / fmax.
// MIN_ABS(a) yields |a|; MIN_ABS2(a, b) yields |a - b|.
// NOTE(review): presumably the first set serves integer depths and the second
// floating-point depths; the selecting conditional is not visible here.
#define MIN_ABS(a) convertFromU(abs(a))
49
#define MIN_ABS2(a, b) convertFromU(abs_diff(a, b))
50
#define MIN(a, b) min(a, b)
51
#define MAX(a, b) max(a, b)
53
#define MIN_ABS(a) fabs(a)
54
#define MIN_ABS2(a, b) fabs(a - b)
55
#define MIN(a, b) fmin(a, b)
56
#define MAX(a, b) fmax(a, b)
60
// Element load helpers. loadpix(addr) reads one source element from a byte
// address; srcTSIZE is the size in bytes of one source element.
//   - first variant: direct dereference as srcT, size sizeof(srcT);
//   - second variant: vload3 of three srcT1 values, size 3 * sizeof(srcT1);
//   - third variant: only srcTSIZE (sizeof(srcT1)) is visible in this view.
// NOTE(review): the conditionals selecting a variant are not visible here.
#define loadpix(addr) *(__global const srcT *)(addr)
61
#define srcTSIZE (int)sizeof(srcT)
63
#define loadpix(addr) vload3(0, (__global const srcT1 *)(addr))
64
#define srcTSIZE ((int)sizeof(srcT1) * 3)
69
#define srcTSIZE (int)sizeof(srcT1)
74
// Per-component accumulation macros; `p` names a component of temp / temp2.
// CALC_MIN and CALC_MAX each appear in three alternative forms:
//   - a comparison form (if (minval > temp.p) ... / if (maxval < temp.p) ...),
//   - a plain fold through MIN / MAX,
//   - an empty form that expands to nothing.
// CALC_MAX2 folds temp2.p into maxval2.
// NOTE(review): the continuation lines of the comparison forms and of CALC_P,
// and the conditionals choosing between forms, are not visible in this view.
#define CALC_MIN(p, inc) \
75
if (minval > temp.p) \
81
#define CALC_MIN(p, inc) \
82
minval = MIN(minval, temp.p);
85
#define CALC_MIN(p, inc)
90
#define CALC_MAX(p, inc) \
91
if (maxval < temp.p) \
97
#define CALC_MAX(p, inc) \
98
maxval = MAX(maxval, temp.p);
101
#define CALC_MAX(p, inc)
105
#define CALC_MAX2(p) \
106
maxval2 = MAX(maxval2, temp2.p);
111
#define CALC_P(p, inc) \
116
// minmaxloc: work-group reduction over `total` source elements.
//
// Each work-item walks the flat element index `id` in steps of `grain`,
// keeps private running results (minval, maxval, minloc, maxloc, maxval2),
// and the group then combines them in __local arrays. Slot 0 of each array
// is stored to dstptr at the position belonging to group `gid`.
//
// Parameters as used by the visible statements:
//   srcptr, src_step, src_offset - source bytes; src_offset is added to the
//       pointer, src_step is the per-row byte stride in the non-contiguous
//       index formula.
//   cols      - splits the flat index into row (id / cols) and column (id % cols).
//   total     - exclusive upper bound of the flat index loop.
//   groupnum  - number of groups; scales `grain` and the size of each
//       output section in dstptr.
//   dstptr    - output bytes, written as consecutive per-group sections.
//   mask, mask_step, mask_offset - optional mask; an element is processed
//       only when mask[mask_index] is non-zero.
//   src2ptr, src2_step, src2_offset - optional second source, combined with
//       the first through MIN_ABS2.
//
// NOTE(review): the braces and most #if/#else/#endif lines that structure
// this kernel are not visible in this view. Alternative statements that
// appear back to back below belong to different preprocessor branches.
__kernel void minmaxloc(__global const uchar * srcptr, int src_step, int src_offset, int cols,
117
int total, int groupnum, __global uchar * dstptr
119
, __global const uchar * mask, int mask_step, int mask_offset
122
, __global const uchar * src2ptr, int src2_step, int src2_offset
126
// Work-item coordinates: index within the group, group index, global index.
// NOTE(review): the terminator of the `id` declaration is not visible here.
int lid = get_local_id(0);
127
int gid = get_group_id(0);
128
int id = get_global_id(0)
135
// Move the source pointers to the first element of the region of interest.
srcptr += src_offset;
140
src2ptr += src2_offset;
144
// Private accumulators and their __local staging arrays. The minimum is
// seeded with MAX_VAL, the maxima with MIN_VAL, the locations with INDEX_MAX.
__local dstT1 localmem_min[WGS2_ALIGNED];
145
dstT1 minval = MAX_VAL;
147
__local uint localmem_minloc[WGS2_ALIGNED];
148
uint minloc = INDEX_MAX;
152
dstT1 maxval = MIN_VAL;
153
__local dstT1 localmem_max[WGS2_ALIGNED];
155
__local uint localmem_maxloc[WGS2_ALIGNED];
156
uint maxloc = INDEX_MAX;
160
__local dstT1 localmem_max2[WGS2_ALIGNED];
161
dstT1 maxval2 = MIN_VAL;
177
// Main accumulation loop: advance the flat index by `grain` until `total`.
for (int grain = groupnum * WGS
181
; id < total; id += grain)
184
#ifdef HAVE_MASK_CONT
187
// Mask index from row * mask_step + column.
mask_index = mad24(id / cols, mask_step, id % cols);
189
if (mask[mask_index])
193
// Source byte offset: flat form (id * srcTSIZE), or row * src_step +
// column * srcTSIZE in the alternative branch.
src_index = id * srcTSIZE;//mul24(id, srcTSIZE);
195
src_index = mad24(id / cols, src_step, mul24(id % cols, srcTSIZE));
197
temp = convertToDT(loadpix(srcptr + src_index));
199
// Replace the value by its absolute value.
temp = MIN_ABS(temp);
203
#ifdef HAVE_SRC2_CONT
204
src2_index = id * srcTSIZE; //mul24(id, srcTSIZE);
206
src2_index = mad24(id / cols, src2_step, mul24(id % cols, srcTSIZE));
208
// Two-source path: temp becomes |temp - temp2|, temp2 becomes |temp2|.
temp2 = convertToDT(loadpix(src2ptr + src2_index));
209
temp = MIN_ABS2(temp, temp2);
211
temp2 = MIN_ABS(temp2);
224
// Fold the current element into the private running results.
minval = MIN(minval, temp);
235
maxval = MAX(maxval, temp);
238
maxval2 = MAX(maxval2, temp2);
270
// Stage 1: work-items with lid below WGS2_ALIGNED publish their private
// results into the __local arrays.
if (lid < WGS2_ALIGNED)
273
localmem_min[lid] = minval;
276
localmem_max[lid] = maxval;
279
localmem_minloc[lid] = minloc;
282
localmem_maxloc[lid] = maxloc;
285
localmem_max2[lid] = maxval2;
288
barrier(CLK_LOCAL_MEM_FENCE);
290
// Stage 2: the remaining work-items merge their private results into slot
// lid - WGS2_ALIGNED. On equal values the smaller location is kept.
if (lid >= WGS2_ALIGNED && total >= WGS2_ALIGNED)
292
int lid3 = lid - WGS2_ALIGNED;
295
if (localmem_min[lid3] >= minval)
297
if (localmem_min[lid3] == minval)
298
localmem_minloc[lid3] = min(localmem_minloc[lid3], minloc);
300
localmem_minloc[lid3] = minloc,
301
localmem_min[lid3] = minval;
304
localmem_min[lid3] = MIN(localmem_min[lid3], minval);
309
if (localmem_max[lid3] <= maxval)
311
if (localmem_max[lid3] == maxval)
312
localmem_maxloc[lid3] = min(localmem_maxloc[lid3], maxloc);
314
localmem_maxloc[lid3] = maxloc,
315
localmem_max[lid3] = maxval;
318
localmem_max[lid3] = MAX(localmem_max[lid3], maxval);
322
localmem_max2[lid3] = MAX(localmem_max2[lid3], maxval2);
325
barrier(CLK_LOCAL_MEM_FENCE);
327
// Stage 3: pairwise reduction. Each pass halves lsize and merges slot
// lid + lsize into slot lid, with the same tie rule on locations.
// NOTE(review): the guard restricting which lids take part is not visible.
for (int lsize = WGS2_ALIGNED >> 1; lsize > 0; lsize >>= 1)
331
int lid2 = lsize + lid;
335
if (localmem_min[lid] >= localmem_min[lid2])
337
if (localmem_min[lid] == localmem_min[lid2])
338
localmem_minloc[lid] = min(localmem_minloc[lid2], localmem_minloc[lid]);
340
localmem_minloc[lid] = localmem_minloc[lid2],
341
localmem_min[lid] = localmem_min[lid2];
344
localmem_min[lid] = MIN(localmem_min[lid], localmem_min[lid2]);
349
if (localmem_max[lid] <= localmem_max[lid2])
351
if (localmem_max[lid] == localmem_max[lid2])
352
localmem_maxloc[lid] = min(localmem_maxloc[lid2], localmem_maxloc[lid]);
354
localmem_maxloc[lid] = localmem_maxloc[lid2],
355
localmem_max[lid] = localmem_max[lid2];
358
localmem_max[lid] = MAX(localmem_max[lid], localmem_max[lid2]);
362
localmem_max2[lid] = MAX(localmem_max2[lid], localmem_max2[lid2]);
365
barrier(CLK_LOCAL_MEM_FENCE);
372
// Output: each enabled result is stored at byte offset
// pos + gid * sizeof(element), then pos advances by groupnum * sizeof(element)
// so the next result starts after a full per-group section.
// NOTE(review): the initial value of pos and the guard selecting the writing
// work-item are not visible in this view.
*(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_min[0];
373
pos = mad24(groupnum, (int)sizeof(dstT1), pos);
377
*(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max[0];
378
pos = mad24(groupnum, (int)sizeof(dstT1), pos);
382
*(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_minloc[0];
383
pos = mad24(groupnum, (int)sizeof(uint), pos);
387
*(__global uint *)(dstptr + mad24(gid, (int)sizeof(uint), pos)) = localmem_maxloc[0];
389
pos = mad24(groupnum, (int)sizeof(uint), pos);
394
*(__global dstT1 *)(dstptr + mad24(gid, (int)sizeof(dstT1), pos)) = localmem_max2[0];