1
/*M///////////////////////////////////////////////////////////////////////////////////////
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
11
// For Open Source Computer Vision Library
13
// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15
// Third party copyrights are property of their respective owners.
18
// Peng Xiao, pengxiao@multicorewareinc.com
20
// Redistribution and use in source and binary forms, with or without modification,
21
// are permitted provided that the following conditions are met:
23
// * Redistribution's of source code must retain the above copyright notice,
24
// this list of conditions and the following disclaimer.
26
// * Redistribution's in binary form must reproduce the above copyright notice,
27
// this list of conditions and the following disclaimer in the documentation
28
// and/or other materials provided with the distribution.
30
// * The name of the copyright holders may not be used to endorse or promote products
31
// derived from this software without specific prior written permission.
33
// This software is provided by the copyright holders and contributors as is and
34
// any express or implied warranties, including, but not limited to, the implied
35
// warranties of merchantability and fitness for a particular purpose are disclaimed.
36
// In no event shall the Intel Corporation or contributors be liable for any direct,
37
// indirect, incidental, special, exemplary, or consequential damages
38
// (including, but not limited to, procurement of substitute goods or services;
39
// loss of use, data, or profits; or business interruption) however caused
40
// and on any theory of liability, whether in contract, strict liability,
41
// or tort (including negligence or otherwise) arising in any way out of
42
// the use of this software, even if advised of the possibility of such damage.
46
#define TG22 0.4142135623730950488016887242097f
47
#define TG67 2.4142135623730950488016887242097f
52
#define loadpix(addr) convert_floatN(*(__global const TYPE *)(addr))
54
#define loadpix(addr) convert_floatN(vload3(0, (__global const TYPE *)(addr)))
56
#define storepix(value, addr) *(__global int *)(addr) = (int)(value)
62
Non maxima suppression
66
__constant int prev[4][2] = {
73
__constant int next[4][2] = {
80
inline float3 sobel(int idx, __local const floatN *smem)
85
floatN dx = fma(2, smem[idx + GRP_SIZEX + 6] - smem[idx + GRP_SIZEX + 4],
86
smem[idx + 2] - smem[idx] + smem[idx + 2 * GRP_SIZEX + 10] - smem[idx + 2 * GRP_SIZEX + 8]);
88
floatN dy = fma(2, smem[idx + 1] - smem[idx + 2 * GRP_SIZEX + 9],
89
smem[idx + 2] - smem[idx + 2 * GRP_SIZEX + 10] + smem[idx] - smem[idx + 2 * GRP_SIZEX + 8]);
92
floatN magN = fma(dx, dx, dy * dy);
94
floatN magN = fabs(dx) + fabs(dy);
101
res.z = max(magN.x, max(magN.y, magN.z));
107
else if (res.z == magN.z)
119
__kernel void stage1_with_sobel(__global const uchar *src, int src_step, int src_offset, int rows, int cols,
120
__global uchar *map, int map_step, int map_offset,
121
float low_thr, float high_thr)
123
__local floatN smem[(GRP_SIZEX + 4) * (GRP_SIZEY + 4)];
125
int lidx = get_local_id(0);
126
int lidy = get_local_id(1);
128
int start_x = GRP_SIZEX * get_group_id(0);
129
int start_y = GRP_SIZEY * get_group_id(1);
131
int i = lidx + lidy * GRP_SIZEX;
132
for (int j = i; j < (GRP_SIZEX + 4) * (GRP_SIZEY + 4); j += GRP_SIZEX * GRP_SIZEY)
134
int x = clamp(start_x - 2 + (j % (GRP_SIZEX + 4)), 0, cols - 1);
135
int y = clamp(start_y - 2 + (j / (GRP_SIZEX + 4)), 0, rows - 1);
136
smem[j] = loadpix(src + mad24(y, src_step, mad24(x, cn * (int)sizeof(TYPE), src_offset)));
139
barrier(CLK_LOCAL_MEM_FENCE);
141
//// Sobel, Magnitude
144
__local float mag[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)];
149
if (i < GRP_SIZEX + 2)
151
int grp_sizey = min(GRP_SIZEY + 1, rows - start_y);
152
mag[i] = (sobel(i, smem)).z;
153
mag[i + grp_sizey * (GRP_SIZEX + 2)] = (sobel(i + grp_sizey * (GRP_SIZEX + 4), smem)).z;
155
if (i < GRP_SIZEY + 2)
157
int grp_sizex = min(GRP_SIZEX + 1, cols - start_x);
158
mag[i * (GRP_SIZEX + 2)] = (sobel(i * (GRP_SIZEX + 4), smem)).z;
159
mag[i * (GRP_SIZEX + 2) + grp_sizex] = (sobel(i * (GRP_SIZEX + 4) + grp_sizex, smem)).z;
162
int idx = lidx + lidy * (GRP_SIZEX + 4);
163
i = lidx + lidy * (GRP_SIZEX + 2);
165
float3 res = sobel(idx, smem);
167
barrier(CLK_LOCAL_MEM_FENCE);
172
//// Threshold + Non maxima suppression
186
We need to determine arctg(dy / dx) to one of the four directions: 0, 45, 90 or 135 degrees.
187
Therefore if abs(dy / dx) belongs to the interval
188
[0, tg(22.5)] -> 0 direction
189
[tg(22.5), tg(67.5)] -> 1 or 3
192
Since tg(67.5) = 1 / tg(22.5), if we take
193
a = abs(dy / dx) * tg(22.5) and b = abs(dy / dx) * tg(67.5)
194
we can get another intervals
198
[tg(22.5)^2, 1] -> 1, 3
203
[1, tg(67.5)^2] -> 1,3
204
[tg(67.5)^2, +oo) -> 2
206
that can help to find direction without conditions.
208
0 - might belong to an edge
209
1 - pixel doesn't belong to an edge
210
2 - belong to an edge
213
int gidx = get_global_id(0);
214
int gidy = get_global_id(1);
216
if (gidx >= cols || gidy >= rows)
224
int a = (y / (float)x) * TG22;
225
int b = (y / (float)x) * TG67;
227
a = min((int)abs(a), 1) + 1;
228
b = min((int)abs(b), 1);
232
// a * b = { 0, 1, 2 } - directions that we need ( + 3 if x ^ y < 0)
234
int dir3 = (a * b) & (((x ^ y) & 0x80000000) >> 31); // if a = 1, b = 1, dy ^ dx < 0
235
int dir = a * b + 2 * dir3;
236
float prev_mag = mag[(lidy + prev[dir][0]) * (GRP_SIZEX + 2) + lidx + prev[dir][1]];
237
float next_mag = mag[(lidy + next[dir][0]) * (GRP_SIZEX + 2) + lidx + next[dir][1]] + (dir & 1);
239
if (mag0 > prev_mag && mag0 >= next_mag)
241
value = (mag0 > high_thr) ? 2 : 0;
245
storepix(value, map + mad24(gidy, map_step, mad24(gidx, (int)sizeof(int), map_offset)));
248
#elif defined WITHOUT_SOBEL
251
stage1_without_sobel:
253
Non maxima suppression
257
#define loadpix(addr) (__global short *)(addr)
258
#define storepix(val, addr) *(__global int *)(addr) = (int)(val)
261
#define dist(x, y) ((int)(x) * (x) + (int)(y) * (y))
263
#define dist(x, y) (abs(x) + abs(y))
266
__constant int prev[4][2] = {
273
__constant int next[4][2] = {
280
__kernel void stage1_without_sobel(__global const uchar *dxptr, int dx_step, int dx_offset,
281
__global const uchar *dyptr, int dy_step, int dy_offset,
282
__global uchar *map, int map_step, int map_offset, int rows, int cols,
283
int low_thr, int high_thr)
285
int start_x = get_group_id(0) * GRP_SIZEX;
286
int start_y = get_group_id(1) * GRP_SIZEY;
288
int lidx = get_local_id(0);
289
int lidy = get_local_id(1);
291
__local int mag[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)];
292
__local short2 sigma[(GRP_SIZEX + 2) * (GRP_SIZEY + 2)];
295
for (int i = lidx + lidy * GRP_SIZEX; i < (GRP_SIZEX + 2) * (GRP_SIZEY + 2); i += GRP_SIZEX * GRP_SIZEY)
297
int x = clamp(start_x - 1 + i % (GRP_SIZEX + 2), 0, cols - 1);
298
int y = clamp(start_y - 1 + i / (GRP_SIZEX + 2), 0, rows - 1);
300
int dx_index = mad24(y, dx_step, mad24(x, cn * (int)sizeof(short), dx_offset));
301
int dy_index = mad24(y, dy_step, mad24(x, cn * (int)sizeof(short), dy_offset));
303
__global short *dx = loadpix(dxptr + dx_index);
304
__global short *dy = loadpix(dyptr + dy_index);
306
int mag0 = dist(dx[0], dy[0]);
308
short cdx = dx[0], cdy = dy[0];
310
for (int j = 1; j < cn; ++j)
312
int mag1 = dist(dx[j], dy[j]);
324
sigma[i] = (short2)(dx[0], dy[0]);
327
barrier(CLK_LOCAL_MEM_FENCE);
329
int gidx = get_global_id(0);
330
int gidy = get_global_id(1);
332
if (gidx >= cols || gidy >= rows)
338
int mag0 = mag[lidx + lidy * (GRP_SIZEX + 2)];
339
short x = (sigma[lidx + lidy * (GRP_SIZEX + 2)]).x;
340
short y = (sigma[lidx + lidy * (GRP_SIZEX + 2)]).y;
345
int a = (y / (float)x) * TG22;
346
int b = (y / (float)x) * TG67;
348
a = min((int)abs(a), 1) + 1;
349
b = min((int)abs(b), 1);
351
int dir3 = (a * b) & (((x ^ y) & 0x80000000) >> 31);
352
int dir = a * b + 2 * dir3;
353
int prev_mag = mag[(lidy + prev[dir][0]) * (GRP_SIZEX + 2) + lidx + prev[dir][1]];
354
int next_mag = mag[(lidy + next[dir][0]) * (GRP_SIZEX + 2) + lidx + next[dir][1]] + (dir & 1);
356
if (mag0 > prev_mag && mag0 >= next_mag)
358
value = (mag0 > high_thr) ? 2 : 0;
362
storepix(value, map + mad24(gidy, map_step, mad24(gidx, (int)sizeof(int), map_offset)));
371
hysteresis (add edges labeled 0 if they are connected with an edge labeled 2)
374
#define loadpix(addr) *(__global int *)(addr)
375
#define storepix(val, addr) *(__global int *)(addr) = (int)(val)
376
#define LOCAL_TOTAL (LOCAL_X*LOCAL_Y)
377
#define l_stack_size (4*LOCAL_TOTAL)
378
#define p_stack_size 8
380
__constant short move_dir[2][8] = {
381
{ -1, -1, -1, 0, 0, 1, 1, 1 },
382
{ -1, 0, 1, -1, 1, -1, 0, 1 }
385
__kernel void stage2_hysteresis(__global uchar *map_ptr, int map_step, int map_offset, int rows, int cols)
387
map_ptr += map_offset;
389
int x = get_global_id(0);
390
int y = get_global_id(1) * PIX_PER_WI;
392
int lid = get_local_id(0) + get_local_id(1) * LOCAL_X;
394
__local ushort2 l_stack[l_stack_size];
395
__local int l_counter;
399
barrier(CLK_LOCAL_MEM_FENCE);
403
__global uchar* map = map_ptr + mad24(y, map_step, x * (int)sizeof(int));
406
for (int cy = 0; cy < PIX_PER_WI; ++cy)
410
int type = loadpix(map);
413
l_stack[atomic_inc(&l_counter)] = (ushort2)(x, y);
421
barrier(CLK_LOCAL_MEM_FENCE);
423
ushort2 p_stack[p_stack_size];
426
while(l_counter != 0)
428
int mod = l_counter % LOCAL_TOTAL;
429
int pix_per_thr = l_counter / LOCAL_TOTAL + ((lid < mod) ? 1 : 0);
431
for (int i = 0; i < pix_per_thr; ++i)
433
int index = atomic_dec(&l_counter) - 1;
436
ushort2 pos = l_stack[ index ];
439
for (int j = 0; j < 8; ++j)
441
ushort posx = pos.x + move_dir[0][j];
442
ushort posy = pos.y + move_dir[1][j];
443
if (posx < 0 || posy < 0 || posx >= cols || posy >= rows)
445
__global uchar *addr = map_ptr + mad24(posy, map_step, posx * (int)sizeof(int));
446
int type = loadpix(addr);
449
p_stack[p_counter++] = (ushort2)(posx, posy);
454
barrier(CLK_LOCAL_MEM_FENCE);
457
barrier(CLK_LOCAL_MEM_FENCE);
459
while (p_counter > 0)
461
l_stack[ atomic_inc(&l_counter) ] = p_stack[--p_counter];
463
barrier(CLK_LOCAL_MEM_FENCE);
467
#elif defined GET_EDGES
469
// Get the edge result. egde type of value 2 will be marked as an edge point and set to 255. Otherwise 0.
470
// map edge type mappings
473
__kernel void getEdges(__global const uchar *mapptr, int map_step, int map_offset, int rows, int cols,
474
__global uchar *dst, int dst_step, int dst_offset)
476
int x = get_global_id(0);
477
int y = get_global_id(1) * PIX_PER_WI;
481
int map_index = mad24(map_step, y, mad24(x, (int)sizeof(int), map_offset));
482
int dst_index = mad24(dst_step, y, x + dst_offset);
485
for (int cy = 0; cy < PIX_PER_WI; ++cy)
489
__global const int * map = (__global const int *)(mapptr + map_index);
490
dst[dst_index] = (uchar)(-(map[0] >> 1));
493
map_index += map_step;
494
dst_index += dst_step;