1
/*M///////////////////////////////////////////////////////////////////////////////////////
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
11
// For Open Source Computer Vision Library
13
// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14
// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15
// Third party copyrights are property of their respective owners.
18
// Zhang Ying, zhangying913@gmail.com
19
// Niko Li, newlife20080214@gmail.com
20
// Redistribution and use in source and binary forms, with or without modification,
21
// are permitted provided that the following conditions are met:
23
// * Redistribution's of source code must retain the above copyright notice,
24
// this list of conditions and the following disclaimer.
26
// * Redistribution's in binary form must reproduce the above copyright notice,
27
// this list of conditions and the following disclaimer in the documentation
28
// and/or other materials provided with the distribution.
30
// * The name of the copyright holders may not be used to endorse or promote products
31
// derived from this software without specific prior written permission.
33
// This software is provided by the copyright holders and contributors as is and
34
// any express or implied warranties, including, but not limited to, the implied
35
// warranties of merchantability and fitness for a particular purpose are disclaimed.
36
// In no event shall the Intel Corporation or contributors be liable for any direct,
37
// indirect, incidental, special, exemplary, or consequential damages
38
// (including, but not limited to, procurement of substitute goods or services;
39
// loss of use, data, or profits; or business interruption) however caused
40
// and on any theory of liability, whether in contract, strict liability,
41
// or tort (including negligence or otherwise) arising in any way out of
42
// the use of this software, even if advised of the possibility of such damage.
48
// Enable double-precision support through the AMD or the Khronos fp64 extension.
// NOTE(review): the #if/#ifdef that opens this conditional is not visible in this listing.
#pragma OPENCL EXTENSION cl_amd_fp64:enable
49
#elif defined (cl_khr_fp64)
50
#pragma OPENCL EXTENSION cl_khr_fp64:enable
54
// Fixed-point scale applied to interpolation weights: 2^INTER_RESIZE_COEF_BITS.
// INTER_RESIZE_COEF_BITS itself is not defined in the visible lines.
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
55
// Twice the coefficient bit count; used as the final right shift after two scaled
// weights have been multiplied together.
#define CAST_BITS (INTER_RESIZE_COEF_BITS << 1)
56
// Index following x, limited to the last valid index l-1.
// Arguments are not parenthesized, so callers pass simple expressions.
#define INC(x,l) min(x+1,l-1)
61
// Pixel access, variant 1: a pixel is a single value of type T read or written
// through a typed pointer; TSIZE is the pixel size in bytes.
#define loadpix(addr) *(__global const T *)(addr)
62
#define storepix(val, addr) *(__global T *)(addr) = val
63
#define TSIZE (int)sizeof(T)
65
// Pixel access, variant 2: a pixel is moved with vload3/vstore3 over T1 elements;
// TSIZE is sizeof(T1) multiplied by cn.
// NOTE(review): both variants appear back to back; the conditional that selects one
// of them is not visible in this listing (presumably keyed on the channel count) — confirm.
#define loadpix(addr) vload3(0, (__global const T1 *)(addr))
66
#define storepix(val, addr) vstore3(val, 0, (__global T1 *)(addr))
67
#define TSIZE (int)sizeof(T1)*cn
70
// Sampler-based path: opens a conditional chain continued by the later #elif branches
// (INTER_LINEAR_INTEGER, INTER_LINEAR, INTER_NEAREST, INTER_AREA).
#if defined USE_SAMPLER
73
// READ_IMAGE / INTERMEDIATE_TYPE pairs: each pair calls read_imagef and keeps
// 1, 2, 3 or 4 float components of the returned texel.
// NOTE(review): the conditionals choosing among the four pairs are not visible in this
// listing (presumably keyed on the channel count) — confirm.
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).x
74
#define INTERMEDIATE_TYPE float
76
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xy
77
#define INTERMEDIATE_TYPE float2
79
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z).xyz
80
#define INTERMEDIATE_TYPE float3
82
#define READ_IMAGE(X,Y,Z) read_imagef(X,Y,Z)
83
#define INTERMEDIATE_TYPE float4
86
// Two-level token pasting so that macro arguments are expanded before concatenation.
// Only referenced from the commented-out INTERMEDIATE_TYPE definition below.
#define __CAT(x, y) x##y
87
#define CAT(x, y) __CAT(x, y)
88
//#define INTERMEDIATE_TYPE CAT(float, cn)
92
// RESULT_SCALE: factor multiplied into the sampled float value before it is converted
// to the destination type T in resizeSampler. Five alternative values are listed.
// NOTE(review): the conditionals selecting among them are not visible in this listing
// (presumably keyed on the destination depth) — confirm.
#define RESULT_SCALE 255.0f
94
#define RESULT_SCALE 127.0f
96
#define RESULT_SCALE 65535.0f
98
#define RESULT_SCALE 32767.0f
100
#define RESULT_SCALE 1.0f
103
// Resize kernel that reads the source through an image sampler.
// One work-item per destination pixel (dx, dy).
//   srcImage            read-only source image
//   dstptr/dststep/dstoffset  destination buffer, row stride in bytes, byte offset of the first pixel
//   dstrows, dstcols    destination size used for the bounds check
//   ifx, ify            factors mapping destination coordinates to source coordinates
__kernel void resizeSampler(__read_only image2d_t srcImage,
104
__global uchar* dstptr, int dststep, int dstoffset,
105
int dstrows, int dstcols,
106
float ifx, float ify)
108
// Unnormalized coordinates, out-of-range reads clamped to the edge.
// NOTE(review): the expression ends with a trailing `|`; the final term (the filter
// mode) is not visible in this listing.
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
109
CLK_ADDRESS_CLAMP_TO_EDGE |
112
int dx = get_global_id(0);
113
int dy = get_global_id(1);
115
// Source position of the destination pixel centre.
float sx = ((dx+0.5f) * ifx), sy = ((dy+0.5f) * ify);
117
// The image read is issued before the bounds check below; only the store is guarded.
INTERMEDIATE_TYPE intermediate = READ_IMAGE(srcImage, sampler, (float2)(sx, sy));
120
// Two alternative conversions of the scaled sample to T: with explicit round(), and without.
// NOTE(review): the conditional selecting between them is not visible in this listing.
T uval = convertToDT(round(intermediate * RESULT_SCALE));
122
T uval = convertToDT(intermediate * RESULT_SCALE);
125
if(dx < dstcols && dy < dstrows)
127
// Byte address: dy * dststep + dstoffset + dx * TSIZE.
storepix(uval, dstptr + mad24(dy, dststep, dstoffset + dx*TSIZE));
131
#elif defined INTER_LINEAR_INTEGER
133
// Bilinear resize using precomputed integer offsets and 16-bit fixed-point weights.
// One work-item per destination pixel (dx, dy).
//   srcptr/src_step/src_offset, src_rows, src_cols   source buffer, row stride in bytes, byte offset, size
//   dstptr/dst_step/dst_offset, dst_rows, dst_cols   destination buffer, row stride in bytes, byte offset, size
//   buffer   packed lookup tables, laid out as decoded below
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
134
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
135
__global const uchar * buffer)
137
int dx = get_global_id(0);
138
int dy = get_global_id(1);
140
if (dx < dst_cols && dy < dst_rows)
142
// Table layout inside buffer: dst_cols ints (xofs), then dst_rows ints (yofs),
// then short weights; ibeta points at the pair of weights for row dy, located
// 2*dst_cols shorts after the start of the weight area.
__global const int * xofs = (__global const int *)(buffer), * yofs = xofs + dst_cols;
143
__global const short * ialpha = (__global const short *)(yofs + dst_rows);
144
__global const short * ibeta = ialpha + ((dst_cols + dy) << 1);
147
// Source column comes straight from the table; both source rows are clamped to [0, src_rows - 1].
int sx0 = xofs[dx], sy0 = clamp(yofs[dy], 0, src_rows - 1),
148
sy1 = clamp(yofs[dy] + 1, 0, src_rows - 1);
149
// NOTE(review): in the visible lines ialpha is read at indices 0/1 with no dx-dependent
// offset, while ibeta is offset by dy; a line advancing ialpha may be missing from
// this listing — confirm.
short a0 = ialpha[0], a1 = ialpha[1];
150
short b0 = ibeta[0], b1 = ibeta[1];
152
// Byte offsets of the left pixel on the upper and lower source rows.
int src_index0 = mad24(sy0, src_step, mad24(sx0, TSIZE, src_offset)),
153
src_index1 = mad24(sy1, src_step, mad24(sx0, TSIZE, src_offset));
154
// 2x2 neighbourhood: data0/data1 on row sy0, data2/data3 on row sy1; the right-hand
// pixel is read TSIZE bytes after the left-hand one.
WT data0 = convertToWT(loadpix(srcptr + src_index0));
155
WT data1 = convertToWT(loadpix(srcptr + src_index0 + TSIZE));
156
WT data2 = convertToWT(loadpix(srcptr + src_index1));
157
WT data3 = convertToWT(loadpix(srcptr + src_index1 + TSIZE));
159
// Horizontal blend with (a0, a1), shifted right by 4, then weighted by the row
// weight (b0 or b1) and shifted right by 16; the two rows are summed.
WT val = ( (((data0 * a0 + data1 * a1) >> 4) * b0) >> 16) +
160
( (((data2 * a0 + data3 * a1) >> 4) * b1) >> 16);
162
// Final rounding: add 2 and shift right by 2 before converting to the destination type.
storepix(convertToDT((val + 2) >> 2),
163
dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
167
#elif defined INTER_LINEAR
169
// Bilinear resize computing source coordinates and weights per work-item.
// One work-item per destination pixel (dx, dy).
//   srcptr/src_step/src_offset, src_rows, src_cols   source buffer, row stride in bytes, byte offset, size
//   dstptr/dst_step/dst_offset, dst_rows, dst_cols   destination buffer, row stride in bytes, byte offset, size
//   ifx, ify   factors mapping destination coordinates to source coordinates
__kernel void resizeLN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
170
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
171
float ifx, float ify)
173
int dx = get_global_id(0);
174
int dy = get_global_id(1);
176
if (dx < dst_cols && dy < dst_rows)
178
// Source position of the destination pixel centre, shifted back by half a pixel.
float sx = ((dx+0.5f) * ifx - 0.5f), sy = ((dy+0.5f) * ify - 0.5f);
179
int x = floor(sx), y = floor(sy);
181
// Fractional parts serve as the interpolation weights of the right/lower neighbours.
float u = sx - x, v = sy - y;
184
// Past the last column/row: snap to the last index and drop the fractional weight.
// NOTE(review): only the upper-bound clamps are visible; handling of negative x/y is
// not shown in this listing — confirm.
if ( x>=src_cols ) x=src_cols-1,u=0;
186
if ( y>=src_rows ) y=src_rows-1,v=0;
188
// Neighbour row/column, limited to the last valid index.
int y_ = INC(y, src_rows);
189
int x_ = INC(x, src_cols);
192
// Fixed-point path: weights scaled by INTER_RESIZE_COEF_SCALE and rounded to integers.
// NOTE(review): this path and the floating-point path further down are alternatives;
// the conditional separating them is not visible in this listing.
u = u * INTER_RESIZE_COEF_SCALE;
193
v = v * INTER_RESIZE_COEF_SCALE;
197
// Complementary weights. NOTE(review): U and V are used below but their declarations
// are not visible in this listing.
int U1 = rint(INTER_RESIZE_COEF_SCALE - u);
198
int V1 = rint(INTER_RESIZE_COEF_SCALE - v);
200
// 2x2 neighbourhood: (x, y), (x_, y), (x, y_), (x_, y_).
WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))));
201
WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset))));
202
WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset))));
203
WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset))));
205
// Weighted sum; each term carries the product of two scaled weights.
WT val = mul24((WT)mul24(U1, V1), data0) + mul24((WT)mul24(U, V1), data1) +
206
mul24((WT)mul24(U1, V), data2) + mul24((WT)mul24(U, V), data3);
208
// Round to nearest: add half of 2^CAST_BITS, then shift the weight scale back out.
T uval = convertToDT((val + (1<<(CAST_BITS-1)))>>CAST_BITS);
212
// Floating-point path: same 2x2 neighbourhood, weights used unscaled.
WT data0 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x, TSIZE, src_offset))));
213
WT data1 = convertToWT(loadpix(srcptr + mad24(y, src_step, mad24(x_, TSIZE, src_offset))));
214
WT data2 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x, TSIZE, src_offset))));
215
WT data3 = convertToWT(loadpix(srcptr + mad24(y_, src_step, mad24(x_, TSIZE, src_offset))));
217
// NOTE(review): u1 and v1 are used here but their declarations are not visible in this
// listing (presumably the complements of u and v) — confirm.
T uval = u1 * v1 * data0 + u * v1 * data1 + u1 * v *data2 + u * v *data3;
219
// Byte address: dy * dst_step + dx * TSIZE + dst_offset.
storepix(uval, dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
223
#elif defined INTER_NEAREST
225
// Nearest-neighbour resize: copies one source pixel to each destination pixel.
// One work-item per destination pixel (dx, dy).
//   srcptr/src_step/src_offset, src_rows, src_cols   source buffer, row stride in bytes, byte offset, size
//   dstptr/dst_step/dst_offset, dst_rows, dst_cols   destination buffer, row stride in bytes, byte offset, size
//   ifx, ify   not referenced in the visible lines (presumably used to compute s1/s2 — confirm)
__kernel void resizeNN(__global const uchar * srcptr, int src_step, int src_offset, int src_rows, int src_cols,
226
__global uchar * dstptr, int dst_step, int dst_offset, int dst_rows, int dst_cols,
227
float ifx, float ify)
229
int dx = get_global_id(0);
230
int dy = get_global_id(1);
232
if (dx < dst_cols && dy < dst_rows)
236
// Source coordinates: s1/s2 converted to int with round-toward-zero, limited to the
// last column/row.
// NOTE(review): the declarations of s1 and s2 are not visible in this listing.
int sx = min(convert_int_rtz(s1), src_cols - 1);
237
int sy = min(convert_int_rtz(s2), src_rows - 1);
239
// Plain pixel copy from (sx, sy) in the source to (dx, dy) in the destination.
storepix(loadpix(srcptr + mad24(sy, src_step, mad24(sx, TSIZE, src_offset))),
240
dstptr + mad24(dy, dst_step, mad24(dx, TSIZE, dst_offset)));
244
#elif defined INTER_AREA
246
#ifdef INTER_AREA_FAST
248
// Area resize for integer scale factors: each destination pixel is built from an
// XSCALE x YSCALE block of source pixels. One work-item per destination pixel (dx, dy).
//   src/src_step/src_offset, src_rows, src_cols   source buffer, row stride in bytes, byte offset, size
//   dst/dst_step/dst_offset, dst_rows, dst_cols   destination buffer, row stride in bytes, byte offset, size
// XSCALE, YSCALE and SCALE are not defined in the visible lines.
__kernel void resizeAREA_FAST(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
249
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols)
251
int dx = get_global_id(0);
252
int dy = get_global_id(1);
254
if (dx < dst_cols && dy < dst_rows)
256
// Byte offset of destination row dy.
int dst_index = mad24(dy, dst_step, dst_offset);
258
// Top-left corner of the source block for this destination pixel.
int sx = XSCALE * dx;
259
int sy = YSCALE * dy;
263
// NOTE(review): the declaration/initialisation of sum is not visible in this listing.
for (int py = 0; py < YSCALE; ++py)
265
// Rows beyond the image are replaced by the last row.
int y = min(sy + py, src_rows - 1);
266
int src_index = mad24(y, src_step, src_offset);
268
for (int px = 0; px < XSCALE; ++px)
270
// Columns beyond the image are replaced by the last column.
int x = min(sx + px, src_cols - 1);
271
sum += convertToWTV(loadpix(src + src_index + x*TSIZE));
275
// The accumulated sum is multiplied by SCALE (presumably the reciprocal of the block
// area — confirm) and converted to the destination type.
storepix(convertToT(convertToWT2V(sum) * (WT2V)(SCALE)), dst + mad24(dx, TSIZE, dst_index));
281
// Area resize driven by precomputed lookup tables. One work-item per destination pixel (dx, dy).
//   src/src_step/src_offset, src_rows, src_cols   source buffer, row stride in bytes, byte offset, size
//   dst/dst_step/dst_offset, dst_rows, dst_cols   destination buffer, row stride in bytes, byte offset, size
//   ifx, ify    not referenced in the visible lines
//   ofs_tab     per-destination-index start positions into the map/alpha tables (x part, then y part)
//   map_tab     source indices (x part, then y part)
//   alpha_tab   float weights (x part, then y part)
// NOTE(review): this block runs to the end of the visible listing; closing lines are not shown.
__kernel void resizeAREA(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols,
282
__global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols,
283
float ifx, float ify, __global const int * ofs_tab,
284
__global const int * map_tab, __global const float * alpha_tab)
286
int dx = get_global_id(0);
287
int dy = get_global_id(1);
289
if (dx < dst_cols && dy < dst_rows)
291
// Byte offset of destination row dy.
int dst_index = mad24(dy, dst_step, dst_offset);
293
// The y parts of map_tab and alpha_tab start 2*src_cols entries after the x parts;
// the y part of ofs_tab starts dst_cols + 1 entries after the x part.
__global const int * xmap_tab = map_tab;
294
__global const int * ymap_tab = (__global const int *)(map_tab + (src_cols << 1));
295
__global const float * xalpha_tab = alpha_tab;
296
__global const float * yalpha_tab = (__global const float *)(alpha_tab + (src_cols << 1));
297
__global const int * xofs_tab = ofs_tab;
298
__global const int * yofs_tab = (__global const int *)(ofs_tab + dst_cols + 1);
300
// [xk0, xk1) and [yk0, yk1): table entry ranges for destination column dx and row dy.
int xk0 = xofs_tab[dx], xk1 = xofs_tab[dx + 1];
301
int yk0 = yofs_tab[dy], yk1 = yofs_tab[dy + 1];
303
// First and last source row/column named by those table ranges.
int sy0 = ymap_tab[yk0], sy1 = ymap_tab[yk1 - 1];
304
int sx0 = xmap_tab[xk0], sx1 = xmap_tab[xk1 - 1];
306
WTV sum = (WTV)(0), buf;
307
int src_index = mad24(sy0, src_step, src_offset);
309
// Walk the source rows; src_index advances by one row stride and yk by one table
// entry per iteration.
for (int sy = sy0, yk = yk0; sy <= sy1; ++sy, src_index += src_step, ++yk)
311
// Weight of the current source row.
WTV beta = (WTV)(yalpha_tab[yk]);
314
// Walk the source columns, accumulating pixel * column weight into buf.
for (int sx = sx0, xk = xk0; sx <= sx1; ++sx, ++xk)
316
WTV alpha = (WTV)(xalpha_tab[xk]);
317
buf += convertToWTV(loadpix(src + mad24(sx, TSIZE, src_index))) * alpha;
322
// NOTE(review): in the visible lines buf is never reset per row and sum is never
// updated from buf and beta before this store; those statements appear to be missing
// from this listing — confirm against the full file.
storepix(convertToT(sum), dst + mad24(dx, TSIZE, dst_index));