1
/*M///////////////////////////////////////////////////////////////////////////////////////
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
11
// For Open Source Computer Vision Library
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15
// Third party copyrights are property of their respective owners.
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
20
// * Redistribution's of source code must retain the above copyright notice,
21
// this list of conditions and the following disclaimer.
23
// * Redistribution's in binary form must reproduce the above copyright notice,
24
// this list of conditions and the following disclaimer in the documentation
25
// and/or other materials provided with the distribution.
27
// * The name of the copyright holders may not be used to endorse or promote products
28
// derived from this software without specific prior written permission.
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
43
#if !defined CUDA_DISABLER
45
#include "opencv2/core/cuda/common.hpp"
46
#include "opencv2/core/cuda/vec_traits.hpp"
47
#include "opencv2/core/cuda/vec_math.hpp"
48
#include "opencv2/core/cuda/limits.hpp"
49
#include "opencv2/core/cuda/color.hpp"
50
#include "opencv2/core/cuda/saturate_cast.hpp"
52
namespace cv { namespace cuda { namespace device
54
template <typename T> struct Bayer2BGR;
56
template <> struct Bayer2BGR<uchar>
63
__device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
66
patch[0][1] = ((const uchar4*) src.ptr(s_y - 1))[s_x];
67
patch[0][0] = ((const uchar4*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
68
patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
70
patch[1][1] = ((const uchar4*) src.ptr(s_y))[s_x];
71
patch[1][0] = ((const uchar4*) src.ptr(s_y))[::max(s_x - 1, 0)];
72
patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
74
patch[2][1] = ((const uchar4*) src.ptr(s_y + 1))[s_x];
75
patch[2][0] = ((const uchar4*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
76
patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
78
if ((s_y & 1) ^ start_with_green)
80
const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1;
81
const int t1 = (patch[1][0].w + patch[1][1].y + 1) >> 1;
83
const int t2 = (patch[0][1].x + patch[0][1].z + patch[2][1].x + patch[2][1].z + 2) >> 2;
84
const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][1].z + patch[2][1].y + 2) >> 2;
86
const int t4 = (patch[0][1].z + patch[2][1].z + 1) >> 1;
87
const int t5 = (patch[1][1].y + patch[1][1].w + 1) >> 1;
89
const int t6 = (patch[0][1].z + patch[0][2].x + patch[2][1].z + patch[2][2].x + 2) >> 2;
90
const int t7 = (patch[0][1].w + patch[1][1].z + patch[1][2].x + patch[2][1].w + 2) >> 2;
92
if ((s_y & 1) ^ blue_last)
95
res0.y = patch[1][1].x;
98
res1.x = patch[1][1].y;
103
res2.y = patch[1][1].z;
106
res3.x = patch[1][1].w;
113
res0.y = patch[1][1].x;
118
res1.z = patch[1][1].y;
121
res2.y = patch[1][1].z;
126
res3.z = patch[1][1].w;
131
const int t0 = (patch[0][0].w + patch[0][1].y + patch[2][0].w + patch[2][1].y + 2) >> 2;
132
const int t1 = (patch[0][1].x + patch[1][0].w + patch[1][1].y + patch[2][1].x + 2) >> 2;
134
const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1;
135
const int t3 = (patch[1][1].x + patch[1][1].z + 1) >> 1;
137
const int t4 = (patch[0][1].y + patch[0][1].w + patch[2][1].y + patch[2][1].w + 2) >> 2;
138
const int t5 = (patch[0][1].z + patch[1][1].y + patch[1][1].w + patch[2][1].z + 2) >> 2;
140
const int t6 = (patch[0][1].w + patch[2][1].w + 1) >> 1;
141
const int t7 = (patch[1][1].z + patch[1][2].x + 1) >> 1;
143
if ((s_y & 1) ^ blue_last)
145
res0.x = patch[1][1].x;
150
res1.y = patch[1][1].y;
153
res2.x = patch[1][1].z;
158
res3.y = patch[1][1].w;
165
res0.z = patch[1][1].x;
168
res1.y = patch[1][1].y;
173
res2.z = patch[1][1].z;
176
res3.y = patch[1][1].w;
183
template <typename D> __device__ __forceinline__ D toDst(const uchar3& pix);
184
template <> __device__ __forceinline__ uchar toDst<uchar>(const uchar3& pix)
186
typename bgr_to_gray_traits<uchar>::functor_type f = bgr_to_gray_traits<uchar>::create_functor();
189
template <> __device__ __forceinline__ uchar3 toDst<uchar3>(const uchar3& pix)
193
template <> __device__ __forceinline__ uchar4 toDst<uchar4>(const uchar3& pix)
195
return make_uchar4(pix.x, pix.y, pix.z, 255);
198
template <typename D>
199
__global__ void Bayer2BGR_8u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
201
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
202
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
204
if (s_y >= src.rows || (s_x << 2) >= src.cols)
207
s_y = ::min(::max(s_y, 1), src.rows - 2);
209
Bayer2BGR<uchar> bayer;
210
bayer.apply(src, s_x, s_y, blue_last, start_with_green);
212
const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
213
const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
215
dst(d_y, d_x) = toDst<D>(bayer.res0);
216
if (d_x + 1 < src.cols)
217
dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
218
if (d_x + 2 < src.cols)
219
dst(d_y, d_x + 2) = toDst<D>(bayer.res2);
220
if (d_x + 3 < src.cols)
221
dst(d_y, d_x + 3) = toDst<D>(bayer.res3);
224
template <> struct Bayer2BGR<ushort>
229
__device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
232
patch[0][1] = ((const ushort2*) src.ptr(s_y - 1))[s_x];
233
patch[0][0] = ((const ushort2*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
234
patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
236
patch[1][1] = ((const ushort2*) src.ptr(s_y))[s_x];
237
patch[1][0] = ((const ushort2*) src.ptr(s_y))[::max(s_x - 1, 0)];
238
patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
240
patch[2][1] = ((const ushort2*) src.ptr(s_y + 1))[s_x];
241
patch[2][0] = ((const ushort2*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
242
patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
244
if ((s_y & 1) ^ start_with_green)
246
const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1;
247
const int t1 = (patch[1][0].y + patch[1][1].y + 1) >> 1;
249
const int t2 = (patch[0][1].x + patch[0][2].x + patch[2][1].x + patch[2][2].x + 2) >> 2;
250
const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][2].x + patch[2][1].y + 2) >> 2;
252
if ((s_y & 1) ^ blue_last)
255
res0.y = patch[1][1].x;
258
res1.x = patch[1][1].y;
265
res0.y = patch[1][1].x;
270
res1.z = patch[1][1].y;
275
const int t0 = (patch[0][0].y + patch[0][1].y + patch[2][0].y + patch[2][1].y + 2) >> 2;
276
const int t1 = (patch[0][1].x + patch[1][0].y + patch[1][1].y + patch[2][1].x + 2) >> 2;
278
const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1;
279
const int t3 = (patch[1][1].x + patch[1][2].x + 1) >> 1;
281
if ((s_y & 1) ^ blue_last)
283
res0.x = patch[1][1].x;
288
res1.y = patch[1][1].y;
295
res0.z = patch[1][1].x;
298
res1.y = patch[1][1].y;
305
template <typename D> __device__ __forceinline__ D toDst(const ushort3& pix);
306
template <> __device__ __forceinline__ ushort toDst<ushort>(const ushort3& pix)
308
typename bgr_to_gray_traits<ushort>::functor_type f = bgr_to_gray_traits<ushort>::create_functor();
311
template <> __device__ __forceinline__ ushort3 toDst<ushort3>(const ushort3& pix)
315
template <> __device__ __forceinline__ ushort4 toDst<ushort4>(const ushort3& pix)
317
return make_ushort4(pix.x, pix.y, pix.z, numeric_limits<ushort>::max());
320
template <typename D>
321
__global__ void Bayer2BGR_16u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
323
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
324
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
326
if (s_y >= src.rows || (s_x << 1) >= src.cols)
329
s_y = ::min(::max(s_y, 1), src.rows - 2);
331
Bayer2BGR<ushort> bayer;
332
bayer.apply(src, s_x, s_y, blue_last, start_with_green);
334
const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
335
const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
337
dst(d_y, d_x) = toDst<D>(bayer.res0);
338
if (d_x + 1 < src.cols)
339
dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
343
void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
345
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
347
const dim3 block(32, 8);
348
const dim3 grid(divUp(src.cols, 4 * block.x), divUp(src.rows, block.y));
350
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
352
Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
353
cudaSafeCall( cudaGetLastError() );
356
cudaSafeCall( cudaDeviceSynchronize() );
360
void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
362
typedef typename TypeVec<ushort, cn>::vec_type dst_t;
364
const dim3 block(32, 8);
365
const dim3 grid(divUp(src.cols, 2 * block.x), divUp(src.rows, block.y));
367
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
369
Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
370
cudaSafeCall( cudaGetLastError() );
373
cudaSafeCall( cudaDeviceSynchronize() );
376
template void Bayer2BGR_8u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
377
template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
378
template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
380
template void Bayer2BGR_16u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
381
template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
382
template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
384
//////////////////////////////////////////////////////////////
385
// Bayer Demosaicing (Malvar, He, and Cutler)
387
// by Morgan McGuire, Williams College
388
// http://graphics.cs.williams.edu/papers/BayerJGT09/#shaders
392
texture<uchar, cudaTextureType2D, cudaReadModeElementType> sourceTex(false, cudaFilterModePoint, cudaAddressModeClamp);
394
template <typename DstType>
395
__global__ void MHCdemosaic(PtrStepSz<DstType> dst, const int2 sourceOffset, const int2 firstRed)
397
const float kAx = -1.0f / 8.0f, kAy = -1.5f / 8.0f, kAz = 0.5f / 8.0f /*kAw = -1.0f / 8.0f*/;
398
const float kBx = 2.0f / 8.0f, /*kBy = 0.0f / 8.0f,*/ /*kBz = 0.0f / 8.0f,*/ kBw = 4.0f / 8.0f ;
399
const float kCx = 4.0f / 8.0f, kCy = 6.0f / 8.0f, kCz = 5.0f / 8.0f /*kCw = 5.0f / 8.0f*/;
400
const float /*kDx = 0.0f / 8.0f,*/ kDy = 2.0f / 8.0f, kDz = -1.0f / 8.0f /*kDw = -1.0f / 8.0f*/;
401
const float kEx = -1.0f / 8.0f, kEy = -1.5f / 8.0f, /*kEz = -1.0f / 8.0f,*/ kEw = 0.5f / 8.0f ;
402
const float kFx = 2.0f / 8.0f, /*kFy = 0.0f / 8.0f,*/ kFz = 4.0f / 8.0f /*kFw = 0.0f / 8.0f*/;
404
const int x = blockIdx.x * blockDim.x + threadIdx.x;
405
const int y = blockIdx.y * blockDim.y + threadIdx.y;
407
if (x == 0 || x >= dst.cols - 1 || y == 0 || y >= dst.rows - 1)
411
center.x = x + sourceOffset.x;
412
center.y = y + sourceOffset.y;
415
xCoord.x = center.x - 2;
416
xCoord.y = center.x - 1;
417
xCoord.z = center.x + 1;
418
xCoord.w = center.x + 2;
421
yCoord.x = center.y - 2;
422
yCoord.y = center.y - 1;
423
yCoord.z = center.y + 1;
424
yCoord.w = center.y + 2;
426
float C = tex2D(sourceTex, center.x, center.y); // ( 0, 0)
429
Dvec.x = tex2D(sourceTex, xCoord.y, yCoord.y); // (-1,-1)
430
Dvec.y = tex2D(sourceTex, xCoord.y, yCoord.z); // (-1, 1)
431
Dvec.z = tex2D(sourceTex, xCoord.z, yCoord.y); // ( 1,-1)
432
Dvec.w = tex2D(sourceTex, xCoord.z, yCoord.z); // ( 1, 1)
435
value.x = tex2D(sourceTex, center.x, yCoord.x); // ( 0,-2) A0
436
value.y = tex2D(sourceTex, center.x, yCoord.y); // ( 0,-1) B0
437
value.z = tex2D(sourceTex, xCoord.x, center.y); // (-2, 0) E0
438
value.w = tex2D(sourceTex, xCoord.y, center.y); // (-1, 0) F0
440
// (A0 + A1), (B0 + B1), (E0 + E1), (F0 + F1)
441
value.x += tex2D(sourceTex, center.x, yCoord.w); // ( 0, 2) A1
442
value.y += tex2D(sourceTex, center.x, yCoord.z); // ( 0, 1) B1
443
value.z += tex2D(sourceTex, xCoord.w, center.y); // ( 2, 0) E1
444
value.w += tex2D(sourceTex, xCoord.z, center.y); // ( 1, 0) F1
450
PATTERN.w = PATTERN.z;
452
float D = Dvec.x + Dvec.y + Dvec.z + Dvec.w;
454
// There are five filter patterns (identity, cross, checker,
455
// theta, phi). Precompute the terms from all of them and then
456
// use swizzles to assign to color channels.
459
// x cross (e.g., EE G)
460
// y checker (e.g., EE B)
461
// z theta (e.g., EO R)
462
// w phi (e.g., EO B)
464
#define A value.x // A0 + A1
465
#define B value.y // B0 + B1
466
#define E value.z // E0 + E1
467
#define F value.w // F0 + F1
471
// PATTERN.yzw += (kD.yz * D).xyy;
478
// PATTERN += (kA.xyz * A).xyzx;
487
// PATTERN += (kE.xyw * E).xyxz;
496
// PATTERN.xw += kB.xw * B;
497
PATTERN.x += kBx * B;
498
PATTERN.w += kBw * B;
500
// PATTERN.xz += kF.xz * F;
501
PATTERN.x += kFx * F;
502
PATTERN.z += kFz * F;
504
// Determine which of four types of pixels we are on.
506
alternate.x = (x + firstRed.x) % 2;
507
alternate.y = (y + firstRed.y) % 2;
512
((alternate.x == 0) ?
513
make_uchar3(saturate_cast<uchar>(PATTERN.y), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(C)) :
514
make_uchar3(saturate_cast<uchar>(PATTERN.w), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.z))) :
515
((alternate.x == 0) ?
516
make_uchar3(saturate_cast<uchar>(PATTERN.z), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.w)) :
517
make_uchar3(saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(PATTERN.y)));
519
dst(y, x) = toDst<DstType>(pixelColor);
523
void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream)
525
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
527
const dim3 block(32, 8);
528
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
530
bindTexture(&sourceTex, src);
532
MHCdemosaic<dst_t><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, sourceOffset, firstRed);
533
cudaSafeCall( cudaGetLastError() );
536
cudaSafeCall( cudaDeviceSynchronize() );
539
template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
540
template void MHCdemosaic<3>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
541
template void MHCdemosaic<4>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
544
#endif /* CUDA_DISABLER */