1
/*M///////////////////////////////////////////////////////////////////////////////////////
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
11
// For Open Source Computer Vision Library
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15
// Third party copyrights are property of their respective owners.
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
20
// * Redistribution's of source code must retain the above copyright notice,
21
// this list of conditions and the following disclaimer.
23
// * Redistribution's in binary form must reproduce the above copyright notice,
24
// this list of conditions and the following disclaimer in the documentation
25
// and/or other materials provided with the distribution.
27
// * The name of the copyright holders may not be used to endorse or promote products
28
// derived from this software without specific prior written permission.
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
43
#if !defined CUDA_DISABLER
45
#include "opencv2/core/cuda/common.hpp"
46
#include "opencv2/core/cuda/vec_traits.hpp"
47
#include "opencv2/core/cuda/vec_math.hpp"
48
#include "opencv2/core/cuda/border_interpolate.hpp"
50
using namespace cv::cuda;
52
typedef unsigned char uchar;
53
typedef unsigned short ushort;
55
//////////////////////////////////////////////////////////////////////////////////
56
/// Bilateral filtering
58
namespace cv { namespace cuda { namespace device
62
__device__ __forceinline__ float norm_l1(const float& a)
{
    // L1 norm of a scalar is simply its absolute value.
    const float magnitude = ::fabs(a);
    return magnitude;
}
63
__device__ __forceinline__ float norm_l1(const float2& a)
{
    // L1 norm: sum of the absolute values of the two components.
    const float abs_x = ::fabs(a.x);
    const float abs_y = ::fabs(a.y);
    return abs_x + abs_y;
}
64
__device__ __forceinline__ float norm_l1(const float3& a)
{
    // L1 norm: sum of the absolute values of the three components,
    // accumulated in x, y, z order.
    const float abs_x = ::fabs(a.x);
    const float abs_y = ::fabs(a.y);
    const float abs_z = ::fabs(a.z);
    return abs_x + abs_y + abs_z;
}
65
__device__ __forceinline__ float norm_l1(const float4& a)
{
    // L1 norm: sum of the absolute values of the four components,
    // accumulated in x, y, z, w order.
    const float abs_x = ::fabs(a.x);
    const float abs_y = ::fabs(a.y);
    const float abs_z = ::fabs(a.z);
    const float abs_w = ::fabs(a.w);
    return abs_x + abs_y + abs_z + abs_w;
}
67
__device__ __forceinline__ float sqr(const float& a)
{
    // Square of the argument.
    const float squared = a * a;
    return squared;
}
69
// Bilateral filter kernel: each thread computes one output pixel dst(y, x) as a
// weighted average of source pixels read around (x, y).
//   src  - input image; read directly in the first loop nest
//   dst  - output image; written once, at (y, x), by the owning thread
//   b    - border handler; the second loop nest reads pixels through
//          b.at(row, col, src.data, src.step) instead of src(row, col)
//   ksz  - kernel size; ksz/2 is used in the window-inside-image test below
//   sigma_spatial2_inv_half - factor multiplied with the squared pixel distance
//   sigma_color2_inv_half   - factor multiplied with the squared L1 colour difference
// NOTE(review): this excerpt appears to be missing lines (braces, and the
// declarations of r, tx, ty and sum2 that the code below uses). Confirm against
// the complete file before relying on these comments.
template<typename T, typename B>
70
__global__ void bilateral_kernel(const PtrStepSz<T> src, PtrStep<T> dst, const B b, const int ksz, const float sigma_spatial2_inv_half, const float sigma_color2_inv_half)
72
// Accumulation type: float vector with the same channel count as T.
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
74
// Pixel coordinates handled by this thread, derived from the 2D launch indices.
int x = threadIdx.x + blockIdx.x * blockDim.x;
75
int y = threadIdx.y + blockIdx.y * blockDim.y;
77
// Test for threads whose (x, y) lies outside the image.
// NOTE(review): the statement controlled by this `if` is not visible in this excerpt.
if (x >= src.cols || y >= src.rows)
80
// Centre pixel, converted to the float accumulation type.
value_type center = saturate_cast<value_type>(src(y, x));
82
// Weighted sum of pixel values, started at zero in every channel.
value_type sum1 = VecTraits<value_type>::all(0);
86
// Squared radius as float.
// NOTE(review): neither the declaration of r nor any use of r2 is visible here.
float r2 = (float)(r * r);
91
// Tests that x - ksz/2 and y - ksz/2 are non-negative and that tx/ty stay below
// the image size; the loop nest right after it reads src(cy, cx) directly.
if (x - ksz/2 >=0 && y - ksz/2 >=0 && tx < src.cols && ty < src.rows)
93
for (int cy = y - r; cy < ty; ++cy)
94
for (int cx = x - r; cx < tx; ++cx)
96
// Squared distance between the neighbour (cx, cy) and the centre (x, y).
float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
100
value_type value = saturate_cast<value_type>(src(cy, cx));
102
// Weight = exp(spatial term + colour term); the colour term uses the squared
// L1 norm of the difference to the centre pixel.
float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);
103
sum1 = sum1 + weight * value;
104
// sum2 accumulates the weights and is the divisor of the final result.
sum2 = sum2 + weight;
109
// Second loop nest: same accumulation, but pixels are fetched through the border
// handler b.
// NOTE(review): presumably the `else` branch of the test above - the `else` and
// the braces are not visible in this excerpt.
for (int cy = y - r; cy < ty; ++cy)
110
for (int cx = x - r; cx < tx; ++cx)
112
float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
116
value_type value = saturate_cast<value_type>(b.at(cy, cx, src.data, src.step));
118
float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);
120
sum1 = sum1 + weight * value;
121
sum2 = sum2 + weight;
124
// Divide the weighted sum by the weight total and convert back to the pixel type T.
dst(y, x) = saturate_cast<T>(sum1 / sum2);
127
// Host-side launcher for bilateral_kernel<T, B<T> >.
// Builds the launch grid from the image size, constructs the border handler B<T>,
// turns both sigmas into exponent factors and launches the kernel on `stream`.
//   src, dst      - byte-typed image descriptors, cast to PtrStepSz<T> at the launch
//   kernel_size   - forwarded to the kernel as its ksz argument
//   sigma_spatial - converted to -0.5 / sigma_spatial^2 before the launch
//   sigma_color   - converted to -0.5 / sigma_color^2 before the launch
//   stream        - CUDA stream the kernel is launched on
// NOTE(review): `block` is used below but its declaration, like the function's
// braces, is not visible in this excerpt.
template<typename T, template <typename> class B>
128
void bilateral_caller(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream)
131
// Number of blocks per dimension computed with divUp from the image size and block size.
dim3 grid (divUp (src.cols, block.x), divUp (src.rows, block.y));
133
// Border handler constructed from the image dimensions.
B<T> b(src.rows, src.cols);
135
// Exponent factors computed once on the host; the kernel multiplies them with
// squared distances inside exp().
float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
136
float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
138
// Request the L1-preferring cache configuration for this kernel instantiation.
cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
139
bilateral_kernel<<<grid, block, 0, stream>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
140
// Check the error state right after the launch.
cudaSafeCall ( cudaGetLastError () );
143
// Wait for the device to finish and check the result.
// NOTE(review): any condition guarding this synchronize (e.g. on `stream`) is
// not visible in this excerpt - confirm against the complete file.
cudaSafeCall( cudaDeviceSynchronize() );
147
// Entry point: selects the bilateral_caller instantiation for the requested
// border mode and forwards all arguments to it.
//   gauss_spatial_coeff / gauss_color_coeff are passed in the positions of the
//   caller's sigma_spatial / sigma_color parameters.
//   borderMode is used directly as the index into funcs[]:
//     0 BrdConstant, 1 BrdReplicate, 2 BrdReflect, 3 BrdWrap, 4 BrdReflect101
//   (presumably the same numbering as the cv::BORDER_* constants - verify against callers).
// NOTE(review): no range check on borderMode is visible here, and the template
// header that declares T is not visible in this excerpt.
void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float gauss_spatial_coeff, float gauss_color_coeff, int borderMode, cudaStream_t stream)
149
// Function-pointer type shared by every bilateral_caller instantiation.
typedef void (*caller_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream);
151
// Dispatch table, one entry per border handler template.
static caller_t funcs[] =
153
bilateral_caller<T, BrdConstant>,
154
bilateral_caller<T, BrdReplicate>,
155
bilateral_caller<T, BrdReflect>,
156
bilateral_caller<T, BrdWrap>,
157
bilateral_caller<T, BrdReflect101>
159
funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream);
165
// Expands to an explicit template instantiation of
// cv::cuda::device::imgproc::bilateral_filter_gpu<T> for the pixel type T.
// NOTE(review): the opening of the `imgproc` namespace named here is not
// visible in this excerpt.
#define OCV_INSTANTIATE_BILATERAL_FILTER(T) \
166
template void cv::cuda::device::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t);
168
// Explicit instantiations of bilateral_filter_gpu.
// Enabled: the 1-, 3- and 4-channel variants of uchar, short, ushort and float.
// Disabled (left commented out): every 2-channel variant and all schar / int variants.
OCV_INSTANTIATE_BILATERAL_FILTER(uchar)
169
//OCV_INSTANTIATE_BILATERAL_FILTER(uchar2)
170
OCV_INSTANTIATE_BILATERAL_FILTER(uchar3)
171
OCV_INSTANTIATE_BILATERAL_FILTER(uchar4)
173
//OCV_INSTANTIATE_BILATERAL_FILTER(schar)
174
//OCV_INSTANTIATE_BILATERAL_FILTER(schar2)
175
//OCV_INSTANTIATE_BILATERAL_FILTER(schar3)
176
//OCV_INSTANTIATE_BILATERAL_FILTER(schar4)
178
OCV_INSTANTIATE_BILATERAL_FILTER(short)
179
//OCV_INSTANTIATE_BILATERAL_FILTER(short2)
180
OCV_INSTANTIATE_BILATERAL_FILTER(short3)
181
OCV_INSTANTIATE_BILATERAL_FILTER(short4)
183
OCV_INSTANTIATE_BILATERAL_FILTER(ushort)
184
//OCV_INSTANTIATE_BILATERAL_FILTER(ushort2)
185
OCV_INSTANTIATE_BILATERAL_FILTER(ushort3)
186
OCV_INSTANTIATE_BILATERAL_FILTER(ushort4)
188
//OCV_INSTANTIATE_BILATERAL_FILTER(int)
189
//OCV_INSTANTIATE_BILATERAL_FILTER(int2)
190
//OCV_INSTANTIATE_BILATERAL_FILTER(int3)
191
//OCV_INSTANTIATE_BILATERAL_FILTER(int4)
193
OCV_INSTANTIATE_BILATERAL_FILTER(float)
194
//OCV_INSTANTIATE_BILATERAL_FILTER(float2)
195
OCV_INSTANTIATE_BILATERAL_FILTER(float3)
196
OCV_INSTANTIATE_BILATERAL_FILTER(float4)
199
#endif /* CUDA_DISABLER */