1
/*M///////////////////////////////////////////////////////////////////////////////////////
3
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5
// By downloading, copying, installing or using the software you agree to this license.
6
// If you do not agree to this license, do not download, install,
7
// copy or use the software.
11
// For Open Source Computer Vision Library
13
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15
// Third party copyrights are property of their respective owners.
17
// Redistribution and use in source and binary forms, with or without modification,
18
// are permitted provided that the following conditions are met:
20
// * Redistribution's of source code must retain the above copyright notice,
21
// this list of conditions and the following disclaimer.
23
// * Redistribution's in binary form must reproduce the above copyright notice,
24
// this list of conditions and the following disclaimer in the documentation
25
// and/or other materials provided with the distribution.
27
// * The name of the copyright holders may not be used to endorse or promote products
28
// derived from this software without specific prior written permission.
30
// This software is provided by the copyright holders and contributors "as is" and
31
// any express or implied warranties, including, but not limited to, the implied
32
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33
// In no event shall the Intel Corporation or contributors be liable for any direct,
34
// indirect, incidental, special, exemplary, or consequential damages
35
// (including, but not limited to, procurement of substitute goods or services;
36
// loss of use, data, or profits; or business interruption) however caused
37
// and on any theory of liability, whether in contract, strict liability,
38
// or tort (including negligence or otherwise) arising in any way out of
39
// the use of this software, even if advised of the possibility of such damage.
43
#if !defined CUDA_DISABLER
45
#include "opencv2/core/cuda/common.hpp"
46
#include "opencv2/core/cuda/border_interpolate.hpp"
47
#include "opencv2/core/cuda/limits.hpp"
49
using namespace cv::cuda;
50
using namespace cv::cuda::device;
52
////////////////////////////////////////////////////////////
57
__global__ void centeredGradientKernel(const PtrStepSzf src, PtrStepf dx, PtrStepf dy)
59
const int x = blockIdx.x * blockDim.x + threadIdx.x;
60
const int y = blockIdx.y * blockDim.y + threadIdx.y;
62
if (x >= src.cols || y >= src.rows)
65
dx(y, x) = 0.5f * (src(y, ::min(x + 1, src.cols - 1)) - src(y, ::max(x - 1, 0)));
66
dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x));
69
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream)
71
const dim3 block(32, 8);
72
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
74
centeredGradientKernel<<<grid, block, 0, stream>>>(src, dx, dy);
75
cudaSafeCall( cudaGetLastError() );
78
cudaSafeCall( cudaDeviceSynchronize() );
82
////////////////////////////////////////////////////////////
87
static __device__ __forceinline__ float bicubicCoeff(float x_)
92
return x * x * (1.5f * x - 2.5f) + 1.0f;
96
return x * (x * (-0.5f * x + 2.5f) - 4.0f) + 2.0f;
104
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1 (false, cudaFilterModePoint, cudaAddressModeClamp);
105
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1x(false, cudaFilterModePoint, cudaAddressModeClamp);
106
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_I1y(false, cudaFilterModePoint, cudaAddressModeClamp);
108
__global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)
110
const int x = blockIdx.x * blockDim.x + threadIdx.x;
111
const int y = blockIdx.y * blockDim.y + threadIdx.y;
113
if (x >= I0.cols || y >= I0.rows)
116
const float u1Val = u1(y, x);
117
const float u2Val = u2(y, x);
119
const float wx = x + u1Val;
120
const float wy = y + u2Val;
122
const int xmin = ::ceilf(wx - 2.0f);
123
const int xmax = ::floorf(wx + 2.0f);
125
const int ymin = ::ceilf(wy - 2.0f);
126
const int ymax = ::floorf(wy + 2.0f);
133
for (int cy = ymin; cy <= ymax; ++cy)
135
for (int cx = xmin; cx <= xmax; ++cx)
137
const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);
139
sum += w * tex2D(tex_I1 , cx, cy);
140
sumx += w * tex2D(tex_I1x, cx, cy);
141
sumy += w * tex2D(tex_I1y, cx, cy);
147
const float coeff = 1.0f / wsum;
149
const float I1wVal = sum * coeff;
150
const float I1wxVal = sumx * coeff;
151
const float I1wyVal = sumy * coeff;
154
I1wx(y, x) = I1wxVal;
155
I1wy(y, x) = I1wyVal;
157
const float Ix2 = I1wxVal * I1wxVal;
158
const float Iy2 = I1wyVal * I1wyVal;
160
// store the |Grad(I1)|^2
161
grad(y, x) = Ix2 + Iy2;
163
// compute the constant part of the rho function
164
const float I0Val = I0(y, x);
165
rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
168
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y,
169
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx,
170
PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho,
173
const dim3 block(32, 8);
174
const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y));
176
bindTexture(&tex_I1 , I1);
177
bindTexture(&tex_I1x, I1x);
178
bindTexture(&tex_I1y, I1y);
180
warpBackwardKernel<<<grid, block, 0, stream>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
181
cudaSafeCall( cudaGetLastError() );
184
cudaSafeCall( cudaDeviceSynchronize() );
188
////////////////////////////////////////////////////////////
193
__device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x)
197
const float v1x = v1(y, x) - v1(y, x - 1);
198
const float v2y = v2(y, x) - v2(y - 1, x);
204
return v1(y, 0) + v2(y, 0) - v2(y - 1, 0);
208
return v1(0, x) - v1(0, x - 1) + v2(0, x);
210
return v1(0, 0) + v2(0, 0);
215
__global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy,
216
const PtrStepf grad, const PtrStepf rho_c,
217
const PtrStepf p11, const PtrStepf p12,
218
const PtrStepf p21, const PtrStepf p22,
219
const PtrStepf p31, const PtrStepf p32,
220
PtrStepf u1, PtrStepf u2, PtrStepf u3, PtrStepf error,
221
const float l_t, const float theta, const float gamma, const bool calcError)
223
const int x = blockIdx.x * blockDim.x + threadIdx.x;
224
const int y = blockIdx.y * blockDim.y + threadIdx.y;
226
if (x >= I1wx.cols || y >= I1wx.rows)
229
const float I1wxVal = I1wx(y, x);
230
const float I1wyVal = I1wy(y, x);
231
const float gradVal = grad(y, x);
232
const float u1OldVal = u1(y, x);
233
const float u2OldVal = u2(y, x);
234
const float u3OldVal = gamma ? u3(y, x) : 0;
236
const float rho = rho_c(y, x) + (I1wxVal * u1OldVal + I1wyVal * u2OldVal + gamma * u3OldVal);
238
// estimate the values of the variable (v1, v2) (thresholding operator TH)
244
if (rho < -l_t * gradVal)
251
else if (rho > l_t * gradVal)
258
else if (gradVal > numeric_limits<float>::epsilon())
260
const float fi = -rho / gradVal;
267
const float v1 = u1OldVal + d1;
268
const float v2 = u2OldVal + d2;
269
const float v3 = u3OldVal + d3;
271
// compute the divergence of the dual variable (p1, p2)
273
const float div_p1 = divergence(p11, p12, y, x);
274
const float div_p2 = divergence(p21, p22, y, x);
275
const float div_p3 = gamma ? divergence(p31, p32, y, x) : 0;
277
// estimate the values of the optical flow (u1, u2)
279
const float u1NewVal = v1 + theta * div_p1;
280
const float u2NewVal = v2 + theta * div_p2;
281
const float u3NewVal = gamma ? v3 + theta * div_p3 : 0;
290
const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);
291
const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);
292
error(y, x) = n1 + n2;
296
void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,
297
PtrStepSzf grad, PtrStepSzf rho_c,
298
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
299
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error,
300
float l_t, float theta, float gamma, bool calcError,
303
const dim3 block(32, 8);
304
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
306
estimateUKernel<<<grid, block, 0, stream>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);
307
cudaSafeCall( cudaGetLastError() );
310
cudaSafeCall( cudaDeviceSynchronize() );
314
////////////////////////////////////////////////////////////
315
// estimateDualVariables
319
__global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, const PtrStepSzf u3,
320
PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, PtrStepf p31, PtrStepf p32, const float taut, const float gamma)
322
const int x = blockIdx.x * blockDim.x + threadIdx.x;
323
const int y = blockIdx.y * blockDim.y + threadIdx.y;
325
if (x >= u1.cols || y >= u1.rows)
328
const float u1x = u1(y, ::min(x + 1, u1.cols - 1)) - u1(y, x);
329
const float u1y = u1(::min(y + 1, u1.rows - 1), x) - u1(y, x);
331
const float u2x = u2(y, ::min(x + 1, u1.cols - 1)) - u2(y, x);
332
const float u2y = u2(::min(y + 1, u1.rows - 1), x) - u2(y, x);
334
const float u3x = gamma ? u3(y, ::min(x + 1, u1.cols - 1)) - u3(y, x) : 0;
335
const float u3y = gamma ? u3(::min(y + 1, u1.rows - 1), x) - u3(y, x) : 0;
337
const float g1 = ::hypotf(u1x, u1y);
338
const float g2 = ::hypotf(u2x, u2y);
339
const float g3 = gamma ? ::hypotf(u3x, u3y) : 0;
341
const float ng1 = 1.0f + taut * g1;
342
const float ng2 = 1.0f + taut * g2;
343
const float ng3 = gamma ? 1.0f + taut * g3 : 0;
345
p11(y, x) = (p11(y, x) + taut * u1x) / ng1;
346
p12(y, x) = (p12(y, x) + taut * u1y) / ng1;
347
p21(y, x) = (p21(y, x) + taut * u2x) / ng2;
348
p22(y, x) = (p22(y, x) + taut * u2y) / ng2;
351
p31(y, x) = (p31(y, x) + taut * u3x) / ng3;
352
p32(y, x) = (p32(y, x) + taut * u3y) / ng3;
356
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3,
357
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
358
float taut, float gamma,
361
const dim3 block(32, 8);
362
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
364
estimateDualVariablesKernel<<<grid, block, 0, stream>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
365
cudaSafeCall( cudaGetLastError() );
368
cudaSafeCall( cudaDeviceSynchronize() );
372
#endif // !defined CUDA_DISABLER