~paparazzi-uav/paparazzi/v5.0-manual

__global__ void warpBackwardKernel(const PtrStepSzf I0, const PtrStepf u1, const PtrStepf u2, PtrStepf I1w, PtrStepf I1wx, PtrStepf I1wy, PtrStepf grad, PtrStepf rho)

109

{

110

const int x = blockIdx.x * blockDim.x + threadIdx.x;

111

const int y = blockIdx.y * blockDim.y + threadIdx.y;

112

113

if (x >= I0.cols || y >= I0.rows)

114

return;

115

116

const float u1Val = u1(y, x);

117

const float u2Val = u2(y, x);

118

119

const float wx = x + u1Val;

120

const float wy = y + u2Val;

121

122

const int xmin = ::ceilf(wx - 2.0f);

123

const int xmax = ::floorf(wx + 2.0f);

124

125

const int ymin = ::ceilf(wy - 2.0f);

126

const int ymax = ::floorf(wy + 2.0f);

127

128

float sum = 0.0f;

129

float sumx = 0.0f;

130

float sumy = 0.0f;

131

float wsum = 0.0f;

132

133

for (int cy = ymin; cy <= ymax; ++cy)

134

{

135

for (int cx = xmin; cx <= xmax; ++cx)

136

{

137

const float w = bicubicCoeff(wx - cx) * bicubicCoeff(wy - cy);

138

139

sum += w * tex2D(tex_I1 , cx, cy);

140

sumx += w * tex2D(tex_I1x, cx, cy);

141

sumy += w * tex2D(tex_I1y, cx, cy);

142

143

wsum += w;

144

}

145

}

146

147

const float coeff = 1.0f / wsum;

148

149

const float I1wVal = sum * coeff;

150

const float I1wxVal = sumx * coeff;

151

const float I1wyVal = sumy * coeff;

152

153

I1w(y, x) = I1wVal;

154

I1wx(y, x) = I1wxVal;

155

I1wy(y, x) = I1wyVal;

156

157

const float Ix2 = I1wxVal * I1wxVal;

158

const float Iy2 = I1wyVal * I1wyVal;

159

160

// store the |Grad(I1)|^2

161

grad(y, x) = Ix2 + Iy2;

162

163

// compute the constant part of the rho function

164

const float I0Val = I0(y, x);

165

rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;

166

}

167

168

void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y,

169

PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx,

170

PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho,

171

cudaStream_t stream)

172

{

173

const dim3 block(32, 8);

174

const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y));

175

176

bindTexture(&tex_I1 , I1);

177

bindTexture(&tex_I1x, I1x);

178

bindTexture(&tex_I1y, I1y);

179

180

warpBackwardKernel<<<grid, block, 0, stream>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);

181

cudaSafeCall( cudaGetLastError() );

182

183

if (!stream)

184

cudaSafeCall( cudaDeviceSynchronize() );

185

}

186

}

187

188

////////////////////////////////////////////////////////////

189

// estimateU

190

191

namespace tvl1flow

192

{

193

__device__ float divergence(const PtrStepf& v1, const PtrStepf& v2, int y, int x)

194

{

195

if (x > 0 && y > 0)

196

{

197

const float v1x = v1(y, x) - v1(y, x - 1);

198

const float v2y = v2(y, x) - v2(y - 1, x);

199

return v1x + v2y;

200

}

201

else

202

{

203

if (y > 0)

204

return v1(y, 0) + v2(y, 0) - v2(y - 1, 0);

205

else

206

{

207

if (x > 0)

208

return v1(0, x) - v1(0, x - 1) + v2(0, x);

209

else

210

return v1(0, 0) + v2(0, 0);

211

}

212

}

213

}

214

215

__global__ void estimateUKernel(const PtrStepSzf I1wx, const PtrStepf I1wy,

216

const PtrStepf grad, const PtrStepf rho_c,

217

const PtrStepf p11, const PtrStepf p12,

218

const PtrStepf p21, const PtrStepf p22,

219

const PtrStepf p31, const PtrStepf p32,

220

PtrStepf u1, PtrStepf u2, PtrStepf u3, PtrStepf error,

221

const float l_t, const float theta, const float gamma, const bool calcError)

222

{

223

const int x = blockIdx.x * blockDim.x + threadIdx.x;

224

const int y = blockIdx.y * blockDim.y + threadIdx.y;

225

226

if (x >= I1wx.cols || y >= I1wx.rows)

227

return;

228

229

const float I1wxVal = I1wx(y, x);

230

const float I1wyVal = I1wy(y, x);

231

const float gradVal = grad(y, x);

232

const float u1OldVal = u1(y, x);

233

const float u2OldVal = u2(y, x);

234

const float u3OldVal = gamma ? u3(y, x) : 0;

235

236

const float rho = rho_c(y, x) + (I1wxVal * u1OldVal + I1wyVal * u2OldVal + gamma * u3OldVal);

237

238

// estimate the values of the variable (v1, v2) (thresholding operator TH)

239

240

float d1 = 0.0f;

241

float d2 = 0.0f;

242

float d3 = 0.0f;

243

244

if (rho < -l_t * gradVal)

245

{

246

d1 = l_t * I1wxVal;

247

d2 = l_t * I1wyVal;

248

if (gamma)

249

d3 = l_t * gamma;

250

}

251

else if (rho > l_t * gradVal)

252

{

253

d1 = -l_t * I1wxVal;

254

d2 = -l_t * I1wyVal;

255

if (gamma)

256

d3 = -l_t * gamma;

257

}

258

else if (gradVal > numeric_limits<float>::epsilon())

259

{

260

const float fi = -rho / gradVal;

261

d1 = fi * I1wxVal;

262

d2 = fi * I1wyVal;

263

if (gamma)

264

d3 = fi * gamma;

265

}

266

267

const float v1 = u1OldVal + d1;

268

const float v2 = u2OldVal + d2;

269

const float v3 = u3OldVal + d3;

270

271

// compute the divergence of the dual variable (p1, p2)

272

273

const float div_p1 = divergence(p11, p12, y, x);

274

const float div_p2 = divergence(p21, p22, y, x);

275

const float div_p3 = gamma ? divergence(p31, p32, y, x) : 0;

276

277

// estimate the values of the optical flow (u1, u2)

278

279

const float u1NewVal = v1 + theta * div_p1;

280

const float u2NewVal = v2 + theta * div_p2;

281

const float u3NewVal = gamma ? v3 + theta * div_p3 : 0;

282

283

u1(y, x) = u1NewVal;

284

u2(y, x) = u2NewVal;

285

if (gamma)

286

u3(y, x) = u3NewVal;

287

288

if (calcError)

289

{

290

const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal);

291

const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal);

292

error(y, x) = n1 + n2;

293

}

294

}

295

296

void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy,

297

PtrStepSzf grad, PtrStepSzf rho_c,

298

PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,

299

PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error,

300

float l_t, float theta, float gamma, bool calcError,

301

cudaStream_t stream)

302

{

303

const dim3 block(32, 8);

304

const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));

305

306

estimateUKernel<<<grid, block, 0, stream>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);

307

cudaSafeCall( cudaGetLastError() );

308

309

if (!stream)

310

cudaSafeCall( cudaDeviceSynchronize() );

311

}

312

}

313

314

////////////////////////////////////////////////////////////

315

// estimateDualVariables

316

317

namespace tvl1flow

318

{

319

__global__ void estimateDualVariablesKernel(const PtrStepSzf u1, const PtrStepf u2, const PtrStepSzf u3,

320

PtrStepf p11, PtrStepf p12, PtrStepf p21, PtrStepf p22, PtrStepf p31, PtrStepf p32, const float taut, const float gamma)

321

{

322

const int x = blockIdx.x * blockDim.x + threadIdx.x;

323

const int y = blockIdx.y * blockDim.y + threadIdx.y;

324

325

if (x >= u1.cols || y >= u1.rows)

326

return;

327

328

const float u1x = u1(y, ::min(x + 1, u1.cols - 1)) - u1(y, x);

329

const float u1y = u1(::min(y + 1, u1.rows - 1), x) - u1(y, x);

330

331

const float u2x = u2(y, ::min(x + 1, u1.cols - 1)) - u2(y, x);

332

const float u2y = u2(::min(y + 1, u1.rows - 1), x) - u2(y, x);

333

334

const float u3x = gamma ? u3(y, ::min(x + 1, u1.cols - 1)) - u3(y, x) : 0;

335

const float u3y = gamma ? u3(::min(y + 1, u1.rows - 1), x) - u3(y, x) : 0;

336

337

const float g1 = ::hypotf(u1x, u1y);

338

const float g2 = ::hypotf(u2x, u2y);

339

const float g3 = gamma ? ::hypotf(u3x, u3y) : 0;

340

341

const float ng1 = 1.0f + taut * g1;

342

const float ng2 = 1.0f + taut * g2;

343

const float ng3 = gamma ? 1.0f + taut * g3 : 0;

344

345

p11(y, x) = (p11(y, x) + taut * u1x) / ng1;

346

p12(y, x) = (p12(y, x) + taut * u1y) / ng1;

347

p21(y, x) = (p21(y, x) + taut * u2x) / ng2;

348

p22(y, x) = (p22(y, x) + taut * u2y) / ng2;

349

if (gamma)

350

{

351

p31(y, x) = (p31(y, x) + taut * u3x) / ng3;

352

p32(y, x) = (p32(y, x) + taut * u3y) / ng3;

353

}

354

}

355

356

void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3,

357

PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,

358

float taut, float gamma,

359

cudaStream_t stream)

360

{

361

const dim3 block(32, 8);

362

const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));

363

364

estimateDualVariablesKernel<<<grid, block, 0, stream>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);

365

cudaSafeCall( cudaGetLastError() );

366

367

if (!stream)

368

cudaSafeCall( cudaDeviceSynchronize() );

369

}

370

}

371

372

#endif // !defined CUDA_DISABLER

Older »