~paparazzi-uav/paparazzi/v5.0-manual

Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int, double, bool, int, int, int, double, int) { throw_no_cuda(); return Ptr<FarnebackOpticalFlow>(); }

#else

#define MIN_SIZE 32

// CUDA resize() is fast, but it differs from the CPU analog. Disabling this flag

// leads to an inefficient code. It's for debug purposes only.

#define ENABLE_CUDA_RESIZE 1

namespace cv { namespace cuda { namespace device { namespace optflow_farneback

{

void setPolynomialExpansionConsts(

int polyN, const float *g, const float *xg, const float *xxg,

float ig11, float ig03, float ig33, float ig55);

void polynomialExpansionGpu(const PtrStepSzf &src, int polyN, PtrStepSzf dst, cudaStream_t stream);

void setUpdateMatricesConsts();

void updateMatricesGpu(

const PtrStepSzf flowx, const PtrStepSzf flowy, const PtrStepSzf R0, const PtrStepSzf R1,

PtrStepSzf M, cudaStream_t stream);

void updateFlowGpu(

const PtrStepSzf M, PtrStepSzf flowx, PtrStepSzf flowy, cudaStream_t stream);

void boxFilter5Gpu(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);

void boxFilter5Gpu_CC11(const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, cudaStream_t stream);

void setGaussianBlurKernel(const float *gKer, int ksizeHalf);

void gaussianBlurGpu(

const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);

void gaussianBlur5Gpu(

const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);

void gaussianBlur5Gpu_CC11(

const PtrStepSzf src, int ksizeHalf, PtrStepSzf dst, int borderType, cudaStream_t stream);

}}}}

namespace

{

class FarnebackOpticalFlowImpl : public FarnebackOpticalFlow

{

public:

FarnebackOpticalFlowImpl(int numLevels, double pyrScale, bool fastPyramids, int winSize,

100

int numIters, int polyN, double polySigma, int flags) :

101

numLevels_(numLevels), pyrScale_(pyrScale), fastPyramids_(fastPyramids), winSize_(winSize),

102

numIters_(numIters), polyN_(polyN), polySigma_(polySigma), flags_(flags)

103

{

104

}

105

106

virtual int getNumLevels() const { return numLevels_; }

107

virtual void setNumLevels(int numLevels) { numLevels_ = numLevels; }

108

109

virtual double getPyrScale() const { return pyrScale_; }

110

virtual void setPyrScale(double pyrScale) { pyrScale_ = pyrScale; }

111

112

virtual bool getFastPyramids() const { return fastPyramids_; }

113

virtual void setFastPyramids(bool fastPyramids) { fastPyramids_ = fastPyramids; }

114

115

virtual int getWinSize() const { return winSize_; }

116

virtual void setWinSize(int winSize) { winSize_ = winSize; }

117

118

virtual int getNumIters() const { return numIters_; }

119

virtual void setNumIters(int numIters) { numIters_ = numIters; }

120

121

virtual int getPolyN() const { return polyN_; }

122

virtual void setPolyN(int polyN) { polyN_ = polyN; }

123

124

virtual double getPolySigma() const { return polySigma_; }

125

virtual void setPolySigma(double polySigma) { polySigma_ = polySigma; }

126

127

virtual int getFlags() const { return flags_; }

128

virtual void setFlags(int flags) { flags_ = flags; }

129

130

virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream);

131

132

private:

133

int numLevels_;

134

double pyrScale_;

135

bool fastPyramids_;

136

int winSize_;

137

int numIters_;

138

int polyN_;

139

double polySigma_;

140

int flags_;

141

142

private:

143

void prepareGaussian(

144

int n, double sigma, float *g, float *xg, float *xxg,

145

double &ig11, double &ig03, double &ig33, double &ig55);

146

147

void setPolynomialExpansionConsts(int n, double sigma);

148

149

void updateFlow_boxFilter(

150

const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,

151

GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);

152

153

void updateFlow_gaussianBlur(

154

const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,

155

GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);

156

157

void calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream);

158

159

GpuMat frames_[2];

160

GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];

161

std::vector<GpuMat> pyramid0_, pyramid1_;

162

};

163

164

void FarnebackOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOutputArray _flow, Stream& stream)

165

{

166

const GpuMat frame0 = _frame0.getGpuMat();

167

const GpuMat frame1 = _frame1.getGpuMat();

168

169

BufferPool pool(stream);

170

GpuMat flowx = pool.getBuffer(frame0.size(), CV_32FC1);

171

GpuMat flowy = pool.getBuffer(frame0.size(), CV_32FC1);

172

173

calcImpl(frame0, frame1, flowx, flowy, stream);

174

175

GpuMat flows[] = {flowx, flowy};

176

cuda::merge(flows, 2, _flow, stream);

177

}

178

179

GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)

180

{

181

if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)

182

return mat(Rect(0, 0, cols, rows));

183

184

return mat = GpuMat(rows, cols, type);

185

}

186

187

void FarnebackOpticalFlowImpl::prepareGaussian(

188

int n, double sigma, float *g, float *xg, float *xxg,

189

double &ig11, double &ig03, double &ig33, double &ig55)

190

{

191

double s = 0.;

192

for (int x = -n; x <= n; x++)

193

{

194

g[x] = (float)std::exp(-x*x/(2*sigma*sigma));

195

s += g[x];

196

}

197

198

s = 1./s;

199

for (int x = -n; x <= n; x++)

200

{

201

g[x] = (float)(g[x]*s);

202

xg[x] = (float)(x*g[x]);

203

xxg[x] = (float)(x*x*g[x]);

204

}

205

206

Mat_<double> G(6, 6);

207

G.setTo(0);

208

209

for (int y = -n; y <= n; y++)

210

{

211

for (int x = -n; x <= n; x++)

212

{

213

G(0,0) += g[y]*g[x];

214

G(1,1) += g[y]*g[x]*x*x;

215

G(3,3) += g[y]*g[x]*x*x*x*x;

216

G(5,5) += g[y]*g[x]*x*x*y*y;

217

}

218

}

219

220

//G[0][0] = 1.;

221

G(2,2) = G(0,3) = G(0,4) = G(3,0) = G(4,0) = G(1,1);

222

G(4,4) = G(3,3);

223

G(3,4) = G(4,3) = G(5,5);

224

225

// invG:

226

// [ x e e ]

227

// [ y ]

228

// [ y ]

229

// [ e z ]

230

// [ e z ]

231

// [ u ]

232

Mat_<double> invG = G.inv(DECOMP_CHOLESKY);

233

234

ig11 = invG(1,1);

235

ig03 = invG(0,3);

236

ig33 = invG(3,3);

237

ig55 = invG(5,5);

238

}

239

240

void FarnebackOpticalFlowImpl::setPolynomialExpansionConsts(int n, double sigma)

241

{

242

std::vector<float> buf(n*6 + 3);

243

float* g = &buf[0] + n;

244

float* xg = g + n*2 + 1;

245

float* xxg = xg + n*2 + 1;

246

247

if (sigma < FLT_EPSILON)

248

sigma = n*0.3;

249

250

double ig11, ig03, ig33, ig55;

251

prepareGaussian(n, sigma, g, xg, xxg, ig11, ig03, ig33, ig55);

252

253

device::optflow_farneback::setPolynomialExpansionConsts(n, g, xg, xxg, static_cast<float>(ig11), static_cast<float>(ig03), static_cast<float>(ig33), static_cast<float>(ig55));

254

}

255

256

void FarnebackOpticalFlowImpl::updateFlow_boxFilter(

257

const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,

258

GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])

259

{

260

if (deviceSupports(FEATURE_SET_COMPUTE_12))

261

device::optflow_farneback::boxFilter5Gpu(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0]));

262

else

263

device::optflow_farneback::boxFilter5Gpu_CC11(M, blockSize/2, bufM, StreamAccessor::getStream(streams[0]));

264

swap(M, bufM);

265

266

for (int i = 1; i < 5; ++i)

267

streams[i].waitForCompletion();

268

device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0]));

269

270

if (updateMatrices)

271

device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0]));

272

}

273

274

void FarnebackOpticalFlowImpl::updateFlow_gaussianBlur(

275

const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,

276

GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[])

277

{

278

if (deviceSupports(FEATURE_SET_COMPUTE_12))

279

device::optflow_farneback::gaussianBlur5Gpu(

280

M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0]));

281

else

282

device::optflow_farneback::gaussianBlur5Gpu_CC11(

283

M, blockSize/2, bufM, BORDER_REPLICATE, StreamAccessor::getStream(streams[0]));

284

swap(M, bufM);

285

286

device::optflow_farneback::updateFlowGpu(M, flowx, flowy, StreamAccessor::getStream(streams[0]));

287

288

if (updateMatrices)

289

device::optflow_farneback::updateMatricesGpu(flowx, flowy, R0, R1, M, StreamAccessor::getStream(streams[0]));

290

}

291

292

void FarnebackOpticalFlowImpl::calcImpl(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &stream)

293

{

294

CV_Assert(frame0.channels() == 1 && frame1.channels() == 1);

295

CV_Assert(frame0.size() == frame1.size());

296

CV_Assert(polyN_ == 5 || polyN_ == 7);

297

CV_Assert(!fastPyramids_ || std::abs(pyrScale_ - 0.5) < 1e-6);

298

299

Stream streams[5];

300

if (stream)

301

streams[0] = stream;

302

303

Size size = frame0.size();

304

GpuMat prevFlowX, prevFlowY, curFlowX, curFlowY;

305

306

flowx.create(size, CV_32F);

307

flowy.create(size, CV_32F);

308

GpuMat flowx0 = flowx;

309

GpuMat flowy0 = flowy;

310

311

// Crop unnecessary levels

312

double scale = 1;

313

int numLevelsCropped = 0;

314

for (; numLevelsCropped < numLevels_; numLevelsCropped++)

315

{

316

scale *= pyrScale_;

317

if (size.width*scale < MIN_SIZE || size.height*scale < MIN_SIZE)

318

break;

319

}

320

321

frame0.convertTo(frames_[0], CV_32F, streams[0]);

322

frame1.convertTo(frames_[1], CV_32F, streams[1]);

323

324

if (fastPyramids_)

325

{

326

// Build Gaussian pyramids using pyrDown()

327

pyramid0_.resize(numLevelsCropped + 1);

328

pyramid1_.resize(numLevelsCropped + 1);

329

pyramid0_[0] = frames_[0];

330

pyramid1_[0] = frames_[1];

331

for (int i = 1; i <= numLevelsCropped; ++i)

332

{

333

cuda::pyrDown(pyramid0_[i - 1], pyramid0_[i], streams[0]);

334

cuda::pyrDown(pyramid1_[i - 1], pyramid1_[i], streams[1]);

335

}

336

}

337

338

setPolynomialExpansionConsts(polyN_, polySigma_);

339

device::optflow_farneback::setUpdateMatricesConsts();

340

341

for (int k = numLevelsCropped; k >= 0; k--)

342

{

343

streams[0].waitForCompletion();

344

345

scale = 1;

346

for (int i = 0; i < k; i++)

347

scale *= pyrScale_;

348

349

double sigma = (1./scale - 1) * 0.5;

350

int smoothSize = cvRound(sigma*5) | 1;

351

smoothSize = std::max(smoothSize, 3);

352

353

int width = cvRound(size.width*scale);

354

int height = cvRound(size.height*scale);

355

356

if (fastPyramids_)

357

{

358

width = pyramid0_[k].cols;

359

height = pyramid0_[k].rows;

360

}

361

362

if (k > 0)

363

{

364

curFlowX.create(height, width, CV_32F);

365

curFlowY.create(height, width, CV_32F);

366

}

367

else

368

{

369

curFlowX = flowx0;

370

curFlowY = flowy0;

371

}

372

373

if (!prevFlowX.data)

374

{

375

if (flags_ & OPTFLOW_USE_INITIAL_FLOW)

376

{

377

cuda::resize(flowx0, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);

378

cuda::resize(flowy0, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);

379

curFlowX.convertTo(curFlowX, curFlowX.depth(), scale, streams[0]);

380

curFlowY.convertTo(curFlowY, curFlowY.depth(), scale, streams[1]);

381

}

382

else

383

{

384

curFlowX.setTo(0, streams[0]);

385

curFlowY.setTo(0, streams[1]);

386

}

387

}

388

else

389

{

390

cuda::resize(prevFlowX, curFlowX, Size(width, height), 0, 0, INTER_LINEAR, streams[0]);

391

cuda::resize(prevFlowY, curFlowY, Size(width, height), 0, 0, INTER_LINEAR, streams[1]);

392

curFlowX.convertTo(curFlowX, curFlowX.depth(), 1./pyrScale_, streams[0]);

393

curFlowY.convertTo(curFlowY, curFlowY.depth(), 1./pyrScale_, streams[1]);

394

}

395

396

GpuMat M = allocMatFromBuf(5*height, width, CV_32F, M_);

397

GpuMat bufM = allocMatFromBuf(5*height, width, CV_32F, bufM_);

398

GpuMat R[2] =

399

{

400

allocMatFromBuf(5*height, width, CV_32F, R_[0]),

401

allocMatFromBuf(5*height, width, CV_32F, R_[1])

402

};

403

404

if (fastPyramids_)

405

{

406

device::optflow_farneback::polynomialExpansionGpu(pyramid0_[k], polyN_, R[0], StreamAccessor::getStream(streams[0]));

407

device::optflow_farneback::polynomialExpansionGpu(pyramid1_[k], polyN_, R[1], StreamAccessor::getStream(streams[1]));

408

}

409

else

410

{

411

GpuMat blurredFrame[2] =

412

{

413

allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[0]),

414

allocMatFromBuf(size.height, size.width, CV_32F, blurredFrame_[1])

415

};

416

GpuMat pyrLevel[2] =

417

{

418

allocMatFromBuf(height, width, CV_32F, pyrLevel_[0]),

419

allocMatFromBuf(height, width, CV_32F, pyrLevel_[1])

420

};

421

422

Mat g = getGaussianKernel(smoothSize, sigma, CV_32F);

423

device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(smoothSize/2), smoothSize/2);

424

425

for (int i = 0; i < 2; i++)

426

{

427

device::optflow_farneback::gaussianBlurGpu(

428

frames_[i], smoothSize/2, blurredFrame[i], BORDER_REFLECT101, StreamAccessor::getStream(streams[i]));

429

cuda::resize(blurredFrame[i], pyrLevel[i], Size(width, height), 0.0, 0.0, INTER_LINEAR, streams[i]);

430

device::optflow_farneback::polynomialExpansionGpu(pyrLevel[i], polyN_, R[i], StreamAccessor::getStream(streams[i]));

431

}

432

}

433

434

streams[1].waitForCompletion();

435

device::optflow_farneback::updateMatricesGpu(curFlowX, curFlowY, R[0], R[1], M, StreamAccessor::getStream(streams[0]));

436

437

if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN)

438

{

439

Mat g = getGaussianKernel(winSize_, winSize_/2*0.3f, CV_32F);

440

device::optflow_farneback::setGaussianBlurKernel(g.ptr<float>(winSize_/2), winSize_/2);

441

}

442

for (int i = 0; i < numIters_; i++)

443

{

444

if (flags_ & OPTFLOW_FARNEBACK_GAUSSIAN)

445

updateFlow_gaussianBlur(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams);

446

else

447

updateFlow_boxFilter(R[0], R[1], curFlowX, curFlowY, M, bufM, winSize_, i < numIters_-1, streams);

448

}

449

450

prevFlowX = curFlowX;

451

prevFlowY = curFlowY;

452

}

453

454

flowx = curFlowX;

455

flowy = curFlowY;

456

457

if (!stream)

458

streams[0].waitForCompletion();

459

}

460

}

461

462

Ptr<FarnebackOpticalFlow> cv::cuda::FarnebackOpticalFlow::create(int numLevels, double pyrScale, bool fastPyramids, int winSize,

463

int numIters, int polyN, double polySigma, int flags)

464

{

465

return makePtr<FarnebackOpticalFlowImpl>(numLevels, pyrScale, fastPyramids, winSize,

466

numIters, polyN, polySigma, flags);

467

}

468

469

#endif

Older »