~ubuntu-branches/ubuntu/wily/libde265/wily

Viewing changes to libde265/fallback-motion.cc

Committer: Package Import Robot
Author(s): Joachim Bauch
Date: 2015-07-16 11:07:46 UTC
mfrom: (2.1.2 sid)
Revision ID: package-import@ubuntu.com-20150716110746-76vsv24j3yux7tnu

Tags: 1.0.2-1

* Imported Upstream version 1.0.2
* Added new files to copyright information.
* Only export decoder API and update symbols for new version.

files added:
.pc/disable_tools.patch

.pc/disable_tools.patch/Makefile.am

.pc/disable_tools.patch/dec265

.pc/disable_tools.patch/dec265/Makefile.am

.pc/only_export_decoder_api.patch

.pc/only_export_decoder_api.patch/configure.ac

.pc/only_export_decoder_api.patch/libde265

.pc/only_export_decoder_api.patch/libde265/configparam.h

.pc/only_export_decoder_api.patch/libde265/encoder

.pc/only_export_decoder_api.patch/libde265/encoder/Makefile.am

.pc/only_export_decoder_api.patch/libde265/encoder/algo

.pc/only_export_decoder_api.patch/libde265/encoder/algo/Makefile.am

.pc/only_export_decoder_api.patch/libde265/image-io.cc

.pc/only_export_decoder_api.patch/libde265/image-io.h

.pc/only_export_decoder_api.patch/libde265/quality.h

CMakeLists.txt

TODO

debian/patches

debian/patches/disable_tools.patch

debian/patches/only_export_decoder_api.patch

debian/patches/series

dec265/hdrcopy.cc

enc265

enc265/CMakeLists.txt

enc265/Makefile.am

enc265/Makefile.in

enc265/Makefile.vc7

enc265/enc265.cc

libde265/CMakeLists.txt

libde265/alloc_pool.cc

libde265/alloc_pool.h

libde265/arm

libde265/arm/Makefile.am

libde265/arm/Makefile.in

libde265/arm/arm.cc

libde265/arm/arm.h

libde265/arm/asm.S

libde265/arm/cpudetect.S

libde265/arm/hevcdsp_qpel_neon.S

libde265/arm/neon.S

libde265/configparam.cc

libde265/configparam.h

libde265/contextmodel.cc

libde265/contextmodel.h

libde265/en265.cc

libde265/en265.h

libde265/encoder

libde265/encoder/CMakeLists.txt

libde265/encoder/Makefile.am

libde265/encoder/Makefile.in

libde265/encoder/algo

libde265/encoder/algo/CMakeLists.txt

libde265/encoder/algo/Makefile.am

libde265/encoder/algo/Makefile.in

libde265/encoder/algo/algo.cc

libde265/encoder/algo/algo.h

libde265/encoder/algo/cb-interpartmode.cc

libde265/encoder/algo/cb-interpartmode.h

libde265/encoder/algo/cb-intra-inter.cc

libde265/encoder/algo/cb-intra-inter.h

libde265/encoder/algo/cb-intrapartmode.cc

libde265/encoder/algo/cb-intrapartmode.h

libde265/encoder/algo/cb-mergeindex.cc

libde265/encoder/algo/cb-mergeindex.h

libde265/encoder/algo/cb-skip.cc

libde265/encoder/algo/cb-skip.h

libde265/encoder/algo/cb-split.cc

libde265/encoder/algo/cb-split.h

libde265/encoder/algo/coding-options.cc

libde265/encoder/algo/coding-options.h

libde265/encoder/algo/ctb-qscale.cc

libde265/encoder/algo/ctb-qscale.h

libde265/encoder/algo/pb-mv.cc

libde265/encoder/algo/pb-mv.h

libde265/encoder/algo/tb-intrapredmode.cc

libde265/encoder/algo/tb-intrapredmode.h

libde265/encoder/algo/tb-split.cc

libde265/encoder/algo/tb-split.h

libde265/encoder/analyze.cc

libde265/encoder/analyze.h

libde265/encoder/encode.cc

libde265/encoder/encode.h

libde265/encoder/encoder-context.cc

libde265/encoder/encoder-context.h

libde265/encoder/encoder-params.cc

libde265/encoder/encoder-params.h

libde265/encoder/encpicbuf.cc

libde265/encoder/encpicbuf.h

libde265/encoder/sop.cc

libde265/encoder/sop.h

libde265/image-io.cc

libde265/image-io.h

libde265/quality.cc

libde265/quality.h

libde265/vui.cc

libde265/vui.h

libde265/x86/CMakeLists.txt

m4/ax_cxx_compile_stdcxx_11.m4

tools

tools/Makefile.am

tools/Makefile.in

tools/bjoentegaard.cc

tools/block-rate-estim.cc

tools/gen-entropy-table.cc

tools/rd-curves.cc

tools/tests.cc

tools/yuv-distortion.cc

files removed:
build

build/vc9-x86

build/vc9-x86/make-solutions.bat

m4/visibility.m4

files modified:
.pc/applied-patches

.travis.yml

Makefile.am

Makefile.in

Makefile.vc7

README.md

aclocal.m4

autogen.sh

build.bat

config.h.in

configure

configure.ac

debian/changelog

debian/control

debian/copyright

debian/libde265-0.symbols

dec265/CMakeLists.txt

dec265/Makefile.in

dec265/Makefile.vc7

dec265/dec265.cc

dec265/sdl.cc

dec265/sdl.hh

extra/win32cond.c

libde265/Makefile.am

libde265/Makefile.in

libde265/Makefile.vc7

libde265/acceleration.h

libde265/bitstream.cc

libde265/cabac.cc

libde265/cabac.h

libde265/de265-version.h

libde265/de265.cc

libde265/de265.h

libde265/deblock.cc

libde265/decctx.cc

libde265/decctx.h

libde265/dpb.cc

libde265/dpb.h

libde265/fallback-dct.cc

libde265/fallback-dct.h

libde265/fallback-motion.cc

libde265/fallback-motion.h

libde265/fallback.cc

libde265/image.cc

libde265/image.h

libde265/intrapred.cc

libde265/intrapred.h

libde265/motion.cc

libde265/motion.h

libde265/nal-parser.cc

libde265/nal-parser.h

libde265/nal.cc

libde265/nal.h

libde265/pps.cc

libde265/pps.h

libde265/refpic.cc

libde265/refpic.h

libde265/sao.cc

libde265/sei.cc

libde265/sei.h

libde265/slice.cc

libde265/slice.h

libde265/sps.cc

libde265/sps.h

libde265/threads.cc

libde265/threads.h

libde265/transform.cc

libde265/transform.h

libde265/util.cc

libde265/util.h

libde265/visualize.cc

libde265/vps.cc

libde265/vps.h

libde265/x86/Makefile.am

libde265/x86/Makefile.in

libde265/x86/sse-dct.cc

libde265/x86/sse-dct.h

libde265/x86/sse-motion.cc

libde265/x86/sse-motion.h

libde265/x86/sse.cc

sherlock265/Makefile.in

sherlock265/VideoDecoder.cc

Show diffs side-by-side

added added

removed removed

libde265/fallback-motion.cc

void put_unweighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride,

int16_t *src, ptrdiff_t srcstride,

const int16_t *src, ptrdiff_t srcstride,

int width, int height)

{

int offset8bit = 32;

assert((width&1)==0);

for (int y=0;y<height;y++) {

int16_t* in = &src[y*srcstride];

const int16_t* in = &src[y*srcstride];

uint8_t* out = &dst[y*dststride];

for (int x=0;x<width;x+=2) {

void put_weighted_pred_8_fallback(uint8_t *dst, ptrdiff_t dststride,

int16_t *src, ptrdiff_t srcstride,

const int16_t *src, ptrdiff_t srcstride,

int width, int height,

int w,int o,int log2WD)

{

const int rnd = (1<<(log2WD-1));

for (int y=0;y<height;y++) {

int16_t* in = &src[y*srcstride];

const int16_t* in = &src[y*srcstride];

uint8_t* out = &dst[y*dststride];

for (int x=0;x<width;x++) {

}

void put_weighted_bipred_8_fallback(uint8_t *dst, ptrdiff_t dststride,

int16_t *src1, int16_t *src2, ptrdiff_t srcstride,

const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,

int width, int height,

int w1,int o1, int w2,int o2, int log2WD)

{

const int rnd = ((o1+o2+1) << log2WD);

for (int y=0;y<height;y++) {

int16_t* in1 = &src1[y*srcstride];

int16_t* in2 = &src2[y*srcstride];

const int16_t* in1 = &src1[y*srcstride];

const int16_t* in2 = &src2[y*srcstride];

uint8_t* out = &dst[y*dststride];

for (int x=0;x<width;x++) {

void put_weighted_pred_avg_8_fallback(uint8_t *dst, ptrdiff_t dststride,

int16_t *src1, int16_t *src2,

const int16_t *src1, const int16_t *src2,

ptrdiff_t srcstride, int width,

100

int height)

101

{

144

#endif

145

{

146

for (int y=0;y<height;y++) {

147

int16_t* in1 = &src1[y*srcstride];

148

int16_t* in2 = &src2[y*srcstride];

147

const int16_t* in1 = &src1[y*srcstride];

148

const int16_t* in2 = &src2[y*srcstride];

149

uint8_t* out = &dst[y*dststride];

150

151

for (int x=0;x<width;x+=2) {

159

160

161

162

163

164

void put_unweighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride,

165

const int16_t *src, ptrdiff_t srcstride,

166

int width, int height, int bit_depth)

167

{

168

int shift1 = 14-bit_depth;

169

int offset1 = 0;

170

if (shift1>0) { offset1 = 1<<(shift1-1); }

171

172

assert((width&1)==0);

173

174

for (int y=0;y<height;y++) {

175

const int16_t* in = &src[y*srcstride];

176

uint16_t* out = &dst[y*dststride];

177

178

for (int x=0;x<width;x+=2) {

179

out[0] = Clip_BitDepth((in[0] + offset1)>>shift1, bit_depth);

180

out[1] = Clip_BitDepth((in[1] + offset1)>>shift1, bit_depth);

181

out+=2; in+=2;

182

}

183

}

184

}

185

186

#include <stdlib.h>

187

188

void put_weighted_pred_16_fallback(uint16_t *dst, ptrdiff_t dststride,

189

const int16_t *src, ptrdiff_t srcstride,

190

int width, int height,

191

int w,int o,int log2WD, int bit_depth)

192

{

193

assert(log2WD>=1); // TODO

194

195

const int rnd = (1<<(log2WD-1));

196

197

for (int y=0;y<height;y++) {

198

const int16_t* in = &src[y*srcstride];

199

uint16_t* out = &dst[y*dststride];

200

201

for (int x=0;x<width;x++) {

202

out[0] = Clip_BitDepth(((in[0]*w + rnd)>>log2WD) + o, bit_depth);

203

out++; in++;

204

}

205

}

206

}

207

208

void put_weighted_bipred_16_fallback(uint16_t *dst, ptrdiff_t dststride,

209

const int16_t *src1, const int16_t *src2, ptrdiff_t srcstride,

210

int width, int height,

211

int w1,int o1, int w2,int o2, int log2WD, int bit_depth)

212

{

213

assert(log2WD>=1); // TODO

214

215

const int rnd = ((o1+o2+1) << log2WD);

216

217

for (int y=0;y<height;y++) {

218

const int16_t* in1 = &src1[y*srcstride];

219

const int16_t* in2 = &src2[y*srcstride];

220

uint16_t* out = &dst[y*dststride];

221

222

for (int x=0;x<width;x++) {

223

out[0] = Clip_BitDepth((in1[0]*w1 + in2[0]*w2 + rnd)>>(log2WD+1), bit_depth);

224

out++; in1++; in2++;

225

}

226

}

227

}

228

229

230

void put_weighted_pred_avg_16_fallback(uint16_t *dst, ptrdiff_t dststride,

231

const int16_t *src1, const int16_t *src2,

232

ptrdiff_t srcstride, int width,

233

int height, int bit_depth)

234

{

235

int shift2 = 15-bit_depth;

236

int offset2 = 1<<(shift2-1);

237

238

assert((width&1)==0);

239

240

for (int y=0;y<height;y++) {

241

const int16_t* in1 = &src1[y*srcstride];

242

const int16_t* in2 = &src2[y*srcstride];

243

uint16_t* out = &dst[y*dststride];

244

245

for (int x=0;x<width;x+=2) {

246

out[0] = Clip_BitDepth((in1[0] + in2[0] + offset2)>>shift2, bit_depth);

247

out[1] = Clip_BitDepth((in1[1] + in2[1] + offset2)>>shift2, bit_depth);

248

out+=2; in1+=2; in2+=2;

249

}

250

}

251

}

252

253

254

255

256

162

257

void put_epel_8_fallback(int16_t *out, ptrdiff_t out_stride,

163

uint8_t *src, ptrdiff_t src_stride,

258

const uint8_t *src, ptrdiff_t src_stride,

164

259

int width, int height,

165

260

int mx, int my, int16_t* mcbuffer)

166

261

{

168

263

169

264

for (int y=0;y<height;y++) {

170

265

int16_t* o = &out[y*out_stride];

171

uint8_t* i = &src[y*src_stride];

172

173

for (int x=0;x<width;x++) {

174

*o = *i << shift3;

175

o++;

176

i++;

177

}

178

}

179

}

180

181

182

void put_epel_hv_8_fallback(int16_t *dst, ptrdiff_t dst_stride,

183

uint8_t *src, ptrdiff_t src_stride,

184

int nPbWC, int nPbHC,

185

int xFracC, int yFracC, int16_t* mcbuffer)

186

{

187

const int shift1 = 0;

266

const uint8_t* i = &src[y*src_stride];

267

268

for (int x=0;x<width;x++) {

269

*o = *i << shift3;

270

o++;

271

i++;

272

}

273

}

274

}

275

276

277

/*
 * Copy a block of 16-bit source pixels into the int16_t output buffer,
 * scaling every pixel up by (14 - bit_depth) bits. No filtering is applied.
 *
 *   out, out_stride  output buffer and row stride (in int16_t elements)
 *   src, src_stride  source pixels and row stride (in uint16_t elements)
 *   width, height    block size in pixels
 *   mx, my, mcbuffer not used by this function
 *   bit_depth        determines the left shift (14 - bit_depth)
 */
void put_epel_16_fallback(int16_t *out, ptrdiff_t out_stride,
                          const uint16_t *src, ptrdiff_t src_stride,
                          int width, int height,
                          int mx, int my, int16_t* mcbuffer, int bit_depth)
{
  const int shift3 = 14 - bit_depth;

  for (int y=0;y<height;y++) {
    int16_t*        o = &out[y*out_stride];
    const uint16_t* i = &src[y*src_stride];

    for (int x=0;x<width;x++) {
      o[x] = i[x] << shift3;
    }
  }
}

295

296

297

template <class pixel_t>

298

void put_epel_hv_fallback(int16_t *dst, ptrdiff_t dst_stride,

299

const pixel_t *src, ptrdiff_t src_stride,

300

int nPbWC, int nPbHC,

301

int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth)

302

{

303

const int shift1 = bit_depth-8;

188

304

const int shift2 = 6;

189

305

//const int shift3 = 6;

190

306

224

340

//printf("---H---(%d)\n",xFracC);

225

341

226

342

for (int y=-extra_top;y<nPbHC+extra_bottom;y++) {

227

uint8_t* p = &src[y*src_stride - extra_left];

343

const pixel_t* p = &src[y*src_stride - extra_left];

228

344

229

345

for (int x=0;x<nPbWC;x++) {

230

346

int16_t v;

241

357

}

242

358

243

359

//printf("%d %d %d %d -> %d\n",p[0],p[1],p[2],p[3],v);

244

360

245

361

tmp2buf[y+extra_top + x*nPbH_extra] = v;

246

362

p++;

247

363

272

388

default:

273

389

case 7: v = (-2*p[0]+10*p[1]+58*p[2]-2*p[3])>>vshift; break;

274

390

}

275

391

276

392

dst[x + y*dst_stride] = v;

277

393

p++;

278

394

}

291

407

}

292

408

293

409

410

template

411

void put_epel_hv_fallback<uint8_t>(int16_t *dst, ptrdiff_t dst_stride,

412

const uint8_t *src, ptrdiff_t src_stride,

413

int nPbWC, int nPbHC,

414

int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth);

415

template

416

void put_epel_hv_fallback<uint16_t>(int16_t *dst, ptrdiff_t dst_stride,

417

const uint16_t *src, ptrdiff_t src_stride,

418

int nPbWC, int nPbHC,

419

int xFracC, int yFracC, int16_t* mcbuffer, int bit_depth);

420

294

421

295

422

296

423

void put_qpel_0_0_fallback(int16_t *out, ptrdiff_t out_stride,

297

uint8_t *src, ptrdiff_t srcstride,

424

const uint8_t *src, ptrdiff_t srcstride,

298

425

int nPbW, int nPbH, int16_t* mcbuffer)

299

426

{

300

427

//const int shift1 = 0; // sps->BitDepth_Y-8;

303

430

// straight copy

304

431

305

432

for (int y=0;y<nPbH;y++) {

306

uint8_t* p = src + srcstride*y;

433

const uint8_t* p = src + srcstride*y;

307

434

int16_t* o = out + out_stride*y;

308

435

309

436

for (int x=0;x<nPbW;x+=4) {

310

#if 0

311

*o = *p << shift2;

312

o++; p++;

313

#else

437

314

438

// does not seem to be faster...

315

439

int16_t o0,o1,o2,o3;

316

440

o0 = p[0] << shift2;

324

448

325

449

o+=4;

326

450

p+=4;

327

#endif

328

451

}

329

452

}

330

453

}

331

454

332

455

456

/*
 * Full-pel case for 16-bit sources: a straight copy of the block into the
 * int16_t output buffer, with every pixel scaled up by (14 - bit_depth) bits.
 *
 *   out, out_stride  output buffer and row stride (in int16_t elements)
 *   src, srcstride   source pixels and row stride (in uint16_t elements)
 *   nPbW, nPbH       block width and height in pixels
 *   mcbuffer         not used by this function
 *   bit_depth        determines the left shift (14 - bit_depth)
 */
void put_qpel_0_0_fallback_16(int16_t *out, ptrdiff_t out_stride,
                              const uint16_t *src, ptrdiff_t srcstride,
                              int nPbW, int nPbH, int16_t* mcbuffer, int bit_depth)
{
  const int shift3 = 14-bit_depth;

  for (int y=0;y<nPbH;y++) {
    const uint16_t* p = src + srcstride*y;
    int16_t*        o = out + out_stride*y;

    for (int x=0;x<nPbW;x++) {
      o[x] = p[x] << shift3;
    }
  }
}

475

476

333

477

334

478

static int extra_before[4] = { 0,3,3,2 };

335

479

static int extra_after [4] = { 0,3,4,4 };

336

480

481

template <class pixel_t>

337

482

void put_qpel_fallback(int16_t *out, ptrdiff_t out_stride,

338

uint8_t *src, ptrdiff_t srcstride,

483

const pixel_t *src, ptrdiff_t srcstride,

339

484

int nPbW, int nPbH, int16_t* mcbuffer,

340

int xFracL, int yFracL)

485

int xFracL, int yFracL, int bit_depth)

341

486

{

342

487

int extra_left = extra_before[xFracL];

343

488

//int extra_right = extra_after [xFracL];

347

492

//int nPbW_extra = extra_left + nPbW + extra_right;

348

493

int nPbH_extra = extra_top + nPbH + extra_bottom;

349

494

350

const int shift1 = 0; // sps->BitDepth_Y-8;

495

const int shift1 = bit_depth-8;

351

496

const int shift2 = 6;

352

497

353

498

356

501

switch (xFracL) {

357

502

case 0:

358

503

for (int y=-extra_top;y<nPbH+extra_bottom;y++) {

359

uint8_t* p = src + srcstride*y - extra_left;

504

const pixel_t* p = src + srcstride*y - extra_left;

360

505

int16_t* o = &mcbuffer[y+extra_top];

361

506

362

507

for (int x=0;x<nPbW;x++) {

368

513

break;

369

514

case 1:

370

515

for (int y=-extra_top;y<nPbH+extra_bottom;y++) {

371

uint8_t* p = src + srcstride*y - extra_left;

516

const pixel_t* p = src + srcstride*y - extra_left;

372

517

int16_t* o = &mcbuffer[y+extra_top];

373

518

374

519

for (int x=0;x<nPbW;x++) {

380

525

break;

381

526

case 2:

382

527

for (int y=-extra_top;y<nPbH+extra_bottom;y++) {

383

uint8_t* p = src + srcstride*y - extra_left;

528

const pixel_t* p = src + srcstride*y - extra_left;

384

529

int16_t* o = &mcbuffer[y+extra_top];

385

530

386

531

for (int x=0;x<nPbW;x++) {

392

537

break;

393

538

case 3:

394

539

for (int y=-extra_top;y<nPbH+extra_bottom;y++) {

395

uint8_t* p = src + srcstride*y - extra_left;

540

const pixel_t* p = src + srcstride*y - extra_left;

396

541

int16_t* o = &mcbuffer[y+extra_top];

397

542

398

543

for (int x=0;x<nPbW;x++) {

421

566

switch (yFracL) {

422

567

case 0:

423

568

for (int x=0;x<nPbW;x++) {

424

int16_t* p = &mcbuffer[x*nPbH_extra];

569

const int16_t* p = &mcbuffer[x*nPbH_extra];

425

570

int16_t* o = &out[x];

426

571

427

572

for (int y=0;y<nPbH;y++) {

428

573

*o = *p;

429

574

o+=out_stride;

433

578

break;

434

579

case 1:

435

580

for (int x=0;x<nPbW;x++) {

436

int16_t* p = &mcbuffer[x*nPbH_extra];

581

const int16_t* p = &mcbuffer[x*nPbH_extra];

437

582

int16_t* o = &out[x];

438

583

439

584

for (int y=0;y<nPbH;y++) {

440

585

*o = (-p[0]+4*p[1]-10*p[2]+58*p[3]+17*p[4] -5*p[5] +p[6])>>vshift;

441

586

o+=out_stride;

445

590

break;

446

591

case 2:

447

592

for (int x=0;x<nPbW;x++) {

448

int16_t* p = &mcbuffer[x*nPbH_extra];

593

const int16_t* p = &mcbuffer[x*nPbH_extra];

449

594

int16_t* o = &out[x];

450

595

451

596

for (int y=0;y<nPbH;y++) {

452

597

*o = (-p[0]+4*p[1]-11*p[2]+40*p[3]+40*p[4]-11*p[5]+4*p[6]-p[7])>>vshift;

453

598

o+=out_stride;

457

602

break;

458

603

case 3:

459

604

for (int x=0;x<nPbW;x++) {

460

int16_t* p = &mcbuffer[x*nPbH_extra];

605

const int16_t* p = &mcbuffer[x*nPbH_extra];

461

606

int16_t* o = &out[x];

462

607

463

608

for (int y=0;y<nPbH;y++) {

464

609

*o = ( p[0]-5*p[1]+17*p[2]+58*p[3]-10*p[4] +4*p[5] -p[6])>>vshift;

465

610

o+=out_stride;

480

625

}

481

626

482

627

628

483

629

#define QPEL(x,y) void put_qpel_ ## x ## _ ## y ## _fallback(int16_t *out, ptrdiff_t out_stride, \

484

uint8_t *src, ptrdiff_t srcstride, \

485

int nPbW, int nPbH, int16_t* mcbuffer) \

486

{ put_qpel_fallback(out,out_stride, src,srcstride, nPbW,nPbH,mcbuffer,x,y ); }

630

const uint8_t *src, ptrdiff_t srcstride, \

631

int nPbW, int nPbH, int16_t* mcbuffer) \

632

{ put_qpel_fallback(out,out_stride, src,srcstride, nPbW,nPbH,mcbuffer,x,y, 8 ); }

633

634

635

// QPEL16(x,y) defines put_qpel_<x>_<y>_fallback_16(): a thin wrapper that
// forwards its arguments to put_qpel_fallback() with x and y passed as the
// xFracL / yFracL arguments and the caller's bit_depth passed through.
#define QPEL16(x,y) void put_qpel_ ## x ## _ ## y ## _fallback_16(int16_t *out, ptrdiff_t out_stride, \
                                                                  const uint16_t *src, ptrdiff_t srcstride, \
                                                                  int nPbW, int nPbH, int16_t* mcbuffer, int bit_depth) \
  { put_qpel_fallback(out,out_stride, src,srcstride, nPbW,nPbH,mcbuffer,x,y, bit_depth ); }

487

639

488

640

/* */ QPEL(0,1) QPEL(0,2) QPEL(0,3)

489

641

QPEL(1,0) QPEL(1,1) QPEL(1,2) QPEL(1,3)

490

642

QPEL(2,0) QPEL(2,1) QPEL(2,2) QPEL(2,3)

491

643

QPEL(3,0) QPEL(3,1) QPEL(3,2) QPEL(3,3)

644

645

// Generate the 16-bit qpel wrappers for every fractional position except
// (0,0), which is left blank in this table.
/*        */  QPEL16(0,1) QPEL16(0,2) QPEL16(0,3)
QPEL16(1,0)   QPEL16(1,1) QPEL16(1,2) QPEL16(1,3)
QPEL16(2,0)   QPEL16(2,1) QPEL16(2,2) QPEL16(2,3)
QPEL16(3,0)   QPEL16(3,1) QPEL16(3,2) QPEL16(3,3)

Older »