~ubuntu-branches/ubuntu/raring/libav/raring-security

0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,

};

/* Lookup table: entry q holds q/6 (one row per group of six indices). */
static const uint8_t div6[QP_MAX_NUM+1]={
     0,  0,  0,  0,  0,  0,
     1,  1,  1,  1,  1,  1,
     2,  2,  2,  2,  2,  2,
     3,  3,  3,  3,  3,  3,
     4,  4,  4,  4,  4,  4,
     5,  5,  5,  5,  5,  5,
     6,  6,  6,  6,  6,  6,
     7,  7,  7,  7,  7,  7,
     8,  8,  8,  8,  8,  8,
     9,  9,  9,  9,  9,  9,
    10, 10, 10, 10,
};

/* Candidate pixel formats: two hardware-accelerated entries followed by the
 * software YUVJ420P format. The list is terminated by PIX_FMT_NONE. */
static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
    PIX_FMT_DXVA2_VLD, PIX_FMT_VAAPI_VLD, /* hardware decoders */
    PIX_FMT_YUVJ420P,                     /* software fallback */
    PIX_FMT_NONE                          /* terminator */
};

/**
 * Copy the intra 4x4 prediction modes of the current macroblock from the
 * cache back into the per-macroblock table.
 *
 * Seven entries are written: four consecutive cache entries starting at
 * offset 4 + 8*4, followed by the entries at column 7 of cache rows 3, 2
 * and 1 (in that order).
 */
void ff_h264_write_back_intra_pred_mode(H264Context *h){
    int8_t *out = h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
    int row;

    AV_COPY32(out, h->intra4x4_pred_mode_cache + 4 + 8*4);

    /* out[4], out[5], out[6] come from cache rows 3, 2, 1 respectively */
    for (row = 3; row >= 1; row--)
        out[4 + (3 - row)] = h->intra4x4_pred_mode_cache[7 + 8*row];
}

/**

* checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.

int ff_h264_check_intra4x4_pred_mode(H264Context *h){

MpegEncContext * const s = &h->s;

static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};

static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};

int i;

if(!(h->top_samples_available&0x8000)){

for(i=0; i<4; i++){

int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];

if(status<0){

av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);

return -1;

} else if(status){

h->intra4x4_pred_mode_cache[scan8[0] + i]= status;

}

if((h->left_samples_available&0x8888)!=0x8888){

static const int mask[4]={0x8000,0x2000,0x80,0x20};

for(i=0; i<4; i++){

if(!(h->left_samples_available&mask[i])){

int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];

if(status<0){

av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);

100

return -1;

101

} else if(status){

102

h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;

103

}

104

}

105

}

106

}

107

108

return 0;

109

} //FIXME cleanup like ff_h264_check_intra_pred_mode

110

111

/**

112

* checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.

113

114

int ff_h264_check_intra_pred_mode(H264Context *h, int mode){

115

MpegEncContext * const s = &h->s;

116

static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};

117

static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};

118

119

if(mode > 6U) {

120

av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);

121

return -1;

122

}

123

124

if(!(h->top_samples_available&0x8000)){

125

mode= top[ mode ];

126

if(mode<0){

127

av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);

128

return -1;

129

}

130

}

131

132

if((h->left_samples_available&0x8080) != 0x8080){

133

mode= left[ mode ];

134

if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred

135

mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);

136

}

137

if(mode<0){

138

av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);

139

return -1;

140

}

141

}

142

143

return mode;

144

}

145

146

const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){

147

int i, si, di;

148

uint8_t *dst;

149

int bufidx;

150

151

// src[0]&0x80; //forbidden bit

152

h->nal_ref_idc= src[0]>>5;

153

h->nal_unit_type= src[0]&0x1F;

154

155

src++; length--;

156

157

#if HAVE_FAST_UNALIGNED

158

# if HAVE_FAST_64BIT

159

# define RS 7

160

for(i=0; i+1<length; i+=9){

161

if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))

162

# else

163

# define RS 3

164

for(i=0; i+1<length; i+=5){

165

if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))

166

# endif

167

continue;

168

if(i>0 && !src[i]) i--;

169

while(src[i]) i++;

170

#else

171

# define RS 0

172

for(i=0; i+1<length; i+=2){

173

if(src[i]) continue;

174

if(i>0 && src[i-1]==0) i--;

175

#endif

176

if(i+2<length && src[i+1]==0 && src[i+2]<=3){

177

if(src[i+2]!=3){

178

/* startcode, so we must be past the end */

179

length=i;

180

}

181

break;

182

}

183

i-= RS;

184

}

185

186

if(i>=length-1){ //no escaped 0

187

*dst_length= length;

188

*consumed= length+1; //+1 for the header

189

return src;

190

}

191

192

bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data

193

av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);

194

dst= h->rbsp_buffer[bufidx];

195

196

if (dst == NULL){

197

return NULL;

198

}

199

200

//printf("decoding esc\n");

201

memcpy(dst, src, i);

202

si=di=i;

203

while(si+2<length){

204

//remove escapes (very rare 1:2^22)

205

if(src[si+2]>3){

206

dst[di++]= src[si++];

207

dst[di++]= src[si++];

208

}else if(src[si]==0 && src[si+1]==0){

209

if(src[si+2]==3){ //escape

210

dst[di++]= 0;

211

dst[di++]= 0;

212

si+=3;

213

continue;

214

}else //next start code

215

goto nsc;

216

}

217

218

dst[di++]= src[si++];

219

}

220

while(si<length)

221

dst[di++]= src[si++];

222

nsc:

223

224

memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);

225

226

*dst_length= di;

227

*consumed= si + 1;//+1 for the header

228

//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)

229

return dst;

230

}

231

232

/**

233

* Identify the exact end of the bitstream

234

* @return the length of the trailing, or 0 if damaged

235

236

static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){

237

int v= *src;

238

int r;

239

240

tprintf(h->s.avctx, "rbsp trailing %X\n", v);

241

242

for(r=1; r<9; r++){

243

if(v&1) return r;

244

v>>=1;

245

}

246

return 0;

247

}

248

249

static inline int get_lowest_part_list_y(H264Context *h, Picture *pic, int n, int height,

250

int y_offset, int list){

251

int raw_my= h->mv_cache[list][ scan8[n] ][1];

252

int filter_height= (raw_my&3) ? 2 : 0;

253

int full_my= (raw_my>>2) + y_offset;

254

int top = full_my - filter_height, bottom = full_my + height + filter_height;

255

256

return FFMAX(abs(top), bottom);

257

}

258

259

static inline void get_lowest_part_y(H264Context *h, int refs[2][48], int n, int height,

260

int y_offset, int list0, int list1, int *nrefs){

261

MpegEncContext * const s = &h->s;

262

int my;

263

264

y_offset += 16*(s->mb_y >> MB_FIELD);

265

266

if(list0){

267

int ref_n = h->ref_cache[0][ scan8[n] ];

268

Picture *ref= &h->ref_list[0][ref_n];

269

270

// Error resilience puts the current picture in the ref list.

271

// Don't try to wait on these as it will cause a deadlock.

272

// Fields can wait on each other, though.

273

if(ref->thread_opaque != s->current_picture.thread_opaque ||

274

(ref->reference&3) != s->picture_structure) {

275

my = get_lowest_part_list_y(h, ref, n, height, y_offset, 0);

276

if (refs[0][ref_n] < 0) nrefs[0] += 1;

277

refs[0][ref_n] = FFMAX(refs[0][ref_n], my);

278

}

279

}

280

281

if(list1){

282

int ref_n = h->ref_cache[1][ scan8[n] ];

283

Picture *ref= &h->ref_list[1][ref_n];

284

285

if(ref->thread_opaque != s->current_picture.thread_opaque ||

286

(ref->reference&3) != s->picture_structure) {

287

my = get_lowest_part_list_y(h, ref, n, height, y_offset, 1);

288

if (refs[1][ref_n] < 0) nrefs[1] += 1;

289

refs[1][ref_n] = FFMAX(refs[1][ref_n], my);

290

}

291

}

292

}

293

294

/**

295

* Wait until all reference frames are available for MC operations.

296

297

* @param h the H264 context

298

299

static void await_references(H264Context *h){

300

MpegEncContext * const s = &h->s;

301

const int mb_xy= h->mb_xy;

302

const int mb_type= s->current_picture.mb_type[mb_xy];

303

int refs[2][48];

304

int nrefs[2] = {0};

305

int ref, list;

306

307

memset(refs, -1, sizeof(refs));

308

309

if(IS_16X16(mb_type)){

310

get_lowest_part_y(h, refs, 0, 16, 0,

311

IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);

312

}else if(IS_16X8(mb_type)){

313

get_lowest_part_y(h, refs, 0, 8, 0,

314

IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);

315

get_lowest_part_y(h, refs, 8, 8, 8,

316

IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);

317

}else if(IS_8X16(mb_type)){

318

get_lowest_part_y(h, refs, 0, 16, 0,

319

IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1), nrefs);

320

get_lowest_part_y(h, refs, 4, 16, 0,

321

IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1), nrefs);

322

}else{

323

int i;

324

325

assert(IS_8X8(mb_type));

326

327

for(i=0; i<4; i++){

328

const int sub_mb_type= h->sub_mb_type[i];

329

const int n= 4*i;

330

int y_offset= (i&2)<<2;

331

332

if(IS_SUB_8X8(sub_mb_type)){

333

get_lowest_part_y(h, refs, n , 8, y_offset,

334

IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);

335

}else if(IS_SUB_8X4(sub_mb_type)){

336

get_lowest_part_y(h, refs, n , 4, y_offset,

337

IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);

338

get_lowest_part_y(h, refs, n+2, 4, y_offset+4,

339

IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);

340

}else if(IS_SUB_4X8(sub_mb_type)){

341

get_lowest_part_y(h, refs, n , 8, y_offset,

342

IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);

343

get_lowest_part_y(h, refs, n+1, 8, y_offset,

344

IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);

345

}else{

346

int j;

347

assert(IS_SUB_4X4(sub_mb_type));

348

for(j=0; j<4; j++){

349

int sub_y_offset= y_offset + 2*(j&2);

350

get_lowest_part_y(h, refs, n+j, 4, sub_y_offset,

351

IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1), nrefs);

352

}

353

}

354

}

355

}

356

357

for(list=h->list_count-1; list>=0; list--){

358

for(ref=0; ref<48 && nrefs[list]; ref++){

359

int row = refs[list][ref];

360

if(row >= 0){

361

Picture *ref_pic = &h->ref_list[list][ref];

362

int ref_field = ref_pic->reference - 1;

363

int ref_field_picture = ref_pic->field_picture;

364

int pic_height = 16*s->mb_height >> ref_field_picture;

365

366

row <<= MB_MBAFF;

367

nrefs[list]--;

368

369

if(!FIELD_PICTURE && ref_field_picture){ // frame referencing two fields

370

ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) - !(row&1), pic_height-1), 1);

371

ff_thread_await_progress((AVFrame*)ref_pic, FFMIN((row >> 1) , pic_height-1), 0);

372

}else if(FIELD_PICTURE && !ref_field_picture){ // field referencing one field of a frame

373

ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row*2 + ref_field , pic_height-1), 0);

374

}else if(FIELD_PICTURE){

375

ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), ref_field);

376

}else{

377

ff_thread_await_progress((AVFrame*)ref_pic, FFMIN(row, pic_height-1), 0);

378

}

379

}

380

}

381

}

382

}


#if 0
/**
 * DCT transforms the 16 dc values.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    for(i=0; i<4; i++){
        const int offset= y_offset[i];
        const int z0= block[offset+stride*0] + block[offset+stride*4];
        const int z1= block[offset+stride*0] - block[offset+stride*4];
        const int z2= block[offset+stride*1] - block[offset+stride*5];
        const int z3= block[offset+stride*1] + block[offset+stride*5];

        temp[4*i+0]= z0+z3;
        temp[4*i+1]= z1+z2;
        temp[4*i+2]= z1-z2;
        temp[4*i+3]= z0-z3;
    }

    for(i=0; i<4; i++){
        const int offset= x_offset[i];
        const int z0= temp[4*0+i] + temp[4*2+i];
        const int z1= temp[4*0+i] - temp[4*2+i];
        const int z2= temp[4*1+i] - temp[4*3+i];
        const int z3= temp[4*1+i] + temp[4*3+i];

        block[stride*0 +offset]= (z0 + z3)>>1;
        block[stride*2 +offset]= (z1 + z2)>>1;
        block[stride*8 +offset]= (z1 - z2)>>1;
        block[stride*10+offset]= (z0 - z3)>>1;
    }
}
#endif


#undef xStride
#undef stride

#if 0
/* Disabled: 2x2 butterfly over the four chroma DC values of a block. */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    int a,b,c,d,e;

    a= block[stride*0 + xStride*0];
    b= block[stride*0 + xStride*1];
    c= block[stride*1 + xStride*0];
    d= block[stride*1 + xStride*1];

    e= a-b;
    a= a+b;
    b= c-d;
    c= c+d;

    block[stride*0 + xStride*0]= (a+c);
    block[stride*0 + xStride*1]= (e+b);
    block[stride*1 + xStride*0]= (a-c);
    block[stride*1 + xStride*1]= (e-b);
}
#endif

449

450

static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,

451

uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,

452

int src_x_offset, int src_y_offset,

453

qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op,

454

int pixel_shift, int chroma444){

455

MpegEncContext * const s = &h->s;

456

const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;

457

int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;

458

const int luma_xy= (mx&3) + ((my&3)<<2);

459

int offset = ((mx>>2) << pixel_shift) + (my>>2)*h->mb_linesize;

460

uint8_t * src_y = pic->data[0] + offset;

461

uint8_t * src_cb, * src_cr;

462

int extra_width= h->emu_edge_width;

463

int extra_height= h->emu_edge_height;

464

int emu=0;

465

const int full_mx= mx>>2;

466

const int full_my= my>>2;

467

const int pic_width = 16*s->mb_width;

468

const int pic_height = 16*s->mb_height >> MB_FIELD;

469

470

if(mx&7) extra_width -= 3;

471

if(my&7) extra_height -= 3;

472

473

if( full_mx < 0-extra_width

474

|| full_my < 0-extra_height

475

|| full_mx + 16/*FIXME*/ > pic_width + extra_width

476

|| full_my + 16/*FIXME*/ > pic_height + extra_height){

477

s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);

478

src_y= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;

479

emu=1;

480

}

481

482

qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?

483

if(!square){

484

qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);

485

}

486

487

if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;

488

489

if(chroma444){

490

src_cb = pic->data[1] + offset;

491

if(emu){

492

s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,

493

16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);

494

src_cb= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;

495

}

496

qpix_op[luma_xy](dest_cb, src_cb, h->mb_linesize); //FIXME try variable height perhaps?

497

if(!square){

498

qpix_op[luma_xy](dest_cb + delta, src_cb + delta, h->mb_linesize);

499

}

500

501

src_cr = pic->data[2] + offset;

502

if(emu){

503

s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr - (2 << pixel_shift) - 2*h->mb_linesize, h->mb_linesize,

504

16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);

505

src_cr= s->edge_emu_buffer + (2 << pixel_shift) + 2*h->mb_linesize;

506

}

507

qpix_op[luma_xy](dest_cr, src_cr, h->mb_linesize); //FIXME try variable height perhaps?

508

if(!square){

509

qpix_op[luma_xy](dest_cr + delta, src_cr + delta, h->mb_linesize);

510

}

511

return;

512

}

513

514

if(MB_FIELD){

515

// chroma offset when predicting from a field of opposite parity

516

my += 2 * ((s->mb_y & 1) - (pic->reference - 1));

517

emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);

518

}

519

src_cb= pic->data[1] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;

520

src_cr= pic->data[2] + ((mx>>3) << pixel_shift) + (my>>3)*h->mb_uvlinesize;

521

522

if(emu){

523

s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);

524

src_cb= s->edge_emu_buffer;

525

}

526

chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);

527

528

if(emu){

529

s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);

530

src_cr= s->edge_emu_buffer;

531

}

532

chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);

533

}

534

535

static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,

536

uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,

537

int x_offset, int y_offset,

538

qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,

539

qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,

540

int list0, int list1, int pixel_shift, int chroma444){

541

MpegEncContext * const s = &h->s;

542

qpel_mc_func *qpix_op= qpix_put;

543

h264_chroma_mc_func chroma_op= chroma_put;

544

545

dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;

546

if(chroma444){

547

dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;

548

dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;

549

}else{

550

dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;

551

dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;

552

}

553

x_offset += 8*s->mb_x;

554

y_offset += 8*(s->mb_y >> MB_FIELD);

555

556

if(list0){

557

Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];

558

mc_dir_part(h, ref, n, square, chroma_height, delta, 0,

559

dest_y, dest_cb, dest_cr, x_offset, y_offset,

560

qpix_op, chroma_op, pixel_shift, chroma444);

561

562

qpix_op= qpix_avg;

563

chroma_op= chroma_avg;

564

}

565

566

if(list1){

567

Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];

568

mc_dir_part(h, ref, n, square, chroma_height, delta, 1,

569

dest_y, dest_cb, dest_cr, x_offset, y_offset,

570

qpix_op, chroma_op, pixel_shift, chroma444);

571

}

572

}

573

574

static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,

575

uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,

576

int x_offset, int y_offset,

577

qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,

578

h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,

579

h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,

580

int list0, int list1, int pixel_shift, int chroma444){

581

MpegEncContext * const s = &h->s;

582

583

dest_y += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;

584

if(chroma444){

585

chroma_weight_avg = luma_weight_avg;

586

chroma_weight_op = luma_weight_op;

587

dest_cb += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;

588

dest_cr += (2*x_offset << pixel_shift) + 2*y_offset*h->mb_linesize;

589

}else{

590

dest_cb += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;

591

dest_cr += ( x_offset << pixel_shift) + y_offset*h->mb_uvlinesize;

592

}

593

x_offset += 8*s->mb_x;

594

y_offset += 8*(s->mb_y >> MB_FIELD);

595

596

if(list0 && list1){

597

/* don't optimize for luma-only case, since B-frames usually

598

* use implicit weights => chroma too. */

599

uint8_t *tmp_cb = s->obmc_scratchpad;

600

uint8_t *tmp_cr = s->obmc_scratchpad + (16 << pixel_shift);

601

uint8_t *tmp_y = s->obmc_scratchpad + 16*h->mb_uvlinesize;

602

int refn0 = h->ref_cache[0][ scan8[n] ];

603

int refn1 = h->ref_cache[1][ scan8[n] ];

604

605

mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,

606

dest_y, dest_cb, dest_cr,

607

x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);

608

mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,

609

tmp_y, tmp_cb, tmp_cr,

610

x_offset, y_offset, qpix_put, chroma_put, pixel_shift, chroma444);

611

612

if(h->use_weight == 2){

613

int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];

614

int weight1 = 64 - weight0;

615

luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);

616

chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);

617

chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);

618

}else{

619

luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,

620

h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],

621

h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);

622

chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,

623

h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],

624

h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);

625

chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,

626

h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],

627

h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);

628

}

629

}else{

630

int list = list1 ? 1 : 0;

631

int refn = h->ref_cache[list][ scan8[n] ];

632

Picture *ref= &h->ref_list[list][refn];

633

mc_dir_part(h, ref, n, square, chroma_height, delta, list,

634

dest_y, dest_cb, dest_cr, x_offset, y_offset,

635

qpix_put, chroma_put, pixel_shift, chroma444);

636

637

luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,

638

h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);

639

if(h->use_weight_chroma){

640

chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,

641

h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);

642

chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,

643

h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);

644

}

645

}

646

}

647

648

static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,

649

uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,

650

int x_offset, int y_offset,

651

qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,

652

qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,

653

h264_weight_func *weight_op, h264_biweight_func *weight_avg,

654

int list0, int list1, int pixel_shift, int chroma444){

655

if((h->use_weight==2 && list0 && list1

656

&& (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))

657

|| h->use_weight==1)

658

mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,

659

x_offset, y_offset, qpix_put, chroma_put,

660

weight_op[0], weight_op[3], weight_avg[0],

661

weight_avg[3], list0, list1, pixel_shift, chroma444);

662

else

663

mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,

664

x_offset, y_offset, qpix_put, chroma_put, qpix_avg,

665

chroma_avg, list0, list1, pixel_shift, chroma444);

666

}

667

668

static inline void prefetch_motion(H264Context *h, int list, int pixel_shift, int chroma444){

669

/* fetch pixels for estimated mv 4 macroblocks ahead

670

* optimized for 64byte cache lines */

671

MpegEncContext * const s = &h->s;

672

const int refn = h->ref_cache[list][scan8[0]];

673

if(refn >= 0){

674

const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;

675

const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;

676

uint8_t **src= h->ref_list[list][refn].data;

677

int off= (mx << pixel_shift) + (my + (s->mb_x&3)*4)*h->mb_linesize + (64 << pixel_shift);

678

s->dsp.prefetch(src[0]+off, s->linesize, 4);

679

if(chroma444){

680

s->dsp.prefetch(src[1]+off, s->linesize, 4);

681

s->dsp.prefetch(src[2]+off, s->linesize, 4);

682

}else{

683

off= ((mx>>1) << pixel_shift) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + (64 << pixel_shift);

684

s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);

685

}

686

}

687

}

688

689

static av_always_inline void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,

690

qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),

691

qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),

692

h264_weight_func *weight_op, h264_biweight_func *weight_avg,

693

int pixel_shift, int chroma444){

694

MpegEncContext * const s = &h->s;

695

const int mb_xy= h->mb_xy;

696

const int mb_type= s->current_picture.mb_type[mb_xy];

697

698

assert(IS_INTER(mb_type));

699

700

if(HAVE_PTHREADS && (s->avctx->active_thread_type & FF_THREAD_FRAME))

701

await_references(h);

702

prefetch_motion(h, 0, pixel_shift, chroma444);

703

704

if(IS_16X16(mb_type)){

705

mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,

706

qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],

707

weight_op, weight_avg,

708

IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),

709

pixel_shift, chroma444);

710

}else if(IS_16X8(mb_type)){

711

mc_part(h, 0, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 0,

712

qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],

713

&weight_op[1], &weight_avg[1],

714

IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),

715

pixel_shift, chroma444);

716

mc_part(h, 8, 0, 4, 8 << pixel_shift, dest_y, dest_cb, dest_cr, 0, 4,

717

qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],

718

&weight_op[1], &weight_avg[1],

719

IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),

720

pixel_shift, chroma444);

721

}else if(IS_8X16(mb_type)){

722

mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,

723

qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],

724

&weight_op[2], &weight_avg[2],

725

IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1),

726

pixel_shift, chroma444);

727

mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,

728

qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],

729

&weight_op[2], &weight_avg[2],

730

IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1),

731

pixel_shift, chroma444);

732

}else{

733

int i;

734

735

assert(IS_8X8(mb_type));

736

737

for(i=0; i<4; i++){

738

const int sub_mb_type= h->sub_mb_type[i];

739

const int n= 4*i;

740

int x_offset= (i&1)<<2;

741

int y_offset= (i&2)<<1;

742

743

if(IS_SUB_8X8(sub_mb_type)){

744

mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,

745

qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],

746

&weight_op[3], &weight_avg[3],

747

IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),

748

pixel_shift, chroma444);

749

}else if(IS_SUB_8X4(sub_mb_type)){

750

mc_part(h, n , 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset,

751

qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],

752

&weight_op[4], &weight_avg[4],

753

IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),

754

pixel_shift, chroma444);

755

mc_part(h, n+2, 0, 2, 4 << pixel_shift, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,

756

qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],

757

&weight_op[4], &weight_avg[4],

758

IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),

759

pixel_shift, chroma444);

760

}else if(IS_SUB_4X8(sub_mb_type)){

761

mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,

762

qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],

763

&weight_op[5], &weight_avg[5],

764

IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),

765

pixel_shift, chroma444);

766

mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,

767

qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],

768

&weight_op[5], &weight_avg[5],

769

IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),

770

pixel_shift, chroma444);

771

}else{

772

int j;

773

assert(IS_SUB_4X4(sub_mb_type));

774

for(j=0; j<4; j++){

775

int sub_x_offset= x_offset + 2*(j&1);

776

int sub_y_offset= y_offset + (j&2);

777

mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,

778

qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],

779

&weight_op[6], &weight_avg[6],

780

IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1),

781

pixel_shift, chroma444);

782

}

783

}

784

}

785

}

786

787

prefetch_motion(h, 1, pixel_shift, chroma444);

788

}

789

790

#define hl_motion_fn(sh, bits) \

791

static av_always_inline void hl_motion_ ## bits(H264Context *h, \

792

uint8_t *dest_y, \

793

uint8_t *dest_cb, uint8_t *dest_cr, \

794

qpel_mc_func (*qpix_put)[16], \

795

h264_chroma_mc_func (*chroma_put), \

796

qpel_mc_func (*qpix_avg)[16], \

797

h264_chroma_mc_func (*chroma_avg), \

798

h264_weight_func *weight_op, \

799

h264_biweight_func *weight_avg, \

800

int chroma444) \

801

{ \

802

hl_motion(h, dest_y, dest_cb, dest_cr, qpix_put, chroma_put, \

803

qpix_avg, chroma_avg, weight_op, weight_avg, sh, chroma444); \

804

}

805

hl_motion_fn(0, 8);

806

hl_motion_fn(1, 16);

807

808

static void free_tables(H264Context *h, int free_rbsp){

809

int i;

810

H264Context *hx;

811

812

av_freep(&h->intra4x4_pred_mode);

813

av_freep(&h->chroma_pred_mode_table);

814

av_freep(&h->cbp_table);

815

av_freep(&h->mvd_table[0]);

816

av_freep(&h->mvd_table[1]);

817

av_freep(&h->direct_table);

818

av_freep(&h->non_zero_count);

819

av_freep(&h->slice_table_base);

820

h->slice_table= NULL;

821

av_freep(&h->list_counts);

822

823

av_freep(&h->mb2b_xy);

824

av_freep(&h->mb2br_xy);

825

826

for(i = 0; i < MAX_THREADS; i++) {

827

hx = h->thread_context[i];

828

if(!hx) continue;

829

av_freep(&hx->top_borders[1]);

830

av_freep(&hx->top_borders[0]);

831

av_freep(&hx->s.obmc_scratchpad);

832

if (free_rbsp){

833

av_freep(&hx->rbsp_buffer[1]);

834

av_freep(&hx->rbsp_buffer[0]);

835

hx->rbsp_buffer_size[0] = 0;

836

hx->rbsp_buffer_size[1] = 0;

837

}

838

if (i) av_freep(&h->thread_context[i]);

839

}

840

}

841

842

static void init_dequant8_coeff_table(H264Context *h){

843

int i,j,q,x;

844

const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);

845

846

for(i=0; i<6; i++ ){

847

h->dequant8_coeff[i] = h->dequant8_buffer[i];

848

for(j=0; j<i; j++){

849

if(!memcmp(h->pps.scaling_matrix8[j], h->pps.scaling_matrix8[i], 64*sizeof(uint8_t))){

850

h->dequant8_coeff[i] = h->dequant8_buffer[j];

851

break;

852

}

853

}

854

if(j<i)

855

continue;

856

857

for(q=0; q<max_qp+1; q++){

858

int shift = div6[q];

859

int idx = rem6[q];

860

for(x=0; x<64; x++)

861

h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =

862

((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *

863

h->pps.scaling_matrix8[i][x]) << shift;

864

}

865

}

866

}

867

868

/**
 * Fill the 4x4 dequantisation tables from the 4x4 scaling matrices of the
 * current PPS.
 *
 * Lists whose scaling matrix is byte-identical to that of an earlier list
 * share the earlier list's table instead of getting their own. Entries are
 * stored with row and column swapped relative to the scaling matrix index.
 */
static void init_dequant4_coeff_table(H264Context *h){
    const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
    int list, prev, qp, pos;

    for (list = 0; list < 6; list++) {
        /* search the earlier lists for an identical scaling matrix */
        for (prev = 0; prev < list; prev++) {
            if (!memcmp(h->pps.scaling_matrix4[prev], h->pps.scaling_matrix4[list], 16*sizeof(uint8_t)))
                break;
        }
        if (prev < list) {
            /* share the table that was already computed */
            h->dequant4_coeff[list] = h->dequant4_buffer[prev];
            continue;
        }
        h->dequant4_coeff[list] = h->dequant4_buffer[list];

        for (qp = 0; qp <= max_qp; qp++) {
            const int shift = div6[qp] + 2;
            const int idx   = rem6[qp];

            for (pos = 0; pos < 16; pos++) {
                const int dst = (pos>>2) | ((pos<<2)&0xF);

                h->dequant4_coeff[list][qp][dst] =
                    ((uint32_t)dequant4_coeff_init[idx][(pos&1) + ((pos>>2)&1)] *
                     h->pps.scaling_matrix4[list][pos]) << shift;
            }
        }
    }
}

892

893

/**
 * (Re)build all dequantisation tables for the current SPS/PPS.
 *
 * The 4x4 tables are always built; the 8x8 tables only when the PPS enables
 * the 8x8 transform. When the SPS signals transform bypass, the qp==0 row of
 * every table is overwritten with the constant 1<<6.
 */
static void init_dequant_tables(H264Context *h){
    int list, pos;

    init_dequant4_coeff_table(h);
    if (h->pps.transform_8x8_mode)
        init_dequant8_coeff_table(h);

    if (!h->sps.transform_bypass)
        return;

    for (list = 0; list < 6; list++) {
        for (pos = 0; pos < 16; pos++)
            h->dequant4_coeff[list][0][pos] = 1<<6;
    }

    if (h->pps.transform_8x8_mode) {
        for (list = 0; list < 6; list++) {
            for (pos = 0; pos < 64; pos++)
                h->dequant8_coeff[list][0][pos] = 1<<6;
        }
    }
}

908

909

910

/**
 * Allocate the per-stream tables of the decoder context and build the
 * macroblock-index lookup tables mb2b_xy / mb2br_xy.
 *
 * The dequantisation tables are initialised as well if that has not happened
 * yet (h->dequant4_coeff[0] still NULL).
 *
 * @return 0 on success; -1 if an allocation failed, in which case
 *         free_tables(h, 1) has already been called
 */
int ff_h264_alloc_tables(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int big_mb_num = s->mb_stride * (s->mb_height+1);
    const int row_mb_num = 2*s->mb_stride*s->avctx->thread_count;
    int mb_x, mb_y;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail);

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count, big_mb_num * 48 * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail);

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail);

    /* every entry starts out as -1; slice_table itself begins two rows plus
     * one entry into the allocation */
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
    h->slice_table = h->slice_table_base + s->mb_stride*2 + 1;

    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy,  big_mb_num * sizeof(uint32_t), fail);
    FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy, big_mb_num * sizeof(uint32_t), fail);

    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
            const int mb_xy = mb_x + mb_y*s->mb_stride;
            const int b_xy  = 4*mb_x + 4*mb_y*h->b_stride;

            h->mb2b_xy [mb_xy] = b_xy;
            /* without FMO the index wraps every two macroblock rows */
            h->mb2br_xy[mb_xy] = 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
        }
    }

    s->obmc_scratchpad = NULL;

    if (!h->dequant4_coeff[0])
        init_dequant_tables(h);

    return 0;

fail:
    free_tables(h, 1);
    return -1;
}

953

954

/**

955

* Mimic alloc_tables(), but for every context thread.

956

957

static void clone_tables(H264Context *dst, H264Context *src, int i){

958

MpegEncContext * const s = &src->s;

959

dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride;

960

dst->non_zero_count = src->non_zero_count;

961

dst->slice_table = src->slice_table;

962

dst->cbp_table = src->cbp_table;

963

dst->mb2b_xy = src->mb2b_xy;

964

dst->mb2br_xy = src->mb2br_xy;

965

dst->chroma_pred_mode_table = src->chroma_pred_mode_table;

966

dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride;

967

dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride;

968

dst->direct_table = src->direct_table;

969

dst->list_counts = src->list_counts;

970

971

dst->s.obmc_scratchpad = NULL;

972

ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma);

973

}

974

975

/**

976

* Init context

977

* Allocate buffers which are not shared amongst multiple threads.

978

979

static int context_init(H264Context *h){

980

FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)

981

FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * 16*3 * sizeof(uint8_t)*2, fail)

982

983

h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =

984

h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;

985

986

return 0;

987

fail:

988

return -1; // free_tables will clean up for us

989

}

990

991

/* Forward declaration: ff_h264_decode_extradata() calls this before the
 * definition appears in the file. */
static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);

992

993

/**
 * Initialisation shared by the decoder entry points: copy the picture size
 * and codec id from the AVCodecContext, set up the 8-bit DSP and prediction
 * function tables, and preset the PPS scaling matrices to flat 16.
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width    = s->avctx->width;
    s->height   = s->avctx->height;
    s->codec_id = s->avctx->codec->id;

    ff_h264dsp_init(&h->h264dsp, 8);
    ff_h264_pred_init(&h->hpc, s->codec_id, 8);

    h->dequant_coeff_pps = -1;
    s->unrestricted_mv   = 1;
    s->decode            = 1; //FIXME

    dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early

    /* Flat default (16) for every scaling matrix. Using sizeof covers all
     * 8x8 matrices: init_dequant8_coeff_table() reads six of them, whereas
     * the previous hard-coded 2*64 only initialised the first two. */
    memset(h->pps.scaling_matrix4, 16, sizeof(h->pps.scaling_matrix4));
    memset(h->pps.scaling_matrix8, 16, sizeof(h->pps.scaling_matrix8));
}

1012

1013

/**
 * Parse the codec extradata of the context.
 *
 * If the first byte is 1 the data is treated as an avcC record: h->is_avc is
 * set, the SPS and PPS NAL units it contains are decoded, and
 * h->nal_length_size is taken from the low two bits of byte 4. Otherwise the
 * whole buffer is handed to decode_nal_units() as-is.
 *
 * Every NAL size read from the avcC record is validated against the number
 * of bytes actually left in the extradata, so a truncated or malicious
 * record cannot make the parser read past the buffer.
 *
 * The caller must ensure avctx->extradata is non-NULL and
 * avctx->extradata_size is > 0.
 *
 * @return 0 on success, -1 on error
 */
int ff_h264_decode_extradata(H264Context *h)
{
    AVCodecContext *avctx = h->s.avctx;

    if(avctx->extradata[0] == 1){
        int i, cnt, nalsize;
        const unsigned char *p   = avctx->extradata;
        const unsigned char *end = avctx->extradata + avctx->extradata_size;

        h->is_avc = 1;

        if(avctx->extradata_size < 7) {
            av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
            return -1;
        }
        /* sps and pps in the avcC always have length coded with 2 bytes,
           so put a fake nal_length_size = 2 while parsing them */
        h->nal_length_size = 2;
        // Decode sps from avcC
        cnt = *(p+5) & 0x1f; // Number of sps
        p += 6;
        for (i = 0; i < cnt; i++) {
            /* need the 2 length bytes plus the payload they announce */
            if (end - p < 2 || (nalsize = AV_RB16(p) + 2) > end - p) {
                av_log(avctx, AV_LOG_ERROR, "sps %d exceeds avcC size\n", i);
                return -1;
            }
            if(decode_nal_units(h, p, nalsize) < 0) {
                av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
                return -1;
            }
            p += nalsize;
        }
        // Decode pps from avcC
        /* A record that ends right after the SPS list is treated as having
         * no PPS instead of reading the count from beyond the buffer. */
        cnt = p < end ? *(p++) : 0; // Number of pps
        for (i = 0; i < cnt; i++) {
            if (end - p < 2 || (nalsize = AV_RB16(p) + 2) > end - p) {
                av_log(avctx, AV_LOG_ERROR, "pps %d exceeds avcC size\n", i);
                return -1;
            }
            if (decode_nal_units(h, p, nalsize) < 0) {
                av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
                return -1;
            }
            p += nalsize;
        }
        // Now store right nal length size, that will be use to parse all other nals
        h->nal_length_size = (avctx->extradata[4] & 0x03) + 1;
    } else {
        h->is_avc = 0;
        if(decode_nal_units(h, avctx->extradata, avctx->extradata_size) < 0)
            return -1;
    }
    return 0;
}

1060

1061

/**
 * Decoder init callback: set up the H264Context stored in avctx->priv_data
 * with its defaults and, when extradata is present, parse it.
 *
 * @return 0 on success, -1 if the extradata could not be decoded
 */
av_cold int ff_h264_decode_init(AVCodecContext *avctx){
    H264Context *h = avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format      = FMT_H264;
    s->workaround_bugs = avctx->workaround_bugs;

    // set defaults
    s->quarter_sample = 1;
    if (!avctx->has_b_frames)
        s->low_delay = 1;

    avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;

    ff_h264_decode_init_vlc();

    /* start out as 8-bit until a parameter set says otherwise */
    h->pixel_shift             = 0;
    avctx->bits_per_raw_sample = 8;
    h->sps.bit_depth_luma      = 8;

    h->thread_context[0]  = h;
    h->next_outputed_poc  = INT_MIN;
    h->outputed_poc       = INT_MIN;
    h->prev_poc_msb       = 1<<16;
    h->x264_build         = -1;
    ff_h264_reset_sei(h);

    if (avctx->codec_id == CODEC_ID_H264) {
        /* two ticks per frame; scale the time base if it was set up for one */
        if (avctx->ticks_per_frame == 1)
            avctx->time_base.den *= 2;
        avctx->ticks_per_frame = 2;
    }

    if (avctx->extradata_size > 0 && avctx->extradata) {
        if (ff_h264_decode_extradata(h))
            return -1;
    }

    if (h->sps.bitstream_restriction_flag &&
        avctx->has_b_frames < h->sps.num_reorder_frames) {
        avctx->has_b_frames = h->sps.num_reorder_frames;
        s->low_delay        = 0;
    }

    return 0;
}

1109

1110

#define IN_RANGE(a, b, size) (((a) >= (b)) && ((a) < ((b)+(size))))

1111

static void copy_picture_range(Picture **to, Picture **from, int count, MpegEncContext *new_base, MpegEncContext *old_base)

1112

{

1113

int i;

1114

1115

for (i=0; i<count; i++){

1116

assert((IN_RANGE(from[i], old_base, sizeof(*old_base)) ||

1117

IN_RANGE(from[i], old_base->picture, sizeof(Picture) * old_base->picture_count) ||

1118

!from[i]));

1119

to[i] = REBASE_PICTURE(from[i], new_base, old_base);

1120

}

1121

}

1122

1123

/**
 * Make the array of parameter-set buffers in 'to' mirror the one in 'from'.
 *
 * For each slot: if the source is empty the destination buffer is freed;
 * otherwise a destination buffer is allocated when missing and the source
 * contents are copied into it.
 *
 * If an allocation fails the slot is left NULL and skipped; the copy is not
 * attempted, so the function never writes through a NULL pointer.
 *
 * @param to    destination array of count buffer pointers
 * @param from  source array of count buffer pointers
 * @param count number of slots
 * @param size  size in bytes of one parameter-set buffer
 */
static void copy_parameter_set(void **to, void **from, int count, int size)
{
    int i;

    for (i=0; i<count; i++){
        if (!from[i]) {
            av_freep(&to[i]);
            continue;
        }

        if (!to[i]) {
            to[i] = av_malloc(size);
            if (!to[i])
                continue; /* out of memory: leave this slot empty */
        }

        memcpy(to[i], from[i], size);
    }
}

1134

1135

/**
 * Thread-copy init callback: for a copied codec context, clear the SPS/PPS
 * buffer pointer arrays. Does nothing when avctx is not a copy.
 *
 * @return always 0
 */
static int decode_init_thread_copy(AVCodecContext *avctx){
    H264Context *h = avctx->priv_data;

    if (avctx->is_copy) {
        memset(h->sps_buffers, 0, sizeof(h->sps_buffers));
        memset(h->pps_buffers, 0, sizeof(h->pps_buffers));
    }

    return 0;
}

1144

1145

#define copy_fields(to, from, start_field, end_field) memcpy(&to->start_field, &from->start_field, (char*)&to->end_field - (char*)&to->start_field)

1146

static int decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src){

1147

H264Context *h= dst->priv_data, *h1= src->priv_data;

1148

MpegEncContext * const s = &h->s, * const s1 = &h1->s;

1149

int inited = s->context_initialized, err;

1150

int i;

1151

1152

if(dst == src || !s1->context_initialized) return 0;

1153

1154

err = ff_mpeg_update_thread_context(dst, src);

1155

if(err) return err;

1156

1157

//FIXME handle width/height changing

1158

if(!inited){

1159

for(i = 0; i < MAX_SPS_COUNT; i++)

1160

av_freep(h->sps_buffers + i);

1161

1162

for(i = 0; i < MAX_PPS_COUNT; i++)

1163

av_freep(h->pps_buffers + i);

1164

1165

memcpy(&h->s + 1, &h1->s + 1, sizeof(H264Context) - sizeof(MpegEncContext)); //copy all fields after MpegEnc

1166

memset(h->sps_buffers, 0, sizeof(h->sps_buffers));

1167

memset(h->pps_buffers, 0, sizeof(h->pps_buffers));

1168

ff_h264_alloc_tables(h);

1169

context_init(h);

1170

1171

for(i=0; i<2; i++){

1172

h->rbsp_buffer[i] = NULL;

1173

h->rbsp_buffer_size[i] = 0;

1174

}

1175

1176

h->thread_context[0] = h;

1177

1178

// frame_start may not be called for the next thread (if it's decoding a bottom field)

1179

// so this has to be allocated here

1180

h->s.obmc_scratchpad = av_malloc(16*6*s->linesize);

1181

1182

s->dsp.clear_blocks(h->mb);

1183

s->dsp.clear_blocks(h->mb+(24*16<<h->pixel_shift));

1184

}

1185

1186

//extradata/NAL handling

1187

h->is_avc = h1->is_avc;

1188

1189

//SPS/PPS

1190

copy_parameter_set((void**)h->sps_buffers, (void**)h1->sps_buffers, MAX_SPS_COUNT, sizeof(SPS));

1191

h->sps = h1->sps;

1192

copy_parameter_set((void**)h->pps_buffers, (void**)h1->pps_buffers, MAX_PPS_COUNT, sizeof(PPS));

1193

h->pps = h1->pps;

1194

1195

//Dequantization matrices

1196

//FIXME these are big - can they be only copied when PPS changes?

1197

copy_fields(h, h1, dequant4_buffer, dequant4_coeff);

1198

1199

for(i=0; i<6; i++)

1200

h->dequant4_coeff[i] = h->dequant4_buffer[0] + (h1->dequant4_coeff[i] - h1->dequant4_buffer[0]);

1201

1202

for(i=0; i<6; i++)

1203

h->dequant8_coeff[i] = h->dequant8_buffer[0] + (h1->dequant8_coeff[i] - h1->dequant8_buffer[0]);

1204

1205

h->dequant_coeff_pps = h1->dequant_coeff_pps;

1206

1207

//POC timing

1208

copy_fields(h, h1, poc_lsb, redundant_pic_count);

1209

1210

//reference lists

1211

copy_fields(h, h1, ref_count, list_count);

1212

copy_fields(h, h1, ref_list, intra_gb);

1213

copy_fields(h, h1, short_ref, cabac_init_idc);

1214

1215

copy_picture_range(h->short_ref, h1->short_ref, 32, s, s1);

1216

copy_picture_range(h->long_ref, h1->long_ref, 32, s, s1);

1217

copy_picture_range(h->delayed_pic, h1->delayed_pic, MAX_DELAYED_PIC_COUNT+2, s, s1);

1218

1219

h->last_slice_type = h1->last_slice_type;

1220

1221

if(!s->current_picture_ptr) return 0;

1222

1223

if(!s->dropable) {

1224

ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);

1225

h->prev_poc_msb = h->poc_msb;

1226

h->prev_poc_lsb = h->poc_lsb;

1227

}

1228

h->prev_frame_num_offset= h->frame_num_offset;

1229

h->prev_frame_num = h->frame_num;

1230

h->outputed_poc = h->next_outputed_poc;

1231

1232

return 0;

1233

}

1234

1235

/**
 * Start decoding a new frame: let MPV_frame_start() set up the current
 * picture, reset the per-picture state, compute the block offset tables for
 * the current line sizes and make sure every slice context in use has a
 * scratchpad buffer.
 *
 * @return 0 on success, -1 if MPV_frame_start() failed or a scratchpad
 *         buffer could not be allocated
 */
int ff_h264_frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    const int pixel_shift = h->pixel_shift;
    int thread_count = (s->avctx->active_thread_type & FF_THREAD_SLICE) ? s->avctx->thread_count : 1;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->key_frame= 0;
    s->current_picture_ptr->mmco_reset= 0;

    assert(s->linesize && s->uvlinesize);

    for(i=0; i<16; i++){
        h->block_offset[i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    for(i=0; i<16; i++){
        h->block_offset[16+i]=
        h->block_offset[32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[48+16+i]=
        h->block_offset[48+32+i]= (4*((scan8[i] - scan8[0])&7) << pixel_shift) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < thread_count; i++){
        H264Context *hx = h->thread_context[i];

        if(hx && !hx->s.obmc_scratchpad){
            hx->s.obmc_scratchpad = av_malloc(16*6*s->linesize);
            /* report the failure instead of leaving a NULL scratchpad */
            if(!hx->s.obmc_scratchpad)
                return -1;
        }
    }

    /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
    memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

    // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    /* INT_MAX marks a field POC as not yet set (see decode_postinit()) */
    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;

    h->next_output_pic = NULL;

    assert(s->current_picture_ptr->long_ref==0);

    return 0;
}

1295

1296

/**

1297

* Run setup operations that must be run after slice header decoding.

1298

* This includes finding the next displayed frame.

1299

1300

* @param h h264 master context

1301

* @param setup_finished enough NALs have been read that we can call

1302

* ff_thread_finish_setup()

1303

1304

static void decode_postinit(H264Context *h, int setup_finished){

1305

MpegEncContext * const s = &h->s;

1306

Picture *out = s->current_picture_ptr;

1307

Picture *cur = s->current_picture_ptr;

1308

int i, pics, out_of_order, out_idx;

1309

1310

s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;

1311

s->current_picture_ptr->pict_type= s->pict_type;

1312

1313

if (h->next_output_pic) return;

1314

1315

if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {

1316

//FIXME: if we have two PAFF fields in one packet, we can't start the next thread here.

1317

//If we have one field per packet, we can. The check in decode_nal_units() is not good enough

1318

//to find this yet, so we assume the worst for now.

1319

//if (setup_finished)

1320

// ff_thread_finish_setup(s->avctx);

1321

return;

1322

}

1323

1324

cur->interlaced_frame = 0;

1325

cur->repeat_pict = 0;

1326

1327

/* Signal interlacing information externally. */

1328

/* Prioritize picture timing SEI information over used decoding process if it exists. */

1329

1330

if(h->sps.pic_struct_present_flag){

1331

switch (h->sei_pic_struct)

1332

{

1333

case SEI_PIC_STRUCT_FRAME:

1334

break;

1335

case SEI_PIC_STRUCT_TOP_FIELD:

1336

case SEI_PIC_STRUCT_BOTTOM_FIELD:

1337

cur->interlaced_frame = 1;

1338

break;

1339

case SEI_PIC_STRUCT_TOP_BOTTOM:

1340

case SEI_PIC_STRUCT_BOTTOM_TOP:

1341

if (FIELD_OR_MBAFF_PICTURE)

1342

cur->interlaced_frame = 1;

1343

else

1344

// try to flag soft telecine progressive

1345

cur->interlaced_frame = h->prev_interlaced_frame;

1346

break;

1347

case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:

1348

case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:

1349

// Signal the possibility of telecined film externally (pic_struct 5,6)

1350

// From these hints, let the applications decide if they apply deinterlacing.

1351

cur->repeat_pict = 1;

1352

break;

1353

case SEI_PIC_STRUCT_FRAME_DOUBLING:

1354

// Force progressive here, as doubling interlaced frame is a bad idea.

1355

cur->repeat_pict = 2;

1356

break;

1357

case SEI_PIC_STRUCT_FRAME_TRIPLING:

1358

cur->repeat_pict = 4;

1359

break;

1360

}

1361

1362

if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)

1363

cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;

1364

}else{

1365

/* Derive interlacing flag from used decoding process. */

1366

cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;

1367

}

1368

h->prev_interlaced_frame = cur->interlaced_frame;

1369

1370

if (cur->field_poc[0] != cur->field_poc[1]){

1371

/* Derive top_field_first from field pocs. */

1372

cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];

1373

}else{

1374

if(cur->interlaced_frame || h->sps.pic_struct_present_flag){

1375

/* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */

1376

if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM

1377

|| h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)

1378

cur->top_field_first = 1;

1379

else

1380

cur->top_field_first = 0;

1381

}else{

1382

/* Most likely progressive */

1383

cur->top_field_first = 0;

1384

}

1385

}

1386

1387

//FIXME do something with unavailable reference frames

1388

1389

/* Sort B-frames into display order */

1390

1391

if(h->sps.bitstream_restriction_flag

1392

&& s->avctx->has_b_frames < h->sps.num_reorder_frames){

1393

s->avctx->has_b_frames = h->sps.num_reorder_frames;

1394

s->low_delay = 0;

1395

}

1396

1397

if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT

1398

&& !h->sps.bitstream_restriction_flag){

1399

s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;

1400

s->low_delay= 0;

1401

}

1402

1403

pics = 0;

1404

while(h->delayed_pic[pics]) pics++;

1405

1406

assert(pics <= MAX_DELAYED_PIC_COUNT);

1407

1408

h->delayed_pic[pics++] = cur;

1409

if(cur->reference == 0)

1410

cur->reference = DELAYED_PIC_REF;

1411

1412

out = h->delayed_pic[0];

1413

out_idx = 0;

1414

for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)

1415

if(h->delayed_pic[i]->poc < out->poc){

1416

out = h->delayed_pic[i];

1417

out_idx = i;

1418

}

1419

if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))

1420

h->next_outputed_poc= INT_MIN;

1421

out_of_order = out->poc < h->next_outputed_poc;

1422

1423

if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)

1424

{ }

1425

else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)

1426

|| (s->low_delay &&

1427

((h->next_outputed_poc != INT_MIN && out->poc > h->next_outputed_poc + 2)

1428

|| cur->pict_type == AV_PICTURE_TYPE_B)))

1429

{

1430

s->low_delay = 0;

1431

s->avctx->has_b_frames++;

1432

}

1433

1434

if(out_of_order || pics > s->avctx->has_b_frames){

1435

out->reference &= ~DELAYED_PIC_REF;

1436

out->owner2 = s; // for frame threading, the owner must be the second field's thread

1437

// or else the first thread can release the picture and reuse it unsafely

1438

for(i=out_idx; h->delayed_pic[i]; i++)

1439

h->delayed_pic[i] = h->delayed_pic[i+1];

1440

}

1441

if(!out_of_order && pics > s->avctx->has_b_frames){

1442

h->next_output_pic = out;

1443

if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {

1444

h->next_outputed_poc = INT_MIN;

1445

} else

1446

h->next_outputed_poc = out->poc;

1447

}else{

1448

av_log(s->avctx, AV_LOG_DEBUG, "no picture\n");

1449

}

1450

1451

if (setup_finished)

1452

ff_thread_finish_setup(s->avctx);

1453

}

1454

1455

/**
 * Save the bottom pixel row of the current macroblock (luma and, unless
 * decoding gray-only, chroma) into h->top_borders for the current mb_x.
 *
 * In MBAFF frames (only considered when !simple) two rows are kept, in
 * top_borders[0] and top_borders[1]; see the comments in the body.
 *
 * @param src_y, src_cb, src_cr  start of the macroblock in each plane
 * @param linesize    stride of the luma plane
 * @param uvlinesize  stride of the chroma planes
 * @param chroma444   non-zero if the chroma blocks are 16 rows high like luma
 * @param simple      non-zero to skip the MBAFF and gray handling
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){
    MpegEncContext * const s = &h->s;
    uint8_t *top_border;
    int top_idx = 1;
    const int pixel_shift = h->pixel_shift;

    /* step back one row so that "+ N*stride" below addresses row N-1 */
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF){
                top_border = h->top_borders[0][s->mb_x];
                AV_COPY128(top_border, src_y + 15*linesize);
                if (pixel_shift)
                    AV_COPY128(top_border+16, src_y+15*linesize+16);
                if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
                    if(chroma444){
                        if (pixel_shift){
                            AV_COPY128(top_border+32, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+48, src_cb + 15*uvlinesize+16);
                            AV_COPY128(top_border+64, src_cr + 15*uvlinesize);
                            AV_COPY128(top_border+80, src_cr + 15*uvlinesize+16);
                        } else {
                            AV_COPY128(top_border+16, src_cb + 15*uvlinesize);
                            AV_COPY128(top_border+32, src_cr + 15*uvlinesize);
                        }
                    } else {
                        if (pixel_shift) {
                            AV_COPY128(top_border+32, src_cb+7*uvlinesize);
                            AV_COPY128(top_border+48, src_cr+7*uvlinesize);
                        } else {
                            AV_COPY64(top_border+16, src_cb+7*uvlinesize);
                            AV_COPY64(top_border+24, src_cr+7*uvlinesize);
                        }
                    }
                }
            }
        }else if(MB_MBAFF){
            top_idx = 0;
        }else
            return;
    }

    top_border = h->top_borders[top_idx][s->mb_x];
    // There are two lines saved, the line above the top macroblock of a pair,
    // and the line above the bottom macroblock
    AV_COPY128(top_border, src_y + 16*linesize);
    if (pixel_shift)
        AV_COPY128(top_border+16, src_y+16*linesize+16);

    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            /* Chroma pointers must be advanced by the chroma stride, as in
             * the MBAFF branch above; these copies previously used the luma
             * stride, which only works when both strides are equal. */
            if (pixel_shift){
                AV_COPY128(top_border+32, src_cb + 16*uvlinesize);
                AV_COPY128(top_border+48, src_cb + 16*uvlinesize+16);
                AV_COPY128(top_border+64, src_cr + 16*uvlinesize);
                AV_COPY128(top_border+80, src_cr + 16*uvlinesize+16);
            } else {
                AV_COPY128(top_border+16, src_cb + 16*uvlinesize);
                AV_COPY128(top_border+32, src_cr + 16*uvlinesize);
            }
        } else {
            if (pixel_shift) {
                AV_COPY128(top_border+32, src_cb+8*uvlinesize);
                AV_COPY128(top_border+48, src_cr+8*uvlinesize);
            } else {
                AV_COPY64(top_border+16, src_cb+8*uvlinesize);
                AV_COPY64(top_border+24, src_cr+8*uvlinesize);
            }
        }
    }
}

1529

1530

/**
 * Exchange (or copy) the pixel row above the current macroblock in the
 * picture with the row saved in h->top_borders.
 *
 * Which neighbours are touched depends on deblock_top / deblock_topleft,
 * derived from the slice table when h->deblocking_filter == 2 and from the
 * macroblock position otherwise.
 *
 * @param src_y, src_cb, src_cr  start of the macroblock in each plane
 * @param linesize    stride of the luma plane
 * @param uvlinesize  stride of the chroma planes
 * @param xchg        non-zero to swap the first 8 pixels of the row above,
 *                    zero to only copy them from the saved border
 * @param chroma444   non-zero if chroma blocks are as wide as luma
 * @param simple      non-zero to skip the MBAFF and gray handling
 * @param pixel_shift 0 for 1-byte samples, 1 for 2-byte samples
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y,
                                  uint8_t *src_cb, uint8_t *src_cr,
                                  int linesize, int uvlinesize,
                                  int xchg, int chroma444,
                                  int simple, int pixel_shift){
    MpegEncContext * const s = &h->s;
    int deblock_topleft;
    int deblock_top;
    int top_idx = 1;
    uint8_t *top_border_m1;
    uint8_t *top_border;

    if(!simple && FRAME_MBAFF){
        if(s->mb_y&1){
            if(!MB_MBAFF)
                return;
        }else{
            top_idx = MB_MBAFF ? 0 : 1;
        }
    }

    if(h->deblocking_filter == 2) {
        deblock_topleft = h->slice_table[h->mb_xy - 1 - s->mb_stride] == h->slice_num;
        deblock_top     = h->top_type;
    } else {
        deblock_topleft = (s->mb_x > 0);
        deblock_top     = (s->mb_y > !!MB_FIELD);
    }

    /* move to one row up and one sample to the left of the macroblock */
    src_y  -=   linesize + 1 + pixel_shift;
    src_cb -= uvlinesize + 1 + pixel_shift;
    src_cr -= uvlinesize + 1 + pixel_shift;

    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
    top_border    = h->top_borders[top_idx][s->mb_x];

/* Swap (xchg != 0) or copy a -> b (xchg == 0) one group of 8 samples. */
#define XCHG(a,b,xchg)\
    if (pixel_shift) {\
        if (xchg) {\
            AV_SWAP64(b+0,a+0);\
            AV_SWAP64(b+8,a+8);\
        } else {\
            AV_COPY128(b,a); \
        }\
    } else \
    if (xchg) AV_SWAP64(b,a);\
    else      AV_COPY64(b,a);

    if(deblock_top){
        if(deblock_topleft){
            XCHG(top_border_m1 + (8 << pixel_shift), src_y - (7 << pixel_shift), 1);
        }
        XCHG(top_border + (0 << pixel_shift), src_y + (1 << pixel_shift), xchg);
        XCHG(top_border + (8 << pixel_shift), src_y + (9 << pixel_shift), 1);
        if(s->mb_x+1 < s->mb_width){
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y + (17 << pixel_shift), 1);
        }
    }
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
        if(chroma444){
            if(deblock_topleft){
                XCHG(top_border_m1 + (24 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                XCHG(top_border_m1 + (40 << pixel_shift), src_cr - (7 << pixel_shift), 1);
            }
            XCHG(top_border + (16 << pixel_shift), src_cb + (1 << pixel_shift), xchg);
            XCHG(top_border + (24 << pixel_shift), src_cb + (9 << pixel_shift), 1);
            XCHG(top_border + (32 << pixel_shift), src_cr + (1 << pixel_shift), xchg);
            XCHG(top_border + (40 << pixel_shift), src_cr + (9 << pixel_shift), 1);
            if(s->mb_x+1 < s->mb_width){
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (16 << pixel_shift), src_cb + (17 << pixel_shift), 1);
                XCHG(h->top_borders[top_idx][s->mb_x+1] + (32 << pixel_shift), src_cr + (17 << pixel_shift), 1);
            }
        } else {
            if(deblock_top){
                if(deblock_topleft){
                    XCHG(top_border_m1 + (16 << pixel_shift), src_cb - (7 << pixel_shift), 1);
                    XCHG(top_border_m1 + (24 << pixel_shift), src_cr - (7 << pixel_shift), 1);
                }
                XCHG(top_border + (16 << pixel_shift), src_cb+1+pixel_shift, 1);
                XCHG(top_border + (24 << pixel_shift), src_cr+1+pixel_shift, 1);
            }
        }
    }
}

1614

1615

/**
 * Read the coefficient at position index from a coefficient block.
 *
 * @param mb             coefficient block
 * @param high_bit_depth non-zero if the block holds 32-bit coefficients,
 *                       zero if it holds 16-bit ones
 * @param index          coefficient index, in units of one coefficient
 */
static av_always_inline int dctcoef_get(DCTELEM *mb, int high_bit_depth, int index) {
    if (!high_bit_depth)
        return AV_RN16A(mb + index);

    return AV_RN32A(((int32_t*)mb) + index);
}

1621

1622

/**
 * Store value as the coefficient at position index of a coefficient block.
 *
 * @param mb             coefficient block
 * @param high_bit_depth non-zero if the block holds 32-bit coefficients,
 *                       zero if it holds 16-bit ones
 * @param index          coefficient index, in units of one coefficient
 * @param value          value to store
 */
static av_always_inline void dctcoef_set(DCTELEM *mb, int high_bit_depth, int index, int value) {
    if (!high_bit_depth) {
        AV_WN16A(mb + index, value);
        return;
    }

    AV_WN32A(((int32_t*)mb) + index, value);
}

1628

1629

/**
 * Run intra prediction for one plane of an intra macroblock and, for
 * intra 4x4 / 8x8 macroblocks, add the residual of each block right after
 * predicting it.
 *
 * For intra 16x16 macroblocks only the prediction and the luma DC handling
 * are done here.
 *
 * @param mb_type          macroblock type flags
 * @param is_h264          non-zero for H.264, zero selects the SVQ3 routines
 * @param simple           non-zero skips the s->encoding check
 * @param transform_bypass non-zero if residuals are added without transform
 * @param pixel_shift      0 for 1-byte samples, 1 for 2-byte samples
 * @param block_offset     per-block offsets into the plane (plane 0 base)
 * @param linesize         stride of the destination plane
 * @param dest_y           destination of the macroblock in the plane
 * @param p                plane index (0 uses s->qscale, others chroma_qp)
 */
static av_always_inline void hl_decode_mb_predict_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                       int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;
    int qscale = p == 0 ? s->qscale : h->chroma_qp[p-1];
    block_offset += 16*p;
    if(IS_INTRA4x4(mb_type)){
        if(simple || !s->encoding){
            if(IS_8x8DCT(mb_type)){
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels8;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                    idct_add    = h->h264dsp.h264_idct8_add;
                }
                for(i=0; i<16; i+=4){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        const int nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
                                               (h->topright_samples_available<<i)&0x4000, linesize);
                        if(nnz){
                            if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            else
                                idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                        }
                    }
                }
            }else{
                if(transform_bypass){
                    idct_dc_add =
                    idct_add    = s->dsp.add_pixels4;
                }else{
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    idct_add    = h->h264dsp.h264_idct_add;
                }
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + block_offset[i];
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];

                    if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
                        h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                    }else{
                        uint8_t *topright;
                        int nnz, tr;
                        uint64_t tr_high;
                        if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
                            const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                            assert(s->mb_y || linesize <= block_offset[i]);
                            if(!topright_avail){
                                /* no top-right block: replicate the last
                                 * sample of the row above into a temporary */
                                if (pixel_shift) {
                                    tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
                                    topright= (uint8_t*) &tr_high;
                                } else {
                                    /* unsigned multiply: a signed one
                                     * overflows int for samples >= 128 */
                                    tr= ptr[3 - linesize]*0x01010101u;
                                    topright= (uint8_t*) &tr;
                                }
                            }else
                                topright= ptr + (4 << pixel_shift) - linesize;
                        }else
                            topright= NULL;

                        h->hpc.pred4x4[ dir ](ptr, topright, linesize);
                        nnz = h->non_zero_count_cache[ scan8[i+p*16] ];
                        if(nnz){
                            if(is_h264){
                                if(nnz == 1 && dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                                    idct_dc_add(ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                                else
                                    idct_add   (ptr, h->mb + (i*16+p*256 << pixel_shift), linesize);
                            }else
                                ff_svq3_add_idct_c(ptr, h->mb + i*16+p*256, linesize, qscale, 0);
                        }
                    }
                }
            }
        }
    }else{
        h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
        if(is_h264){
            if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX+p] ]){
                if(!transform_bypass)
                    h->h264dsp.h264_luma_dc_dequant_idct(h->mb+(p*256 << pixel_shift), h->mb_luma_dc[p], h->dequant4_coeff[p][qscale][0]);
                else{
                    /* bypass: copy each DC coefficient to the first
                     * coefficient of its 4x4 block */
                    static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
                                                            8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
                    for(i = 0; i < 16; i++)
                        dctcoef_set(h->mb+p*256, pixel_shift, dc_mapping[i], dctcoef_get(h->mb_luma_dc[p], pixel_shift, i));
                }
            }
        }else
            ff_svq3_luma_dc_dequant_idct_c(h->mb+p*256, h->mb_luma_dc[p], qscale);
    }
}

1731

1732

/**
 * Add the residual of one plane to the destination for every macroblock type
 * except intra 4x4 (for which this function does nothing).
 *
 * @param mb_type          macroblock type flags
 * @param is_h264          non-zero for H.264, zero selects the SVQ3 routine
 * @param simple           not used by this function
 * @param transform_bypass non-zero if residuals are added without transform
 * @param pixel_shift      0 for 1-byte samples, 1 for 2-byte samples
 * @param block_offset     per-block offsets into the plane (plane 0 base)
 * @param linesize         stride of the destination plane
 * @param dest_y           destination of the macroblock in the plane
 * @param p                plane index
 */
static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type, int is_h264, int simple, int transform_bypass,
                                                    int pixel_shift, int *block_offset, int linesize, uint8_t *dest_y, int p)
{
    MpegEncContext * const s = &h->s;
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
    int i;

    block_offset += 16*p;

    if (IS_INTRA4x4(mb_type))
        return;

    if (!is_h264) {
        /* SVQ3 */
        for (i = 0; i < 16; i++) {
            if (h->non_zero_count_cache[ scan8[i+p*16] ] || h->mb[i*16+p*256]) { //FIXME benchmark weird rule, & below
                uint8_t * const ptr = dest_y + block_offset[i];
                ff_svq3_add_idct_c(ptr, h->mb + i*16 + p*256, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
            }
        }
        return;
    }

    if (IS_INTRA16x16(mb_type)) {
        if (!transform_bypass) {
            h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
        } else if (h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)) {
            h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize);
        } else {
            for (i = 0; i < 16; i++) {
                if (h->non_zero_count_cache[ scan8[i+p*16] ] || dctcoef_get(h->mb, pixel_shift, i*16+p*256))
                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
            }
        }
    } else if (h->cbp&15) {
        if (transform_bypass) {
            /* 8x8 blocks: visit every 4th 4x4 index */
            const int step = IS_8x8DCT(mb_type) ? 4 : 1;

            idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
            for (i = 0; i < 16; i += step) {
                if (h->non_zero_count_cache[ scan8[i+p*16] ])
                    idct_add(dest_y + block_offset[i], h->mb + (i*16+p*256 << pixel_shift), linesize);
            }
        } else if (IS_8x8DCT(mb_type)) {
            h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
        } else {
            h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb + (p*256 << pixel_shift), linesize, h->non_zero_count_cache+p*5*8);
        }
    }
}

1781

1782

/**
 * Reconstruct the current macroblock (h->mb_xy) into the current picture:
 * one 16x16 luma block and two 8x8 chroma blocks.
 *
 * @param h           decoder context
 * @param simple      nonzero disables the field-macroblock, PCM, transform
 *                    bypass, gray-flag and SVQ3 checks
 * @param pixel_shift 0 for 8-bit samples, 1 when samples are stored as 16 bit
 */
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift)
{
    MpegEncContext * const s = &h->s;
    const int mb_x    = s->mb_x;
    const int mb_y    = s->mb_y;
    const int mb_xy   = h->mb_xy;
    const int mb_type = s->current_picture.mb_type[mb_xy];
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize;
    int i, j;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    /* is_h264 should always be true if SVQ3 is disabled. */
    const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
    /* chroma is skipped only when gray decoding is compiled in and requested */
    const int with_chroma = simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY);
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);

    dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize  ) * 16;
    dest_cb = s->current_picture.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;
    dest_cr = s->current_picture.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8;

    s->dsp.prefetch(dest_y  + (mb_x & 3) * 4 * s->linesize + (64 << pixel_shift), s->linesize, 4);
    s->dsp.prefetch(dest_cb + (mb_x & 7) * s->uvlinesize   + (64 << pixel_shift), dest_cr - dest_cb, 2);

    h->list_counts[mb_xy] = h->list_count;

    if (!simple && MB_FIELD) {
        /* field macroblock: rows are two picture lines apart */
        linesize     = h->mb_linesize   = s->linesize   * 2;
        uvlinesize   = h->mb_uvlinesize = s->uvlinesize * 2;
        block_offset = &h->block_offset[48];
        if (mb_y & 1) { // FIXME move out of this function?
            dest_y  -= s->linesize   * 15;
            dest_cb -= s->uvlinesize * 7;
            dest_cr -= s->uvlinesize * 7;
        }
        if (FRAME_MBAFF) {
            int list;
            for (list = 0; list < h->list_count; list++) {
                if (!USES_LIST(mb_type, list))
                    continue;
                if (IS_16X16(mb_type)) {
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (mb_y & 1), 1);
                } else {
                    for (i = 0; i < 16; i += 4) {
                        int ref = h->ref_cache[list][scan8[i]];
                        if (ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16 + ref) ^ (mb_y & 1), 1);
                    }
                }
            }
        }
    } else {
        linesize   = h->mb_linesize   = s->linesize;
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        if (pixel_shift) {
            /* samples are packed in h->mb with bit_depth bits each */
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t *)h->mb, 384 * bit_depth);

            for (i = 0; i < 16; i++) {
                uint16_t *tmp_y = (uint16_t *)(dest_y + i * linesize);
                for (j = 0; j < 16; j++)
                    tmp_y[j] = get_bits(&gb, bit_depth);
            }
            if (with_chroma) {
                for (i = 0; i < 8; i++) {
                    uint16_t *tmp_cb = (uint16_t *)(dest_cb + i * uvlinesize);
                    for (j = 0; j < 8; j++)
                        tmp_cb[j] = get_bits(&gb, bit_depth);
                }
                for (i = 0; i < 8; i++) {
                    uint16_t *tmp_cr = (uint16_t *)(dest_cr + i * uvlinesize);
                    for (j = 0; j < 8; j++)
                        tmp_cr[j] = get_bits(&gb, bit_depth);
                }
            }
        } else {
            for (i = 0; i < 16; i++)
                memcpy(dest_y + i * linesize, h->mb + i * 8, 16);
            if (with_chroma) {
                for (i = 0; i < 8; i++) {
                    memcpy(dest_cb + i * uvlinesize, h->mb + 128 + i * 4, 8);
                    memcpy(dest_cr + i * uvlinesize, h->mb + 160 + i * 4, 8);
                }
            }
        }
    } else {
        if (IS_INTRA(mb_type)) {
            if (h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, 0, simple, pixel_shift);

            if (with_chroma) {
                h->hpc.pred8x8[h->chroma_pred_mode](dest_cb, uvlinesize);
                h->hpc.pred8x8[h->chroma_pred_mode](dest_cr, uvlinesize);
            }

            hl_decode_mb_predict_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift,
                                      block_offset, linesize, dest_y, 0);

            if (h->deblocking_filter)
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, 0, simple, pixel_shift);
        } else if (is_h264) {
            if (pixel_shift) {
                hl_motion_16(h, dest_y, dest_cb, dest_cr,
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 0);
            } else {
                hl_motion_8(h, dest_y, dest_cb, dest_cr,
                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                            h->h264dsp.weight_h264_pixels_tab,
                            h->h264dsp.biweight_h264_pixels_tab, 0);
            }
        }

        hl_decode_mb_idct_luma(h, mb_type, is_h264, simple, transform_bypass, pixel_shift,
                               block_offset, linesize, dest_y, 0);

        if (with_chroma && (h->cbp & 0x30)) {
            uint8_t *dest[2] = { dest_cb, dest_cr };
            if (transform_bypass) {
                if (IS_INTRA(mb_type) && h->sps.profile_idc == 244 &&
                    (h->chroma_pred_mode == VERT_PRED8x8 || h->chroma_pred_mode == HOR_PRED8x8)) {
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16,
                                                            h->mb + (16 * 16 * 1 << pixel_shift), uvlinesize);
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 32,
                                                            h->mb + (16 * 16 * 2 << pixel_shift), uvlinesize);
                } else {
                    idct_add = s->dsp.add_pixels4;
                    for (j = 1; j < 3; j++) {
                        for (i = j * 16; i < j * 16 + 4; i++) {
                            if (h->non_zero_count_cache[scan8[i]] || dctcoef_get(h->mb, pixel_shift, i * 16))
                                idct_add(dest[j - 1] + block_offset[i], h->mb + (i * 16 << pixel_shift), uvlinesize);
                        }
                    }
                }
            } else if (is_h264) {
                if (h->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 0]])
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16 * 16 * 1 << pixel_shift),
                                                           h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][h->chroma_qp[0]][0]);
                if (h->non_zero_count_cache[scan8[CHROMA_DC_BLOCK_INDEX + 1]])
                    h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16 * 16 * 2 << pixel_shift),
                                                           h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][h->chroma_qp[1]][0]);
                h->h264dsp.h264_idct_add8(dest, block_offset,
                                          h->mb, uvlinesize,
                                          h->non_zero_count_cache);
            } else {
                /* SVQ3 */
                h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16 * 16 * 1,
                                                       h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][h->chroma_qp[0]][0]);
                h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16 * 16 * 2,
                                                       h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][h->chroma_qp[1]][0]);
                for (j = 1; j < 3; j++) {
                    for (i = j * 16; i < j * 16 + 4; i++) {
                        if (h->non_zero_count_cache[scan8[i]] || h->mb[i * 16]) {
                            uint8_t * const ptr = dest[j - 1] + block_offset[i];
                            ff_svq3_add_idct_c(ptr, h->mb + i * 16, uvlinesize,
                                               ff_h264_chroma_qp[0][s->qscale + 12] - 12, 2);
                        }
                    }
                }
            }
        }
    }

    /* wipe the coefficient buffers that may have been written for this MB */
    if (h->cbp || IS_INTRA(mb_type)) {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb + (24 * 16 << pixel_shift));
    }
}

1949

1950

/**
 * Reconstruct the current macroblock (h->mb_xy) when every plane is a full
 * 16x16 block; each plane goes through the luma prediction/residual helpers.
 *
 * Only plane_count planes are processed: 3 normally, 1 when gray decoding is
 * compiled in and requested via CODEC_FLAG_GRAY on the non-simple path.
 *
 * @param h           decoder context
 * @param simple      nonzero disables the field-macroblock, PCM, transform
 *                    bypass and gray-flag checks
 * @param pixel_shift 0 for 8-bit samples, 1 when samples are stored as 16 bit
 */
static av_always_inline void hl_decode_mb_444_internal(H264Context *h, int simple, int pixel_shift)
{
    MpegEncContext * const s = &h->s;
    const int mb_x    = s->mb_x;
    const int mb_y    = s->mb_y;
    const int mb_xy   = h->mb_xy;
    const int mb_type = s->current_picture.mb_type[mb_xy];
    /* Entries >= plane_count are never set up below; keep them at a defined
     * value instead of leaving them indeterminate. */
    uint8_t *dest[3] = { NULL, NULL, NULL };
    int linesize;
    int i, j, p;
    int *block_offset = &h->block_offset[0];
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
    const int plane_count = (simple || !CONFIG_GRAY || !(s->flags & CODEC_FLAG_GRAY)) ? 3 : 1;

    for (p = 0; p < plane_count; p++) {
        dest[p] = s->current_picture.data[p] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
        s->dsp.prefetch(dest[p] + (s->mb_x & 3) * 4 * s->linesize + (64 << pixel_shift), s->linesize, 4);
    }

    h->list_counts[mb_xy] = h->list_count;

    if (!simple && MB_FIELD) {
        linesize     = h->mb_linesize = h->mb_uvlinesize = s->linesize * 2;
        block_offset = &h->block_offset[48];
        if (mb_y & 1) { // FIXME move out of this function?
            /* Only adjust the planes that were initialised above; the
             * previous "p < 3" bound read uninitialised pointers when
             * plane_count was 1. */
            for (p = 0; p < plane_count; p++)
                dest[p] -= s->linesize * 15;
        }
        if (FRAME_MBAFF) {
            int list;
            for (list = 0; list < h->list_count; list++) {
                if (!USES_LIST(mb_type, list))
                    continue;
                if (IS_16X16(mb_type)) {
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
                    fill_rectangle(ref, 4, 4, 8, (16 + *ref) ^ (s->mb_y & 1), 1);
                } else {
                    for (i = 0; i < 16; i += 4) {
                        int ref = h->ref_cache[list][scan8[i]];
                        if (ref >= 0)
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16 + ref) ^ (s->mb_y & 1), 1);
                    }
                }
            }
        }
    } else {
        linesize = h->mb_linesize = h->mb_uvlinesize = s->linesize;
    }

    if (!simple && IS_INTRA_PCM(mb_type)) {
        if (pixel_shift) {
            /* samples are packed in h->mb with bit_depth bits each */
            const int bit_depth = h->sps.bit_depth_luma;
            GetBitContext gb;
            init_get_bits(&gb, (uint8_t *)h->mb, 768 * bit_depth);

            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++) {
                    uint16_t *tmp = (uint16_t *)(dest[p] + i * linesize);
                    for (j = 0; j < 16; j++)
                        tmp[j] = get_bits(&gb, bit_depth);
                }
            }
        } else {
            for (p = 0; p < plane_count; p++) {
                for (i = 0; i < 16; i++)
                    memcpy(dest[p] + i * linesize, h->mb + p * 128 + i * 8, 16);
            }
        }
    } else {
        if (IS_INTRA(mb_type)) {
            if (h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 1, 1, simple, pixel_shift);

            for (p = 0; p < plane_count; p++)
                hl_decode_mb_predict_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift,
                                          block_offset, linesize, dest[p], p);

            if (h->deblocking_filter)
                xchg_mb_border(h, dest[0], dest[1], dest[2], linesize, linesize, 0, 1, simple, pixel_shift);
        } else {
            if (pixel_shift) {
                hl_motion_16(h, dest[0], dest[1], dest[2],
                             s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                             s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                             h->h264dsp.weight_h264_pixels_tab,
                             h->h264dsp.biweight_h264_pixels_tab, 1);
            } else {
                hl_motion_8(h, dest[0], dest[1], dest[2],
                            s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
                            s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
                            h->h264dsp.weight_h264_pixels_tab,
                            h->h264dsp.biweight_h264_pixels_tab, 1);
            }
        }

        for (p = 0; p < plane_count; p++)
            hl_decode_mb_idct_luma(h, mb_type, 1, simple, transform_bypass, pixel_shift,
                                   block_offset, linesize, dest[p], p);
    }

    /* wipe the coefficient buffers that may have been written for this MB */
    if (h->cbp || IS_INTRA(mb_type)) {
        s->dsp.clear_blocks(h->mb);
        s->dsp.clear_blocks(h->mb + (24 * 16 << pixel_shift));
    }
}

2052

2053

/**

2054

* Process a macroblock; this case avoids checks for expensive uncommon cases.

2055

2056

#define hl_decode_mb_simple(sh, bits) \

2057

static void hl_decode_mb_simple_ ## bits(H264Context *h){ \

2058

hl_decode_mb_internal(h, 1, sh); \

2059

}

2060

hl_decode_mb_simple(0, 8);

2061

hl_decode_mb_simple(1, 16);

2062

2063

/**

2064

* Process a macroblock; this handles edge cases, such as interlacing.

2065

2066

static void av_noinline hl_decode_mb_complex(H264Context *h){

2067

hl_decode_mb_internal(h, 0, h->pixel_shift);

2068

}

2069

2070

/**
 * Process a macroblock through hl_decode_mb_444_internal() with simple = 0
 * and the context's pixel_shift.
 */
static void av_noinline hl_decode_mb_444_complex(H264Context *h)
{
    const int shift = h->pixel_shift;

    hl_decode_mb_444_internal(h, 0, shift);
}

2073

2074

/**
 * Process a macroblock through hl_decode_mb_444_internal() with simple = 1
 * and a pixel shift of 0.
 */
static void av_noinline hl_decode_mb_444_simple(H264Context *h)
{
    enum { SIMPLE = 1, SHIFT_8BIT = 0 };

    hl_decode_mb_444_internal(h, SIMPLE, SHIFT_8BIT);
}

2077

2078

/**
 * Decode the current macroblock by dispatching to one of the specialised
 * hl_decode_mb_* variants.
 *
 * The "complex" variants are used when CONFIG_SMALL is set, h->is_complex is
 * set, the macroblock is intra PCM, or qscale is 0. In the CHROMA444 case a
 * nonzero pixel_shift also selects the complex variant.
 */
void ff_h264_hl_decode_mb(H264Context *h)
{
    MpegEncContext * const s = &h->s;
    const int mb_type = s->current_picture.mb_type[h->mb_xy];
    const int needs_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;

    if (CHROMA444) {
        if (needs_complex || h->pixel_shift)
            hl_decode_mb_444_complex(h);
        else
            hl_decode_mb_444_simple(h);
        return;
    }

    if (needs_complex) {
        hl_decode_mb_complex(h);
        return;
    }

    if (h->pixel_shift)
        hl_decode_mb_simple_16(h);
    else
        hl_decode_mb_simple_8(h);
}

2096

2097

/**
 * Parse a prediction weight table from the bitstream reader s->gb into
 * h->luma_weight / h->chroma_weight and update the use_weight flags.
 *
 * For every reference of list 0 (and of list 1 for B slices) a weight and an
 * offset are either read from the bitstream or set to the default
 * (1 << log2 denominator, offset 0). use_weight / use_weight_chroma and the
 * per-list *_weight_flag fields are set as soon as one coded entry differs
 * from that default.
 *
 * The log2 denominators are used as shift counts, so values outside 0..7 are
 * reported and replaced by 0 instead of being shifted by.
 *
 * @param h decoder context
 * @return always 0
 */
static int pred_weight_table(H264Context *h)
{
    MpegEncContext * const s = &h->s;
    int list, i;
    int luma_def, chroma_def;

    h->use_weight        = 0;
    h->use_weight_chroma = 0;
    h->luma_log2_weight_denom = get_ue_golomb(&s->gb);
    if (h->sps.chroma_format_idc)
        h->chroma_log2_weight_denom = get_ue_golomb(&s->gb);

    /* The unsigned comparison also rejects negative values. */
    if ((unsigned)h->luma_log2_weight_denom > 7) {
        av_log(h->s.avctx, AV_LOG_ERROR, "luma_log2_weight_denom %d is out of range\n",
               (int)h->luma_log2_weight_denom);
        h->luma_log2_weight_denom = 0;
    }
    if ((unsigned)h->chroma_log2_weight_denom > 7) {
        av_log(h->s.avctx, AV_LOG_ERROR, "chroma_log2_weight_denom %d is out of range\n",
               (int)h->chroma_log2_weight_denom);
        h->chroma_log2_weight_denom = 0;
    }

    luma_def   = 1 << h->luma_log2_weight_denom;
    chroma_def = 1 << h->chroma_log2_weight_denom;

    for (list = 0; list < 2; list++) {
        h->luma_weight_flag[list]   = 0;
        h->chroma_weight_flag[list] = 0;
        for (i = 0; i < h->ref_count[list]; i++) {
            int luma_weight_flag, chroma_weight_flag;

            luma_weight_flag = get_bits1(&s->gb);
            if (luma_weight_flag) {
                h->luma_weight[i][list][0] = get_se_golomb(&s->gb);
                h->luma_weight[i][list][1] = get_se_golomb(&s->gb);
                if (h->luma_weight[i][list][0] != luma_def ||
                    h->luma_weight[i][list][1] != 0) {
                    h->use_weight             = 1;
                    h->luma_weight_flag[list] = 1;
                }
            } else {
                h->luma_weight[i][list][0] = luma_def;
                h->luma_weight[i][list][1] = 0;
            }

            if (h->sps.chroma_format_idc) {
                int j;

                chroma_weight_flag = get_bits1(&s->gb);
                if (chroma_weight_flag) {
                    for (j = 0; j < 2; j++) {
                        h->chroma_weight[i][list][j][0] = get_se_golomb(&s->gb);
                        h->chroma_weight[i][list][j][1] = get_se_golomb(&s->gb);
                        if (h->chroma_weight[i][list][j][0] != chroma_def ||
                            h->chroma_weight[i][list][j][1] != 0) {
                            h->use_weight_chroma        = 1;
                            h->chroma_weight_flag[list] = 1;
                        }
                    }
                } else {
                    for (j = 0; j < 2; j++) {
                        h->chroma_weight[i][list][j][0] = chroma_def;
                        h->chroma_weight[i][list][j][1] = 0;
                    }
                }
            }
        }
        /* list 1 is only present for B slices */
        if (h->slice_type_nos != AV_PICTURE_TYPE_B)
            break;
    }
    h->use_weight = h->use_weight || h->use_weight_chroma;
    return 0;
}

2157

2158

/**

2159

* Initialize implicit_weight table.

2160

* @param field 0/1 initialize the weight for interlaced MBAFF

2161

* -1 initializes the rest

2162

2163

static void implicit_weight_table(H264Context *h, int field){

2164

MpegEncContext * const s = &h->s;

2165

int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;

2166

2167

for (i = 0; i < 2; i++) {

2168

h->luma_weight_flag[i] = 0;

2169

h->chroma_weight_flag[i] = 0;

2170

}

2171

2172

if(field < 0){

2173

cur_poc = s->current_picture_ptr->poc;

2174

if( h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF

2175

&& h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){

2176

h->use_weight= 0;

2177

h->use_weight_chroma= 0;

2178

return;

2179

}

2180

ref_start= 0;

2181

ref_count0= h->ref_count[0];

2182

ref_count1= h->ref_count[1];

2183

}else{

2184

cur_poc = s->current_picture_ptr->field_poc[field];

2185

ref_start= 16;

2186

ref_count0= 16+2*h->ref_count[0];

2187

ref_count1= 16+2*h->ref_count[1];

2188

}

2189

2190

h->use_weight= 2;

2191

h->use_weight_chroma= 2;

2192

h->luma_log2_weight_denom= 5;

2193

h->chroma_log2_weight_denom= 5;

2194

2195

for(ref0=ref_start; ref0 < ref_count0; ref0++){

2196

int poc0 = h->ref_list[0][ref0].poc;

2197

for(ref1=ref_start; ref1 < ref_count1; ref1++){

2198

int poc1 = h->ref_list[1][ref1].poc;

2199

int td = av_clip(poc1 - poc0, -128, 127);

2200

int w= 32;

2201

if(td){

2202

int tb = av_clip(cur_poc - poc0, -128, 127);

2203

int tx = (16384 + (FFABS(td) >> 1)) / td;

2204

int dist_scale_factor = (tb*tx + 32) >> 8;

2205

if(dist_scale_factor >= -64 && dist_scale_factor <= 128)

2206

w = 64 - dist_scale_factor;

2207

}

2208

if(field<0){

2209

h->implicit_weight[ref0][ref1][0]=

2210

h->implicit_weight[ref0][ref1][1]= w;

2211

}else{

2212

h->implicit_weight[ref0][ref1][field]=w;

2213

}

2214

}

2215

}

2216

}

2217

2218

/**

2219

* instantaneous decoder refresh.

2220

2221

static void idr(H264Context *h){

2222

ff_h264_remove_all_refs(h);

2223

h->prev_frame_num= 0;

2224

h->prev_frame_num_offset= 0;

2225

h->prev_poc_msb=

2226

h->prev_poc_lsb= 0;

2227

}

2228

2229

/* forget old pics after a seek */

2230

static void flush_dpb(AVCodecContext *avctx){

2231

H264Context *h= avctx->priv_data;

2232

int i;

2233

for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {

2234

if(h->delayed_pic[i])

2235

h->delayed_pic[i]->reference= 0;

2236

h->delayed_pic[i]= NULL;

2237

}

2238

h->outputed_poc=h->next_outputed_poc= INT_MIN;

2239

h->prev_interlaced_frame = 1;

2240

idr(h);

2241

if(h->s.current_picture_ptr)

2242

h->s.current_picture_ptr->reference= 0;

2243

h->s.first_field= 0;

2244

ff_h264_reset_sei(h);

2245

ff_mpeg_flush(avctx);

2246

}

2247

2248

/**
 * Compute the picture order count of the current picture.
 *
 * Updates h->frame_num_offset (and h->poc_msb for poc_type 0), stores the
 * field POCs that belong to the current picture structure in the current
 * picture, and sets its poc to the smaller of its two field POCs.
 *
 * @param h decoder context
 * @return always 0
 */
static int init_poc(H264Context *h)
{
    MpegEncContext * const s = &h->s;
    const int max_frame_num  = 1 << h->sps.log2_max_frame_num;
    Picture *cur             = s->current_picture_ptr;
    int field_poc[2];

    /* frame_num wrapped around since the previous picture */
    h->frame_num_offset = h->prev_frame_num_offset;
    if (h->frame_num < h->prev_frame_num)
        h->frame_num_offset += max_frame_num;

    if (h->sps.poc_type == 0) {
        const int max_poc_lsb = 1 << h->sps.log2_max_poc_lsb;

        if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb / 2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if (h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb / 2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;

        field_poc[0] = h->poc_msb + h->poc_lsb;
        field_poc[1] = field_poc[0];
        if (s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc_bottom;
    } else if (h->sps.poc_type == 1) {
        int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
        int k;

        abs_frame_num = h->sps.poc_cycle_length != 0 ? h->frame_num_offset + h->frame_num : 0;

        if (h->nal_ref_idc == 0 && abs_frame_num > 0)
            abs_frame_num--;

        expected_delta_per_poc_cycle = 0;
        for (k = 0; k < h->sps.poc_cycle_length; k++)
            expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[k]; // FIXME integrate during sps parse

        expectedpoc = 0;
        if (abs_frame_num > 0) {
            /* abs_frame_num > 0 implies poc_cycle_length != 0 here */
            const int poc_cycle_cnt          = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            const int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
            for (k = 0; k <= frame_num_in_poc_cycle; k++)
                expectedpoc += h->sps.offset_for_ref_frame[k];
        }

        if (h->nal_ref_idc == 0)
            expectedpoc += h->sps.offset_for_non_ref_pic;

        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if (s->picture_structure == PICT_FRAME)
            field_poc[1] += h->delta_poc[1];
    } else {
        int poc = 2 * (h->frame_num_offset + h->frame_num);

        if (!h->nal_ref_idc)
            poc--;

        field_poc[0] = poc;
        field_poc[1] = poc;
    }

    /* only store the field(s) that belong to the current picture structure */
    if (s->picture_structure != PICT_BOTTOM_FIELD)
        cur->field_poc[0] = field_poc[0];
    if (s->picture_structure != PICT_TOP_FIELD)
        cur->field_poc[1] = field_poc[1];
    cur->poc = FFMIN(cur->field_poc[0], cur->field_poc[1]);

    return 0;
}

2324

2325

2326

/**

2327

* initialize scan tables

2328

2329

static void init_scan_tables(H264Context *h){

2330

int i;

2331

for(i=0; i<16; i++){

2332

#define T(x) (x>>2) | ((x<<2) & 0xF)

2333

h->zigzag_scan[i] = T(zigzag_scan[i]);

2334

h-> field_scan[i] = T( field_scan[i]);

2335

#undef T

2336

}

2337

for(i=0; i<64; i++){

2338

#define T(x) (x>>3) | ((x&7)<<3)

2339

h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);

2340

h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);

2341

h->field_scan8x8[i] = T(field_scan8x8[i]);

2342

h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);

2343

#undef T

2344

}

2345

if(h->sps.transform_bypass){ //FIXME same ugly

2346

h->zigzag_scan_q0 = zigzag_scan;

2347

h->zigzag_scan8x8_q0 = ff_zigzag_direct;

2348

h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;

2349

h->field_scan_q0 = field_scan;

2350

h->field_scan8x8_q0 = field_scan8x8;

2351

h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;

2352

}else{

2353

h->zigzag_scan_q0 = h->zigzag_scan;

2354

h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;

2355

h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;

2356

h->field_scan_q0 = h->field_scan;

2357

h->field_scan8x8_q0 = h->field_scan8x8;

2358

h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;

2359

}

2360

}

2361

2362

/**
 * Finish the current field or frame.
 *
 * Reports decoding progress, runs reference picture marking and saves the
 * prev_* state (only when in_setup is set or frame threading is inactive),
 * ends the hwaccel/VDPAU picture, and finally ends the MPEG frame.
 *
 * @param h        decoder context
 * @param in_setup nonzero suppresses the progress report and forces the
 *                 reference marking / prev_* update
 */
static void field_end(H264Context *h, int in_setup)
{
    MpegEncContext * const s     = &h->s;
    AVCodecContext * const avctx = s->avctx;

    s->mb_y = 0;

    if (!in_setup && !s->dropable) {
        const int last_row     = (16 * s->mb_height >> FIELD_PICTURE) - 1;
        const int bottom_field = s->picture_structure == PICT_BOTTOM_FIELD;

        ff_thread_report_progress((AVFrame *)s->current_picture_ptr, last_row, bottom_field);
    }

    if (CONFIG_H264_VDPAU_DECODER && (avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU))
        ff_vdpau_h264_set_reference_frames(s);

    if (in_setup || !(avctx->active_thread_type & FF_THREAD_FRAME)) {
        if (!s->dropable) {
            ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
            h->prev_poc_msb = h->poc_msb;
            h->prev_poc_lsb = h->poc_lsb;
        }
        h->prev_frame_num_offset = h->frame_num_offset;
        h->prev_frame_num        = h->frame_num;
        h->outputed_poc          = h->next_outputed_poc;
    }

    if (avctx->hwaccel && avctx->hwaccel->end_frame(avctx) < 0)
        av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");

    if (CONFIG_H264_VDPAU_DECODER && (avctx->codec->capabilities & CODEC_CAP_HWACCEL_VDPAU))
        ff_vdpau_h264_picture_complete(s);

    /*
     * FIXME: Error handling code does not seem to support interlaced
     * when slices span multiple rows
     * The ff_er_add_slice calls don't work right for bottom
     * fields; they cause massive erroneous error concealing
     * Error marking covers both fields (top and bottom).
     * This causes a mismatched s->error_count
     * and a bad error table. Further, the error count goes to
     * INT_MAX when called for bottom field, because mb_y is
     * past end by one (callers fault) and resync_mb_y != 0
     * causes problems for the first MB line, too.
     */
    if (!FIELD_PICTURE)
        ff_er_frame_end(s);

    MPV_frame_end(s);

    h->current_slice = 0;
}

2412

2413

/**

2414

* Replicate H264 "master" context to thread contexts.

2415

2416

static void clone_slice(H264Context *dst, H264Context *src)

2417

{

2418

memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));

2419

dst->s.current_picture_ptr = src->s.current_picture_ptr;

2420

dst->s.current_picture = src->s.current_picture;

2421

dst->s.linesize = src->s.linesize;

2422

dst->s.uvlinesize = src->s.uvlinesize;

2423

dst->s.first_field = src->s.first_field;

2424

2425

dst->prev_poc_msb = src->prev_poc_msb;

2426

dst->prev_poc_lsb = src->prev_poc_lsb;

2427

dst->prev_frame_num_offset = src->prev_frame_num_offset;

2428

dst->prev_frame_num = src->prev_frame_num;

2429

dst->short_ref_count = src->short_ref_count;

2430

2431

memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));

2432

memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));

2433

memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));

2434

memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));

2435

2436

memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));

2437

memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));

2438

}

2439

2440

/**

2441

* computes profile from profile_idc and constraint_set?_flags

2442

2443

* @param sps SPS

2444

2445

* @return profile as defined by FF_PROFILE_H264_*

2446

2447

int ff_h264_get_profile(SPS *sps)

2448

{

2449

int profile = sps->profile_idc;

2450

2451

switch(sps->profile_idc) {

2452

case FF_PROFILE_H264_BASELINE:

2453

// constraint_set1_flag set to 1

2454

profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;

2455

break;

2456

case FF_PROFILE_H264_HIGH_10:

2457

case FF_PROFILE_H264_HIGH_422:

2458

case FF_PROFILE_H264_HIGH_444_PREDICTIVE:

2459

// constraint_set3_flag set to 1

2460

profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;

2461

break;

2462

}

2463

2464

return profile;

2465

}

2466

2467

/**
 * Decode a slice header.
 * This will also call MPV_common_init() and frame_start() as needed.
 *
 * @param h h264context
 * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding)
 *
 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
 */

2475

2476

static int decode_slice_header(H264Context *h, H264Context *h0){

2477

MpegEncContext * const s = &h->s;

2478

MpegEncContext * const s0 = &h0->s;

2479

unsigned int first_mb_in_slice;

2480

unsigned int pps_id;

2481

int num_ref_idx_active_override_flag;

2482

unsigned int slice_type, tmp, i, j;

2483

int default_ref_list_done = 0;

2484

int last_pic_structure;

2485

2486

s->dropable= h->nal_ref_idc == 0;

2487

2488

/* FIXME: 2tap qpel isn't implemented for high bit depth. */

2489

if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc && !h->pixel_shift){

2490

s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;

2491

s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;

2492

}else{

2493

s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;

2494

s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;

2495

}

2496

2497

first_mb_in_slice= get_ue_golomb(&s->gb);

2498

2499

if(first_mb_in_slice == 0){ //FIXME better field boundary detection

2500

if(h0->current_slice && FIELD_PICTURE){

2501

field_end(h, 1);

2502

}

2503

2504

h0->current_slice = 0;

2505

if (!s0->first_field)

2506

s->current_picture_ptr= NULL;

2507

}

2508

2509

slice_type= get_ue_golomb_31(&s->gb);

2510

if(slice_type > 9){

2511

av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);

2512

return -1;

2513

}

2514

if(slice_type > 4){

2515

slice_type -= 5;

2516

h->slice_type_fixed=1;

2517

}else

2518

h->slice_type_fixed=0;

2519

2520

slice_type= golomb_to_pict_type[ slice_type ];

2521

if (slice_type == AV_PICTURE_TYPE_I

2522

|| (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {

2523

default_ref_list_done = 1;

2524

}

2525

h->slice_type= slice_type;

2526

h->slice_type_nos= slice_type & 3;

2527

2528

s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though

2529

2530

pps_id= get_ue_golomb(&s->gb);

2531

if(pps_id>=MAX_PPS_COUNT){

2532

av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");

2533

return -1;

2534

}

2535

if(!h0->pps_buffers[pps_id]) {

2536

av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);

2537

return -1;

2538

}

2539

h->pps= *h0->pps_buffers[pps_id];

2540

2541

if(!h0->sps_buffers[h->pps.sps_id]) {

2542

av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);

2543

return -1;

2544

}

2545

h->sps = *h0->sps_buffers[h->pps.sps_id];

2546

2547

s->avctx->profile = ff_h264_get_profile(&h->sps);

2548

s->avctx->level = h->sps.level_idc;

2549

s->avctx->refs = h->sps.ref_frame_count;

2550

2551

if(h == h0 && h->dequant_coeff_pps != pps_id){

2552

h->dequant_coeff_pps = pps_id;

2553

init_dequant_tables(h);

2554

}

2555

2556

s->mb_width= h->sps.mb_width;

2557

s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);

2558

2559

h->b_stride= s->mb_width*4;

2560

2561

s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1);

2562

if(h->sps.frame_mbs_only_flag)

2563

s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);

2564

else

2565

s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1);

2566

2567

if (s->context_initialized

2568

&& ( s->width != s->avctx->width || s->height != s->avctx->height

2569

|| av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {

2570

if(h != h0) {

2571

av_log_missing_feature(s->avctx, "Width/height changing with threads is", 0);

2572

return -1; // width / height changed during parallelized decoding

2573

}

2574

free_tables(h, 0);

2575

flush_dpb(s->avctx);

2576

MPV_common_end(s);

2577

}

2578

if (!s->context_initialized) {

2579

if (h != h0) {

2580

av_log(h->s.avctx, AV_LOG_ERROR, "Cannot (re-)initialize context during parallel decoding.\n");

2581

return -1;

2582

}

2583

2584

avcodec_set_dimensions(s->avctx, s->width, s->height);

2585

s->avctx->sample_aspect_ratio= h->sps.sar;

2586

av_assert0(s->avctx->sample_aspect_ratio.den);

2587

2588

h->s.avctx->coded_width = 16*s->mb_width;

2589

h->s.avctx->coded_height = 16*s->mb_height;

2590

2591

if(h->sps.video_signal_type_present_flag){

2592

s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;

2593

if(h->sps.colour_description_present_flag){

2594

s->avctx->color_primaries = h->sps.color_primaries;

2595

s->avctx->color_trc = h->sps.color_trc;

2596

s->avctx->colorspace = h->sps.colorspace;

2597

}

2598

}

2599

2600

if(h->sps.timing_info_present_flag){

2601

int64_t den= h->sps.time_scale;

2602

if(h->x264_build < 44U)

2603

den *= 2;

2604

av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,

2605

h->sps.num_units_in_tick, den, 1<<30);

2606

}

2607

2608

switch (h->sps.bit_depth_luma) {

2609

case 9 :

2610

s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9;

2611

break;

2612

case 10 :

2613

s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10;

2614

break;

2615

default:

2616

if (CHROMA444){

2617

s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P;

2618

}else{

2619

s->avctx->pix_fmt = s->avctx->get_format(s->avctx,

2620

s->avctx->codec->pix_fmts ?

2621

s->avctx->codec->pix_fmts :

2622

s->avctx->color_range == AVCOL_RANGE_JPEG ?

2623

hwaccel_pixfmt_list_h264_jpeg_420 :

2624

ff_hwaccel_pixfmt_list_420);

2625

}

2626

}

2627

2628

s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);

2629

2630

if (MPV_common_init(s) < 0) {

2631

av_log(h->s.avctx, AV_LOG_ERROR, "MPV_common_init() failed.\n");

2632

return -1;

2633

}

2634

s->first_field = 0;

2635

h->prev_interlaced_frame = 1;

2636

2637

init_scan_tables(h);

2638

ff_h264_alloc_tables(h);

2639

2640

if (!HAVE_THREADS || !(s->avctx->active_thread_type&FF_THREAD_SLICE)) {

2641

if (context_init(h) < 0) {

2642

av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");

2643

return -1;

2644

}

2645

} else {

2646

for(i = 1; i < s->avctx->thread_count; i++) {

2647

H264Context *c;

2648

c = h->thread_context[i] = av_malloc(sizeof(H264Context));

2649

memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));

2650

memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));

2651

c->h264dsp = h->h264dsp;

2652

c->sps = h->sps;

2653

c->pps = h->pps;

2654

c->pixel_shift = h->pixel_shift;

2655

init_scan_tables(c);

2656

clone_tables(c, h, i);

2657

}

2658

2659

for(i = 0; i < s->avctx->thread_count; i++)

2660

if (context_init(h->thread_context[i]) < 0) {

2661

av_log(h->s.avctx, AV_LOG_ERROR, "context_init() failed.\n");

2662

return -1;

2663

}

2664

}

2665

}

2666

2667

h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);

2668

2669

h->mb_mbaff = 0;

2670

h->mb_aff_frame = 0;

2671

last_pic_structure = s0->picture_structure;

2672

if(h->sps.frame_mbs_only_flag){

2673

s->picture_structure= PICT_FRAME;

2674

}else{

2675

if(get_bits1(&s->gb)) { //field_pic_flag

2676

s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag

2677

} else {

2678

s->picture_structure= PICT_FRAME;

2679

h->mb_aff_frame = h->sps.mb_aff;

2680

}

2681

}

2682

h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;

2683

2684

if(h0->current_slice == 0){

2685

// Shorten frame num gaps so we don't have to allocate reference frames just to throw them away

2686

if(h->frame_num != h->prev_frame_num) {

2687

int unwrap_prev_frame_num = h->prev_frame_num, max_frame_num = 1<<h->sps.log2_max_frame_num;

2688

2689

if (unwrap_prev_frame_num > h->frame_num) unwrap_prev_frame_num -= max_frame_num;

2690

2691

if ((h->frame_num - unwrap_prev_frame_num) > h->sps.ref_frame_count) {

2692

unwrap_prev_frame_num = (h->frame_num - h->sps.ref_frame_count) - 1;

2693

if (unwrap_prev_frame_num < 0)

2694

unwrap_prev_frame_num += max_frame_num;

2695

2696

h->prev_frame_num = unwrap_prev_frame_num;

2697

}

2698

}

2699

2700

while(h->frame_num != h->prev_frame_num &&

2701

h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){

2702

Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;

2703

av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);

2704

if (ff_h264_frame_start(h) < 0)

2705

return -1;

2706

h->prev_frame_num++;

2707

h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;

2708

s->current_picture_ptr->frame_num= h->prev_frame_num;

2709

ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 0);

2710

ff_thread_report_progress((AVFrame*)s->current_picture_ptr, INT_MAX, 1);

2711

ff_generate_sliding_window_mmcos(h);

2712

ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);

2713

/* Error concealment: if a ref is missing, copy the previous ref in its place.

2714

* FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions

2715

* about there being no actual duplicates.

2716

* FIXME: this doesn't copy padding for out-of-frame motion vectors. Given we're

2717

* concealing a lost frame, this probably isn't noticeable by comparison, but it should

2718

* be fixed. */

2719

if (h->short_ref_count) {

2720

if (prev) {

2721

av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize,

2722

(const uint8_t**)prev->data, prev->linesize,

2723

s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);

2724

h->short_ref[0]->poc = prev->poc+2;

2725

}

2726

h->short_ref[0]->frame_num = h->prev_frame_num;

2727

}

2728

}

2729

2730

/* See if we have a decoded first field looking for a pair... */

2731

if (s0->first_field) {

2732

assert(s0->current_picture_ptr);

2733

assert(s0->current_picture_ptr->data[0]);

2734

assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);

2735

2736

/* figure out if we have a complementary field pair */

2737

if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {

2738

2739

/*
 * Previous field is unmatched. Don't display it, but let it
 * remain for reference if marked as such.
 */

2741

2742

s0->current_picture_ptr = NULL;

2743

s0->first_field = FIELD_PICTURE;

2744

2745

} else {

2746

if (h->nal_ref_idc &&

2747

s0->current_picture_ptr->reference &&

2748

s0->current_picture_ptr->frame_num != h->frame_num) {

2749

2750

/*
 * This and previous field were reference, but had
 * different frame_nums. Consider this field first in
 * pair. Throw away previous field except for reference
 * purposes.
 */

2754

2755

s0->first_field = 1;

2756

s0->current_picture_ptr = NULL;

2757

2758

} else {

2759

/* Second field in complementary pair */

2760

s0->first_field = 0;

2761

}

2762

}

2763

2764

} else {

2765

/* Frame or first field in a potentially complementary pair */

2766

assert(!s0->current_picture_ptr);

2767

s0->first_field = FIELD_PICTURE;

2768

}

2769

2770

if(!FIELD_PICTURE || s0->first_field) {

2771

if (ff_h264_frame_start(h) < 0) {

2772

s0->first_field = 0;

2773

return -1;

2774

}

2775

} else {

2776

ff_release_unused_pictures(s, 0);

2777

}

2778

}

2779

if(h != h0)

2780

clone_slice(h, h0);

2781

2782

s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup

2783

2784

assert(s->mb_num == s->mb_width * s->mb_height);

2785

if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||

2786

first_mb_in_slice >= s->mb_num){

2787

av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");

2788

return -1;

2789

}

2790

s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;

2791

s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;

2792

if (s->picture_structure == PICT_BOTTOM_FIELD)

2793

s->resync_mb_y = s->mb_y = s->mb_y + 1;

2794

assert(s->mb_y < s->mb_height);

2795

2796

if(s->picture_structure==PICT_FRAME){

2797

h->curr_pic_num= h->frame_num;

2798

h->max_pic_num= 1<< h->sps.log2_max_frame_num;

2799

}else{

2800

h->curr_pic_num= 2*h->frame_num + 1;

2801

h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);

2802

}

2803

2804

if(h->nal_unit_type == NAL_IDR_SLICE){

2805

get_ue_golomb(&s->gb); /* idr_pic_id */

2806

}

2807

2808

if(h->sps.poc_type==0){

2809

h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);

2810

2811

if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){

2812

h->delta_poc_bottom= get_se_golomb(&s->gb);

2813

}

2814

}

2815

2816

if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){

2817

h->delta_poc[0]= get_se_golomb(&s->gb);

2818

2819

if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)

2820

h->delta_poc[1]= get_se_golomb(&s->gb);

2821

}

2822

2823

init_poc(h);

2824

2825

if(h->pps.redundant_pic_cnt_present){

2826

h->redundant_pic_count= get_ue_golomb(&s->gb);

2827

}

2828

2829

//set defaults, might be overridden a few lines later

2830

h->ref_count[0]= h->pps.ref_count[0];

2831

h->ref_count[1]= h->pps.ref_count[1];

2832

2833

if(h->slice_type_nos != AV_PICTURE_TYPE_I){

2834

if(h->slice_type_nos == AV_PICTURE_TYPE_B){

2835

h->direct_spatial_mv_pred= get_bits1(&s->gb);

2836

}

2837

num_ref_idx_active_override_flag= get_bits1(&s->gb);

2838

2839

if(num_ref_idx_active_override_flag){

2840

h->ref_count[0]= get_ue_golomb(&s->gb) + 1;

2841

if(h->slice_type_nos==AV_PICTURE_TYPE_B)

2842

h->ref_count[1]= get_ue_golomb(&s->gb) + 1;

2843

2844

if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){

2845

av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");

2846

h->ref_count[0]= h->ref_count[1]= 1;

2847

return -1;

2848

}

2849

}

2850

if(h->slice_type_nos == AV_PICTURE_TYPE_B)

2851

h->list_count= 2;

2852

else

2853

h->list_count= 1;

2854

}else

2855

h->list_count= 0;

2856

2857

if(!default_ref_list_done){

2858

ff_h264_fill_default_ref_list(h);

2859

}

2860

2861

if(h->slice_type_nos!=AV_PICTURE_TYPE_I && ff_h264_decode_ref_pic_list_reordering(h) < 0)

2862

return -1;

2863

2864

if(h->slice_type_nos!=AV_PICTURE_TYPE_I){

2865

s->last_picture_ptr= &h->ref_list[0][0];

2866

ff_copy_picture(&s->last_picture, s->last_picture_ptr);

2867

}

2868

if(h->slice_type_nos==AV_PICTURE_TYPE_B){

2869

s->next_picture_ptr= &h->ref_list[1][0];

2870

ff_copy_picture(&s->next_picture, s->next_picture_ptr);

2871

}

2872

2873

if( (h->pps.weighted_pred && h->slice_type_nos == AV_PICTURE_TYPE_P )

2874

|| (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== AV_PICTURE_TYPE_B ) )

2875

pred_weight_table(h);

2876

else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){

2877

implicit_weight_table(h, -1);

2878

}else {

2879

h->use_weight = 0;

2880

for (i = 0; i < 2; i++) {

2881

h->luma_weight_flag[i] = 0;

2882

h->chroma_weight_flag[i] = 0;

2883

}

2884

}

2885

2886

if(h->nal_ref_idc)

2887

ff_h264_decode_ref_pic_marking(h0, &s->gb);

2888

2889

if(FRAME_MBAFF){

2890

ff_h264_fill_mbaff_ref_list(h);

2891

2892

if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== AV_PICTURE_TYPE_B){

2893

implicit_weight_table(h, 0);

2894

implicit_weight_table(h, 1);

2895

}

2896

}

2897

2898

if(h->slice_type_nos==AV_PICTURE_TYPE_B && !h->direct_spatial_mv_pred)

2899

ff_h264_direct_dist_scale_factor(h);

2900

ff_h264_direct_ref_list_init(h);

2901

2902

if( h->slice_type_nos != AV_PICTURE_TYPE_I && h->pps.cabac ){

2903

tmp = get_ue_golomb_31(&s->gb);

2904

if(tmp > 2){

2905

av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");

2906

return -1;

2907

}

2908

h->cabac_init_idc= tmp;

2909

}

2910

2911

h->last_qscale_diff = 0;

2912

tmp = h->pps.init_qp + get_se_golomb(&s->gb);

2913

if(tmp>51+6*(h->sps.bit_depth_luma-8)){

2914

av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);

2915

return -1;

2916

}

2917

s->qscale= tmp;

2918

h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);

2919

h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);

2920

//FIXME qscale / qp ... stuff

2921

if(h->slice_type == AV_PICTURE_TYPE_SP){

2922

get_bits1(&s->gb); /* sp_for_switch_flag */

2923

}

2924

if(h->slice_type==AV_PICTURE_TYPE_SP || h->slice_type == AV_PICTURE_TYPE_SI){

2925

get_se_golomb(&s->gb); /* slice_qs_delta */

2926

}

2927

2928

h->deblocking_filter = 1;

2929

h->slice_alpha_c0_offset = 52;

2930

h->slice_beta_offset = 52;

2931

if( h->pps.deblocking_filter_parameters_present ) {

2932

tmp= get_ue_golomb_31(&s->gb);

2933

if(tmp > 2){

2934

av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);

2935

return -1;

2936

}

2937

h->deblocking_filter= tmp;

2938

if(h->deblocking_filter < 2)

2939

h->deblocking_filter^= 1; // 1<->0

2940

2941

if( h->deblocking_filter ) {

2942

h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;

2943

h->slice_beta_offset += get_se_golomb(&s->gb) << 1;

2944

if( h->slice_alpha_c0_offset > 104U

2945

|| h->slice_beta_offset > 104U){

2946

av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);

2947

return -1;

2948

}

2949

}

2950

}

2951

2952

if( s->avctx->skip_loop_filter >= AVDISCARD_ALL

2953

||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != AV_PICTURE_TYPE_I)

2954

||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == AV_PICTURE_TYPE_B)

2955

||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))

2956

h->deblocking_filter= 0;

2957

2958

if(h->deblocking_filter == 1 && h0->max_contexts > 1) {

2959

if(s->avctx->flags2 & CODEC_FLAG2_FAST) {

2960

/* Cheat slightly for speed:

2961

Do not bother to deblock across slices. */

2962

h->deblocking_filter = 2;

2963

} else {

2964

h0->max_contexts = 1;

2965

if(!h0->single_decode_warning) {

2966

av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");

2967

h0->single_decode_warning = 1;

2968

}

2969

if (h != h0) {

2970

av_log(h->s.avctx, AV_LOG_ERROR, "Deblocking switched inside frame.\n");

2971

return 1;

2972

}

2973

}

2974

}

2975

h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);

2976

2977

#if 0 //FMO

2978

if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)

2979

slice_group_change_cycle= get_bits(&s->gb, ?);

2980

#endif

2981

2982

h0->last_slice_type = slice_type;

2983

h->slice_num = ++h0->current_slice;

2984

if(h->slice_num >= MAX_SLICES){

2985

av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");

2986

}

2987

2988

for(j=0; j<2; j++){

2989

int id_list[16];

2990

int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];

2991

for(i=0; i<16; i++){

2992

id_list[i]= 60;

2993

if(h->ref_list[j][i].data[0]){

2994

int k;

2995

uint8_t *base= h->ref_list[j][i].base[0];

2996

for(k=0; k<h->short_ref_count; k++)

2997

if(h->short_ref[k]->base[0] == base){

2998

id_list[i]= k;

2999

break;

3000

}

3001

for(k=0; k<h->long_ref_count; k++)

3002

if(h->long_ref[k] && h->long_ref[k]->base[0] == base){

3003

id_list[i]= h->short_ref_count + k;

3004

break;

3005

}

3006

}

3007

}

3008

3009

ref2frm[0]=

3010

ref2frm[1]= -1;

3011

for(i=0; i<16; i++)

3012

ref2frm[i+2]= 4*id_list[i]

3013

+(h->ref_list[j][i].reference&3);

3014

ref2frm[18+0]=

3015

ref2frm[18+1]= -1;

3016

for(i=16; i<48; i++)

3017

ref2frm[i+4]= 4*id_list[(i-16)>>1]

3018

+(h->ref_list[j][i].reference&3);

3019

}

3020

3021

//FIXME: fix draw_edges+PAFF+frame threads

3022

h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE || (!h->sps.frame_mbs_only_flag && s->avctx->active_thread_type)) ? 0 : 16;

3023

h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;

3024

3025

if(s->avctx->debug&FF_DEBUG_PICT_INFO){

3026

av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",

3027

h->slice_num,

3028

(s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),

3029

first_mb_in_slice,

3030

av_get_picture_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",

3031

pps_id, h->frame_num,

3032

s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],

3033

h->ref_count[0], h->ref_count[1],

3034

s->qscale,

3035

h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,

3036

h->use_weight,

3037

h->use_weight==1 && h->use_weight_chroma ? "c" : "",

3038

h->slice_type == AV_PICTURE_TYPE_B ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""

3039

);

3040

}

3041

3042

return 0;

3043

}

3044

3045

int ff_h264_get_slice_type(const H264Context *h)

3046

{

3047

switch (h->slice_type) {

3048

case AV_PICTURE_TYPE_P: return 0;

3049

case AV_PICTURE_TYPE_B: return 1;

3050

case AV_PICTURE_TYPE_I: return 2;

3051

case AV_PICTURE_TYPE_SP: return 3;

3052

case AV_PICTURE_TYPE_SI: return 4;

3053

default: return -1;

3054

}

3055

}

3056

3057

/**

3058

3059

* @return non zero if the loop filter can be skiped

3060

3061

static int fill_filter_caches(H264Context *h, int mb_type){

3062

MpegEncContext * const s = &h->s;

3063

const int mb_xy= h->mb_xy;

3064

int top_xy, left_xy[2];

3065

int top_type, left_type[2];

3066

3067

top_xy = mb_xy - (s->mb_stride << MB_FIELD);

3068

3069

//FIXME deblocking could skip the intra and nnz parts.

3070

3071

/* Wow, what a mess, why didn't they simplify the interlacing & intra

3072

* stuff, I can't imagine that these complex rules are worth it. */

3073

3074

left_xy[1] = left_xy[0] = mb_xy-1;

3075

if(FRAME_MBAFF){

3076

const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);

3077

const int curr_mb_field_flag = IS_INTERLACED(mb_type);

3078

if(s->mb_y&1){

3079

if (left_mb_field_flag != curr_mb_field_flag) {

3080

left_xy[0] -= s->mb_stride;

3081

}

3082

}else{

3083

if(curr_mb_field_flag){

3084

top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);

3085

}

3086

if (left_mb_field_flag != curr_mb_field_flag) {

3087

left_xy[1] += s->mb_stride;

3088

}

3089

}

3090

}

3091

3092

h->top_mb_xy = top_xy;

3093

h->left_mb_xy[0] = left_xy[0];

3094

h->left_mb_xy[1] = left_xy[1];

3095

{

3096

//for sufficiently low qp, filtering wouldn't do anything

3097

//this is a conservative estimate: could also check beta_offset and more accurate chroma_qp

3098

int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice

3099

int qp = s->current_picture.qscale_table[mb_xy];

3100

if(qp <= qp_thresh

3101

&& (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)

3102

&& (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){

3103

if(!FRAME_MBAFF)

3104

return 1;

3105

if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)

3106

&& (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))

3107

return 1;

3108

}

3109

}

3110

3111

top_type = s->current_picture.mb_type[top_xy] ;

3112

left_type[0] = s->current_picture.mb_type[left_xy[0]];

3113

left_type[1] = s->current_picture.mb_type[left_xy[1]];

3114

if(h->deblocking_filter == 2){

3115

if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;

3116

if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;

3117

}else{

3118

if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;

3119

if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;

3120

}

3121

h->top_type = top_type ;

3122

h->left_type[0]= left_type[0];

3123

h->left_type[1]= left_type[1];

3124

3125

if(IS_INTRA(mb_type))

3126

return 0;

3127

3128

AV_COPY32(&h->non_zero_count_cache[4+8* 1], &h->non_zero_count[mb_xy][ 0]);

3129

AV_COPY32(&h->non_zero_count_cache[4+8* 2], &h->non_zero_count[mb_xy][ 4]);

3130

AV_COPY32(&h->non_zero_count_cache[4+8* 3], &h->non_zero_count[mb_xy][ 8]);

3131

AV_COPY32(&h->non_zero_count_cache[4+8* 4], &h->non_zero_count[mb_xy][12]);

3132

3133

h->cbp= h->cbp_table[mb_xy];

3134

3135

{

3136

int list;

3137

for(list=0; list<h->list_count; list++){

3138

int8_t *ref;

3139

int y, b_stride;

3140

int16_t (*mv_dst)[2];

3141

int16_t (*mv_src)[2];

3142

3143

if(!USES_LIST(mb_type, list)){

3144

fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);

3145

AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);

3146

AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);

3147

AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);

3148

AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);

3149

continue;

3150

}

3151

3152

ref = &s->current_picture.ref_index[list][4*mb_xy];

3153

{

3154

int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);

3155

AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);

3156

AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);

3157

ref += 2;

3158

AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);

3159

AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);

3160

}

3161

3162

b_stride = h->b_stride;

3163

mv_dst = &h->mv_cache[list][scan8[0]];

3164

mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];

3165

for(y=0; y<4; y++){

3166

AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);

3167

}

3168

3169

}

3170

}

3171

3172

3173

3174

0 . T T. T T T T

3175

1 L . .L . . . .

3176

2 L . .L . . . .

3177

3 . T TL . . . .

3178

4 L . .L . . . .

3179

5 L . .. . . . .

3180

3181

//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)

3182

if(top_type){

3183

AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][3*4]);

3184

}

3185

3186

if(left_type[0]){

3187

h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][3+0*4];

3188

h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][3+1*4];

3189

h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][3+2*4];

3190

h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][3+3*4];

3191

}

3192

3193

// CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs

3194

if(!CABAC && h->pps.transform_8x8_mode){

3195

if(IS_8x8DCT(top_type)){

3196

h->non_zero_count_cache[4+8*0]=

3197

h->non_zero_count_cache[5+8*0]= (h->cbp_table[top_xy] & 0x4000) >> 12;

3198

h->non_zero_count_cache[6+8*0]=

3199

h->non_zero_count_cache[7+8*0]= (h->cbp_table[top_xy] & 0x8000) >> 12;

3200

}

3201

if(IS_8x8DCT(left_type[0])){

3202

h->non_zero_count_cache[3+8*1]=

3203

h->non_zero_count_cache[3+8*2]= (h->cbp_table[left_xy[0]]&0x2000) >> 12; //FIXME check MBAFF

3204

}

3205

if(IS_8x8DCT(left_type[1])){

3206

h->non_zero_count_cache[3+8*3]=

3207

h->non_zero_count_cache[3+8*4]= (h->cbp_table[left_xy[1]]&0x8000) >> 12; //FIXME check MBAFF

3208

}

3209

3210

if(IS_8x8DCT(mb_type)){

3211

h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=

3212

h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= (h->cbp & 0x1000) >> 12;

3213

3214

h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=

3215

h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= (h->cbp & 0x2000) >> 12;

3216

3217

h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=

3218

h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= (h->cbp & 0x4000) >> 12;

3219

3220

h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=

3221

h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= (h->cbp & 0x8000) >> 12;

3222

}

3223

}

3224

3225

if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){

3226

int list;

3227

for(list=0; list<h->list_count; list++){

3228

if(USES_LIST(top_type, list)){

3229

const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;

3230

const int b8_xy= 4*top_xy + 2;

3231

int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);

3232

AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);

3233

h->ref_cache[list][scan8[0] + 0 - 1*8]=

3234

h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];

3235

h->ref_cache[list][scan8[0] + 2 - 1*8]=

3236

h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];

3237

}else{

3238

AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);

3239

AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);

3240

}

3241

3242

if(!IS_INTERLACED(mb_type^left_type[0])){

3243

if(USES_LIST(left_type[0], list)){

3244

const int b_xy= h->mb2b_xy[left_xy[0]] + 3;

3245

const int b8_xy= 4*left_xy[0] + 1;

3246

int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);

3247

AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);

3248

AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);

3249

AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);

3250

AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);

3251

h->ref_cache[list][scan8[0] - 1 + 0 ]=

3252

h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];

3253

h->ref_cache[list][scan8[0] - 1 +16 ]=

3254

h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];

3255

}else{

3256

AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);

3257

AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);

3258

AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);

3259

AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);

3260

h->ref_cache[list][scan8[0] - 1 + 0 ]=

3261

h->ref_cache[list][scan8[0] - 1 + 8 ]=

3262

h->ref_cache[list][scan8[0] - 1 + 16 ]=

3263

h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;

3264

}

3265

}

3266

}

3267

}

3268

3269

return 0;

3270

}

3271

3272

/**
 * Run the loop filter over macroblock columns [start_x, end_x) of the row at
 * s->mb_y; when FRAME_MBAFF is set, row s->mb_y + 1 is processed as well.
 *
 * Nothing is filtered when h->deblocking_filter is zero. In every case, on
 * return s->mb_x is end_x, s->mb_y is end_mb_y - FRAME_MBAFF, h->slice_type
 * holds the value it had on entry and h->chroma_qp[] is recomputed from
 * s->qscale. Other per-macroblock context fields written inside the loop
 * (h->mb_xy, h->slice_num, h->list_count, ...) are left at their last values.
 *
 * @param h       decoder context
 * @param start_x first macroblock column to filter
 * @param end_x   one past the last macroblock column to filter
 */
static void loop_filter(H264Context *h, int start_x, int end_x){
    MpegEncContext * const s = &h->s;
    uint8_t *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize, mb_x, mb_y;
    const int end_mb_y       = s->mb_y + FRAME_MBAFF;
    const int old_slice_type = h->slice_type;
    const int pixel_shift    = h->pixel_shift;

    if (h->deblocking_filter) {
        for (mb_x = start_x; mb_x < end_x; mb_x++) {
            for (mb_y = end_mb_y - FRAME_MBAFF; mb_y <= end_mb_y; mb_y++) {
                int mb_xy, mb_type;
                int chroma_off;

                mb_xy = mb_x + mb_y*s->mb_stride;
                h->mb_xy      = mb_xy;
                h->slice_num  = h->slice_table[mb_xy];
                mb_type       = s->current_picture.mb_type[mb_xy];
                h->list_count = h->list_counts[mb_xy];

                if (FRAME_MBAFF)
                    h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);

                s->mb_x = mb_x;
                s->mb_y = mb_y;

                /* both chroma planes use the same offset from their plane base */
                chroma_off = ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444);
                dest_y  = s->current_picture.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize) * 16;
                dest_cb = s->current_picture.data[1] + chroma_off;
                dest_cr = s->current_picture.data[2] + chroma_off;
                //FIXME simplify above

                if (MB_FIELD) {
                    linesize   = h->mb_linesize   = s->linesize * 2;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
                    if (mb_y & 1) { //FIXME move out of this function?
                        dest_y  -= s->linesize*15;
                        dest_cb -= s->uvlinesize*((8 << CHROMA444)-1);
                        dest_cr -= s->uvlinesize*((8 << CHROMA444)-1);
                    }
                } else {
                    linesize   = h->mb_linesize   = s->linesize;
                    uvlinesize = h->mb_uvlinesize = s->uvlinesize;
                }

                backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0);

                /* a non-zero result means this macroblock needs no filtering */
                if (fill_filter_caches(h, mb_type))
                    continue;

                h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
                h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);

                if (FRAME_MBAFF)
                    ff_h264_filter_mb     (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
                else
                    ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
            }
        }
    }

    /* put back the state the macroblock decoding loop relies on */
    h->slice_type   = old_slice_type;
    s->mb_x         = end_x;
    s->mb_y         = end_mb_y - FRAME_MBAFF;
    h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
    h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
}

3331

3332

/**
 * Guess the field decoding flag of the macroblock at (s->mb_x, s->mb_y) from
 * its neighbours.
 *
 * The left neighbour's type is used if it belongs to the current slice,
 * otherwise the neighbour one mb_stride above if that one does, otherwise 0.
 * h->mb_mbaff and h->mb_field_decoding_flag are both set to 1 when the chosen
 * type is interlaced and to 0 otherwise.
 */
static void predict_field_decoding_flag(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
    int mb_type = 0;

    if (h->slice_table[mb_xy-1] == h->slice_num)
        mb_type = s->current_picture.mb_type[mb_xy-1];
    else if (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
        mb_type = s->current_picture.mb_type[mb_xy-s->mb_stride];

    if (IS_INTERLACED(mb_type))
        h->mb_mbaff = h->mb_field_decoding_flag = 1;
    else
        h->mb_mbaff = h->mb_field_decoding_flag = 0;
}

3342

3343

/**

3344

* Draw edges and report progress for the last MB row.

3345

3346

static void decode_finish_row(H264Context *h){

3347

MpegEncContext * const s = &h->s;

3348

int top = 16*(s->mb_y >> FIELD_PICTURE);

3349

int height = 16 << FRAME_MBAFF;

3350

int deblock_border = (16 + 4) << FRAME_MBAFF;

3351

int pic_height = 16*s->mb_height >> FIELD_PICTURE;

3352

3353

if (h->deblocking_filter) {

3354

if((top + height) >= pic_height)

3355

height += deblock_border;

3356

3357

top -= deblock_border;

3358

}

3359

3360

if (top >= pic_height || (top + height) < h->emu_edge_height)

3361

return;

3362

3363

height = FFMIN(height, pic_height - top);

3364

if (top < h->emu_edge_height) {

3365

height = top+height;

3366

top = 0;

3367

}

3368

3369

ff_draw_horiz_band(s, top, height);

3370

3371

if (s->dropable) return;

3372

3373

ff_thread_report_progress((AVFrame*)s->current_picture_ptr, top + height - 1,

3374

s->picture_structure==PICT_BOTTOM_FIELD);

3375

}

3376

3377

static int decode_slice(struct AVCodecContext *avctx, void *arg){

3378

H264Context *h = *(void**)arg;

3379

MpegEncContext * const s = &h->s;

3380

const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;

3381

int lf_x_start = s->mb_x;

3382

3383

s->mb_skip_run= -1;

3384

3385

h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||

3386

(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));

3387

3388

if( h->pps.cabac ) {

3389

/* realign */

3390

align_get_bits( &s->gb );

3391

3392

/* init cabac */

3393

ff_init_cabac_states( &h->cabac);

3394

ff_init_cabac_decoder( &h->cabac,

3395

s->gb.buffer + get_bits_count(&s->gb)/8,

3396

(get_bits_left(&s->gb) + 7)/8);

3397

3398

ff_h264_init_cabac_states(h);

3399

3400

for(;;){

3401

//START_TIMER

3402

int ret = ff_h264_decode_mb_cabac(h);

3403

int eos;

3404

//STOP_TIMER("decode_mb_cabac")

3405

3406

if(ret>=0) ff_h264_hl_decode_mb(h);

3407

3408

if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?

3409

s->mb_y++;

3410

3411

ret = ff_h264_decode_mb_cabac(h);

3412

3413

if(ret>=0) ff_h264_hl_decode_mb(h);

3414

s->mb_y--;

3415

}

3416

eos = get_cabac_terminate( &h->cabac );

3417

3418

if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){

3419

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

3420

if (s->mb_x >= lf_x_start) loop_filter(h, lf_x_start, s->mb_x + 1);

3421

return 0;

3422

}

3423

if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {

3424

av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);

3425

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

3426

return -1;

3427

}

3428

3429

if( ++s->mb_x >= s->mb_width ) {

3430

loop_filter(h, lf_x_start, s->mb_x);

3431

s->mb_x = lf_x_start = 0;

3432

decode_finish_row(h);

3433

++s->mb_y;

3434

if(FIELD_OR_MBAFF_PICTURE) {

3435

++s->mb_y;

3436

if(FRAME_MBAFF && s->mb_y < s->mb_height)

3437

predict_field_decoding_flag(h);

3438

}

3439

}

3440

3441

if( eos || s->mb_y >= s->mb_height ) {

3442

tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

3443

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

3444

if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);

3445

return 0;

3446

}

3447

}

3448

3449

} else {

3450

for(;;){

3451

int ret = ff_h264_decode_mb_cavlc(h);

3452

3453

if(ret>=0) ff_h264_hl_decode_mb(h);

3454

3455

if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?

3456

s->mb_y++;

3457

ret = ff_h264_decode_mb_cavlc(h);

3458

3459

if(ret>=0) ff_h264_hl_decode_mb(h);

3460

s->mb_y--;

3461

}

3462

3463

if(ret<0){

3464

av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);

3465

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

3466

return -1;

3467

}

3468

3469

if(++s->mb_x >= s->mb_width){

3470

loop_filter(h, lf_x_start, s->mb_x);

3471

s->mb_x = lf_x_start = 0;

3472

decode_finish_row(h);

3473

++s->mb_y;

3474

if(FIELD_OR_MBAFF_PICTURE) {

3475

++s->mb_y;

3476

if(FRAME_MBAFF && s->mb_y < s->mb_height)

3477

predict_field_decoding_flag(h);

3478

}

3479

if(s->mb_y >= s->mb_height){

3480

tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

3481

3482

if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {

3483

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

3484

3485

return 0;

3486

}else{

3487

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

3488

3489

return -1;

3490

}

3491

}

3492

}

3493

3494

if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){

3495

tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

3496

if(get_bits_count(&s->gb) == s->gb.size_in_bits ){

3497

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

3498

if (s->mb_x > lf_x_start) loop_filter(h, lf_x_start, s->mb_x);

3499

3500

return 0;

3501

}else{

3502

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

3503

3504

return -1;

3505

}

3506

}

3507

}

3508

}

3509

3510

#if 0

3511

for(;s->mb_y < s->mb_height; s->mb_y++){

3512

for(;s->mb_x < s->mb_width; s->mb_x++){

3513

int ret= decode_mb(h);

3514

3515

ff_h264_hl_decode_mb(h);

3516

3517

if(ret<0){

3518

av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);

3519

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

3520

3521

return -1;

3522

}

3523

3524

if(++s->mb_x >= s->mb_width){

3525

s->mb_x=0;

3526

if(++s->mb_y >= s->mb_height){

3527

if(get_bits_count(s->gb) == s->gb.size_in_bits){

3528

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

3529

3530

return 0;

3531

}else{

3532

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

3533

3534

return -1;

3535

}

3536

}

3537

}

3538

3539

if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){

3540

if(get_bits_count(s->gb) == s->gb.size_in_bits){

3541

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

3542

3543

return 0;

3544

}else{

3545

ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

3546

3547

return -1;

3548

}

3549

}

3550

}

3551

s->mb_x=0;

3552

ff_draw_horiz_band(s, 16*s->mb_y, 16);

3553

}

3554

#endif

3555

return -1; //not reached

3556

}

3557

3558

/**

3559

* Call decode_slice() for each context.

3560

3561

* @param h h264 master context

3562

* @param context_count number of contexts to execute

3563

3564

static void execute_decode_slices(H264Context *h, int context_count){

3565

MpegEncContext * const s = &h->s;

3566

AVCodecContext * const avctx= s->avctx;

3567

H264Context *hx;

3568

int i;

3569

3570

if (s->avctx->hwaccel)

3571

return;

3572

if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)

3573

return;

3574

if(context_count == 1) {

3575

decode_slice(avctx, &h);

3576

} else {

3577

for(i = 1; i < context_count; i++) {

3578

hx = h->thread_context[i];

3579

hx->s.error_recognition = avctx->error_recognition;

3580

hx->s.error_count = 0;

3581

}

3582

3583

avctx->execute(avctx, (void *)decode_slice,

3584

h->thread_context, NULL, context_count, sizeof(void*));

3585

3586

/* pull back stuff from slices to master context */

3587

hx = h->thread_context[context_count - 1];

3588

s->mb_x = hx->s.mb_x;

3589

s->mb_y = hx->s.mb_y;

3590

s->dropable = hx->s.dropable;

3591

s->picture_structure = hx->s.picture_structure;

3592

for(i = 1; i < context_count; i++)

3593

h->s.error_count += h->thread_context[i]->s.error_count;

3594

}

3595

}

3596

3597

3598

/**
 * Split an input buffer into NAL units and dispatch each one.
 *
 * The buffer is walked either as length-prefixed NALs (h->is_avc) or by
 * searching for 00 00 01 start codes. When frame threading is active the
 * buffer is walked twice: pass 0 only records in nals_needed the index of
 * the last SPS/PPS/slice NAL, pass 1 performs the actual decoding. Without
 * frame threading only pass 1 runs.
 *
 * Slices are queued in h->thread_context[] and flushed through
 * execute_decode_slices() whenever h->max_contexts of them are pending,
 * and once more at the end of the buffer.
 *
 * @param h        h264 master context
 * @param buf      input data
 * @param buf_size size of buf in bytes
 * @return the buffer position reached, or -1 on a fatal error (NAL
 *         unescaping failure, IDR/non-IDR slice mix, hwaccel failure,
 *         unsupported bit depth)
 */
static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    H264Context *hx; ///< thread context
    int buf_index;
    int context_count;
    int next_avc;
    int pass = !(avctx->active_thread_type & FF_THREAD_FRAME);
    int nals_needed=0; ///< number of NALs that need decoding before the next frame thread starts
    int nal_index;

    h->max_contexts = (HAVE_THREADS && (s->avctx->active_thread_type&FF_THREAD_SLICE)) ? avctx->thread_count : 1;
    if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
        h->current_slice = 0;
        if (!s->first_field)
            s->current_picture_ptr= NULL;
        ff_h264_reset_sei(h);
    }

    for(;pass <= 1;pass++){
        buf_index = 0;
        context_count = 0;
        next_avc = h->is_avc ? 0 : buf_size;
        nal_index = 0;
        for(;;){
            int consumed;
            int dst_length;
            int bit_length;
            const uint8_t *ptr;
            int i, nalsize = 0;
            int err;

            if(buf_index >= next_avc) {
                /* stop unless a complete length prefix is still available,
                 * so the prefix is never read from beyond buf_size */
                if(buf_index >= buf_size - h->nal_length_size) break;
                nalsize = 0;
                for(i = 0; i < h->nal_length_size; i++)
                    nalsize = (nalsize << 8) | buf[buf_index++];
                if(nalsize <= 0 || nalsize > buf_size - buf_index){
                    av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
                    break;
                }
                next_avc= buf_index + nalsize;
            } else {
                // start code prefix search
                for(; buf_index + 3 < next_avc; buf_index++){
                    // This should always succeed in the first iteration.
                    if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
                        break;
                }

                if(buf_index+3 >= buf_size) break;

                buf_index+=3;
                if(buf_index >= next_avc) continue;
            }

            hx = h->thread_context[context_count];

            ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
            if (ptr==NULL || dst_length < 0){
                return -1;
            }
            i= buf_index + consumed;
            if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
               buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
                s->workaround_bugs |= FF_BUG_TRUNCATED;

            if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
                /* strip trailing zero bytes; the length must be tested
                 * first so that ptr[-1] is never read */
                while(dst_length > 0 && ptr[dst_length - 1] == 0)
                    dst_length--;
            }
            bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));

            if(s->avctx->debug&FF_DEBUG_STARTCODE){
                av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
            }

            if (h->is_avc && (nalsize != consumed) && nalsize){
                av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
            }

            buf_index += consumed;
            nal_index++;

            if(pass == 0) {
                // packets can sometimes contain multiple PPS/SPS
                // e.g. two PAFF field pictures in one packet, or a demuxer which splits NALs strangely
                // if so, when frame threading we can't start the next thread until we've read all of them
                switch (hx->nal_unit_type) {
                case NAL_SPS:
                case NAL_PPS:
                case NAL_IDR_SLICE:
                case NAL_SLICE:
                    nals_needed = nal_index;
                }
                continue;
            }

            //FIXME do not discard SEI id
            if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
                continue;

          again:
            err = 0;
            switch(hx->nal_unit_type){
            case NAL_IDR_SLICE:
                if (h->nal_unit_type != NAL_IDR_SLICE) {
                    av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices\n");
                    return -1;
                }
                idr(h); //FIXME ensure we don't loose some frames if there is reordering
                /* fallthrough: an IDR slice is then decoded like any slice */
            case NAL_SLICE:
                init_get_bits(&hx->s.gb, ptr, bit_length);
                hx->intra_gb_ptr=
                hx->inter_gb_ptr= &hx->s.gb;
                hx->s.data_partitioning = 0;

                if((err = decode_slice_header(hx, h)))
                   break;

                s->current_picture_ptr->key_frame |=
                        (hx->nal_unit_type == NAL_IDR_SLICE) ||
                        (h->sei_recovery_frame_cnt >= 0);

                if (h->current_slice == 1) {
                    if(!(s->flags2 & CODEC_FLAG2_CHUNKS)) {
                        decode_postinit(h, nal_index >= nals_needed);
                    }

                    if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
                        return -1;
                    if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
                        ff_vdpau_h264_picture_start(s);
                }

                if(hx->redundant_pic_count==0
                   && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
                   && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
                   && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
                   && avctx->skip_frame < AVDISCARD_ALL){
                    if(avctx->hwaccel) {
                        if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
                            return -1;
                    }else
                    if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
                        static const uint8_t start_code[] = {0x00, 0x00, 0x01};
                        ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
                        ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
                    }else
                        context_count++;
                }
                break;
            case NAL_DPA:
                init_get_bits(&hx->s.gb, ptr, bit_length);
                hx->intra_gb_ptr=
                hx->inter_gb_ptr= NULL;

                if ((err = decode_slice_header(hx, h)) < 0)
                    break;

                hx->s.data_partitioning = 1;

                break;
            case NAL_DPB:
                init_get_bits(&hx->intra_gb, ptr, bit_length);
                hx->intra_gb_ptr= &hx->intra_gb;
                break;
            case NAL_DPC:
                init_get_bits(&hx->inter_gb, ptr, bit_length);
                hx->inter_gb_ptr= &hx->inter_gb;

                if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
                   && s->context_initialized
                   && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
                   && (avctx->skip_frame < AVDISCARD_BIDIR  || hx->slice_type_nos!=AV_PICTURE_TYPE_B)
                   && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==AV_PICTURE_TYPE_I)
                   && avctx->skip_frame < AVDISCARD_ALL)
                    context_count++;
                break;
            case NAL_SEI:
                init_get_bits(&s->gb, ptr, bit_length);
                ff_h264_decode_sei(h);
                break;
            case NAL_SPS:
                init_get_bits(&s->gb, ptr, bit_length);
                ff_h264_decode_seq_parameter_set(h);

                if (s->flags& CODEC_FLAG_LOW_DELAY ||
                    (h->sps.bitstream_restriction_flag && !h->sps.num_reorder_frames))
                    s->low_delay=1;

                if(avctx->has_b_frames < 2)
                    avctx->has_b_frames= !s->low_delay;

                /* re-initialise the DSP function tables when the luma bit
                 * depth changes; only 8..10 bits are accepted here */
                if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) {
                    if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) {
                        avctx->bits_per_raw_sample = h->sps.bit_depth_luma;
                        h->pixel_shift = h->sps.bit_depth_luma > 8;

                        ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
                        ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
                        dsputil_init(&s->dsp, s->avctx);
                    } else {
                        av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
                        return -1;
                    }
                }
                break;
            case NAL_PPS:
                init_get_bits(&s->gb, ptr, bit_length);

                ff_h264_decode_picture_parameter_set(h, bit_length);

                break;
            case NAL_AUD:
            case NAL_END_SEQUENCE:
            case NAL_END_STREAM:
            case NAL_FILLER_DATA:
            case NAL_SPS_EXT:
            case NAL_AUXILIARY_SLICE:
                break;
            default:
                av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
            }

            /* all available contexts are filled: decode the queued slices */
            if(context_count == h->max_contexts) {
                execute_decode_slices(h, context_count);
                context_count = 0;
            }

            if (err < 0)
                av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
            else if(err == 1) {
                /* Slice could not be decoded in parallel mode, copy down
                 * NAL unit stuff to context 0 and restart. Note that
                 * rbsp_buffer is not transferred, but since we no longer
                 * run in parallel mode this should not be an issue. */
                h->nal_unit_type = hx->nal_unit_type;
                h->nal_ref_idc   = hx->nal_ref_idc;
                hx = h;
                goto again;
            }
        }
    }
    if(context_count)
        execute_decode_slices(h, context_count);
    return buf_index;
}

3846

3847

/**

3848

* returns the number of bytes consumed for building the current frame

3849

3850

static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){

3851

if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)

3852

if(pos+10>buf_size) pos=buf_size; // oops ;)

3853

3854

return pos;

3855

}

3856

3857

static int decode_frame(AVCodecContext *avctx,

3858

void *data, int *data_size,

3859

AVPacket *avpkt)

3860

{

3861

const uint8_t *buf = avpkt->data;

3862

int buf_size = avpkt->size;

3863

H264Context *h = avctx->priv_data;

3864

MpegEncContext *s = &h->s;

3865

AVFrame *pict = data;

3866

int buf_index;

3867

3868

s->flags= avctx->flags;

3869

s->flags2= avctx->flags2;

3870

3871

/* end of stream, output what is still in the buffers */

3872

out:

3873

if (buf_size == 0) {

3874

Picture *out;

3875

int i, out_idx;

3876

3877

s->current_picture_ptr = NULL;

3878

3879

//FIXME factorize this with the output code below

3880

out = h->delayed_pic[0];

3881

out_idx = 0;

3882

for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)

3883

if(h->delayed_pic[i]->poc < out->poc){

3884

out = h->delayed_pic[i];

3885

out_idx = i;

3886

}

3887

3888

for(i=out_idx; h->delayed_pic[i]; i++)

3889

h->delayed_pic[i] = h->delayed_pic[i+1];

3890

3891

if(out){

3892

*data_size = sizeof(AVFrame);

3893

*pict= *(AVFrame*)out;

3894

}

3895

3896

return 0;

3897

}

3898

3899

buf_index=decode_nal_units(h, buf, buf_size);

3900

if(buf_index < 0)

3901

return -1;

3902

3903

if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {

3904

buf_size = 0;

3905

goto out;

3906

}

3907

3908

if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){

3909

if (avctx->skip_frame >= AVDISCARD_NONREF)

3910

return 0;

3911

av_log(avctx, AV_LOG_ERROR, "no frame!\n");

3912

return -1;

3913

}

3914

3915

if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){

3916

3917

if(s->flags2 & CODEC_FLAG2_CHUNKS) decode_postinit(h, 1);

3918

3919

field_end(h, 0);

3920

3921

if (!h->next_output_pic) {

3922

/* Wait for second field. */

3923

*data_size = 0;

3924

3925

} else {

3926

*data_size = sizeof(AVFrame);

3927

*pict = *(AVFrame*)h->next_output_pic;

3928

}

3929

}

3930

3931

assert(pict->data[0] || !*data_size);

3932

ff_print_debug_info(s, pict);

3933

//printf("out %d\n", (int)pict->data[0]);

3934

3935

return get_consumed_bytes(s, buf_index, buf_size);

3936

}

3937

#if 0
/**
 * Fill h->mb_avail[] for the current macroblock position.
 *
 * Entries 0..2 describe the three macroblocks in the row above (left,
 * centre, right) and entry 3 the left neighbour; a neighbour counts as
 * available when it lies inside the picture and its slice_table entry
 * equals h->slice_num. Disabled code, kept for reference.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    if(s->mb_y){
        h->mb_avail[0]= s->mb_x                 && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
        h->mb_avail[1]=                            h->slice_table[mb_xy - s->mb_stride    ] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
    }else{
        h->mb_avail[0]=
        h->mb_avail[1]=
        h->mb_avail[2]= 0;
    }
    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif

3956

3957

#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Self-test, built only when TEST is defined.
 *
 * Writes COUNT unsigned and then COUNT signed exp-golomb codes into a
 * buffer, reads them back and reports every value that does not round-trip.
 * The (I)DCT, quantizer and NAL layer tests are disabled with #if 0.
 *
 * @return 0
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
//    int int_temp[10000];
    DSPContext dsp;
    AVCodecContext avctx = { 0 }; /* zeroed so dsputil_init() reads defined values */

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            /* the value that was written is i - COUNT/2, so report that
             * as the expected one */
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        h->h264dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */

4131

4132

4133

/**
 * Release the decoder tables and every stored SPS and PPS buffer.
 *
 * @param h h264 context whose tables and parameter-set buffers are freed
 */
av_cold void ff_h264_free_context(H264Context *h)
{
    int i;

    free_tables(h, 1); //FIXME cleanup init stuff perhaps

    for(i = 0; i < MAX_SPS_COUNT; i++)
        av_freep(h->sps_buffers + i);

    for(i = 0; i < MAX_PPS_COUNT; i++)
        av_freep(h->pps_buffers + i);
}

4145

4146

/**
 * Codec close callback: frees the H.264 context data, then calls
 * MPV_common_end() on the embedded MpegEncContext.
 *
 * @param avctx codec context; its priv_data is the H264Context
 * @return always 0
 */
av_cold int ff_h264_decode_end(AVCodecContext *avctx)
{
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;

    ff_h264_free_context(h);

    MPV_common_end(s);

//    memset(h, 0, sizeof(H264Context));

    return 0;
}

4159

4160

static const AVProfile profiles[] = {

4161

{ FF_PROFILE_H264_BASELINE, "Baseline" },

4162

{ FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline" },

4163

{ FF_PROFILE_H264_MAIN, "Main" },

4164

{ FF_PROFILE_H264_EXTENDED, "Extended" },

4165

{ FF_PROFILE_H264_HIGH, "High" },

4166

{ FF_PROFILE_H264_HIGH_10, "High 10" },

4167

{ FF_PROFILE_H264_HIGH_10_INTRA, "High 10 Intra" },

4168

{ FF_PROFILE_H264_HIGH_422, "High 4:2:2" },

4169

{ FF_PROFILE_H264_HIGH_422_INTRA, "High 4:2:2 Intra" },

4170

{ FF_PROFILE_H264_HIGH_444, "High 4:4:4" },

4171

{ FF_PROFILE_H264_HIGH_444_PREDICTIVE, "High 4:4:4 Predictive" },

4172

{ FF_PROFILE_H264_HIGH_444_INTRA, "High 4:4:4 Intra" },

4173

{ FF_PROFILE_H264_CAVLC_444, "CAVLC 4:4:4" },

4174

{ FF_PROFILE_UNKNOWN },

4175

};

4176

4177

AVCodec ff_h264_decoder = {

4178

"h264",

4179

AVMEDIA_TYPE_VIDEO,

4180

CODEC_ID_H264,

4181

sizeof(H264Context),

4182

ff_h264_decode_init,

4183

NULL,

4184

ff_h264_decode_end,

4185

decode_frame,

4186

/*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |

4187

CODEC_CAP_SLICE_THREADS | CODEC_CAP_FRAME_THREADS,

4188

.flush= flush_dpb,

4189

.long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),

4190

.init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),

4191

.update_thread_context = ONLY_IF_THREADS_ENABLED(decode_update_thread_context),

4192

.profiles = NULL_IF_CONFIG_SMALL(profiles),

4193

};

4194

4195

#if CONFIG_H264_VDPAU_DECODER
/** H.264 decoder registration for VDPAU; outputs PIX_FMT_VDPAU_H264 only. */
AVCodec ff_h264_vdpau_decoder = {
    .name           = "h264_vdpau",
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = CODEC_ID_H264,
    .priv_data_size = sizeof(H264Context),
    .init           = ff_h264_decode_init,
    .encode         = NULL,
    .close          = ff_h264_decode_end,
    .decode         = decode_frame,
    .capabilities   = CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
    .flush= flush_dpb,
    .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
    .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
    .profiles = NULL_IF_CONFIG_SMALL(profiles),
};
#endif

Older »