/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
33
#include "libavutil/avassert.h"
34
#include "libavutil/pixdesc.h"
36
#define VP9_SYNCCODE 0x498342
73
typedef struct VP9Frame {
75
AVBufferRef *extradata;
76
uint8_t *segmentation_map;
77
struct VP9mvrefPair *mv;
83
uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84
[8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
87
typedef struct VP9Block {
88
uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
89
enum FilterMode filter;
90
VP56mv mv[4 /* b_idx */][2 /* ref */];
92
enum TxfmMode tx, uvtx;
94
enum BlockPartition bp;
97
typedef struct VP9Context {
104
VP9Block *b_base, *b;
106
int row, row7, col, col7;
108
ptrdiff_t y_stride, uv_stride;
111
uint8_t keyframe, last_keyframe;
112
uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
114
uint8_t use_last_frame_mvs;
119
uint8_t refreshrefmask;
120
uint8_t highprecisionmvs;
121
enum FilterMode filtermode;
122
uint8_t allowcompinter;
125
uint8_t parallelmode;
129
uint8_t varcompref[2];
130
ThreadFrame refs[8], next_refs[8];
132
#define REF_FRAME_MVPAIR 1
133
#define REF_FRAME_SEGMAP 2
140
uint8_t mblim_lut[64];
148
int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
150
#define MAX_SEGMENT 8
154
uint8_t absolute_vals;
156
uint8_t ignore_refmap;
161
uint8_t skip_enabled;
170
unsigned log2_tile_cols, log2_tile_rows;
171
unsigned tile_cols, tile_rows;
172
unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
174
unsigned sb_cols, sb_rows, rows, cols;
177
uint8_t coef[4][2][2][6][6][3];
181
uint8_t coef[4][2][2][6][6][11];
186
unsigned y_mode[4][10];
187
unsigned uv_mode[10][10];
188
unsigned filter[4][3];
189
unsigned mv_mode[7][4];
190
unsigned intra[4][2];
192
unsigned single_ref[5][2][2];
193
unsigned comp_ref[5][2];
194
unsigned tx32p[2][4];
195
unsigned tx16p[2][3];
198
unsigned mv_joint[4];
201
unsigned classes[11];
203
unsigned bits[10][2];
204
unsigned class0_fp[2][4];
206
unsigned class0_hp[2];
209
unsigned partition[4][4][4];
210
unsigned coef[4][2][2][6][6][3];
211
unsigned eob[4][2][2][6][6][2];
213
enum TxfmMode txfmmode;
214
enum CompPredMode comppredmode;
216
// contextual (left/above) cache
217
DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
218
DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
219
DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
220
DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
221
DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
222
DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
223
DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
224
DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
225
DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
226
DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
227
DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
228
DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
229
uint8_t *above_partition_ctx;
230
uint8_t *above_mode_ctx;
231
// FIXME maybe merge some of the below in a flags field?
232
uint8_t *above_y_nnz_ctx;
233
uint8_t *above_uv_nnz_ctx[2];
234
uint8_t *above_skip_ctx; // 1bit
235
uint8_t *above_txfm_ctx; // 2bit
236
uint8_t *above_segpred_ctx; // 1bit
237
uint8_t *above_intra_ctx; // 1bit
238
uint8_t *above_comp_ctx; // 1bit
239
uint8_t *above_ref_ctx; // 2bit
240
uint8_t *above_filter_ctx;
241
VP56mv (*above_mv_ctx)[2];
244
uint8_t *intra_pred_data[3];
245
struct VP9Filter *lflvl;
246
DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
248
// block reconstruction intermediates
249
int block_alloc_using_2pass;
250
int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
251
uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
252
struct { int x, y; } min_mv, max_mv;
253
DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
254
DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
255
uint16_t mvscale[3][2];
256
uint8_t mvstep[3][2];
259
// Block {width, height} per BlockSize: [0] in 4-pixel units, [1] in 8-pixel
// units (each entry of [1] is half the corresponding [0] entry, min 1).
static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
    {
        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    }, {
        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
    }
};
269
/*
 * Allocate the picture buffer for f plus one shared side-data buffer holding
 * the segmentation map (1 byte per 8x8 block) followed by the mv-pair array.
 * Returns 0 on success, a negative AVERROR on failure (picture released).
 */
static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
{
    VP9Context *s = ctx->priv_data;
    int ret, sz;

    if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
        return ret;
    sz = 64 * s->sb_cols * s->sb_rows;  // number of 8x8 blocks in the frame
    if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
        // undo the picture allocation so the frame stays consistent
        ff_thread_release_buffer(ctx, &f->tf);
        return AVERROR(ENOMEM);
    }

    f->segmentation_map = f->extradata->data;
    f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);

    return 0;
}
288
/* Release a frame's picture buffer and its segmentation/mv side data. */
static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
{
    ff_thread_release_buffer(ctx, &f->tf);
    av_buffer_unref(&f->extradata);
}
294
/*
 * Make dst a new reference to src (picture and side-data buffers are
 * refcounted, not copied). Returns 0 or a negative AVERROR; on failure
 * dst holds no references.
 */
static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
{
    int res;

    if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
        return res;
    } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
        vp9_unref_frame(ctx, dst);  // drop the picture ref taken above
        return AVERROR(ENOMEM);
    }

    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;
    dst->uses_2pass = src->uses_2pass;

    return 0;
}
312
/*
 * (Re)initialize all per-frame-size state: superblock/block counts and the
 * single allocation that holds the intra-prediction edge rows, all "above"
 * context arrays and the loop-filter level map. Also re-inits the DSP
 * functions when the bit depth changed. Returns 0 or AVERROR(ENOMEM).
 */
static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
{
    VP9Context *s = ctx->priv_data;
    uint8_t *p;
    int bytesperpixel = s->bytesperpixel;

    av_assert0(w > 0 && h > 0);

    // nothing to do if dimensions and pixel format are unchanged
    if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
        return 0;

    // NOTE(review): reconstructed — upstream variants use ff_set_dimensions()
    // here instead of direct assignment; confirm against the project version.
    ctx->width   = w;
    ctx->height  = h;
    ctx->pix_fmt = fmt;
    s->sb_cols   = (w + 63) >> 6;
    s->sb_rows   = (h + 63) >> 6;
    s->cols      = (w + 7) >> 3;
    s->rows      = (h + 7) >> 3;

#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0],  uint8_t *,         64 * bytesperpixel);
    assign(s->intra_pred_data[1],  uint8_t *,         64 * bytesperpixel);
    assign(s->intra_pred_data[2],  uint8_t *,         64 * bytesperpixel);
    assign(s->above_y_nnz_ctx,     uint8_t *,         16);
    assign(s->above_mode_ctx,      uint8_t *,         16);
    assign(s->above_mv_ctx,        VP56mv(*)[2],      16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *,         16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *,         16);
    assign(s->above_partition_ctx, uint8_t *,          8);
    assign(s->above_skip_ctx,      uint8_t *,          8);
    assign(s->above_txfm_ctx,      uint8_t *,          8);
    assign(s->above_segpred_ctx,   uint8_t *,          8);
    assign(s->above_intra_ctx,     uint8_t *,          8);
    assign(s->above_comp_ctx,      uint8_t *,          8);
    assign(s->above_ref_ctx,       uint8_t *,          8);
    assign(s->above_filter_ctx,    uint8_t *,          8);
    assign(s->lflvl,               struct VP9Filter *, 1);
#undef assign

    // these will be re-allocated a little later
    av_freep(&s->b_base);
    av_freep(&s->block_base);

    if (s->bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->bpp);
        ff_videodsp_init(&s->vdsp, s->bpp);
        s->last_bpp = s->bpp;
    }

    return 0;
}
371
/*
 * (Re)allocate the per-block scratch buffers (coefficient blocks and
 * end-of-block counts for luma and both chroma planes). In 2-pass mode one
 * set per superblock is kept for the whole frame; otherwise a single set is
 * reused. Returns 0 or AVERROR(ENOMEM).
 */
static int update_block_buffers(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;

    // already allocated with the right layout for the current pass mode
    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
        return 0;

    av_free(s->b_base);
    av_free(s->block_base);
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);  // chroma coefs per 64x64 sb
    chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);  // chroma 4x4 blocks per sb
    if (s->frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                    16 * 16 + 2 * chroma_eobs) * sbs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        // carve the single allocation into Y/U/V coef and eob regions
        s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
        s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
    } else {
        s->b_base = av_malloc(sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                   16 * 16 + 2 * chroma_eobs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
        s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
        s->uveob_base[0] = s->eob_base + 16 * 16;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
    }
    s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;

    return 0;
}
413
// for some reason the sign bit is at the end, not the start, of a bit sequence
414
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
416
int v = get_bits(gb, n);
417
return get_bits1(gb) ? -v : v;
420
/*
 * Inverse of the "recenter" mapping: undo the folding of a non-negative
 * delta v around the previous value m (VP9 subexponential prob update).
 */
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
}
425
// differential forward probability updates
426
static int update_prob(VP56RangeCoder *c, int p)
428
static const int inv_map_table[255] = {
429
7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
430
189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
431
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
432
25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
433
40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
434
55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
435
70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
436
86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
437
101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
438
116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
439
131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
440
146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
441
161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
442
177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
443
192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
444
207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
445
222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
446
237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
451
/* This code is trying to do a differential probability update. For a
452
* current probability A in the range [1, 255], the difference to a new
453
* probability of any value can be expressed differentially as 1-A,255-A
454
* where some part of this (absolute range) exists both in positive as
455
* well as the negative part, whereas another part only exists in one
456
* half. We're trying to code this shared part differentially, i.e.
457
* times two where the value of the lowest bit specifies the sign, and
458
* the single part is then coded on top of this. This absolute difference
459
* then again has a value of [0,254], but a bigger value in this range
460
* indicates that we're further away from the original value A, so we
461
* can code this as a VLC code, since higher values are increasingly
462
* unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
463
* updates vs. the 'fine, exact' updates further down the range, which
464
* adds one extra dimension to this differential update model. */
466
if (!vp8_rac_get(c)) {
467
d = vp8_rac_get_uint(c, 4) + 0;
468
} else if (!vp8_rac_get(c)) {
469
d = vp8_rac_get_uint(c, 4) + 16;
470
} else if (!vp8_rac_get(c)) {
471
d = vp8_rac_get_uint(c, 5) + 32;
473
d = vp8_rac_get_uint(c, 7);
475
d = (d << 1) - 65 + vp8_rac_get(c);
477
av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
480
return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
481
255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
484
/*
 * Parse the color-config part of the uncompressed frame header: bit depth,
 * colorspace, color range and chroma subsampling. Sets s->bpp/bpp_index/
 * bytesperpixel/ss_h/ss_v and ctx->colorspace/color_range.
 * Returns the resolved pixel format, or a negative AVERROR on invalid data.
 */
static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
{
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    };
    VP9Context *s = ctx->priv_data;
    enum AVPixelFormat res;
    int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp_index = bits;
    s->bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->bpp) >> 3;
    ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        };
        if (ctx->profile & 1) {
            // NOTE(review): upstream later changed this to 0 (RGB is 4:4:4,
            // non-subsampled) — confirm against the project's vp9 version
            s->ss_h = s->ss_v = 1;
            res = pix_fmt_rgb[bits];
            ctx->color_range = AVCOL_RANGE_JPEG;
        } else {
            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
                   ctx->profile);
            return AVERROR_INVALIDDATA;
        }
    } else {
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        };
        ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (ctx->profile & 1) {
            // profiles 1/3 signal subsampling explicitly
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
                av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                       ctx->profile);
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                       ctx->profile);
                return AVERROR_INVALIDDATA;
            }
        } else {
            // profiles 0/2 are always 4:2:0
            s->ss_h = s->ss_v = 1;
            res = pix_fmt_for_ss[bits][1][1];
        }
    }

    return res;
}
542
static int decode_frame_header(AVCodecContext *ctx,
543
const uint8_t *data, int size, int *ref)
545
VP9Context *s = ctx->priv_data;
546
int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
547
enum AVPixelFormat fmt = ctx->pix_fmt;
549
const uint8_t *data2;
552
if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
553
av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
556
if (get_bits(&s->gb, 2) != 0x2) { // frame marker
557
av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
558
return AVERROR_INVALIDDATA;
560
ctx->profile = get_bits1(&s->gb);
561
ctx->profile |= get_bits1(&s->gb) << 1;
562
if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
563
if (ctx->profile > 3) {
564
av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
565
return AVERROR_INVALIDDATA;
567
if (get_bits1(&s->gb)) {
568
*ref = get_bits(&s->gb, 3);
571
s->last_keyframe = s->keyframe;
572
s->keyframe = !get_bits1(&s->gb);
573
last_invisible = s->invisible;
574
s->invisible = !get_bits1(&s->gb);
575
s->errorres = get_bits1(&s->gb);
576
s->use_last_frame_mvs = !s->errorres && !last_invisible;
578
if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
579
av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
580
return AVERROR_INVALIDDATA;
582
if ((fmt = read_colorspace_details(ctx)) < 0)
584
// for profile 1, here follows the subsampling bits
585
s->refreshrefmask = 0xff;
586
w = get_bits(&s->gb, 16) + 1;
587
h = get_bits(&s->gb, 16) + 1;
588
if (get_bits1(&s->gb)) // display size
589
skip_bits(&s->gb, 32);
591
s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
592
s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
594
if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
595
av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
596
return AVERROR_INVALIDDATA;
598
if (ctx->profile == 1) {
599
if ((fmt = read_colorspace_details(ctx)) < 0)
602
s->ss_h = s->ss_v = 1;
605
s->bytesperpixel = 1;
606
fmt = AV_PIX_FMT_YUV420P;
607
ctx->colorspace = AVCOL_SPC_BT470BG;
608
ctx->color_range = AVCOL_RANGE_JPEG;
610
s->refreshrefmask = get_bits(&s->gb, 8);
611
w = get_bits(&s->gb, 16) + 1;
612
h = get_bits(&s->gb, 16) + 1;
613
if (get_bits1(&s->gb)) // display size
614
skip_bits(&s->gb, 32);
616
s->refreshrefmask = get_bits(&s->gb, 8);
617
s->refidx[0] = get_bits(&s->gb, 3);
618
s->signbias[0] = get_bits1(&s->gb) && !s->errorres;
619
s->refidx[1] = get_bits(&s->gb, 3);
620
s->signbias[1] = get_bits1(&s->gb) && !s->errorres;
621
s->refidx[2] = get_bits(&s->gb, 3);
622
s->signbias[2] = get_bits1(&s->gb) && !s->errorres;
623
if (!s->refs[s->refidx[0]].f->data[0] ||
624
!s->refs[s->refidx[1]].f->data[0] ||
625
!s->refs[s->refidx[2]].f->data[0]) {
626
av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
627
return AVERROR_INVALIDDATA;
629
if (get_bits1(&s->gb)) {
630
w = s->refs[s->refidx[0]].f->width;
631
h = s->refs[s->refidx[0]].f->height;
632
} else if (get_bits1(&s->gb)) {
633
w = s->refs[s->refidx[1]].f->width;
634
h = s->refs[s->refidx[1]].f->height;
635
} else if (get_bits1(&s->gb)) {
636
w = s->refs[s->refidx[2]].f->width;
637
h = s->refs[s->refidx[2]].f->height;
639
w = get_bits(&s->gb, 16) + 1;
640
h = get_bits(&s->gb, 16) + 1;
642
// Note that in this code, "CUR_FRAME" is actually before we
643
// have formally allocated a frame, and thus actually represents
645
s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
646
s->frames[CUR_FRAME].tf.f->height == h;
647
if (get_bits1(&s->gb)) // display size
648
skip_bits(&s->gb, 32);
649
s->highprecisionmvs = get_bits1(&s->gb);
650
s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
652
s->allowcompinter = (s->signbias[0] != s->signbias[1] ||
653
s->signbias[0] != s->signbias[2]);
654
if (s->allowcompinter) {
655
if (s->signbias[0] == s->signbias[1]) {
657
s->varcompref[0] = 0;
658
s->varcompref[1] = 1;
659
} else if (s->signbias[0] == s->signbias[2]) {
661
s->varcompref[0] = 0;
662
s->varcompref[1] = 2;
665
s->varcompref[0] = 1;
666
s->varcompref[1] = 2;
670
for (i = 0; i < 3; i++) {
671
AVFrame *ref = s->refs[s->refidx[i]].f;
672
int refw = ref->width, refh = ref->height;
674
if (ref->format != fmt) {
675
av_log(ctx, AV_LOG_ERROR,
676
"Ref pixfmt (%s) did not match current frame (%s)",
677
av_get_pix_fmt_name(ref->format),
678
av_get_pix_fmt_name(fmt));
679
return AVERROR_INVALIDDATA;
680
} else if (refw == w && refh == h) {
681
s->mvscale[i][0] = s->mvscale[i][1] = 0;
683
if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
684
av_log(ctx, AV_LOG_ERROR,
685
"Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
687
return AVERROR_INVALIDDATA;
689
s->mvscale[i][0] = (refw << 14) / w;
690
s->mvscale[i][1] = (refh << 14) / h;
691
s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
692
s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
697
s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
698
s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
699
s->framectxid = c = get_bits(&s->gb, 2);
701
/* loopfilter header data */
702
if (s->keyframe || s->errorres || s->intraonly) {
703
// reset loopfilter defaults
704
s->lf_delta.ref[0] = 1;
705
s->lf_delta.ref[1] = 0;
706
s->lf_delta.ref[2] = -1;
707
s->lf_delta.ref[3] = -1;
708
s->lf_delta.mode[0] = 0;
709
s->lf_delta.mode[1] = 0;
711
s->filter.level = get_bits(&s->gb, 6);
712
sharp = get_bits(&s->gb, 3);
713
// if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
714
// the old cache values since they are still valid
715
if (s->filter.sharpness != sharp)
716
memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
717
s->filter.sharpness = sharp;
718
if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
719
if (get_bits1(&s->gb)) {
720
for (i = 0; i < 4; i++)
721
if (get_bits1(&s->gb))
722
s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
723
for (i = 0; i < 2; i++)
724
if (get_bits1(&s->gb))
725
s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
729
/* quantization header data */
730
s->yac_qi = get_bits(&s->gb, 8);
731
s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
732
s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
733
s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
734
s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
735
s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
737
/* segmentation header info */
738
s->segmentation.ignore_refmap = 0;
739
if ((s->segmentation.enabled = get_bits1(&s->gb))) {
740
if ((s->segmentation.update_map = get_bits1(&s->gb))) {
741
for (i = 0; i < 7; i++)
742
s->prob.seg[i] = get_bits1(&s->gb) ?
743
get_bits(&s->gb, 8) : 255;
744
if ((s->segmentation.temporal = get_bits1(&s->gb))) {
745
for (i = 0; i < 3; i++)
746
s->prob.segpred[i] = get_bits1(&s->gb) ?
747
get_bits(&s->gb, 8) : 255;
750
if ((!s->segmentation.update_map || s->segmentation.temporal) &&
751
(w != s->frames[CUR_FRAME].tf.f->width ||
752
h != s->frames[CUR_FRAME].tf.f->height)) {
753
av_log(ctx, AV_LOG_WARNING,
754
"Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
755
s->segmentation.temporal, s->segmentation.update_map);
756
s->segmentation.ignore_refmap = 1;
757
//return AVERROR_INVALIDDATA;
760
if (get_bits1(&s->gb)) {
761
s->segmentation.absolute_vals = get_bits1(&s->gb);
762
for (i = 0; i < 8; i++) {
763
if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
764
s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
765
if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
766
s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
767
if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
768
s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
769
s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
773
s->segmentation.feat[0].q_enabled = 0;
774
s->segmentation.feat[0].lf_enabled = 0;
775
s->segmentation.feat[0].skip_enabled = 0;
776
s->segmentation.feat[0].ref_enabled = 0;
779
// set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
780
for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
781
int qyac, qydc, quvac, quvdc, lflvl, sh;
783
if (s->segmentation.feat[i].q_enabled) {
784
if (s->segmentation.absolute_vals)
785
qyac = s->segmentation.feat[i].q_val;
787
qyac = s->yac_qi + s->segmentation.feat[i].q_val;
791
qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
792
quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
793
quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
794
qyac = av_clip_uintp2(qyac, 8);
796
s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
797
s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
798
s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
799
s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
801
sh = s->filter.level >= 32;
802
if (s->segmentation.feat[i].lf_enabled) {
803
if (s->segmentation.absolute_vals)
804
lflvl = av_clip_uintp2(s->segmentation.feat[i].lf_val, 6);
806
lflvl = av_clip_uintp2(s->filter.level + s->segmentation.feat[i].lf_val, 6);
808
lflvl = s->filter.level;
810
if (s->lf_delta.enabled) {
811
s->segmentation.feat[i].lflvl[0][0] =
812
s->segmentation.feat[i].lflvl[0][1] =
813
av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
814
for (j = 1; j < 4; j++) {
815
s->segmentation.feat[i].lflvl[j][0] =
816
av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
817
s->lf_delta.mode[0]) * (1 << sh)), 6);
818
s->segmentation.feat[i].lflvl[j][1] =
819
av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
820
s->lf_delta.mode[1]) * (1 << sh)), 6);
823
memset(s->segmentation.feat[i].lflvl, lflvl,
824
sizeof(s->segmentation.feat[i].lflvl));
829
if ((res = update_size(ctx, w, h, fmt)) < 0) {
830
av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
833
for (s->tiling.log2_tile_cols = 0;
834
(s->sb_cols >> s->tiling.log2_tile_cols) > 64;
835
s->tiling.log2_tile_cols++) ;
836
for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
837
max = FFMAX(0, max - 1);
838
while (max > s->tiling.log2_tile_cols) {
839
if (get_bits1(&s->gb))
840
s->tiling.log2_tile_cols++;
844
s->tiling.log2_tile_rows = decode012(&s->gb);
845
s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
846
if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
847
s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
848
s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
849
sizeof(VP56RangeCoder) * s->tiling.tile_cols);
851
av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
852
return AVERROR(ENOMEM);
856
if (s->keyframe || s->errorres || s->intraonly) {
857
s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
858
s->prob_ctx[3].p = vp9_default_probs;
859
memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
860
sizeof(vp9_default_coef_probs));
861
memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
862
sizeof(vp9_default_coef_probs));
863
memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
864
sizeof(vp9_default_coef_probs));
865
memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
866
sizeof(vp9_default_coef_probs));
869
// next 16 bits is size of the rest of the header (arith-coded)
870
size2 = get_bits(&s->gb, 16);
871
data2 = align_get_bits(&s->gb);
872
if (size2 > size - (data2 - data)) {
873
av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
874
return AVERROR_INVALIDDATA;
876
ff_vp56_init_range_decoder(&s->c, data2, size2);
877
if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
878
av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
879
return AVERROR_INVALIDDATA;
882
if (s->keyframe || s->intraonly) {
883
memset(s->counts.coef, 0, sizeof(s->counts.coef));
884
memset(s->counts.eob, 0, sizeof(s->counts.eob));
886
memset(&s->counts, 0, sizeof(s->counts));
888
// FIXME is it faster to not copy here, but do it down in the fw updates
889
// as explicit copies if the fw update is missing (and skip the copy upon
891
s->prob.p = s->prob_ctx[c].p;
895
s->txfmmode = TX_4X4;
897
s->txfmmode = vp8_rac_get_uint(&s->c, 2);
898
if (s->txfmmode == 3)
899
s->txfmmode += vp8_rac_get(&s->c);
901
if (s->txfmmode == TX_SWITCHABLE) {
902
for (i = 0; i < 2; i++)
903
if (vp56_rac_get_prob_branchy(&s->c, 252))
904
s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
905
for (i = 0; i < 2; i++)
906
for (j = 0; j < 2; j++)
907
if (vp56_rac_get_prob_branchy(&s->c, 252))
908
s->prob.p.tx16p[i][j] =
909
update_prob(&s->c, s->prob.p.tx16p[i][j]);
910
for (i = 0; i < 2; i++)
911
for (j = 0; j < 3; j++)
912
if (vp56_rac_get_prob_branchy(&s->c, 252))
913
s->prob.p.tx32p[i][j] =
914
update_prob(&s->c, s->prob.p.tx32p[i][j]);
919
for (i = 0; i < 4; i++) {
920
uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
921
if (vp8_rac_get(&s->c)) {
922
for (j = 0; j < 2; j++)
923
for (k = 0; k < 2; k++)
924
for (l = 0; l < 6; l++)
925
for (m = 0; m < 6; m++) {
926
uint8_t *p = s->prob.coef[i][j][k][l][m];
927
uint8_t *r = ref[j][k][l][m];
928
if (m >= 3 && l == 0) // dc only has 3 pt
930
for (n = 0; n < 3; n++) {
931
if (vp56_rac_get_prob_branchy(&s->c, 252)) {
932
p[n] = update_prob(&s->c, r[n]);
940
for (j = 0; j < 2; j++)
941
for (k = 0; k < 2; k++)
942
for (l = 0; l < 6; l++)
943
for (m = 0; m < 6; m++) {
944
uint8_t *p = s->prob.coef[i][j][k][l][m];
945
uint8_t *r = ref[j][k][l][m];
946
if (m > 3 && l == 0) // dc only has 3 pt
952
if (s->txfmmode == i)
957
for (i = 0; i < 3; i++)
958
if (vp56_rac_get_prob_branchy(&s->c, 252))
959
s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
960
if (!s->keyframe && !s->intraonly) {
961
for (i = 0; i < 7; i++)
962
for (j = 0; j < 3; j++)
963
if (vp56_rac_get_prob_branchy(&s->c, 252))
964
s->prob.p.mv_mode[i][j] =
965
update_prob(&s->c, s->prob.p.mv_mode[i][j]);
967
if (s->filtermode == FILTER_SWITCHABLE)
968
for (i = 0; i < 4; i++)
969
for (j = 0; j < 2; j++)
970
if (vp56_rac_get_prob_branchy(&s->c, 252))
971
s->prob.p.filter[i][j] =
972
update_prob(&s->c, s->prob.p.filter[i][j]);
974
for (i = 0; i < 4; i++)
975
if (vp56_rac_get_prob_branchy(&s->c, 252))
976
s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
978
if (s->allowcompinter) {
979
s->comppredmode = vp8_rac_get(&s->c);
981
s->comppredmode += vp8_rac_get(&s->c);
982
if (s->comppredmode == PRED_SWITCHABLE)
983
for (i = 0; i < 5; i++)
984
if (vp56_rac_get_prob_branchy(&s->c, 252))
986
update_prob(&s->c, s->prob.p.comp[i]);
988
s->comppredmode = PRED_SINGLEREF;
991
if (s->comppredmode != PRED_COMPREF) {
992
for (i = 0; i < 5; i++) {
993
if (vp56_rac_get_prob_branchy(&s->c, 252))
994
s->prob.p.single_ref[i][0] =
995
update_prob(&s->c, s->prob.p.single_ref[i][0]);
996
if (vp56_rac_get_prob_branchy(&s->c, 252))
997
s->prob.p.single_ref[i][1] =
998
update_prob(&s->c, s->prob.p.single_ref[i][1]);
1002
if (s->comppredmode != PRED_SINGLEREF) {
1003
for (i = 0; i < 5; i++)
1004
if (vp56_rac_get_prob_branchy(&s->c, 252))
1005
s->prob.p.comp_ref[i] =
1006
update_prob(&s->c, s->prob.p.comp_ref[i]);
1009
for (i = 0; i < 4; i++)
1010
for (j = 0; j < 9; j++)
1011
if (vp56_rac_get_prob_branchy(&s->c, 252))
1012
s->prob.p.y_mode[i][j] =
1013
update_prob(&s->c, s->prob.p.y_mode[i][j]);
1015
for (i = 0; i < 4; i++)
1016
for (j = 0; j < 4; j++)
1017
for (k = 0; k < 3; k++)
1018
if (vp56_rac_get_prob_branchy(&s->c, 252))
1019
s->prob.p.partition[3 - i][j][k] =
1020
update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1022
// mv fields don't use the update_prob subexp model for some reason
1023
for (i = 0; i < 3; i++)
1024
if (vp56_rac_get_prob_branchy(&s->c, 252))
1025
s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1027
for (i = 0; i < 2; i++) {
1028
if (vp56_rac_get_prob_branchy(&s->c, 252))
1029
s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1031
for (j = 0; j < 10; j++)
1032
if (vp56_rac_get_prob_branchy(&s->c, 252))
1033
s->prob.p.mv_comp[i].classes[j] =
1034
(vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1036
if (vp56_rac_get_prob_branchy(&s->c, 252))
1037
s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1039
for (j = 0; j < 10; j++)
1040
if (vp56_rac_get_prob_branchy(&s->c, 252))
1041
s->prob.p.mv_comp[i].bits[j] =
1042
(vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1045
for (i = 0; i < 2; i++) {
1046
for (j = 0; j < 2; j++)
1047
for (k = 0; k < 3; k++)
1048
if (vp56_rac_get_prob_branchy(&s->c, 252))
1049
s->prob.p.mv_comp[i].class0_fp[j][k] =
1050
(vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1052
for (j = 0; j < 3; j++)
1053
if (vp56_rac_get_prob_branchy(&s->c, 252))
1054
s->prob.p.mv_comp[i].fp[j] =
1055
(vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1058
if (s->highprecisionmvs) {
1059
for (i = 0; i < 2; i++) {
1060
if (vp56_rac_get_prob_branchy(&s->c, 252))
1061
s->prob.p.mv_comp[i].class0_hp =
1062
(vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1064
if (vp56_rac_get_prob_branchy(&s->c, 252))
1065
s->prob.p.mv_comp[i].hp =
1066
(vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1071
return (data2 - data) + size2;
1074
static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1077
dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1078
dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
1081
static void find_ref_mvs(VP9Context *s,
1082
VP56mv *pmv, int ref, int z, int idx, int sb)
1084
static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1085
[BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1086
{ -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1087
[BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1088
{ -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1089
[BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1090
{ -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1091
[BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1092
{ -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1093
[BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1094
{ -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1095
[BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1096
{ 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1097
[BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1098
{ -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1099
[BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1100
{ 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1101
[BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1102
{ -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1103
[BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1104
{ -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1105
[BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1106
{ -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1107
[BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1108
{ -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1109
[BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1110
{ -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1113
int row = s->row, col = s->col, row7 = s->row7;
1114
const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1115
#define INVALID_MV 0x80008000U
1116
uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
1119
#define RETURN_DIRECT_MV(mv) \
1121
uint32_t m = AV_RN32A(&mv); \
1125
} else if (mem == INVALID_MV) { \
1127
} else if (m != mem) { \
1134
if (sb == 2 || sb == 1) {
1135
RETURN_DIRECT_MV(b->mv[0][z]);
1136
} else if (sb == 3) {
1137
RETURN_DIRECT_MV(b->mv[2][z]);
1138
RETURN_DIRECT_MV(b->mv[1][z]);
1139
RETURN_DIRECT_MV(b->mv[0][z]);
1142
#define RETURN_MV(mv) \
1147
av_assert2(idx == 1); \
1148
av_assert2(mem != INVALID_MV); \
1149
if (mem_sub8x8 == INVALID_MV) { \
1150
clamp_mv(&tmp, &mv, s); \
1151
m = AV_RN32A(&tmp); \
1156
mem_sub8x8 = AV_RN32A(&mv); \
1157
} else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1158
clamp_mv(&tmp, &mv, s); \
1159
m = AV_RN32A(&tmp); \
1163
/* BUG I'm pretty sure this isn't the intention */ \
1169
uint32_t m = AV_RN32A(&mv); \
1171
clamp_mv(pmv, &mv, s); \
1173
} else if (mem == INVALID_MV) { \
1175
} else if (m != mem) { \
1176
clamp_mv(pmv, &mv, s); \
1183
struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1184
if (mv->ref[0] == ref) {
1185
RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1186
} else if (mv->ref[1] == ref) {
1187
RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1190
if (col > s->tiling.tile_col_start) {
1191
struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1192
if (mv->ref[0] == ref) {
1193
RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1194
} else if (mv->ref[1] == ref) {
1195
RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1203
// previously coded MVs in this neighbourhood, using same reference frame
1204
for (; i < 8; i++) {
1205
int c = p[i][0] + col, r = p[i][1] + row;
1207
if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1208
struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1210
if (mv->ref[0] == ref) {
1211
RETURN_MV(mv->mv[0]);
1212
} else if (mv->ref[1] == ref) {
1213
RETURN_MV(mv->mv[1]);
1218
// MV at this position in previous frame, using same reference frame
1219
if (s->use_last_frame_mvs) {
1220
struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1222
if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1223
ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1224
if (mv->ref[0] == ref) {
1225
RETURN_MV(mv->mv[0]);
1226
} else if (mv->ref[1] == ref) {
1227
RETURN_MV(mv->mv[1]);
1231
#define RETURN_SCALE_MV(mv, scale) \
1234
VP56mv mv_temp = { -mv.x, -mv.y }; \
1235
RETURN_MV(mv_temp); \
1241
// previously coded MVs in this neighbourhood, using different reference frame
1242
for (i = 0; i < 8; i++) {
1243
int c = p[i][0] + col, r = p[i][1] + row;
1245
if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1246
struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1248
if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1249
RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1251
if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1252
// BUG - libvpx has this condition regardless of whether
1253
// we used the first ref MV and pre-scaling
1254
AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1255
RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1260
// MV at this position in previous frame, using different reference frame
1261
if (s->use_last_frame_mvs) {
1262
struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1264
// no need to await_progress, because we already did that above
1265
if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1266
RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1268
if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1269
// BUG - libvpx has this condition regardless of whether
1270
// we used the first ref MV and pre-scaling
1271
AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1272
RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1277
clamp_mv(pmv, pmv, s);
1280
#undef RETURN_SCALE_MV
1283
/**
 * Decode one motion vector component (row or column) from the bitstream.
 *
 * Reads the sign, the magnitude class, then either the generic
 * class>0 path (per-bit magnitude, fractional-pel tree, optional
 * high-precision bit) or the class-0 path (class0 bit, class0 fp tree,
 * optional class0 hp bit). All symbol counts are accumulated for
 * backward probability adaptation.
 *
 * @param s    decoder context
 * @param idx  component index (0 = row/y, 1 = col/x)
 * @param hp   nonzero if the high-precision (1/8-pel) bit is coded
 * @return the signed component value, in 1/8-pel units, never 0
 */
static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
{
    int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
    int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
                                s->prob.p.mv_comp[idx].classes);

    s->counts.mv_comp[idx].sign[sign]++;
    s->counts.mv_comp[idx].classes[c]++;
    if (c) {
        int m;

        for (n = 0, m = 0; m < c; m++) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
            n |= bit << m;
            s->counts.mv_comp[idx].bits[m][bit]++;
        }
        n <<= 3;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
        n |= bit << 1;
        s->counts.mv_comp[idx].fp[bit]++;
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
            s->counts.mv_comp[idx].hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].hp[1]++;
        }
    } else {
        n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
        s->counts.mv_comp[idx].class0[n]++;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
                               s->prob.p.mv_comp[idx].class0_fp[n]);
        s->counts.mv_comp[idx].class0_fp[n][bit]++;
        n = (n << 3) | (bit << 1);
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
            s->counts.mv_comp[idx].class0_hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].class0_hp[1]++;
        }
    }

    return sign ? -(n + 1) : (n + 1);
}
1336
static void fill_mv(VP9Context *s,
1337
VP56mv *mv, int mode, int sb)
1341
if (mode == ZEROMV) {
1346
// FIXME cache this value and reuse for other subblocks
1347
find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1348
mode == NEWMV ? -1 : sb);
1349
// FIXME maybe move this code into find_ref_mvs()
1350
if ((mode == NEWMV || sb == -1) &&
1351
!(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1365
if (mode == NEWMV) {
1366
enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1367
s->prob.p.mv_joint);
1369
s->counts.mv_joint[j]++;
1370
if (j >= MV_JOINT_V)
1371
mv[0].y += read_mv_component(s, 0, hp);
1373
mv[0].x += read_mv_component(s, 1, hp);
1377
// FIXME cache this value and reuse for other subblocks
1378
find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1379
mode == NEWMV ? -1 : sb);
1380
if ((mode == NEWMV || sb == -1) &&
1381
!(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1395
if (mode == NEWMV) {
1396
enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1397
s->prob.p.mv_joint);
1399
s->counts.mv_joint[j]++;
1400
if (j >= MV_JOINT_V)
1401
mv[1].y += read_mv_component(s, 0, hp);
1403
mv[1].x += read_mv_component(s, 1, hp);
1409
/**
 * Splat the byte value v over a w x h context rectangle.
 *
 * w is always a power of two (1/2/4/8); each row is written with a
 * single aligned store of the replicated byte pattern where possible.
 *
 * @param ptr     top-left of the destination rectangle
 * @param w       width in bytes (1, 2, 4 or 8)
 * @param h       height in rows (> 0)
 * @param stride  distance between rows in bytes
 * @param v       byte value to store
 */
static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
                                       ptrdiff_t stride, int v)
{
    switch (w) {
    case 1:
        do {
            *ptr = v;
            ptr += stride;
        } while (--h);
        break;
    case 2: {
        int v16 = v * 0x0101;

        do {
            AV_WN16A(ptr, v16);
            ptr += stride;
        } while (--h);
        break;
    }
    case 4: {
        uint32_t v32 = v * 0x01010101;

        do {
            AV_WN32A(ptr, v32);
            ptr += stride;
        } while (--h);
        break;
    }
    case 8: {
#if HAVE_FAST_64BIT
        uint64_t v64 = v * 0x0101010101010101ULL;

        do {
            AV_WN64A(ptr, v64);
            ptr += stride;
        } while (--h);
#else
        uint32_t v32 = v * 0x01010101;

        do {
            AV_WN32A(ptr,     v32);
            AV_WN32A(ptr + 4, v32);
            ptr += stride;
        } while (--h);
#endif
        break;
    }
    }
}
1455
static void decode_mode(AVCodecContext *ctx)
1457
static const uint8_t left_ctx[N_BS_SIZES] = {
1458
0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1460
static const uint8_t above_ctx[N_BS_SIZES] = {
1461
0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1463
static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1464
TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1465
TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1467
VP9Context *s = ctx->priv_data;
1469
int row = s->row, col = s->col, row7 = s->row7;
1470
enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
1471
int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1472
int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1473
int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1474
int vref, filter_id;
1476
if (!s->segmentation.enabled) {
1478
} else if (s->keyframe || s->intraonly) {
1479
b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1480
} else if (!s->segmentation.update_map ||
1481
(s->segmentation.temporal &&
1482
vp56_rac_get_prob_branchy(&s->c,
1483
s->prob.segpred[s->above_segpred_ctx[col] +
1484
s->left_segpred_ctx[row7]]))) {
1485
if (!s->errorres && !s->segmentation.ignore_refmap) {
1487
uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1489
if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1490
ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1491
for (y = 0; y < h4; y++) {
1492
int idx_base = (y + row) * 8 * s->sb_cols + col;
1493
for (x = 0; x < w4; x++)
1494
pred = FFMIN(pred, refsegmap[idx_base + x]);
1496
av_assert1(pred < 8);
1502
memset(&s->above_segpred_ctx[col], 1, w4);
1503
memset(&s->left_segpred_ctx[row7], 1, h4);
1505
b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1508
memset(&s->above_segpred_ctx[col], 0, w4);
1509
memset(&s->left_segpred_ctx[row7], 0, h4);
1511
if (s->segmentation.enabled &&
1512
(s->segmentation.update_map || s->keyframe || s->intraonly)) {
1513
setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1514
bw4, bh4, 8 * s->sb_cols, b->seg_id);
1517
b->skip = s->segmentation.enabled &&
1518
s->segmentation.feat[b->seg_id].skip_enabled;
1520
int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1521
b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1522
s->counts.skip[c][b->skip]++;
1525
if (s->keyframe || s->intraonly) {
1527
} else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1528
b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1532
if (have_a && have_l) {
1533
c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1536
c = have_a ? 2 * s->above_intra_ctx[col] :
1537
have_l ? 2 * s->left_intra_ctx[row7] : 0;
1539
bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1540
s->counts.intra[c][bit]++;
1544
if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1548
c = (s->above_skip_ctx[col] ? max_tx :
1549
s->above_txfm_ctx[col]) +
1550
(s->left_skip_ctx[row7] ? max_tx :
1551
s->left_txfm_ctx[row7]) > max_tx;
1553
c = s->above_skip_ctx[col] ? 1 :
1554
(s->above_txfm_ctx[col] * 2 > max_tx);
1556
} else if (have_l) {
1557
c = s->left_skip_ctx[row7] ? 1 :
1558
(s->left_txfm_ctx[row7] * 2 > max_tx);
1564
b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1566
b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1568
b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1570
s->counts.tx32p[c][b->tx]++;
1573
b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1575
b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1576
s->counts.tx16p[c][b->tx]++;
1579
b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1580
s->counts.tx8p[c][b->tx]++;
1587
b->tx = FFMIN(max_tx, s->txfmmode);
1590
if (s->keyframe || s->intraonly) {
1591
uint8_t *a = &s->above_mode_ctx[col * 2];
1592
uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1595
if (b->bs > BS_8x8) {
1596
// FIXME the memory storage intermediates here aren't really
1597
// necessary, they're just there to make the code slightly
1599
b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1600
vp9_default_kf_ymode_probs[a[0]][l[0]]);
1601
if (b->bs != BS_8x4) {
1602
b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1603
vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1604
l[0] = a[1] = b->mode[1];
1606
l[0] = a[1] = b->mode[1] = b->mode[0];
1608
if (b->bs != BS_4x8) {
1609
b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1610
vp9_default_kf_ymode_probs[a[0]][l[1]]);
1611
if (b->bs != BS_8x4) {
1612
b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1613
vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1614
l[1] = a[1] = b->mode[3];
1616
l[1] = a[1] = b->mode[3] = b->mode[2];
1619
b->mode[2] = b->mode[0];
1620
l[1] = a[1] = b->mode[3] = b->mode[1];
1623
b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1624
vp9_default_kf_ymode_probs[*a][*l]);
1625
b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1626
// FIXME this can probably be optimized
1627
memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1628
memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1630
b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1631
vp9_default_kf_uvmode_probs[b->mode[3]]);
1632
} else if (b->intra) {
1634
if (b->bs > BS_8x8) {
1635
b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1636
s->prob.p.y_mode[0]);
1637
s->counts.y_mode[0][b->mode[0]]++;
1638
if (b->bs != BS_8x4) {
1639
b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1640
s->prob.p.y_mode[0]);
1641
s->counts.y_mode[0][b->mode[1]]++;
1643
b->mode[1] = b->mode[0];
1645
if (b->bs != BS_4x8) {
1646
b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1647
s->prob.p.y_mode[0]);
1648
s->counts.y_mode[0][b->mode[2]]++;
1649
if (b->bs != BS_8x4) {
1650
b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1651
s->prob.p.y_mode[0]);
1652
s->counts.y_mode[0][b->mode[3]]++;
1654
b->mode[3] = b->mode[2];
1657
b->mode[2] = b->mode[0];
1658
b->mode[3] = b->mode[1];
1661
static const uint8_t size_group[10] = {
1662
3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1664
int sz = size_group[b->bs];
1666
b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1667
s->prob.p.y_mode[sz]);
1668
b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1669
s->counts.y_mode[sz][b->mode[3]]++;
1671
b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1672
s->prob.p.uv_mode[b->mode[3]]);
1673
s->counts.uv_mode[b->mode[3]][b->uvmode]++;
1675
static const uint8_t inter_mode_ctx_lut[14][14] = {
1676
{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1677
{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1678
{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1679
{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1680
{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1681
{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1682
{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1683
{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1684
{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1685
{ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1686
{ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1687
{ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1688
{ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1689
{ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1692
if (s->segmentation.feat[b->seg_id].ref_enabled) {
1693
av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1695
b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1697
// read comp_pred flag
1698
if (s->comppredmode != PRED_SWITCHABLE) {
1699
b->comp = s->comppredmode == PRED_COMPREF;
1703
// FIXME add intra as ref=0xff (or -1) to make these easier?
1706
if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1708
} else if (s->above_comp_ctx[col]) {
1709
c = 2 + (s->left_intra_ctx[row7] ||
1710
s->left_ref_ctx[row7] == s->fixcompref);
1711
} else if (s->left_comp_ctx[row7]) {
1712
c = 2 + (s->above_intra_ctx[col] ||
1713
s->above_ref_ctx[col] == s->fixcompref);
1715
c = (!s->above_intra_ctx[col] &&
1716
s->above_ref_ctx[col] == s->fixcompref) ^
1717
(!s->left_intra_ctx[row7] &&
1718
s->left_ref_ctx[row & 7] == s->fixcompref);
1721
c = s->above_comp_ctx[col] ? 3 :
1722
(!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1724
} else if (have_l) {
1725
c = s->left_comp_ctx[row7] ? 3 :
1726
(!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1730
b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1731
s->counts.comp[c][b->comp]++;
1734
// read actual references
1735
// FIXME probably cache a few variables here to prevent repetitive
1736
// memory accesses below
1737
if (b->comp) /* two references */ {
1738
int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1740
b->ref[fix_idx] = s->fixcompref;
1741
// FIXME can this codeblob be replaced by some sort of LUT?
1744
if (s->above_intra_ctx[col]) {
1745
if (s->left_intra_ctx[row7]) {
1748
c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1750
} else if (s->left_intra_ctx[row7]) {
1751
c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1753
int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1755
if (refl == refa && refa == s->varcompref[1]) {
1757
} else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1758
if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1759
(refl == s->fixcompref && refa == s->varcompref[0])) {
1762
c = (refa == refl) ? 3 : 1;
1764
} else if (!s->left_comp_ctx[row7]) {
1765
if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1768
c = (refl == s->varcompref[1] &&
1769
refa != s->varcompref[1]) ? 2 : 4;
1771
} else if (!s->above_comp_ctx[col]) {
1772
if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1775
c = (refa == s->varcompref[1] &&
1776
refl != s->varcompref[1]) ? 2 : 4;
1779
c = (refl == refa) ? 4 : 2;
1783
if (s->above_intra_ctx[col]) {
1785
} else if (s->above_comp_ctx[col]) {
1786
c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1788
c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1791
} else if (have_l) {
1792
if (s->left_intra_ctx[row7]) {
1794
} else if (s->left_comp_ctx[row7]) {
1795
c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1797
c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1802
bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1803
b->ref[var_idx] = s->varcompref[bit];
1804
s->counts.comp_ref[c][bit]++;
1805
} else /* single reference */ {
1808
if (have_a && !s->above_intra_ctx[col]) {
1809
if (have_l && !s->left_intra_ctx[row7]) {
1810
if (s->left_comp_ctx[row7]) {
1811
if (s->above_comp_ctx[col]) {
1812
c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1813
!s->above_ref_ctx[col]);
1815
c = (3 * !s->above_ref_ctx[col]) +
1816
(!s->fixcompref || !s->left_ref_ctx[row7]);
1818
} else if (s->above_comp_ctx[col]) {
1819
c = (3 * !s->left_ref_ctx[row7]) +
1820
(!s->fixcompref || !s->above_ref_ctx[col]);
1822
c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1824
} else if (s->above_intra_ctx[col]) {
1826
} else if (s->above_comp_ctx[col]) {
1827
c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1829
c = 4 * (!s->above_ref_ctx[col]);
1831
} else if (have_l && !s->left_intra_ctx[row7]) {
1832
if (s->left_intra_ctx[row7]) {
1834
} else if (s->left_comp_ctx[row7]) {
1835
c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1837
c = 4 * (!s->left_ref_ctx[row7]);
1842
bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1843
s->counts.single_ref[c][0][bit]++;
1847
// FIXME can this codeblob be replaced by some sort of LUT?
1850
if (s->left_intra_ctx[row7]) {
1851
if (s->above_intra_ctx[col]) {
1853
} else if (s->above_comp_ctx[col]) {
1854
c = 1 + 2 * (s->fixcompref == 1 ||
1855
s->above_ref_ctx[col] == 1);
1856
} else if (!s->above_ref_ctx[col]) {
1859
c = 4 * (s->above_ref_ctx[col] == 1);
1861
} else if (s->above_intra_ctx[col]) {
1862
if (s->left_intra_ctx[row7]) {
1864
} else if (s->left_comp_ctx[row7]) {
1865
c = 1 + 2 * (s->fixcompref == 1 ||
1866
s->left_ref_ctx[row7] == 1);
1867
} else if (!s->left_ref_ctx[row7]) {
1870
c = 4 * (s->left_ref_ctx[row7] == 1);
1872
} else if (s->above_comp_ctx[col]) {
1873
if (s->left_comp_ctx[row7]) {
1874
if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1875
c = 3 * (s->fixcompref == 1 ||
1876
s->left_ref_ctx[row7] == 1);
1880
} else if (!s->left_ref_ctx[row7]) {
1881
c = 1 + 2 * (s->fixcompref == 1 ||
1882
s->above_ref_ctx[col] == 1);
1884
c = 3 * (s->left_ref_ctx[row7] == 1) +
1885
(s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1887
} else if (s->left_comp_ctx[row7]) {
1888
if (!s->above_ref_ctx[col]) {
1889
c = 1 + 2 * (s->fixcompref == 1 ||
1890
s->left_ref_ctx[row7] == 1);
1892
c = 3 * (s->above_ref_ctx[col] == 1) +
1893
(s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1895
} else if (!s->above_ref_ctx[col]) {
1896
if (!s->left_ref_ctx[row7]) {
1899
c = 4 * (s->left_ref_ctx[row7] == 1);
1901
} else if (!s->left_ref_ctx[row7]) {
1902
c = 4 * (s->above_ref_ctx[col] == 1);
1904
c = 2 * (s->left_ref_ctx[row7] == 1) +
1905
2 * (s->above_ref_ctx[col] == 1);
1908
if (s->above_intra_ctx[col] ||
1909
(!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1911
} else if (s->above_comp_ctx[col]) {
1912
c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1914
c = 4 * (s->above_ref_ctx[col] == 1);
1917
} else if (have_l) {
1918
if (s->left_intra_ctx[row7] ||
1919
(!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1921
} else if (s->left_comp_ctx[row7]) {
1922
c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1924
c = 4 * (s->left_ref_ctx[row7] == 1);
1929
bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1930
s->counts.single_ref[c][1][bit]++;
1931
b->ref[0] = 1 + bit;
1936
if (b->bs <= BS_8x8) {
1937
if (s->segmentation.feat[b->seg_id].skip_enabled) {
1938
b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1940
static const uint8_t off[10] = {
1941
3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1944
// FIXME this needs to use the LUT tables from find_ref_mvs
1945
// because not all are -1,0/0,-1
1946
int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1947
[s->left_mode_ctx[row7 + off[b->bs]]];
1949
b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1950
s->prob.p.mv_mode[c]);
1951
b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1952
s->counts.mv_mode[c][b->mode[0] - 10]++;
1956
if (s->filtermode == FILTER_SWITCHABLE) {
1959
if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1960
if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1961
c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1962
s->left_filter_ctx[row7] : 3;
1964
c = s->above_filter_ctx[col];
1966
} else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1967
c = s->left_filter_ctx[row7];
1972
filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1973
s->prob.p.filter[c]);
1974
s->counts.filter[c][filter_id]++;
1975
b->filter = vp9_filter_lut[filter_id];
1977
b->filter = s->filtermode;
1980
if (b->bs > BS_8x8) {
1981
int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1983
b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1984
s->prob.p.mv_mode[c]);
1985
s->counts.mv_mode[c][b->mode[0] - 10]++;
1986
fill_mv(s, b->mv[0], b->mode[0], 0);
1988
if (b->bs != BS_8x4) {
1989
b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1990
s->prob.p.mv_mode[c]);
1991
s->counts.mv_mode[c][b->mode[1] - 10]++;
1992
fill_mv(s, b->mv[1], b->mode[1], 1);
1994
b->mode[1] = b->mode[0];
1995
AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1996
AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1999
if (b->bs != BS_4x8) {
2000
b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2001
s->prob.p.mv_mode[c]);
2002
s->counts.mv_mode[c][b->mode[2] - 10]++;
2003
fill_mv(s, b->mv[2], b->mode[2], 2);
2005
if (b->bs != BS_8x4) {
2006
b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2007
s->prob.p.mv_mode[c]);
2008
s->counts.mv_mode[c][b->mode[3] - 10]++;
2009
fill_mv(s, b->mv[3], b->mode[3], 3);
2011
b->mode[3] = b->mode[2];
2012
AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
2013
AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
2016
b->mode[2] = b->mode[0];
2017
AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2018
AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2019
b->mode[3] = b->mode[1];
2020
AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
2021
AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
2024
fill_mv(s, b->mv[0], b->mode[0], -1);
2025
AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2026
AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2027
AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2028
AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2029
AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2030
AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
2033
vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
2037
#define SPLAT_CTX(var, val, n) \
2039
case 1: var = val; break; \
2040
case 2: AV_WN16A(&var, val * 0x0101); break; \
2041
case 4: AV_WN32A(&var, val * 0x01010101); break; \
2042
case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2044
uint64_t v64 = val * 0x0101010101010101ULL; \
2045
AV_WN64A( &var, v64); \
2046
AV_WN64A(&((uint8_t *) &var)[8], v64); \
2051
#define SPLAT_CTX(var, val, n) \
2053
case 1: var = val; break; \
2054
case 2: AV_WN16A(&var, val * 0x0101); break; \
2055
case 4: AV_WN32A(&var, val * 0x01010101); break; \
2057
uint32_t v32 = val * 0x01010101; \
2058
AV_WN32A( &var, v32); \
2059
AV_WN32A(&((uint8_t *) &var)[4], v32); \
2063
uint32_t v32 = val * 0x01010101; \
2064
AV_WN32A( &var, v32); \
2065
AV_WN32A(&((uint8_t *) &var)[4], v32); \
2066
AV_WN32A(&((uint8_t *) &var)[8], v32); \
2067
AV_WN32A(&((uint8_t *) &var)[12], v32); \
2073
switch (bwh_tab[1][b->bs][0]) {
2074
#define SET_CTXS(dir, off, n) \
2076
SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2077
SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2078
SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2079
if (!s->keyframe && !s->intraonly) { \
2080
SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2081
SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2082
SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2084
SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2085
if (s->filtermode == FILTER_SWITCHABLE) { \
2086
SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2091
case 1: SET_CTXS(above, col, 1); break;
2092
case 2: SET_CTXS(above, col, 2); break;
2093
case 4: SET_CTXS(above, col, 4); break;
2094
case 8: SET_CTXS(above, col, 8); break;
2096
switch (bwh_tab[1][b->bs][1]) {
2097
case 1: SET_CTXS(left, row7, 1); break;
2098
case 2: SET_CTXS(left, row7, 2); break;
2099
case 4: SET_CTXS(left, row7, 4); break;
2100
case 8: SET_CTXS(left, row7, 8); break;
2105
if (!s->keyframe && !s->intraonly) {
2106
if (b->bs > BS_8x8) {
2107
int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2109
AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2110
AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2111
AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2112
AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2113
AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2114
AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2115
AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2116
AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2118
int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2120
for (n = 0; n < w4 * 2; n++) {
2121
AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2122
AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2124
for (n = 0; n < h4 * 2; n++) {
2125
AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2126
AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
2132
for (y = 0; y < h4; y++) {
2133
int x, o = (row + y) * s->sb_cols * 8 + col;
2134
struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2137
for (x = 0; x < w4; x++) {
2141
} else if (b->comp) {
2142
for (x = 0; x < w4; x++) {
2143
mv[x].ref[0] = b->ref[0];
2144
mv[x].ref[1] = b->ref[1];
2145
AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2146
AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2149
for (x = 0; x < w4; x++) {
2150
mv[x].ref[0] = b->ref[0];
2152
AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2158
// FIXME merge cnt/eob arguments?
2159
static av_always_inline int
2160
decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2161
int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2162
unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2163
int nnz, const int16_t *scan, const int16_t (*nb)[2],
2164
const int16_t *band_counts, const int16_t *qmul)
2166
int i = 0, band = 0, band_left = band_counts[band];
2167
uint8_t *tp = p[0][nnz];
2168
uint8_t cache[1024];
2173
val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2174
eob[band][nnz][val]++;
2179
if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2180
cnt[band][nnz][0]++;
2182
band_left = band_counts[++band];
2184
nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2186
if (++i == n_coeffs)
2187
break; //invalid input; blocks should end with EOB
2192
if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2193
cnt[band][nnz][1]++;
2197
// fill in p[3-10] (model fill) - only once per frame for each pos
2199
memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2201
cnt[band][nnz][2]++;
2202
if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2203
if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2204
cache[rc] = val = 2;
2206
val = 3 + vp56_rac_get_prob(c, tp[5]);
2209
} else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2211
if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2212
val = 5 + vp56_rac_get_prob(c, 159);
2214
val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2215
val += vp56_rac_get_prob(c, 145);
2219
if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2220
if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2221
val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2222
val += (vp56_rac_get_prob(c, 148) << 1);
2223
val += vp56_rac_get_prob(c, 140);
2225
val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2226
val += (vp56_rac_get_prob(c, 155) << 2);
2227
val += (vp56_rac_get_prob(c, 140) << 1);
2228
val += vp56_rac_get_prob(c, 135);
2230
} else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2231
val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2232
val += (vp56_rac_get_prob(c, 157) << 3);
2233
val += (vp56_rac_get_prob(c, 141) << 2);
2234
val += (vp56_rac_get_prob(c, 134) << 1);
2235
val += vp56_rac_get_prob(c, 130);
2238
if (!is8bitsperpixel) {
2240
val += vp56_rac_get_prob(c, 255) << 17;
2241
val += vp56_rac_get_prob(c, 255) << 16;
2243
val += (vp56_rac_get_prob(c, 255) << 15);
2244
val += (vp56_rac_get_prob(c, 255) << 14);
2246
val += (vp56_rac_get_prob(c, 254) << 13);
2247
val += (vp56_rac_get_prob(c, 254) << 12);
2248
val += (vp56_rac_get_prob(c, 254) << 11);
2249
val += (vp56_rac_get_prob(c, 252) << 10);
2250
val += (vp56_rac_get_prob(c, 249) << 9);
2251
val += (vp56_rac_get_prob(c, 243) << 8);
2252
val += (vp56_rac_get_prob(c, 230) << 7);
2253
val += (vp56_rac_get_prob(c, 196) << 6);
2254
val += (vp56_rac_get_prob(c, 177) << 5);
2255
val += (vp56_rac_get_prob(c, 153) << 4);
2256
val += (vp56_rac_get_prob(c, 140) << 3);
2257
val += (vp56_rac_get_prob(c, 133) << 2);
2258
val += (vp56_rac_get_prob(c, 130) << 1);
2259
val += vp56_rac_get_prob(c, 129);
2263
#define STORE_COEF(c, i, v) do { \
2264
if (is8bitsperpixel) { \
2267
AV_WN32A(&c[i * 2], v); \
2271
band_left = band_counts[++band];
2273
STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2275
STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2276
nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2278
} while (++i < n_coeffs);
2283
/**
 * Decode one non-32x32 coefficient block at 8 bits per pixel.
 * Thin monomorphized wrapper: forwards to decode_coeffs_b_generic() with
 * is_tx32x32=0, is8bitsperpixel=1, bpp=8 so the generic body is inlined
 * with those parameters constant-folded.
 * (Restored: the extracted text had stray line-number lines and the
 * function braces missing.)
 */
static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
                                unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                const int16_t (*nb)[2], const int16_t *band_counts,
                                const int16_t *qmul)
{
    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
                                   nnz, scan, nb, band_counts, qmul);
}
2293
/**
 * Decode one 32x32 coefficient block at 8 bits per pixel.
 * Thin monomorphized wrapper: forwards to decode_coeffs_b_generic() with
 * is_tx32x32=1, is8bitsperpixel=1, bpp=8.
 * (Restored: the extracted text had stray line-number lines and the
 * function braces missing.)
 */
static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
                                  unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                  uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                  const int16_t (*nb)[2], const int16_t *band_counts,
                                  const int16_t *qmul)
{
    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
                                   nnz, scan, nb, band_counts, qmul);
}
2303
/**
 * Decode one non-32x32 coefficient block at 10/12 bits per pixel.
 * Forwards to decode_coeffs_b_generic() with is_tx32x32=0,
 * is8bitsperpixel=0 and the runtime bpp from the context (s->bpp).
 * (Restored: the extracted text had stray line-number lines and the
 * function braces missing.)
 */
static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
                                 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                 const int16_t (*nb)[2], const int16_t *band_counts,
                                 const int16_t *qmul)
{
    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
                                   nnz, scan, nb, band_counts, qmul);
}
2313
/**
 * Decode one 32x32 coefficient block at 10/12 bits per pixel.
 * Forwards to decode_coeffs_b_generic() with is_tx32x32=1,
 * is8bitsperpixel=0 and the runtime bpp from the context (s->bpp).
 * (Restored: the extracted text had stray line-number lines and the
 * function braces missing.)
 */
static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
                                   unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
                                   uint8_t (*p)[6][11], int nnz, const int16_t *scan,
                                   const int16_t (*nb)[2], const int16_t *band_counts,
                                   const int16_t *qmul)
{
    return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
                                   nnz, scan, nb, band_counts, qmul);
}
2323
/* Decode all residual coefficients (luma then both chroma planes) for the
 * current block, dispatching per transform size to the monomorphized
 * decode_coeffs_b*_{8,16}bpp wrappers and maintaining the above/left
 * nonzero-context arrays (a/l). Returns whether any nonzero coefficient
 * was decoded (total_coeff).
 *
 * NOTE(review): this region is garbled by extraction — the bare decimal
 * lines below (e.g. "2325") are original line-number artifacts, and several
 * structural lines are missing (the function's braces, the VP9Block *b
 * declaration, the switch (b->tx) / switch (b->uvtx) scaffolding around the
 * DECODE_*_COEF_LOOP invocations, and the final return). Restore this
 * function from the upstream FFmpeg vp9.c before building; the comments
 * below annotate only the surviving code.
 */
static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2325
VP9Context *s = ctx->priv_data;
2327
int row = s->row, col = s->col;
2328
/* per-txfm-size probability / count / eob tables for the luma plane */
uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2329
unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2330
unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2331
int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2332
/* clip the 4x4-unit loop bounds to the visible frame area */
int end_x = FFMIN(2 * (s->cols - col), w4);
2333
int end_y = FFMIN(2 * (s->rows - row), h4);
2334
int n, pl, x, y, res;
2335
int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2336
int tx = 4 * s->lossless + b->tx;
2337
const int16_t * const *yscans = vp9_scans[tx];
2338
const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2339
const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2340
const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2341
uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2342
uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
2343
/* number of coefficients per probability band, indexed by tx size */
static const int16_t band_counts[4][8] = {
2344
{ 1, 2, 3, 4, 3, 16 - 13 },
2345
{ 1, 2, 3, 4, 11, 64 - 21 },
2346
{ 1, 2, 3, 4, 11, 256 - 21 },
2347
{ 1, 2, 3, 4, 11, 1024 - 21 },
2349
const int16_t *y_band_counts = band_counts[b->tx];
2350
const int16_t *uv_band_counts = band_counts[b->uvtx];
2351
int bytesperpixel = is8bitsperpixel ? 1 : 2;
2352
int total_coeff = 0;
2354
/* collapse 2/4/8 per-4x4 context entries into one value per tx block */
#define MERGE(la, end, step, rd) \
2355
for (n = 0; n < end; n += step) \
2356
la[n] = !!rd(&la[n])
2357
#define MERGE_CTX(step, rd) \
2359
MERGE(l, end_y, step, rd); \
2360
MERGE(a, end_x, step, rd); \
2363
#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2364
for (n = 0, y = 0; y < end_y; y += step) { \
2365
for (x = 0; x < end_x; x += step, n += step * step) { \
2366
enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2367
res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2368
(s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2369
c, e, p, a[x] + l[y], yscans[txtp], \
2370
ynbs[txtp], y_band_counts, qmul[0]); \
2371
a[x] = l[y] = !!res; \
2372
total_coeff |= !!res; \
2374
AV_WN16A(&s->eob[n], res); \
2381
/* re-expand the merged context value back over the covered 4x4 units */
#define SPLAT(la, end, step, cond) \
2383
for (n = 1; n < end; n += step) \
2384
la[n] = la[n - 1]; \
2385
} else if (step == 4) { \
2387
for (n = 0; n < end; n += step) \
2388
AV_WN32A(&la[n], la[n] * 0x01010101); \
2390
for (n = 0; n < end; n += step) \
2391
memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2393
} else /* step == 8 */ { \
2395
if (HAVE_FAST_64BIT) { \
2396
for (n = 0; n < end; n += step) \
2397
AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2399
for (n = 0; n < end; n += step) { \
2400
uint32_t v32 = la[n] * 0x01010101; \
2401
AV_WN32A(&la[n], v32); \
2402
AV_WN32A(&la[n + 4], v32); \
2406
for (n = 0; n < end; n += step) \
2407
memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2410
#define SPLAT_CTX(step) \
2412
SPLAT(a, end_x, step, end_x == w4); \
2413
SPLAT(l, end_y, step, end_y == h4); \
2419
/* luma: one DECODE_Y_COEF_LOOP per transform size (TX_4X4..TX_32X32);
 * the switch dispatching between these cases is missing in this extract */
DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2422
MERGE_CTX(2, AV_RN16A);
2423
DECODE_Y_COEF_LOOP(2, 0,);
2427
MERGE_CTX(4, AV_RN32A);
2428
DECODE_Y_COEF_LOOP(4, 0,);
2432
MERGE_CTX(8, AV_RN64A);
2433
DECODE_Y_COEF_LOOP(8, 0, 32);
2438
#define DECODE_UV_COEF_LOOP(step, v) \
2439
for (n = 0, y = 0; y < end_y; y += step) { \
2440
for (x = 0; x < end_x; x += step, n += step * step) { \
2441
res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2442
(s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2443
16 * step * step, c, e, p, a[x] + l[y], \
2444
uvscan, uvnb, uv_band_counts, qmul[1]); \
2445
a[x] = l[y] = !!res; \
2446
total_coeff |= !!res; \
2448
AV_WN16A(&s->uveob[pl][n], res); \
2450
s->uveob[pl][n] = res; \
2455
/* switch to the chroma probability/count tables */
p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2456
c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2457
e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2462
for (pl = 0; pl < 2; pl++) {
2463
a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2464
l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2467
DECODE_UV_COEF_LOOP(1,);
2470
MERGE_CTX(2, AV_RN16A);
2471
DECODE_UV_COEF_LOOP(2,);
2475
MERGE_CTX(4, AV_RN32A);
2476
DECODE_UV_COEF_LOOP(4,);
2480
MERGE_CTX(8, AV_RN64A);
2481
DECODE_UV_COEF_LOOP(8, 32);
2490
/** Decode coefficients for the current block at 8 bits per pixel. */
static int decode_coeffs_8bpp(AVCodecContext *ctx)
{
    return decode_coeffs(ctx, 1);
}
2495
/** Decode coefficients for the current block at 10/12 bits per pixel. */
static int decode_coeffs_16bpp(AVCodecContext *ctx)
{
    return decode_coeffs(ctx, 0);
}
2500
/* Prepare the above (*a) and left (l) edge-pixel arrays for one intra
 * prediction block, substituting fixed DC values where neighbours are
 * unavailable, and return the (possibly remapped) prediction mode.
 *
 * NOTE(review): this region is garbled by extraction — the bare decimal
 * lines below are line-number artifacts and several structural lines are
 * missing (the opening brace, `n_px_need_tr = 4;`, the `if (have_top)`
 * guard and its else branches, macro closing lines, `return mode;` and the
 * closing brace). Restore from upstream FFmpeg vp9.c before building; the
 * comments below annotate only the surviving code.
 */
static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2501
uint8_t *dst_edge, ptrdiff_t stride_edge,
2502
uint8_t *dst_inner, ptrdiff_t stride_inner,
2503
uint8_t *l, int col, int x, int w,
2504
int row, int y, enum TxfmMode tx,
2505
int p, int ss_h, int ss_v, int bytesperpixel)
2507
/* neighbour availability: top row of frame / left edge of tile */
int have_top = row > 0 || y > 0;
2508
int have_left = col > s->tiling.tile_col_start || x > 0;
2509
int have_right = x < w - 1;
2511
/* remap modes that need unavailable neighbours to fixed-DC fallbacks */
static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2512
[VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2513
{ DC_127_PRED, VERT_PRED } },
2514
[HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2515
{ HOR_PRED, HOR_PRED } },
2516
[DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2517
{ LEFT_DC_PRED, DC_PRED } },
2518
[DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2519
{ DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2520
[DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2521
{ DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2522
[VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2523
{ VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2524
[HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2525
{ HOR_DOWN_PRED, HOR_DOWN_PRED } },
2526
[VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2527
{ DC_127_PRED, VERT_LEFT_PRED } },
2528
[HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2529
{ HOR_UP_PRED, HOR_UP_PRED } },
2530
[TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2531
{ HOR_PRED, TM_VP8_PRED } },
2533
/* which edges each prediction mode reads */
static const struct {
2534
uint8_t needs_left:1;
2535
uint8_t needs_top:1;
2536
uint8_t needs_topleft:1;
2537
uint8_t needs_topright:1;
2538
uint8_t invert_left:1;
2539
} edges[N_INTRA_PRED_MODES] = {
2540
[VERT_PRED] = { .needs_top = 1 },
2541
[HOR_PRED] = { .needs_left = 1 },
2542
[DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2543
[DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2544
[DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2545
[VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2546
[HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2547
[VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2548
[HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2549
[TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2550
[LEFT_DC_PRED] = { .needs_left = 1 },
2551
[TOP_DC_PRED] = { .needs_top = 1 },
2552
[DC_128_PRED] = { 0 },
2553
[DC_127_PRED] = { 0 },
2554
[DC_129_PRED] = { 0 }
2557
av_assert2(mode >= 0 && mode < 10);
2558
mode = mode_conv[mode][have_left][have_top];
2559
if (edges[mode].needs_top) {
2560
uint8_t *top, *topleft;
2561
int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2562
int n_px_need_tr = 0;
2564
if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2567
// if top of sb64-row, use s->intra_pred_data[] instead of
2568
// dst[-stride] for intra prediction (it contains pre- instead of
2569
// post-loopfilter data)
2571
top = !(row & 7) && !y ?
2572
s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2573
y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2575
topleft = !(row & 7) && !y ?
2576
s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2577
y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2578
&dst_inner[-stride_inner];
2582
(!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2583
(tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2584
n_px_need + n_px_need_tr <= n_px_have) {
2588
if (n_px_need <= n_px_have) {
2589
memcpy(*a, top, n_px_need * bytesperpixel);
2591
/* bpp-generic fill helpers: 1 byte/px uses memset, 2 bytes/px loops */
#define memset_bpp(c, i1, v, i2, num) do { \
2592
if (bytesperpixel == 1) { \
2593
memset(&(c)[(i1)], (v)[(i2)], (num)); \
2595
int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2596
for (n = 0; n < (num); n++) { \
2597
AV_WN16A(&(c)[((i1) + n) * 2], val); \
2601
memcpy(*a, top, n_px_have * bytesperpixel);
2602
memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2605
#define memset_val(c, val, num) do { \
2606
if (bytesperpixel == 1) { \
2607
memset((c), (val), (num)); \
2610
for (n = 0; n < (num); n++) { \
2611
AV_WN16A(&(c)[n * 2], (val)); \
2615
memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2617
if (edges[mode].needs_topleft) {
2618
if (have_left && have_top) {
2619
#define assign_bpp(c, i1, v, i2) do { \
2620
if (bytesperpixel == 1) { \
2621
(c)[(i1)] = (v)[(i2)]; \
2623
AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2626
assign_bpp(*a, -1, topleft, -1);
2628
#define assign_val(c, i, v) do { \
2629
if (bytesperpixel == 1) { \
2632
AV_WN16A(&(c)[(i) * 2], (v)); \
2635
assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2638
if (tx == TX_4X4 && edges[mode].needs_topright) {
2639
if (have_top && have_right &&
2640
n_px_need + n_px_need_tr <= n_px_have) {
2641
memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2643
memset_bpp(*a, 4, *a, 3, 4);
2648
if (edges[mode].needs_left) {
2650
int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2651
uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2652
ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2654
if (edges[mode].invert_left) {
2655
if (n_px_need <= n_px_have) {
2656
for (i = 0; i < n_px_need; i++)
2657
assign_bpp(l, i, &dst[i * stride], -1);
2659
for (i = 0; i < n_px_have; i++)
2660
assign_bpp(l, i, &dst[i * stride], -1);
2661
memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2664
if (n_px_need <= n_px_have) {
2665
for (i = 0; i < n_px_need; i++)
2666
assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2668
for (i = 0; i < n_px_have; i++)
2669
assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2670
memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2674
memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
2681
/**
 * Reconstruct an intra-coded block: run intra prediction per transform
 * block (Y plane, then both chroma planes) and add the decoded residuals.
 *
 * (Restored from garbled extraction: stray line-number artifacts and the
 * missing loop/brace glue were repaired; verify against upstream vp9.c.)
 *
 * @param y_off   byte offset of this block in the luma plane of the frame
 * @param uv_off  byte offset in the chroma planes
 * @param bytesperpixel 1 for 8bpp, 2 for 10/12bpp
 */
static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
                                         ptrdiff_t uv_off, int bytesperpixel)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col;
    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
    // clip loop bounds to the visible frame area (in 4x4 units)
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
    int uvstep1d = 1 << b->uvtx, p;
    // dst: intermediate (possibly edge-emulated) buffer; dst_r: real frame
    uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
    LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
    LOCAL_ALIGNED_32(uint8_t, l, [64]);

    // Y plane
    for (n = 0, y = 0; y < end_y; y += step1d) {
        uint8_t *ptr = dst, *ptr_r = dst_r;
        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
             ptr_r += 4 * step1d * bytesperpixel, n += step) {
            // sub-8x8 blocks carry one mode per 4x4 sub-block
            int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
                               y * 2 + x : 0];
            uint8_t *a = &a_buf[32];
            enum TxfmType txtp = vp9_intra_txfm_type[mode];
            int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];

            mode = check_intra_mode(s, mode, &a, ptr_r,
                                    s->frames[CUR_FRAME].tf.f->linesize[0],
                                    ptr, s->y_stride, l,
                                    col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
            s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
            if (eob)
                s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
                                           s->block + 16 * n * bytesperpixel, eob);
        }
        dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
        dst += 4 * step1d * s->y_stride;
    }

    // U/V planes (shared DCT_DCT transform type, possibly subsampled)
    w4 >>= s->ss_h;
    end_x >>= s->ss_h;
    end_y >>= s->ss_v;
    step = 1 << (b->uvtx * 2);
    for (p = 0; p < 2; p++) {
        dst   = s->dst[1 + p];
        dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
            uint8_t *ptr = dst, *ptr_r = dst_r;
            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
                 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
                int mode = b->uvmode;
                uint8_t *a = &a_buf[32];
                int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];

                mode = check_intra_mode(s, mode, &a, ptr_r,
                                        s->frames[CUR_FRAME].tf.f->linesize[1],
                                        ptr, s->uv_stride, l, col, x, w4, row, y,
                                        b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
                s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
                if (eob)
                    s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
                                                    s->uvblock[p] + 16 * n * bytesperpixel, eob);
            }
            dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
            dst += 4 * uvstep1d * s->uv_stride;
        }
    }
}
2751
/** Intra reconstruction entry point for 8 bits per pixel. */
static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
{
    intra_recon(ctx, y_off, uv_off, 1);
}
2756
/** Intra reconstruction entry point for 10/12 bits per pixel. */
static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
{
    intra_recon(ctx, y_off, uv_off, 2);
}
2761
static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2762
uint8_t *dst, ptrdiff_t dst_stride,
2763
const uint8_t *ref, ptrdiff_t ref_stride,
2764
ThreadFrame *ref_frame,
2765
ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2766
int px, int py, int pw, int ph,
2767
int bw, int bh, int w, int h, int bytesperpixel,
2768
const uint16_t *scale, const uint8_t *step)
2770
#define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2772
int refbw_m1, refbh_m1;
2776
mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2777
mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2778
// BUG libvpx seems to scale the two components separately. This introduces
2779
// rounding errors but we have to reproduce them to be exactly compatible
2780
// with the output from libvpx...
2781
mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2782
my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2786
ref += y * ref_stride + x * bytesperpixel;
2789
refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2790
refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2791
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
2792
// we use +7 because the last 7 pixels of each sbrow can be changed in
2793
// the longest loopfilter of the next sbrow
2794
th = (y + refbh_m1 + 4 + 7) >> 6;
2795
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2796
if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2797
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2798
ref - 3 * ref_stride - 3 * bytesperpixel,
2800
refbw_m1 + 8, refbh_m1 + 8,
2801
x - 3, y - 3, w, h);
2802
ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2805
smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
2808
static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2809
uint8_t *dst_u, uint8_t *dst_v,
2810
ptrdiff_t dst_stride,
2811
const uint8_t *ref_u, ptrdiff_t src_stride_u,
2812
const uint8_t *ref_v, ptrdiff_t src_stride_v,
2813
ThreadFrame *ref_frame,
2814
ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2815
int px, int py, int pw, int ph,
2816
int bw, int bh, int w, int h, int bytesperpixel,
2817
const uint16_t *scale, const uint8_t *step)
2820
int refbw_m1, refbh_m1;
2825
// BUG https://code.google.com/p/webm/issues/detail?id=820
2826
mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2827
mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2829
mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2830
mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
2833
// BUG https://code.google.com/p/webm/issues/detail?id=820
2834
mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2835
my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2837
mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2838
my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2843
ref_u += y * src_stride_u + x * bytesperpixel;
2844
ref_v += y * src_stride_v + x * bytesperpixel;
2847
refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2848
refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2849
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
2850
// we use +7 because the last 7 pixels of each sbrow can be changed in
2851
// the longest loopfilter of the next sbrow
2852
th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2853
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2854
if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2855
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2856
ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2858
refbw_m1 + 8, refbh_m1 + 8,
2859
x - 3, y - 3, w, h);
2860
ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2861
smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2863
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2864
ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2866
refbw_m1 + 8, refbh_m1 + 8,
2867
x - 3, y - 3, w, h);
2868
ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2869
smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2871
smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2872
smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
2876
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2877
px, py, pw, ph, bw, bh, w, h, i) \
2878
mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2879
mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2880
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2881
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2882
row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2883
mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2884
row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2885
s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2887
#define FN(x) x##_scaled_8bpp
2888
#define BYTES_PER_PIXEL 1
2889
#include "vp9_mc_template.c"
2891
#undef BYTES_PER_PIXEL
2892
#define FN(x) x##_scaled_16bpp
2893
#define BYTES_PER_PIXEL 2
2894
#include "vp9_mc_template.c"
2896
#undef mc_chroma_dir
2898
#undef BYTES_PER_PIXEL
2901
static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2902
uint8_t *dst, ptrdiff_t dst_stride,
2903
const uint8_t *ref, ptrdiff_t ref_stride,
2904
ThreadFrame *ref_frame,
2905
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2906
int bw, int bh, int w, int h, int bytesperpixel)
2908
int mx = mv->x, my = mv->y, th;
2912
ref += y * ref_stride + x * bytesperpixel;
2915
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
2916
// we use +7 because the last 7 pixels of each sbrow can be changed in
2917
// the longest loopfilter of the next sbrow
2918
th = (y + bh + 4 * !!my + 7) >> 6;
2919
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2920
if (x < !!mx * 3 || y < !!my * 3 ||
2921
x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2922
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2923
ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2925
bw + !!mx * 7, bh + !!my * 7,
2926
x - !!mx * 3, y - !!my * 3, w, h);
2927
ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2930
mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2933
static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2934
uint8_t *dst_u, uint8_t *dst_v,
2935
ptrdiff_t dst_stride,
2936
const uint8_t *ref_u, ptrdiff_t src_stride_u,
2937
const uint8_t *ref_v, ptrdiff_t src_stride_v,
2938
ThreadFrame *ref_frame,
2939
ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2940
int bw, int bh, int w, int h, int bytesperpixel)
2942
int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2946
ref_u += y * src_stride_u + x * bytesperpixel;
2947
ref_v += y * src_stride_v + x * bytesperpixel;
2950
// FIXME bilinear filter only needs 0/1 pixels, not 3/4
2951
// we use +7 because the last 7 pixels of each sbrow can be changed in
2952
// the longest loopfilter of the next sbrow
2953
th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2954
ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2955
if (x < !!mx * 3 || y < !!my * 3 ||
2956
x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2957
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2958
ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2960
bw + !!mx * 7, bh + !!my * 7,
2961
x - !!mx * 3, y - !!my * 3, w, h);
2962
ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2963
mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2965
s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2966
ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2968
bw + !!mx * 7, bh + !!my * 7,
2969
x - !!mx * 3, y - !!my * 3, w, h);
2970
ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2971
mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2973
mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2974
mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2978
#define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2979
px, py, pw, ph, bw, bh, w, h, i) \
2980
mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2981
mv, bw, bh, w, h, bytesperpixel)
2982
#define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2983
row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2984
mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2985
row, col, mv, bw, bh, w, h, bytesperpixel)
2987
#define FN(x) x##_8bpp
2988
#define BYTES_PER_PIXEL 1
2989
#include "vp9_mc_template.c"
2991
#undef BYTES_PER_PIXEL
2992
#define FN(x) x##_16bpp
2993
#define BYTES_PER_PIXEL 2
2994
#include "vp9_mc_template.c"
2995
#undef mc_luma_dir_dir
2996
#undef mc_chroma_dir_dir
2998
#undef BYTES_PER_PIXEL
3001
/**
 * Reconstruct an inter-coded block: run motion-compensated prediction
 * (scaled path when either reference has a different resolution), then,
 * unless the block is skipped, add the decoded residuals for luma and
 * both chroma planes.
 *
 * (Restored from garbled extraction: stray line-number artifacts and the
 * missing else/brace glue and `if (eob)` guards were repaired; verify
 * against upstream vp9.c.)
 */
static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col;

    // prediction: the scaled path handles references whose resolution
    // differs from the current frame (s->mvscale[ref][0] nonzero)
    if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
        if (bytesperpixel == 1) {
            inter_pred_scaled_8bpp(ctx);
        } else {
            inter_pred_scaled_16bpp(ctx);
        }
    } else {
        if (bytesperpixel == 1) {
            inter_pred_8bpp(ctx);
        } else {
            inter_pred_16bpp(ctx);
        }
    }

    if (!b->skip) {
        /* mostly copied intra_recon() */

        int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
        int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
        int end_x = FFMIN(2 * (s->cols - col), w4);
        int end_y = FFMIN(2 * (s->rows - row), h4);
        int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
        int uvstep1d = 1 << b->uvtx, p;
        uint8_t *dst = s->dst[0];

        // y itxfm add
        for (n = 0, y = 0; y < end_y; y += step1d) {
            uint8_t *ptr = dst;
            for (x = 0; x < end_x; x += step1d,
                 ptr += 4 * step1d * bytesperpixel, n += step) {
                int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];

                if (eob)
                    s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
                                                  s->block + 16 * n * bytesperpixel, eob);
            }
            dst += 4 * s->y_stride * step1d;
        }

        // uv itxfm add
        end_x >>= s->ss_h;
        end_y >>= s->ss_v;
        step = 1 << (b->uvtx * 2);
        for (p = 0; p < 2; p++) {
            dst = s->dst[p + 1];
            for (n = 0, y = 0; y < end_y; y += uvstep1d) {
                uint8_t *ptr = dst;
                for (x = 0; x < end_x; x += uvstep1d,
                     ptr += 4 * uvstep1d * bytesperpixel, n += step) {
                    int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];

                    if (eob)
                        s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
                                                        s->uvblock[p] + 16 * n * bytesperpixel, eob);
                }
                dst += 4 * uvstep1d * s->uv_stride;
            }
        }
    }
}
3067
/** Inter reconstruction entry point for 8 bits per pixel. */
static void inter_recon_8bpp(AVCodecContext *ctx)
{
    inter_recon(ctx, 1);
}
3072
/** Inter reconstruction entry point for 10/12 bits per pixel. */
static void inter_recon_16bpp(AVCodecContext *ctx)
{
    inter_recon(ctx, 2);
}
3077
/* Build the per-superblock loopfilter edge bitmasks for one block: for each
 * of the 8 rows of 8-pixel edges, record which column positions need the
 * 16-, 8- or 4-pixel-wide filter (mask[0] = column edges, mask[1] = row
 * edges), taking chroma subsampling (ss_h/ss_v) into account.
 *
 * NOTE(review): this region is garbled by extraction — the bare decimal
 * lines below are line-number artifacts, and several structural lines are
 * missing (the opening brace, the body of the `tx == TX_4X4 && (ss_v|ss_h)`
 * rounding block, `else` lines, the step1d computation and closing braces).
 * Restore from upstream FFmpeg vp9.c before building; the comments below
 * annotate only the surviving code.
 */
static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3078
int row_and_7, int col_and_7,
3079
int w, int h, int col_end, int row_end,
3080
enum TxfmMode tx, int skip_inter)
3082
static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3083
static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3085
// FIXME I'm pretty sure all loops can be replaced by a single LUT if
3086
// we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3087
// and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3088
// use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3090
// the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3091
// edges. This means that for UV, we work on two subsampled blocks at
3092
// a time, and we only use the topleft block's mode information to set
3093
// things like block strength. Thus, for any block size smaller than
3094
// 16x16, ignore the odd portion of the block.
3095
if (tx == TX_4X4 && (ss_v | ss_h)) {
3110
if (tx == TX_4X4 && !skip_inter) {
3111
int t = 1 << col_and_7, m_col = (t << w) - t, y;
3112
// on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3113
int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3115
for (y = row_and_7; y < h + row_and_7; y++) {
3116
int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3118
mask[0][y][1] |= m_row_8;
3119
mask[0][y][2] |= m_row_4;
3120
// for odd lines, if the odd col is not being filtered,
3121
// skip odd row also:
3128
// if a/c are even row/col and b/d are odd, and d is skipped,
3129
// e.g. right edge of size-66x66.webm, then skip b also (bug)
3130
if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3131
mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3133
mask[1][y][col_mask_id] |= m_col;
3136
mask[0][y][3] |= m_col;
3138
if (ss_h && (col_end & 1))
3139
mask[1][y][3] |= (t << (w - 1)) - t;
3141
mask[1][y][3] |= m_col;
3145
int y, t = 1 << col_and_7, m_col = (t << w) - t;
3148
/* mask_id selects 8px- vs wider filter slot for this tx size */
int mask_id = (tx == TX_8X8);
3149
static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3150
int l2 = tx + ss_h - 1, step1d;
3151
int m_row = m_col & masks[l2];
3153
// at odd UV col/row edges tx16/tx32 loopfilter edges, force
3154
// 8wd loopfilter to prevent going off the visible edge.
3155
if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3156
int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3157
int m_row_8 = m_row - m_row_16;
3159
for (y = row_and_7; y < h + row_and_7; y++) {
3160
mask[0][y][0] |= m_row_16;
3161
mask[0][y][1] |= m_row_8;
3164
for (y = row_and_7; y < h + row_and_7; y++)
3165
mask[0][y][mask_id] |= m_row;
3170
if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3171
for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3172
mask[1][y][0] |= m_col;
3173
if (y - row_and_7 == h - 1)
3174
mask[1][y][1] |= m_col;
3176
for (y = row_and_7; y < h + row_and_7; y += step1d)
3177
mask[1][y][mask_id] |= m_col;
3179
} else if (tx != TX_4X4) {
3182
mask_id = (tx == TX_8X8) || (h == ss_v);
3183
mask[1][row_and_7][mask_id] |= m_col;
3184
mask_id = (tx == TX_8X8) || (w == ss_h);
3185
for (y = row_and_7; y < h + row_and_7; y++)
3186
mask[0][y][mask_id] |= t;
3188
int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3190
for (y = row_and_7; y < h + row_and_7; y++) {
3191
mask[0][y][2] |= t4;
3192
mask[0][y][1] |= t8;
3194
mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
3199
static void decode_b(AVCodecContext *ctx, int row, int col,
3200
struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3201
enum BlockLevel bl, enum BlockPartition bp)
3203
VP9Context *s = ctx->priv_data;
3205
enum BlockSize bs = bl * 3 + bp;
3206
int bytesperpixel = s->bytesperpixel;
3207
int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3209
AVFrame *f = s->frames[CUR_FRAME].tf.f;
3215
s->min_mv.x = -(128 + col * 64);
3216
s->min_mv.y = -(128 + row * 64);
3217
s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3218
s->max_mv.y = 128 + (s->rows - row - h4) * 64;
3224
b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3225
(s->ss_v && h4 * 2 == (1 << b->tx)));
3230
if (bytesperpixel == 1) {
3231
has_coeffs = decode_coeffs_8bpp(ctx);
3233
has_coeffs = decode_coeffs_16bpp(ctx);
3235
if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3237
memset(&s->above_skip_ctx[col], 1, w4);
3238
memset(&s->left_skip_ctx[s->row7], 1, h4);
3243
#define SPLAT_ZERO_CTX(v, n) \
3245
case 1: v = 0; break; \
3246
case 2: AV_ZERO16(&v); break; \
3247
case 4: AV_ZERO32(&v); break; \
3248
case 8: AV_ZERO64(&v); break; \
3249
case 16: AV_ZERO128(&v); break; \
3251
#define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3253
SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3254
if (s->ss_##dir2) { \
3255
SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3256
SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3258
SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3259
SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3264
case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3265
case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3266
case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3267
case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3270
case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3271
case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3272
case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3273
case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
3278
s->block += w4 * h4 * 64 * bytesperpixel;
3279
s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3280
s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3281
s->eob += 4 * w4 * h4;
3282
s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3283
s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3289
// emulated overhangs if the stride of the target buffer can't hold. This
3290
// makes it possible to support emu-edge and so on even if we have large block
3292
emu[0] = (col + w4) * 8 > f->linesize[0] ||
3293
(row + h4) > s->rows;
3294
emu[1] = (col + w4) * 4 > f->linesize[1] ||
3295
(row + h4) > s->rows;
3297
s->dst[0] = s->tmp_y;
3300
s->dst[0] = f->data[0] + yoff;
3301
s->y_stride = f->linesize[0];
3304
s->dst[1] = s->tmp_uv[0];
3305
s->dst[2] = s->tmp_uv[1];
3308
s->dst[1] = f->data[1] + uvoff;
3309
s->dst[2] = f->data[2] + uvoff;
3310
s->uv_stride = f->linesize[1];
3314
intra_recon_16bpp(ctx, yoff, uvoff);
3316
intra_recon_8bpp(ctx, yoff, uvoff);
3320
inter_recon_16bpp(ctx);
3322
inter_recon_8bpp(ctx);
3326
int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3328
for (n = 0; o < w; n++) {
3333
s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3334
s->tmp_y + o, 128, h, 0, 0);
3335
o += bw * bytesperpixel;
3340
int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3341
int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3343
for (n = s->ss_h; o < w; n++) {
3348
s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3349
s->tmp_uv[0] + o, 128, h, 0, 0);
3350
s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3351
s->tmp_uv[1] + o, 128, h, 0, 0);
3352
o += bw * bytesperpixel;
3357
// pick filter level and find edges to apply filter to
3358
if (s->filter.level &&
3359
(lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3360
[b->mode[3] != ZEROMV]) > 0) {
3361
int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3362
int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3364
setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3365
mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3366
if (s->ss_h || s->ss_v)
3367
mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3368
s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3369
s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3370
b->uvtx, skip_inter);
3372
if (!s->filter.lim_lut[lvl]) {
3373
int sharp = s->filter.sharpness;
3377
limit >>= (sharp + 3) >> 2;
3378
limit = FFMIN(limit, 9 - sharp);
3380
limit = FFMAX(limit, 1);
3382
s->filter.lim_lut[lvl] = limit;
3383
s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3389
s->block += w4 * h4 * 64 * bytesperpixel;
3390
s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3391
s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3392
s->eob += 4 * w4 * h4;
3393
s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3394
s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3398
// Recursively decode one superblock: read the partition mode for the current
// block level from the arithmetic coder and either decode a leaf block
// (decode_b) or recurse into the sub-blocks. Edge blocks that do not fully
// fit in the frame have restricted partition choices.
static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = ctx->priv_data;
    // partition probability context derived from above/left neighbours
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    // keyframes/intra-only frames use the fixed default partition probabilities
    const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
                                                     s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl; // half block size, in units of 8x8 blocks
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        // smallest level: always a leaf, no further recursion possible
        bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            // block fully inside the frame: all four partition modes possible
            bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_V:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_SPLIT:
                // recurse into the four quadrants at the next block level
                decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            // bottom edge: only split-vs-horizontal is coded (one bit)
            bp = PARTITION_SPLIT;
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(ctx, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        } else {
            bp = PARTITION_H;
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            // right edge: only split-vs-vertical is coded (one bit)
            bp = PARTITION_SPLIT;
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        } else {
            bp = PARTITION_V;
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        // bottom-right corner: forced split, no bits consumed
        bp = PARTITION_SPLIT;
        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;
}
3477
// Second-pass variant of decode_sb(): replays the block structure stored in
// s->b during the first pass (no arithmetic coder reads), so reconstruction
// can be repeated for two-pass frame threading.
static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    ptrdiff_t hbs = 4 >> bl; // half block size, in units of 8x8 blocks
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        av_assert2(b->bl == BL_8X8);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (s->b->bl == bl) {
        // leaf at this level: replay the stored partition mode
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        // stored block is smaller than this level: recurse into quadrants,
        // skipping quadrants that fall outside the visible frame
        decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            } else {
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }
}
3526
static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3527
uint8_t *lvl, uint8_t (*mask)[4],
3528
uint8_t *dst, ptrdiff_t ls)
3530
int y, x, bytesperpixel = s->bytesperpixel;
3532
// filter edges between columns (e.g. block1 | block2)
3533
for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
3534
uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3535
unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3536
unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3537
unsigned hm = hm1 | hm2 | hm13 | hm23;
3539
for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
3542
int L = *l, H = L >> 4;
3543
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3545
if (hmask1[0] & x) {
3546
if (hmask2[0] & x) {
3547
av_assert2(l[8 << ss_v] == L);
3548
s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3550
s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
3552
} else if (hm2 & x) {
3555
E |= s->filter.mblim_lut[L] << 8;
3556
I |= s->filter.lim_lut[L] << 8;
3557
s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3559
[0](ptr, ls, E, I, H);
3561
s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3562
[0](ptr, ls, E, I, H);
3564
} else if (hm2 & x) {
3565
int L = l[8 << ss_v], H = L >> 4;
3566
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3568
s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3569
[0](ptr + 8 * ls, ls, E, I, H);
3577
int L = *l, H = L >> 4;
3578
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3583
E |= s->filter.mblim_lut[L] << 8;
3584
I |= s->filter.lim_lut[L] << 8;
3585
s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3587
s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3589
} else if (hm23 & x) {
3590
int L = l[8 << ss_v], H = L >> 4;
3591
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3593
s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
3601
static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3602
uint8_t *lvl, uint8_t (*mask)[4],
3603
uint8_t *dst, ptrdiff_t ls)
3605
int y, x, bytesperpixel = s->bytesperpixel;
3608
// filter edges between rows (e.g. ------)
3610
for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3611
uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3612
unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3614
for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
3617
int L = *l, H = L >> 4;
3618
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3621
if (vmask[0] & (x << (1 + ss_h))) {
3622
av_assert2(l[1 + ss_h] == L);
3623
s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3625
s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
3627
} else if (vm & (x << (1 + ss_h))) {
3630
E |= s->filter.mblim_lut[L] << 8;
3631
I |= s->filter.lim_lut[L] << 8;
3632
s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3633
[!!(vmask[1] & (x << (1 + ss_h)))]
3634
[1](ptr, ls, E, I, H);
3636
s->dsp.loop_filter_8[!!(vmask[1] & x)]
3637
[1](ptr, ls, E, I, H);
3639
} else if (vm & (x << (1 + ss_h))) {
3640
int L = l[1 + ss_h], H = L >> 4;
3641
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3643
s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3644
[1](ptr + 8 * bytesperpixel, ls, E, I, H);
3649
int L = *l, H = L >> 4;
3650
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3652
if (vm3 & (x << (1 + ss_h))) {
3655
E |= s->filter.mblim_lut[L] << 8;
3656
I |= s->filter.lim_lut[L] << 8;
3657
s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3659
s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3661
} else if (vm3 & (x << (1 + ss_h))) {
3662
int L = l[1 + ss_h], H = L >> 4;
3663
int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3665
s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
3678
// Apply the in-loop deblocking filter to one 64x64 superblock, using the
// per-edge masks and per-block filter levels collected in lflvl during
// reconstruction. Luma uses mask[0]; chroma uses mask[1] when either plane
// is subsampled, mask[0] otherwise.
static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
                          int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
{
    VP9Context *s = ctx->priv_data;
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    uint8_t *dst = f->data[0] + yoff;
    ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
    uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
    int p;

    // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
    // if you think of them as acting on a 8x8 block max, we can interleave
    // each v/h within the single x loop, but that only works if we work on
    // 8 pixel blocks, and we won't always do that (we want at least 16px
    // to use SSE2 optimizations, perhaps 32 for AVX2)

    filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
    filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);

    for (p = 0; p < 2; p++) {
        dst = f->data[1 + p] + uvoff;
        filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
        filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
    }
}
3704
/**
 * Compute the coverage [*start, *end) of tile index @idx when @n superblocks
 * are divided into 1 << log2_n tiles. Results are returned in 8x8-block
 * units (superblock count << 3); both ends are clamped to the frame size.
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = (idx * n) >> log2_n;
    int sb_end = ((idx + 1) * n) >> log2_n;

    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;

    /* one superblock spans 8 rows/cols of 8x8 blocks */
    *start = sb_start << 3;
    *end = sb_end << 3;
}
3712
/**
 * Backward-adapt one 8-bit tree probability towards the statistics observed
 * in the previous frame.
 *
 * @param p             probability to update, in place
 * @param ct0           number of times branch 0 was taken
 * @param ct1           number of times branch 1 was taken
 * @param max_count     count at which adaptation strength saturates
 * @param update_factor maximum blend weight (out of 256) of the new estimate
 */
static inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
                              int max_count, int update_factor)
{
    unsigned ct = ct0 + ct1, p2, p1;

    /* no events observed: leave the probability untouched */
    if (!ct)
        return;

    p1 = *p;
    /* rounded empirical probability of branch 0, clamped to 1..255 */
    p2 = ((ct0 << 8) + (ct >> 1)) / ct;
    if (!p2)
        p2 = 1;
    else if (p2 > 255)
        p2 = 255;

    /* scale the blend weight by how much evidence was collected */
    if (ct > (unsigned) max_count)
        ct = max_count;
    update_factor = update_factor * ct / max_count;

    // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
    *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
}
3730
static void adapt_probs(VP9Context *s)
3733
prob_context *p = &s->prob_ctx[s->framectxid].p;
3734
int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3737
for (i = 0; i < 4; i++)
3738
for (j = 0; j < 2; j++)
3739
for (k = 0; k < 2; k++)
3740
for (l = 0; l < 6; l++)
3741
for (m = 0; m < 6; m++) {
3742
uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3743
unsigned *e = s->counts.eob[i][j][k][l][m];
3744
unsigned *c = s->counts.coef[i][j][k][l][m];
3746
if (l == 0 && m >= 3) // dc only has 3 pt
3749
adapt_prob(&pp[0], e[0], e[1], 24, uf);
3750
adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3751
adapt_prob(&pp[2], c[1], c[2], 24, uf);
3754
if (s->keyframe || s->intraonly) {
3755
memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3756
memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3757
memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3758
memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3763
for (i = 0; i < 3; i++)
3764
adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3767
for (i = 0; i < 4; i++)
3768
adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3771
if (s->comppredmode == PRED_SWITCHABLE) {
3772
for (i = 0; i < 5; i++)
3773
adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3777
if (s->comppredmode != PRED_SINGLEREF) {
3778
for (i = 0; i < 5; i++)
3779
adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3780
s->counts.comp_ref[i][1], 20, 128);
3783
if (s->comppredmode != PRED_COMPREF) {
3784
for (i = 0; i < 5; i++) {
3785
uint8_t *pp = p->single_ref[i];
3786
unsigned (*c)[2] = s->counts.single_ref[i];
3788
adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3789
adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3793
// block partitioning
3794
for (i = 0; i < 4; i++)
3795
for (j = 0; j < 4; j++) {
3796
uint8_t *pp = p->partition[i][j];
3797
unsigned *c = s->counts.partition[i][j];
3799
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3800
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3801
adapt_prob(&pp[2], c[2], c[3], 20, 128);
3805
if (s->txfmmode == TX_SWITCHABLE) {
3806
for (i = 0; i < 2; i++) {
3807
unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3809
adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3810
adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3811
adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3812
adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3813
adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3814
adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3818
// interpolation filter
3819
if (s->filtermode == FILTER_SWITCHABLE) {
3820
for (i = 0; i < 4; i++) {
3821
uint8_t *pp = p->filter[i];
3822
unsigned *c = s->counts.filter[i];
3824
adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3825
adapt_prob(&pp[1], c[1], c[2], 20, 128);
3830
for (i = 0; i < 7; i++) {
3831
uint8_t *pp = p->mv_mode[i];
3832
unsigned *c = s->counts.mv_mode[i];
3834
adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3835
adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3836
adapt_prob(&pp[2], c[1], c[3], 20, 128);
3841
uint8_t *pp = p->mv_joint;
3842
unsigned *c = s->counts.mv_joint;
3844
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3845
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3846
adapt_prob(&pp[2], c[2], c[3], 20, 128);
3850
for (i = 0; i < 2; i++) {
3852
unsigned *c, (*c2)[2], sum;
3854
adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3855
s->counts.mv_comp[i].sign[1], 20, 128);
3857
pp = p->mv_comp[i].classes;
3858
c = s->counts.mv_comp[i].classes;
3859
sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3860
adapt_prob(&pp[0], c[0], sum, 20, 128);
3862
adapt_prob(&pp[1], c[1], sum, 20, 128);
3864
adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3865
adapt_prob(&pp[3], c[2], c[3], 20, 128);
3867
adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3868
adapt_prob(&pp[5], c[4], c[5], 20, 128);
3870
adapt_prob(&pp[6], c[6], sum, 20, 128);
3871
adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3872
adapt_prob(&pp[8], c[7], c[8], 20, 128);
3873
adapt_prob(&pp[9], c[9], c[10], 20, 128);
3875
adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3876
s->counts.mv_comp[i].class0[1], 20, 128);
3877
pp = p->mv_comp[i].bits;
3878
c2 = s->counts.mv_comp[i].bits;
3879
for (j = 0; j < 10; j++)
3880
adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3882
for (j = 0; j < 2; j++) {
3883
pp = p->mv_comp[i].class0_fp[j];
3884
c = s->counts.mv_comp[i].class0_fp[j];
3885
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3886
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3887
adapt_prob(&pp[2], c[2], c[3], 20, 128);
3889
pp = p->mv_comp[i].fp;
3890
c = s->counts.mv_comp[i].fp;
3891
adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3892
adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3893
adapt_prob(&pp[2], c[2], c[3], 20, 128);
3895
if (s->highprecisionmvs) {
3896
adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3897
s->counts.mv_comp[i].class0_hp[1], 20, 128);
3898
adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3899
s->counts.mv_comp[i].hp[1], 20, 128);
3904
for (i = 0; i < 4; i++) {
3905
uint8_t *pp = p->y_mode[i];
3906
unsigned *c = s->counts.y_mode[i], sum, s2;
3908
sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3909
adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3910
sum -= c[TM_VP8_PRED];
3911
adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3912
sum -= c[VERT_PRED];
3913
adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3914
s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3916
adapt_prob(&pp[3], s2, sum, 20, 128);
3918
adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3919
adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3920
sum -= c[DIAG_DOWN_LEFT_PRED];
3921
adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3922
sum -= c[VERT_LEFT_PRED];
3923
adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3924
adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3928
for (i = 0; i < 10; i++) {
3929
uint8_t *pp = p->uv_mode[i];
3930
unsigned *c = s->counts.uv_mode[i], sum, s2;
3932
sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3933
adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3934
sum -= c[TM_VP8_PRED];
3935
adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3936
sum -= c[VERT_PRED];
3937
adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3938
s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3940
adapt_prob(&pp[3], s2, sum, 20, 128);
3942
adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3943
adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3944
sum -= c[DIAG_DOWN_LEFT_PRED];
3945
adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3946
sum -= c[VERT_LEFT_PRED];
3947
adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3948
adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3952
// Free the per-frame-size scratch buffers (intra edge pixels, block structs,
// coefficient/eob storage). AVFrames and references are released elsewhere.
static void free_buffers(VP9Context *s)
{
    av_freep(&s->intra_pred_data[0]);
    av_freep(&s->b_base);
    av_freep(&s->block_base);
}
3959
// Codec close callback: release the three internal frame slots, all 16
// reference ThreadFrames, and the size-dependent scratch buffers.
static av_cold int vp9_decode_free(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int i;

    for (i = 0; i < 3; i++) {
        if (s->frames[i].tf.f->data[0])
            vp9_unref_frame(ctx, &s->frames[i]);
        av_frame_free(&s->frames[i].tf.f);
    }
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->refs[i]);
        av_frame_free(&s->refs[i].f);
        if (s->next_refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->next_refs[i]);
        av_frame_free(&s->next_refs[i].f);
    }
    free_buffers(s);
    // per-tile range coder array — NOTE(review): reconstructed tail, confirm
    // against upstream vp9.c
    av_freep(&s->c_b);
    s->c_b_size = 0;

    return 0;
}
3985
static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3986
int *got_frame, AVPacket *pkt)
3988
const uint8_t *data = pkt->data;
3989
int size = pkt->size;
3990
VP9Context *s = ctx->priv_data;
3991
int res, tile_row, tile_col, i, ref, row, col;
3992
int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map
3993
&& s->frames[REF_FRAME_SEGMAP].segmentation_map;
3994
ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3998
if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
4000
} else if (res == 0) {
4001
if (!s->refs[ref].f->data[0]) {
4002
av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
4003
return AVERROR_INVALIDDATA;
4005
if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
4007
((AVFrame *)frame)->pkt_pts = pkt->pts;
4008
((AVFrame *)frame)->pkt_dts = pkt->dts;
4009
for (i = 0; i < 8; i++) {
4010
if (s->next_refs[i].f->data[0])
4011
ff_thread_release_buffer(ctx, &s->next_refs[i]);
4012
if (s->refs[i].f->data[0] &&
4013
(res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
4022
if (!retain_segmap_ref) {
4023
if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
4024
vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
4025
if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4026
(res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
4029
if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
4030
vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
4031
if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4032
(res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
4034
if (s->frames[CUR_FRAME].tf.f->data[0])
4035
vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
4036
if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
4038
f = s->frames[CUR_FRAME].tf.f;
4039
f->key_frame = s->keyframe;
4040
f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
4041
ls_y = f->linesize[0];
4042
ls_uv =f->linesize[1];
4045
for (i = 0; i < 8; i++) {
4046
if (s->next_refs[i].f->data[0])
4047
ff_thread_release_buffer(ctx, &s->next_refs[i]);
4048
if (s->refreshrefmask & (1 << i)) {
4049
res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
4050
} else if (s->refs[i].f->data[0]) {
4051
res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
4057
// main tile decode loop
4058
bytesperpixel = s->bytesperpixel;
4059
memset(s->above_partition_ctx, 0, s->cols);
4060
memset(s->above_skip_ctx, 0, s->cols);
4061
if (s->keyframe || s->intraonly) {
4062
memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4064
memset(s->above_mode_ctx, NEARESTMV, s->cols);
4066
memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4067
memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4068
memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4069
memset(s->above_segpred_ctx, 0, s->cols);
4070
s->pass = s->frames[CUR_FRAME].uses_2pass =
4071
ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
4072
if ((res = update_block_buffers(ctx)) < 0) {
4073
av_log(ctx, AV_LOG_ERROR,
4074
"Failed to allocate block buffers\n");
4077
if (s->refreshctx && s->parallelmode) {
4080
for (i = 0; i < 4; i++) {
4081
for (j = 0; j < 2; j++)
4082
for (k = 0; k < 2; k++)
4083
for (l = 0; l < 6; l++)
4084
for (m = 0; m < 6; m++)
4085
memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
4086
s->prob.coef[i][j][k][l][m], 3);
4087
if (s->txfmmode == i)
4090
s->prob_ctx[s->framectxid].p = s->prob.p;
4091
ff_thread_finish_setup(ctx);
4092
} else if (!s->refreshctx) {
4093
ff_thread_finish_setup(ctx);
4099
s->block = s->block_base;
4100
s->uvblock[0] = s->uvblock_base[0];
4101
s->uvblock[1] = s->uvblock_base[1];
4102
s->eob = s->eob_base;
4103
s->uveob[0] = s->uveob_base[0];
4104
s->uveob[1] = s->uveob_base[1];
4106
for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
4107
set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
4108
tile_row, s->tiling.log2_tile_rows, s->sb_rows);
4110
for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4113
if (tile_col == s->tiling.tile_cols - 1 &&
4114
tile_row == s->tiling.tile_rows - 1) {
4117
tile_size = AV_RB32(data);
4121
if (tile_size > size) {
4122
ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4123
return AVERROR_INVALIDDATA;
4125
ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4126
if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4127
ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4128
return AVERROR_INVALIDDATA;
4135
for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4136
row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4137
struct VP9Filter *lflvl_ptr = s->lflvl;
4138
ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4140
for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4141
set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
4142
tile_col, s->tiling.log2_tile_cols, s->sb_cols);
4145
memset(s->left_partition_ctx, 0, 8);
4146
memset(s->left_skip_ctx, 0, 8);
4147
if (s->keyframe || s->intraonly) {
4148
memset(s->left_mode_ctx, DC_PRED, 16);
4150
memset(s->left_mode_ctx, NEARESTMV, 8);
4152
memset(s->left_y_nnz_ctx, 0, 16);
4153
memset(s->left_uv_nnz_ctx, 0, 32);
4154
memset(s->left_segpred_ctx, 0, 8);
4156
memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4159
for (col = s->tiling.tile_col_start;
4160
col < s->tiling.tile_col_end;
4161
col += 8, yoff2 += 64 * bytesperpixel,
4162
uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4163
// FIXME integrate with lf code (i.e. zero after each
4164
// use, similar to invtxfm coefficients, or similar)
4166
memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
4170
decode_sb_mem(ctx, row, col, lflvl_ptr,
4171
yoff2, uvoff2, BL_64X64);
4173
decode_sb(ctx, row, col, lflvl_ptr,
4174
yoff2, uvoff2, BL_64X64);
4178
memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4186
// backup pre-loopfilter reconstruction data for intra
4187
// prediction of next row of sb64s
4188
if (row + 8 < s->rows) {
4189
memcpy(s->intra_pred_data[0],
4190
f->data[0] + yoff + 63 * ls_y,
4191
8 * s->cols * bytesperpixel);
4192
memcpy(s->intra_pred_data[1],
4193
f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4194
8 * s->cols * bytesperpixel >> s->ss_h);
4195
memcpy(s->intra_pred_data[2],
4196
f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4197
8 * s->cols * bytesperpixel >> s->ss_h);
4200
// loopfilter one row
4201
if (s->filter.level) {
4204
lflvl_ptr = s->lflvl;
4205
for (col = 0; col < s->cols;
4206
col += 8, yoff2 += 64 * bytesperpixel,
4207
uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4208
loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4212
// FIXME maybe we can make this more finegrained by running the
4213
// loopfilter per-block instead of after each sbrow
4214
// In fact that would also make intra pred left preparation easier?
4215
ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
4219
if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4221
ff_thread_finish_setup(ctx);
4223
} while (s->pass++ == 1);
4224
ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4227
for (i = 0; i < 8; i++) {
4228
if (s->refs[i].f->data[0])
4229
ff_thread_release_buffer(ctx, &s->refs[i]);
4230
ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
4233
if (!s->invisible) {
4234
if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
4242
// Flush callback: drop all internal frames and reference frames (e.g. on
// seek), without freeing the AVFrame shells themselves.
static void vp9_decode_flush(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int i;

    for (i = 0; i < 3; i++)
        vp9_unref_frame(ctx, &s->frames[i]);
    for (i = 0; i < 8; i++)
        ff_thread_release_buffer(ctx, &s->refs[i]);
}
4253
// Allocate the AVFrame shells for the 3 internal frame slots and the
// 8 refs / 8 next_refs ThreadFrames. On any allocation failure, all state
// allocated so far is torn down via vp9_decode_free() and ENOMEM returned.
static int init_frames(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int i;

    for (i = 0; i < 3; i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f) {
            vp9_decode_free(ctx);
            av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }
    for (i = 0; i < 8; i++) {
        s->refs[i].f = av_frame_alloc();
        s->next_refs[i].f = av_frame_alloc();
        if (!s->refs[i].f || !s->next_refs[i].f) {
            vp9_decode_free(ctx);
            av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }

    return 0;
}
4279
// Codec init callback.
static av_cold int vp9_decode_init(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;

    ctx->internal->allocate_progress = 1;
    // presumably forces DSP (re)initialization on the first decoded frame,
    // since 0 is not a valid bit depth — TODO confirm against header parser
    s->last_bpp = 0;
    // sentinel: presumably forces the filter limit LUTs to be rebuilt on
    // first use — TODO confirm
    s->filter.sharpness = -1;

    return init_frames(ctx);
}
4290
// Frame-threading worker init: each thread only needs its own AVFrame
// shells; decoder state is synced via vp9_decode_update_thread_context().
static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
{
    return init_frames(avctx);
}
4295
// Frame-threading state sync: copy frame references and the persistent
// coded state (probability contexts, loop-filter deltas, segmentation)
// from the source thread's context into this one.
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    int i, res;
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;

    // detect size changes in other threads
    if (s->intra_pred_data[0] &&
        (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
        // NOTE(review): reconstructed — frame size changed, drop the
        // size-dependent scratch buffers; confirm against upstream vp9.c
        free_buffers(s);
    }

    for (i = 0; i < 3; i++) {
        if (s->frames[i].tf.f->data[0])
            vp9_unref_frame(dst, &s->frames[i]);
        if (ssrc->frames[i].tf.f->data[0]) {
            if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
                return res;
        }
    }
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(dst, &s->refs[i]);
        if (ssrc->next_refs[i].f->data[0]) {
            if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
                return res;
        }
    }

    s->invisible = ssrc->invisible;
    s->keyframe = ssrc->keyframe;
    s->ss_v = ssrc->ss_v;
    s->ss_h = ssrc->ss_h;
    s->segmentation.enabled = ssrc->segmentation.enabled;
    s->segmentation.update_map = ssrc->segmentation.update_map;
    s->bytesperpixel = ssrc->bytesperpixel;
    s->bpp = ssrc->bpp;
    s->bpp_index = ssrc->bpp_index;
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
    if (ssrc->segmentation.enabled) {
        memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
               sizeof(s->segmentation.feat));
    }

    return 0;
}
4342
// Profiles advertised by the decoder; terminated by FF_PROFILE_UNKNOWN.
static const AVProfile profiles[] = {
    { FF_PROFILE_VP9_0, "Profile 0" },
    { FF_PROFILE_VP9_1, "Profile 1" },
    { FF_PROFILE_VP9_2, "Profile 2" },
    { FF_PROFILE_VP9_3, "Profile 3" },
    { FF_PROFILE_UNKNOWN },
};
4350
AVCodec ff_vp9_decoder = {
4352
.long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4353
.type = AVMEDIA_TYPE_VIDEO,
4354
.id = AV_CODEC_ID_VP9,
4355
.priv_data_size = sizeof(VP9Context),
4356
.init = vp9_decode_init,
4357
.close = vp9_decode_free,
4358
.decode = vp9_decode_frame,
4359
.capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4360
.flush = vp9_decode_flush,
4361
.init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4362
.update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4363
.profiles = NULL_IF_CONFIG_SMALL(profiles),