24
25
* VP8 compatible video decoder
28
#include "libavutil/common.h"
28
31
#include "vp8dsp.h"
29
#include "libavutil/common.h"
33
/*
 * Emits two helpers for the codec prefix `name`; each forwards to
 * name##_idct_dc_add_c once per entry of block[4]:
 *   - name##_idct_dc_add4uv_c: destinations at (row 0, col 0), (row 0, col 4),
 *     (row 4, col 0) and (row 4, col 4) of dst, i.e. a 2x2 arrangement;
 *   - name##_idct_dc_add4y_c: destinations at dst + 0, 4, 8 and 12 on one row.
 * NOTE(review): the bare numeric lines and the missing braces / parameter
 * tails look like residue of a rendered diff -- confirm against the real file.
 */
#define MK_IDCT_DC_ADD4_C(name) \
34
static void name ## _idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], \
37
name ## _idct_dc_add_c(dst + stride * 0 + 0, block[0], stride); \
38
name ## _idct_dc_add_c(dst + stride * 0 + 4, block[1], stride); \
39
name ## _idct_dc_add_c(dst + stride * 4 + 0, block[2], stride); \
40
name ## _idct_dc_add_c(dst + stride * 4 + 4, block[3], stride); \
43
static void name ## _idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], \
46
name ## _idct_dc_add_c(dst + 0, block[0], stride); \
47
name ## _idct_dc_add_c(dst + 4, block[1], stride); \
48
name ## _idct_dc_add_c(dst + 8, block[2], stride); \
49
name ## _idct_dc_add_c(dst + 12, block[3], stride); \
52
#if CONFIG_VP7_DECODER
53
/*
 * Two-pass 4x4 inverse transform of the 16 luma DC coefficients in dc[].
 * The result for each (row, column) pair is stored in coefficient 0 of the
 * corresponding block[row][column].
 *
 * The multipliers numerically match 2^15 * cos(pi/4) (23170),
 * 2^15 * sin(pi/8) (12540) and 2^15 * cos(pi/8) (30274) -- presumably a
 * fixed-point DCT; confirm against the codec reference.
 * NOTE(review): the declaration of tmp[] and the braces are not in the
 * visible lines.
 */
static void vp7_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
55
int i, a1, b1, c1, d1;
58
// Pass 1: transform each group dc[4*i .. 4*i+3] into tmp[], scaled by >> 14.
for (i = 0; i < 4; i++) {
59
a1 = (dc[i * 4 + 0] + dc[i * 4 + 2]) * 23170;
60
b1 = (dc[i * 4 + 0] - dc[i * 4 + 2]) * 23170;
61
c1 = dc[i * 4 + 1] * 12540 - dc[i * 4 + 3] * 30274;
62
d1 = dc[i * 4 + 1] * 30274 + dc[i * 4 + 3] * 12540;
63
tmp[i * 4 + 0] = (a1 + d1) >> 14;
64
tmp[i * 4 + 3] = (a1 - d1) >> 14;
65
tmp[i * 4 + 1] = (b1 + c1) >> 14;
66
tmp[i * 4 + 2] = (b1 - c1) >> 14;
69
// Pass 2: transform tmp[i], tmp[i+4], tmp[i+8], tmp[i+12]; 0x20000 is the
// rounding term (half of 1 << 18) for the final >> 18.
for (i = 0; i < 4; i++) {
70
a1 = (tmp[i + 0] + tmp[i + 8]) * 23170;
71
b1 = (tmp[i + 0] - tmp[i + 8]) * 23170;
72
c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274;
73
d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540;
78
block[0][i][0] = (a1 + d1 + 0x20000) >> 18;
79
block[3][i][0] = (a1 - d1 + 0x20000) >> 18;
80
block[1][i][0] = (b1 + c1 + 0x20000) >> 18;
81
block[2][i][0] = (b1 - c1 + 0x20000) >> 18;
85
/*
 * DC-only variant of the luma DC transform: a single value is derived from
 * dc[0] by applying the 23170 multiplier twice (>> 14 after the first, then
 * + 0x20000 and >> 18 after the second).
 * NOTE(review): the body of the 4-iteration loop is not in the visible lines,
 * so where val is stored cannot be stated from here.
 */
static void vp7_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
87
int i, val = (23170 * (23170 * dc[0] >> 14) + 0x20000) >> 18;
90
for (i = 0; i < 4; i++) {
98
/*
 * Inverse-transforms the 16 coefficients in block[] and adds the result to a
 * 4x4 pixel area at dst (rows are `stride` bytes apart), clipping each pixel
 * with av_clip_uint8(). block[] is zeroed as it is consumed.
 *
 * Uses the same 23170 / 12540 / 30274 multipliers and the same >> 14 then
 * (+ 0x20000) >> 18 scaling as vp7_luma_dc_wht_c.
 * NOTE(review): the declaration of tmp[] and the braces are not in the
 * visible lines.
 */
static void vp7_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
100
int i, a1, b1, c1, d1;
103
// Pass 1: transform block[4*i .. 4*i+3] into tmp[] and clear the inputs.
for (i = 0; i < 4; i++) {
104
a1 = (block[i * 4 + 0] + block[i * 4 + 2]) * 23170;
105
b1 = (block[i * 4 + 0] - block[i * 4 + 2]) * 23170;
106
c1 = block[i * 4 + 1] * 12540 - block[i * 4 + 3] * 30274;
107
d1 = block[i * 4 + 1] * 30274 + block[i * 4 + 3] * 12540;
108
block[i * 4 + 0] = 0;
109
block[i * 4 + 1] = 0;
110
block[i * 4 + 2] = 0;
111
block[i * 4 + 3] = 0;
112
tmp[i * 4 + 0] = (a1 + d1) >> 14;
113
tmp[i * 4 + 3] = (a1 - d1) >> 14;
114
tmp[i * 4 + 1] = (b1 + c1) >> 14;
115
tmp[i * 4 + 2] = (b1 - c1) >> 14;
118
// Pass 2: transform tmp[i], tmp[i+4], tmp[i+8], tmp[i+12] and add the
// rounded result to column i of the four destination rows.
for (i = 0; i < 4; i++) {
119
a1 = (tmp[i + 0] + tmp[i + 8]) * 23170;
120
b1 = (tmp[i + 0] - tmp[i + 8]) * 23170;
121
c1 = tmp[i + 4] * 12540 - tmp[i + 12] * 30274;
122
d1 = tmp[i + 4] * 30274 + tmp[i + 12] * 12540;
123
dst[0 * stride + i] = av_clip_uint8(dst[0 * stride + i] +
124
((a1 + d1 + 0x20000) >> 18));
125
dst[3 * stride + i] = av_clip_uint8(dst[3 * stride + i] +
126
((a1 - d1 + 0x20000) >> 18));
127
dst[1 * stride + i] = av_clip_uint8(dst[1 * stride + i] +
128
((b1 + c1 + 0x20000) >> 18));
129
dst[2 * stride + i] = av_clip_uint8(dst[2 * stride + i] +
130
((b1 - c1 + 0x20000) >> 18));
134
/*
 * DC-only inverse transform plus add: derives one offset `dc` from block[0]
 * (23170 applied twice, >> 14 then (+ 0x20000) >> 18) and adds it, with
 * av_clip_uint8() clipping, to dst[0..3] on each of 4 loop iterations.
 * NOTE(review): any per-iteration advance of dst and any reset of block[0]
 * are not in the visible lines -- confirm against the real file.
 */
static void vp7_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
136
int i, dc = (23170 * (23170 * block[0] >> 14) + 0x20000) >> 18;
139
for (i = 0; i < 4; i++) {
140
dst[0] = av_clip_uint8(dst[0] + dc);
141
dst[1] = av_clip_uint8(dst[1] + dc);
142
dst[2] = av_clip_uint8(dst[2] + dc);
143
dst[3] = av_clip_uint8(dst[3] + dc);
148
/* Instantiate the add4uv / add4y helpers for the vp7 prefix; they forward to
 * vp7_idct_dc_add_c. */
MK_IDCT_DC_ADD4_C(vp7)
149
#endif /* CONFIG_VP7_DECODER */
31
151
// TODO: Maybe add dequant
152
#if CONFIG_VP8_DECODER
32
153
/*
 * Two-pass add/subtract butterfly over the 16 luma DC values in dc[].
 * The first pass works in place on dc[]; the second pass adds a rounding
 * term of 3, shifts right by 3 and writes coefficient 0 of block[i][k].
 *
 * NOTE(review): each t0..t3 assignment appears twice (compact and spaced
 * formatting) and only the block[i][0] / block[i][1] stores are visible; this
 * looks like residue of a rendered diff -- confirm against the real file.
 */
static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
34
155
int i, t0, t1, t2, t3;
36
157
// Pass 1: butterfly over dc[i], dc[4+i], dc[8+i], dc[12+i], stored in place.
for (i = 0; i < 4; i++) {
37
t0 = dc[0*4+i] + dc[3*4+i];
38
t1 = dc[1*4+i] + dc[2*4+i];
39
t2 = dc[1*4+i] - dc[2*4+i];
40
t3 = dc[0*4+i] - dc[3*4+i];
158
t0 = dc[0 * 4 + i] + dc[3 * 4 + i];
159
t1 = dc[1 * 4 + i] + dc[2 * 4 + i];
160
t2 = dc[1 * 4 + i] - dc[2 * 4 + i];
161
t3 = dc[0 * 4 + i] - dc[3 * 4 + i];
163
dc[0 * 4 + i] = t0 + t1;
164
dc[1 * 4 + i] = t3 + t2;
165
dc[2 * 4 + i] = t0 - t1;
166
dc[3 * 4 + i] = t3 - t2;
48
169
// Pass 2: butterfly over dc[4*i .. 4*i+3]; the +3 feeds the final >> 3.
for (i = 0; i < 4; i++) {
49
t0 = dc[i*4+0] + dc[i*4+3] + 3; // rounding
50
t1 = dc[i*4+1] + dc[i*4+2];
51
t2 = dc[i*4+1] - dc[i*4+2];
52
t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding
170
t0 = dc[i * 4 + 0] + dc[i * 4 + 3] + 3; // rounding
171
t1 = dc[i * 4 + 1] + dc[i * 4 + 2];
172
t2 = dc[i * 4 + 1] - dc[i * 4 + 2];
173
t3 = dc[i * 4 + 0] - dc[i * 4 + 3] + 3; // rounding
58
179
block[i][0][0] = (t0 + t1) >> 3;
59
180
block[i][1][0] = (t3 + t2) >> 3;
172
281
/*
 * NOTE(review): the enclosing function header is not in the visible lines;
 * presumably this is the tail of filter_common(), which the vp7/vp8 wrappers
 * call -- confirm. Visible behaviour:
 *   - f1 = min(a + 4, 127) >> 3;
 *   - f2 is assigned in two different ways (f1 - ((a & 7) == 4), and
 *     min(a + 3, 127) >> 3); the condition selecting between them is not
 *     visible;
 *   - the pixels at p[-1*stride] and p[0*stride] become cm[p0 + f2] and
 *     cm[q0 - f1];
 *   - the pixels at p[-2*stride] and p[1*stride] become cm[p1 + a] and
 *     cm[q1 - a] (see the "only used for _inner" comment below).
 * Several statements appear twice in compact and spaced formatting.
 */
// We deviate from the spec here with c(a+3) >> 3
173
282
// since that's what libvpx does.
174
f1 = FFMIN(a+4, 127) >> 3;
175
f2 = FFMIN(a+3, 127) >> 3;
283
f1 = FFMIN(a + 4, 127) >> 3;
286
f2 = f1 - ((a & 7) == 4);
288
f2 = FFMIN(a + 3, 127) >> 3;
177
290
// Despite what the spec says, we do need to clamp here to
178
291
// be bitexact with libvpx.
179
p[-1*stride] = cm[p0 + f2];
180
p[ 0*stride] = cm[q0 - f1];
292
p[-1 * stride] = cm[p0 + f2];
293
p[ 0 * stride] = cm[q0 - f1];
182
295
// only used for _inner on blocks without high edge variance
185
p[-2*stride] = cm[p1 + a];
186
p[ 1*stride] = cm[q1 - a];
298
p[-2 * stride] = cm[p1 + a];
299
p[ 1 * stride] = cm[q1 - a];
190
/*
 * Edge test: returns non-zero when 2*|p0 - q0| + (|p1 - q1| >> 1) <= flim.
 * NOTE(review): p0/p1/q0/q1 are defined outside the visible lines --
 * presumably pixel loads on either side of p along `stride`; confirm.
 */
static av_always_inline int simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
193
return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim;
303
/*
 * VP7 wrapper: forwards p, stride and is4tap to filter_common() with the
 * IS_VP7 selector. The rest of the parameter list is not in the visible lines.
 */
static av_always_inline void vp7_filter_common(uint8_t *p, ptrdiff_t stride,
306
filter_common(p, stride, is4tap, IS_VP7);
309
/*
 * VP8 wrapper: forwards p, stride and is4tap to filter_common() with the
 * IS_VP8 selector. The rest of the parameter list is not in the visible lines.
 */
static av_always_inline void vp8_filter_common(uint8_t *p, ptrdiff_t stride,
312
filter_common(p, stride, is4tap, IS_VP8);
315
/*
 * VP7 edge test: returns non-zero when |p0 - q0| <= flim.
 * NOTE(review): p0/q0 and the tail of the parameter list are outside the
 * visible lines.
 */
static av_always_inline int vp7_simple_limit(uint8_t *p, ptrdiff_t stride,
319
return FFABS(p0 - q0) <= flim;
322
/*
 * VP8 edge test: returns non-zero when
 * 2*|p0 - q0| + (|p1 - q1| >> 1) <= flim.
 * NOTE(review): p0/p1/q0/q1 and the tail of the parameter list are outside
 * the visible lines.
 */
static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride,
326
return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim;
197
330
* E - limit at the macroblock edge
198
331
* I - limit for interior difference
200
/*
 * Full edge test: passes when simple_limit(p, stride, E) holds and every
 * neighbouring difference on both sides (p3-p2, p2-p1, p1-p0, q3-q2, q2-q1,
 * q1-q0) has absolute value <= I.
 * NOTE(review): the p*/q* values are defined outside the visible lines.
 */
static av_always_inline int normal_limit(uint8_t *p, ptrdiff_t stride, int E, int I)
203
return simple_limit(p, stride, E)
204
&& FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I
205
&& FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I;
333
/*
 * Generates vp<vpn>_normal_limit(): passes when vp<vpn>_simple_limit(p,
 * stride, E) holds and all six neighbouring differences (p3-p2, p2-p1, p1-p0,
 * q3-q2, q2-q1, q1-q0) have absolute value <= I.
 * NOTE(review): the tail of the generated parameter list and the braces are
 * not in the visible lines.
 */
#define NORMAL_LIMIT(vpn) \
334
static av_always_inline int vp ## vpn ## _normal_limit(uint8_t *p, \
339
return vp ## vpn ## _simple_limit(p, stride, E) && \
340
FFABS(p3 - p2) <= I && FFABS(p2 - p1) <= I && \
341
FFABS(p1 - p0) <= I && FFABS(q3 - q2) <= I && \
342
FFABS(q2 - q1) <= I && FFABS(q1 - q0) <= I; \
208
348
// high edge variance
209
349
/*
 * High-edge-variance test: non-zero when |p1 - p0| or |q1 - q0| exceeds
 * thresh. The return statement appears twice (compact and spaced formatting)
 * with identical meaning; p0/p1/q0/q1 are defined outside the visible lines.
 */
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
212
return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh;
352
return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh;
215
355
/*
 * Macroblock-edge filter touching three pixels on each side of the edge.
 * w is clip_int8(p1 - q1) with 3*(q0 - p0) added and clipped again. The
 * corrections a0 = (27w + 63) >> 7, a1 = (18w + 63) >> 7 and
 * a2 = (9w + 63) >> 7 are added on the p side and subtracted on the q side,
 * with the result looked up through the clamp table cm.
 *
 * NOTE(review): cm is declared twice (ff_cropTbl vs ff_crop_tab), which looks
 * like the old and new side of a rename in a rendered diff; the p*/q* loads
 * are not in the visible lines.
 */
static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
217
357
int a0, a1, a2, w;
218
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
358
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
222
w = clip_int8(p1-q1);
223
w = clip_int8(w + 3*(q0-p0));
225
a0 = (27*w + 63) >> 7;
226
a1 = (18*w + 63) >> 7;
227
a2 = ( 9*w + 63) >> 7;
229
p[-3*stride] = cm[p2 + a2];
230
p[-2*stride] = cm[p1 + a1];
231
p[-1*stride] = cm[p0 + a0];
232
p[ 0*stride] = cm[q0 - a0];
233
p[ 1*stride] = cm[q1 - a1];
234
p[ 2*stride] = cm[q2 - a2];
237
/*
 * Older 5-argument form. Generates vp8_<dir>_loop_filter<size>_c and its
 * _inner_c variant. Both walk `size` positions spaced `stridea` apart and
 * filter across the edge using step `strideb`, but only where normal_limit()
 * passes:
 *   - edge variant: filter_common(..., 1) when hev() is set; the
 *     filter_mbedge() call is presumably the else branch (the else line is
 *     not visible);
 *   - inner variant: filter_common() with 1 or 0, presumably chosen by hv
 *     (the condition line is not visible).
 * The two lines after the macro instantiate it for 16 positions, v and h.
 */
#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \
238
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, ptrdiff_t stride,\
239
int flim_E, int flim_I, int hev_thresh)\
243
for (i = 0; i < size; i++)\
244
if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\
245
if (hev(dst+i*stridea, strideb, hev_thresh))\
246
filter_common(dst+i*stridea, strideb, 1);\
248
filter_mbedge(dst+i*stridea, strideb);\
252
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, ptrdiff_t stride,\
253
int flim_E, int flim_I, int hev_thresh)\
257
for (i = 0; i < size; i++)\
258
if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\
259
int hv = hev(dst+i*stridea, strideb, hev_thresh);\
261
filter_common(dst+i*stridea, strideb, 1);\
263
filter_common(dst+i*stridea, strideb, 0);\
267
LOOP_FILTER(v, 16, 1, stride,)
268
LOOP_FILTER(h, 16, stride, 1,)
270
/*
 * Older 3-argument form. Instantiates the 8-position LOOP_FILTER as
 * av_always_inline, then generates vp8_<dir>_loop_filter8uv_c and
 * vp8_<dir>_loop_filter8uv_inner_c, each of which runs the 8-position filter
 * on dstU and then on dstV with the same stride and thresholds.
 * The two lines after the macro instantiate it for v and h.
 */
#define UV_LOOP_FILTER(dir, stridea, strideb) \
271
LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \
272
static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,\
273
int fE, int fI, int hev_thresh)\
275
vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\
276
vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\
278
static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,\
279
int fE, int fI, int hev_thresh)\
281
vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\
282
vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\
285
UV_LOOP_FILTER(v, 1, stride)
286
UV_LOOP_FILTER(h, stride, 1)
288
/*
 * Simple loop filter over 16 consecutive positions dst + i, filtering across
 * the edge with step `stride`: filter_common(..., 1) runs wherever
 * simple_limit() passes for flim.
 * NOTE(review): the declaration of i and the braces are not visible.
 */
static void vp8_v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim)
292
for (i = 0; i < 16; i++)
293
if (simple_limit(dst+i, stride, flim))
294
filter_common(dst+i, stride, 1);
297
/*
 * Simple loop filter over 16 positions dst + i*stride, filtering across the
 * edge with step 1: filter_common(..., 1) runs wherever simple_limit()
 * passes for flim.
 * NOTE(review): the declaration of i and the braces are not visible.
 */
static void vp8_h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim)
301
for (i = 0; i < 16; i++)
302
if (simple_limit(dst+i*stride, 1, flim))
303
filter_common(dst+i*stride, 1, 1);
362
/*
 * NOTE(review): these are the spaced-formatting statements of the
 * filter_mbedge() body; in this view they sit apart from the function header,
 * which looks like residue of a rendered diff -- confirm.
 * w combines (p1 - q1) and 3*(q0 - p0), clipped with clip_int8() at each
 * step. a0/a1/a2 scale w by 27, 18 and 9 with (+ 63) >> 7 and are applied to
 * three pixels on each side of the edge through the clamp table cm.
 */
w = clip_int8(p1 - q1);
363
w = clip_int8(w + 3 * (q0 - p0));
365
a0 = (27 * w + 63) >> 7;
366
a1 = (18 * w + 63) >> 7;
367
a2 = (9 * w + 63) >> 7;
369
p[-3 * stride] = cm[p2 + a2];
370
p[-2 * stride] = cm[p1 + a1];
371
p[-1 * stride] = cm[p0 + a0];
372
p[ 0 * stride] = cm[q0 - a0];
373
p[ 1 * stride] = cm[q1 - a1];
374
p[ 2 * stride] = cm[q2 - a2];
377
/*
 * Codec-parameterised form. Generates <vpn>_<dir>_loop_filter<size>_c and its
 * _inner_c variant. Both walk `size` positions spaced `stridea` apart and
 * filter across the edge using step `strideb`, but only where
 * <vpn>_normal_limit() passes:
 *   - edge variant: <vpn>_filter_common(..., 1) when hev() is set; the
 *     filter_mbedge() call is presumably the else branch (the else line is
 *     not visible);
 *   - inner variant: <vpn>_filter_common() with 1 or 0, presumably chosen by
 *     hv (the condition line is not visible).
 * NOTE(review): parts of the generated signatures and the braces are missing
 * from the visible lines.
 */
#define LOOP_FILTER(vpn, dir, size, stridea, strideb, maybe_inline) \
378
static maybe_inline \
379
void vpn ## _ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, \
381
int flim_E, int flim_I, \
385
for (i = 0; i < size; i++) \
386
if (vpn ## _normal_limit(dst + i * stridea, strideb, \
388
if (hev(dst + i * stridea, strideb, hev_thresh)) \
389
vpn ## _filter_common(dst + i * stridea, strideb, 1); \
391
filter_mbedge(dst + i * stridea, strideb); \
395
static maybe_inline \
396
void vpn ## _ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, \
403
for (i = 0; i < size; i++) \
404
if (vpn ## _normal_limit(dst + i * stridea, strideb, \
406
int hv = hev(dst + i * stridea, strideb, hev_thresh); \
408
vpn ## _filter_common(dst + i * stridea, strideb, 1); \
410
vpn ## _filter_common(dst + i * stridea, strideb, 0); \
414
/*
 * Codec-parameterised form. Instantiates the 8-position LOOP_FILTER as
 * av_always_inline, then generates <vpn>_<dir>_loop_filter8uv_c and
 * <vpn>_<dir>_loop_filter8uv_inner_c, each of which runs the 8-position
 * filter on dstU and then on dstV.
 * NOTE(review): parts of the generated signatures and call argument tails are
 * missing from the visible lines.
 */
#define UV_LOOP_FILTER(vpn, dir, stridea, strideb) \
415
LOOP_FILTER(vpn, dir, 8, stridea, strideb, av_always_inline) \
416
static void vpn ## _ ## dir ## _loop_filter8uv_c(uint8_t *dstU, \
418
ptrdiff_t stride, int fE, \
419
int fI, int hev_thresh) \
421
vpn ## _ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh); \
422
vpn ## _ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh); \
425
static void vpn ## _ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, \
431
vpn ## _ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, \
433
vpn ## _ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, \
437
/*
 * Generates <vpn>_v_loop_filter_simple_c and <vpn>_h_loop_filter_simple_c.
 * Each visits 16 positions (dst + i with step `stride` for v, dst + i*stride
 * with step 1 for h) and calls <vpn>_filter_common(..., 1) wherever
 * <vpn>_simple_limit() passes for flim.
 * NOTE(review): the signature tails and braces are not in the visible lines.
 */
#define LOOP_FILTER_SIMPLE(vpn) \
438
static void vpn ## _v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, \
442
for (i = 0; i < 16; i++) \
443
if (vpn ## _simple_limit(dst + i, stride, flim)) \
444
vpn ## _filter_common(dst + i, stride, 1); \
447
static void vpn ## _h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, \
451
for (i = 0; i < 16; i++) \
452
if (vpn ## _simple_limit(dst + i * stride, 1, flim)) \
453
vpn ## _filter_common(dst + i * stride, 1, 1); \
456
/*
 * Instantiates the whole loop-filter family for one codec prefix: the
 * 16-position v/h filters, the 8-position U/V filters for v/h, and the two
 * simple filters.
 * NOTE(review): the last visible line still ends in a continuation, so the
 * macro's final line is not in view.
 */
#define LOOP_FILTERS(vpn) \
457
LOOP_FILTER(vpn, v, 16, 1, stride, ) \
458
LOOP_FILTER(vpn, h, 16, stride, 1, ) \
459
UV_LOOP_FILTER(vpn, v, 1, stride) \
460
UV_LOOP_FILTER(vpn, h, stride, 1) \
461
LOOP_FILTER_SIMPLE(vpn) \
306
463
/*
 * Sub-pixel interpolation taps: 7 rows of 6 taps, indexed by mx - 1 / my - 1
 * in the EPEL macros. Taps 1 and 4 are stored as magnitudes; FILTER_6TAP and
 * FILTER_4TAP subtract them. With those signs every row sums to 128, which
 * matches the (+ 64) >> 7 rounding in the filter macros.
 * NOTE(review): each row appears twice and the closing brace is missing,
 * which looks like residue of a rendered diff -- confirm against the real
 * file.
 */
static const uint8_t subpel_filters[7][6] = {
307
{ 0, 6, 123, 12, 1, 0 },
308
{ 2, 11, 108, 36, 8, 1 },
309
{ 0, 9, 93, 50, 6, 0 },
310
{ 3, 16, 77, 77, 16, 3 },
311
{ 0, 6, 50, 93, 9, 0 },
312
{ 1, 8, 36, 108, 11, 2 },
313
{ 0, 1, 12, 123, 6, 0 },
464
{ 0, 6, 123, 12, 1, 0 },
465
{ 2, 11, 108, 36, 8, 1 },
466
{ 0, 9, 93, 50, 6, 0 },
467
{ 3, 16, 77, 77, 16, 3 },
468
{ 0, 6, 50, 93, 9, 0 },
469
{ 1, 8, 36, 108, 11, 2 },
470
{ 0, 1, 12, 123, 6, 0 },
316
/*
 * Single-line-signature form. Generates put_vp8_pixels<WIDTH>_c, which copies
 * h rows of WIDTH bytes from src to dst with memcpy(), advancing dst by
 * dststride and src by srcstride per row. The x and y parameters are not
 * used in the visible lines.
 */
#define PUT_PIXELS(WIDTH) \
317
static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y) { \
319
for (i = 0; i < h; i++, dst+= dststride, src+= srcstride) { \
320
memcpy(dst, src, WIDTH); \
473
/*
 * Wrapped-signature form. Generates put_vp8_pixels<WIDTH>_c, which copies h
 * rows of WIDTH bytes from src to dst with memcpy(), advancing dst by
 * dststride and src by srcstride per row. The x and y parameters are not
 * used in the visible lines.
 */
#define PUT_PIXELS(WIDTH) \
474
static void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride, \
475
uint8_t *src, ptrdiff_t srcstride, \
476
int h, int x, int y) \
479
for (i = 0; i < h; i++, dst += dststride, src += srcstride) \
480
memcpy(dst, src, WIDTH); \
328
/*
 * Interpolation kernels (compact formatting). Both read `src` around index x
 * with step `stride`, weight the samples by the taps in F (F[1] and F[4] are
 * subtracted), add 64, shift right by 7 and clamp through the table cm.
 * FILTER_6TAP uses offsets -2..+3; FILTER_4TAP uses offsets -1..+2 and
 * ignores F[0] and F[5]. Both rely on x and cm from the expansion site.
 */
#define FILTER_6TAP(src, F, stride) \
329
cm[(F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + F[0]*src[x-2*stride] + \
330
F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + F[5]*src[x+3*stride] + 64) >> 7]
332
#define FILTER_4TAP(src, F, stride) \
333
cm[(F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + \
334
F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + 64) >> 7]
336
/*
 * Generates put_vp8_epel<SIZE>_h<TAPS>_c (single-line-signature form):
 * horizontal sub-pixel filter. The tap row is subpel_filters[mx - 1]; for
 * each of h rows, SIZE output pixels are produced with FILTER_<TAPS>TAP at
 * step 1.
 * NOTE(review): the per-row advance of dst/src and the braces are not in the
 * visible lines.
 */
#define VP8_EPEL_H(SIZE, TAPS) \
337
static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
339
const uint8_t *filter = subpel_filters[mx-1]; \
340
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
343
for (y = 0; y < h; y++) { \
344
for (x = 0; x < SIZE; x++) \
345
dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \
350
/*
 * Generates put_vp8_epel<SIZE>_v<TAPS>_c (single-line-signature form):
 * vertical sub-pixel filter. The tap row is subpel_filters[my - 1]; for each
 * of h rows, SIZE output pixels are produced with FILTER_<TAPS>TAP at step
 * srcstride.
 * NOTE(review): the per-row advance of dst/src and the braces are not in the
 * visible lines.
 */
#define VP8_EPEL_V(SIZE, TAPS) \
351
static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
353
const uint8_t *filter = subpel_filters[my-1]; \
354
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
357
for (y = 0; y < h; y++) { \
358
for (x = 0; x < SIZE; x++) \
359
dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \
364
/*
 * Generates put_vp8_epel<SIZE>_h<HTAPS>v<VTAPS>_c (single-line-signature
 * form): separable 2-D sub-pixel filter.
 *   1. src is moved up by 2 rows (1 row when VTAPS == 4) and h + VTAPS - 1
 *      rows are filtered horizontally with subpel_filters[mx - 1] into
 *      tmp_array, a (2*SIZE + VTAPS - 1) x SIZE byte buffer.
 *   2. tmp is repositioned past those 2 (or 1) leading rows and h rows are
 *      filtered vertically with subpel_filters[my - 1] at step SIZE into dst.
 * NOTE(review): the per-row pointer advances and the braces are not in the
 * visible lines.
 */
#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \
365
static void put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
367
const uint8_t *filter = subpel_filters[mx-1]; \
368
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
370
uint8_t tmp_array[(2*SIZE+VTAPS-1)*SIZE]; \
371
uint8_t *tmp = tmp_array; \
372
src -= (2-(VTAPS==4))*srcstride; \
374
for (y = 0; y < h+VTAPS-1; y++) { \
375
for (x = 0; x < SIZE; x++) \
376
tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \
381
tmp = tmp_array + (2-(VTAPS==4))*SIZE; \
382
filter = subpel_filters[my-1]; \
384
for (y = 0; y < h; y++) { \
385
for (x = 0; x < SIZE; x++) \
386
dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \
487
/*
 * Interpolation kernels (spaced formatting). Both read `src` around index x
 * with step `stride`, weight the samples by the taps in F (F[1] and F[4] are
 * subtracted), add 64, shift right by 7 and clamp through the table cm.
 * FILTER_6TAP uses offsets -2..+3; FILTER_4TAP uses offsets -1..+2 and
 * ignores F[0] and F[5]. Both rely on x and cm from the expansion site.
 */
#define FILTER_6TAP(src, F, stride) \
488
cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \
489
F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] - \
490
F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7]
492
#define FILTER_4TAP(src, F, stride) \
493
cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] + \
494
F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7]
496
/*
 * Generates put_vp8_epel<SIZE>_h<TAPS>_c (wrapped-signature form): horizontal
 * sub-pixel filter. The tap row is subpel_filters[mx - 1]; for each of h
 * rows, SIZE output pixels are produced with FILTER_<TAPS>TAP at step 1.
 * NOTE(review): the src parameter line, the per-row advance of dst/src and
 * the braces are not in the visible lines.
 */
#define VP8_EPEL_H(SIZE, TAPS) \
497
static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, \
498
ptrdiff_t dststride, \
500
ptrdiff_t srcstride, \
501
int h, int mx, int my) \
503
const uint8_t *filter = subpel_filters[mx - 1]; \
504
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
506
for (y = 0; y < h; y++) { \
507
for (x = 0; x < SIZE; x++) \
508
dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \
514
/*
 * Generates put_vp8_epel<SIZE>_v<TAPS>_c (wrapped-signature form): vertical
 * sub-pixel filter. The tap row is subpel_filters[my - 1]; for each of h
 * rows, SIZE output pixels are produced with FILTER_<TAPS>TAP at step
 * srcstride.
 * NOTE(review): the src parameter line, the per-row advance of dst/src and
 * the braces are not in the visible lines.
 */
#define VP8_EPEL_V(SIZE, TAPS) \
515
static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, \
516
ptrdiff_t dststride, \
518
ptrdiff_t srcstride, \
519
int h, int mx, int my) \
521
const uint8_t *filter = subpel_filters[my - 1]; \
522
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
524
for (y = 0; y < h; y++) { \
525
for (x = 0; x < SIZE; x++) \
526
dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \
532
/*
 * Generates put_vp8_epel<SIZE>_h<HTAPS>v<VTAPS>_c (wrapped-signature form):
 * separable 2-D sub-pixel filter.
 *   1. src is moved up by 2 rows (1 row when VTAPS == 4) and h + VTAPS - 1
 *      rows are filtered horizontally with subpel_filters[mx - 1] into
 *      tmp_array, a (2 * SIZE + VTAPS - 1) x SIZE byte buffer.
 *   2. tmp is repositioned past those 2 (or 1) leading rows and h rows are
 *      filtered vertically with subpel_filters[my - 1] at step SIZE into dst.
 * NOTE(review): parts of the signature, the per-row pointer advances and the
 * braces are not in the visible lines.
 */
#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \
534
put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, \
535
ptrdiff_t dststride, \
537
ptrdiff_t srcstride, \
541
const uint8_t *filter = subpel_filters[mx - 1]; \
542
const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
544
uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE]; \
545
uint8_t *tmp = tmp_array; \
546
src -= (2 - (VTAPS == 4)) * srcstride; \
548
for (y = 0; y < h + VTAPS - 1; y++) { \
549
for (x = 0; x < SIZE; x++) \
550
tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \
554
tmp = tmp_array + (2 - (VTAPS == 4)) * SIZE; \
555
filter = subpel_filters[my - 1]; \
557
for (y = 0; y < h; y++) { \
558
for (x = 0; x < SIZE; x++) \
559
dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \
392
565
/*
 * Instantiations of the EPEL generators. Only three are visible here: the
 * 16-wide 4-tap horizontal filter and the 8- and 4-wide 6x6-tap 2-D filters.
 * NOTE(review): the jump in the interleaved line numbers suggests other
 * instantiations sit between these -- confirm against the real file.
 */
VP8_EPEL_H(16, 4)
414
588
VP8_EPEL_HV(8, 6, 6)
415
589
VP8_EPEL_HV(4, 6, 6)
417
/*
 * Generates three bilinear interpolators per SIZE (single-line-signature
 * form). Weights are a = 8 - mx, b = mx horizontally and c = 8 - my, d = my
 * vertically; each output is (w0 * s0 + w1 * s1 + 4) >> 3.
 *   - _h_c : blends src[x] with src[x + 1];
 *   - _v_c : blends src[x] with src[x + sstride];
 *   - _hv_c: blends h + 1 rows horizontally into tmp_array, a
 *            (2*SIZE+1) x SIZE byte buffer, then blends tmp[x] with
 *            tmp[x + SIZE] vertically into dst.
 * NOTE(review): the per-row pointer advances and the braces are not in the
 * visible lines.
 */
#define VP8_BILINEAR(SIZE) \
418
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
420
int a = 8-mx, b = mx; \
423
for (y = 0; y < h; y++) { \
424
for (x = 0; x < SIZE; x++) \
425
dst[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
430
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
432
int c = 8-my, d = my; \
435
for (y = 0; y < h; y++) { \
436
for (x = 0; x < SIZE; x++) \
437
dst[x] = (c*src[x] + d*src[x+sstride] + 4) >> 3; \
443
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
445
int a = 8-mx, b = mx; \
446
int c = 8-my, d = my; \
448
uint8_t tmp_array[(2*SIZE+1)*SIZE]; \
449
uint8_t *tmp = tmp_array; \
451
for (y = 0; y < h+1; y++) { \
452
for (x = 0; x < SIZE; x++) \
453
tmp[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
460
for (y = 0; y < h; y++) { \
461
for (x = 0; x < SIZE; x++) \
462
dst[x] = (c*tmp[x] + d*tmp[x+SIZE] + 4) >> 3; \
591
/*
 * Generates three bilinear interpolators per SIZE (wrapped-signature form).
 * Weights are a = 8 - mx, b = mx horizontally and c = 8 - my, d = my
 * vertically; each output is (w0 * s0 + w1 * s1 + 4) >> 3.
 *   - _h_c : blends src[x] with src[x + 1];
 *   - _v_c : blends src[x] with src[x + sstride];
 *   - _hv_c: blends h + 1 rows horizontally into tmp_array, a
 *            (2 * SIZE + 1) x SIZE byte buffer, then blends tmp[x] with
 *            tmp[x + SIZE] vertically into dst.
 * NOTE(review): parts of the _hv_c signature, the per-row pointer advances
 * and the braces are not in the visible lines.
 */
#define VP8_BILINEAR(SIZE) \
592
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \
593
uint8_t *src, ptrdiff_t sstride, \
594
int h, int mx, int my) \
596
int a = 8 - mx, b = mx; \
598
for (y = 0; y < h; y++) { \
599
for (x = 0; x < SIZE; x++) \
600
dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \
606
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \
607
uint8_t *src, ptrdiff_t sstride, \
608
int h, int mx, int my) \
610
int c = 8 - my, d = my; \
612
for (y = 0; y < h; y++) { \
613
for (x = 0; x < SIZE; x++) \
614
dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3; \
620
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, \
624
int h, int mx, int my) \
626
int a = 8 - mx, b = mx; \
627
int c = 8 - my, d = my; \
629
uint8_t tmp_array[(2 * SIZE + 1) * SIZE]; \
630
uint8_t *tmp = tmp_array; \
631
for (y = 0; y < h + 1; y++) { \
632
for (x = 0; x < SIZE; x++) \
633
tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3; \
638
for (y = 0; y < h; y++) { \
639
for (x = 0; x < SIZE; x++) \
640
dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3; \
472
/*
 * Older-named form of the motion-compensation table filler: assigns the
 * SIZE-wide copy / EPEL functions into dsp->put_vp8_epel_pixels_tab[IDX].
 * In the visible entries the last index selects copy (0), h4 (1) or h6 (2),
 * and the middle index 1 selects v4.
 * NOTE(review): only the first four assignments are visible and the last one
 * ends in a continuation, so the macro is truncated in this view.
 */
#define VP8_MC_FUNC(IDX, SIZE) \
473
dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
474
dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
475
dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
476
dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
650
/*
 * Fills dsp->put_vp8_epel_pixels_tab[IDX][v][h] with the SIZE-wide functions.
 * The middle index selects the vertical filter (0 = none, 1 = 4-tap,
 * 2 = 6-tap) and the last index the horizontal filter in the same way;
 * [0][0] is the plain copy put_vp8_pixels<SIZE>_c.
 * NOTE(review): the [2][0] assignment appears twice (the two lines differ
 * only in spacing), which looks like residue of a rendered diff.
 */
#define VP78_MC_FUNC(IDX, SIZE) \
651
dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
652
dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
653
dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
654
dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
477
655
dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
478
656
dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
479
dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
657
dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
480
658
dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
481
659
dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c
483
/*
 * Older-named form of the bilinear table filler: the visible line stores the
 * plain copy put_vp8_pixels<SIZE>_c in
 * dsp->put_vp8_bilinear_pixels_tab[IDX][0][0].
 * NOTE(review): the line ends in a continuation and the remaining assignments
 * are not in view.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE) \
484
dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
661
#define VP78_BILINEAR_MC_FUNC(IDX, SIZE) \
662
dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
485
663
dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
486
664
dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
487
665
dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \