/*
 * Copyright (c) 2000, 2001 Fabrice Bellard.
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 */
30
#include "mpegvideo.h"
31
#include "simple_idct.h"
35
/*
 * Forward declaration: spatial discrete wavelet transform used by the
 * w53/w97 comparison metrics (w_c and its wrappers) further down.
 * NOTE(review): presumably defined in snow.c -- the w_c() user is guarded
 * by CONFIG_SNOW_ENCODER with the comment "idwt is in snow.c"; confirm.
 */
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
37
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
38
uint32_t squareTbl[512] = {0, };
/* Classic 8x8 zigzag scan order: maps scan position -> raster-order
 * coefficient index. (Fix: restored the missing closing brace of the
 * initializer, which left the array definition unterminated.) */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
/* Specific zigzag scan for 248 idct. NOTE that unlike the
   specification, we interleave the fields.
   (Fix: restored the missing closing brace of the initializer.) */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
64
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
65
uint16_t __align8 inv_zigzag_direct16[64] = {0, };
67
/* Alternate horizontal 8x8 scan order (scan position -> raster index).
 * (Fix: restored the missing closing brace of the initializer.) */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
/* Alternate vertical 8x8 scan order (scan position -> raster index).
 * (Fix: restored the missing closing brace of the initializer.) */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255
 * (fixed-point reciprocal table for fast division by small constants).
 * (Fix: restored the missing closing brace of the initializer.) */
const uint32_t inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
/* Input permutation for the simple_idct_mmx
 * (Fix: restored the missing closing brace of the initializer.) */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
137
static int pix_sum_c(uint8_t * pix, int line_size)
142
for (i = 0; i < 16; i++) {
143
for (j = 0; j < 16; j += 8) {
154
pix += line_size - 16;
159
static int pix_norm1_c(uint8_t * pix, int line_size)
162
uint32_t *sq = squareTbl + 256;
165
for (i = 0; i < 16; i++) {
166
for (j = 0; j < 16; j += 8) {
177
#if LONG_MAX > 2147483647
178
register uint64_t x=*(uint64_t*)pix;
180
s += sq[(x>>8)&0xff];
181
s += sq[(x>>16)&0xff];
182
s += sq[(x>>24)&0xff];
183
s += sq[(x>>32)&0xff];
184
s += sq[(x>>40)&0xff];
185
s += sq[(x>>48)&0xff];
186
s += sq[(x>>56)&0xff];
188
register uint32_t x=*(uint32_t*)pix;
190
s += sq[(x>>8)&0xff];
191
s += sq[(x>>16)&0xff];
192
s += sq[(x>>24)&0xff];
193
x=*(uint32_t*)(pix+4);
195
s += sq[(x>>8)&0xff];
196
s += sq[(x>>16)&0xff];
197
s += sq[(x>>24)&0xff];
202
pix += line_size - 16;
/*
 * Byte-swap w 32-bit words from src into dst using the bswap_32 macro.
 * Main loop is unrolled 8x; a scalar tail loop handles the remaining
 * w % 8 words.
 * (Fix: the chunk's body was truncated -- the loop index declaration,
 * the tail-loop header and the closing braces have been restored.)
 */
static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
    int i;

    for(i=0; i+8<=w; i+=8){
        dst[i+0]= bswap_32(src[i+0]);
        dst[i+1]= bswap_32(src[i+1]);
        dst[i+2]= bswap_32(src[i+2]);
        dst[i+3]= bswap_32(src[i+3]);
        dst[i+4]= bswap_32(src[i+4]);
        dst[i+5]= bswap_32(src[i+5]);
        dst[i+6]= bswap_32(src[i+6]);
        dst[i+7]= bswap_32(src[i+7]);
    }
    for(;i<w;i++){
        dst[i+0]= bswap_32(src[i+0]);
    }
}
225
/*
 * Sum of squared errors between two 4-pixel-wide blocks.
 * pix1/pix2: top-left of each block; line_size: row stride; h: row count.
 * Returns the SSE over the 4 x h region.
 * (Fix: the chunk's body was truncated -- row advance and return restored.
 * NOTE(review): the original summed via sq = squareTbl + 256; direct d*d is
 * used here instead, equivalent assuming squareTbl[256+x] == x*x after
 * init -- the init code is not visible in this chunk, confirm.)
 */
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        const int d0 = pix1[0] - pix2[0];
        const int d1 = pix1[1] - pix2[1];
        const int d2 = pix1[2] - pix2[2];
        const int d3 = pix1[3] - pix2[3];
        s += d0*d0 + d1*d1 + d2*d2 + d3*d3;
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
/*
 * Sum of squared errors between two 8-pixel-wide blocks.
 * pix1/pix2: top-left of each block; line_size: row stride; h: row count.
 * Returns the SSE over the 8 x h region.
 * (Fix: the chunk's body was truncated -- row advance and return restored.
 * NOTE(review): the original summed via sq = squareTbl + 256; direct d*d is
 * used here instead, equivalent assuming squareTbl[256+x] == x*x after
 * init -- the init code is not visible in this chunk, confirm.)
 */
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s = 0, i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++) {
            const int d = pix1[j] - pix2[j];
            s += d*d;
        }
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
/*
 * Sum of squared errors between two 16-pixel-wide blocks.
 * pix1/pix2: top-left of each block; line_size: row stride; h: row count.
 * Returns the SSE over the 16 x h region.
 * (Fix: the chunk's body was truncated -- row advance and return restored.
 * NOTE(review): the original summed via sq = squareTbl + 256; direct d*d is
 * used here instead, equivalent assuming squareTbl[256+x] == x*x after
 * init -- the init code is not visible in this chunk, confirm.)
 */
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0, i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 16; j++) {
            const int d = pix1[j] - pix2[j];
            s += d*d;
        }
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}
294
static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){
295
#ifdef CONFIG_SNOW_ENCODER //idwt is in snow.c
297
const int dec_count= w==8 ? 3 : 4;
301
static const int scale[2][2][4][4]={
305
{268, 239, 239, 213},
310
{344, 310, 310, 280},
318
{275, 245, 245, 218},
323
{352, 317, 317, 286},
332
for (i = 0; i < h; i++) {
333
for (j = 0; j < w; j+=4) {
334
tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0])<<4;
335
tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1])<<4;
336
tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2])<<4;
337
tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3])<<4;
343
ff_spatial_dwt(tmp, w, h, 16, type, dec_count);
347
for(level=0; level<dec_count; level++){
348
for(ori= level ? 1 : 0; ori<4; ori++){
349
int sx= (ori&1) ? 1<<level: 0;
350
int stride= 16<<(dec_count-level);
351
int sy= (ori&2) ? stride>>1 : 0;
354
for(i=0; i<size; i++){
355
for(j=0; j<size; j++){
356
int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
363
for (i = 0; i < h; i++) {
364
for (j = 0; j < w; j+=4) {
365
s+= ABS(tmp[16*i+j+0]);
366
s+= ABS(tmp[16*i+j+1]);
367
s+= ABS(tmp[16*i+j+2]);
368
s+= ABS(tmp[16*i+j+3]);
377
static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
378
return w_c(v, pix1, pix2, line_size, 8, h, 1);
381
static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
382
return w_c(v, pix1, pix2, line_size, 8, h, 0);
385
static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
386
return w_c(v, pix1, pix2, line_size, 16, h, 1);
389
static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){
390
return w_c(v, pix1, pix2, line_size, 16, h, 0);
393
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
397
/* read the pixels */
399
block[0] = pixels[0];
400
block[1] = pixels[1];
401
block[2] = pixels[2];
402
block[3] = pixels[3];
403
block[4] = pixels[4];
404
block[5] = pixels[5];
405
block[6] = pixels[6];
406
block[7] = pixels[7];
412
static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
413
const uint8_t *s2, int stride){
416
/* read the pixels */
418
block[0] = s1[0] - s2[0];
419
block[1] = s1[1] - s2[1];
420
block[2] = s1[2] - s2[2];
421
block[3] = s1[3] - s2[3];
422
block[4] = s1[4] - s2[4];
423
block[5] = s1[5] - s2[5];
424
block[6] = s1[6] - s2[6];
425
block[7] = s1[7] - s2[7];
433
static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
437
uint8_t *cm = cropTbl + MAX_NEG_CROP;
439
/* read the pixels */
441
pixels[0] = cm[block[0]];
442
pixels[1] = cm[block[1]];
443
pixels[2] = cm[block[2]];
444
pixels[3] = cm[block[3]];
445
pixels[4] = cm[block[4]];
446
pixels[5] = cm[block[5]];
447
pixels[6] = cm[block[6]];
448
pixels[7] = cm[block[7]];
455
static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
459
uint8_t *cm = cropTbl + MAX_NEG_CROP;
461
/* read the pixels */
463
pixels[0] = cm[block[0]];
464
pixels[1] = cm[block[1]];
465
pixels[2] = cm[block[2]];
466
pixels[3] = cm[block[3]];
473
static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
477
uint8_t *cm = cropTbl + MAX_NEG_CROP;
479
/* read the pixels */
481
pixels[0] = cm[block[0]];
482
pixels[1] = cm[block[1]];
489
static void put_signed_pixels_clamped_c(const DCTELEM *block,
490
uint8_t *restrict pixels,
495
for (i = 0; i < 8; i++) {
496
for (j = 0; j < 8; j++) {
499
else if (*block > 127)
502
*pixels = (uint8_t)(*block + 128);
506
pixels += (line_size - 8);
510
static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
514
uint8_t *cm = cropTbl + MAX_NEG_CROP;
516
/* read the pixels */
518
pixels[0] = cm[pixels[0] + block[0]];
519
pixels[1] = cm[pixels[1] + block[1]];
520
pixels[2] = cm[pixels[2] + block[2]];
521
pixels[3] = cm[pixels[3] + block[3]];
522
pixels[4] = cm[pixels[4] + block[4]];
523
pixels[5] = cm[pixels[5] + block[5]];
524
pixels[6] = cm[pixels[6] + block[6]];
525
pixels[7] = cm[pixels[7] + block[7]];
531
static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
535
uint8_t *cm = cropTbl + MAX_NEG_CROP;
537
/* read the pixels */
539
pixels[0] = cm[pixels[0] + block[0]];
540
pixels[1] = cm[pixels[1] + block[1]];
541
pixels[2] = cm[pixels[2] + block[2]];
542
pixels[3] = cm[pixels[3] + block[3]];
548
static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
552
uint8_t *cm = cropTbl + MAX_NEG_CROP;
554
/* read the pixels */
556
pixels[0] = cm[pixels[0] + block[0]];
557
pixels[1] = cm[pixels[1] + block[1]];
563
static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
567
pixels[0] += block[0];
568
pixels[1] += block[1];
569
pixels[2] += block[2];
570
pixels[3] += block[3];
571
pixels[4] += block[4];
572
pixels[5] += block[5];
573
pixels[6] += block[6];
574
pixels[7] += block[7];
580
static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
584
pixels[0] += block[0];
585
pixels[1] += block[1];
586
pixels[2] += block[2];
587
pixels[3] += block[3];
595
#define PIXOP2(OPNAME, OP) \
596
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
600
OP(*((uint64_t*)block), LD64(pixels));\
606
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
610
const uint64_t a= LD64(pixels );\
611
const uint64_t b= LD64(pixels+1);\
612
OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
618
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
622
const uint64_t a= LD64(pixels );\
623
const uint64_t b= LD64(pixels+1);\
624
OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
630
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
634
const uint64_t a= LD64(pixels );\
635
const uint64_t b= LD64(pixels+line_size);\
636
OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
642
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
646
const uint64_t a= LD64(pixels );\
647
const uint64_t b= LD64(pixels+line_size);\
648
OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
654
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
657
const uint64_t a= LD64(pixels );\
658
const uint64_t b= LD64(pixels+1);\
659
uint64_t l0= (a&0x0303030303030303ULL)\
660
+ (b&0x0303030303030303ULL)\
661
+ 0x0202020202020202ULL;\
662
uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
663
+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
667
for(i=0; i<h; i+=2){\
668
uint64_t a= LD64(pixels );\
669
uint64_t b= LD64(pixels+1);\
670
l1= (a&0x0303030303030303ULL)\
671
+ (b&0x0303030303030303ULL);\
672
h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
673
+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
674
OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
679
l0= (a&0x0303030303030303ULL)\
680
+ (b&0x0303030303030303ULL)\
681
+ 0x0202020202020202ULL;\
682
h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
683
+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
684
OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
690
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
693
const uint64_t a= LD64(pixels );\
694
const uint64_t b= LD64(pixels+1);\
695
uint64_t l0= (a&0x0303030303030303ULL)\
696
+ (b&0x0303030303030303ULL)\
697
+ 0x0101010101010101ULL;\
698
uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
699
+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
703
for(i=0; i<h; i+=2){\
704
uint64_t a= LD64(pixels );\
705
uint64_t b= LD64(pixels+1);\
706
l1= (a&0x0303030303030303ULL)\
707
+ (b&0x0303030303030303ULL);\
708
h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
709
+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
710
OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
715
l0= (a&0x0303030303030303ULL)\
716
+ (b&0x0303030303030303ULL)\
717
+ 0x0101010101010101ULL;\
718
h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
719
+ ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
720
OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
726
CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels_c , 8)\
727
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
728
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
729
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
730
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
731
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
732
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
734
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
735
#else // 64 bit variant
737
#define PIXOP2(OPNAME, OP) \
738
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
741
OP(*((uint16_t*)(block )), LD16(pixels ));\
746
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
749
OP(*((uint32_t*)(block )), LD32(pixels ));\
754
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
757
OP(*((uint32_t*)(block )), LD32(pixels ));\
758
OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
763
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
764
OPNAME ## _pixels8_c(block, pixels, line_size, h);\
767
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
768
int src_stride1, int src_stride2, int h){\
772
a= LD32(&src1[i*src_stride1 ]);\
773
b= LD32(&src2[i*src_stride2 ]);\
774
OP(*((uint32_t*)&dst[i*dst_stride ]), no_rnd_avg32(a, b));\
775
a= LD32(&src1[i*src_stride1+4]);\
776
b= LD32(&src2[i*src_stride2+4]);\
777
OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
781
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
782
int src_stride1, int src_stride2, int h){\
786
a= LD32(&src1[i*src_stride1 ]);\
787
b= LD32(&src2[i*src_stride2 ]);\
788
OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
789
a= LD32(&src1[i*src_stride1+4]);\
790
b= LD32(&src2[i*src_stride2+4]);\
791
OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
795
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
796
int src_stride1, int src_stride2, int h){\
800
a= LD32(&src1[i*src_stride1 ]);\
801
b= LD32(&src2[i*src_stride2 ]);\
802
OP(*((uint32_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
806
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
807
int src_stride1, int src_stride2, int h){\
811
a= LD16(&src1[i*src_stride1 ]);\
812
b= LD16(&src2[i*src_stride2 ]);\
813
OP(*((uint16_t*)&dst[i*dst_stride ]), rnd_avg32(a, b));\
817
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
818
int src_stride1, int src_stride2, int h){\
819
OPNAME ## _pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
820
OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
823
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
824
int src_stride1, int src_stride2, int h){\
825
OPNAME ## _no_rnd_pixels8_l2(dst , src1 , src2 , dst_stride, src_stride1, src_stride2, h);\
826
OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
829
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
830
OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
833
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
834
OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
837
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
838
OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
841
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
842
OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
845
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
846
int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
849
uint32_t a, b, c, d, l0, l1, h0, h1;\
850
a= LD32(&src1[i*src_stride1]);\
851
b= LD32(&src2[i*src_stride2]);\
852
c= LD32(&src3[i*src_stride3]);\
853
d= LD32(&src4[i*src_stride4]);\
854
l0= (a&0x03030303UL)\
857
h0= ((a&0xFCFCFCFCUL)>>2)\
858
+ ((b&0xFCFCFCFCUL)>>2);\
859
l1= (c&0x03030303UL)\
861
h1= ((c&0xFCFCFCFCUL)>>2)\
862
+ ((d&0xFCFCFCFCUL)>>2);\
863
OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
864
a= LD32(&src1[i*src_stride1+4]);\
865
b= LD32(&src2[i*src_stride2+4]);\
866
c= LD32(&src3[i*src_stride3+4]);\
867
d= LD32(&src4[i*src_stride4+4]);\
868
l0= (a&0x03030303UL)\
871
h0= ((a&0xFCFCFCFCUL)>>2)\
872
+ ((b&0xFCFCFCFCUL)>>2);\
873
l1= (c&0x03030303UL)\
875
h1= ((c&0xFCFCFCFCUL)>>2)\
876
+ ((d&0xFCFCFCFCUL)>>2);\
877
OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
881
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
882
OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
885
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
886
OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
889
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
890
OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
893
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
894
OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
897
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
898
int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
901
uint32_t a, b, c, d, l0, l1, h0, h1;\
902
a= LD32(&src1[i*src_stride1]);\
903
b= LD32(&src2[i*src_stride2]);\
904
c= LD32(&src3[i*src_stride3]);\
905
d= LD32(&src4[i*src_stride4]);\
906
l0= (a&0x03030303UL)\
909
h0= ((a&0xFCFCFCFCUL)>>2)\
910
+ ((b&0xFCFCFCFCUL)>>2);\
911
l1= (c&0x03030303UL)\
913
h1= ((c&0xFCFCFCFCUL)>>2)\
914
+ ((d&0xFCFCFCFCUL)>>2);\
915
OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
916
a= LD32(&src1[i*src_stride1+4]);\
917
b= LD32(&src2[i*src_stride2+4]);\
918
c= LD32(&src3[i*src_stride3+4]);\
919
d= LD32(&src4[i*src_stride4+4]);\
920
l0= (a&0x03030303UL)\
923
h0= ((a&0xFCFCFCFCUL)>>2)\
924
+ ((b&0xFCFCFCFCUL)>>2);\
925
l1= (c&0x03030303UL)\
927
h1= ((c&0xFCFCFCFCUL)>>2)\
928
+ ((d&0xFCFCFCFCUL)>>2);\
929
OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
932
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
933
int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
934
OPNAME ## _pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
935
OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
937
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
938
int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
939
OPNAME ## _no_rnd_pixels8_l4(dst , src1 , src2 , src3 , src4 , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
940
OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
943
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
945
int i, a0, b0, a1, b1;\
952
for(i=0; i<h; i+=2){\
958
block[0]= (a1+a0)>>2; /* FIXME non put */\
959
block[1]= (b1+b0)>>2;\
969
block[0]= (a1+a0)>>2;\
970
block[1]= (b1+b0)>>2;\
976
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
979
const uint32_t a= LD32(pixels );\
980
const uint32_t b= LD32(pixels+1);\
981
uint32_t l0= (a&0x03030303UL)\
984
uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
985
+ ((b&0xFCFCFCFCUL)>>2);\
989
for(i=0; i<h; i+=2){\
990
uint32_t a= LD32(pixels );\
991
uint32_t b= LD32(pixels+1);\
992
l1= (a&0x03030303UL)\
994
h1= ((a&0xFCFCFCFCUL)>>2)\
995
+ ((b&0xFCFCFCFCUL)>>2);\
996
OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1001
l0= (a&0x03030303UL)\
1004
h0= ((a&0xFCFCFCFCUL)>>2)\
1005
+ ((b&0xFCFCFCFCUL)>>2);\
1006
OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1012
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1015
for(j=0; j<2; j++){\
1017
const uint32_t a= LD32(pixels );\
1018
const uint32_t b= LD32(pixels+1);\
1019
uint32_t l0= (a&0x03030303UL)\
1022
uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1023
+ ((b&0xFCFCFCFCUL)>>2);\
1027
for(i=0; i<h; i+=2){\
1028
uint32_t a= LD32(pixels );\
1029
uint32_t b= LD32(pixels+1);\
1030
l1= (a&0x03030303UL)\
1031
+ (b&0x03030303UL);\
1032
h1= ((a&0xFCFCFCFCUL)>>2)\
1033
+ ((b&0xFCFCFCFCUL)>>2);\
1034
OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1039
l0= (a&0x03030303UL)\
1042
h0= ((a&0xFCFCFCFCUL)>>2)\
1043
+ ((b&0xFCFCFCFCUL)>>2);\
1044
OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1048
pixels+=4-line_size*(h+1);\
1049
block +=4-line_size*h;\
1053
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1056
for(j=0; j<2; j++){\
1058
const uint32_t a= LD32(pixels );\
1059
const uint32_t b= LD32(pixels+1);\
1060
uint32_t l0= (a&0x03030303UL)\
1063
uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1064
+ ((b&0xFCFCFCFCUL)>>2);\
1068
for(i=0; i<h; i+=2){\
1069
uint32_t a= LD32(pixels );\
1070
uint32_t b= LD32(pixels+1);\
1071
l1= (a&0x03030303UL)\
1072
+ (b&0x03030303UL);\
1073
h1= ((a&0xFCFCFCFCUL)>>2)\
1074
+ ((b&0xFCFCFCFCUL)>>2);\
1075
OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1080
l0= (a&0x03030303UL)\
1083
h0= ((a&0xFCFCFCFCUL)>>2)\
1084
+ ((b&0xFCFCFCFCUL)>>2);\
1085
OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1089
pixels+=4-line_size*(h+1);\
1090
block +=4-line_size*h;\
1094
CALL_2X_PIXELS(OPNAME ## _pixels16_c , OPNAME ## _pixels8_c , 8)\
1095
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
1096
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
1097
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
1098
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c , OPNAME ## _pixels8_c , 8)\
1099
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
1100
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
1101
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
1103
#define op_avg(a, b) a = rnd_avg32(a, b)
1105
#define op_put(a, b) a = b
1112
#define avg2(a,b) ((a+b+1)>>1)
1113
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
1115
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
1116
put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
1119
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
1120
put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
1123
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
1125
const int A=(16-x16)*(16-y16);
1126
const int B=( x16)*(16-y16);
1127
const int C=(16-x16)*( y16);
1128
const int D=( x16)*( y16);
1133
dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
1134
dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
1135
dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
1136
dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
1137
dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
1138
dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
1139
dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
1140
dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
1146
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
1147
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
1150
const int s= 1<<shift;
1160
for(x=0; x<8; x++){ //XXX FIXME optimize
1161
int src_x, src_y, frac_x, frac_y, index;
1165
frac_x= src_x&(s-1);
1166
frac_y= src_y&(s-1);
1170
if((unsigned)src_x < width){
1171
if((unsigned)src_y < height){
1172
index= src_x + src_y*stride;
1173
dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
1174
+ src[index +1]* frac_x )*(s-frac_y)
1175
+ ( src[index+stride ]*(s-frac_x)
1176
+ src[index+stride+1]* frac_x )* frac_y
1179
index= src_x + clip(src_y, 0, height)*stride;
1180
dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
1181
+ src[index +1]* frac_x )*s
1185
if((unsigned)src_y < height){
1186
index= clip(src_x, 0, width) + src_y*stride;
1187
dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
1188
+ src[index+stride ]* frac_y )*s
1191
index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
1192
dst[y*stride + x]= src[index ];
1204
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1206
case 2: put_pixels2_c (dst, src, stride, height); break;
1207
case 4: put_pixels4_c (dst, src, stride, height); break;
1208
case 8: put_pixels8_c (dst, src, stride, height); break;
1209
case 16:put_pixels16_c(dst, src, stride, height); break;
1213
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1215
for (i=0; i < height; i++) {
1216
for (j=0; j < width; j++) {
1217
dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
1224
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1226
for (i=0; i < height; i++) {
1227
for (j=0; j < width; j++) {
1228
dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
1235
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1237
for (i=0; i < height; i++) {
1238
for (j=0; j < width; j++) {
1239
dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
1246
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1248
for (i=0; i < height; i++) {
1249
for (j=0; j < width; j++) {
1250
dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
1257
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1259
for (i=0; i < height; i++) {
1260
for (j=0; j < width; j++) {
1261
dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
1268
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1270
for (i=0; i < height; i++) {
1271
for (j=0; j < width; j++) {
1272
dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
1279
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1281
for (i=0; i < height; i++) {
1282
for (j=0; j < width; j++) {
1283
dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
1290
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1292
for (i=0; i < height; i++) {
1293
for (j=0; j < width; j++) {
1294
dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
1301
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1303
case 2: avg_pixels2_c (dst, src, stride, height); break;
1304
case 4: avg_pixels4_c (dst, src, stride, height); break;
1305
case 8: avg_pixels8_c (dst, src, stride, height); break;
1306
case 16:avg_pixels16_c(dst, src, stride, height); break;
1310
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1312
for (i=0; i < height; i++) {
1313
for (j=0; j < width; j++) {
1314
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
1321
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1323
for (i=0; i < height; i++) {
1324
for (j=0; j < width; j++) {
1325
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
1332
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1334
for (i=0; i < height; i++) {
1335
for (j=0; j < width; j++) {
1336
dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
1343
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1345
for (i=0; i < height; i++) {
1346
for (j=0; j < width; j++) {
1347
dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
1354
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1356
for (i=0; i < height; i++) {
1357
for (j=0; j < width; j++) {
1358
dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
1365
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1367
for (i=0; i < height; i++) {
1368
for (j=0; j < width; j++) {
1369
dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
1376
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1378
for (i=0; i < height; i++) {
1379
for (j=0; j < width; j++) {
1380
dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
1387
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
1389
for (i=0; i < height; i++) {
1390
for (j=0; j < width; j++) {
1391
dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
1398
#define TPEL_WIDTH(width)\
1399
static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1400
void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
1401
static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1402
void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
1403
static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1404
void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
1405
static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1406
void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
1407
static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1408
void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
1409
static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1410
void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
1411
static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1412
void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
1413
static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1414
void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
1415
static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
1416
void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
1419
#define H264_CHROMA_MC(OPNAME, OP)\
1420
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1421
const int A=(8-x)*(8-y);\
1422
const int B=( x)*(8-y);\
1423
const int C=(8-x)*( y);\
1424
const int D=( x)*( y);\
1427
assert(x<8 && y<8 && x>=0 && y>=0);\
1431
OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1432
OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1438
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1439
const int A=(8-x)*(8-y);\
1440
const int B=( x)*(8-y);\
1441
const int C=(8-x)*( y);\
1442
const int D=( x)*( y);\
1445
assert(x<8 && y<8 && x>=0 && y>=0);\
1449
OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1450
OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1451
OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
1452
OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
1458
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1459
const int A=(8-x)*(8-y);\
1460
const int B=( x)*(8-y);\
1461
const int C=(8-x)*( y);\
1462
const int D=( x)*( y);\
1465
assert(x<8 && y<8 && x>=0 && y>=0);\
1469
OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1470
OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1471
OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
1472
OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
1473
OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
1474
OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
1475
OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
1476
OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
1482
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
1483
#define op_put(a, b) a = (((b) + 32)>>6)
1485
H264_CHROMA_MC(put_ , op_put)
1486
H264_CHROMA_MC(avg_ , op_avg)
1490
static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
1495
ST32(dst , LD32(src ));
1501
static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
1506
ST32(dst , LD32(src ));
1507
ST32(dst+4 , LD32(src+4 ));
1513
static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
1518
ST32(dst , LD32(src ));
1519
ST32(dst+4 , LD32(src+4 ));
1520
ST32(dst+8 , LD32(src+8 ));
1521
ST32(dst+12, LD32(src+12));
1527
static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
1532
ST32(dst , LD32(src ));
1533
ST32(dst+4 , LD32(src+4 ));
1534
ST32(dst+8 , LD32(src+8 ));
1535
ST32(dst+12, LD32(src+12));
1542
static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
1547
ST32(dst , LD32(src ));
1548
ST32(dst+4 , LD32(src+4 ));
1556
#define QPEL_MC(r, OPNAME, RND, OP) \
1557
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1558
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1562
OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
1563
OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
1564
OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
1565
OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
1566
OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
1567
OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
1568
OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
1569
OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
1575
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1577
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1581
const int src0= src[0*srcStride];\
1582
const int src1= src[1*srcStride];\
1583
const int src2= src[2*srcStride];\
1584
const int src3= src[3*srcStride];\
1585
const int src4= src[4*srcStride];\
1586
const int src5= src[5*srcStride];\
1587
const int src6= src[6*srcStride];\
1588
const int src7= src[7*srcStride];\
1589
const int src8= src[8*srcStride];\
1590
OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
1591
OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
1592
OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
1593
OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
1594
OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
1595
OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
1596
OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
1597
OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
1603
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1604
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1609
OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
1610
OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
1611
OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
1612
OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
1613
OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
1614
OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
1615
OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
1616
OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
1617
OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
1618
OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
1619
OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
1620
OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
1621
OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
1622
OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
1623
OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
1624
OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
1630
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1631
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
1636
const int src0= src[0*srcStride];\
1637
const int src1= src[1*srcStride];\
1638
const int src2= src[2*srcStride];\
1639
const int src3= src[3*srcStride];\
1640
const int src4= src[4*srcStride];\
1641
const int src5= src[5*srcStride];\
1642
const int src6= src[6*srcStride];\
1643
const int src7= src[7*srcStride];\
1644
const int src8= src[8*srcStride];\
1645
const int src9= src[9*srcStride];\
1646
const int src10= src[10*srcStride];\
1647
const int src11= src[11*srcStride];\
1648
const int src12= src[12*srcStride];\
1649
const int src13= src[13*srcStride];\
1650
const int src14= src[14*srcStride];\
1651
const int src15= src[15*srcStride];\
1652
const int src16= src[16*srcStride];\
1653
OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
1654
OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
1655
OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
1656
OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
1657
OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
1658
OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
1659
OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
1660
OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
1661
OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
1662
OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
1663
OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
1664
OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
1665
OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
1666
OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
1667
OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
1668
OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
1674
static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
1675
OPNAME ## pixels8_c(dst, src, stride, 8);\
1678
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1680
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1681
OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
1684
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1685
OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
1688
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1690
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
1691
OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
1694
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1695
uint8_t full[16*9];\
1697
copy_block9(full, src, 16, stride, 9);\
1698
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1699
OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
1702
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1703
uint8_t full[16*9];\
1704
copy_block9(full, src, 16, stride, 9);\
1705
OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
1708
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1709
uint8_t full[16*9];\
1711
copy_block9(full, src, 16, stride, 9);\
1712
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
1713
OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
1715
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1716
uint8_t full[16*9];\
1719
uint8_t halfHV[64];\
1720
copy_block9(full, src, 16, stride, 9);\
1721
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1722
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1723
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1724
OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1726
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1727
uint8_t full[16*9];\
1729
uint8_t halfHV[64];\
1730
copy_block9(full, src, 16, stride, 9);\
1731
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1732
put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1733
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1734
OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1736
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1737
uint8_t full[16*9];\
1740
uint8_t halfHV[64];\
1741
copy_block9(full, src, 16, stride, 9);\
1742
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1743
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1744
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1745
OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1747
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1748
uint8_t full[16*9];\
1750
uint8_t halfHV[64];\
1751
copy_block9(full, src, 16, stride, 9);\
1752
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1753
put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1754
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1755
OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1757
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1758
uint8_t full[16*9];\
1761
uint8_t halfHV[64];\
1762
copy_block9(full, src, 16, stride, 9);\
1763
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1764
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1765
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1766
OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1768
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1769
uint8_t full[16*9];\
1771
uint8_t halfHV[64];\
1772
copy_block9(full, src, 16, stride, 9);\
1773
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1774
put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1775
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1776
OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1778
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1779
uint8_t full[16*9];\
1782
uint8_t halfHV[64];\
1783
copy_block9(full, src, 16, stride, 9);\
1784
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1785
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1786
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1787
OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1789
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1790
uint8_t full[16*9];\
1792
uint8_t halfHV[64];\
1793
copy_block9(full, src, 16, stride, 9);\
1794
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1795
put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1796
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1797
OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1799
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1801
uint8_t halfHV[64];\
1802
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1803
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1804
OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
1806
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1808
uint8_t halfHV[64];\
1809
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1810
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1811
OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1813
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1814
uint8_t full[16*9];\
1817
uint8_t halfHV[64];\
1818
copy_block9(full, src, 16, stride, 9);\
1819
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1820
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1821
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1822
OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1824
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
1825
uint8_t full[16*9];\
1827
copy_block9(full, src, 16, stride, 9);\
1828
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1829
put ## RND ## pixels8_l2(halfH, halfH, full, 8, 8, 16, 9);\
1830
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1832
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
1833
uint8_t full[16*9];\
1836
uint8_t halfHV[64];\
1837
copy_block9(full, src, 16, stride, 9);\
1838
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1839
put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1840
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1841
OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
1843
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
1844
uint8_t full[16*9];\
1846
copy_block9(full, src, 16, stride, 9);\
1847
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1848
put ## RND ## pixels8_l2(halfH, halfH, full+1, 8, 8, 16, 9);\
1849
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1851
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
1853
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1854
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1856
static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
1857
OPNAME ## pixels16_c(dst, src, stride, 16);\
1860
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
1862
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1863
OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
1866
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
1867
OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1870
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
1872
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1873
OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
1876
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
1877
uint8_t full[24*17];\
1879
copy_block17(full, src, 24, stride, 17);\
1880
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1881
OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
1884
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
1885
uint8_t full[24*17];\
1886
copy_block17(full, src, 24, stride, 17);\
1887
OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1890
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
1891
uint8_t full[24*17];\
1893
copy_block17(full, src, 24, stride, 17);\
1894
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1895
OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
1897
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
1898
uint8_t full[24*17];\
1899
uint8_t halfH[272];\
1900
uint8_t halfV[256];\
1901
uint8_t halfHV[256];\
1902
copy_block17(full, src, 24, stride, 17);\
1903
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1904
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1905
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1906
OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1908
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
1909
uint8_t full[24*17];\
1910
uint8_t halfH[272];\
1911
uint8_t halfHV[256];\
1912
copy_block17(full, src, 24, stride, 17);\
1913
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1914
put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1915
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1916
OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1918
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
1919
uint8_t full[24*17];\
1920
uint8_t halfH[272];\
1921
uint8_t halfV[256];\
1922
uint8_t halfHV[256];\
1923
copy_block17(full, src, 24, stride, 17);\
1924
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1925
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1926
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1927
OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1929
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
1930
uint8_t full[24*17];\
1931
uint8_t halfH[272];\
1932
uint8_t halfHV[256];\
1933
copy_block17(full, src, 24, stride, 17);\
1934
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1935
put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1936
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1937
OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1939
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
1940
uint8_t full[24*17];\
1941
uint8_t halfH[272];\
1942
uint8_t halfV[256];\
1943
uint8_t halfHV[256];\
1944
copy_block17(full, src, 24, stride, 17);\
1945
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1946
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1947
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1948
OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1950
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
1951
uint8_t full[24*17];\
1952
uint8_t halfH[272];\
1953
uint8_t halfHV[256];\
1954
copy_block17(full, src, 24, stride, 17);\
1955
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1956
put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
1957
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1958
OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1960
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
1961
uint8_t full[24*17];\
1962
uint8_t halfH[272];\
1963
uint8_t halfV[256];\
1964
uint8_t halfHV[256];\
1965
copy_block17(full, src, 24, stride, 17);\
1966
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1967
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1968
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1969
OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1971
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
1972
uint8_t full[24*17];\
1973
uint8_t halfH[272];\
1974
uint8_t halfHV[256];\
1975
copy_block17(full, src, 24, stride, 17);\
1976
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1977
put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
1978
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1979
OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1981
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
1982
uint8_t halfH[272];\
1983
uint8_t halfHV[256];\
1984
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1985
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1986
OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
1988
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
1989
uint8_t halfH[272];\
1990
uint8_t halfHV[256];\
1991
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1992
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1993
OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1995
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
1996
uint8_t full[24*17];\
1997
uint8_t halfH[272];\
1998
uint8_t halfV[256];\
1999
uint8_t halfHV[256];\
2000
copy_block17(full, src, 24, stride, 17);\
2001
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2002
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
2003
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2004
OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2006
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2007
uint8_t full[24*17];\
2008
uint8_t halfH[272];\
2009
copy_block17(full, src, 24, stride, 17);\
2010
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2011
put ## RND ## pixels16_l2(halfH, halfH, full, 16, 16, 24, 17);\
2012
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2014
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
2015
uint8_t full[24*17];\
2016
uint8_t halfH[272];\
2017
uint8_t halfV[256];\
2018
uint8_t halfHV[256];\
2019
copy_block17(full, src, 24, stride, 17);\
2020
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2021
put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
2022
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
2023
OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
2025
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2026
uint8_t full[24*17];\
2027
uint8_t halfH[272];\
2028
copy_block17(full, src, 24, stride, 17);\
2029
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
2030
put ## RND ## pixels16_l2(halfH, halfH, full+1, 16, 16, 24, 17);\
2031
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2033
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2034
uint8_t halfH[272];\
2035
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
2036
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
2039
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2040
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
2041
#define op_put(a, b) a = cm[((b) + 16)>>5]
2042
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
2044
QPEL_MC(0, put_ , _ , op_put)
2045
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
2046
QPEL_MC(0, avg_ , _ , op_avg)
2047
//QPEL_MC(1, avg_no_rnd , _ , op_avg)
2049
#undef op_avg_no_rnd
2051
#undef op_put_no_rnd
2054
#define H264_LOWPASS(OPNAME, OP, OP2) \
2055
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2057
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
2061
OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
2062
OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
2063
OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
2064
OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
2070
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2072
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
2076
const int srcB= src[-2*srcStride];\
2077
const int srcA= src[-1*srcStride];\
2078
const int src0= src[0 *srcStride];\
2079
const int src1= src[1 *srcStride];\
2080
const int src2= src[2 *srcStride];\
2081
const int src3= src[3 *srcStride];\
2082
const int src4= src[4 *srcStride];\
2083
const int src5= src[5 *srcStride];\
2084
const int src6= src[6 *srcStride];\
2085
OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2086
OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2087
OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
2088
OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
2094
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2097
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
2099
src -= 2*srcStride;\
2100
for(i=0; i<h+5; i++)\
2102
tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
2103
tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
2104
tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
2105
tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
2109
tmp -= tmpStride*(h+5-2);\
2112
const int tmpB= tmp[-2*tmpStride];\
2113
const int tmpA= tmp[-1*tmpStride];\
2114
const int tmp0= tmp[0 *tmpStride];\
2115
const int tmp1= tmp[1 *tmpStride];\
2116
const int tmp2= tmp[2 *tmpStride];\
2117
const int tmp3= tmp[3 *tmpStride];\
2118
const int tmp4= tmp[4 *tmpStride];\
2119
const int tmp5= tmp[5 *tmpStride];\
2120
const int tmp6= tmp[6 *tmpStride];\
2121
OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2122
OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2123
OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
2124
OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
2130
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2132
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
2136
OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
2137
OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
2138
OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
2139
OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
2140
OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
2141
OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
2142
OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
2143
OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
2149
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2151
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
2155
const int srcB= src[-2*srcStride];\
2156
const int srcA= src[-1*srcStride];\
2157
const int src0= src[0 *srcStride];\
2158
const int src1= src[1 *srcStride];\
2159
const int src2= src[2 *srcStride];\
2160
const int src3= src[3 *srcStride];\
2161
const int src4= src[4 *srcStride];\
2162
const int src5= src[5 *srcStride];\
2163
const int src6= src[6 *srcStride];\
2164
const int src7= src[7 *srcStride];\
2165
const int src8= src[8 *srcStride];\
2166
const int src9= src[9 *srcStride];\
2167
const int src10=src[10*srcStride];\
2168
OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
2169
OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
2170
OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
2171
OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
2172
OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
2173
OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
2174
OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
2175
OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
2181
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2184
uint8_t *cm = cropTbl + MAX_NEG_CROP;\
2186
src -= 2*srcStride;\
2187
for(i=0; i<h+5; i++)\
2189
tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
2190
tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
2191
tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
2192
tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
2193
tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
2194
tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
2195
tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
2196
tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
2200
tmp -= tmpStride*(h+5-2);\
2203
const int tmpB= tmp[-2*tmpStride];\
2204
const int tmpA= tmp[-1*tmpStride];\
2205
const int tmp0= tmp[0 *tmpStride];\
2206
const int tmp1= tmp[1 *tmpStride];\
2207
const int tmp2= tmp[2 *tmpStride];\
2208
const int tmp3= tmp[3 *tmpStride];\
2209
const int tmp4= tmp[4 *tmpStride];\
2210
const int tmp5= tmp[5 *tmpStride];\
2211
const int tmp6= tmp[6 *tmpStride];\
2212
const int tmp7= tmp[7 *tmpStride];\
2213
const int tmp8= tmp[8 *tmpStride];\
2214
const int tmp9= tmp[9 *tmpStride];\
2215
const int tmp10=tmp[10*tmpStride];\
2216
OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
2217
OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
2218
OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
2219
OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
2220
OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
2221
OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
2222
OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
2223
OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
2229
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2230
OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
2231
OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
2232
src += 8*srcStride;\
2233
dst += 8*dstStride;\
2234
OPNAME ## h264_qpel8_v_lowpass(dst , src , dstStride, srcStride);\
2235
OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
2238
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
2239
OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
2240
OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
2241
src += 8*srcStride;\
2242
dst += 8*dstStride;\
2243
OPNAME ## h264_qpel8_h_lowpass(dst , src , dstStride, srcStride);\
2244
OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
2247
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
2248
OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
2249
OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
2250
src += 8*srcStride;\
2251
dst += 8*dstStride;\
2252
OPNAME ## h264_qpel8_hv_lowpass(dst , tmp , src , dstStride, tmpStride, srcStride);\
2253
OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
2256
#define H264_MC(OPNAME, SIZE) \
2257
static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
2258
OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
2261
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
2262
uint8_t half[SIZE*SIZE];\
2263
put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
2264
OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
2267
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
2268
OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
2271
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
2272
uint8_t half[SIZE*SIZE];\
2273
put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
2274
OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
2277
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
2278
uint8_t full[SIZE*(SIZE+5)];\
2279
uint8_t * const full_mid= full + SIZE*2;\
2280
uint8_t half[SIZE*SIZE];\
2281
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
2282
put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
2283
OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
2286
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
2287
uint8_t full[SIZE*(SIZE+5)];\
2288
uint8_t * const full_mid= full + SIZE*2;\
2289
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
2290
OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
2293
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
2294
uint8_t full[SIZE*(SIZE+5)];\
2295
uint8_t * const full_mid= full + SIZE*2;\
2296
uint8_t half[SIZE*SIZE];\
2297
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
2298
put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
2299
OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
2302
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
2303
uint8_t full[SIZE*(SIZE+5)];\
2304
uint8_t * const full_mid= full + SIZE*2;\
2305
uint8_t halfH[SIZE*SIZE];\
2306
uint8_t halfV[SIZE*SIZE];\
2307
put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
2308
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
2309
put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2310
OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2313
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
2314
uint8_t full[SIZE*(SIZE+5)];\
2315
uint8_t * const full_mid= full + SIZE*2;\
2316
uint8_t halfH[SIZE*SIZE];\
2317
uint8_t halfV[SIZE*SIZE];\
2318
put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
2319
copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
2320
put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2321
OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2324
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
2325
uint8_t full[SIZE*(SIZE+5)];\
2326
uint8_t * const full_mid= full + SIZE*2;\
2327
uint8_t halfH[SIZE*SIZE];\
2328
uint8_t halfV[SIZE*SIZE];\
2329
put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
2330
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
2331
put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2332
OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2335
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
2336
uint8_t full[SIZE*(SIZE+5)];\
2337
uint8_t * const full_mid= full + SIZE*2;\
2338
uint8_t halfH[SIZE*SIZE];\
2339
uint8_t halfV[SIZE*SIZE];\
2340
put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
2341
copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
2342
put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2343
OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
2346
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
2347
int16_t tmp[SIZE*(SIZE+5)];\
2348
OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
2351
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
2352
int16_t tmp[SIZE*(SIZE+5)];\
2353
uint8_t halfH[SIZE*SIZE];\
2354
uint8_t halfHV[SIZE*SIZE];\
2355
put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
2356
put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
2357
OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
2360
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
2361
int16_t tmp[SIZE*(SIZE+5)];\
2362
uint8_t halfH[SIZE*SIZE];\
2363
uint8_t halfHV[SIZE*SIZE];\
2364
put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
2365
put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
2366
OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
2369
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
2370
uint8_t full[SIZE*(SIZE+5)];\
2371
uint8_t * const full_mid= full + SIZE*2;\
2372
int16_t tmp[SIZE*(SIZE+5)];\
2373
uint8_t halfV[SIZE*SIZE];\
2374
uint8_t halfHV[SIZE*SIZE];\
2375
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
2376
put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2377
put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
2378
OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
2381
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
2382
uint8_t full[SIZE*(SIZE+5)];\
2383
uint8_t * const full_mid= full + SIZE*2;\
2384
int16_t tmp[SIZE*(SIZE+5)];\
2385
uint8_t halfV[SIZE*SIZE];\
2386
uint8_t halfHV[SIZE*SIZE];\
2387
copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
2388
put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
2389
put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
2390
OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
2393
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
2394
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
2395
#define op_put(a, b) a = cm[((b) + 16)>>5]
2396
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
2397
#define op2_put(a, b) a = cm[((b) + 512)>>10]
2399
H264_LOWPASS(put_ , op_put, op2_put)
2400
H264_LOWPASS(avg_ , op_avg, op2_avg)
2414
#define op_scale1(x) block[x] = clip_uint8( (block[x]*weight + offset) >> log2_denom )
2415
#define op_scale2(x) dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
2416
#define H264_WEIGHT(W,H) \
2417
static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
2418
int attribute_unused x, y; \
2419
offset <<= log2_denom; \
2420
if(log2_denom) offset += 1<<(log2_denom-1); \
2421
for(y=0; y<H; y++, block += stride){ \
2424
if(W==2) continue; \
2427
if(W==4) continue; \
2432
if(W==8) continue; \
2443
static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets){ \
2444
int attribute_unused x, y; \
2445
int offset = (offsets + offsetd + 1) >> 1; \
2446
offset = ((offset << 1) + 1) << log2_denom; \
2447
for(y=0; y<H; y++, dst += stride, src += stride){ \
2450
if(W==2) continue; \
2453
if(W==4) continue; \
2458
if(W==8) continue; \
2485
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
2486
uint8_t *cm = cropTbl + MAX_NEG_CROP;
2490
dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
2491
dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
2492
dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
2493
dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
2494
dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
2495
dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
2496
dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
2497
dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
2503
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
2504
uint8_t *cm = cropTbl + MAX_NEG_CROP;
2508
const int src_1= src[ -srcStride];
2509
const int src0 = src[0 ];
2510
const int src1 = src[ srcStride];
2511
const int src2 = src[2*srcStride];
2512
const int src3 = src[3*srcStride];
2513
const int src4 = src[4*srcStride];
2514
const int src5 = src[5*srcStride];
2515
const int src6 = src[6*srcStride];
2516
const int src7 = src[7*srcStride];
2517
const int src8 = src[8*srcStride];
2518
const int src9 = src[9*srcStride];
2519
dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
2520
dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
2521
dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
2522
dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
2523
dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
2524
dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
2525
dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
2526
dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
2532
static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
2533
put_pixels8_c(dst, src, stride, 8);
2536
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
2538
wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
2539
put_pixels8_l2(dst, src, half, stride, stride, 8, 8);
2542
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
2543
wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
2546
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
2548
wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
2549
put_pixels8_l2(dst, src+1, half, stride, stride, 8, 8);
2552
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
2553
wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
2556
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
2560
wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
2561
wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
2562
wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
2563
put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
2565
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
2569
wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
2570
wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
2571
wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
2572
put_pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);
2574
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
2576
wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
2577
wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
2580
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
2582
const int strength= ff_h263_loop_filter_strength[qscale];
2586
int p0= src[x-2*stride];
2587
int p1= src[x-1*stride];
2588
int p2= src[x+0*stride];
2589
int p3= src[x+1*stride];
2590
int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2592
if (d<-2*strength) d1= 0;
2593
else if(d<- strength) d1=-2*strength - d;
2594
else if(d< strength) d1= d;
2595
else if(d< 2*strength) d1= 2*strength - d;
2600
if(p1&256) p1= ~(p1>>31);
2601
if(p2&256) p2= ~(p2>>31);
2603
src[x-1*stride] = p1;
2604
src[x+0*stride] = p2;
2608
d2= clip((p0-p3)/4, -ad1, ad1);
2610
src[x-2*stride] = p0 - d2;
2611
src[x+ stride] = p3 + d2;
2615
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
2617
const int strength= ff_h263_loop_filter_strength[qscale];
2621
int p0= src[y*stride-2];
2622
int p1= src[y*stride-1];
2623
int p2= src[y*stride+0];
2624
int p3= src[y*stride+1];
2625
int d = (p0 - p3 + 4*(p2 - p1)) / 8;
2627
if (d<-2*strength) d1= 0;
2628
else if(d<- strength) d1=-2*strength - d;
2629
else if(d< strength) d1= d;
2630
else if(d< 2*strength) d1= 2*strength - d;
2635
if(p1&256) p1= ~(p1>>31);
2636
if(p2&256) p2= ~(p2>>31);
2638
src[y*stride-1] = p1;
2639
src[y*stride+0] = p2;
2643
d2= clip((p0-p3)/4, -ad1, ad1);
2645
src[y*stride-2] = p0 - d2;
2646
src[y*stride+1] = p3 + d2;
2650
static void h261_loop_filter_c(uint8_t *src, int stride){
2655
temp[x ] = 4*src[x ];
2656
temp[x + 7*8] = 4*src[x + 7*stride];
2660
xy = y * stride + x;
2662
temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
2667
src[ y*stride] = (temp[ y*8] + 2)>>2;
2668
src[7+y*stride] = (temp[7+y*8] + 2)>>2;
2670
xy = y * stride + x;
2672
src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
2677
static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
2680
for( i = 0; i < 4; i++ ) {
2685
for( d = 0; d < 4; d++ ) {
2686
const int p0 = pix[-1*xstride];
2687
const int p1 = pix[-2*xstride];
2688
const int p2 = pix[-3*xstride];
2689
const int q0 = pix[0];
2690
const int q1 = pix[1*xstride];
2691
const int q2 = pix[2*xstride];
2693
if( ABS( p0 - q0 ) < alpha &&
2694
ABS( p1 - p0 ) < beta &&
2695
ABS( q1 - q0 ) < beta ) {
2700
if( ABS( p2 - p0 ) < beta ) {
2701
pix[-2*xstride] = p1 + clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
2704
if( ABS( q2 - q0 ) < beta ) {
2705
pix[ xstride] = q1 + clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
2709
i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
2710
pix[-xstride] = clip_uint8( p0 + i_delta ); /* p0' */
2711
pix[0] = clip_uint8( q0 - i_delta ); /* q0' */
2717
static void h264_v_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
2719
h264_loop_filter_luma_c(pix, stride, 1, alpha, beta, tc0);
2721
static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
2723
h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
2726
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
2729
for( i = 0; i < 4; i++ ) {
2730
const int tc = tc0[i];
2735
for( d = 0; d < 2; d++ ) {
2736
const int p0 = pix[-1*xstride];
2737
const int p1 = pix[-2*xstride];
2738
const int q0 = pix[0];
2739
const int q1 = pix[1*xstride];
2741
if( ABS( p0 - q0 ) < alpha &&
2742
ABS( p1 - p0 ) < beta &&
2743
ABS( q1 - q0 ) < beta ) {
2745
int delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
2747
pix[-xstride] = clip_uint8( p0 + delta ); /* p0' */
2748
pix[0] = clip_uint8( q0 - delta ); /* q0' */
2754
static void h264_v_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
2756
h264_loop_filter_chroma_c(pix, stride, 1, alpha, beta, tc0);
2758
static void h264_h_loop_filter_chroma_c(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
2760
h264_loop_filter_chroma_c(pix, 1, stride, alpha, beta, tc0);
2763
static inline void h264_loop_filter_chroma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
2766
for( d = 0; d < 8; d++ ) {
2767
const int p0 = pix[-1*xstride];
2768
const int p1 = pix[-2*xstride];
2769
const int q0 = pix[0];
2770
const int q1 = pix[1*xstride];
2772
if( ABS( p0 - q0 ) < alpha &&
2773
ABS( p1 - p0 ) < beta &&
2774
ABS( q1 - q0 ) < beta ) {
2776
pix[-xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
2777
pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
2782
static void h264_v_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
2784
h264_loop_filter_chroma_intra_c(pix, stride, 1, alpha, beta);
2786
static void h264_h_loop_filter_chroma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
2788
h264_loop_filter_chroma_intra_c(pix, 1, stride, alpha, beta);
2791
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2797
s += abs(pix1[0] - pix2[0]);
2798
s += abs(pix1[1] - pix2[1]);
2799
s += abs(pix1[2] - pix2[2]);
2800
s += abs(pix1[3] - pix2[3]);
2801
s += abs(pix1[4] - pix2[4]);
2802
s += abs(pix1[5] - pix2[5]);
2803
s += abs(pix1[6] - pix2[6]);
2804
s += abs(pix1[7] - pix2[7]);
2805
s += abs(pix1[8] - pix2[8]);
2806
s += abs(pix1[9] - pix2[9]);
2807
s += abs(pix1[10] - pix2[10]);
2808
s += abs(pix1[11] - pix2[11]);
2809
s += abs(pix1[12] - pix2[12]);
2810
s += abs(pix1[13] - pix2[13]);
2811
s += abs(pix1[14] - pix2[14]);
2812
s += abs(pix1[15] - pix2[15]);
2819
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2825
s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
2826
s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
2827
s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
2828
s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
2829
s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
2830
s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
2831
s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
2832
s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
2833
s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
2834
s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
2835
s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
2836
s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
2837
s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
2838
s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
2839
s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
2840
s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
2847
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2850
uint8_t *pix3 = pix2 + line_size;
2854
s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
2855
s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
2856
s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
2857
s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
2858
s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
2859
s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
2860
s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
2861
s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
2862
s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
2863
s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
2864
s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
2865
s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
2866
s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
2867
s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
2868
s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
2869
s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
2877
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2880
uint8_t *pix3 = pix2 + line_size;
2884
s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
2885
s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
2886
s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
2887
s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
2888
s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
2889
s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
2890
s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
2891
s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
2892
s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
2893
s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
2894
s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
2895
s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
2896
s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
2897
s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
2898
s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
2899
s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
2907
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2913
s += abs(pix1[0] - pix2[0]);
2914
s += abs(pix1[1] - pix2[1]);
2915
s += abs(pix1[2] - pix2[2]);
2916
s += abs(pix1[3] - pix2[3]);
2917
s += abs(pix1[4] - pix2[4]);
2918
s += abs(pix1[5] - pix2[5]);
2919
s += abs(pix1[6] - pix2[6]);
2920
s += abs(pix1[7] - pix2[7]);
2927
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2933
s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
2934
s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
2935
s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
2936
s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
2937
s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
2938
s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
2939
s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
2940
s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
2947
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2950
uint8_t *pix3 = pix2 + line_size;
2954
s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
2955
s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
2956
s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
2957
s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
2958
s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
2959
s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
2960
s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
2961
s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
2969
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
2972
uint8_t *pix3 = pix2 + line_size;
2976
s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
2977
s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
2978
s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
2979
s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
2980
s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
2981
s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
2982
s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
2983
s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
2991
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
2992
MpegEncContext *c = v;
2998
for(x=0; x<16; x++){
2999
score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
3002
for(x=0; x<15; x++){
3003
score2+= ABS( s1[x ] - s1[x +stride]
3004
- s1[x+1] + s1[x+1+stride])
3005
-ABS( s2[x ] - s2[x +stride]
3006
- s2[x+1] + s2[x+1+stride]);
3013
if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
3014
else return score1 + ABS(score2)*8;
3017
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
3018
MpegEncContext *c = v;
3025
score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
3029
score2+= ABS( s1[x ] - s1[x +stride]
3030
- s1[x+1] + s1[x+1+stride])
3031
-ABS( s2[x ] - s2[x +stride]
3032
- s2[x+1] + s2[x+1+stride]);
3039
if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
3040
else return score1 + ABS(score2)*8;
3043
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
3047
for(i=0; i<8*8; i++){
3048
int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
3051
assert(-512<b && b<512);
3053
sum += (w*b)*(w*b)>>4;
3058
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
3061
for(i=0; i<8*8; i++){
3062
rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
3067
* permutes an 8x8 block.
3068
* @param block the block which will be permuted according to the given permutation vector
3069
* @param permutation the permutation vector
3070
* @param last the last non zero coefficient in scantable order, used to speed the permutation up
3071
* @param scantable the used scantable, this is only used to speed the permutation up, the block is not
3072
* (inverse) permutated to scantable order!
3074
void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
3080
//if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
3082
for(i=0; i<=last; i++){
3083
const int j= scantable[i];
3088
for(i=0; i<=last; i++){
3089
const int j= scantable[i];
3090
const int perm_j= permutation[j];
3091
block[perm_j]= temp[j];
3095
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
3099
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
3102
memset(cmp, 0, sizeof(void*)*5);
3110
cmp[i]= c->hadamard8_diff[i];
3116
cmp[i]= c->dct_sad[i];
3119
cmp[i]= c->dct_max[i];
3122
cmp[i]= c->quant_psnr[i];
3149
av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
3155
* memset(blocks, 0, sizeof(DCTELEM)*6*64)
3157
static void clear_blocks_c(DCTELEM *blocks)
3159
memset(blocks, 0, sizeof(DCTELEM)*6*64);
3162
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
3164
for(i=0; i+7<w; i+=8){
3165
dst[i+0] += src[i+0];
3166
dst[i+1] += src[i+1];
3167
dst[i+2] += src[i+2];
3168
dst[i+3] += src[i+3];
3169
dst[i+4] += src[i+4];
3170
dst[i+5] += src[i+5];
3171
dst[i+6] += src[i+6];
3172
dst[i+7] += src[i+7];
3175
dst[i+0] += src[i+0];
3178
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
3180
for(i=0; i+7<w; i+=8){
3181
dst[i+0] = src1[i+0]-src2[i+0];
3182
dst[i+1] = src1[i+1]-src2[i+1];
3183
dst[i+2] = src1[i+2]-src2[i+2];
3184
dst[i+3] = src1[i+3]-src2[i+3];
3185
dst[i+4] = src1[i+4]-src2[i+4];
3186
dst[i+5] = src1[i+5]-src2[i+5];
3187
dst[i+6] = src1[i+6]-src2[i+6];
3188
dst[i+7] = src1[i+7]-src2[i+7];
3191
dst[i+0] = src1[i+0]-src2[i+0];
3194
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
3202
const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
3212
#define BUTTERFLY2(o1,o2,i1,i2) \
3216
#define BUTTERFLY1(x,y) \
3225
#define BUTTERFLYA(x,y) (ABS((x)+(y)) + ABS((x)-(y)))
3227
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
3235
//FIXME try pointer walks
3236
BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
3237
BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
3238
BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
3239
BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
3241
BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
3242
BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
3243
BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
3244
BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
3246
BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
3247
BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
3248
BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
3249
BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
3253
BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
3254
BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
3255
BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
3256
BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
3258
BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
3259
BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
3260
BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
3261
BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
3264
BUTTERFLYA(temp[8*0+i], temp[8*4+i])
3265
+BUTTERFLYA(temp[8*1+i], temp[8*5+i])
3266
+BUTTERFLYA(temp[8*2+i], temp[8*6+i])
3267
+BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
3273
printf("MAX:%d\n", maxi);
3279
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
3287
//FIXME try pointer walks
3288
BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
3289
BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
3290
BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
3291
BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
3293
BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
3294
BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
3295
BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
3296
BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
3298
BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
3299
BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
3300
BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
3301
BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
3305
BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
3306
BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
3307
BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
3308
BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
3310
BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
3311
BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
3312
BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
3313
BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
3316
BUTTERFLYA(temp[8*0+i], temp[8*4+i])
3317
+BUTTERFLYA(temp[8*1+i], temp[8*5+i])
3318
+BUTTERFLYA(temp[8*2+i], temp[8*6+i])
3319
+BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
3322
sum -= ABS(temp[8*0] + temp[8*4]); // -mean
3327
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3328
MpegEncContext * const s= (MpegEncContext *)c;
3329
uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
3330
DCTELEM * const temp= (DCTELEM*)aligned_temp;
3335
s->dsp.diff_pixels(temp, src1, src2, stride);
3344
static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3345
MpegEncContext * const s= (MpegEncContext *)c;
3346
uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
3347
DCTELEM * const temp= (DCTELEM*)aligned_temp;
3352
s->dsp.diff_pixels(temp, src1, src2, stride);
3356
sum= FFMAX(sum, ABS(temp[i]));
3361
void simple_idct(DCTELEM *block); //FIXME
3363
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3364
MpegEncContext * const s= (MpegEncContext *)c;
3365
uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
3366
DCTELEM * const temp= (DCTELEM*)aligned_temp;
3367
DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
3373
s->dsp.diff_pixels(temp, src1, src2, stride);
3375
memcpy(bak, temp, 64*sizeof(DCTELEM));
3377
s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3378
s->dct_unquantize_inter(s, temp, 0, s->qscale);
3379
simple_idct(temp); //FIXME
3382
sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
3387
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3388
MpegEncContext * const s= (MpegEncContext *)c;
3389
const uint8_t *scantable= s->intra_scantable.permutated;
3390
uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
3391
uint64_t __align8 aligned_bak[stride];
3392
DCTELEM * const temp= (DCTELEM*)aligned_temp;
3393
uint8_t * const bak= (uint8_t*)aligned_bak;
3394
int i, last, run, bits, level, distoration, start_i;
3395
const int esc_length= s->ac_esc_length;
3397
uint8_t * last_length;
3402
((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0];
3403
((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1];
3406
s->dsp.diff_pixels(temp, src1, src2, stride);
3408
s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3414
length = s->intra_ac_vlc_length;
3415
last_length= s->intra_ac_vlc_last_length;
3416
bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3419
length = s->inter_ac_vlc_length;
3420
last_length= s->inter_ac_vlc_last_length;
3425
for(i=start_i; i<last; i++){
3426
int j= scantable[i];
3431
if((level&(~127)) == 0){
3432
bits+= length[UNI_AC_ENC_INDEX(run, level)];
3441
level= temp[i] + 64;
3445
if((level&(~127)) == 0){
3446
bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
3454
s->dct_unquantize_intra(s, temp, 0, s->qscale);
3456
s->dct_unquantize_inter(s, temp, 0, s->qscale);
3459
s->dsp.idct_add(bak, stride, temp);
3461
distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
3463
return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
3466
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
3467
MpegEncContext * const s= (MpegEncContext *)c;
3468
const uint8_t *scantable= s->intra_scantable.permutated;
3469
uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
3470
DCTELEM * const temp= (DCTELEM*)aligned_temp;
3471
int i, last, run, bits, level, start_i;
3472
const int esc_length= s->ac_esc_length;
3474
uint8_t * last_length;
3478
s->dsp.diff_pixels(temp, src1, src2, stride);
3480
s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
3486
length = s->intra_ac_vlc_length;
3487
last_length= s->intra_ac_vlc_last_length;
3488
bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
3491
length = s->inter_ac_vlc_length;
3492
last_length= s->inter_ac_vlc_last_length;
3497
for(i=start_i; i<last; i++){
3498
int j= scantable[i];
3503
if((level&(~127)) == 0){
3504
bits+= length[UNI_AC_ENC_INDEX(run, level)];
3513
level= temp[i] + 64;
3517
if((level&(~127)) == 0){
3518
bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
3526
static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
3531
for(x=0; x<16; x+=4){
3532
score+= ABS(s[x ] - s[x +stride]) + ABS(s[x+1] - s[x+1+stride])
3533
+ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
3541
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
3546
for(x=0; x<16; x++){
3547
score+= ABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
3556
#define SQ(a) ((a)*(a))
3557
static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
3562
for(x=0; x<16; x+=4){
3563
score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride])
3564
+SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
3572
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
3577
for(x=0; x<16; x++){
3578
score+= SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
3587
WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
3588
WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
3589
WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
3590
WARPER8_16_SQ(dct_max8x8_c, dct_max16_c)
3591
WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
3592
WARPER8_16_SQ(rd8x8_c, rd16_c)
3593
WARPER8_16_SQ(bit8x8_c, bit16_c)
3595
/* XXX: those functions should be suppressed ASAP when all IDCTs are
3597
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
3600
put_pixels_clamped_c(block, dest, line_size);
3602
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
3605
add_pixels_clamped_c(block, dest, line_size);
3608
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
3611
put_pixels_clamped4_c(block, dest, line_size);
3613
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
3616
add_pixels_clamped4_c(block, dest, line_size);
3619
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
3622
put_pixels_clamped2_c(block, dest, line_size);
3624
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
3627
add_pixels_clamped2_c(block, dest, line_size);
3630
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
3632
uint8_t *cm = cropTbl + MAX_NEG_CROP;
3634
dest[0] = cm[(block[0] + 4)>>3];
3636
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
3638
uint8_t *cm = cropTbl + MAX_NEG_CROP;
3640
dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
3643
/* init static data */
3644
void dsputil_static_init(void)
3648
for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
3649
for(i=0;i<MAX_NEG_CROP;i++) {
3651
cropTbl[i + MAX_NEG_CROP + 256] = 255;
3654
for(i=0;i<512;i++) {
3655
squareTbl[i] = (i - 256) * (i - 256);
3658
for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
3662
void dsputil_init(DSPContext* c, AVCodecContext *avctx)
3666
#ifdef CONFIG_ENCODERS
3667
if(avctx->dct_algo==FF_DCT_FASTINT) {
3668
c->fdct = fdct_ifast;
3669
c->fdct248 = fdct_ifast248;
3671
else if(avctx->dct_algo==FF_DCT_FAAN) {
3672
c->fdct = ff_faandct;
3673
c->fdct248 = ff_faandct248;
3676
c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
3677
c->fdct248 = ff_fdct248_islow;
3679
#endif //CONFIG_ENCODERS
3681
if(avctx->lowres==1){
3682
if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){
3683
c->idct_put= ff_jref_idct4_put;
3684
c->idct_add= ff_jref_idct4_add;
3686
c->idct_put= ff_h264_lowres_idct_put_c;
3687
c->idct_add= ff_h264_lowres_idct_add_c;
3689
c->idct = j_rev_dct4;
3690
c->idct_permutation_type= FF_NO_IDCT_PERM;
3691
}else if(avctx->lowres==2){
3692
c->idct_put= ff_jref_idct2_put;
3693
c->idct_add= ff_jref_idct2_add;
3694
c->idct = j_rev_dct2;
3695
c->idct_permutation_type= FF_NO_IDCT_PERM;
3696
}else if(avctx->lowres==3){
3697
c->idct_put= ff_jref_idct1_put;
3698
c->idct_add= ff_jref_idct1_add;
3699
c->idct = j_rev_dct1;
3700
c->idct_permutation_type= FF_NO_IDCT_PERM;
3702
if(avctx->idct_algo==FF_IDCT_INT){
3703
c->idct_put= ff_jref_idct_put;
3704
c->idct_add= ff_jref_idct_add;
3705
c->idct = j_rev_dct;
3706
c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
3707
}else if(avctx->idct_algo==FF_IDCT_VP3){
3708
c->idct_put= ff_vp3_idct_put_c;
3709
c->idct_add= ff_vp3_idct_add_c;
3710
c->idct = ff_vp3_idct_c;
3711
c->idct_permutation_type= FF_NO_IDCT_PERM;
3712
}else{ //accurate/default
3713
c->idct_put= simple_idct_put;
3714
c->idct_add= simple_idct_add;
3715
c->idct = simple_idct;
3716
c->idct_permutation_type= FF_NO_IDCT_PERM;
3720
c->h264_idct_add= ff_h264_idct_add_c;
3721
c->h264_idct8_add= ff_h264_idct8_add_c;
3723
c->get_pixels = get_pixels_c;
3724
c->diff_pixels = diff_pixels_c;
3725
c->put_pixels_clamped = put_pixels_clamped_c;
3726
c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
3727
c->add_pixels_clamped = add_pixels_clamped_c;
3728
c->add_pixels8 = add_pixels8_c;
3729
c->add_pixels4 = add_pixels4_c;
3732
c->clear_blocks = clear_blocks_c;
3733
c->pix_sum = pix_sum_c;
3734
c->pix_norm1 = pix_norm1_c;
3736
/* TODO [0] 16 [1] 8 */
3737
c->pix_abs[0][0] = pix_abs16_c;
3738
c->pix_abs[0][1] = pix_abs16_x2_c;
3739
c->pix_abs[0][2] = pix_abs16_y2_c;
3740
c->pix_abs[0][3] = pix_abs16_xy2_c;
3741
c->pix_abs[1][0] = pix_abs8_c;
3742
c->pix_abs[1][1] = pix_abs8_x2_c;
3743
c->pix_abs[1][2] = pix_abs8_y2_c;
3744
c->pix_abs[1][3] = pix_abs8_xy2_c;
3746
#define dspfunc(PFX, IDX, NUM) \
3747
c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## NUM ## _c; \
3748
c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## NUM ## _x2_c; \
3749
c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## NUM ## _y2_c; \
3750
c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## NUM ## _xy2_c
3752
dspfunc(put, 0, 16);
3753
dspfunc(put_no_rnd, 0, 16);
3755
dspfunc(put_no_rnd, 1, 8);
3759
dspfunc(avg, 0, 16);
3760
dspfunc(avg_no_rnd, 0, 16);
3762
dspfunc(avg_no_rnd, 1, 8);
3767
c->put_no_rnd_pixels_l2[0]= put_no_rnd_pixels16_l2_c;
3768
c->put_no_rnd_pixels_l2[1]= put_no_rnd_pixels8_l2_c;
3770
c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
3771
c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
3772
c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
3773
c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
3774
c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
3775
c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
3776
c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
3777
c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
3778
c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
3780
c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
3781
c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
3782
c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
3783
c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
3784
c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
3785
c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
3786
c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
3787
c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
3788
c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
3790
#define dspfunc(PFX, IDX, NUM) \
3791
c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
3792
c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
3793
c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
3794
c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
3795
c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
3796
c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
3797
c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
3798
c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
3799
c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
3800
c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
3801
c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
3802
c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
3803
c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
3804
c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
3805
c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
3806
c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
3808
dspfunc(put_qpel, 0, 16);
3809
dspfunc(put_no_rnd_qpel, 0, 16);
3811
dspfunc(avg_qpel, 0, 16);
3812
/* dspfunc(avg_no_rnd_qpel, 0, 16); */
3814
dspfunc(put_qpel, 1, 8);
3815
dspfunc(put_no_rnd_qpel, 1, 8);
3817
dspfunc(avg_qpel, 1, 8);
3818
/* dspfunc(avg_no_rnd_qpel, 1, 8); */
3820
dspfunc(put_h264_qpel, 0, 16);
3821
dspfunc(put_h264_qpel, 1, 8);
3822
dspfunc(put_h264_qpel, 2, 4);
3823
dspfunc(avg_h264_qpel, 0, 16);
3824
dspfunc(avg_h264_qpel, 1, 8);
3825
dspfunc(avg_h264_qpel, 2, 4);
3828
c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
3829
c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
3830
c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
3831
c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
3832
c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
3833
c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
3835
c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
3836
c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
3837
c->weight_h264_pixels_tab[2]= weight_h264_pixels8x16_c;
3838
c->weight_h264_pixels_tab[3]= weight_h264_pixels8x8_c;
3839
c->weight_h264_pixels_tab[4]= weight_h264_pixels8x4_c;
3840
c->weight_h264_pixels_tab[5]= weight_h264_pixels4x8_c;
3841
c->weight_h264_pixels_tab[6]= weight_h264_pixels4x4_c;
3842
c->weight_h264_pixels_tab[7]= weight_h264_pixels4x2_c;
3843
c->weight_h264_pixels_tab[8]= weight_h264_pixels2x4_c;
3844
c->weight_h264_pixels_tab[9]= weight_h264_pixels2x2_c;
3845
c->biweight_h264_pixels_tab[0]= biweight_h264_pixels16x16_c;
3846
c->biweight_h264_pixels_tab[1]= biweight_h264_pixels16x8_c;
3847
c->biweight_h264_pixels_tab[2]= biweight_h264_pixels8x16_c;
3848
c->biweight_h264_pixels_tab[3]= biweight_h264_pixels8x8_c;
3849
c->biweight_h264_pixels_tab[4]= biweight_h264_pixels8x4_c;
3850
c->biweight_h264_pixels_tab[5]= biweight_h264_pixels4x8_c;
3851
c->biweight_h264_pixels_tab[6]= biweight_h264_pixels4x4_c;
3852
c->biweight_h264_pixels_tab[7]= biweight_h264_pixels4x2_c;
3853
c->biweight_h264_pixels_tab[8]= biweight_h264_pixels2x4_c;
3854
c->biweight_h264_pixels_tab[9]= biweight_h264_pixels2x2_c;
3856
c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
3857
c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
3858
c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
3859
c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
3860
c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
3861
c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
3862
c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
3863
c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
3865
#define SET_CMP_FUNC(name) \
3866
c->name[0]= name ## 16_c;\
3867
c->name[1]= name ## 8x8_c;
3869
SET_CMP_FUNC(hadamard8_diff)
3870
c->hadamard8_diff[4]= hadamard8_intra16_c;
3871
SET_CMP_FUNC(dct_sad)
3872
SET_CMP_FUNC(dct_max)
3873
c->sad[0]= pix_abs16_c;
3874
c->sad[1]= pix_abs8_c;
3878
SET_CMP_FUNC(quant_psnr)
3881
c->vsad[0]= vsad16_c;
3882
c->vsad[4]= vsad_intra16_c;
3883
c->vsse[0]= vsse16_c;
3884
c->vsse[4]= vsse_intra16_c;
3885
c->nsse[0]= nsse16_c;
3886
c->nsse[1]= nsse8_c;
3887
c->w53[0]= w53_16_c;
3889
c->w97[0]= w97_16_c;
3892
c->add_bytes= add_bytes_c;
3893
c->diff_bytes= diff_bytes_c;
3894
c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
3895
c->bswap_buf= bswap_buf;
3897
c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
3898
c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
3899
c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
3900
c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
3901
c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
3902
c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
3904
c->h263_h_loop_filter= h263_h_loop_filter_c;
3905
c->h263_v_loop_filter= h263_v_loop_filter_c;
3907
c->h261_loop_filter= h261_loop_filter_c;
3909
c->try_8x8basis= try_8x8basis_c;
3910
c->add_8x8basis= add_8x8basis_c;
3913
dsputil_init_mmx(c, avctx);
3916
dsputil_init_armv4l(c, avctx);
3919
dsputil_init_mlib(c, avctx);
3922
dsputil_init_vis(c,avctx);
3925
dsputil_init_alpha(c, avctx);
3928
dsputil_init_ppc(c, avctx);
3931
dsputil_init_mmi(c, avctx);
3934
dsputil_init_sh4(c,avctx);
3937
switch(c->idct_permutation_type){
3938
case FF_NO_IDCT_PERM:
3940
c->idct_permutation[i]= i;
3942
case FF_LIBMPEG2_IDCT_PERM:
3944
c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
3946
case FF_SIMPLE_IDCT_PERM:
3948
c->idct_permutation[i]= simple_mmx_permutation[i];
3950
case FF_TRANSPOSE_IDCT_PERM:
3952
c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
3954
case FF_PARTTRANS_IDCT_PERM:
3956
c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
3959
av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");