29
based upon some commented-out C code from mpeg2dec (idct_mmx.c
30
written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
28
#include "libavutil/intreadwrite.h"
32
29
#include "avcodec.h"
33
30
#include "dsputil.h"
34
31
#include "mathops.h"
35
32
#include "simple_idct.h"
38
/*
 * Cosine constants at 11-bit precision:
 *     Wk = round(2048 * sqrt(2) * cos(k * pi / 16)),  k = 1..7
 * NOTE(review): W1..W7 are defined a second time further down with a
 * (1<<14) scale; presumably a preprocessor conditional that selects one
 * of the two sets is not visible here -- confirm.
 */
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
39
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
40
#define W3 2408 /* 2048*sqrt (2)*cos (3*pi/16) */
41
#define W4 2048 /* 2048*sqrt (2)*cos (4*pi/16) */
42
#define W5 1609 /* 2048*sqrt (2)*cos (5*pi/16) */
43
#define W6 1108 /* 2048*sqrt (2)*cos (6*pi/16) */
44
#define W7 565 /* 2048*sqrt (2)*cos (7*pi/16) */
48
/*
 * Cosine constants at 14-bit precision:
 *     Wk = (int)(cos(k*M_PI/16) * sqrt(2) * (1<<14) + 0.5),  k = 1..7
 * NOTE(review): W4 is 16383, one less than the 16384 this formula yields;
 * presumably deliberate -- confirm before changing it.
 */
#define W1 22725 // cos(1*M_PI/16)*sqrt(2)*(1<<14) + 0.5
49
#define W2 21407 // cos(2*M_PI/16)*sqrt(2)*(1<<14) + 0.5
50
#define W3 19266 // cos(3*M_PI/16)*sqrt(2)*(1<<14) + 0.5
51
#define W4 16383 // cos(4*M_PI/16)*sqrt(2)*(1<<14) + 0.5 would be 16384; see note above
52
#define W5 12873 // cos(5*M_PI/16)*sqrt(2)*(1<<14) + 0.5
53
#define W6 8867 // cos(6*M_PI/16)*sqrt(2)*(1<<14) + 0.5
54
#define W7 4520 // cos(7*M_PI/16)*sqrt(2)*(1<<14) + 0.5
56
/* Right shift applied to the column-pass sums (the ">> COL_SHIFT" expressions below). */
#define COL_SHIFT 20 // 6
59
static inline void idctRowCondDC (DCTELEM * row)
61
int a0, a1, a2, a3, b0, b1, b2, b3;
70
#define ROW0_MASK 0xffff000000000000LL
72
#define ROW0_MASK 0xffffLL
74
if(sizeof(DCTELEM)==2){
75
if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
76
((uint64_t *)row)[1]) == 0) {
77
temp = (row[0] << 3) & 0xffff;
80
((uint64_t *)row)[0] = temp;
81
((uint64_t *)row)[1] = temp;
85
if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
86
row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
91
if(sizeof(DCTELEM)==2){
92
if (!(((uint32_t*)row)[1] |
96
temp = (row[0] << 3) & 0xffff;
98
((uint32_t*)row)[0]=((uint32_t*)row)[1] =
99
((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
103
if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
104
row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
110
a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
115
/* no need to optimize : gcc does it */
121
b0 = MUL16(W1, row[1]);
122
MAC16(b0, W3, row[3]);
123
b1 = MUL16(W3, row[1]);
124
MAC16(b1, -W7, row[3]);
125
b2 = MUL16(W5, row[1]);
126
MAC16(b2, -W1, row[3]);
127
b3 = MUL16(W7, row[1]);
128
MAC16(b3, -W5, row[3]);
131
temp = ((uint64_t*)row)[1];
133
temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
136
a0 += W4*row[4] + W6*row[6];
137
a1 += - W4*row[4] - W2*row[6];
138
a2 += - W4*row[4] + W2*row[6];
139
a3 += W4*row[4] - W6*row[6];
141
MAC16(b0, W5, row[5]);
142
MAC16(b0, W7, row[7]);
144
MAC16(b1, -W1, row[5]);
145
MAC16(b1, -W5, row[7]);
147
MAC16(b2, W7, row[5]);
148
MAC16(b2, W3, row[7]);
150
MAC16(b3, W3, row[5]);
151
MAC16(b3, -W1, row[7]);
154
row[0] = (a0 + b0) >> ROW_SHIFT;
155
row[7] = (a0 - b0) >> ROW_SHIFT;
156
row[1] = (a1 + b1) >> ROW_SHIFT;
157
row[6] = (a1 - b1) >> ROW_SHIFT;
158
row[2] = (a2 + b2) >> ROW_SHIFT;
159
row[5] = (a2 - b2) >> ROW_SHIFT;
160
row[3] = (a3 + b3) >> ROW_SHIFT;
161
row[4] = (a3 - b3) >> ROW_SHIFT;
164
static inline void idctSparseColPut (uint8_t *dest, int line_size,
167
int a0, a1, a2, a3, b0, b1, b2, b3;
168
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
170
/* XXX: I did that only to give same values as previous code */
171
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
181
b0 = MUL16(W1, col[8*1]);
182
b1 = MUL16(W3, col[8*1]);
183
b2 = MUL16(W5, col[8*1]);
184
b3 = MUL16(W7, col[8*1]);
186
MAC16(b0, + W3, col[8*3]);
187
MAC16(b1, - W7, col[8*3]);
188
MAC16(b2, - W1, col[8*3]);
189
MAC16(b3, - W5, col[8*3]);
199
MAC16(b0, + W5, col[8*5]);
200
MAC16(b1, - W1, col[8*5]);
201
MAC16(b2, + W7, col[8*5]);
202
MAC16(b3, + W3, col[8*5]);
213
MAC16(b0, + W7, col[8*7]);
214
MAC16(b1, - W5, col[8*7]);
215
MAC16(b2, + W3, col[8*7]);
216
MAC16(b3, - W1, col[8*7]);
219
dest[0] = cm[(a0 + b0) >> COL_SHIFT];
221
dest[0] = cm[(a1 + b1) >> COL_SHIFT];
223
dest[0] = cm[(a2 + b2) >> COL_SHIFT];
225
dest[0] = cm[(a3 + b3) >> COL_SHIFT];
227
dest[0] = cm[(a3 - b3) >> COL_SHIFT];
229
dest[0] = cm[(a2 - b2) >> COL_SHIFT];
231
dest[0] = cm[(a1 - b1) >> COL_SHIFT];
233
dest[0] = cm[(a0 - b0) >> COL_SHIFT];
236
static inline void idctSparseColAdd (uint8_t *dest, int line_size,
239
int a0, a1, a2, a3, b0, b1, b2, b3;
240
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
242
/* XXX: I did that only to give same values as previous code */
243
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
253
b0 = MUL16(W1, col[8*1]);
254
b1 = MUL16(W3, col[8*1]);
255
b2 = MUL16(W5, col[8*1]);
256
b3 = MUL16(W7, col[8*1]);
258
MAC16(b0, + W3, col[8*3]);
259
MAC16(b1, - W7, col[8*3]);
260
MAC16(b2, - W1, col[8*3]);
261
MAC16(b3, - W5, col[8*3]);
271
MAC16(b0, + W5, col[8*5]);
272
MAC16(b1, - W1, col[8*5]);
273
MAC16(b2, + W7, col[8*5]);
274
MAC16(b3, + W3, col[8*5]);
285
MAC16(b0, + W7, col[8*7]);
286
MAC16(b1, - W5, col[8*7]);
287
MAC16(b2, + W3, col[8*7]);
288
MAC16(b3, - W1, col[8*7]);
291
dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
293
dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)];
295
dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)];
297
dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)];
299
dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)];
301
dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)];
303
dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)];
305
dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)];
308
static inline void idctSparseCol (DCTELEM * col)
310
int a0, a1, a2, a3, b0, b1, b2, b3;
312
/* XXX: I did that only to give same values as previous code */
313
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
323
b0 = MUL16(W1, col[8*1]);
324
b1 = MUL16(W3, col[8*1]);
325
b2 = MUL16(W5, col[8*1]);
326
b3 = MUL16(W7, col[8*1]);
328
MAC16(b0, + W3, col[8*3]);
329
MAC16(b1, - W7, col[8*3]);
330
MAC16(b2, - W1, col[8*3]);
331
MAC16(b3, - W5, col[8*3]);
341
MAC16(b0, + W5, col[8*5]);
342
MAC16(b1, - W1, col[8*5]);
343
MAC16(b2, + W7, col[8*5]);
344
MAC16(b3, + W3, col[8*5]);
355
MAC16(b0, + W7, col[8*7]);
356
MAC16(b1, - W5, col[8*7]);
357
MAC16(b2, + W3, col[8*7]);
358
MAC16(b3, - W1, col[8*7]);
361
col[0 ] = ((a0 + b0) >> COL_SHIFT);
362
col[8 ] = ((a1 + b1) >> COL_SHIFT);
363
col[16] = ((a2 + b2) >> COL_SHIFT);
364
col[24] = ((a3 + b3) >> COL_SHIFT);
365
col[32] = ((a3 - b3) >> COL_SHIFT);
366
col[40] = ((a2 - b2) >> COL_SHIFT);
367
col[48] = ((a1 - b1) >> COL_SHIFT);
368
col[56] = ((a0 - b0) >> COL_SHIFT);
371
void ff_simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
375
idctRowCondDC(block + i*8);
378
idctSparseColPut(dest + i, line_size, block + i);
381
void ff_simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
385
idctRowCondDC(block + i*8);
388
idctSparseColAdd(dest + i, line_size, block + i);
391
void ff_simple_idct(DCTELEM *block)
395
idctRowCondDC(block + i*8);
398
idctSparseCol(block + i);
35
#include "simple_idct_template.c"
39
#include "simple_idct_template.c"