40
42
#define xC6S2 25080
41
43
#define xC7S1 12785
43
#define M(a,b) (((a) * (b))>>16)
45
#define M(a, b) (((a) * (b)) >> 16)
45
static av_always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
47
static av_always_inline void idct(uint8_t *dst, int stride,
48
int16_t *input, int type)
47
50
int16_t *ip = input;
54
57
/* Inverse DCT on the rows now */
55
58
for (i = 0; i < 8; i++) {
56
59
/* Check for non-zero values */
57
if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
58
ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
60
if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
61
ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8]) {
59
62
A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]);
60
63
B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]);
61
64
C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]);
104
for ( i = 0; i < 8; i++) {
107
for (i = 0; i < 8; i++) {
105
108
/* Check for non-zero values (bitwise or faster than ||) */
106
if ( ip[1] | ip[2] | ip[3] |
107
ip[4] | ip[5] | ip[6] | ip[7] ) {
109
if (ip[1] | ip[2] | ip[3] |
110
ip[4] | ip[5] | ip[6] | ip[7]) {
109
111
A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
110
112
B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
111
113
C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
140
142
/* Final sequence of operations over-write original inputs. */
142
dst[0*stride] = av_clip_uint8((Gd + Cd ) >> 4);
143
dst[7*stride] = av_clip_uint8((Gd - Cd ) >> 4);
145
dst[1*stride] = av_clip_uint8((Add + Hd ) >> 4);
146
dst[2*stride] = av_clip_uint8((Add - Hd ) >> 4);
148
dst[3*stride] = av_clip_uint8((Ed + Dd ) >> 4);
149
dst[4*stride] = av_clip_uint8((Ed - Dd ) >> 4);
151
dst[5*stride] = av_clip_uint8((Fd + Bdd ) >> 4);
152
dst[6*stride] = av_clip_uint8((Fd - Bdd ) >> 4);
154
dst[0*stride] = av_clip_uint8(dst[0*stride] + ((Gd + Cd ) >> 4));
155
dst[7*stride] = av_clip_uint8(dst[7*stride] + ((Gd - Cd ) >> 4));
157
dst[1*stride] = av_clip_uint8(dst[1*stride] + ((Add + Hd ) >> 4));
158
dst[2*stride] = av_clip_uint8(dst[2*stride] + ((Add - Hd ) >> 4));
160
dst[3*stride] = av_clip_uint8(dst[3*stride] + ((Ed + Dd ) >> 4));
161
dst[4*stride] = av_clip_uint8(dst[4*stride] + ((Ed - Dd ) >> 4));
163
dst[5*stride] = av_clip_uint8(dst[5*stride] + ((Fd + Bdd ) >> 4));
164
dst[6*stride] = av_clip_uint8(dst[6*stride] + ((Fd - Bdd ) >> 4));
144
dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4);
145
dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4);
147
dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4);
148
dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4);
150
dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4);
151
dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4);
153
dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4);
154
dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4);
156
dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4));
157
dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4));
159
dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4));
160
dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4));
162
dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4));
163
dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4));
165
dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4));
166
dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4));
176
dst[7*stride]= av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift<<16))>>20));
179
int v= ((xC4S4 * ip[0] + (IdctAdjustBeforeShift<<16))>>20);
180
dst[0*stride] = av_clip_uint8(dst[0*stride] + v);
181
dst[1*stride] = av_clip_uint8(dst[1*stride] + v);
182
dst[2*stride] = av_clip_uint8(dst[2*stride] + v);
183
dst[3*stride] = av_clip_uint8(dst[3*stride] + v);
184
dst[4*stride] = av_clip_uint8(dst[4*stride] + v);
185
dst[5*stride] = av_clip_uint8(dst[5*stride] + v);
186
dst[6*stride] = av_clip_uint8(dst[6*stride] + v);
187
dst[7*stride] = av_clip_uint8(dst[7*stride] + v);
177
dst[7*stride] = av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20));
180
int v = (xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20;
181
dst[0 * stride] = av_clip_uint8(dst[0 * stride] + v);
182
dst[1 * stride] = av_clip_uint8(dst[1 * stride] + v);
183
dst[2 * stride] = av_clip_uint8(dst[2 * stride] + v);
184
dst[3 * stride] = av_clip_uint8(dst[3 * stride] + v);
185
dst[4 * stride] = av_clip_uint8(dst[4 * stride] + v);
186
dst[5 * stride] = av_clip_uint8(dst[5 * stride] + v);
187
dst[6 * stride] = av_clip_uint8(dst[6 * stride] + v);
188
dst[7 * stride] = av_clip_uint8(dst[7 * stride] + v);
197
static void vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size,
198
int16_t *block/*align 16*/)
198
static void vp3_idct_put_c(uint8_t *dest /* align 8 */, int line_size,
199
int16_t *block /* align 16 */)
200
201
idct(dest, line_size, block, 1);
201
202
memset(block, 0, sizeof(*block) * 64);
204
static void vp3_idct_add_c(uint8_t *dest/*align 8*/, int line_size,
205
int16_t *block/*align 16*/)
205
static void vp3_idct_add_c(uint8_t *dest /* align 8 */, int line_size,
206
int16_t *block /* align 16 */)
207
208
idct(dest, line_size, block, 2);
208
209
memset(block, 0, sizeof(*block) * 64);
211
static void vp3_idct_dc_add_c(uint8_t *dest/*align 8*/, int line_size,
212
int16_t *block/*align 16*/)
212
static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, int line_size,
213
int16_t *block /* align 16 */)
214
215
int i, dc = (block[0] + 15) >> 5;
216
for(i = 0; i < 8; i++){
217
for (i = 0; i < 8; i++) {
217
218
dest[0] = av_clip_uint8(dest[0] + dc);
218
219
dest[1] = av_clip_uint8(dest[1] + dc);
219
220
dest[2] = av_clip_uint8(dest[2] + dc);
233
234
unsigned char *end;
234
235
int filter_value;
235
const int nstride= -stride;
236
const int nstride = -stride;
237
for (end= first_pixel + 8; first_pixel < end; first_pixel++) {
239
(first_pixel[2 * nstride] - first_pixel[ stride])
240
+3*(first_pixel[0 ] - first_pixel[nstride]);
238
for (end = first_pixel + 8; first_pixel < end; first_pixel++) {
239
filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) +
240
(first_pixel[0] - first_pixel[nstride]) * 3;
241
241
filter_value = bounding_values[(filter_value + 4) >> 3];
242
243
first_pixel[nstride] = av_clip_uint8(first_pixel[nstride] + filter_value);
243
first_pixel[0] = av_clip_uint8(first_pixel[0] - filter_value);
244
first_pixel[0] = av_clip_uint8(first_pixel[0] - filter_value);
250
251
unsigned char *end;
251
252
int filter_value;
253
for (end= first_pixel + 8*stride; first_pixel != end; first_pixel += stride) {
255
(first_pixel[-2] - first_pixel[ 1])
256
+3*(first_pixel[ 0] - first_pixel[-1]);
254
for (end = first_pixel + 8 * stride; first_pixel != end; first_pixel += stride) {
255
filter_value = (first_pixel[-2] - first_pixel[1]) +
256
(first_pixel[ 0] - first_pixel[-1]) * 3;
257
257
filter_value = bounding_values[(filter_value + 4) >> 3];
258
259
first_pixel[-1] = av_clip_uint8(first_pixel[-1] + filter_value);
259
260
first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value);