4
4
* Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
6
* This library is free software; you can redistribute it and/or
6
* This file is part of FFmpeg.
8
* FFmpeg is free software; you can redistribute it and/or
7
9
* modify it under the terms of the GNU Lesser General Public
8
10
* License as published by the Free Software Foundation; either
9
* version 2 of the License, or (at your option) any later version.
11
* version 2.1 of the License, or (at your option) any later version.
11
* This library is distributed in the hope that it will be useful,
13
* FFmpeg is distributed in the hope that it will be useful,
12
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
16
* Lesser General Public License for more details.
16
18
* You should have received a copy of the GNU Lesser General Public
17
* License along with this library; if not, write to the Free Software
18
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
* License along with FFmpeg; if not, write to the Free Software
20
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
24
* @file simple_idct.c
27
29
based upon some outcommented c code from mpeg2dec (idct_mmx.c
28
written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
30
written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
30
32
#include "avcodec.h"
31
33
#include "dsputil.h"
76
78
static inline void idctRowCondDC (DCTELEM * row)
78
int a0, a1, a2, a3, b0, b1, b2, b3;
80
int a0, a1, a2, a3, b0, b1, b2, b3;
81
#ifdef HAVE_FAST_64BIT
87
#ifdef HAVE_FAST_64BIT
86
88
#ifdef WORDS_BIGENDIAN
87
89
#define ROW0_MASK 0xffff000000000000LL
89
91
#define ROW0_MASK 0xffffLL
91
93
if(sizeof(DCTELEM)==2){
92
if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
94
if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
93
95
((uint64_t *)row)[1]) == 0) {
94
96
temp = (row[0] << 3) & 0xffff;
95
97
temp += temp << 16;
144
146
MUL16(b3, W7, row[1]);
145
147
MAC16(b3, -W5, row[3]);
149
#ifdef HAVE_FAST_64BIT
148
150
temp = ((uint64_t*)row)[1];
150
152
temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
153
155
a0 += W4*row[4] + W6*row[6];
154
156
a1 += - W4*row[4] - W2*row[6];
155
157
a2 += - W4*row[4] + W2*row[6];
158
160
MAC16(b0, W5, row[5]);
159
161
MAC16(b0, W7, row[7]);
161
163
MAC16(b1, -W1, row[5]);
162
164
MAC16(b1, -W5, row[7]);
164
166
MAC16(b2, W7, row[5]);
165
167
MAC16(b2, W3, row[7]);
167
169
MAC16(b3, W3, row[5]);
168
170
MAC16(b3, -W1, row[7]);
171
row[0] = (a0 + b0) >> ROW_SHIFT;
172
row[7] = (a0 - b0) >> ROW_SHIFT;
173
row[1] = (a1 + b1) >> ROW_SHIFT;
174
row[6] = (a1 - b1) >> ROW_SHIFT;
175
row[2] = (a2 + b2) >> ROW_SHIFT;
176
row[5] = (a2 - b2) >> ROW_SHIFT;
177
row[3] = (a3 + b3) >> ROW_SHIFT;
178
row[4] = (a3 - b3) >> ROW_SHIFT;
173
row[0] = (a0 + b0) >> ROW_SHIFT;
174
row[7] = (a0 - b0) >> ROW_SHIFT;
175
row[1] = (a1 + b1) >> ROW_SHIFT;
176
row[6] = (a1 - b1) >> ROW_SHIFT;
177
row[2] = (a2 + b2) >> ROW_SHIFT;
178
row[5] = (a2 - b2) >> ROW_SHIFT;
179
row[3] = (a3 + b3) >> ROW_SHIFT;
180
row[4] = (a3 - b3) >> ROW_SHIFT;
181
static inline void idctSparseColPut (uint8_t *dest, int line_size,
183
static inline void idctSparseColPut (uint8_t *dest, int line_size,
184
int a0, a1, a2, a3, b0, b1, b2, b3;
185
uint8_t *cm = cropTbl + MAX_NEG_CROP;
186
int a0, a1, a2, a3, b0, b1, b2, b3;
187
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
187
189
/* XXX: I did that only to give same values as previous code */
188
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
190
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
193
195
a0 += + W2*col[8*2];
194
196
a1 += + W6*col[8*2];
205
207
MAC16(b2, - W1, col[8*3]);
206
208
MAC16(b3, - W5, col[8*3]);
209
211
a0 += + W4*col[8*4];
210
212
a1 += - W4*col[8*4];
211
213
a2 += - W4*col[8*4];
212
214
a3 += + W4*col[8*4];
216
218
MAC16(b0, + W5, col[8*5]);
217
219
MAC16(b1, - W1, col[8*5]);
218
220
MAC16(b2, + W7, col[8*5]);
219
221
MAC16(b3, + W3, col[8*5]);
223
225
a0 += + W6*col[8*6];
224
226
a1 += - W2*col[8*6];
225
227
a2 += + W2*col[8*6];
226
228
a3 += - W6*col[8*6];
230
232
MAC16(b0, + W7, col[8*7]);
231
233
MAC16(b1, - W5, col[8*7]);
232
234
MAC16(b2, + W3, col[8*7]);
233
235
MAC16(b3, - W1, col[8*7]);
236
238
dest[0] = cm[(a0 + b0) >> COL_SHIFT];
237
239
dest += line_size;
250
252
dest[0] = cm[(a0 - b0) >> COL_SHIFT];
253
static inline void idctSparseColAdd (uint8_t *dest, int line_size,
255
static inline void idctSparseColAdd (uint8_t *dest, int line_size,
256
int a0, a1, a2, a3, b0, b1, b2, b3;
257
uint8_t *cm = cropTbl + MAX_NEG_CROP;
258
int a0, a1, a2, a3, b0, b1, b2, b3;
259
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
259
261
/* XXX: I did that only to give same values as previous code */
260
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
262
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
265
267
a0 += + W2*col[8*2];
266
268
a1 += + W6*col[8*2];
277
279
MAC16(b2, - W1, col[8*3]);
278
280
MAC16(b3, - W5, col[8*3]);
281
283
a0 += + W4*col[8*4];
282
284
a1 += - W4*col[8*4];
283
285
a2 += - W4*col[8*4];
284
286
a3 += + W4*col[8*4];
288
290
MAC16(b0, + W5, col[8*5]);
289
291
MAC16(b1, - W1, col[8*5]);
290
292
MAC16(b2, + W7, col[8*5]);
291
293
MAC16(b3, + W3, col[8*5]);
295
297
a0 += + W6*col[8*6];
296
298
a1 += - W2*col[8*6];
297
299
a2 += + W2*col[8*6];
298
300
a3 += - W6*col[8*6];
302
304
MAC16(b0, + W7, col[8*7]);
303
305
MAC16(b1, - W5, col[8*7]);
304
306
MAC16(b2, + W3, col[8*7]);
305
307
MAC16(b3, - W1, col[8*7]);
308
310
dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
309
311
dest += line_size;
325
327
static inline void idctSparseCol (DCTELEM * col)
327
int a0, a1, a2, a3, b0, b1, b2, b3;
329
int a0, a1, a2, a3, b0, b1, b2, b3;
329
331
/* XXX: I did that only to give same values as previous code */
330
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
332
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
335
337
a0 += + W2*col[8*2];
336
338
a1 += + W6*col[8*2];
347
349
MAC16(b2, - W1, col[8*3]);
348
350
MAC16(b3, - W5, col[8*3]);
351
353
a0 += + W4*col[8*4];
352
354
a1 += - W4*col[8*4];
353
355
a2 += - W4*col[8*4];
354
356
a3 += + W4*col[8*4];
358
360
MAC16(b0, + W5, col[8*5]);
359
361
MAC16(b1, - W1, col[8*5]);
360
362
MAC16(b2, + W7, col[8*5]);
361
363
MAC16(b3, + W3, col[8*5]);
365
367
a0 += + W6*col[8*6];
366
368
a1 += - W2*col[8*6];
367
369
a2 += + W2*col[8*6];
368
370
a3 += - W6*col[8*6];
372
374
MAC16(b0, + W7, col[8*7]);
373
375
MAC16(b1, - W5, col[8*7]);
374
376
MAC16(b2, + W3, col[8*7]);
375
377
MAC16(b3, - W1, col[8*7]);
378
380
col[0 ] = ((a0 + b0) >> COL_SHIFT);
379
381
col[8 ] = ((a1 + b1) >> COL_SHIFT);
429
431
static inline void idct4col(uint8_t *dest, int line_size, const DCTELEM *col)
431
433
int c0, c1, c2, c3, a0, a1, a2, a3;
432
const uint8_t *cm = cropTbl + MAX_NEG_CROP;
434
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
509
511
static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col)
511
513
int c0, c1, c2, c3, a0, a1, a2, a3;
512
const uint8_t *cm = cropTbl + MAX_NEG_CROP;
514
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
537
539
static inline void idct4row(DCTELEM *row)
539
541
int c0, c1, c2, c3, a0, a1, a2, a3;
540
//const uint8_t *cm = cropTbl + MAX_NEG_CROP;
542
//const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;