4
4
* Copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
6
* This library is free software; you can redistribute it and/or
6
* This file is part of FFmpeg.
8
* FFmpeg is free software; you can redistribute it and/or
7
9
* modify it under the terms of the GNU Lesser General Public
8
10
* License as published by the Free Software Foundation; either
9
* version 2 of the License, or (at your option) any later version.
11
* version 2.1 of the License, or (at your option) any later version.
11
* This library is distributed in the hope that it will be useful,
13
* FFmpeg is distributed in the hope that it will be useful,
12
14
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
15
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
16
* Lesser General Public License for more details.
16
18
* You should have received a copy of the GNU Lesser General Public
17
* License along with this library; if not, write to the Free Software
18
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
* License along with FFmpeg; if not, write to the Free Software
20
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
23
#include "../dsputil.h"
22
#define c1 1.38703984532214752434 /* sqrt(2)*cos(1*pi/16) */
23
#define c2 1.30656296487637657577 /* sqrt(2)*cos(2*pi/16) */
24
#define c3 1.17587560241935884520 /* sqrt(2)*cos(3*pi/16) */
25
#define c4 1.00000000000000000000 /* sqrt(2)*cos(4*pi/16) */
26
#define c5 0.78569495838710234903 /* sqrt(2)*cos(5*pi/16) */
27
#define c6 0.54119610014619712324 /* sqrt(2)*cos(6*pi/16) */
28
#define c7 0.27589937928294311353 /* sqrt(2)*cos(7*pi/16) */
30
const static float even_table[] __attribute__ ((aligned(8))) = {
37
const static float odd_table[] __attribute__ ((aligned(8))) = {
24
#define c1 1.38703984532214752434 /* sqrt(2)*cos(1*pi/16) */
25
#define c2 1.30656296487637657577 /* sqrt(2)*cos(2*pi/16) */
26
#define c3 1.17587560241935884520 /* sqrt(2)*cos(3*pi/16) */
27
#define c4 1.00000000000000000000 /* sqrt(2)*cos(4*pi/16) */
28
#define c5 0.78569495838710234903 /* sqrt(2)*cos(5*pi/16) */
29
#define c6 0.54119610014619712324 /* sqrt(2)*cos(6*pi/16) */
30
#define c7 0.27589937928294311353 /* sqrt(2)*cos(7*pi/16) */
32
static const float even_table[] __attribute__ ((aligned(8))) = {
39
static const float odd_table[] __attribute__ ((aligned(8))) = {
52
54
#if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
54
#define load_matrix(table) \
72
__asm__ volatile("ftrv xmtrx,fv0" \
73
: "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
74
: "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) );
77
register float fr0 __asm__("fr0"); \
78
register float fr1 __asm__("fr1"); \
79
register float fr2 __asm__("fr2"); \
80
register float fr3 __asm__("fr3")
56
#define load_matrix(table) \
74
__asm__ volatile("ftrv xmtrx,fv0" \
75
: "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
76
: "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) );
79
register float fr0 __asm__("fr0"); \
80
register float fr1 __asm__("fr1"); \
81
register float fr2 __asm__("fr2"); \
82
register float fr3 __asm__("fr3")
86
88
static void ftrv_(const float xf[],float fv[])
93
fv[0] = xf[0]*f0 + xf[4]*f1 + xf[ 8]*f2 + xf[12]*f3;
94
fv[1] = xf[1]*f0 + xf[5]*f1 + xf[ 9]*f2 + xf[13]*f3;
95
fv[2] = xf[2]*f0 + xf[6]*f1 + xf[10]*f2 + xf[14]*f3;
96
fv[3] = xf[3]*f0 + xf[7]*f1 + xf[11]*f2 + xf[15]*f3;
95
fv[0] = xf[0]*f0 + xf[4]*f1 + xf[ 8]*f2 + xf[12]*f3;
96
fv[1] = xf[1]*f0 + xf[5]*f1 + xf[ 9]*f2 + xf[13]*f3;
97
fv[2] = xf[2]*f0 + xf[6]*f1 + xf[10]*f2 + xf[14]*f3;
98
fv[3] = xf[3]*f0 + xf[7]*f1 + xf[11]*f2 + xf[15]*f3;
99
101
static void load_matrix_(float xf[],const float table[])
102
for(i=0;i<16;i++) xf[i]=table[i];
104
for(i=0;i<16;i++) xf[i]=table[i];
105
#define ftrv() ftrv_(xf,fv)
106
#define load_matrix(table) load_matrix_(xf,table)
107
#define ftrv() ftrv_(xf,fv)
108
#define load_matrix(table) load_matrix_(xf,table)
119
#define DESCALE(x,n) (x)*(1.0f/(1<<(n)))
121
#define DESCALE(x,n) (x)*(1.0f/(1<<(n)))
121
#define DESCALE(x,n) (((int)(x)+(1<<(n-1)))>>(n))
123
#define DESCALE(x,n) (((int)(x)+(1<<(n-1)))>>(n))
124
126
/* this code work worse on gcc cvs. 3.2.3 work fine */
130
132
void idct_sh4(DCTELEM *block)
135
float tblock[8*8],*fblock;
137
float tblock[8*8],*fblock;
138
140
#if defined(__SH4__)
139
#error "FIXME!! change to single float"
141
#error "FIXME!! change to single float"
145
load_matrix(even_table);
165
load_matrix(odd_table);
169
// ofs1 = sizeof(float)*1;
170
// ofs2 = sizeof(float)*2;
171
// ofs3 = sizeof(float)*3;
186
*--fblock = t0 - fr0;
187
*--fblock = t1 - fr1;
188
*--fblock = t2 - fr2;
189
*--fblock = t3 - fr3;
190
*--fblock = t3 + fr3;
191
*--fblock = t2 + fr2;
192
*--fblock = t1 + fr1;
193
*--fblock = t0 + fr0;
202
load_matrix(even_table);
204
ofs1 = sizeof(float)*2*8;
205
ofs2 = sizeof(float)*4*8;
206
ofs3 = sizeof(float)*6*8;
210
#define OA(fblock,ofs) *(float*)((char*)fblock + ofs)
214
fr1 = OA(fblock,ofs1);
215
fr2 = OA(fblock,ofs2);
216
fr3 = OA(fblock,ofs3);
219
OA(fblock,ofs1) = fr1;
220
OA(fblock,ofs2) = fr2;
221
OA(fblock,ofs3) = fr3;
226
load_matrix(odd_table);
231
t0 = OA(fblock, 0); /* [8*0] */
232
t1 = OA(fblock,ofs1); /* [8*2] */
233
t2 = OA(fblock,ofs2); /* [8*4] */
234
t3 = OA(fblock,ofs3); /* [8*6] */
236
fr0 = OA(fblock, 0); /* [8*1] */
237
fr1 = OA(fblock,ofs1); /* [8*3] */
238
fr2 = OA(fblock,ofs2); /* [8*5] */
239
fr3 = OA(fblock,ofs3); /* [8*7] */
242
block[8*0] = DESCALE(t0 + fr0,3);
243
block[8*7] = DESCALE(t0 - fr0,3);
244
block[8*1] = DESCALE(t1 + fr1,3);
245
block[8*6] = DESCALE(t1 - fr1,3);
246
block[8*2] = DESCALE(t2 + fr2,3);
247
block[8*5] = DESCALE(t2 - fr2,3);
248
block[8*3] = DESCALE(t3 + fr3,3);
249
block[8*4] = DESCALE(t3 - fr3,3);
147
load_matrix(even_table);
167
load_matrix(odd_table);
171
// ofs1 = sizeof(float)*1;
172
// ofs2 = sizeof(float)*2;
173
// ofs3 = sizeof(float)*3;
188
*--fblock = t0 - fr0;
189
*--fblock = t1 - fr1;
190
*--fblock = t2 - fr2;
191
*--fblock = t3 - fr3;
192
*--fblock = t3 + fr3;
193
*--fblock = t2 + fr2;
194
*--fblock = t1 + fr1;
195
*--fblock = t0 + fr0;
204
load_matrix(even_table);
206
ofs1 = sizeof(float)*2*8;
207
ofs2 = sizeof(float)*4*8;
208
ofs3 = sizeof(float)*6*8;
212
#define OA(fblock,ofs) *(float*)((char*)fblock + ofs)
216
fr1 = OA(fblock,ofs1);
217
fr2 = OA(fblock,ofs2);
218
fr3 = OA(fblock,ofs3);
221
OA(fblock,ofs1) = fr1;
222
OA(fblock,ofs2) = fr2;
223
OA(fblock,ofs3) = fr3;
228
load_matrix(odd_table);
233
t0 = OA(fblock, 0); /* [8*0] */
234
t1 = OA(fblock,ofs1); /* [8*2] */
235
t2 = OA(fblock,ofs2); /* [8*4] */
236
t3 = OA(fblock,ofs3); /* [8*6] */
238
fr0 = OA(fblock, 0); /* [8*1] */
239
fr1 = OA(fblock,ofs1); /* [8*3] */
240
fr2 = OA(fblock,ofs2); /* [8*5] */
241
fr3 = OA(fblock,ofs3); /* [8*7] */
244
block[8*0] = DESCALE(t0 + fr0,3);
245
block[8*7] = DESCALE(t0 - fr0,3);
246
block[8*1] = DESCALE(t1 + fr1,3);
247
block[8*6] = DESCALE(t1 - fr1,3);
248
block[8*2] = DESCALE(t2 + fr2,3);
249
block[8*5] = DESCALE(t2 - fr2,3);
250
block[8*3] = DESCALE(t3 + fr3,3);
251
block[8*4] = DESCALE(t3 - fr3,3);
253
255
#if defined(__SH4__)
254
#error "FIXME!! change to double"
256
#error "FIXME!! change to double"
258
260
void idct_sh4(DCTELEM *block)
263
float tblock[8*8],*fblock;
268
load_matrix(even_table);
288
load_matrix(odd_table);
304
fblock[0] = t0 + fr0;
305
fblock[7] = t0 - fr0;
306
fblock[1] = t1 + fr1;
307
fblock[6] = t1 - fr1;
308
fblock[2] = t2 + fr2;
309
fblock[5] = t2 - fr2;
310
fblock[3] = t3 + fr3;
311
fblock[4] = t3 - fr3;
320
load_matrix(even_table);
338
load_matrix(odd_table);
353
block[8*0] = DESCALE(t0 + fr0,3);
354
block[8*7] = DESCALE(t0 - fr0,3);
355
block[8*1] = DESCALE(t1 + fr1,3);
356
block[8*6] = DESCALE(t1 - fr1,3);
357
block[8*2] = DESCALE(t2 + fr2,3);
358
block[8*5] = DESCALE(t2 - fr2,3);
359
block[8*3] = DESCALE(t3 + fr3,3);
360
block[8*4] = DESCALE(t3 - fr3,3);
265
float tblock[8*8],*fblock;
270
load_matrix(even_table);
290
load_matrix(odd_table);
306
fblock[0] = t0 + fr0;
307
fblock[7] = t0 - fr0;
308
fblock[1] = t1 + fr1;
309
fblock[6] = t1 - fr1;
310
fblock[2] = t2 + fr2;
311
fblock[5] = t2 - fr2;
312
fblock[3] = t3 + fr3;
313
fblock[4] = t3 - fr3;
322
load_matrix(even_table);
340
load_matrix(odd_table);
355
block[8*0] = DESCALE(t0 + fr0,3);
356
block[8*7] = DESCALE(t0 - fr0,3);
357
block[8*1] = DESCALE(t1 + fr1,3);
358
block[8*6] = DESCALE(t1 - fr1,3);
359
block[8*2] = DESCALE(t2 + fr2,3);
360
block[8*5] = DESCALE(t2 - fr2,3);
361
block[8*3] = DESCALE(t3 + fr3,3);
362
block[8*4] = DESCALE(t3 - fr3,3);