3
Copyright (C) 2001 Martin Vogt
5
This program is free software; you can redistribute it and/or modify
6
it under the terms of the GNU Library General Public License as published by
7
the Free Software Foundation.
9
For more information look at the file COPYRIGHT in this package
16
// Cosine scaling tables read by dct64() and filled in by initialize_dct64():
//   hcos_N[i] = 1.0 / (2.0 * cos(MY_PI * (2*i + 1) / N))   for N = 64, 32, 16, 8
//   hcos_4    = 1.0 / (2.0 * cos(MY_PI / 4.0))
// NOTE(review): ATTR_ALIGN and REAL are defined outside this excerpt;
// presumably a 64-byte alignment attribute and the sample float type -- confirm.
ATTR_ALIGN(64) static REAL hcos_64[16];
17
ATTR_ALIGN(64) static REAL hcos_32[8];
18
ATTR_ALIGN(64) static REAL hcos_16[4];
19
ATTR_ALIGN(64) static REAL hcos_8[2];
20
ATTR_ALIGN(64) static REAL hcos_4;
23
This was some time ago a standalone dct class,
24
but to get more speed I made it an inline dct
25
into the filter classes
28
// Guard flag tested at the top of initialize_dct64() (compared against true).
// NOTE(review): presumably set once the hcos_* tables have been filled so the
// initialisation runs only once -- the assignment is not visible in this excerpt.
static int dct64Init=false;
30
void initialize_dct64() {
31
if (dct64Init==true) {
39
hcos_64[i]=1.0/(2.0*cos(MY_PI*double(i*2+1)/64.0));
42
hcos_32[i]=1.0/(2.0*cos(MY_PI*double(i*2+1)/32.0));
45
hcos_16[i]=1.0/(2.0*cos(MY_PI*double(i*2+1)/16.0));
48
hcos_8[i]=1.0/(2.0*cos(MY_PI*double(i*2+1)/ 8.0));
50
hcos_4=1.0/(2.0*cos(MY_PI*1.0/4.0));
58
// splay dct64, faster than mpeg123 dct (from decode.c)
60
inline void dct64(REAL* out1,REAL* out2,REAL *fraction) {
61
ATTR_ALIGN(64) REAL p0,p1,p2,p3,p4,p5,p6,p7,p8,p9,pa,pb,pc,pd,pe,pf;
62
ATTR_ALIGN(64) REAL q0,q1,q2,q3,q4,q5,q6,q7,q8,q9,qa,qb,qc,qd,qe,qf;
64
#define OUT1(v,t) out1[(32-(v))*16] =(-(out1[(v)*16]=t))
65
#define OUT2(v) out2[(96-(v)-32)*16]=out2[((v)-32)*16]
67
// compute new values via a fast cosine transform:
69
// put to buffer 0..15
70
register REAL* x=fraction;
72
p0=x[ 0]+x[31];p1=x[ 1]+x[30];p2=x[ 2]+x[29];p3=x[ 3]+x[28];
73
p4=x[ 4]+x[27];p5=x[ 5]+x[26];p6=x[ 6]+x[25];p7=x[ 7]+x[24];
74
p8=x[ 8]+x[23];p9=x[ 9]+x[22];pa=x[10]+x[21];pb=x[11]+x[20];
75
pc=x[12]+x[19];pd=x[13]+x[18];pe=x[14]+x[17];pf=x[15]+x[16];
78
// put to buffer 32..39
79
q0=p0+pf;q1=p1+pe;q2=p2+pd;q3=p3+pc;
80
q4=p4+pb;q5=p5+pa;q6=p6+p9;q7=p7+p8;
81
// put to buffer 40..47
82
q8=hcos_32[0]*(p0-pf);q9=hcos_32[1]*(p1-pe);
83
qa=hcos_32[2]*(p2-pd);qb=hcos_32[3]*(p3-pc);
84
qc=hcos_32[4]*(p4-pb);qd=hcos_32[5]*(p5-pa);
85
qe=hcos_32[6]*(p6-p9);qf=hcos_32[7]*(p7-p8);
87
p0=q0+q7;p1=q1+q6;p2=q2+q5;p3=q3+q4;
88
p4=hcos_16[0]*(q0-q7);p5=hcos_16[1]*(q1-q6);
89
p6=hcos_16[2]*(q2-q5);p7=hcos_16[3]*(q3-q4);
90
p8=q8+qf;p9=q9+qe;pa=qa+qd;pb=qb+qc;
91
pc=hcos_16[0]*(q8-qf);pd=hcos_16[1]*(q9-qe);
92
pe=hcos_16[2]*(qa-qd);pf=hcos_16[3]*(qb-qc);
94
q0=p0+p3;q1=p1+p2;q2=hcos_8[0]*(p0-p3);q3=hcos_8[1]*(p1-p2);
95
q4=p4+p7;q5=p5+p6;q6=hcos_8[0]*(p4-p7);q7=hcos_8[1]*(p5-p6);
96
q8=p8+pb;q9=p9+pa;qa=hcos_8[0]*(p8-pb);qb=hcos_8[1]*(p9-pa);
97
qc=pc+pf;qd=pd+pe;qe=hcos_8[0]*(pc-pf);qf=hcos_8[1]*(pd-pe);
99
p0=q0+q1;p1=hcos_4*(q0-q1);p2=q2+q3;p3=hcos_4*(q2-q3);
100
p4=q4+q5;p5=hcos_4*(q4-q5);p6=q6+q7;p7=hcos_4*(q6-q7);
101
p8=q8+q9;p9=hcos_4*(q8-q9);pa=qa+qb;pb=hcos_4*(qa-qb);
102
pc=qc+qd;pd=hcos_4*(qc-qd);pe=qe+qf;pf=hcos_4*(qe-qf);
114
OUT2(46)=-(p8+pc+tmp);
115
OUT2(34)=-(p9+pd+tmp);
122
out2[0]=-(out1[0]=p1);
130
// put to buffer 16..31
131
register REAL *x=fraction;
133
p0=hcos_64[ 0]*(x[ 0]-x[31]);p1=hcos_64[ 1]*(x[ 1]-x[30]);
134
p2=hcos_64[ 2]*(x[ 2]-x[29]);p3=hcos_64[ 3]*(x[ 3]-x[28]);
135
p4=hcos_64[ 4]*(x[ 4]-x[27]);p5=hcos_64[ 5]*(x[ 5]-x[26]);
136
p6=hcos_64[ 6]*(x[ 6]-x[25]);p7=hcos_64[ 7]*(x[ 7]-x[24]);
137
p8=hcos_64[ 8]*(x[ 8]-x[23]);p9=hcos_64[ 9]*(x[ 9]-x[22]);
138
pa=hcos_64[10]*(x[10]-x[21]);pb=hcos_64[11]*(x[11]-x[20]);
139
pc=hcos_64[12]*(x[12]-x[19]);pd=hcos_64[13]*(x[13]-x[18]);
140
pe=hcos_64[14]*(x[14]-x[17]);pf=hcos_64[15]*(x[15]-x[16]);
144
q0=p0+pf;q1=p1+pe;q2=p2+pd;q3=p3+pc;
145
q4=p4+pb;q5=p5+pa;q6=p6+p9;q7=p7+p8;
146
q8=hcos_32[0]*(p0-pf);q9=hcos_32[1]*(p1-pe);
147
qa=hcos_32[2]*(p2-pd);qb=hcos_32[3]*(p3-pc);
148
qc=hcos_32[4]*(p4-pb);qd=hcos_32[5]*(p5-pa);
149
qe=hcos_32[6]*(p6-p9);qf=hcos_32[7]*(p7-p8);
151
p0=q0+q7;p1=q1+q6;p2=q2+q5;p3=q3+q4;
152
p4=hcos_16[0]*(q0-q7);p5=hcos_16[1]*(q1-q6);
153
p6=hcos_16[2]*(q2-q5);p7=hcos_16[3]*(q3-q4);
154
p8=q8+qf;p9=q9+qe;pa=qa+qd;pb=qb+qc;
155
pc=hcos_16[0]*(q8-qf);pd=hcos_16[1]*(q9-qe);
156
pe=hcos_16[2]*(qa-qd);pf=hcos_16[3]*(qb-qc);
158
q0=p0+p3;q1=p1+p2;q2=hcos_8[0]*(p0-p3);q3=hcos_8[1]*(p1-p2);
159
q4=p4+p7;q5=p5+p6;q6=hcos_8[0]*(p4-p7);q7=hcos_8[1]*(p5-p6);
160
q8=p8+pb;q9=p9+pa;qa=hcos_8[0]*(p8-pb);qb=hcos_8[1]*(p9-pa);
161
qc=pc+pf;qd=pd+pe;qe=hcos_8[0]*(pc-pf);qf=hcos_8[1]*(pd-pe);
163
p0=q0+q1;p1=hcos_4*(q0-q1);
164
p2=q2+q3;p3=hcos_4*(q2-q3);
165
p4=q4+q5;p5=hcos_4*(q4-q5);
166
p6=q6+q7;p7=hcos_4*(q6-q7);
167
p8=q8+q9;p9=hcos_4*(q8-q9);
168
pa=qa+qb;pb=hcos_4*(qa-qb);
169
pc=qc+qd;pd=hcos_4*(qc-qd);
170
pe=qe+qf;pf=hcos_4*(qe-qf);
176
OUT1(5,p5+p7+pb+tmp);
179
OUT2(33)=-(p1+pe+tmp);
182
OUT2(35)=-(p6+pe+tmp);
183
tmp=pa+pb+pc+pd+pe+pf;
184
OUT2(39)=-(p2+p3+tmp-pc);
185
OUT2(43)=-(p4+p6+p7+tmp-pd);
186
OUT2(37)=-(p5+p6+p7+tmp-pc);
187
OUT2(41)=-(p2+p3+tmp-pd);
190
OUT2(45)=-(p4+p6+p7+tmp);