2
dct64.c: DCT64, the plain C version
4
copyright ?-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
5
see COPYING and AUTHORS files in distribution or http://mpg123.de
6
initially written by Michael Hipp
10
* Discrete Cosine Transform (DCT) for subband synthesis
12
* -funroll-loops (for gcc) will remove the loops for better performance
13
* using loops in the source-code enhances readability
16
* TODO: write an optimized version for the down-sampling modes
17
* (in these modes the bands 16-31 (2:1) or 8-31 (4:1) are zero
23
void dct64(real *out0,real *out1,real *samples)
29
register real *b1,*b2,*bs,*costab;
37
*bs++ = (*b1++ + *--b2);
39
*bs++ = REAL_MUL((*--b2 - *b1++), *--costab);
47
*bs++ = (*b1++ + *--b2);
49
*bs++ = REAL_MUL((*--b2 - *b1++), *--costab);
53
*bs++ = (*b1++ + *--b2);
55
*bs++ = REAL_MUL((*b1++ - *--b2), *--costab);
66
*bs++ = (*b1++ + *--b2);
68
*bs++ = REAL_MUL((*--b2 - *b1++), costab[i]);
71
*bs++ = (*b1++ + *--b2);
73
*bs++ = REAL_MUL((*b1++ - *--b2), costab[i]);
83
*bs++ = (*b1++ + *--b2);
84
*bs++ = (*b1++ + *--b2);
85
*bs++ = REAL_MUL((*--b2 - *b1++), costab[1]);
86
*bs++ = REAL_MUL((*--b2 - *b1++), costab[0]);
88
*bs++ = (*b1++ + *--b2);
89
*bs++ = (*b1++ + *--b2);
90
*bs++ = REAL_MUL((*b1++ - *--b2), costab[1]);
91
*bs++ = REAL_MUL((*b1++ - *--b2), costab[0]);
100
v0=*b1++; v1 = *b1++;
102
*bs++ = REAL_MUL((v0 - v1), (*costab));
103
v0=*b1++; v1 = *b1++;
105
*bs++ = REAL_MUL((v1 - v0), (*costab));
115
for(b1=bufs,i=8;i;i--,b1+=4)
118
for(b1=bufs,i=4;i;i--,b1+=8)
125
for(b1=bufs,i=2;i;i--,b1+=16)
138
out0[0x10*16] = bufs[0];
139
out0[0x10*15] = bufs[16+0] + bufs[16+8];
140
out0[0x10*14] = bufs[8];
141
out0[0x10*13] = bufs[16+8] + bufs[16+4];
142
out0[0x10*12] = bufs[4];
143
out0[0x10*11] = bufs[16+4] + bufs[16+12];
144
out0[0x10*10] = bufs[12];
145
out0[0x10* 9] = bufs[16+12] + bufs[16+2];
146
out0[0x10* 8] = bufs[2];
147
out0[0x10* 7] = bufs[16+2] + bufs[16+10];
148
out0[0x10* 6] = bufs[10];
149
out0[0x10* 5] = bufs[16+10] + bufs[16+6];
150
out0[0x10* 4] = bufs[6];
151
out0[0x10* 3] = bufs[16+6] + bufs[16+14];
152
out0[0x10* 2] = bufs[14];
153
out0[0x10* 1] = bufs[16+14] + bufs[16+1];
154
out0[0x10* 0] = bufs[1];
156
out1[0x10* 0] = bufs[1];
157
out1[0x10* 1] = bufs[16+1] + bufs[16+9];
158
out1[0x10* 2] = bufs[9];
159
out1[0x10* 3] = bufs[16+9] + bufs[16+5];
160
out1[0x10* 4] = bufs[5];
161
out1[0x10* 5] = bufs[16+5] + bufs[16+13];
162
out1[0x10* 6] = bufs[13];
163
out1[0x10* 7] = bufs[16+13] + bufs[16+3];
164
out1[0x10* 8] = bufs[3];
165
out1[0x10* 9] = bufs[16+3] + bufs[16+11];
166
out1[0x10*10] = bufs[11];
167
out1[0x10*11] = bufs[16+11] + bufs[16+7];
168
out1[0x10*12] = bufs[7];
169
out1[0x10*13] = bufs[16+7] + bufs[16+15];
170
out1[0x10*14] = bufs[15];
171
out1[0x10*15] = bufs[16+15];