2
decode_i386.c: decode for i386 (really faster?)
4
copyright 1995-2006 by the mpg123 project - free software under the terms of the LGPL 2.1
5
see COPYING and AUTHORS files in distribution or http://mpg123.de
6
initially written by Michael Hipp
8
slightly optimized for machines without autoincrement/decrement.
9
The performance is highly compiler dependent. Maybe
10
the decode.c version for 'normal' processor may be faster
11
even for Intel processors.
22
/* old WRITE_SAMPLE */
/*
 * Stores `sum` through the `samples` pointer with saturation:
 *   sum >  32767.0  -> writes 0x7fff  and increments `clip`
 *   sum < -32768.0  -> writes -0x8000 and increments `clip`
 *   otherwise       -> assigns `sum` directly, so the narrowing to the
 *                      sample type is the implicit C conversion.
 * `sum` is evaluated more than once; pass an expression without side effects.
 * NOTE(review): a second definition of WRITE_SAMPLE follows in this file;
 * presumably a preprocessor conditional that is not visible here selects
 * one of the two -- confirm.
 */
23
#define WRITE_SAMPLE(samples,sum,clip) \
24
if( (sum) > 32767.0) { *(samples) = 0x7fff; (clip)++; } \
25
else if( (sum) < -32768.0) { *(samples) = -0x8000; (clip)++; } \
26
else { *(samples) = sum; }
28
/* new WRITE_SAMPLE */
29
/* keep in mind that we are on known little-endian i386 here and special tricks are allowed... */
/*
 * Converts `sum` to an integer by adding a magic constant instead of casting,
 * then stores it through `samples` with saturation.
 *
 * The constant on the dtemp line evaluates to 2^52 + 2^31:
 *   (2^36 + 2^15) * 2^16 = 2^52 + 2^31.
 * Assuming IEEE-754 doubles, a value of that magnitude has a unit in the
 * last place of 1.0, so the addition rounds `sum` to an integer and leaves
 * (2^31 + rounded sum) in the low bits of the mantissa.
 * The (int *) cast reads the first sizeof(int) bytes of dtemp, which on a
 * little-endian machine is the low mantissa word; subtracting 0x80000000
 * removes the 2^31 bias again, leaving the rounded sample in v.
 *
 * v is then limited to [-32768, 32767]; each time a limit is applied the
 * value 0x7fff or -0x8000 is written and `clip` is incremented.
 *
 * NOTE(review): reading a double through an int lvalue is a type-pun that
 * violates the C strict-aliasing rule; memcpy or a union would be the
 * conforming form. The trick also depends on sizeof(int) == 4, as the
 * inline comment states.
 */
30
#define WRITE_SAMPLE(samples,sum,clip) { \
31
double dtemp; int v; /* sizeof(int) == 4 */ \
32
dtemp = ((((65536.0 * 65536.0 * 16)+(65536.0 * 0.5))* 65536.0)) + (sum); \
33
v = ((*(int *)&dtemp) - 0x80000000); \
34
if( v > 32767) { *(samples) = 0x7fff; (clip)++; } \
35
else if( v < -32768) { *(samples) = -0x8000; (clip)++; } \
36
else { *(samples) = v; } \
40
/*
 * 8-bit output wrapper around synth_1to1 for a given channel.
 *
 * bandPtr, channel: passed through unchanged to synth_1to1.
 * samples:          destination for the 8-bit output.
 * pnt:              current offset into `samples` (read here to position
 *                   the output pointer).
 *
 * Only fragments of the body are visible in this listing: the loop around
 * the conversion, the pointer strides, the declarations of ret/pnt1, the
 * update of *pnt and the return statement are not shown.
 */
int synth_1to1_8bit(real *bandPtr,int channel,unsigned char *samples,int *pnt)
42
/* local 16-bit scratch buffer that synth_1to1 writes into */
short samples_tmp[64];
43
/* read position starts at this channel's offset inside the scratch buffer */
short *tmp1 = samples_tmp + channel;
47
/* synthesize into the scratch buffer; pnt1 is a local offset whose
   declaration is not visible here */
ret = synth_1to1(bandPtr,channel,(unsigned char *)samples_tmp,&pnt1);
48
/* output position: current offset *pnt plus the channel offset */
samples += channel + *pnt;
51
/* the 16-bit sample shifted right by AUSHIFT indexes the conv16to8 table;
   the table entry is the 8-bit output value */
*samples = conv16to8[*tmp1>>AUSHIFT];
60
/*
 * 8-bit mono output wrapper around synth_1to1.
 *
 * Calls synth_1to1 for channel 0 into a local 16-bit scratch buffer and
 * writes converted 8-bit values to consecutive bytes of `samples`.
 *
 * Only fragments of the body are visible in this listing: the loop, the
 * stride of tmp1, the positioning of `samples` relative to *pnt, the update
 * of *pnt and the return statement are not shown.
 */
int synth_1to1_8bit_mono(real *bandPtr,unsigned char *samples,int *pnt)
62
/* local 16-bit scratch buffer that synth_1to1 writes into */
short samples_tmp[64];
63
short *tmp1 = samples_tmp;
67
/* channel argument is fixed to 0; pnt1 is a local offset whose declaration
   is not visible here */
ret = synth_1to1(bandPtr,0,(unsigned char *)samples_tmp,&pnt1);
71
/* table lookup on the AUSHIFT-shifted 16-bit sample; output advances by one byte */
*samples++ = conv16to8[*tmp1>>AUSHIFT];
79
/*
 * 8-bit wrapper around synth_1to1 that writes every converted sample twice.
 *
 * Calls synth_1to1 for channel 0 into a local 16-bit scratch buffer, then
 * stores the same conv16to8 result into two consecutive output bytes
 * (presumably the left and right slot of an interleaved stereo stream --
 * confirm against the caller).
 *
 * Only fragments of the body are visible in this listing: the loop, the
 * stride of tmp1, the positioning of `samples` relative to *pnt, the update
 * of *pnt and the return statement are not shown.
 */
int synth_1to1_8bit_mono2stereo(real *bandPtr,unsigned char *samples,int *pnt)
81
/* local 16-bit scratch buffer that synth_1to1 writes into */
short samples_tmp[64];
82
short *tmp1 = samples_tmp;
86
/* channel argument is fixed to 0; pnt1 is a local offset whose declaration
   is not visible here */
ret = synth_1to1(bandPtr,0,(unsigned char *)samples_tmp,&pnt1);
90
/* same source sample is converted and stored twice in a row */
*samples++ = conv16to8[*tmp1>>AUSHIFT];
91
*samples++ = conv16to8[*tmp1>>AUSHIFT];
99
/*
 * 16-bit mono wrapper around synth_1to1.
 *
 * Calls synth_1to1 for channel 0 into a local 16-bit scratch buffer and
 * copies 16-bit samples from that buffer into `samples`, which is accessed
 * as short here.
 *
 * Only fragments of the body are visible in this listing: the loop, the
 * strides of tmp1 and samples, the positioning of `samples` relative to
 * *pnt, the update of *pnt and the return statement are not shown.
 */
int synth_1to1_mono(real *bandPtr,unsigned char *samples,int *pnt)
101
/* local 16-bit scratch buffer that synth_1to1 writes into */
short samples_tmp[64];
102
short *tmp1 = samples_tmp;
106
/* channel argument is fixed to 0; pnt1 is a local offset whose declaration
   is not visible here */
ret = synth_1to1(bandPtr,0,(unsigned char *) samples_tmp,&pnt1);
110
/* store one 16-bit sample at the current output position */
*( (short *) samples) = *tmp1;
120
/*
 * 16-bit wrapper around synth_1to1 that duplicates samples in place.
 *
 * Unlike the other wrappers visible in this file, it lets synth_1to1 write
 * directly into `samples` (channel 0, caller's pnt) and afterwards copies
 * short [0] of each position into short [1].
 *
 * Only fragments of the body are visible in this listing: the loop, the
 * stride of `samples`, the declaration of ret and the return statement are
 * not shown.
 */
int synth_1to1_mono2stereo(real *bandPtr,unsigned char *samples,int *pnt)
124
ret = synth_1to1(bandPtr,0,samples,pnt);
125
/* step back 128 bytes from the offset now held in *pnt -- presumably the
   start of the data synth_1to1 just produced; confirm that synth_1to1
   advances *pnt by 128 */
samples = samples + *pnt - 128;
128
/* copy the first 16-bit value of the pair into the second */
((short *)samples)[1] = ((short *)samples)[0];
135
int synth_1to1(real *bandPtr,int channel,unsigned char *out,int *pnt)
138
static real buffs[2][2][0x110];
139
static const int step = 2;
141
short *samples = (short *) (out + *pnt);
143
real *b0,(*buf)[0x110];
149
do_equalizer(bandPtr,channel);
165
dct64(buf[1]+((bo+1)&0xf),buf[0]+bo,bandPtr);
170
dct64(buf[0]+bo,buf[1]+bo+1,bandPtr);
175
real *window = decwin + 16 - bo1;
177
for (j=16;j;j--,b0+=0x10,window+=0x20,samples+=step)
180
sum = window[0x0] * b0[0x0];
181
sum -= window[0x1] * b0[0x1];
182
sum += window[0x2] * b0[0x2];
183
sum -= window[0x3] * b0[0x3];
184
sum += window[0x4] * b0[0x4];
185
sum -= window[0x5] * b0[0x5];
186
sum += window[0x6] * b0[0x6];
187
sum -= window[0x7] * b0[0x7];
188
sum += window[0x8] * b0[0x8];
189
sum -= window[0x9] * b0[0x9];
190
sum += window[0xA] * b0[0xA];
191
sum -= window[0xB] * b0[0xB];
192
sum += window[0xC] * b0[0xC];
193
sum -= window[0xD] * b0[0xD];
194
sum += window[0xE] * b0[0xE];
195
sum -= window[0xF] * b0[0xF];
197
WRITE_SAMPLE(samples,sum,clip);
202
sum = window[0x0] * b0[0x0];
203
sum += window[0x2] * b0[0x2];
204
sum += window[0x4] * b0[0x4];
205
sum += window[0x6] * b0[0x6];
206
sum += window[0x8] * b0[0x8];
207
sum += window[0xA] * b0[0xA];
208
sum += window[0xC] * b0[0xC];
209
sum += window[0xE] * b0[0xE];
210
WRITE_SAMPLE(samples,sum,clip);
211
b0-=0x10,window-=0x20,samples+=step;
215
for (j=15;j;j--,b0-=0x10,window-=0x20,samples+=step)
218
sum = -window[-0x1] * b0[0x0];
219
sum -= window[-0x2] * b0[0x1];
220
sum -= window[-0x3] * b0[0x2];
221
sum -= window[-0x4] * b0[0x3];
222
sum -= window[-0x5] * b0[0x4];
223
sum -= window[-0x6] * b0[0x5];
224
sum -= window[-0x7] * b0[0x6];
225
sum -= window[-0x8] * b0[0x7];
226
sum -= window[-0x9] * b0[0x8];
227
sum -= window[-0xA] * b0[0x9];
228
sum -= window[-0xB] * b0[0xA];
229
sum -= window[-0xC] * b0[0xB];
230
sum -= window[-0xD] * b0[0xC];
231
sum -= window[-0xE] * b0[0xD];
232
sum -= window[-0xF] * b0[0xE];
233
sum -= window[-0x0] * b0[0xF];
235
WRITE_SAMPLE(samples,sum,clip);
241
#elif defined(USE_MMX)
243
static short buffs[2][2][0x110];
245
short *samples = (short *) (out + *pnt);
246
synth_1to1_MMX(bandPtr, channel, samples, (short *) buffs, &bo);
253
ret = synth_1to1_pent(bandPtr,channel,out+*pnt);