2
Copyright (c) 2003-2004, Mark Borgerding
6
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
8
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
9
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
10
* Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
12
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
19
#include "os_support.h"
20
#include "kiss_fftr.h"
21
#include "_kiss_fft_guts.h"
23
struct kiss_fftr_state{
24
kiss_fft_cfg substate;
25
kiss_fft_cpx * tmpbuf;
26
kiss_fft_cpx * super_twiddles;
32
kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
35
kiss_fftr_cfg st = NULL;
36
size_t subsize, memneeded;
39
speex_warning("Real FFT optimization must be even.\n");
44
kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
45
memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 2);
48
st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded);
50
if (*lenmem >= memneeded)
51
st = (kiss_fftr_cfg) mem;
57
st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
58
st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
59
st->super_twiddles = st->tmpbuf + nfft;
60
kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
63
for (i=0;i<nfft;++i) {
64
spx_word32_t phase = i+(nfft>>1);
67
kf_cexp2(st->super_twiddles+i, DIV32(SHL32(phase,16),nfft));
70
for (i=0;i<nfft;++i) {
71
const double pi=3.14159265358979323846264338327;
72
double phase = pi*(((double)i) /nfft + .5);
75
kf_cexp(st->super_twiddles+i, phase );
81
void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
83
/* input buffer timedata is stored row-wise */
85
kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
87
if ( st->substate->inverse) {
88
speex_fatal("kiss fft usage error: improper alloc\n");
91
ncfft = st->substate->nfft;
93
/*perform the parallel fft of two real signals packed in real,imag*/
94
kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
95
/* The real part of the DC element of the frequency spectrum in st->tmpbuf
96
* contains the sum of the even-numbered elements of the input time sequence
97
* The imag part is the sum of the odd-numbered elements
99
* The sum of tdc.r and tdc.i is the sum of the input time sequence.
100
* yielding DC of input time sequence
101
* The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
102
* yielding Nyquist bin of input time sequence
105
tdc.r = st->tmpbuf[0].r;
106
tdc.i = st->tmpbuf[0].i;
108
CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
109
CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
110
freqdata[0].r = tdc.r + tdc.i;
111
freqdata[ncfft].r = tdc.r - tdc.i;
113
freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
115
freqdata[ncfft].i = freqdata[0].i = 0;
118
for ( k=1;k <= ncfft/2 ; ++k ) {
120
fpnk.r = st->tmpbuf[ncfft-k].r;
121
fpnk.i = - st->tmpbuf[ncfft-k].i;
125
C_ADD( f1k, fpk , fpnk );
126
C_SUB( f2k, fpk , fpnk );
127
C_MUL( tw , f2k , st->super_twiddles[k]);
129
freqdata[k].r = HALF_OF(f1k.r + tw.r);
130
freqdata[k].i = HALF_OF(f1k.i + tw.i);
131
freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
132
freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
136
void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata, kiss_fft_scalar *timedata)
138
/* input buffer timedata is stored row-wise */
141
if (st->substate->inverse == 0) {
142
speex_fatal("kiss fft usage error: improper alloc\n");
145
ncfft = st->substate->nfft;
147
st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
148
st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
149
/*C_FIXDIV(st->tmpbuf[0],2);*/
151
for (k = 1; k <= ncfft / 2; ++k) {
152
kiss_fft_cpx fk, fnkc, fek, fok, tmp;
154
fnkc.r = freqdata[ncfft - k].r;
155
fnkc.i = -freqdata[ncfft - k].i;
156
/*C_FIXDIV( fk , 2 );
157
C_FIXDIV( fnkc , 2 );*/
159
C_ADD (fek, fk, fnkc);
160
C_SUB (tmp, fk, fnkc);
161
C_MUL (fok, tmp, st->super_twiddles[k]);
162
C_ADD (st->tmpbuf[k], fek, fok);
163
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
165
st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
167
st->tmpbuf[ncfft - k].i *= -1;
170
kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
173
void kiss_fftr2(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar *freqdata)
175
/* input buffer timedata is stored row-wise */
177
kiss_fft_cpx f2k,tdc;
178
spx_word32_t f1kr, f1ki, twr, twi;
180
if ( st->substate->inverse) {
181
speex_fatal("kiss fft usage error: improper alloc\n");
184
ncfft = st->substate->nfft;
186
/*perform the parallel fft of two real signals packed in real,imag*/
187
kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
188
/* The real part of the DC element of the frequency spectrum in st->tmpbuf
189
* contains the sum of the even-numbered elements of the input time sequence
190
* The imag part is the sum of the odd-numbered elements
192
* The sum of tdc.r and tdc.i is the sum of the input time sequence.
193
* yielding DC of input time sequence
194
* The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
195
* yielding Nyquist bin of input time sequence
198
tdc.r = st->tmpbuf[0].r;
199
tdc.i = st->tmpbuf[0].i;
201
CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
202
CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
203
freqdata[0] = tdc.r + tdc.i;
204
freqdata[2*ncfft-1] = tdc.r - tdc.i;
206
for ( k=1;k <= ncfft/2 ; ++k )
208
/*fpk = st->tmpbuf[k];
209
fpnk.r = st->tmpbuf[ncfft-k].r;
210
fpnk.i = - st->tmpbuf[ncfft-k].i;
214
C_ADD( f1k, fpk , fpnk );
215
C_SUB( f2k, fpk , fpnk );
217
C_MUL( tw , f2k , st->super_twiddles[k]);
219
freqdata[2*k-1] = HALF_OF(f1k.r + tw.r);
220
freqdata[2*k] = HALF_OF(f1k.i + tw.i);
221
freqdata[2*(ncfft-k)-1] = HALF_OF(f1k.r - tw.r);
222
freqdata[2*(ncfft-k)] = HALF_OF(tw.i - f1k.i);
225
/*f1k.r = PSHR32(ADD32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),1);
226
f1k.i = PSHR32(SUB32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),1);
227
f2k.r = PSHR32(SUB32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),1);
228
f2k.i = SHR32(ADD32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),1);
230
C_MUL( tw , f2k , st->super_twiddles[k]);
232
freqdata[2*k-1] = HALF_OF(f1k.r + tw.r);
233
freqdata[2*k] = HALF_OF(f1k.i + tw.i);
234
freqdata[2*(ncfft-k)-1] = HALF_OF(f1k.r - tw.r);
235
freqdata[2*(ncfft-k)] = HALF_OF(tw.i - f1k.i);
237
f2k.r = SHR32(SUB32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),1);
238
f2k.i = PSHR32(ADD32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),1);
240
f1kr = SHL32(ADD32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),13);
241
f1ki = SHL32(SUB32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),13);
243
twr = SHR32(SUB32(MULT16_16(f2k.r,st->super_twiddles[k].r),MULT16_16(f2k.i,st->super_twiddles[k].i)), 1);
244
twi = SHR32(ADD32(MULT16_16(f2k.i,st->super_twiddles[k].r),MULT16_16(f2k.r,st->super_twiddles[k].i)), 1);
247
freqdata[2*k-1] = PSHR32(f1kr + twr, 15);
248
freqdata[2*k] = PSHR32(f1ki + twi, 15);
249
freqdata[2*(ncfft-k)-1] = PSHR32(f1kr - twr, 15);
250
freqdata[2*(ncfft-k)] = PSHR32(twi - f1ki, 15);
252
freqdata[2*k-1] = .5f*(f1kr + twr);
253
freqdata[2*k] = .5f*(f1ki + twi);
254
freqdata[2*(ncfft-k)-1] = .5f*(f1kr - twr);
255
freqdata[2*(ncfft-k)] = .5f*(twi - f1ki);
261
void kiss_fftri2(kiss_fftr_cfg st,const kiss_fft_scalar *freqdata,kiss_fft_scalar *timedata)
263
/* input buffer timedata is stored row-wise */
266
if (st->substate->inverse == 0) {
267
speex_fatal ("kiss fft usage error: improper alloc\n");
270
ncfft = st->substate->nfft;
272
st->tmpbuf[0].r = freqdata[0] + freqdata[2*ncfft-1];
273
st->tmpbuf[0].i = freqdata[0] - freqdata[2*ncfft-1];
274
/*C_FIXDIV(st->tmpbuf[0],2);*/
276
for (k = 1; k <= ncfft / 2; ++k) {
277
kiss_fft_cpx fk, fnkc, fek, fok, tmp;
278
fk.r = freqdata[2*k-1];
279
fk.i = freqdata[2*k];
280
fnkc.r = freqdata[2*(ncfft - k)-1];
281
fnkc.i = -freqdata[2*(ncfft - k)];
282
/*C_FIXDIV( fk , 2 );
283
C_FIXDIV( fnkc , 2 );*/
285
C_ADD (fek, fk, fnkc);
286
C_SUB (tmp, fk, fnkc);
287
C_MUL (fok, tmp, st->super_twiddles[k]);
288
C_ADD (st->tmpbuf[k], fek, fok);
289
C_SUB (st->tmpbuf[ncfft - k], fek, fok);
291
st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
293
st->tmpbuf[ncfft - k].i *= -1;
296
kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);