2
Copyright (c) 2003-2004, Mark Borgerding
3
Copyright (c) 2005-2007, Jean-Marc Valin
7
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
9
* Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
10
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
11
* Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
13
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
21
#include "_kiss_fft_guts.h"
23
#include "os_support.h"
25
/* The guts header contains all the multiplication and addition macros that are defined for
26
fixed or floating point complex numbers. It also delares the kf_ internal functions.
32
const kiss_fft_cfg st,
43
kiss_fft_cpx * Fout_beg = Fout;
46
Fout = Fout_beg + i*mm;
51
/* Almost the same as the code path below, except that we divide the input by two
52
(while keeping the best accuracy possible) */
54
tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1);
55
ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1);
57
Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
58
Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
59
Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15);
60
Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15);
67
kiss_fft_cpx * Fout_beg = Fout;
70
Fout = Fout_beg + i*mm;
75
C_MUL (t, *Fout2 , *tw1);
77
C_SUB( *Fout2 , *Fout , t );
89
const kiss_fft_cfg st,
95
kiss_fft_cpx *tw1,*tw2,*tw3;
96
kiss_fft_cpx scratch[6];
103
kiss_fft_cpx * Fout_beg = Fout;
106
Fout = Fout_beg + i*mm;
107
tw3 = tw2 = tw1 = st->twiddles;
110
C_MUL(scratch[0],Fout[m] , *tw1 );
111
C_MUL(scratch[1],Fout[m2] , *tw2 );
112
C_MUL(scratch[2],Fout[m3] , *tw3 );
114
C_SUB( scratch[5] , *Fout, scratch[1] );
115
C_ADDTO(*Fout, scratch[1]);
116
C_ADD( scratch[3] , scratch[0] , scratch[2] );
117
C_SUB( scratch[4] , scratch[0] , scratch[2] );
118
C_SUB( Fout[m2], *Fout, scratch[3] );
122
C_ADDTO( *Fout , scratch[3] );
124
Fout[m].r = scratch[5].r - scratch[4].i;
125
Fout[m].i = scratch[5].i + scratch[4].r;
126
Fout[m3].r = scratch[5].r + scratch[4].i;
127
Fout[m3].i = scratch[5].i - scratch[4].r;
133
kiss_fft_cpx * Fout_beg = Fout;
136
Fout = Fout_beg + i*mm;
137
tw3 = tw2 = tw1 = st->twiddles;
140
C_MUL4(scratch[0],Fout[m] , *tw1 );
141
C_MUL4(scratch[1],Fout[m2] , *tw2 );
142
C_MUL4(scratch[2],Fout[m3] , *tw3 );
144
Fout->r = PSHR16(Fout->r, 2);
145
Fout->i = PSHR16(Fout->i, 2);
146
C_SUB( scratch[5] , *Fout, scratch[1] );
147
C_ADDTO(*Fout, scratch[1]);
148
C_ADD( scratch[3] , scratch[0] , scratch[2] );
149
C_SUB( scratch[4] , scratch[0] , scratch[2] );
150
Fout[m2].r = PSHR16(Fout[m2].r, 2);
151
Fout[m2].i = PSHR16(Fout[m2].i, 2);
152
C_SUB( Fout[m2], *Fout, scratch[3] );
156
C_ADDTO( *Fout , scratch[3] );
158
Fout[m].r = scratch[5].r + scratch[4].i;
159
Fout[m].i = scratch[5].i - scratch[4].r;
160
Fout[m3].r = scratch[5].r - scratch[4].i;
161
Fout[m3].i = scratch[5].i + scratch[4].r;
168
static void kf_bfly3(
170
const size_t fstride,
171
const kiss_fft_cfg st,
176
const size_t m2 = 2*m;
177
kiss_fft_cpx *tw1,*tw2;
178
kiss_fft_cpx scratch[5];
180
epi3 = st->twiddles[fstride*m];
182
tw1=tw2=st->twiddles;
186
C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
189
C_MUL(scratch[1],Fout[m] , *tw1);
190
C_MUL(scratch[2],Fout[m2] , *tw2);
192
C_ADD(scratch[3],scratch[1],scratch[2]);
193
C_SUB(scratch[0],scratch[1],scratch[2]);
197
Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
198
Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
200
C_MULBYSCALAR( scratch[0] , epi3.i );
202
C_ADDTO(*Fout,scratch[3]);
204
Fout[m2].r = Fout[m].r + scratch[0].i;
205
Fout[m2].i = Fout[m].i - scratch[0].r;
207
Fout[m].r -= scratch[0].i;
208
Fout[m].i += scratch[0].r;
214
static void kf_bfly5(
216
const size_t fstride,
217
const kiss_fft_cfg st,
221
kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
223
kiss_fft_cpx scratch[13];
224
kiss_fft_cpx * twiddles = st->twiddles;
227
ya = twiddles[fstride*m];
228
yb = twiddles[fstride*2*m];
237
for ( u=0; u<m; ++u ) {
239
C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
243
C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
244
C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
245
C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
246
C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
248
C_ADD( scratch[7],scratch[1],scratch[4]);
249
C_SUB( scratch[10],scratch[1],scratch[4]);
250
C_ADD( scratch[8],scratch[2],scratch[3]);
251
C_SUB( scratch[9],scratch[2],scratch[3]);
253
Fout0->r += scratch[7].r + scratch[8].r;
254
Fout0->i += scratch[7].i + scratch[8].i;
256
scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
257
scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
259
scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
260
scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
262
C_SUB(*Fout1,scratch[5],scratch[6]);
263
C_ADD(*Fout4,scratch[5],scratch[6]);
265
scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
266
scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
267
scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
268
scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
270
C_ADD(*Fout2,scratch[11],scratch[12]);
271
C_SUB(*Fout3,scratch[11],scratch[12]);
273
++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
277
/* perform the butterfly for one stage of a mixed radix FFT */
278
static void kf_bfly_generic(
280
const size_t fstride,
281
const kiss_fft_cfg st,
287
kiss_fft_cpx * twiddles = st->twiddles;
289
kiss_fft_cpx scratchbuf[17];
290
int Norig = st->nfft;
292
/*CHECKBUF(scratchbuf,nscratchbuf,p);*/
294
speex_fatal("KissFFT: max radix supported is 17");
296
for ( u=0; u<m; ++u ) {
298
for ( q1=0 ; q1<p ; ++q1 ) {
299
scratchbuf[q1] = Fout[ k ];
301
C_FIXDIV(scratchbuf[q1],p);
307
for ( q1=0 ; q1<p ; ++q1 ) {
309
Fout[ k ] = scratchbuf[0];
311
twidx += fstride * k;
312
if (twidx>=Norig) twidx-=Norig;
313
C_MUL(t,scratchbuf[q] , twiddles[twidx] );
314
C_ADDTO( Fout[ k ] ,t);
324
const kiss_fft_cpx * f,
325
const size_t fstride,
328
const kiss_fft_cfg st
331
const int p=*factors++; /* the radix */
332
const int m=*factors++; /* stage's fft length/p */
334
/*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
341
f += fstride*in_stride;
347
kf_shuffle( Fout , f, fstride*p, in_stride, factors,st);
348
f += fstride*in_stride;
357
const kiss_fft_cpx * f,
358
const size_t fstride,
361
const kiss_fft_cfg st,
368
kiss_fft_cpx * Fout_beg=Fout;
369
const int p=*factors++; /* the radix */
370
const int m=*factors++; /* stage's fft length/p */
372
/*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
379
f += fstride*in_stride;
385
kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
386
f += fstride*in_stride;
394
case 2: kf_bfly2(Fout,fstride,st,m); break;
395
case 3: kf_bfly3(Fout,fstride,st,m); break;
396
case 4: kf_bfly4(Fout,fstride,st,m); break;
397
case 5: kf_bfly5(Fout,fstride,st,m); break;
398
default: kf_bfly_generic(Fout,fstride,st,m,p); break;
401
/*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/
407
Fout = Fout_beg+i*m2;
408
const kiss_fft_cpx * f2 = f+i*s2;
412
f2 += fstride*in_stride;
416
kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m);
423
case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break;
424
case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly3(Fout,fstride,st,m);} break;
425
case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break;
426
case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly5(Fout,fstride,st,m);} break;
427
default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly_generic(Fout,fstride,st,m,p);} break;
432
/* facbuf is populated by p1,m1,p2,m2, ...
437
void kf_factor(int n,int * facbuf)
441
/*factor out powers of 4, powers of 2, then any remaining primes */
445
case 4: p = 2; break;
446
case 2: p = 3; break;
447
default: p += 2; break;
449
if (p>32000 || (spx_int32_t)p*(spx_int32_t)p > n)
450
p = n; /* no more factors, skip to end */
459
* User-callable function to allocate all necessary storage space for the fft.
461
* The return value is a contiguous block of memory, allocated with malloc. As such,
462
* It can be freed with free(), rather than a kiss_fft-specific function.
464
kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
466
kiss_fft_cfg st=NULL;
467
size_t memneeded = sizeof(struct kiss_fft_state)
468
+ sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/
470
if ( lenmem==NULL ) {
471
st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
473
if (mem != NULL && *lenmem >= memneeded)
474
st = (kiss_fft_cfg)mem;
480
st->inverse = inverse_fft;
482
for (i=0;i<nfft;++i) {
483
spx_word32_t phase = i;
486
kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft));
489
for (i=0;i<nfft;++i) {
490
const double pi=3.14159265358979323846264338327;
491
double phase = ( -2*pi /nfft ) * i;
494
kf_cexp(st->twiddles+i, phase );
497
kf_factor(nfft,st->factors);
505
void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
509
speex_fatal("In-place FFT not supported");
510
/*CHECKBUF(tmpbuf,ntmpbuf,st->nfft);
511
kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
512
SPEEX_MOVE(fout,tmpbuf,st->nfft);*/
514
kf_shuffle( fout, fin, 1,in_stride, st->factors,st);
515
kf_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1);
519
void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
521
kiss_fft_stride(cfg,fin,fout,1);