1
/********************************************************************
3
* THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8
* THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
9
* by the XIPHOPHORUS Company http://www.xiph.org/ *
11
********************************************************************
13
function: *unnormalized* fft transform
14
last mod: $Id: smallft.c 2002 2008-06-10 14:09:37Z nanang $
16
********************************************************************/
18
/* FFT implementation from OggSquish, minus cosine transforms,
19
* minus all but radix 2/4 case. In Vorbis we only need this
22
* To do more than just power-of-two sized vectors, see the full
23
* version I wrote for NetLib.
25
* Note that the packing is a little strange; rather than the FFT r/i
26
* packing following R_0, I_n, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1,
27
* it follows R_0, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1, I_n like the
38
#include "os_support.h"
40
static void drfti1(int n, float *wa, int *ifac){
41
static int ntryh[4] = { 4,2,3,5 };
42
static float tpi = 6.28318530717958648f;
43
float arg,argh,argld,fi;
46
int ld, ii, ip, is, nq, nr;
86
for (k1=0;k1<nfm1;k1++){
98
for (ii=2;ii<ido;ii+=2){
110
static void fdrffti(int n, float *wsave, int *ifac){
113
drfti1(n, wsave+n, ifac);
116
static void dradf2(int ido,int l1,float *cc,float *ch,float *wa1){
119
int t0,t1,t2,t3,t4,t5,t6;
125
ch[t1<<1]=cc[t1]+cc[t2];
126
ch[(t1<<1)+t3-1]=cc[t1]-cc[t2];
146
tr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
147
ti2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
150
ch[t6-1]=cc[t5-1]+tr2;
151
ch[t4-1]=cc[t5-1]-tr2;
171
static void dradf4(int ido,int l1,float *cc,float *ch,float *wa1,
172
float *wa2,float *wa3){
173
static float hsqt2 = .70710678118654752f;
174
int i,k,t0,t1,t2,t3,t4,t5,t6;
175
float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
187
ch[t5=t3<<2]=tr1+tr2;
188
ch[(ido<<2)+t5-1]=tr2-tr1;
189
ch[(t5+=(ido<<1))-1]=cc[t3]-cc[t4];
190
ch[t5]=cc[t2]-cc[t1];
213
cr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
214
ci2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
216
cr3=wa2[i-2]*cc[t3-1]+wa2[i-1]*cc[t3];
217
ci3=wa2[i-2]*cc[t3]-wa2[i-1]*cc[t3-1];
219
cr4=wa3[i-2]*cc[t3-1]+wa3[i-1]*cc[t3];
220
ci4=wa3[i-2]*cc[t3]-wa3[i-1]*cc[t3-1];
250
t2=(t1=t0+ido-1)+(t0<<1);
257
ti1=-hsqt2*(cc[t1]+cc[t2]);
258
tr1=hsqt2*(cc[t1]-cc[t2]);
260
ch[t4-1]=tr1+cc[t6-1];
261
ch[t4+t5-1]=cc[t6-1]-tr1;
263
ch[t4]=ti1-cc[t1+t0];
264
ch[t4+t5]=ti1+cc[t1+t0];
273
static void dradfg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
274
float *c2,float *ch,float *ch2,float *wa){
276
static float tpi=6.283185307179586f;
277
int idij,ipph,i,j,k,l,ic,ik,is;
278
int t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
279
float dc2,ai1,ai2,ar1,ar2,ds2;
281
float dcp,arg,dsp,ar1h,ar2h;
295
for(ik=0;ik<idl1;ik++)ch2[ik]=c2[ik];
321
ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
322
ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
338
ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
339
ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
362
c1[t5-1]=ch[t5-1]+ch[t6-1];
363
c1[t6-1]=ch[t5]-ch[t6];
364
c1[t5]=ch[t5]+ch[t6];
365
c1[t6]=ch[t6-1]-ch[t5-1];
381
c1[t5-1]=ch[t5-1]+ch[t6-1];
382
c1[t6-1]=ch[t5]-ch[t6];
383
c1[t5]=ch[t5]+ch[t6];
384
c1[t6]=ch[t6-1]-ch[t5-1];
393
for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
405
c1[t3]=ch[t3]+ch[t4];
406
c1[t4]=ch[t4]-ch[t3];
418
ar1h=dcp*ar1-dsp*ai1;
426
for(ik=0;ik<idl1;ik++){
427
ch2[t4++]=c2[ik]+ar1*c2[t7++];
428
ch2[t5++]=ai1*c2[t6++];
442
ar2h=dc2*ar2-ds2*ai2;
450
for(ik=0;ik<idl1;ik++){
451
ch2[t6++]+=ar2*c2[t8++];
452
ch2[t7++]+=ai2*c2[t9++];
461
for(ik=0;ik<idl1;ik++)ch2[ik]+=c2[t2++];
471
for(i=0;i<ido;i++)cc[t4++]=ch[t3++];
532
cc[i+t7-1]=ch[i+t8-1]+ch[i+t9-1];
533
cc[ic+t6-1]=ch[i+t8-1]-ch[i+t9-1];
534
cc[i+t7]=ch[i+t8]+ch[i+t9];
535
cc[ic+t6]=ch[i+t9]-ch[i+t8];
562
cc[t7-1]=ch[t8-1]+ch[t9-1];
563
cc[t6-1]=ch[t8-1]-ch[t9-1];
564
cc[t7]=ch[t8]+ch[t9];
565
cc[t6]=ch[t9]-ch[t8];
575
static void drftf1(int n,float *c,float *ch,float *wa,int *ifac){
578
int ip,iw,ido,idl1,ix2,ix3;
585
for(k1=0;k1<nf;k1++){
599
dradf4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
601
dradf4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
608
dradf2(ido,l1,c,ch,wa+iw-1);
612
dradf2(ido,l1,ch,c,wa+iw-1);
619
dradfg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
624
dradfg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
633
for(i=0;i<n;i++)c[i]=ch[i];
636
static void dradb2(int ido,int l1,float *cc,float *ch,float *wa1){
637
int i,k,t0,t1,t2,t3,t4,t5,t6;
646
ch[t1]=cc[t2]+cc[t3+t2];
647
ch[t1+t0]=cc[t2]-cc[t3+t2];
665
ch[t3-1]=cc[t4-1]+cc[t5-1];
666
tr2=cc[t4-1]-cc[t5-1];
667
ch[t3]=cc[t4]-cc[t5];
669
ch[t6-1]=wa1[i-2]*tr2-wa1[i-1]*ti2;
670
ch[t6]=wa1[i-2]*ti2+wa1[i-1]*tr2;
681
ch[t1]=cc[t2]+cc[t2];
682
ch[t1+t0]=-(cc[t2+1]+cc[t2+1]);
688
static void dradb3(int ido,int l1,float *cc,float *ch,float *wa1,
690
static float taur = -.5f;
691
static float taui = .8660254037844386f;
692
int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
693
float ci2,ci3,di2,di3,cr2,cr3,dr2,dr3,ti2,tr2;
702
tr2=cc[t3-1]+cc[t3-1];
703
cr2=cc[t5]+(taur*tr2);
705
ci3=taui*(cc[t3]+cc[t3]);
730
tr2=cc[t5-1]+cc[t6-1];
731
cr2=cc[t7-1]+(taur*tr2);
732
ch[t8-1]=cc[t7-1]+tr2;
734
ci2=cc[t7]+(taur*ti2);
736
cr3=taui*(cc[t5-1]-cc[t6-1]);
737
ci3=taui*(cc[t5]+cc[t6]);
742
ch[t9-1]=wa1[i-2]*dr2-wa1[i-1]*di2;
743
ch[t9]=wa1[i-2]*di2+wa1[i-1]*dr2;
744
ch[t10-1]=wa2[i-2]*dr3-wa2[i-1]*di3;
745
ch[t10]=wa2[i-2]*di3+wa2[i-1]*dr3;
751
static void dradb4(int ido,int l1,float *cc,float *ch,float *wa1,
752
float *wa2,float *wa3){
753
static float sqrt2=1.414213562373095f;
754
int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8;
755
float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
765
tr3=cc[t4-1]+cc[t4-1];
767
tr1=cc[t3]-cc[(t4+=t6)-1];
782
t5=(t4=(t3=(t2=t1<<2)+t6))+t6;
794
tr1=cc[t2-1]-cc[t5-1];
795
tr2=cc[t2-1]+cc[t5-1];
796
ti4=cc[t3-1]-cc[t4-1];
797
tr3=cc[t3-1]+cc[t4-1];
807
ch[(t8=t7+t0)-1]=wa1[i-2]*cr2-wa1[i-1]*ci2;
808
ch[t8]=wa1[i-2]*ci2+wa1[i-1]*cr2;
809
ch[(t8+=t0)-1]=wa2[i-2]*cr3-wa2[i-1]*ci3;
810
ch[t8]=wa2[i-2]*ci3+wa2[i-1]*cr3;
811
ch[(t8+=t0)-1]=wa3[i-2]*cr4-wa3[i-1]*ci4;
812
ch[t8]=wa3[i-2]*ci4+wa3[i-1]*cr4;
817
if(ido%2 == 1)return;
829
tr1=cc[t1-1]-cc[t4-1];
830
tr2=cc[t1-1]+cc[t4-1];
832
ch[t5+=t0]=sqrt2*(tr1-ti1);
834
ch[t5+=t0]=-sqrt2*(tr1+ti1);
842
static void dradbg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
843
float *c2,float *ch,float *ch2,float *wa){
844
static float tpi=6.283185307179586f;
845
int idij,ipph,i,j,k,l,ik,is,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,
847
float dc2,ai1,ai2,ar1,ar2,ds2;
849
float dcp,arg,dsp,ar1h,ar2h;
901
ch[t3]=cc[t6-1]+cc[t6-1];
902
ch[t4]=cc[t6]+cc[t6];
910
if (ido == 1)goto L116;
934
ch[t5-1]=cc[t9-1]+cc[t11-1];
935
ch[t6-1]=cc[t9-1]-cc[t11-1];
936
ch[t5]=cc[t9]-cc[t11];
937
ch[t6]=cc[t9]+cc[t11];
968
ch[t5-1]=cc[t11-1]+cc[t12-1];
969
ch[t6-1]=cc[t11-1]-cc[t12-1];
970
ch[t5]=cc[t11]-cc[t12];
971
ch[t6]=cc[t11]+cc[t12];
990
ar1h=dcp*ar1-dsp*ai1;
998
for(ik=0;ik<idl1;ik++){
999
c2[t4++]=ch2[t6++]+ar1*ch2[t7++];
1000
c2[t5++]=ai1*ch2[t8++];
1009
for(j=2;j<ipph;j++){
1012
ar2h=dc2*ar2-ds2*ai2;
1013
ai2=dc2*ai2+ds2*ar2;
1019
for(ik=0;ik<idl1;ik++){
1020
c2[t4++]+=ar2*ch2[t11++];
1021
c2[t5++]+=ai2*ch2[t12++];
1027
for(j=1;j<ipph;j++){
1030
for(ik=0;ik<idl1;ik++)ch2[ik]+=ch2[t2++];
1035
for(j=1;j<ipph;j++){
1041
ch[t3]=c1[t3]-c1[t4];
1042
ch[t4]=c1[t3]+c1[t4];
1048
if(ido==1)goto L132;
1049
if(nbd<l1)goto L128;
1053
for(j=1;j<ipph;j++){
1061
for(i=2;i<ido;i+=2){
1064
ch[t5-1]=c1[t5-1]-c1[t6];
1065
ch[t6-1]=c1[t5-1]+c1[t6];
1066
ch[t5]=c1[t5]+c1[t6-1];
1067
ch[t6]=c1[t5]-c1[t6-1];
1078
for(j=1;j<ipph;j++){
1083
for(i=2;i<ido;i+=2){
1089
ch[t5-1]=c1[t5-1]-c1[t6];
1090
ch[t6-1]=c1[t5-1]+c1[t6];
1091
ch[t5]=c1[t5]+c1[t6-1];
1092
ch[t6]=c1[t5]-c1[t6-1];
1102
for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
1113
if(nbd>l1)goto L139;
1122
for(i=2;i<ido;i+=2){
1127
c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1128
c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1145
for(i=2;i<ido;i+=2){
1148
c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1149
c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1156
static void drftb1(int n, float *c, float *ch, float *wa, int *ifac){
1159
int nf,ip,iw,ix2,ix3,ido,idl1;
1166
for(k1=0;k1<nf;k1++){
1176
dradb4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
1178
dradb4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
1186
dradb2(ido,l1,ch,c,wa+iw-1);
1188
dradb2(ido,l1,c,ch,wa+iw-1);
1197
dradb3(ido,l1,ch,c,wa+iw-1,wa+ix2-1);
1199
dradb3(ido,l1,c,ch,wa+iw-1,wa+ix2-1);
1204
/* The radix five case can be translated later..... */
1205
/* if(ip!=5)goto L112;
1211
dradb5(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1213
dradb5(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1219
dradbg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
1221
dradbg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
1231
for(i=0;i<n;i++)c[i]=ch[i];
1234
void spx_drft_forward(struct drft_lookup *l,float *data){
1236
drftf1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1239
void spx_drft_backward(struct drft_lookup *l,float *data){
1241
drftb1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1244
void spx_drft_init(struct drft_lookup *l,int n)
1247
l->trigcache=(float*)speex_alloc(3*n*sizeof(*l->trigcache));
1248
l->splitcache=(int*)speex_alloc(32*sizeof(*l->splitcache));
1249
fdrffti(n, l->trigcache, l->splitcache);
1252
void spx_drft_clear(struct drft_lookup *l)
1257
speex_free(l->trigcache);
1259
speex_free(l->splitcache);