1
/********************************************************************
3
* THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4
* USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5
* GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6
* IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8
* THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
9
* by the XIPHOPHORUS Company http://www.xiph.org/ *
11
********************************************************************
13
function: *unnormalized* fft transform
14
last mod: $Id: smallft.c,v 1.19 2003/10/08 05:12:37 jm Exp $
16
********************************************************************/
18
/* FFT implementation from OggSquish, minus cosine transforms,
19
* minus all but radix 2/4 case. In Vorbis we only need this
22
* To do more than just power-of-two sized vectors, see the full
23
* version I wrote for NetLib.
25
* Note that the packing is a little strange; rather than the FFT r/i
26
* packing following R_0, I_n, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1,
27
* it follows R_0, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1, I_n like the
39
static void drfti1(int n, float *wa, int *ifac){
40
static int ntryh[4] = { 4,2,3,5 };
41
static float tpi = 6.28318530717958648f;
42
float arg,argh,argld,fi;
45
int ld, ii, ip, is, nq, nr;
85
for (k1=0;k1<nfm1;k1++){
97
for (ii=2;ii<ido;ii+=2){
109
static void fdrffti(int n, float *wsave, int *ifac){
112
drfti1(n, wsave+n, ifac);
115
static void dradf2(int ido,int l1,float *cc,float *ch,float *wa1){
118
int t0,t1,t2,t3,t4,t5,t6;
124
ch[t1<<1]=cc[t1]+cc[t2];
125
ch[(t1<<1)+t3-1]=cc[t1]-cc[t2];
145
tr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
146
ti2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
149
ch[t6-1]=cc[t5-1]+tr2;
150
ch[t4-1]=cc[t5-1]-tr2;
170
static void dradf4(int ido,int l1,float *cc,float *ch,float *wa1,
171
float *wa2,float *wa3){
172
static float hsqt2 = .70710678118654752f;
173
int i,k,t0,t1,t2,t3,t4,t5,t6;
174
float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
186
ch[t5=t3<<2]=tr1+tr2;
187
ch[(ido<<2)+t5-1]=tr2-tr1;
188
ch[(t5+=(ido<<1))-1]=cc[t3]-cc[t4];
189
ch[t5]=cc[t2]-cc[t1];
212
cr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
213
ci2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
215
cr3=wa2[i-2]*cc[t3-1]+wa2[i-1]*cc[t3];
216
ci3=wa2[i-2]*cc[t3]-wa2[i-1]*cc[t3-1];
218
cr4=wa3[i-2]*cc[t3-1]+wa3[i-1]*cc[t3];
219
ci4=wa3[i-2]*cc[t3]-wa3[i-1]*cc[t3-1];
249
t2=(t1=t0+ido-1)+(t0<<1);
256
ti1=-hsqt2*(cc[t1]+cc[t2]);
257
tr1=hsqt2*(cc[t1]-cc[t2]);
259
ch[t4-1]=tr1+cc[t6-1];
260
ch[t4+t5-1]=cc[t6-1]-tr1;
262
ch[t4]=ti1-cc[t1+t0];
263
ch[t4+t5]=ti1+cc[t1+t0];
272
static void dradfg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
273
float *c2,float *ch,float *ch2,float *wa){
275
static float tpi=6.283185307179586f;
276
int idij,ipph,i,j,k,l,ic,ik,is;
277
int t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
278
float dc2,ai1,ai2,ar1,ar2,ds2;
280
float dcp,arg,dsp,ar1h,ar2h;
294
for(ik=0;ik<idl1;ik++)ch2[ik]=c2[ik];
320
ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
321
ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
337
ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
338
ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
361
c1[t5-1]=ch[t5-1]+ch[t6-1];
362
c1[t6-1]=ch[t5]-ch[t6];
363
c1[t5]=ch[t5]+ch[t6];
364
c1[t6]=ch[t6-1]-ch[t5-1];
380
c1[t5-1]=ch[t5-1]+ch[t6-1];
381
c1[t6-1]=ch[t5]-ch[t6];
382
c1[t5]=ch[t5]+ch[t6];
383
c1[t6]=ch[t6-1]-ch[t5-1];
392
for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
404
c1[t3]=ch[t3]+ch[t4];
405
c1[t4]=ch[t4]-ch[t3];
417
ar1h=dcp*ar1-dsp*ai1;
425
for(ik=0;ik<idl1;ik++){
426
ch2[t4++]=c2[ik]+ar1*c2[t7++];
427
ch2[t5++]=ai1*c2[t6++];
441
ar2h=dc2*ar2-ds2*ai2;
449
for(ik=0;ik<idl1;ik++){
450
ch2[t6++]+=ar2*c2[t8++];
451
ch2[t7++]+=ai2*c2[t9++];
460
for(ik=0;ik<idl1;ik++)ch2[ik]+=c2[t2++];
470
for(i=0;i<ido;i++)cc[t4++]=ch[t3++];
531
cc[i+t7-1]=ch[i+t8-1]+ch[i+t9-1];
532
cc[ic+t6-1]=ch[i+t8-1]-ch[i+t9-1];
533
cc[i+t7]=ch[i+t8]+ch[i+t9];
534
cc[ic+t6]=ch[i+t9]-ch[i+t8];
561
cc[t7-1]=ch[t8-1]+ch[t9-1];
562
cc[t6-1]=ch[t8-1]-ch[t9-1];
563
cc[t7]=ch[t8]+ch[t9];
564
cc[t6]=ch[t9]-ch[t8];
574
static void drftf1(int n,float *c,float *ch,float *wa,int *ifac){
577
int ip,iw,ido,idl1,ix2,ix3;
584
for(k1=0;k1<nf;k1++){
598
dradf4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
600
dradf4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
607
dradf2(ido,l1,c,ch,wa+iw-1);
611
dradf2(ido,l1,ch,c,wa+iw-1);
618
dradfg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
623
dradfg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
632
for(i=0;i<n;i++)c[i]=ch[i];
635
static void dradb2(int ido,int l1,float *cc,float *ch,float *wa1){
636
int i,k,t0,t1,t2,t3,t4,t5,t6;
645
ch[t1]=cc[t2]+cc[t3+t2];
646
ch[t1+t0]=cc[t2]-cc[t3+t2];
664
ch[t3-1]=cc[t4-1]+cc[t5-1];
665
tr2=cc[t4-1]-cc[t5-1];
666
ch[t3]=cc[t4]-cc[t5];
668
ch[t6-1]=wa1[i-2]*tr2-wa1[i-1]*ti2;
669
ch[t6]=wa1[i-2]*ti2+wa1[i-1]*tr2;
680
ch[t1]=cc[t2]+cc[t2];
681
ch[t1+t0]=-(cc[t2+1]+cc[t2+1]);
687
static void dradb3(int ido,int l1,float *cc,float *ch,float *wa1,
689
static float taur = -.5f;
690
static float taui = .8660254037844386f;
691
int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
692
float ci2,ci3,di2,di3,cr2,cr3,dr2,dr3,ti2,tr2;
701
tr2=cc[t3-1]+cc[t3-1];
702
cr2=cc[t5]+(taur*tr2);
704
ci3=taui*(cc[t3]+cc[t3]);
729
tr2=cc[t5-1]+cc[t6-1];
730
cr2=cc[t7-1]+(taur*tr2);
731
ch[t8-1]=cc[t7-1]+tr2;
733
ci2=cc[t7]+(taur*ti2);
735
cr3=taui*(cc[t5-1]-cc[t6-1]);
736
ci3=taui*(cc[t5]+cc[t6]);
741
ch[t9-1]=wa1[i-2]*dr2-wa1[i-1]*di2;
742
ch[t9]=wa1[i-2]*di2+wa1[i-1]*dr2;
743
ch[t10-1]=wa2[i-2]*dr3-wa2[i-1]*di3;
744
ch[t10]=wa2[i-2]*di3+wa2[i-1]*dr3;
750
static void dradb4(int ido,int l1,float *cc,float *ch,float *wa1,
751
float *wa2,float *wa3){
752
static float sqrt2=1.414213562373095f;
753
int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8;
754
float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
764
tr3=cc[t4-1]+cc[t4-1];
766
tr1=cc[t3]-cc[(t4+=t6)-1];
781
t5=(t4=(t3=(t2=t1<<2)+t6))+t6;
793
tr1=cc[t2-1]-cc[t5-1];
794
tr2=cc[t2-1]+cc[t5-1];
795
ti4=cc[t3-1]-cc[t4-1];
796
tr3=cc[t3-1]+cc[t4-1];
806
ch[(t8=t7+t0)-1]=wa1[i-2]*cr2-wa1[i-1]*ci2;
807
ch[t8]=wa1[i-2]*ci2+wa1[i-1]*cr2;
808
ch[(t8+=t0)-1]=wa2[i-2]*cr3-wa2[i-1]*ci3;
809
ch[t8]=wa2[i-2]*ci3+wa2[i-1]*cr3;
810
ch[(t8+=t0)-1]=wa3[i-2]*cr4-wa3[i-1]*ci4;
811
ch[t8]=wa3[i-2]*ci4+wa3[i-1]*cr4;
816
if(ido%2 == 1)return;
828
tr1=cc[t1-1]-cc[t4-1];
829
tr2=cc[t1-1]+cc[t4-1];
831
ch[t5+=t0]=sqrt2*(tr1-ti1);
833
ch[t5+=t0]=-sqrt2*(tr1+ti1);
841
static void dradbg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
842
float *c2,float *ch,float *ch2,float *wa){
843
static float tpi=6.283185307179586f;
844
int idij,ipph,i,j,k,l,ik,is,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,
846
float dc2,ai1,ai2,ar1,ar2,ds2;
848
float dcp,arg,dsp,ar1h,ar2h;
900
ch[t3]=cc[t6-1]+cc[t6-1];
901
ch[t4]=cc[t6]+cc[t6];
909
if (ido == 1)goto L116;
933
ch[t5-1]=cc[t9-1]+cc[t11-1];
934
ch[t6-1]=cc[t9-1]-cc[t11-1];
935
ch[t5]=cc[t9]-cc[t11];
936
ch[t6]=cc[t9]+cc[t11];
967
ch[t5-1]=cc[t11-1]+cc[t12-1];
968
ch[t6-1]=cc[t11-1]-cc[t12-1];
969
ch[t5]=cc[t11]-cc[t12];
970
ch[t6]=cc[t11]+cc[t12];
989
ar1h=dcp*ar1-dsp*ai1;
997
for(ik=0;ik<idl1;ik++){
998
c2[t4++]=ch2[t6++]+ar1*ch2[t7++];
999
c2[t5++]=ai1*ch2[t8++];
1008
for(j=2;j<ipph;j++){
1011
ar2h=dc2*ar2-ds2*ai2;
1012
ai2=dc2*ai2+ds2*ar2;
1018
for(ik=0;ik<idl1;ik++){
1019
c2[t4++]+=ar2*ch2[t11++];
1020
c2[t5++]+=ai2*ch2[t12++];
1026
for(j=1;j<ipph;j++){
1029
for(ik=0;ik<idl1;ik++)ch2[ik]+=ch2[t2++];
1034
for(j=1;j<ipph;j++){
1040
ch[t3]=c1[t3]-c1[t4];
1041
ch[t4]=c1[t3]+c1[t4];
1047
if(ido==1)goto L132;
1048
if(nbd<l1)goto L128;
1052
for(j=1;j<ipph;j++){
1060
for(i=2;i<ido;i+=2){
1063
ch[t5-1]=c1[t5-1]-c1[t6];
1064
ch[t6-1]=c1[t5-1]+c1[t6];
1065
ch[t5]=c1[t5]+c1[t6-1];
1066
ch[t6]=c1[t5]-c1[t6-1];
1077
for(j=1;j<ipph;j++){
1082
for(i=2;i<ido;i+=2){
1088
ch[t5-1]=c1[t5-1]-c1[t6];
1089
ch[t6-1]=c1[t5-1]+c1[t6];
1090
ch[t5]=c1[t5]+c1[t6-1];
1091
ch[t6]=c1[t5]-c1[t6-1];
1101
for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
1112
if(nbd>l1)goto L139;
1121
for(i=2;i<ido;i+=2){
1126
c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1127
c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1144
for(i=2;i<ido;i+=2){
1147
c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1148
c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1155
static void drftb1(int n, float *c, float *ch, float *wa, int *ifac){
1158
int nf,ip,iw,ix2,ix3,ido,idl1;
1165
for(k1=0;k1<nf;k1++){
1175
dradb4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
1177
dradb4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
1185
dradb2(ido,l1,ch,c,wa+iw-1);
1187
dradb2(ido,l1,c,ch,wa+iw-1);
1196
dradb3(ido,l1,ch,c,wa+iw-1,wa+ix2-1);
1198
dradb3(ido,l1,c,ch,wa+iw-1,wa+ix2-1);
1203
/* The radix five case can be translated later..... */
1204
/* if(ip!=5)goto L112;
1210
dradb5(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1212
dradb5(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1218
dradbg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
1220
dradbg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
1230
for(i=0;i<n;i++)c[i]=ch[i];
1233
void drft_forward(struct drft_lookup *l,float *data){
1235
drftf1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1238
void drft_backward(struct drft_lookup *l,float *data){
1240
drftb1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1243
void drft_init(struct drft_lookup *l,int n)
1246
l->trigcache=(float*)speex_alloc(3*n*sizeof(*l->trigcache));
1247
l->splitcache=(int*)speex_alloc(32*sizeof(*l->splitcache));
1248
fdrffti(n, l->trigcache, l->splitcache);
1251
void drft_clear(struct drft_lookup *l)
1256
speex_free(l->trigcache);
1258
speex_free(l->splitcache);