2
Generic functions for computing 1D convolutions of periodic sequences.
4
Supported FFT libraries:
5
DJBFFT - optional, used for power-of-two length arrays
7
FFTPACK - used if any of the above libraries is not available
9
Author: Pearu Peterson, September 2002
14
/**************** DJBFFT *****************************/
16
GEN_CACHE(ddjbfft,(int n)
18
,(caches_ddjbfft[i].n==n)
19
,caches_ddjbfft[id].ptr = (double*)malloc(sizeof(double)*n);
20
,free(caches_ddjbfft[id].ptr);
24
/**************** FFTW *****************************/
26
GEN_CACHE(drfftw,(int n)
29
,(caches_drfftw[i].n==n)
30
,caches_drfftw[id].plan1 = rfftw_create_plan(n,
32
FFTW_IN_PLACE|FFTW_ESTIMATE);
33
caches_drfftw[id].plan2 = rfftw_create_plan(n,
35
FFTW_IN_PLACE|FFTW_ESTIMATE);
36
,rfftw_destroy_plan(caches_drfftw[id].plan1);
37
rfftw_destroy_plan(caches_drfftw[id].plan2);
40
/**************** FFTPACK ZFFT **********************/
41
extern void F_FUNC(dfftf,DFFTF)(int*,double*,double*);
42
extern void F_FUNC(dfftb,DFFTB)(int*,double*,double*);
43
extern void F_FUNC(dffti,DFFTI)(int*,double*);
44
GEN_CACHE(dfftpack,(int n)
46
,(caches_dfftpack[i].n==n)
47
,caches_dfftpack[id].wsave = (double*)malloc(sizeof(double)*(2*n+15));
48
F_FUNC(dffti,DFFTI)(&n,caches_dfftpack[id].wsave);
49
,free(caches_dfftpack[id].wsave);
52
extern void destroy_convolve_cache(void) {
54
destroy_ddjbfft_caches();
57
destroy_drfftw_caches();
59
destroy_dfftpack_caches();
63
/**************** convolve **********************/
65
void convolve(int n,double* inout,double* omega,int swap_real_imag) {
71
rfftw_plan plan1 = NULL;
72
rfftw_plan plan2 = NULL;
78
case 2:;case 4:;case 8:;case 16:;case 32:;case 64:;case 128:;case 256:;
79
case 512:;case 1024:;case 2048:;case 4096:;case 8192:
80
i = get_cache_id_ddjbfft(n);
81
ptr = caches_ddjbfft[i].ptr;
82
COPYSTD2DJB(inout,ptr,n);
84
#define TMPCASE(N) case N: fftr8_##N(ptr); break
85
TMPCASE(2);TMPCASE(4);TMPCASE(8);TMPCASE(16);TMPCASE(32);
86
TMPCASE(64);TMPCASE(128);TMPCASE(256);TMPCASE(512);
87
TMPCASE(1024);TMPCASE(2048);TMPCASE(4096);TMPCASE(8192);
96
c = ptr[i] * omega[i];
97
ptr[i] = ptr[i+1] * omega[i+1];
105
#define TMPCASE(N)case N:fftr8_un##N(ptr);break
106
TMPCASE(2);TMPCASE(4);TMPCASE(8);TMPCASE(16);TMPCASE(32);
107
TMPCASE(64);TMPCASE(128);TMPCASE(256);TMPCASE(512);
108
TMPCASE(1024);TMPCASE(2048);TMPCASE(4096);TMPCASE(8192);
111
COPYINVDJB2STD2(ptr,inout,n);
118
i = get_cache_id_drfftw(n);
119
plan1 = caches_drfftw[i].plan1;
120
plan2 = caches_drfftw[i].plan2;
121
rfftw_one(plan1, (fftw_real *)inout, NULL);
122
if (swap_real_imag) {
124
inout[0] *= omega[0];
126
inout[n/2] *= omega[n/2];
128
c = inout[i] * omega[i];
129
inout[i] = omega[n-i] * inout[n-i];
135
inout[i] *= omega[i];
136
rfftw_one(plan2, (fftw_real *)inout, NULL);
138
i = get_cache_id_dfftpack(n);
139
wsave = caches_dfftpack[i].wsave;
140
F_FUNC(dfftf,DFFTF)(&n,inout,wsave);
141
if (swap_real_imag) {
144
inout[0] *= omega[0];
146
inout[n-1] *= omega[n-1];
148
c = inout[i] * omega[i];
149
inout[i] = inout[i+1] * omega[i+1];
155
inout[i] *= omega[i];
156
F_FUNC(dfftb,DFFTB)(&n,inout,wsave);
161
/**************** convolve **********************/
163
void convolve_z(int n,double* inout,double* omega_real,double* omega_imag) {
169
rfftw_plan plan1 = NULL;
170
rfftw_plan plan2 = NULL;
172
double* wsave = NULL;
176
case 2:;case 4:;case 8:;case 16:;case 32:;case 64:;case 128:;case 256:;
177
case 512:;case 1024:;case 2048:;case 4096:;case 8192:
178
i = get_cache_id_ddjbfft(n);
179
ptr = caches_ddjbfft[i].ptr;
180
COPYSTD2DJB(inout,ptr,n);
182
#define TMPCASE(N) case N: fftr8_##N(ptr); break
183
TMPCASE(2);TMPCASE(4);TMPCASE(8);TMPCASE(16);TMPCASE(32);
184
TMPCASE(64);TMPCASE(128);TMPCASE(256);TMPCASE(512);
185
TMPCASE(1024);TMPCASE(2048);TMPCASE(4096);TMPCASE(8192);
191
ptr[0] *= (omega_real[0]+omega_imag[0]);
192
ptr[1] *= (omega_real[1]+omega_imag[1]);
194
c = ptr[i] * omega_imag[i];
195
ptr[i] *= omega_real[i];
196
ptr[i] += ptr[i+1] * omega_imag[i+1];
197
ptr[i+1] *= omega_real[i+1];
202
#define TMPCASE(N)case N:fftr8_un##N(ptr);break
203
TMPCASE(2);TMPCASE(4);TMPCASE(8);TMPCASE(16);TMPCASE(32);
204
TMPCASE(64);TMPCASE(128);TMPCASE(256);TMPCASE(512);
205
TMPCASE(1024);TMPCASE(2048);TMPCASE(4096);TMPCASE(8192);
208
COPYINVDJB2STD2(ptr,inout,n);
215
i = get_cache_id_drfftw(n);
216
plan1 = caches_drfftw[i].plan1;
217
plan2 = caches_drfftw[i].plan2;
218
rfftw_one(plan1, (fftw_real *)inout, NULL);
221
inout[0] *= (omega_real[0]+omega_imag[0]);
223
inout[n/2] *= (omega_real[n/2]+omega_imag[n/2]);
225
c = inout[i] * omega_imag[i];
226
inout[i] *= omega_real[i];
227
inout[i] += omega_imag[n-i] * inout[n-i];
228
inout[n-i] *= omega_real[n-i];
232
rfftw_one(plan2, (fftw_real *)inout, NULL);
234
i = get_cache_id_dfftpack(n);
235
wsave = caches_dfftpack[i].wsave;
236
F_FUNC(dfftf,DFFTF)(&n,inout,wsave);
240
inout[0] *= (omega_real[0]+omega_imag[0]);
242
inout[n-1] *= (omega_real[n-1]+omega_imag[n-1]);
244
c = inout[i] * omega_imag[i];
245
inout[i] *= omega_real[i];
246
inout[i] += inout[i+1] * omega_imag[i+1];
247
inout[i+1] *= omega_real[i+1];
251
F_FUNC(dfftb,DFFTB)(&n,inout,wsave);
257
void init_convolution_kernel(int n,double* omega, int d,
258
double (*kernel_func)(int),
261
omega[k] = pow(sqrt(-1),d) * kernel_func(k)
262
omega[0] = kernel_func(0)
263
conjugate(omega[-k]) == omega[k]
267
case 2:;case 4:;case 8:;case 16:;case 32:;case 64:;case 128:;case 256:;
268
case 512:;case 1024:;case 2048:;case 4096:;case 8192:
270
int k,n2=n/2, *f = (int*)malloc(sizeof(int)*(n));
273
if (f[k]>n2) f[k] -= n;
274
omega[0] = (*kernel_func)(0)/n;
277
for (k=2;k<n-1;k+=2) {
278
omega[k] = (*kernel_func)(f[k])/n2;
279
omega[k+1] = -omega[k];
281
omega[1] = (zero_nyquist?0.0:(*kernel_func)(n2)/n);
285
omega[k] = omega[k+1] = -(*kernel_func)(f[k])/n2;
286
omega[1] = (zero_nyquist?0.0:(*kernel_func)(n2)/n);
289
for (k=2;k<n-1;k+=2) {
290
omega[k] = -(*kernel_func)(f[k])/n2;
291
omega[k+1] = -omega[k];
293
omega[1] = (zero_nyquist?0.0:-(*kernel_func)(n2)/n);
297
omega[k] = omega[k+1] = (*kernel_func)(f[k])/n2;
298
omega[1] = (zero_nyquist?0.0:-(*kernel_func)(n2)/n);
309
omega[0] = (*kernel_func)(0)/n;;
313
omega[k] = omega[n-k] = (*kernel_func)(k)/n;
315
omega[n/2] = (zero_nyquist?0.0:(*kernel_func)(n/2)/n);
319
omega[k] = (*kernel_func)(k)/n;
320
omega[n-k] = -omega[k];
323
omega[n/2] = (zero_nyquist?0.0:(*kernel_func)(n/2)/n);
327
omega[k] = omega[n-k] = -(*kernel_func)(k)/n;
329
omega[n/2] = (zero_nyquist?0.0:-(*kernel_func)(n/2)/n);
333
omega[k] = -(*kernel_func)(k)/n;
334
omega[n-k] = -omega[k];
337
omega[n/2] = (zero_nyquist?0.0:-(*kernel_func)(n/2)/n);
343
int j,k,l=(n%2?n:n-1);
344
omega[0] = (*kernel_func)(0)/n;
347
for (k=j=1;j<l;j+=2,++k)
348
omega[j] = omega[j+1] = (*kernel_func)(k)/n;
350
omega[n-1] = (zero_nyquist?0.0:(*kernel_func)(k)/n);
353
for (k=j=1;j<l;j+=2,++k) {
354
omega[j] = (*kernel_func)(k)/n;
355
omega[j+1] = -omega[j];
358
omega[n-1] = (zero_nyquist?0.0:(*kernel_func)(k)/n);
361
for (k=j=1;j<l;j+=2,++k)
362
omega[j] = omega[j+1] = -(*kernel_func)(k)/n;
364
omega[n-1] = (zero_nyquist?0.0:-(*kernel_func)(k)/n);
367
for (k=j=1;j<l;j+=2,++k) {
368
omega[j] = -(*kernel_func)(k)/n;
369
omega[j+1] = -omega[j];
372
omega[n-1] = (zero_nyquist?0.0:-(*kernel_func)(k)/n);