5
* This FFT (Fast Fourier Transform) is from Takuya OOURA
7
* Copyright(C) 1996-1999 Takuya OOURA
8
* email: ooura@mmm.t.u-tokyo.ac.jp
10
* See full FFT documentation below ... (MCR)
12
* This software is provided "as is" without express or implied warranty.
17
* This file contains the FFT based FAST MULTIPLICATION function
18
* as well as its support functions.
23
#include "pgscript/utilities/mapm-lib/m_apm_lc.h"
26
#define MM_PI_2 1.570796326794896619231321691639751442098584699687
29
#ifndef WR5000 /* cos(MM_PI_2*0.5000) */
30
#define WR5000 0.707106781186547524400844362104849039284835937688
33
#ifndef RDFT_LOOP_DIV /* control of the RDFT's speed & tolerance */
34
#define RDFT_LOOP_DIV 64
37
extern void M_fast_mul_fft(UCHAR *, UCHAR *, UCHAR *, int);
39
extern void M_rdft(int, int, double *);
40
extern void M_bitrv2(int, double *);
41
extern void M_cftfsub(int, double *);
42
extern void M_cftbsub(int, double *);
43
extern void M_rftfsub(int, double *);
44
extern void M_rftbsub(int, double *);
45
extern void M_cft1st(int, double *);
46
extern void M_cftmdl(int, int, double *);
48
static double *M_aa_array, *M_bb_array;
49
static int M_size = -1;
51
static char *M_fft_error_msg = (char *)"\'M_fast_mul_fft\', Out of memory";
53
/****************************************************************************/
58
MAPM_FREE(M_aa_array);
59
MAPM_FREE(M_bb_array);
63
/****************************************************************************/
65
* multiply 'uu' by 'vv' with nbytes each
66
* yielding a 2*nbytes result in 'ww'.
67
* each byte contains a base 100 'digit',
68
* i.e.: range from 0-99.
72
* uu,vv [0] [1] [2] ... [N-1]
73
* ww [0] [1] [2] ... [2N-1]
76
void M_fast_mul_fft(UCHAR *ww, UCHAR *uu, UCHAR *vv, int nbytes)
78
int mflag, i, j, nn2, nn;
79
double carry, nnr, dtemp, *a, *b;
83
if (M_size < 0) /* if first time in, setup working arrays */
85
if (M_get_sizeof_int() == 2) /* if still using 16 bit compilers */
90
M_aa_array = (double *)MAPM_MALLOC(M_size * sizeof(double));
91
M_bb_array = (double *)MAPM_MALLOC(M_size * sizeof(double));
93
if ((M_aa_array == NULL) || (M_bb_array == NULL))
95
/* fatal, this does not return */
97
M_apm_log_error_msg(M_APM_FATAL, M_fft_error_msg);
108
a = (double *)MAPM_MALLOC((nn + 8) * sizeof(double));
109
b = (double *)MAPM_MALLOC((nn + 8) * sizeof(double));
111
if ((a == NULL) || (b == NULL))
113
/* fatal, this does not return */
115
M_apm_log_error_msg(M_APM_FATAL, M_fft_error_msg);
127
* convert normal base 100 MAPM numbers to base 10000
128
* for the FFT operation.
132
for (j=0; j < nn2; j++)
134
a[j] = (double)((int)uu[i] * 100 + uu[i+1]);
135
b[j] = (double)((int)vv[i] * 100 + vv[i+1]);
139
/* zero fill the second half of the arrays */
141
for (j=nn2; j < nn; j++)
147
/* perform the forward Fourier transforms for both numbers */
152
/* perform the convolution ... */
157
for (j=3; j <= nn; j += 2)
160
b[j-1] = dtemp * a[j-1] - b[j] * a[j];
161
b[j] = dtemp * a[j] + b[j] * a[j-1];
164
/* perform the inverse transform on the result */
168
/* perform a final pass to release all the carries */
169
/* we are still in base 10000 at this point */
173
nnr = 2.0 / (double)nn;
177
dtemp = b[--j] * nnr + carry + 0.5;
178
ul = (unsigned long)(dtemp * 1.0E-4);
180
b[j] = dtemp - carry * 10000.0;
186
/* copy result to our destination after converting back to base 100 */
189
M_get_div_rem((int)ul, w0, (w0 + 1));
191
for (j=0; j <= (nn - 2); j++)
194
M_get_div_rem((int)b[j], w0, (w0 + 1));
203
/****************************************************************************/
206
* The following info is from Takuya OOURA's documentation :
208
* NOTE : MAPM only uses the 'RDFT' function (as well as the
209
* functions RDFT calls). All the code from here down
210
* in this file is from Takuya OOURA. The only change I
211
* made was to add 'M_' in front of all the functions
212
* I used. This was to guard against any possible
213
* name collisions in the future.
218
* General Purpose FFT (Fast Fourier/Cosine/Sine Transform) Package
221
* A package to calculate Discrete Fourier/Cosine/Sine Transforms of
222
* 1-dimensional sequences of length 2^N.
224
* fft4g_h.c : FFT Package in C - Simple Version I (radix 4,2)
226
* rdft: Real Discrete Fourier Transform
229
* -------- rdft --------
230
* A method with a following butterfly operation appended to "cdft".
231
* In forward transform :
232
* A[k] = sum_j=0^n-1 a[j]*W(n)^(j*k), 0<=k<=n/2,
233
* W(n) = exp(2*pi*i/n),
234
* this routine makes an array x[] :
235
* x[j] = a[2*j] + i*a[2*j+1], 0<=j<n/2
236
* and calls "cdft" of length n/2 :
237
* X[k] = sum_j=0^n/2-1 x[j] * W(n/2)^(j*k), 0<=k<n.
238
* The result A[k] are :
239
* A[k] = X[k] - (1+i*W(n)^k)/2 * (X[k]-conjg(X[n/2-k])),
240
* A[n/2-k] = X[n/2-k] +
241
* conjg((1+i*W(n)^k)/2 * (X[k]-conjg(X[n/2-k]))),
243
* (notes: conjg() is a complex conjugate, X[n/2]=X[0]).
244
* ----------------------
247
* * Masatake MORI, Makoto NATORI, Tatuo TORII: Suchikeisan,
248
* Iwanamikouzajyouhoukagaku18, Iwanami, 1982 (Japanese)
249
* * Henri J. Nussbaumer: Fast Fourier Transform and Convolution
250
* Algorithms, Springer Verlag, 1982
251
* * C. S. Burrus, Notes on the FFT (with large FFT paper list)
252
* http://www-dsp.rice.edu/research/fft/fftnote.asc
255
* Copyright(C) 1996-1999 Takuya OOURA
256
* email: ooura@mmm.t.u-tokyo.ac.jp
257
* download: http://momonga.t.u-tokyo.ac.jp/~ooura/fft.html
258
* You may use, copy, modify this code for any purpose and
259
* without fee. You may distribute this ORIGINAL package.
265
rdft: Real Discrete Fourier Transform
268
void rdft(int, int, double *);
270
-------- Real DFT / Inverse of Real DFT --------
273
R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
274
I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
275
<case2> IRDFT (excluding scale)
276
a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
277
sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
278
sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
286
n >= 2, n = power of 2
287
a[0...n-1] :input/output data (double *)
290
a[2*k] = R[k], 0<=k<n/2
291
a[2*k+1] = I[k], 0<k<n/2
295
a[2*j] = R[j], 0<=j<n/2
296
a[2*j+1] = I[j], 0<j<n/2
303
for (j = 0; j <= n - 1; j++) {
309
void M_rdft(int n, int isgn, double *a)
325
a[1] = 0.5 * (a[0] - a[1]);
339
void M_bitrv2(int n, double *a)
341
int j0, k0, j1, k1, l, m, i, j, k;
342
double xr, xi, yr, yi;
352
for (k0 = 0; k0 < m; k0 += 2) {
354
for (j = j0; j < j0 + k0; j += 2) {
393
for (i = n >> 1; i > (k ^= i); i >>= 1);
405
for (i = n >> 1; i > (j0 ^= i); i >>= 1);
409
for (k0 = 2; k0 < m; k0 += 2) {
410
for (i = n >> 1; i > (j0 ^= i); i >>= 1);
412
for (j = j0; j < j0 + k0; j += 2) {
431
for (i = n >> 1; i > (k ^= i); i >>= 1);
439
void M_cftfsub(int n, double *a)
441
int j, j1, j2, j3, l;
442
double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
448
while ((l << 2) < n) {
454
for (j = 0; j < l; j += 2) {
459
x0i = a[j + 1] + a[j1 + 1];
461
x1i = a[j + 1] - a[j1 + 1];
463
x2i = a[j2 + 1] + a[j3 + 1];
465
x3i = a[j2 + 1] - a[j3 + 1];
467
a[j + 1] = x0i + x2i;
469
a[j2 + 1] = x0i - x2i;
471
a[j1 + 1] = x1i + x3r;
473
a[j3 + 1] = x1i - x3r;
476
for (j = 0; j < l; j += 2) {
479
x0i = a[j + 1] - a[j1 + 1];
481
a[j + 1] += a[j1 + 1];
490
void M_cftbsub(int n, double *a)
492
int j, j1, j2, j3, l;
493
double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
499
while ((l << 2) < n) {
505
for (j = 0; j < l; j += 2) {
510
x0i = -a[j + 1] - a[j1 + 1];
512
x1i = -a[j + 1] + a[j1 + 1];
514
x2i = a[j2 + 1] + a[j3 + 1];
516
x3i = a[j2 + 1] - a[j3 + 1];
518
a[j + 1] = x0i - x2i;
520
a[j2 + 1] = x0i + x2i;
522
a[j1 + 1] = x1i - x3r;
524
a[j3 + 1] = x1i + x3r;
527
for (j = 0; j < l; j += 2) {
530
x0i = -a[j + 1] + a[j1 + 1];
532
a[j + 1] = -a[j + 1] - a[j1 + 1];
541
void M_cft1st(int n, double *a)
544
double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
545
double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
578
a[10] = wn4r * (x0r - x0i);
579
a[11] = wn4r * (x0r + x0i);
582
a[14] = wn4r * (x0i - x0r);
583
a[15] = wn4r * (x0i + x0r);
586
for (j = 16; j < n; j += 16) {
587
for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1);
590
wk2r = 1 - 2 * wk1i * wk1i;
591
wk2i = 2 * wk1i * wk1r;
592
wk3r = wk1r - 2 * wk2i * wk1i;
593
wk3i = 2 * wk2i * wk1r - wk1i;
594
x0r = a[j] + a[j + 2];
595
x0i = a[j + 1] + a[j + 3];
596
x1r = a[j] - a[j + 2];
597
x1i = a[j + 1] - a[j + 3];
598
x2r = a[j + 4] + a[j + 6];
599
x2i = a[j + 5] + a[j + 7];
600
x3r = a[j + 4] - a[j + 6];
601
x3i = a[j + 5] - a[j + 7];
603
a[j + 1] = x0i + x2i;
606
a[j + 4] = wk2r * x0r - wk2i * x0i;
607
a[j + 5] = wk2r * x0i + wk2i * x0r;
610
a[j + 2] = wk1r * x0r - wk1i * x0i;
611
a[j + 3] = wk1r * x0i + wk1i * x0r;
614
a[j + 6] = wk3r * x0r - wk3i * x0i;
615
a[j + 7] = wk3r * x0i + wk3i * x0r;
616
x0r = wn4r * (wk1r - wk1i);
617
wk1i = wn4r * (wk1r + wk1i);
619
wk3r = wk1r - 2 * wk2r * wk1i;
620
wk3i = 2 * wk2r * wk1r - wk1i;
621
x0r = a[j + 8] + a[j + 10];
622
x0i = a[j + 9] + a[j + 11];
623
x1r = a[j + 8] - a[j + 10];
624
x1i = a[j + 9] - a[j + 11];
625
x2r = a[j + 12] + a[j + 14];
626
x2i = a[j + 13] + a[j + 15];
627
x3r = a[j + 12] - a[j + 14];
628
x3i = a[j + 13] - a[j + 15];
629
a[j + 8] = x0r + x2r;
630
a[j + 9] = x0i + x2i;
633
a[j + 12] = -wk2i * x0r - wk2r * x0i;
634
a[j + 13] = -wk2i * x0i + wk2r * x0r;
637
a[j + 10] = wk1r * x0r - wk1i * x0i;
638
a[j + 11] = wk1r * x0i + wk1i * x0r;
641
a[j + 14] = wk3r * x0r - wk3i * x0i;
642
a[j + 15] = wk3r * x0i + wk3i * x0r;
648
void M_cftmdl(int n, int l, double *a)
650
int j, j1, j2, j3, k, kj, kr, m, m2;
651
double ew, wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
652
double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
655
for (j = 0; j < l; j += 2) {
660
x0i = a[j + 1] + a[j1 + 1];
662
x1i = a[j + 1] - a[j1 + 1];
664
x2i = a[j2 + 1] + a[j3 + 1];
666
x3i = a[j2 + 1] - a[j3 + 1];
668
a[j + 1] = x0i + x2i;
670
a[j2 + 1] = x0i - x2i;
672
a[j1 + 1] = x1i + x3r;
674
a[j3 + 1] = x1i - x3r;
677
for (j = m; j < l + m; j += 2) {
682
x0i = a[j + 1] + a[j1 + 1];
684
x1i = a[j + 1] - a[j1 + 1];
686
x2i = a[j2 + 1] + a[j3 + 1];
688
x3i = a[j2 + 1] - a[j3 + 1];
690
a[j + 1] = x0i + x2i;
692
a[j2 + 1] = x0r - x2r;
695
a[j1] = wn4r * (x0r - x0i);
696
a[j1 + 1] = wn4r * (x0r + x0i);
699
a[j3] = wn4r * (x0i - x0r);
700
a[j3 + 1] = wn4r * (x0i + x0r);
705
for (k = m2; k < n; k += m2) {
706
for (kj = n >> 2; kj > (kr ^= kj); kj >>= 1);
709
wk2r = 1 - 2 * wk1i * wk1i;
710
wk2i = 2 * wk1i * wk1r;
711
wk3r = wk1r - 2 * wk2i * wk1i;
712
wk3i = 2 * wk2i * wk1r - wk1i;
713
for (j = k; j < l + k; j += 2) {
718
x0i = a[j + 1] + a[j1 + 1];
720
x1i = a[j + 1] - a[j1 + 1];
722
x2i = a[j2 + 1] + a[j3 + 1];
724
x3i = a[j2 + 1] - a[j3 + 1];
726
a[j + 1] = x0i + x2i;
729
a[j2] = wk2r * x0r - wk2i * x0i;
730
a[j2 + 1] = wk2r * x0i + wk2i * x0r;
733
a[j1] = wk1r * x0r - wk1i * x0i;
734
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
737
a[j3] = wk3r * x0r - wk3i * x0i;
738
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
740
x0r = wn4r * (wk1r - wk1i);
741
wk1i = wn4r * (wk1r + wk1i);
743
wk3r = wk1r - 2 * wk2r * wk1i;
744
wk3i = 2 * wk2r * wk1r - wk1i;
745
for (j = k + m; j < l + (k + m); j += 2) {
750
x0i = a[j + 1] + a[j1 + 1];
752
x1i = a[j + 1] - a[j1 + 1];
754
x2i = a[j2 + 1] + a[j3 + 1];
756
x3i = a[j2 + 1] - a[j3 + 1];
758
a[j + 1] = x0i + x2i;
761
a[j2] = -wk2i * x0r - wk2r * x0i;
762
a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
765
a[j1] = wk1r * x0r - wk1i * x0i;
766
a[j1 + 1] = wk1r * x0i + wk1i * x0r;
769
a[j3] = wk3r * x0r - wk3i * x0i;
770
a[j3 + 1] = wk3r * x0i + wk3i * x0r;
777
void M_rftfsub(int n, double *a)
780
double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
782
ec = 2 * MM_PI_2 / n;
794
i0 = i - 4 * RDFT_LOOP_DIV;
798
for (j = i - 4; j >= i0; j -= 4) {
800
xr = a[j + 2] - a[k - 2];
801
xi = a[j + 3] + a[k - 1];
802
yr = wdr * xr - wdi * xi;
803
yi = wdr * xi + wdi * xr;
809
wki += ss * (0.5 - wdr);
811
xi = a[j + 1] + a[k + 1];
812
yr = wkr * xr - wki * xi;
813
yi = wkr * xi + wki * xr;
819
wdi += ss * (0.5 - wkr);
824
wkr = 0.5 * sin(ec * i0);
825
wki = 0.5 * cos(ec * i0);
826
wdr = 0.5 - (wkr * w1r - wki * w1i);
827
wdi = wkr * w1i + wki * w1r;
831
xr = a[2] - a[n - 2];
832
xi = a[3] + a[n - 1];
833
yr = wdr * xr - wdi * xi;
834
yi = wdr * xi + wdi * xr;
843
void M_rftbsub(int n, double *a)
846
double ec, w1r, w1i, wkr, wki, wdr, wdi, ss, xr, xi, yr, yi;
848
ec = 2 * MM_PI_2 / n;
859
a[i + 1] = -a[i + 1];
861
i0 = i - 4 * RDFT_LOOP_DIV;
865
for (j = i - 4; j >= i0; j -= 4) {
867
xr = a[j + 2] - a[k - 2];
868
xi = a[j + 3] + a[k - 1];
869
yr = wdr * xr + wdi * xi;
870
yi = wdr * xi - wdi * xr;
872
a[j + 3] = yi - a[j + 3];
874
a[k - 1] = yi - a[k - 1];
876
wki += ss * (0.5 - wdr);
878
xi = a[j + 1] + a[k + 1];
879
yr = wkr * xr + wki * xi;
880
yi = wkr * xi - wki * xr;
882
a[j + 1] = yi - a[j + 1];
884
a[k + 1] = yi - a[k + 1];
886
wdi += ss * (0.5 - wkr);
891
wkr = 0.5 * sin(ec * i0);
892
wki = 0.5 * cos(ec * i0);
893
wdr = 0.5 - (wkr * w1r - wki * w1i);
894
wdi = wkr * w1i + wki * w1r;
898
xr = a[2] - a[n - 2];
899
xi = a[3] + a[n - 1];
900
yr = wdr * xr + wdi * xi;
901
yi = wdr * xi - wdi * xr;
905
a[n - 1] = yi - a[n - 1];