2
* Copyright (c) 1997-1999 Massachusetts Institute of Technology
3
* Copyright (c) 2003, 2006 Matteo Frigo
4
* Copyright (c) 2003, 2006 Massachusetts Institute of Technology
6
* This program is free software; you can redistribute it and/or modify
7
* it under the terms of the GNU General Public License as published by
8
* the Free Software Foundation; either version 2 of the License, or
9
* (at your option) any later version.
11
* This program is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
* GNU General Public License for more details.
16
* You should have received a copy of the GNU General Public License
17
* along with this program; if not, write to the Free Software
18
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
(* $Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $ *)
23
let cvsid = "$Id: fft.ml,v 1.4 2006-01-05 03:04:27 stevenj Exp $"
25
(* This is the part of the generator that actually computes the FFT
31
(* choose a suitable factor of n *)
33
(* first choice: i such that gcd(i, n / i) = 1, i as big as possible *)
37
else if ((n mod i) == 0 && gcd i (n / i) == 1) then loop (i + 1) i
41
(* second choice: the biggest factor i of n, where i < sqrt(n), if any *)
45
else if ((n mod i) == 0) then loop (i + 1) i
49
in let i = choose1 n in
53
let is_power_of_two n = (n > 0) && ((n - 1) land n == 0)
55
let rec dft_prime sign n input =
58
let coeff = filter (exp n (sign * i * j))
59
in coeff @* (input j)) in
60
let computation_even = array n (sum identity)
62
let sumr = array n (sum real)
63
and sumi = array n (sum ((times Complex.i) @@ imag)) in
66
(* expose some common subexpressions *)
68
sigma 1 ((n + 1) / 2) (fun j -> input j @+ input (n - j))
70
let i' = min i (n - i) in
74
sumr i' @- sumi i') in
75
if (n >= !Magic.rader_min) then
76
dft_rader sign n input
83
and dft_rader sign p input =
85
let one_half = inverse_int 2 in
88
and make_product n a b =
89
let scale_factor = inverse_int n in
90
array n (fun i -> a i @* (scale_factor @* b i)) in
92
(* generates a convolution using ffts. (all arguments are the
93
same as to gen_convolution, below) *)
94
let gen_convolution_by_fft n a b addtoall =
96
and fft_b = dft 1 n b in
98
let fft_ab = make_product n fft_a fft_b
99
and dc_term i = if (i == 0) then addtoall else zero in
101
let fft_ab1 = array n (fun i -> fft_ab i @+ dc_term i)
103
let conv = dft (-1) n fft_ab1 in
106
(* alternate routine for convolution. Seems to work better for
107
small sizes. I have no idea why. *)
108
and gen_convolution_by_fft_alt n a b addtoall =
109
let ap = array n (fun i -> half (a i @+ a ((n - i) mod n)))
110
and am = array n (fun i -> half (a i @- a ((n - i) mod n)))
111
and bp = array n (fun i -> half (b i @+ b ((n - i) mod n)))
112
and bm = array n (fun i -> half (b i @- b ((n - i) mod n)))
115
let fft_ap = dft 1 n ap
116
and fft_am = dft 1 n am
117
and fft_bp = dft 1 n bp
118
and fft_bm = dft 1 n bm in
120
let fft_abpp = make_product n fft_ap fft_bp
121
and fft_abpm = make_product n fft_ap fft_bm
122
and fft_abmp = make_product n fft_am fft_bp
123
and fft_abmm = make_product n fft_am fft_bm
124
and sum = fft_ap 0 @+ fft_am 0
125
and dc_term i = if (i == 0) then addtoall else zero in
127
let fft_ab1 = array n (fun i -> (fft_abpp i @+ fft_abmm i) @+ dc_term i)
128
and fft_ab2 = array n (fun i -> fft_abpm i @+ fft_abmp i) in
129
let conv1 = dft (-1) n fft_ab1
130
and conv2 = dft (-1) n fft_ab2 in
131
let conv = array n (fun i ->
132
conv1 i @+ conv2 i) in
135
(* generator of assignment list assigning conv to the convolution of
136
a and b, all of which are of length n. addtoall is added to
137
all of the elements of the result. Returns (sum, convolution) pair
138
where sum is the sum of the elements of a. *)
140
in let gen_convolution =
141
if (p <= !Magic.alternate_convolution) then
142
gen_convolution_by_fft_alt
144
gen_convolution_by_fft
146
(* fft generator for prime n = p using Rader's algorithm for
147
turning the fft into a convolution, which then can be
148
performed in a variety of ways *)
150
let g = find_generator p in
151
let ginv = pow_mod g (p - 2) p in
152
let input_perm = array p (fun i -> input (pow_mod g i p))
153
and omega_perm = array p (fun i -> exp p (sign * (pow_mod ginv i p)))
154
and output_perm = array p (fun i -> pow_mod ginv i p)
156
(gen_convolution (p - 1) input_perm omega_perm (input 0))
161
let i' = suchthat 0 (fun i' -> i = output_perm i')
164
(* our modified version of the conjugate-pair split-radix algorithm,
165
which reduces the number of multiplications by rescaling the
166
sub-transforms (power-of-two n's only) *)
167
and newsplit sign n input =
168
let rec s n k = (* recursive scale factor *)
172
let k4 = (abs k) mod (n / 4) in
173
let k4' = if k4 <= (n / 8) then k4 else (n/4 - k4) in
174
(s (n / 4) k4') @* (real (exp n k4'))
176
and sinv n k = (* 1 / s(n,k) *)
180
let k4 = (abs k) mod (n / 4) in
181
let k4' = if k4 <= (n / 8) then k4 else (n/4 - k4) in
182
(sinv (n / 4) k4') @* (sec n k4')
184
in let sdiv2 n k = (s n k) @* (sinv (2*n) k) (* s(n,k) / s(2*n,k) *)
185
and sdiv4 n k = (* s(n,k) / s(4*n,k) *)
186
let k4 = (abs k) mod n in
187
sec (4*n) (if k4 <= (n / 2) then k4 else (n - k4))
189
in let t n k = (exp n k) @* (sdiv4 (n/4) k)
191
and dft1 input = input
192
and dft2 input = array 2 (fun k -> (input 0) @+ ((input 1) @* exp 2 k))
194
in let rec newsplit0 sign n input =
195
if (n == 1) then dft1 input
196
else if (n == 2) then dft2 input
197
else let u = newsplit0 sign (n / 2) (fun i -> input (i*2))
198
and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
199
and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n))
200
and twid = array n (fun k -> s (n/4) k @* exp n (sign * k)) in
201
let w = array n (fun k -> twid k @* z (k mod (n / 4)))
202
and w' = array n (fun k -> conj (twid k) @* z' (k mod (n / 4))) in
203
let ww = array n (fun k -> w k @+ w' k) in
204
array n (fun k -> u (k mod (n / 2)) @+ ww k)
206
and newsplitS sign n input =
207
if (n == 1) then dft1 input
208
else if (n == 2) then dft2 input
209
else let u = newsplitS2 sign (n / 2) (fun i -> input (i*2))
210
and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
211
and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
212
let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
213
and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
214
let ww = array n (fun k -> w k @+ w' k) in
215
array n (fun k -> u (k mod (n / 2)) @+ ww k)
217
and newsplitS2 sign n input =
218
if (n == 1) then dft1 input
219
else if (n == 2) then dft2 input
220
else let u = newsplitS4 sign (n / 2) (fun i -> input (i*2))
221
and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
222
and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
223
let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
224
and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
225
let ww = array n (fun k -> (w k @+ w' k) @* (sdiv2 n k)) in
226
array n (fun k -> u (k mod (n / 2)) @+ ww k)
228
and newsplitS4 sign n input =
229
if (n == 1) then dft1 input
230
else if (n == 2) then
232
in array 2 (fun k -> (f k) @* (sinv 8 k))
233
else let u = newsplitS2 sign (n / 2) (fun i -> input (i*2))
234
and z = newsplitS sign (n / 4) (fun i -> input (i*4 + 1))
235
and z' = newsplitS sign (n / 4) (fun i -> input ((n + i*4 - 1) mod n)) in
236
let w = array n (fun k -> t n (sign * k) @* z (k mod (n / 4)))
237
and w' = array n (fun k -> conj (t n (sign * k)) @* z' (k mod (n / 4))) in
238
let ww = array n (fun k -> w k @+ w' k) in
239
array n (fun k -> (u (k mod (n / 2)) @+ ww k) @* (sdiv4 n k))
241
in newsplit0 sign n input
243
and dft sign n input =
244
let rec cooley_tukey sign n1 n2 input =
247
dft sign n1 (fun i1 -> input (i1 * n2 + i2))) in
251
exp n (sign * i1 * i2) @* tmp1 i2 i1)) in
252
let tmp3 = array n1 (fun i1 -> dft sign n2 (tmp2 i1)) in
253
(fun i -> tmp3 (i mod n1) (i / n1))
256
* This is "exponent -1" split-radix by Dan Bernstein.
258
and split_radix_dit sign n input =
259
let f0 = dft sign (n / 2) (fun i -> input (i * 2))
260
and f10 = dft sign (n / 4) (fun i -> input (i * 4 + 1))
261
and f11 = dft sign (n / 4) (fun i -> input ((n + i * 4 - 1) mod n)) in
262
let g10 = array n (fun k ->
263
exp n (sign * k) @* f10 (k mod (n / 4)))
264
and g11 = array n (fun k ->
265
exp n (- sign * k) @* f11 (k mod (n / 4))) in
266
let g1 = array n (fun k -> g10 k @+ g11 k) in
267
array n (fun k -> f0 (k mod (n / 2)) @+ g1 k)
269
and split_radix_dif sign n input =
270
let n2 = n / 2 and n4 = n / 4 in
271
let x0 = array n2 (fun i -> input i @+ input (i + n2))
272
and x10 = array n4 (fun i -> input i @- input (i + n2))
273
and x11 = array n4 (fun i ->
274
input (i + n4) @- input (i + n2 + n4)) in
276
exp n (k * i * sign) @* (x10 i @+ exp 4 (k * sign) @* x11 i) in
277
let f0 = dft sign n2 x0
278
and f1 = array 4 (fun k -> dft sign n4 (x1 k)) in
280
if k mod 2 = 0 then f0 (k / 2)
281
else let k' = k mod 4 in f1 k' ((k - k') / 4))
283
and prime_factor sign n1 n2 input =
284
let tmp1 = array n2 (fun i2 ->
285
dft sign n1 (fun i1 -> input ((i1 * n2 + i2 * n1) mod n)))
286
in let tmp2 = array n1 (fun i1 ->
287
dft sign n2 (fun k2 -> tmp1 k2 i1))
288
in fun i -> tmp2 (i mod n1) (i mod n2)
290
in let algorithm sign n =
291
let r = choose_factor n in
292
if List.mem n !Magic.rader_list then
295
else if (r == 1) then (* n is prime *)
297
else if (gcd r (n / r)) == 1 then
298
prime_factor sign r (n / r)
299
else if (n mod 4 = 0 && n > 4) then
300
if !Magic.newsplit && is_power_of_two n then
302
else if !Magic.dif_split_radix then
303
split_radix_dif sign n
305
split_radix_dit sign n
307
cooley_tukey sign r (n / r)
309
array n (algorithm sign n input)