(*
 * Copyright (c) 2001 Stefan Kral
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 *)
25
open K7RegisterAllocationBasics
27
open AssignmentsToVfpinstrs
30
(* Revision-control identification string for this generator source. *)
let cvsid : string =
  "$Id: gen_twiddle.ml,v 1.14 2005-12-24 21:08:49 athena Exp $"
32
(* Kind of codelet to generate: [DIT] or [DIF], matching the -dit / -dif
   options named in the usage string. *)
type ditdif =
  | DIT
  | DIF
34
(* Usage banner handed to the argument parser; it starts with the name the
   program was invoked under (Sys.argv.(0)). *)
let usage =
  String.concat ""
    [ "Usage: "; Sys.argv.(0); " -n <number> [ -dit | -dif ]" ]
38
Arg.Unit(fun () -> ditdif := DIT),
39
" generate a DIT codelet";
42
Arg.Unit(fun () -> ditdif := DIF),
43
" generate a DIF codelet";
46
(* [choose sign m p] selects between two alternatives by the sign of the
   transform: [p] when [sign] is strictly positive, [m] otherwise
   (zero counts as non-positive). *)
let choose sign m p =
  match sign > 0 with
  | true -> p
  | false -> m
48
(* [twiddle_gen n sign nt byw] generates the k7vinstrs for a size-[n]
   twiddle codelet.

   - [n]    : transform size, passed to [Fft.dft], [store_array_c] and
              [vect_optimize].
   - [sign] : transform sign, passed to [Fft.dft]; also picks the function
              argument slot used for the in/out array (see [choose]).
   - [nt]   : used to advance [w] by [nt * 4] after each pass over the
              generated code.
   - [byw]  : function applied either to the loaded input (DIT) or to the
              DFT result (DIF).

   Returns [((initcode, body), k7vFlops body)]. *)
let twiddle_gen n sign nt byw =
49
let _ = info "generating..." in
52
(* DIT: [byw] is applied to the loaded input, then the DFT is taken.
   DIF: the DFT of the loaded input is taken first, then [byw] is applied. *)
| DIT -> Fft.dft sign n (byw (load_var @@ access_input))
53
| DIF -> byw (Fft.dft sign n (load_var @@ access_input))
55
let code = store_array_c n expr in
56
let code' = vect_optimize varinfo_twiddle n code in
58
let _ = info "generating k7vinstrs..." in
59
(* Function-argument slots.  The in/out argument is slot 1 when
   [sign <= 0] and slot 2 when [sign > 0]; w, iostride, m and idist are
   slots 3 to 6. *)
let fnarg_inout = choose sign (K7_MFunArg 1) (K7_MFunArg 2)
60
and fnarg_w = K7_MFunArg 3
61
and fnarg_iostride = K7_MFunArg 4
62
and fnarg_m = K7_MFunArg 5
63
and fnarg_idist = K7_MFunArg 6 in
65
(* Fresh virtual integer registers used by the generated code. *)
let (inout,inout2) = makeNewVintreg2 ()
66
and (iostride4p,iostride4n,idist4p) = makeNewVintreg3 ()
67
and (w, m) = makeNewVintreg2 () in
70
loadfnargs [(fnarg_inout, inout); (fnarg_w, w); (fnarg_m, m)] @
72
(inout2, get2ndhalfcode inout iostride4p inout2 (pred (msb n)));
73
(* idist4p and iostride4p are each loaded from their argument slot and then
   passed through K7V_IntLoadEA with K7V_SID(reg,4,0).
   NOTE(review): presumably this scales the value by 4 - confirm against
   the definition of K7V_SID. *)
(idist4p, [K7V_IntLoadMem(fnarg_idist, idist4p);
74
K7V_IntLoadEA(K7V_SID(idist4p,4,0), idist4p)]);
75
(iostride4p, [K7V_IntLoadMem(fnarg_iostride,iostride4p);
76
K7V_IntLoadEA(K7V_SID(iostride4p,4,0), iostride4p)]);
78
(* Wrap each (register, code) pair as on-demand initialisation code. *)
let initcode = map (fun (d,xs) -> AddIntOnDemandCode(d,xs)) int_initcode in
79
(* force W to be allocated in retval register *)
80
let initcode = initcode @ [FixRegister (w, retval)] in
81
(* [do_split] holds for sizes of 32 and above.
   NOTE(review): presumably it selects the split2 I/O unparser and the
   loop variant that uses inout2 - confirm at its use sites. *)
let do_split = n >= 32 in
85
strided_complex_split2_unparser
86
(inout,inout2,1 lsl (pred (msb n)),iostride4p))
88
([], strided_complex_unparser (inout,iostride4p)) in
89
let tw_unparser' = ([], unitstride_complex_unparser w) in
90
(* The same I/O unparser is passed for both of the first two arguments. *)
let unparser = make_asm_unparser io_unparser' io_unparser' tw_unparser' in
94
K7V_RefInts([inout; inout2; w; iostride4p]);
95
(* Shift the idist argument left by 2, in place in its argument slot. *)
K7V_IntUnaryOpMem(K7_IShlImm 2, fnarg_idist);
98
(vsimdinstrsToK7vinstrs unparser code') @
100
(* Tail of this variant: advance w by nt * 4, add the idist argument to
   both inout and inout2, decrement the m argument in place, then take the
   NotZero conditional branch to .L0. *)
K7V_IntUnaryOp(K7_IAddImm(nt * 4), w);
101
K7V_IntBinOpMem(K7_IAdd, fnarg_idist, inout);
102
K7V_IntBinOpMem(K7_IAdd, fnarg_idist, inout2);
103
K7V_IntUnaryOpMem(K7_IDec, fnarg_m);
104
K7V_RefInts([inout; inout2; w; iostride4p]);
105
K7V_CondBranch(K7_BCond_NotZero, K7V_BTarget_Named ".L0")
109
(* Second variant: idist (idist4p) and m are virtual integer registers
   here rather than memory operands, and inout2 is not used. *)
K7V_RefInts([inout; w; iostride4p; idist4p; m]);
112
(vsimdinstrsToK7vinstrs unparser code') @
114
K7V_IntUnaryOp(K7_IAddImm(nt * 4), w);
115
K7V_IntBinOp(K7_IAdd, idist4p, inout);
116
K7V_IntUnaryOp(K7_IDec, m);
117
K7V_RefInts([inout; w; iostride4p; idist4p; m]);
118
K7V_CondBranch(K7_BCond_NotZero, K7V_BTarget_Named ".L0");
121
(* Result: the initialisation code paired with the body, plus the flop
   count computed from the body. *)
in ((initcode, body), k7vFlops body)
125
let name = !Magic.codelet_name
126
and sign = !GenUtil.sign
128
let (bytwiddle, num_twiddles, twdesc) = Twiddle.twiddle_policy () in
129
let nt = num_twiddles n in
130
let byw = bytwiddle n sign (load_constant_array_c nt) in
132
let (code, (add, mul)) = twiddle_gen n sign nt byw in
133
let p = Printf.printf in
136
compileToAsm name 6 code;
138
p ".section .rodata\n";
140
p "\t.string \"%s\"\n" name;
143
p "%s" (Twiddle.twinstr_to_asm_string (twdesc n));
148
p "\t.long twinstr\n";
149
p "\t.long fftwf_kdft_ct_k7_%sgenus\n" (choose sign "m" "p");
150
p "\t.double %d\n" add;
151
p "\t.double %d\n" mul;
152
p "\t.double 0\n"; (* fma *)
153
p "\t.double 0\n"; (* other *)
154
p "\t.long 0\n"; (* s1 *)
155
p "\t.long 0\n"; (* s2 *)
156
p "\t.long 0\n"; (* dist *)
160
p ".globl %s\n" (register_fcn name);
161
p "%s:\n" (register_fcn name);
162
p "\tsubl $12,%%esp\n";
163
p "\tmovl 16(%%esp),%%eax\n";
164
p "\taddl $-4,%%esp\n";
166
p "\tpushl $%s\n" name;
170
| DIT -> p "\tcall fftwf_kdft_dit_register\n"
171
| DIF -> p "\tcall fftwf_kdft_dif_register\n"
173
p "\taddl $16,%%esp\n";
174
p "\taddl $12,%%esp\n";
181
parse (speclist @ Twiddle.speclist) usage;
182
generate (check_size());