2
* Copyright (c) 2001 Stefan Kral
4
* This program is free software; you can redistribute it and/or modify
5
* it under the terms of the GNU General Public License as published by
6
* the Free Software Foundation; either version 2 of the License, or
7
* (at your option) any later version.
9
* This program is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
* GNU General Public License for more details.
14
* You should have received a copy of the GNU General Public License
15
* along with this program; if not, write to the Free Software
16
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
29
open VAnnotatedScheduler
31
open K7RegisterAllocationBasics
32
open K7RegisterReallocation
35
open K7FlatInstructionScheduling
36
open K7InstructionSchedulingBasics
38
open AssignmentsToVfpinstrs
41
let vect_schedule vect_optimized =
42
let _ = info "vectorized scheduling..." in
43
let vect_scheduled = schedule vect_optimized in
44
let _ = info "vectorized annotating..." in
45
let vect_annotated = annotate vect_scheduled in
46
let _ = info "vectorized linearizing..." in
47
annotatedscheduleToVsimdinstrs vect_annotated
49
let vect_optimize varinfo n dag =
50
let _ = info "simplifying..." in
51
let optim = Algsimp.algsimp dag in
52
let alist = To_alist.to_assignments optim in
57
(fun x -> Printf.printf "%s\n" (Expr.assignment_to_string x))
62
let _ = info "mapping assignments to vfpinstrs..." in
63
let code1 = AssignmentsToVfpinstrs.assignmentsToVfpinstrs varinfo alist in
64
let _ = info "vectorizing..." in
65
let (operandsize,code2) = K7Vectorization.vectorize varinfo n code1 in
67
let _ = info "optimizing..." in
68
let code3 = VK7Optimization.apply_optrules code2 in
70
let code4 = vect_schedule code3 in
72
let _ = info "improving..." in
73
let code5 = VImproveSchedule.improve_schedule code4 in
78
(fun x -> Printf.printf "%s\n" (VFpUnparsing.vfpinstrToString x))
86
(fun x -> Printf.printf "%s\n" (VSimdUnparsing.vsimdinstrToString x))
93
(****************************************************************************)
95
let get2ndhalfcode base one dst ld_n =
97
[K7V_IntCpyUnaryOp(K7_ICopy, base, dst)]
98
(* issue a warning or sth similar *)
99
(* failwith "get2ndhalfcode: ld_n < 0 not supported!" *)
100
else if ld_n <= 3 then
101
[K7V_IntLoadEA(K7V_RISID(base,one,1 lsl ld_n,0), dst)]
103
[K7V_IntCpyUnaryOp(K7_ICopy, one, dst);
104
K7V_IntUnaryOp(K7_IShlImm ld_n, dst);
105
K7V_IntBinOp(K7_IAdd, base, dst)]
108
let loadfnargs xs = map (fun (src,dst) -> (dst, [K7V_IntLoadMem(src,dst)])) xs
110
(****************************************************************************)
111
let asmline x = print_string (x ^ "\n")
113
(* Warning: this function produces side-effects. *)
114
let emit_instr (_,_,instr) =
115
k7rinstrAdaptStackPointerAdjustment instr;
116
asmline (K7Unparsing.k7rinstrToString instr)
118
let emit_instr' (t,cplen,i) =
119
k7rinstrAdaptStackPointerAdjustment i;
120
Printf.printf "\t/* t=%d, cp=%d */\t" t cplen;
121
print_string (K7Unparsing.k7rinstrToString i);
125
k7rinstrAdaptStackPointerAdjustment i;
126
Printf.printf "\t/* t=?, cp=? */\t";
127
print_string (K7Unparsing.k7rinstrToString i);
130
let emit_instrk7v' i =
131
Printf.printf "\t/* t=?, cp=? */\t";
132
print_string (K7Unparsing.k7vinstrToString i);
136
Printf.printf "\t/* t=?, cp=? */\t";
137
print_string (VSimdUnparsing.vsimdinstrToString i);
140
(****************************************************************************)
143
(* determine stackframe size in bytes *)
144
let getStackFrameSize instrs =
145
let processInstr ((operand_size_bytes,max_bytes) as s0) = function
146
| K7R_SimdSpill(_,idx) ->
147
(operand_size_bytes, max max_bytes ((idx+1) * operand_size_bytes))
148
| K7R_SimdPromiseCellSize operand_size ->
149
(k7operandsizeToInteger operand_size, max_bytes)
152
snd (fold_left processInstr (k7operandsizeToInteger K7_QWord,min_int) instrs)
154
(****************************************************************************)
157
| SC_NumberPair of Number.number * Number.number
158
| SC_SimdPos of vsimdpos
160
let eq_numberpair (n,m) (n',m') =
161
Number.equal n n' && Number.equal m m'
163
let eq_simdconstant a b = match (a,b) with
164
| (SC_SimdPos a, SC_SimdPos b) ->
166
| (SC_NumberPair(n,m), SC_NumberPair(n',m')) ->
167
eq_numberpair (n,m) (n',m')
171
let k7rinstrsToConstants =
172
let rec rinstrsToConsts bag = function
176
| K7R_SimdUnaryOp(K7_FPChs p,_)
177
when not (exists (eq_simdconstant (SC_SimdPos p)) bag) ->
178
rinstrsToConsts ((SC_SimdPos p)::bag) xs
179
| K7R_SimdUnaryOp(K7_FPMulConst(n,m),_)
180
when not (exists (eq_simdconstant (SC_NumberPair (n,m))) bag) ->
181
rinstrsToConsts ((SC_NumberPair (n,m))::bag) xs
182
| _ -> rinstrsToConsts bag xs)
183
in rinstrsToConsts []
185
let myconst_to_decl = function
187
(vsimdposToChsconstnamestring V_Lo) ^ ": .long 0x80000000, 0x00000000"
189
(vsimdposToChsconstnamestring V_Hi) ^ ": .long 0x00000000, 0x80000000"
190
| SC_SimdPos V_LoHi ->
191
(vsimdposToChsconstnamestring V_LoHi) ^ ": .long 0x80000000, 0x80000000"
192
| SC_NumberPair(n,m) ->
193
sprintf "%s%s: .float %s, %s"
199
(****************************************************************************)
201
let instr_wants_down_pass1 = function
202
| K7R_SimdPromiseCellSize _ -> false
203
| K7R_SimdLoadStoreBarrier -> true
204
| K7R_IntLoadMem _ -> true
205
| K7R_IntStoreMem _ -> false
206
| K7R_IntLoadEA _ -> true
207
| K7R_IntUnaryOp _ -> true
208
| K7R_IntUnaryOpMem _ -> true
209
| K7R_IntCpyUnaryOp _ -> true
210
| K7R_IntBinOp _ -> true
211
| K7R_IntBinOpMem _ -> true
212
| K7R_SimdLoad _ -> true
213
| K7R_SimdStore _ -> false
214
| K7R_SimdSpill _ -> false
215
| K7R_SimdUnaryOp(K7_FPMulConst _,_) -> false
216
| K7R_SimdUnaryOp _ -> true
217
| K7R_SimdCpyUnaryOp _ -> true
218
| K7R_SimdCpyUnaryOpMem _ -> true
219
| K7R_SimdBinOp _ -> false
220
| K7R_SimdBinOpMem _ -> false
221
| K7R_Label _ -> false
222
| K7R_Jump _ -> false
223
| K7R_CondBranch _ -> false
227
let instr_wants_down_pass2 = function
228
| K7R_SimdCpyUnaryOpMem _ -> false
229
| instr -> instr_wants_down_pass1 instr
231
let rec move_down_instrs instr_wants_down =
232
let rec moveonedown i = function
235
if k7rinstrCannotRollOverK7rinstr (get3of3 i) (get3of3 x) then
238
x::(moveonedown i xs)
242
let xs' = move_down_instrs instr_wants_down xs in
243
if instr_wants_down (get3of3 x) then moveonedown x xs' else x::xs'
245
(****************************************************************************)
247
let k7rinstrIsAGIMovable = function
248
| K7R_SimdLoad _ -> true
249
| K7R_SimdStore _ -> true
250
| K7R_SimdCpyUnaryOpMem _ -> true
251
| K7R_SimdBinOpMem _ -> true
252
| K7R_SimdSpill _ -> true
255
let k7rinstrCanMoveOverAGI a b = match (a,b) with
256
| (K7R_SimdLoad _, K7R_SimdStore _) -> true
257
| (K7R_SimdLoad _, K7R_SimdCpyUnaryOpMem _) -> true
258
| (K7R_SimdLoad _, K7R_SimdSpill _) -> true
259
| (K7R_SimdLoad _, K7R_SimdBinOpMem _) -> true
260
| (K7R_SimdLoad _, K7R_SimdUnaryOp _) -> true
261
| (K7R_SimdCpyUnaryOpMem _, K7R_SimdSpill _) -> true
262
| (K7R_SimdUnaryOp _, K7R_SimdSpill _) -> true
263
| (K7R_SimdStore _, K7R_SimdBinOpMem _) -> true
264
| (K7R_SimdStore _, K7R_SimdCpyUnaryOpMem _) -> true
265
| (K7R_SimdStore _, K7R_SimdSpill _) -> true
266
| (K7R_SimdStore _, K7R_SimdUnaryOp _) -> true
269
let avoid_address_generation_interlock instrs =
270
let stop (_,_,x) (_,_,y) =
271
k7rinstrCannotRollOverK7rinstr x y ||
272
(not (k7rinstrCanMoveOverAGI x y)) in
273
let rec loop = function
275
| ((_,_,instr) as x)::xs ->
276
if k7rinstrIsAGIMovable instr then
277
insertList stop x (loop xs)
282
(****************************************************************************)
284
let k7rinstrsToPromiseEarly xs =
285
let flag = ref false in
286
let rec k7rinstrsToPromiseEarly__int = function
288
| (K7R_Label _ as x1)::(K7R_SimdPromiseCellSize _ as x2)::xs ->
289
x1::x2::(k7rinstrsToPromiseEarly__int xs)
290
| x1::(K7R_SimdPromiseCellSize _ as x2)::xs ->
292
x2::(k7rinstrsToPromiseEarly__int (x1::xs))
294
x::(k7rinstrsToPromiseEarly__int xs)
295
in (!flag,k7rinstrsToPromiseEarly__int xs)
298
let rec optimize ref_t0 instrs0 =
299
let _ = info (sprintf "optimizing: len %d, ref_t %d."
300
(length instrs0) ref_t0) in
301
let instrs1 = k7rinstrsToRegisterReallocated
303
instr_wants_down_pass2
304
(move_down_instrs instr_wants_down_pass1 instrs0)) in
305
let instrs = k7rinstrsToInstructionscheduled instrs1 in
306
let (ref_t,_,_) = list_last instrs in
307
if ref_t < ref_t0 then
308
optimize ref_t instrs
312
(****************************************************************************)
314
let rec addIndexToListElems i0 = function
316
| x::xs -> (i0,x)::(addIndexToListElems (succ i0) xs)
318
let procedure_proepilog stackframe_size_bytes nargs code =
320
addIndexToListElems 1
321
(filter (fun r -> exists (k7rinstrUsesK7rintreg r) code)
322
k7rintregs_calleesaved) in
323
let total_stackframe_size =
324
0 + (if true || (nargs mod 2 = 1) then 4 else 0)
325
+ (if stackframe_size_bytes < 0 then 0 else stackframe_size_bytes)
326
+ (if regs_to_save = [] then 0 else 16) in
328
(if total_stackframe_size > 0 then
329
[K7R_IntUnaryOp(K7_ISubImm total_stackframe_size, k7rintreg_stackpointer)]
332
(map (fun (i,r) -> K7R_IntStoreMem(r,K7_MFunArg (-i))) regs_to_save) @
334
(map (fun (i,r) -> K7R_IntLoadMem(K7_MFunArg (-i), r)) regs_to_save) @
335
(if total_stackframe_size > 0 then
336
[K7R_IntUnaryOp(K7_IAddImm total_stackframe_size, k7rintreg_stackpointer)]
341
(****************************************************************************)
342
let datasection all_consts =
343
if all_consts <> [] then begin
344
asmline (".section .rodata");
345
asmline ("\t" ^ ".balign 64");
346
List.iter (fun c -> asmline (myconst_to_decl c)) all_consts;
350
let compileToAsm name nargs (initcode, k7vinstrs) =
351
let _ = info "compileToAsm..." in
352
let realcode = K7RegisterAllocator.regalloc initcode k7vinstrs in
353
let stackframe_size_bytes = getStackFrameSize realcode in
354
let realcode = procedure_proepilog stackframe_size_bytes nargs realcode in
355
let _ = info "scheduling instructions..." in
356
let realcode = fixpoint k7rinstrsToPromiseEarly realcode in
358
avoid_address_generation_interlock
359
(optimize 1000000 (k7rinstrsToInstructionscheduled realcode)) in
361
let _ = info "before unparseToAsm" in
362
let all_consts = k7rinstrsToConstants (map get3of3 realcode') in
364
(* preserve Stefan's copyright, which otherwise does not appear
366
print_string ("/* The following asm code is Copyright (c) 2000-2001 Stefan Kral */\n");
367
datasection all_consts;
368
k7rinstrInitStackPointerAdjustment 0;
370
asmline "\t.balign 64";
371
asmline (name ^ ":");
372
iter emit_instr realcode';
375
(******************************************************************)
377
let standard_arg_parse_fail _ = failwith "too many arguments"
384
"-n", Arg.Int(fun i -> size := Some i), " generate a codelet of size <n>";
391
" sign of transform";
394
let parse user_speclist usage =
396
(user_speclist @ speclist @ Magic.speclist)
397
standard_arg_parse_fail
403
| None -> failwith "must specify -n"
405
let register_fcn name = "fftwf_codelet_" ^ name
407
(* output the command line *)
409
fold_right (fun a b -> a ^ " " ^ b) (Array.to_list Sys.argv) ""
411
let boilerplate cvsid =
413
("/* Generated by: " ^ (cmdline ()) ^ "*/\n\n" ^
415
" * Generator Id's : \n" ^
416
" * " ^ Algsimp.cvsid ^ "\n" ^
417
" * " ^ Fft.cvsid ^ "\n" ^
418
" * " ^ cvsid ^ "\n" ^
423
let store_array_c n f =
425
(List.map (fun i -> store_var (access_output i) (f i)) (iota n))
428
array n (fun i -> load_var (access_input i))
430
let load_constant_array_c n =
431
array n (fun i -> load_var (access_twiddle i))