1
%% -*- erlang-indent-level: 2 -*-
2
%% Floating point handling.
4
-module(hipe_x86_float).
5
-include("hipe_x86.hrl").
6
-include("../main/hipe.hrl").
10
CFG0 = hipe_x86_cfg:init(Defun),
11
%%hipe_x86_cfg:pp(CFG0),
12
Liveness = hipe_x86_liveness:analyse(CFG0),
13
StartLabel = hipe_x86_cfg:start_label(CFG0),
14
SuccMap = hipe_x86_cfg:succ_map(CFG0),
15
{CFG1, _} = do_blocks([],[StartLabel], CFG0, Liveness, [], SuccMap,
17
CFG2 = hipe_x86_cfg:var_range_update(CFG1, []),
18
hipe_x86_cfg:linearise(CFG2).
21
do_blocks(Pred,[Lbl|Lbls], CFG, Liveness, Map, SuccMap, BlockMap) ->
22
case gb_trees:lookup(Lbl, BlockMap) of
24
%% This block has not been visited.
25
Block = hipe_x86_cfg:bb(CFG, Lbl),
26
Succ = hipe_x86_cfg:succ(SuccMap, Lbl),
27
NewBlockMap = gb_trees:insert(Lbl, Map, BlockMap),
28
LiveOut = [X || X <- hipe_x86_liveness:liveout(Liveness, Lbl),
30
Code = hipe_bb:code(Block),
31
ReverseCode = lists:reverse(Code),
32
{NewCode0, NewMap, NewBlockMap1, Dirty} =
33
do_block(ReverseCode, LiveOut, Map, NewBlockMap),
34
{NewCFG1, NewSuccMap} =
37
NewBlock = hipe_bb:code_update(Block, NewCode0),
38
{hipe_x86_cfg:bb_update(CFG, Lbl, NewBlock), SuccMap};
42
{NewCFG3, NewBlockMap2} =
43
do_blocks(Lbl,Succ, NewCFG1, Liveness, NewMap,
44
NewSuccMap, NewBlockMap1),
45
do_blocks(Pred,Lbls, NewCFG3, Liveness,
46
Map, NewSuccMap, NewBlockMap2);
49
%% Don't have to follow this trace any longer.
50
do_blocks(Pred,Lbls, CFG, Liveness,
51
Map, SuccMap, BlockMap);
52
{value, ExistingMap}->
53
%% This block belongs to a trace already handled.
54
%% The Map coming in must be identical to the one used
55
%% when the block was processed.
56
if ExistingMap == Map ->
57
do_blocks(Pred,Lbls, CFG, Liveness,
58
Map, SuccMap, BlockMap);
60
NewCFG = do_shuffle(Pred,Lbl,CFG, Map,ExistingMap),
61
NewSuccMap = hipe_x86_cfg:succ_map(NewCFG),
62
do_blocks(Pred,Lbls, NewCFG, Liveness, Map,
66
do_blocks(_Pred,[], CFG, _Liveness, _Map, _SuccMap, BlockMap) ->
69
do_block(Ins, LiveOut, Map, BlockMap)->
70
do_block(Ins, LiveOut, Map, BlockMap, false).
72
do_block([I|Is], LiveOut, Map, BlockMap, Dirty) ->
73
case handle_insn(I) of
75
{NewCode, NewMap, NewBlockMap, NewDirty} =
76
do_block(Is, LiveOut,Map,BlockMap,Dirty),
77
{NewCode++[I], NewMap, NewBlockMap, NewDirty};
79
Def = ordsets:from_list(hipe_x86_defuse:insn_def(I)),
80
Use = ordsets:from_list(hipe_x86_defuse:insn_use(I)),
82
ordsets:filter(fun(X)->is_fp(X)end,
84
ordsets:subtract(LiveOut, Def),Use)),
86
{NewCode, NewMap, NewBlockMap, NewDirty} =
87
do_block(Is, NewLiveOut, Map, BlockMap, Dirty),
88
{NewI, NewMap1, NewBlockMap1} =
89
do_insn(I, LiveOut, NewMap, NewBlockMap),
91
if Dirty == true -> true;
92
NewDirty == true -> true;
93
NewI =:= [I] -> false;
96
{NewCode++NewI, NewMap1, NewBlockMap1, NewDirty1}
99
do_block([], LiveOut, Map, BlockMap, Dirty) ->
100
case lists:filter(fun(X)->not lists:member(X, LiveOut)end, Map) of
102
{[], Map, BlockMap, Dirty};
104
{PopIns, NewMap} = pop_dead(Pop, Map),
105
{PopIns, NewMap, BlockMap, true}
108
do_shuffle(Pred,Lbl,CFG, OldMap, NewMap)->
109
%% First make sure both maps has the same members.
110
Push = NewMap -- OldMap,
111
Pop = OldMap -- NewMap,
112
{PopInsn, OldMap0} = pop_dead(Pop, OldMap),
113
{PushInsn, OldMap1} =
116
_-> push_list(lists:reverse(Push), OldMap0)
119
if OldMap1=:=NewMap ->
120
%% It was enough to push and pop.
121
PopInsn ++ PushInsn ++
122
[hipe_x86:mk_jmp_label(Lbl)];
124
%% Shuffle the positions so the maps match
125
Cycles = find_swap_cycles(OldMap1, NewMap),
126
SwitchInsns = do_switching(Cycles),
127
PopInsn++PushInsn++SwitchInsns++
128
[hipe_x86:mk_jmp_label(Lbl)]
132
NewLabel = hipe_gensym:get_next_label(x86),
133
{LLo,_} = hipe_x86_cfg:label_range(CFG),
134
LHi = hipe_gensym:get_label(x86),
135
NewCFG0 = hipe_x86_cfg:label_range_update(CFG, {LLo, LHi}),
136
NewCFG1 = hipe_x86_cfg:bb_add(NewCFG0, NewLabel,
137
hipe_bb:mk_bb(Code,false)),
138
OldPred = hipe_x86_cfg:bb(NewCFG1, Pred),
139
PredCode = hipe_bb:code(OldPred),
140
NewLast = redirect(lists:last(PredCode), Lbl,NewLabel),
141
NewPredCode = butlast(PredCode)++[NewLast],
142
NewPredBB = hipe_bb:code_update(OldPred, NewPredCode),
143
hipe_x86_cfg:bb_update(NewCFG1, Pred, NewPredBB).
146
find_swap_cycles(OldMap, NewMap)->
147
Moves = [get_pos(X,NewMap,1) || X <- OldMap],
148
find_swap_cycles(OldMap, Moves, lists:seq(1,length(OldMap)), []).
150
find_swap_cycles(OldMap, Moves, NotHandled, Cycles)->
151
if length(NotHandled) == 0 -> Cycles;
153
Cycle = find_cycle(Moves, [hd(NotHandled)]),
154
NewNotHandled = NotHandled -- Cycle,
155
case lists:member(1, Cycle) of
157
%% The cycle that contains the first element on the stack
158
%% must be processed last.
159
NewCycle = format_cycle(Cycle),
160
find_swap_cycles(OldMap, Moves, NewNotHandled,
163
NewCycle = format_cycle(Cycle),
164
find_swap_cycles(OldMap, Moves, NewNotHandled,
169
find_cycle(Moves, Cycle)->
170
To = lists:nth(lists:last(Cycle),Moves),
171
if To == hd(Cycle) -> Cycle;
172
true -> find_cycle(Moves, Cycle++[To])
176
%% The position numbers start with 1 - should start with 0.
177
%% If position 0 is in the cycle it will be permuted until
178
%% the 0 is first and then remove it.
179
%% Otherwise the first element is also added last.
180
NewCycle = [X - 1 || X <- C],
181
case lists:member(0,NewCycle) of
182
true -> format_cycle(NewCycle, []);
183
_ -> NewCycle ++ [hd(NewCycle)]
185
format_cycle([H|T], NewCycle)->
188
_ -> format_cycle(T,NewCycle++[H])
191
do_switching(Cycles)->
192
do_switching(Cycles, []).
193
do_switching([C|Cycles], Insns)->
195
0 -> do_switching(Cycles, Insns);
197
NewInsns = Insns ++ [hipe_x86:mk_fp_unop(fxch, mk_st(X))],
198
do_switching(Cycles, NewInsns);
199
_ -> NewInsns = Insns ++ [hipe_x86:mk_fp_unop(fxch, mk_st(X)) || X<-C],
200
do_switching(Cycles, NewInsns)
202
do_switching([],Insns) ->
205
redirect(Insn, OldLbl, NewLbl)->
207
#pseudo_call{contlab=ContLab, exnlab=ExnLab}->
208
if ContLab =:= OldLbl ->
209
Insn#pseudo_call{contlab=NewLbl};
211
Insn#pseudo_call{exnlab=NewLbl}
214
hipe_x86_cfg:redirect_jmp(Insn, OldLbl, NewLbl)
217
do_insn(I, LiveOut, Map, BlockMap) ->
219
#move{src=#x86_imm{value=Value}}->
221
{'erl_fp_check_exception', Type} ->
222
Store = pseudo_pop(Map),
223
{Store ++ [hipe_x86:mk_fp_unop('fwait', []),
224
I#move{src=#x86_imm{value={'erl_fp_exception',Type}}}],
229
#pseudo_call{'fun'=Fun, contlab = ContLab}->
231
%% We don't want to spill anything if an exception has been thrown.
232
{_, 'handle_fp_exception'} ->
234
case gb_trees:lookup(ContLab, BlockMap) of
238
gb_trees:update(ContLab, fail, BlockMap);
240
gb_trees:insert(ContLab, fail, BlockMap)
242
{[I], [], NewBlockMap};
244
{pop_all(Map)++[I],[],BlockMap}
247
{NewI, NewMap} = do_fp_unop(I, LiveOut, Map),
248
{NewI, NewMap, BlockMap};
250
{NewI, NewMap} = do_fp_binop(I, LiveOut, Map),
251
{NewI, NewMap, BlockMap};
252
#fmov{src=Src, dst=Dst}->
254
%% Don't need to keep this instruction!
255
%% However, we may need to pop from the stack.
256
case is_liveOut(Src, LiveOut) of
260
{SwitchInsn, NewMap0} = switch_first(Dst, Map),
261
NewMap = pop(NewMap0),
262
{SwitchInsn++pop_insn(), NewMap, BlockMap}
265
{NewI, NewMap} = do_fmov(Src, Dst, LiveOut, Map),
266
{NewI, NewMap, BlockMap}
272
do_fmov(Src, Dst=#x86_mem{},LiveOut, Map) ->
273
%%% Storing a float from the stack into memory.
274
{SwitchInsn, NewMap0} = switch_first(Src, Map),
275
case is_liveOut(Src, LiveOut) of
277
{SwitchInsn ++[hipe_x86:mk_fp_unop(fst, Dst)], NewMap0};
279
NewMap1 = pop(NewMap0),
280
{SwitchInsn ++[hipe_x86:mk_fp_unop(fstp, Dst)], NewMap1}
283
do_fmov(Src=#x86_mem{}, Dst, _LiveOut, Map) ->
284
%%% Pushing a float into the stack.
285
case in_map(Dst, Map) of
286
true -> ?EXIT({loadingExistingFpVariable,{Src,Dst}});
289
{PushOp, [_|NewMap0]} = push(Src, Map),
290
%% We want Dst in the map rather than Src.
291
NewMap = [Dst|NewMap0],
294
do_fmov(Src, Dst, LiveOut, Map) ->
295
%%% Copying a float that either is spilled or is on the fp stack,
296
%%% or converting a fixnum in a temp to a float on the fp stack.
297
case in_map(Dst, Map) of
298
true -> ?EXIT({copyingToExistingFpVariable,{Src,Dst}});
303
#x86_temp{type=Type}-> Type /= 'double';
308
do_conv(Src,Dst,Map);
311
case {is_liveOut(Src, LiveOut),in_map(Src, Map)} of
313
%% Just remap Dst to Src
314
{Head,[_|T]} = lists:splitwith(fun(X)->X/=Src end,Map),
317
{PushOp, [_|NewMap0]} = push(Src, Map),
318
%% We want Dst in the map rather than Src.
319
NewMap = [Dst|NewMap0],
324
do_conv(Src=#x86_temp{reg=Reg},Dst,Map)->
325
%% Converting. Src must not be a register, so we
326
%% might have to put it into memory in between.
328
case hipe_x86_registers:is_precoloured(Reg) of
330
Temp = hipe_x86:mk_new_temp('untagged'),
331
{[hipe_x86:mk_move(Src,Temp)],Temp};
335
{PushOp, [_|NewMap0]} = push(NewSrc, Map),
336
%% We want Dst in the map rather than NewSrc.
337
NewMap = [Dst|NewMap0],
338
case length(PushOp) of
339
1 -> %% No popping of memory object on fpstack
340
{Move++[hipe_x86:mk_fp_unop(fild, NewSrc)], NewMap};
341
_ -> %% H contains pop instructions. Must be kept!
342
Head = butlast(PushOp),
343
{Move++Head++[hipe_x86:mk_fp_unop(fild, NewSrc)], NewMap}
347
do_fp_unop(I = #fp_unop{arg=Arg, op=fchs}, Liveout, Map ) ->
348
%% This is fchs, the only operation without a
349
%% popping version. Needs special handling.
350
case is_liveOut(Arg, Liveout) of
352
{SwitchIns, NewMap} = switch_first(Arg, Map),
353
{SwitchIns++[I#fp_unop{arg=[]}], NewMap};
355
%% Don't need to keep this instruction!
356
%% However, we may need to pop Src from the stack.
357
case in_map(Arg, Map) of
359
{SwitchInsn, NewMap0} = switch_first(Arg, Map),
360
NewMap = pop(NewMap0),
361
{SwitchInsn++pop_insn(), NewMap};
367
do_fp_binop(#fp_binop{src=Src, dst=Dst, op=Op},
369
case {is_liveOut(Src, LiveOut), is_liveOut(Dst, LiveOut)} of
371
keep_both(Op, Src, Dst, Map);
373
keep_src(Op, Src, Dst, Map);
375
keep_dst(Op, Src, Dst, Map);
377
%% Both Dst and Src are popped.
378
keep_none(Op, Src, Dst, Map)
381
keep_both(Op, Src, Dst, Map)->
382
%% Keep both Dst and Src if it is there.
383
{SwitchInsn, NewMap} = switch_first(Dst, Map),
384
NewSrc = get_new_opnd(Src, NewMap),
385
Insn = format_fp_binop(Op, NewSrc, mk_st(0)),
386
{SwitchInsn++Insn, NewMap}.
388
keep_src(Op, Src, Dst, Map)->
389
%% Pop Dst but keep Src in stack if it is there.
390
{SwitchInsn, NewMap0} = switch_first(Dst, Map),
391
NewSrc = get_new_opnd(Src, NewMap0),
392
NewMap = pop(NewMap0),
393
Insn = format_fp_binop(Op, NewSrc, mk_st(0)),
394
{SwitchInsn++Insn++pop_insn(), NewMap}.
396
keep_dst(Op, Src, Dst, Map)->
397
%% Keep Dst but pop Src.
398
%% Dst must be in stack.
399
DstInMap = in_map(Dst, Map),
400
SrcInMap = in_map(Src, Map),
406
%% Src must be popped. If Dst is on top of the stack we can
407
%% alter the operation rather than shuffle the stack.
408
{SwitchInsn, Insn, NewMap} =
410
NewOp = mk_op_pop(reverse_op(Op)),
411
NewDst = get_new_opnd(Src, Map),
412
TmpMap = lists:map(fun(X)->if X==Src->Dst;true->X end end,Map),
413
{[], format_fp_binop(NewOp, mk_st(0), NewDst), pop(TmpMap)};
415
{SwitchInsn1, NewMap0} = switch_first(Src, Map),
416
NewDst = get_new_opnd(Dst,NewMap0),
417
NewOp = mk_op_pop(Op),
418
{SwitchInsn1,format_fp_binop(NewOp, mk_st(0), NewDst), pop(NewMap0)}
420
{SwitchInsn++Insn,NewMap};
422
%% Src is on the stack, but Dst isn't. Use memory command to avoid
423
%% unnecessary loading instructions.
424
{SwitchInsn, NewMap0} = switch_first(Src, Map),
425
NewOp = reverse_op(Op),
426
NewMap = [Dst]++tl(NewMap0),
427
Insn = format_fp_binop(NewOp, Dst, mk_st(0)),
428
{SwitchInsn++Insn, NewMap}
431
%% Src isn't in the map so it doesn't have to be popped.
432
{SwitchInsn, NewMap} = switch_first(Dst, Map),
433
{SwitchInsn++[#fp_unop{arg=Src,op=Op}], NewMap}
436
keep_none(Op, Src, Dst, Map)->
437
%% Dst must be on stack.
438
{PushInsn, NewMap0} =
439
case in_map(Dst, Map) of
443
case in_map(Src, NewMap0) of
445
%% Src must be popped.
446
{SwitchInsn1, NewMap1} = switch_first(Src, NewMap0),
447
NewOp = mk_op_pop(Op),
448
NewDst = get_new_opnd(Dst,NewMap1),
449
NewMap2 = pop(NewMap1),
450
%% Then Dst has to be popped.
451
{PopInsn,NewMap} = pop_member(Dst,NewMap2),
452
Insn = format_fp_binop(NewOp, mk_st(0), NewDst),
453
{PushInsn++SwitchInsn1++Insn++PopInsn,
456
%% Src isn't in the map so it doesn't have to be popped.
457
{SwitchInsn, NewMap1} = switch_first(Dst, NewMap0),
458
NewMap = pop(NewMap1),
459
{SwitchInsn++[#fp_unop{arg=Src,op=Op}]++pop_insn(),
463
format_fp_binop(Op, Src=#x86_temp{}, Dst=#x86_fpreg{reg=Reg}) ->
464
%% Handle that st(0) is sometimes implicit.
465
if Reg==0-> [hipe_x86:mk_fp_unop(Op, Src)];
466
true-> [hipe_x86:mk_fp_binop(Op, Src, Dst)]
468
format_fp_binop(Op, Src, Dst) ->
469
[hipe_x86:mk_fp_binop(Op, Src, Dst)].
472
lists:member(X, Map).
475
push_list(L, Map, []).
476
push_list([H|T], Map, Acc) ->
477
{Insn, NewMap} = push(H,Map),
478
push_list(T, NewMap, Acc++Insn);
479
push_list([], Map, Acc) ->
484
if length(Map0)>7 -> pop_a_temp(Map0);
487
NewX = get_new_opnd(X,Map),
489
PushOp = [hipe_x86:mk_fp_unop(fld, NewX)],
490
{PopInsn++PushOp, NewMap}.
496
[hipe_x86:mk_fp_unop('fstp',mk_st(0))].
498
pop_dead(Dead, Map) ->
499
Dead0 = [X || X <- Map, lists:member(X,Dead)],
500
pop_dead(Dead0, Map, []).
502
pop_dead([D|Dead], Map, Code) ->
503
{I, NewMap0} = switch_first(D, Map),
504
NewMap = pop(NewMap0),
506
#x86_temp{}->[hipe_x86:mk_fp_unop('fstp', D)];
510
pop_dead(Dead,NewMap,Code++I++Store);
511
pop_dead([],Map,Code) ->
515
{Code, _} = pop_dead(Map, Map),
518
pop_member(Member, Map)->
519
{Head,[_|T]} = lists:splitwith(fun(X)->X/=Member end,Map),
520
{[hipe_x86:mk_fp_unop('fstp', mk_st(get_pos(Member, Map, 0)))],
524
Temp = find_a_temp(Map),
525
{SwitchInsn, NewMap0} = switch_first(Temp, Map),
526
NewMap = pop(NewMap0),
527
{SwitchInsn++[hipe_x86:mk_fp_unop('fstp', Temp)], NewMap}.
529
find_a_temp([H = #x86_temp{}|_])->
531
find_a_temp([_|T]) ->
534
?EXIT({noTempOnFPStack,{}}).
536
switch_first(X, Map = [H|_]) ->
537
Pos = get_pos(X, Map, 0),
544
{[_|Head], [_|Tail]} = lists:splitwith(fun(Y)->Y/=X end, Map),
545
NewMap = [X|Head]++[H|Tail],
546
Ins = hipe_x86:mk_fp_unop(fxch, mk_st(Pos)),
549
switch_first(X, Map) ->
552
get_pos(X, [H|T], Pos) ->
554
true -> get_pos(X, T, Pos+1)
559
get_new_opnd(X, Map) ->
560
I = get_pos(X, Map, 0),
563
%% The operand is probably a spilled float.
569
is_fp(#x86_fpreg{}) ->
571
is_fp(#x86_mem{type=Type}) ->
573
is_fp(#x86_temp{type=Type}) ->
578
#move{src=#x86_imm{}} -> true;
582
#pseudo_call{}->true;
587
is_liveOut(X, LiveOut) ->
588
ordsets:is_element(X, LiveOut).
591
hipe_x86:mk_fpreg(X, false).
610
_ -> ?EXIT({operandHasNoPopVariant,{Op}})
613
butlast([X|Xs]) -> butlast(Xs,X).
615
butlast([X|Xs],Y) -> [Y|butlast(Xs,X)].
617
%%pp_insn(Op, Src, Dst)->
618
%% pp([hipe_x86:mk_fp_binop(Op, Src, Dst)]).
621
%% hipe_x86_pp:pp_insn(I),
626
pseudo_pop(Map) when (length(Map)>0) ->
627
Dst = hipe_x86:mk_new_temp('double'),
628
pseudo_pop(Dst, length(Map), []);
632
pseudo_pop(Dst, St, Acc) when (St>1)->
633
%% Store all members of the stack to a single temporary to force
634
%% any floating point overflow exceptions to occur even though we
635
%% don't have overflow for the extended double precision in the x87.
636
pseudo_pop(Dst, St-1,
637
[hipe_x86:mk_fp_unop('fxch', mk_st(St-1)),
638
hipe_x86:mk_fp_unop('fst', Dst),
639
hipe_x86:mk_fp_unop('fxch', mk_st(St-1))
641
pseudo_pop(Dst, _St, Acc) ->
642
[hipe_x86:mk_fp_unop('fst', Dst)|Acc].