1
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
3
% ANNOTATION OF PROGRAMS WITH PROFILE-INFORMATION
5
% Annotate CFGs on icode, RTL and sparc levels with counters.
6
% There are three versions (each takes CFG and const table Ctab):
7
% blocks(CFG,Ctab): each block is given a counter
8
% arcs(CFG,Ctab): each arc is given a counter
9
% events(CFG,Ctab): maintain an event counter throughout CFG
11
% Returns: {NewCFG, NewCtab, CountersAddrs, NumCtrs}
14
% - at this time, ONLY SPARC level is provided
15
% - should also provide paths(CFG), which counts the paths through CFG
16
% using the Ball-Larus algorithm.
18
-module(hipe_profile).
30
-define(elt_type,word).
31
%-define(elt_type,dword).
32
-define(counter_type,uw).
33
%-define(counter_type,ux).
34
-define(label_ty,constant).
36
-define(debug(Str,Args),ok).
37
%-define(debug(Str,Args),io:format(Str,Args)).
39
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
41
% Add a counter to each block.
43
% Returns { CFG, Ctab, CtrLabel, NumCtrs, Lst }
45
% CFG is the instrumented CFG
46
% Ctab is a constant table holding declared profile counters
47
% CtrLabel is the label of the profile counters block
48
% NumCtrs is the number of counters that was declared
49
% Lst is a list of {Block_label, Counter_ID}
52
?debug('starting~n',[]),
53
% Create two new temps:
54
{Lo,Hi} = hipe_sparc_cfg:var_range(CFG),
55
Ctr = hipe_sparc:mk_reg(Hi+1),
56
Tmp = hipe_sparc:mk_reg(Hi+2),
57
CFG1 = hipe_sparc_cfg:var_range_update(CFG,{Lo,Hi+2}),
58
% Insert counters into CFG
59
% 1. declare CtrLabel (could be done more elegantly)
61
% 3. set CtrLabel to refer to the proper # of counters
62
?debug('block insert~n',[]),
64
hipe_consttab:insert_global_block(Ctab,?align,?elt_type,
65
hipe_consttab:repeat(0,0)),
66
?debug('annot blocks~n',[]),
67
{NumCtrs1, NewCFG, Lst} = block_counters(hipe_sparc_cfg:labels(CFG1),
68
CFG1,Ctr,CtrLabel,Tmp,0),
70
?debug('update block~n',[]),
71
NewCtab = hipe_consttab:update_global_block(Ctab,CtrLabel,?align,?elt_type,
72
hipe_consttab:repeat(NumCtrs1,0)),
73
{ NewCFG, NewCtab, CtrLabel, NumCtrs, Lst }.
75
block_counters(Ls,CFG,Ctr,CtrL,Tmp,N) ->
76
block_counters(Ls,CFG,Ctr,CtrL,Tmp,N,[]).
78
block_counters([],CFG,Ctr,CtrL,Tmp,N,Ctrs_lst) -> {N,CFG,Ctrs_lst};
79
block_counters([L|Ls],CFG,Ctr,CtrL,Tmp,N,Ctrs) ->
80
NewCFG = block_counter(CFG,L,Ctr,CtrL,Tmp,N),
81
block_counters(Ls,NewCFG,Ctr,CtrL,Tmp,N+1,[{L,N}|Ctrs]).
83
block_counter(CFG,L,Ctr,CtrL,Tmp,N) ->
84
Code = hipe_bb:code(hipe_sparc_cfg:bb(CFG,L)),
85
NewCode = counter(Ctr,CtrL,Tmp,N,Code),
86
NewBB = hipe_bb:mk_bb(NewCode),
87
hipe_sparc_cfg:bb_update(CFG,L,NewBB).
89
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
91
% Instrument a CFG with profiling code that can count the probabilities
92
% of arcs as well as block frequencies. This is done by
93
% - splitting critical edges
94
% - inserting counters in all blocks
95
% - constructing arc expressions wherefrom we can easily reconstruct
96
% block frequencies and arc probabilities.
99
Ls = hipe_sparc_cfg:labels(CFG),
100
{NewCFG,CritEs} = split_critical_edges(CFG),
101
{InstrCFG,NewCtab,CtrL,NumCtrs,Ctr_lst} = blocks(NewCFG,Ctab),
102
ArcExps = arc_exps(Ls,hipe_sparc_cfg:succ_map(InstrCFG),CritEs,Ctr_lst),
103
{InstrCFG,NewCtab,CtrL,NumCtrs,Ctr_lst,ArcExps}.
105
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
106
% An arc (m -> n) is critical if m has more than one successor and
107
% n has more than one predecessor. We split such arcs by inserting
108
% a new basic block k, s.t. (m -> k -> n).
110
% Returns {NewCFG, [{K,N}]}
111
% where {M,K,N} signifies an arc (M->N) split into (M->K->N).
112
% We say that this is a 'redirect of K to N'.
114
split_critical_edges(CFG) ->
115
Succ = hipe_sparc_cfg:succ_map(CFG),
116
Pred = hipe_sparc_cfg:pred_map(CFG),
117
split_ce(hipe_sparc_cfg:labels(CFG),CFG,Succ,Pred).
119
split_ce(Ls,CFG,Succ,Pred) ->
120
split_ce(Ls,CFG,Succ,Pred,[]).
122
split_ce([],CFG,Succ,Pred,CE) -> {CFG,CE};
123
split_ce([M|Ms],CFG,Succ,Pred,CE) ->
124
{NewCFG,NewCE} = split_edges(M,CFG,Succ,Pred,CE),
125
split_ce(Ms,NewCFG,Succ,Pred,NewCE).
127
split_edges(M,CFG,Succ,Pred,CE) ->
128
case crit_pred(M,Succ) of
130
split_es(M,hipe_sparc_cfg:succ(Succ,M),CFG,Pred,CE);
135
split_es(M,[],CFG,Pred,CE) -> {CFG,CE};
136
split_es(M,[N|Ns],CFG,Pred,CE) ->
137
case crit_succ(N,Pred) of
139
{K,NxtCFG} = next_label(CFG),
140
NewBB = hipe_bb:mk_bb([hipe_sparc:goto_create(M)]),
141
NxtCFG2 = hipe_sparc_cfg:bb_update(NxtCFG,K,NewBB),
142
NxtCFG3 = hipe_sparc_cfg:redirect_jmp(NxtCFG2,M,N,K),
143
split_es(M,Ns,NxtCFG3,Pred,[{K,N}|CE]);
145
split_es(M,Ns,CFG,Pred,CE)
151
{Lo,Hi} = hipe_sparc_cfg:label_range(CFG),
152
{Hi+1,hipe_sparc_cfg:label_range_update(CFG,{Lo,Hi+1})}.
154
% A critical predecessor has more than one successor
157
case length(hipe_sparc_cfg:pred(Succ,L)) of
163
% A critical successor has more than one predecessor
166
case length(hipe_sparc_cfg:pred(Pred,L)) of
172
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
173
% Given a modified CFG, a list of {K,N} redirects (K is a block
174
% splitting the critical edge going to N) and a list of per-block
175
% counters, we now compute profiling expressions for each block:
176
% {L, {BlockFreq,[{SuccL,SuccProb}]}}
178
% Note: if L has 0 or 1 successors, the problem is easy ;-)
180
% Note: Ls is assumed to be the labels of the original CFG, while
181
% Succ is the successors of the instrumented CFG; Redirects is a
182
% list of {RedirecL,OldL} pairs and Ctrs is a list of {Label,Ctr} pairs.
184
arc_exps(Ls,Succ,Redirects,Ctrs) ->
185
arc_exps_list(Ls,Succ,Redirects,Ctrs).
187
arc_exps_list(Ls,Succ,Redir,Ctrs) ->
188
[ {L, arc_exp(L,hipe_sparc_cfg:succ(Succ,L),Redir,Ctrs)} || L <- Ls ].
190
arc_exp(L,Succ,Redir,Ctrs) ->
193
[ {redirect_of(S,Redir),{divide,ctr_of(S,Ctrs),C}} || S <- Succ ]}.
195
ctr_of(L,[{L,C}|_]) -> C;
196
ctr_of(L,[_|Xs]) -> ctr_of(L,Xs);
198
exit({no_counter_for,L}).
200
redirect_of(S,[]) -> S;
201
redirect_of(S,[{S,OldL}|_]) -> OldL;
202
redirect_of(S,[_|Xs]) -> redirect_of(S,Xs).
204
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
206
% Event profiling inserts code to read hardware specific counters
207
% and to maintain a count in an array of counters.
216
% t2 = load_addr(ctr_array)
229
% also, must declare the counter array as usual
230
St = make_event_regs(),
231
NewCFG = event_count_blocks(hipe_sparc_cfg:labels(CFG),CFG,St),
232
start_counter(hipe_sparc_cfg:start(CFG),CFG,St).
234
make_event_regs() -> nyi.
235
start_counter(_,_,_) -> nyi.
236
stop_clock(_) -> nyi.
238
event_count_blocks([],CFG,St) -> CFG;
239
event_count_blocks([L|Ls],CFG,St) ->
240
event_count_blocks(Ls,event_count_block(CFG,L,St),St).
242
event_count_block(CFG,L,St) ->
243
BB = hipe_bb:code(hipe_sparc_cfg:bb(CFG,L)),
244
NewBB = hipe_bb:mk_bb(ec_instrs(BB,St)),
245
hipe_sparc_cfg:bb_update(CFG,L,NewBB).
247
ec_instrs([],St) -> [];
248
ec_instrs([I|Is],St) ->
251
[I|ec_instrs(Is,St)];
253
stop_clock(St) ++ [I] ++ start_clock(St) ++ ec_instrs(Is,St);
255
stop_clock(St) ++ [I] ++ ec_instrs(Is,St);
257
start_clock(St) ++ [I] ++ ec_instrs(Is,St);
259
[I] ++ stop_clock(St) ++ ec_instrs(Is,St);
261
[I] ++ start_clock(St) ++ ec_instrs(Is,St)
265
case hipe_sparc:type(I) of
274
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
276
% Code to start and stop an event clock.
278
% Note: assumes T1-T4, Reg1, Reg2 are sparc vars,
279
% C1, C2 are sparc immediates and CtrL a valid label.
281
% Note: The new registers that are introduced are not saved across
282
% procedure calls. Thus, event counting must stop the clock before
283
% each call, and start it again after the call.
285
start_clock({T1,T2,T3,T4,CtrL,Reg1,Reg2}) ->
291
stop_clock({T1,T2,T3,T4,CtrL,Reg1,Reg2},C1,C2) ->
293
hipe_sparc:load_address_create(T2,CtrL,type,[]),
294
hipe_sparc:load_create(T3,uw,T2,C1,[]),
296
hipe_sparc:alu_create(T4,T4,'-',Reg1,[]),
297
hipe_sparc:store_create(T2,C1,uw,T4,[]),
298
hipe_sparc:load_create(T3,uw,T2,C1,[]),
300
hipe_sparc:alu_create(T4,T4,'-',Reg2,[]),
301
hipe_sparc:store_create(T2,C2,uw,T4,[])
304
read_pic(Dst) -> nyi.
306
get_hi(Dst,Src) -> % 64-bit shift >> 32 bits
309
get_lo(Dst,Src) -> % mask 32 low bits from 64 bits
312
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
314
% Note: if Ctr is initialized to CtrL
315
% - at each entry point and
317
% then we can simply use Ctr instead and avoid the load_address_create.
320
% This can be done as follows:
321
% * walk Code, after each call_link perform load_address_create
322
% * ... and insert "Ctr := load_address(CtrL)" at all entrypoints
324
counter(Ctr,CtrL,Tmp,N,Code) ->
325
Off = hipe_sparc:mk_imm(N*?align),
326
[hipe_sparc:comment_create({counter,N},[]),
327
hipe_sparc:load_address_create(Ctr,CtrL,?label_ty,[]),
328
hipe_sparc:load_create(Tmp,?counter_type,Ctr,Off,[{counter,N}]),
329
hipe_sparc:alu_create(Tmp,Tmp,'+',hipe_sparc:mk_imm(1),[]),
330
hipe_sparc:store_create(Ctr,Off,?counter_type,Tmp,[{counter,N}]),
331
hipe_sparc:comment_create({counter,N,done},[])
335
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
337
% Collect profiling information
338
% - read_counters(MFA) returns a list of {Label, Number_of_visits}
339
% - compute_counters( {CounterList, ComputeList} ) computes the
340
% values of implicit counters (defined as, say, {add,C1,C2} or
341
% similar); returns [{Label,Counter}]
342
% * note: the ComputeList can only refer to CounterList.
344
compute_counters({Ctrs,Comp}) ->
345
[ {L, counter_value_of(E,Ctrs)} || {L,E} <- Comp ].
347
counter_value_of(N,Ctrs) when is_integer(N) -> find_value(N,Ctrs);
348
counter_value_of({add,E1,E2},Ctrs) ->
349
counter_value_of(E1,Ctrs) + counter_value_of(E2,Ctrs);
350
counter_value_of({sub,E1,E2},Ctrs) ->
351
counter_value_of(E1,Ctrs) - counter_value_of(E2,Ctrs);
352
counter_value_of({mul,E1,E2},Ctrs) ->
353
counter_value_of(E1,Ctrs) * counter_value_of(E2,Ctrs).
355
find_value(Ctr,[{Ctr,Val}|_]) -> Val;
356
find_value(Ctr,[_|Xs]) -> find_value(Ctr,Xs).
358
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
360
read_counters(MFA) ->
361
io:format("Geting counters for ~w\n",[MFA]),
362
read_ctrs(MFA,counter_array(MFA),counter_info(MFA)).
364
read_ctrs(MFA,CtrL,Cs_lst) ->
365
Ctrs = ncode_server:find_address(MFA,CtrL),
366
[ {L, b32(erlang:hipe_peek(Ctrs+N*4))} || {L,N} <- Cs_lst ].
368
b32({Hi16,Lo16}) -> Hi16 bsl 16 + Lo16.
370
zero_counters(MFA) ->
371
zero_ctrs(MFA,counter_array(MFA),counter_info(MFA)).
373
zero_ctrs(MFA,CtrL,Cs_lst) ->
374
Ctrs = ncode_server:find_address(MFA,CtrL),
375
zero_counters(Cs_lst,Ctrs).
377
zero_counters([],Ctrs) -> ok;
378
zero_counters([{_,N}|Ns],Ctrs) ->
379
erlang:hipe_poke(Ctrs+N*4,hipe_converters:word_to_tuple(0)),
380
zero_counters(Ns,Ctrs).
382
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
384
% Annotate the original CFG of an MFA with collected profiling info.
387
case profiled_type(MFA) of
389
CFG = saved_cfg(MFA),
390
CtrL = counter_array(MFA),
391
Ctrs_lst = counter_info(MFA),
392
Prof_info = read_ctrs(MFA,CtrL,Ctrs_lst),
393
annot_blocks(Prof_info,CFG);
395
CFG = saved_cfg(MFA),
396
CtrL = counter_array(MFA),
397
Ctrs_lst = counter_info(MFA),
398
Arc_exps = arc_expressions(MFA),
399
Prof_info = read_ctrs(MFA,CtrL,Ctrs_lst),
400
Prof_arcs = eval_arc_exps(Arc_exps,Prof_info),
403
exit({not_compiled_with_profiling,MFA})
406
annot_blocks([],CFG) -> CFG;
407
annot_blocks([{L,Visits}|Xs],CFG) ->
408
NewBB = hipe_bb:mk_bb([hipe_sparc:comment_create({visits,Visits},[])
409
| hipe_bb:code(hipe_sparc_cfg:bb(CFG,L))]),
410
NewCFG = hipe_sparc_cfg:bb_update(CFG,L,NewBB),
411
annot_blocks(Xs,NewCFG).
413
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
415
% Evaluate arc expressions. Returns a list of
416
% {Label, {Freq, [{SuccLabel,Prob}]}}
418
eval_arc_exps(AEs,Prof_info) ->
419
[ eval_arc_exp(AE,Prof_info) || AE <- AEs ].
421
eval_arc_exp({L,C,SuccExps},Prof_info) ->
422
{L, {freq(C,Prof_info),
423
[ {S, freq(C1,Prof_info)/freq(C2,Prof_info)}
424
|| {S, {divide,C1,C2}} <- SuccExps ]}}.
426
freq(C,[{C,X}|_]) -> X;
427
freq(C,[_|Xs]) -> freq(C,Xs);
428
freq(C,[]) -> exit({eval_arc_exp,{counter_not_found,C}}).
430
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
432
% Lookup a given attribute.
434
profiled_type(MFA) ->
435
case lookup_info(MFA,profiled) of
444
set_unprofiled(MFA) ->
445
update_info(MFA,profiled,false).
448
update_info(MFA,profiled,true).
450
counter_array(MFA) ->
451
get_info(MFA,counter_array).
453
set_counter_array(MFA,L) ->
454
update_info(MFA,counter_array,L).
457
get_info(MFA,counter_info).
459
set_counter_info(MFA,Lst) ->
460
update_info(MFA,counter_info,Lst).
463
get_info(MFA,saved_cfg).
465
set_saved_cfg(MFA,CFG) ->
466
update_info(MFA,saved_cfg,CFG).
468
arc_expressions(MFA) ->
469
get_info(MFA,arc_exprs).
471
set_arc_expressions(MFA,AEs) ->
472
update_info(MFA,arc_exprs,AEs).
474
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
476
% Thanks to the interface from ncode_server,
477
% we have to resort to inefficient antics. Hopefully not on the
480
% (ncode_server should use a set ets, rather than a bag ets.)
482
insert_info(MFA,Key,Info) ->
483
case lookup_info(MFA,Key) of
485
ncode_server:add_userinfo(MFA,Key,Info);
487
exit({{insert_info,3},key_present})
490
update_info(MFA,Key,Info) ->
491
case lookup_info(MFA,Key) of
495
ncode_server:delete_userinfo(MFA,Key)
497
ncode_server:add_userinfo(MFA,Key,Info).
499
lookup_info(MFA,Key) ->
500
case catch ncode_server:lookup_userinfo(MFA,Key) of
501
{'EXIT',_} -> not_found;
506
ncode_server:lookup_userinfo(MFA,Key).
511
enum(K,N) when K > 0 ->