1
// Inferno utils/5c/reg.c
2
// http://code.google.com/p/inferno-os/source/browse/utils/5g/reg.c
4
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
5
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
6
// Portions Copyright © 1997-1999 Vita Nuova Limited
7
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
8
// Portions Copyright © 2004,2006 Bruce Ellis
9
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
10
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
11
// Portions Copyright © 2009 The Go Authors. All rights reserved.
13
// Permission is hereby granted, free of charge, to any person obtaining a copy
14
// of this software and associated documentation files (the "Software"), to deal
15
// in the Software without restriction, including without limitation the rights
16
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
// copies of the Software, and to permit persons to whom the Software is
18
// furnished to do so, subject to the following conditions:
20
// The above copyright notice and this permission notice shall be included in
21
// all copies or substantial portions of the Software.
23
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
36
#define REGBITS ((uint32)0xffffff)
37
#define P2R(p) (Reg*)(p->reg)
40
int noreturn(Prog *p);
59
rcmp(const void *a1, const void *a2)
70
return p2->varno - p1->varno;
83
t = structfirst(&save, getoutarg(curfn->type));
90
ovar.b[z] |= bit.b[z];
91
t = structnext(&save);
94
//print("ovars = %Q\n", &ovar);
104
p->scond = zprog.scond;
105
p->from = zprog.from;
118
// convert each bit to a variable
122
bit.b[i/32] &= ~(1L<<(i%32));
124
// disable all pieces of that variable
125
for(i=0; i<nvar; i++) {
127
if(v->sym == s && v->name == n)
133
static char* regname[] = {
170
fmtinstall('Q', Qconv);
179
print("optimizing %S\n", curfn->nname->sym);
182
// count instructions
184
for(p=firstp; p!=P; p=p->link)
187
// if too big dont bother
189
// print("********** %S is too big (%d)\n", curfn->nname->sym, nr);
198
* control flow is more complicated in generated go code
199
* than in generated c code. define pseudo-variables for
200
* registers, so we have complete register usage information.
203
memset(var, 0, NREGVAR*sizeof var[0]);
204
for(i=0; i<NREGVAR; i++)
205
var[i].sym = lookup(regname[i]);
207
regbits = RtoB(REGSP)|RtoB(REGLINK)|RtoB(REGPC);
208
for(z=0; z<BITS; z++) {
216
// build list of return variables
221
* build aux data structure
223
* find use and set of variables
226
for(p=firstp; p != P; p = p->link) {
250
switch(r1->prog->as) {
260
* left side always read
262
bit = mkvar(r, &p->from);
263
for(z=0; z<BITS; z++)
264
r->use1.b[z] |= bit.b[z];
267
* middle always read when present
270
if(p->from.type != D_FREG)
271
r->use1.b[0] |= RtoB(p->reg);
273
r->use1.b[0] |= FtoB(p->reg);
277
* right side depends on opcode
279
bit = mkvar(r, &p->to);
283
yyerror("reg: unknown op: %A", p->as);
296
for(z=0; z<BITS; z++)
297
r->use2.b[z] |= bit.b[z];
301
* right side read or read+write, depending on middle
303
* ADD x, y, z => z = x + y
329
* right side read+write
341
for(z=0; z<BITS; z++) {
342
r->use2.b[z] |= bit.b[z];
343
r->set.b[z] |= bit.b[z];
366
if((p->scond & C_SCOND) != C_SCOND_NONE)
367
for(z=0; z<BITS; z++)
368
r->use2.b[z] |= bit.b[z];
369
for(z=0; z<BITS; z++)
370
r->set.b[z] |= bit.b[z];
383
if(p->from.type == D_CONST)
395
for(i=0; i<nvar; i++) {
399
for(z=0; z<BITS; z++)
400
addrs.b[z] |= bit.b[z];
403
// print("bit=%2d addr=%d et=%-6E w=%-2d s=%S + %lld\n",
404
// i, v->addr, v->etype, v->width, v->sym, v->offset);
409
* turn branch references to pointers
410
* build back pointers
412
for(r=firstr; r!=R; r=r->link) {
414
if(p->to.type == D_BRANCH) {
415
if(p->to.branch == P)
417
r1 = p->to.branch->regp;
421
//fatal("ref to self %P", p);
431
print("\n%L %D\n", p->lineno, &p->from);
432
print(" addr = %Q\n", addrs);
437
* find looping structure
439
for(r = firstr; r != R; r = r->link)
446
* iterate propagating usage
447
* back until flow graph is complete
451
for(r = firstr; r != R; r = r->link)
453
for(r = firstr; r != R; r = r->link)
454
if(r->prog->as == ARET)
455
prop(r, zbits, zbits);
457
/* pick up unreachable code */
459
for(r = firstr; r != R; r = r1) {
461
if(r1 && r1->active && !r->active) {
462
prop(r, zbits, zbits);
474
* iterate propagating register/variable synchrony
475
* forward until graph is complete
479
for(r = firstr; r != R; r = r->link)
481
synch(firstr, zbits);
488
print("\nprop structure:\n");
489
for(r = firstr; r != R; r = r->link) {
490
print("%d:%P", r->loop, r->prog);
491
for(z=0; z<BITS; z++) {
492
bit.b[z] = r->set.b[z] |
493
r->refahead.b[z] | r->calahead.b[z] |
494
r->refbehind.b[z] | r->calbehind.b[z] |
495
r->use1.b[z] | r->use2.b[z];
496
bit.b[z] &= ~addrs.b[z];
502
print(" u1=%Q", r->use1);
504
print(" u2=%Q", r->use2);
506
print(" st=%Q", r->set);
507
if(bany(&r->refahead))
508
print(" ra=%Q", r->refahead);
509
if(bany(&r->calahead))
510
print(" ca=%Q", r->calahead);
511
if(bany(&r->refbehind))
512
print(" rb=%Q", r->refbehind);
513
if(bany(&r->calbehind))
514
print(" cb=%Q", r->calbehind);
522
* move register pseudo-variables into regu.
524
for(r = firstr; r != R; r = r->link) {
525
r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS;
527
r->set.b[0] &= ~REGBITS;
528
r->use1.b[0] &= ~REGBITS;
529
r->use2.b[0] &= ~REGBITS;
530
r->refbehind.b[0] &= ~REGBITS;
531
r->refahead.b[0] &= ~REGBITS;
532
r->calbehind.b[0] &= ~REGBITS;
533
r->calahead.b[0] &= ~REGBITS;
534
r->regdiff.b[0] &= ~REGBITS;
535
r->act.b[0] &= ~REGBITS;
541
* calculate costs (paint1)
545
for(z=0; z<BITS; z++)
546
bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) &
547
~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]);
548
if(bany(&bit) & !r->refset) {
549
// should never happen - all variables are preset
551
print("%L: used and not set: %Q\n", r->prog->lineno, bit);
556
for(r = firstr; r != R; r = r->link)
560
for(r = firstr; r != R; r = r->link) {
561
for(z=0; z<BITS; z++)
562
bit.b[z] = r->set.b[z] &
563
~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]);
564
if(bany(&bit) && !r->refset) {
566
print("%L: set and not used: %Q\n", r->prog->lineno, bit);
570
for(z=0; z<BITS; z++)
571
bit.b[z] = LOAD(r) & ~(r->act.b[z] | addrs.b[z]);
580
bit.b[i/32] &= ~(1L<<(i%32));
583
print("%L $%d: %Q\n",
584
r->prog->lineno, change, blsh(i));
589
if(nregion >= NRGN) {
591
print("too many regions\n");
598
qsort(region, nregion, sizeof(region[0]), rcmp);
602
* determine used registers (paint2)
603
* replace code (paint3)
606
for(i=0; i<nregion; i++) {
607
bit = blsh(rgp->varno);
608
vreg = paint2(rgp->enter, rgp->varno);
609
vreg = allreg(vreg, rgp);
611
if(rgp->regno >= NREG)
612
print("%L $%d F%d: %Q\n",
613
rgp->enter->prog->lineno,
618
print("%L $%d R%d: %Q\n",
619
rgp->enter->prog->lineno,
625
paint3(rgp->enter, rgp->varno, vreg, rgp->regno);
630
* peep-hole on basic block
632
if(!debug['R'] || debug['P']) {
639
* free aux structures
640
* adjust the stack pointer
641
* MOVW.W R1,-12(R13) <<- start
646
* BL ,runtime.newproc+0(SB)
647
* MOVW &ft+-32(SP),R7 <<- adjust
648
* MOVW &j+-40(SP),R6 <<- adjust
649
* MOVW autotmp_0003+-24(SP),R5 <<- adjust
650
* MOVW $12(R13),R13 <<- finish
653
for(p = firstp; p != P; p = p->link) {
654
while(p->link != P && p->link->as == ANOP)
655
p->link = p->link->link;
656
if(p->to.type == D_BRANCH)
657
while(p->to.branch != P && p->to.branch->as == ANOP)
658
p->to.branch = p->to.branch->link;
659
if(p->as == AMOVW && p->to.reg == 13) {
660
if(p->scond & C_WBIT) {
661
vreg = -p->to.offset; // in adjust region
662
// print("%P adjusting %d\n", p, vreg);
665
if(p->from.type == D_CONST && p->to.type == D_REG) {
666
if(p->from.offset != vreg)
667
print("in and out different\n");
668
// print("%P finish %d\n", p, vreg);
669
vreg = 0; // done adjust region
673
// print("%P %d %d from type\n", p, p->from.type, D_CONST);
674
// print("%P %d %d to type\n\n", p, p->to.type, D_REG);
677
if(p->as == AMOVW && vreg != 0) {
679
if(p->from.name == D_AUTO || p->from.name == D_PARAM) {
680
p->from.offset += vreg;
681
// print("%P adjusting from %d %d\n", p, vreg, p->from.type);
684
if(p->to.name == D_AUTO || p->to.name == D_PARAM) {
685
p->to.offset += vreg;
686
// print("%P adjusting to %d %d\n", p, vreg, p->from.type);
704
for(r = firstr; r != R; r = r->link) {
707
if(r->prog->as == ABL)
709
for(r1 = r->p2; r1 != R; r1 = r1->p2link) {
712
for(z=0; z<BITS; z++)
713
bit.b[z] = r1->calbehind.b[z] &
714
(r->refahead.b[z] | r->use1.b[z] | r->use2.b[z]) &
715
~(r->calahead.b[z] & addrs.b[z]);
718
bit.b[i/32] &= ~(1L << (i%32));
729
addmove(Reg *r, int bn, int rn, int f)
735
p1 = mal(sizeof(*p1));
741
p1->lineno = p->lineno;
749
a->offset = v->offset;
752
if(a->etype == TARRAY || a->sym == S)
756
fatal("addmove: shouldnt be doing this %A\n", a);
760
print("What is this %E\n", v->etype);
788
p1->from.type = D_REG;
791
p1->from.type = D_FREG;
792
p1->from.reg = rn-NREG;
803
if(v->etype == TUINT8 || v->etype == TBOOL)
805
if(v->etype == TUINT16)
809
print("%P\t.a%P\n", p, p1);
813
overlap(int32 o1, int w1, int32 o2, int w2)
820
if(!(t1 > o2 && t2 > o1))
827
mkvar(Reg *r, Adr *a)
830
int i, t, n, et, z, w, flag;
835
// mark registers used
845
print("type %d %d %D\n", t, a->name, a);
859
if(a->offset != NREG)
860
bit.b[0] |= RtoB(a->offset);
862
bit.b[0] |= RtoB(a->reg);
870
bit.b[0] = RtoB(a->reg);
877
if(a == &r->prog->from)
878
r->use1.b[0] |= RtoB(a->reg);
880
r->use2.b[0] |= RtoB(a->reg);
881
if(r->prog->scond & (C_PBIT|C_WBIT))
882
r->set.b[0] |= RtoB(a->reg);
889
bit.b[0] = FtoB(a->reg);
910
if(s->name[0] == '.')
916
for(i=0; i<nvar; i++) {
918
if(v->sym == s && v->name == n) {
925
// if they overlaps, disable both
926
if(overlap(v->offset, v->width, o, w)) {
942
if(debug['w'] > 1 && s)
943
fatal("variable not optimized: %D", a);
953
// v->gotype = a->gotype;
956
v->addr = flag; // funny punning
960
print("bit=%2d et=%E pun=%d %D\n", i, et, flag, a);
963
if(n == D_EXTERN || n == D_STATIC)
964
for(z=0; z<BITS; z++)
965
externs.b[z] |= bit.b[z];
967
for(z=0; z<BITS; z++)
968
params.b[z] |= bit.b[z];
977
prop(Reg *r, Bits ref, Bits cal)
982
for(r1 = r; r1 != R; r1 = r1->p1) {
983
for(z=0; z<BITS; z++) {
984
ref.b[z] |= r1->refahead.b[z];
985
if(ref.b[z] != r1->refahead.b[z]) {
986
r1->refahead.b[z] = ref.b[z];
989
cal.b[z] |= r1->calahead.b[z];
990
if(cal.b[z] != r1->calahead.b[z]) {
991
r1->calahead.b[z] = cal.b[z];
995
switch(r1->prog->as) {
997
if(noreturn(r1->prog))
999
for(z=0; z<BITS; z++) {
1000
cal.b[z] |= ref.b[z] | externs.b[z];
1006
for(z=0; z<BITS; z++) {
1013
for(z=0; z<BITS; z++) {
1014
cal.b[z] = externs.b[z] | ovar.b[z];
1019
for(z=0; z<BITS; z++) {
1020
ref.b[z] = (ref.b[z] & ~r1->set.b[z]) |
1021
r1->use1.b[z] | r1->use2.b[z];
1022
cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]);
1023
r1->refbehind.b[z] = ref.b[z];
1024
r1->calbehind.b[z] = cal.b[z];
1030
for(; r != r1; r = r->p1)
1031
for(r2 = r->p2; r2 != R; r2 = r2->p2link)
1032
prop(r2, r->refbehind, r->calbehind);
1036
* find looping structure
1038
* 1) find reverse postordering
1039
* 2) find approximate dominators,
1040
* the actual dominators if the flow graph is reducible
1041
* otherwise, dominators plus some other non-dominators.
1042
* See Matthew S. Hecht and Jeffrey D. Ullman,
1043
* "Analysis of a Simple Algorithm for Global Data Flow Problems",
1044
* Conf. Record of ACM Symp. on Principles of Prog. Langs, Boston, Massachusetts,
1045
* Oct. 1-3, 1973, pp. 207-217.
1046
* 3) find all nodes with a predecessor dominated by the current node.
1047
* such a node is a loop head.
1048
* recursively, all preds with a greater rpo number are in the loop
1051
postorder(Reg *r, Reg **rpo2r, int32 n)
1058
n = postorder(r1, rpo2r, n);
1061
n = postorder(r1, rpo2r, n);
1068
rpolca(int32 *idom, int32 rpo1, int32 rpo2)
1074
while(rpo1 != rpo2){
1091
doms(int32 *idom, int32 r, int32 s)
1099
loophead(int32 *idom, Reg *r)
1104
if(r->p1 != R && doms(idom, src, r->p1->rpo))
1106
for(r = r->p2; r != R; r = r->p2link)
1107
if(doms(idom, src, r->rpo))
1113
loopmark(Reg **rpo2r, int32 head, Reg *r)
1115
if(r->rpo < head || r->active == head)
1120
loopmark(rpo2r, head, r->p1);
1121
for(r = r->p2; r != R; r = r->p2link)
1122
loopmark(rpo2r, head, r);
1126
loopit(Reg *r, int32 nr)
1132
rpo2r = mal(nr * sizeof(Reg*));
1133
idom = mal(nr * sizeof(int32));
1136
d = postorder(r, rpo2r, 0);
1138
fatal("too many reg nodes");
1140
for(i = 0; i < nr / 2; i++){
1142
rpo2r[i] = rpo2r[nr - 1 - i];
1143
rpo2r[nr - 1 - i] = r1;
1145
for(i = 0; i < nr; i++)
1149
for(i = 0; i < nr; i++){
1153
if(r1->p1 != R && r1->p1->rpo < me)
1155
for(r1 = r1->p2; r1 != nil; r1 = r1->p2link)
1157
d = rpolca(idom, d, r1->rpo);
1161
for(i = 0; i < nr; i++){
1164
if(r1->p2 != R && loophead(idom, r1))
1165
loopmark(rpo2r, i, r1);
1170
synch(Reg *r, Bits dif)
1175
for(r1 = r; r1 != R; r1 = r1->s1) {
1176
for(z=0; z<BITS; z++) {
1177
dif.b[z] = (dif.b[z] &
1178
~(~r1->refbehind.b[z] & r1->refahead.b[z])) |
1179
r1->set.b[z] | r1->regdiff.b[z];
1180
if(dif.b[z] != r1->regdiff.b[z]) {
1181
r1->regdiff.b[z] = dif.b[z];
1188
for(z=0; z<BITS; z++)
1189
dif.b[z] &= ~(~r1->calbehind.b[z] & r1->calahead.b[z]);
1196
allreg(uint32 b, Rgn *r)
1206
fatal("unknown etype %d/%E", bitno(b), v->etype);
1221
if(i && r->cost >= 0) {
1230
if(i && r->cost >= 0) {
1248
paint1(Reg *r, int bn)
1257
if(r->act.b[z] & bb)
1260
if(!(r->refbehind.b[z] & bb))
1265
if(!(r1->refahead.b[z] & bb))
1267
if(r1->act.b[z] & bb)
1272
if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) {
1273
change -= CLOAD * r->loop;
1275
print("%d%P\td %Q $%d\n", r->loop,
1276
r->prog, blsh(bn), change);
1282
if(r->use1.b[z] & bb) {
1283
change += CREF * r->loop;
1285
print("%d%P\tu1 %Q $%d\n", r->loop,
1286
p, blsh(bn), change);
1289
if((r->use2.b[z]|r->set.b[z]) & bb) {
1290
change += CREF * r->loop;
1292
print("%d%P\tu2 %Q $%d\n", r->loop,
1293
p, blsh(bn), change);
1296
if(STORE(r) & r->regdiff.b[z] & bb) {
1297
change -= CLOAD * r->loop;
1299
print("%d%P\tst %Q $%d\n", r->loop,
1300
p, blsh(bn), change);
1303
if(r->refbehind.b[z] & bb)
1304
for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1305
if(r1->refahead.b[z] & bb)
1308
if(!(r->refahead.b[z] & bb))
1312
if(r1->refbehind.b[z] & bb)
1317
if(r->act.b[z] & bb)
1319
if(!(r->refbehind.b[z] & bb))
1325
paint2(Reg *r, int bn)
1334
if(!(r->act.b[z] & bb))
1337
if(!(r->refbehind.b[z] & bb))
1342
if(!(r1->refahead.b[z] & bb))
1344
if(!(r1->act.b[z] & bb))
1353
if(r->refbehind.b[z] & bb)
1354
for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1355
if(r1->refahead.b[z] & bb)
1356
vreg |= paint2(r1, bn);
1358
if(!(r->refahead.b[z] & bb))
1362
if(r1->refbehind.b[z] & bb)
1363
vreg |= paint2(r1, bn);
1367
if(!(r->act.b[z] & bb))
1369
if(!(r->refbehind.b[z] & bb))
1376
paint3(Reg *r, int bn, int32 rb, int rn)
1385
if(r->act.b[z] & bb)
1388
if(!(r->refbehind.b[z] & bb))
1393
if(!(r1->refahead.b[z] & bb))
1395
if(r1->act.b[z] & bb)
1400
if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb)
1401
addmove(r, bn, rn, 0);
1407
if(r->use1.b[z] & bb) {
1410
addreg(&p->from, rn);
1412
print("\t.c%P\n", p);
1414
if((r->use2.b[z]|r->set.b[z]) & bb) {
1419
print("\t.c%P\n", p);
1422
if(STORE(r) & r->regdiff.b[z] & bb)
1423
addmove(r, bn, rn, 1);
1426
if(r->refbehind.b[z] & bb)
1427
for(r1 = r->p2; r1 != R; r1 = r1->p2link)
1428
if(r1->refahead.b[z] & bb)
1429
paint3(r1, bn, rb, rn);
1431
if(!(r->refahead.b[z] & bb))
1435
if(r1->refbehind.b[z] & bb)
1436
paint3(r1, bn, rb, rn);
1440
if(r->act.b[z] & bb)
1442
if(!(r->refbehind.b[z] & bb))
1448
addreg(Adr *a, int rn)
1451
if(a->type == D_CONST)
1452
fatal("addreg: cant do this %D %d\n", a, rn);
1475
if(r < 2 || r >= REGTMP-2) // excluded R9 and R10 for m and g
1483
b &= 0x01fcL; // excluded R9 and R10 for m and g
1500
if(f < 2 || f > NFREG-1)
1502
return 1L << (f + 16);
1512
return bitno(b) - 16;
1515
static Sym* symlist[10];
1523
if(symlist[0] == S) {
1524
symlist[0] = pkglookup("panicindex", runtimepkg);
1525
symlist[1] = pkglookup("panicslice", runtimepkg);
1526
symlist[2] = pkglookup("throwinit", runtimepkg);
1527
symlist[3] = pkglookup("panic", runtimepkg);
1533
for(i=0; symlist[i]!=S; i++)
1545
print("%d:%P", r->loop, r->prog);
1546
for(z=0; z<BITS; z++)
1560
// if(bany(&r->set))
1561
// print(" s:%Q", r->set);
1562
// if(bany(&r->use1))
1563
// print(" u1:%Q", r->use1);
1564
// if(bany(&r->use2))
1565
// print(" u2:%Q", r->use2);
1566
// if(bany(&r->refbehind))
1567
// print(" rb:%Q ", r->refbehind);
1568
// if(bany(&r->refahead))
1569
// print(" ra:%Q ", r->refahead);
1570
// if(bany(&r->calbehind))
1571
// print("cb:%Q ", r->calbehind);
1572
// if(bany(&r->calahead))
1573
// print(" ca:%Q ", r->calahead);
1574
// if(bany(&r->regdiff))
1575
// print(" d:%Q ", r->regdiff);
1576
// if(bany(&r->act))
1577
// print(" a:%Q ", r->act);
1583
dumpit(char *str, Reg *r0)
1587
print("\n%s\n", str);
1588
for(r = r0; r != R; r = r->link) {
1593
for(; r1 != R; r1 = r1->p2link)
1594
print(" %.4ud", r1->prog->loc);
1600
// for(; r1 != R; r1 = r1->s1)
1601
// print(" %.4ud", r1->prog->loc);