1
// Copyright 2009 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
12
static Prog* appendp(Prog*, int, int, vlong, int, vlong);
15
// defframe (FRAGMENT — interior lines missing; the bare numeric lines below are
// original line numbers left over from extraction, not code).
// Records the function's argument size and final frame size in the TEXT prog
// (argwid in the high 32 bits of to.offset, frame size OR'd into the low bits),
// then emits code to zero the pointer-containing portion of the stack frame so
// the garbage collector never scans uninitialized words.
defframe(Prog *ptxt, Bvec *bv)
21
// fill in argument size
22
ptxt->to.offset = rnd(curfn->type->argwid, widthptr);
24
// fill in final stack size
25
ptxt->to.offset <<= 32;
26
frame = rnd(stksize+maxarg, widthptr);
27
ptxt->to.offset |= frame;
29
// insert code to clear pointered part of the frame,
30
// so that garbage collector only sees initialized values
31
// when it looks for pointers.
33
// Large zeroed region: use REP STOSQ (AX=0, CX=count, DI=start) instead of
// individual stores.
if(stkzerosize >= 8*widthptr) {
34
p = appendp(p, AMOVQ, D_CONST, 0, D_AX, 0);
35
p = appendp(p, AMOVQ, D_CONST, stkzerosize/widthptr, D_CX, 0);
36
p = appendp(p, ALEAQ, D_SP+D_INDIR, frame-stkzerosize, D_DI, 0);
37
p = appendp(p, AREP, D_NONE, 0, D_NONE, 0);
38
appendp(p, ASTOSQ, D_NONE, 0, D_NONE, 0);
40
// Small region: consult the pointer bitmap bv and zero only slots whose
// bits are set (two bits per word, hence the *2 and the j/j+1 probe).
// NOTE(review): initialization of j's base offset is partially elided here.
j = (stkptrsize - stkzerosize)/widthptr * 2;
41
for(i=0; i<stkzerosize; i+=widthptr) {
42
if(bvget(bv, j) || bvget(bv, j+1))
43
p = appendp(p, AMOVQ, D_CONST, 0, D_SP+D_INDIR, frame-stkzerosize+i);
50
// appendp (FRAGMENT): inserts a new Prog q after p with the given opcode and
// operands, copying p's line number. NOTE(review): the allocation of q and the
// link-list splice are missing from this listing — confirm against the full file.
appendp(Prog *p, int as, int ftype, vlong foffset, int ttype, vlong toffset)
57
q->lineno = p->lineno;
59
q->from.offset = foffset;
61
q->to.offset = toffset;
67
// (FRAGMENT of an unnamed function — the enclosing signature is not visible.)
// Walks the instruction list and marks stack-auto nodes referenced by either
// operand as used, so unused autos can later be discarded.
// Sweep the prog list to mark any used nodes.
71
for (; p; p = p->link) {
75
if (p->from.type == D_AUTO && p->from.node)
76
p->from.node->used = 1;
78
// NOTE(review): the matching "p->to.node->used = 1;" line is elided here.
if (p->to.type == D_AUTO && p->to.node)
83
// (FRAGMENT of an unnamed function — signature not visible.) After allocauto
// reassigns stack offsets, rewrites every D_AUTO operand by its node's
// stkdelta, and (per the ATYPE test) appears to drop type pseudo-instructions
// for autos that were never used.
// Fixup instructions after allocauto (formerly compactframe) has moved all autos around.
89
for (lp=&p; (p=*lp) != P; ) {
90
if (p->as == ATYPE && p->from.node && p->from.type == D_AUTO && !p->from.node->used) {
94
if (p->from.type == D_AUTO && p->from.node)
95
p->from.offset += p->from.node->stkdelta;
97
if (p->to.type == D_AUTO && p->to.node)
98
p->to.offset += p->to.node->stkdelta;
108
// ginscall (FRAGMENT): generate a call to f, dispatching on proc:
//   0 normal call; -1 call with no return; 1 go statement; 2 defer;
//   3 direct call through a C function pointer.
// NOTE(review): every occurrence of "®" below is mojibake for "&reg"
// (the HTML entity &reg; was mis-decoded during extraction) — the original
// code passes the address of a local Node named reg. Restore before compiling.
* proc=-1 normal call but no return
110
* proc=1 goroutine run in new proc
111
* proc=2 defer call save away stack
112
* proc=3 normal call to C pointer (not Go func value)
115
ginscall(Node *f, int proc)
126
// Most functions have a fixed-size argument block, so traceback uses that during unwind.
127
// Not all, though: there are some variadic functions in package runtime,
128
// and for those we emit call-specific metadata recorded by caller.
129
// Reflect generates functions with variable argsize (see reflect.methodValueCall/makeFuncStub),
130
// so we do this for all indirect calls as well.
131
if(f->type != T && (f->sym == S || (f->sym != S && f->sym->pkg == runtimepkg) || proc == 1 || proc == 2)) {
132
arg = f->type->argwid;
133
if(proc == 1 || proc == 2)
142
fatal("ginscall: bad proc %d", proc);
145
case 0: // normal call
146
case -1: // normal call but no return
147
if(f->op == ONAME && f->class == PFUNC) {
148
if(f == deferreturn) {
149
// Deferred calls will appear to be returning to
150
// the CALL deferreturn(SB) that we are about to emit.
151
// However, the stack trace code will show the line
152
// of the instruction byte before the return PC.
153
// To avoid that being an unrelated instruction,
154
// insert an x86 NOP that we will have the right line number.
155
// x86 NOP 0x90 is really XCHG AX, AX; use that description
156
// because the NOP pseudo-instruction would be removed by
158
nodreg(®, types[TINT], D_AX);
159
gins(AXCHGL, ®, ®);
161
p = gins(ACALL, N, f);
163
if(proc == -1 || noreturn(p))
167
nodreg(®, types[tptr], D_DX);
168
nodreg(&r1, types[tptr], D_BX);
173
gins(ACALL, ®, &r1);
176
case 3: // normal call of c function pointer
180
case 1: // call in new proc (go)
181
case 2: // deferred call (defer)
182
nodreg(®, types[TINT64], D_CX);
183
if(flag_largemodel) {
184
regalloc(&r1, f->type, f);
186
gins(APUSHQ, &r1, N);
191
// Push the argument size, then call the runtime helper
// (newproc for go, deferproc for defer).
nodconst(&con, types[TINT32], argsize(f->type));
192
gins(APUSHQ, &con, N);
194
ginscall(newproc, 0);
197
fatal("hasdefer=0 but has defer");
198
ginscall(deferproc, 0);
200
gins(APOPQ, N, ®);
201
gins(APOPQ, N, ®);
203
// For defer: test deferproc's AX result; nonzero means a panic is in
// flight, so branch to the function's return epilogue (retpc).
nodreg(®, types[TINT64], D_AX);
204
gins(ATESTQ, ®, ®);
205
patch(gbranch(AJNE, T, -1), retpc);
215
// cgen_callinter (FRAGMENT): generate a call to an interface method.
// Loads i.data into 0(SP) as the receiver, loads i.tab, indexes the method
// table at n->left->xoffset + 3*widthptr + 8, then calls (or, for go/defer,
// takes the address of) the function pointer via ginscall.
* n is call to interface method.
219
cgen_callinter(Node *n, Node *res, int proc)
222
Node tmpi, nodi, nodo, nodr, nodsp;
225
if(i->op != ODOTINTER)
226
fatal("cgen_callinter: not ODOTINTER %O", i->op);
228
f = i->right; // field
230
fatal("cgen_callinter: not ONAME %O", f->op);
232
i = i->left; // interface
235
tempname(&tmpi, i->type);
240
genlist(n->list); // assign the args
242
// i is now addable, prepare an indirected
243
// register to hold its address.
244
igen(i, &nodi, res); // REG = &inter
246
nodindreg(&nodsp, types[tptr], D_SP);
247
nodi.type = types[tptr];
248
nodi.xoffset += widthptr;
249
cgen(&nodi, &nodsp); // 0(SP) = 8(REG) -- i.data
251
regalloc(&nodo, types[tptr], res);
252
nodi.type = types[tptr];
253
nodi.xoffset -= widthptr;
254
cgen(&nodi, &nodo); // REG = 0(REG) -- i.tab
257
regalloc(&nodr, types[tptr], &nodo);
258
if(n->left->xoffset == BADWIDTH)
259
fatal("cgen_callinter: badwidth");
260
cgen_checknil(&nodo); // in case offset is huge
262
nodo.xoffset = n->left->xoffset + 3*widthptr + 8;
264
// plain call: use direct c function pointer - more efficient
265
cgen(&nodo, &nodr); // REG = 32+offset(REG) -- i.tab->fun[f]
268
// go/defer. generate go func value.
269
gins(ALEAQ, &nodo, &nodr); // REG = &(32+offset(REG)) -- i.tab->fun[f]
272
nodr.type = n->left->type;
273
ginscall(&nodr, proc);
280
// cgen_call (FRAGMENT): generate an ordinary function call (proc as in
// ginscall). Three visible paths: precompute the callee address into a temp
// when its evaluation itself calls functions (ullman >= UINF); call through a
// register for non-PFUNC callees; direct ginscall for plain named functions.
* generate function call;
282
* proc=1 goroutine run in new proc
283
* proc=2 defer call save away stack
286
cgen_call(Node *n, int proc)
294
if(n->left->ullman >= UINF) {
295
// if name involves a fn call
296
// precompute the address of the fn
297
tempname(&afun, types[tptr]);
298
cgen(n->left, &afun);
301
genlist(n->list); // assign the args
304
// call tempname pointer
305
if(n->left->ullman >= UINF) {
306
regalloc(&nod, types[tptr], N);
307
cgen_as(&nod, &afun);
309
ginscall(&nod, proc);
315
if(n->left->op != ONAME || n->left->class != PFUNC) {
316
regalloc(&nod, types[tptr], N);
317
cgen_as(&nod, n->left);
319
ginscall(&nod, proc);
326
ginscall(n->left, proc);
330
// cgen_callret (FRAGMENT): after a call has been emitted, copy the return
// value from its stack slot (fp->width off SP) into res. The result slot is
// described by a synthesized SP-relative Node.
* call to n has already been generated.
332
* res = return value from call.
335
cgen_callret(Node *n, Node *res)
342
if(t->etype == TPTR32 || t->etype == TPTR64)
345
fp = structfirst(&flist, getoutarg(t));
347
fatal("cgen_callret: nil");
349
memset(&nod, 0, sizeof(nod));
351
nod.val.u.reg = D_SP;
354
nod.xoffset = fp->width;
360
// cgen_aret (FRAGMENT): after a call has been emitted, store the ADDRESS of
// the return value (LEA of its SP-relative slot) into res; goes through a
// scratch register when res is not itself a register.
* call to n has already been generated.
362
* res = &return value from call.
365
cgen_aret(Node *n, Node *res)
375
fp = structfirst(&flist, getoutarg(t));
377
fatal("cgen_aret: nil");
379
memset(&nod1, 0, sizeof(nod1));
381
nod1.val.u.reg = D_SP;
384
nod1.xoffset = fp->width;
385
nod1.type = fp->type;
387
if(res->op != OREGISTER) {
388
regalloc(&nod2, types[tptr], res);
389
gins(ALEAQ, &nod1, &nod2);
390
gins(AMOVQ, &nod2, res);
393
gins(ALEAQ, &nod1, res);
398
// (FRAGMENT of the return-statement generator — signature not visible.)
// Copies out the return values, then emits RET; for ORETJMP the RET is
// retargeted into a jump to an external symbol (tail call).
* n->left is assignments to return values.
405
genlist(n->list); // copy out args
406
if(hasdefer || curfn->exit) {
410
p = gins(ARET, N, N);
411
if(n->op == ORETJMP) {
412
p->to.type = D_EXTERN;
413
p->to.sym = n->left->sym;
418
// (FRAGMENT of the op-assign generator, "+= *= etc." — signature not visible.)
// Visible strategy: special-case x += 1 / x -= 1 as INC/DEC when the operand
// is sudoaddable; otherwise materialize the RHS (register for most operands,
// no register for literals, temp when both sides are call-heavy) and emit the
// two-operand form directly on the addressable LHS when possible.
* generate += *= etc.
432
if(nr->ullman >= UINF && nl->ullman >= UINF) {
433
tempname(&n1, nr->type);
441
if(!isint[nl->type->etype])
443
if(!isint[nr->type->etype])
448
if(smallintconst(nr))
449
if(mpgetfix(nr->val.u.xval) == 1) {
450
a = optoas(OINC, nl->type);
455
if(sudoaddable(a, nl, &addr)) {
465
if(smallintconst(nr))
466
if(mpgetfix(nr->val.u.xval) == 1) {
467
a = optoas(ODEC, nl->type);
472
if(sudoaddable(a, nl, &addr)) {
488
a = optoas(n->etype, nl->type);
490
if(smallintconst(nr)) {
494
regalloc(&n2, nr->type, N);
500
if(nr->ullman < UINF)
501
if(sudoaddable(a, nl, &addr)) {
502
if(smallintconst(nr)) {
508
regalloc(&n2, nr->type, N);
510
p1 = gins(a, &n2, N);
521
if(nr->op == OLITERAL) {
522
// don't allocate a register for literals.
523
} else if(nr->ullman >= nl->ullman || nl->addable) {
524
regalloc(&n2, nr->type, N);
528
tempname(&n2, nr->type);
542
regalloc(&n4, nl->type, N);
548
if(n2.op == OREGISTER)
557
// samereg (FRAGMENT): reports whether a and b are the same machine register
// (both must be OREGISTER with equal reg numbers). NOTE(review): the return
// statements are elided in this listing.
samereg(Node *a, Node *b)
561
if(a->op != OREGISTER)
563
if(b->op != OREGISTER)
565
if(a->val.u.reg != b->val.u.reg)
578
// dodiv (FRAGMENT): emit hardware divide/modulo. Saves and dedicates AX/DX
// (via savex), widens int8/int16 operands, and for signed types emits an
// explicit compare against -1 to sidestep the INT_MIN / -1 hardware trap;
// unsigned division zeroes DX, signed uses the sign-extend instruction.
dodiv(int op, Node *nl, Node *nr, Node *res)
583
Node ax, dx, ax1, n31, oldax, olddx;
586
// Have to be careful about handling
587
// most negative int divided by -1 correctly.
588
// The hardware will trap.
589
// Also the byte divide instruction needs AH,
590
// which we otherwise don't have to deal with.
591
// Easiest way to avoid for int8, int16: use int32.
592
// For int32 and int64, use explicit test.
593
// Could use int64 hw for int32.
597
if(issigned[t->etype]) {
599
if(isconst(nl, CTINT) && mpgetfix(nl->val.u.xval) != -(1ULL<<(t->width*8-1)))
601
else if(isconst(nr, CTINT) && mpgetfix(nr->val.u.xval) != -1)
605
if(issigned[t->etype])
613
regalloc(&n3, t0, N);
614
if(nl->ullman >= nr->ullman) {
615
savex(D_AX, &ax, &oldax, res, t0);
617
regalloc(&ax, t0, &ax); // mark ax live during cgen
622
savex(D_AX, &ax, &oldax, res, t0);
637
// Runtime guard: if divisor == -1, negate (for ODIV) instead of dividing.
nodconst(&n4, t, -1);
638
gins(optoas(OCMP, t), &n3, &n4);
639
p1 = gbranch(optoas(ONE, t), T, +1);
642
gins(optoas(OMINUS, t), N, &ax);
649
p2 = gbranch(AJMP, T, 0);
652
savex(D_DX, &dx, &olddx, res, t);
653
if(!issigned[t->etype]) {
657
gins(optoas(OEXTEND, t), N, N);
671
// savex (FRAGMENT): temporarily evacuate a special register (AX, CX, DI, ...)
// into a freshly allocated scratch register when it is live as someone else's
// temporary (reg refcount r > 1) and is not the destination; stashes the old
// refcount in oldx->ostk so restx can undo the move.
* register dr is one of the special ones (AX, CX, DI, SI, etc.).
672
* we need to use it. if it is already allocated as a temporary
673
* (r > 1; can only happen if a routine like sgen passed a
674
* special as cgen's res and then cgen used regalloc to reuse
675
* it as its own temporary), then move it for now to another
676
* register. caller must call restx to move it back.
677
* the move is not necessary if dr == res, because res is
681
savex(int dr, Node *x, Node *oldx, Node *res, Type *t)
687
// save current ax and dx if they are live
688
// and not the destination
689
memset(oldx, 0, sizeof *oldx);
691
if(r > 1 && !samereg(x, res)) {
692
regalloc(oldx, types[TINT64], N);
693
x->type = types[TINT64];
696
oldx->ostk = r; // squirrel away old r value
702
// restx (FRAGMENT): inverse of savex — restore the special register's saved
// refcount from oldx->ostk. NOTE(review): the move-back and regfree calls are
// elided in this listing.
restx(Node *x, Node *oldx)
705
x->type = types[TINT64];
706
reg[x->val.u.reg] = oldx->ostk;
713
// cgen_div (FRAGMENT): generate division/modulo by op. For division by a
// constant it uses the magic-number multiply technique (Hacker's Delight
// ch. 10): unsigned path uses m.um/m.ud with an add+RCR fixup on overflow;
// signed path uses m.sm/m.sd with a sign-correction subtract and a final
// negate for negative divisors. Modulo by a constant is rewritten as
// A - (A/B)*B; everything else falls back to dodiv's hardware divide.
* generate division according to op, one of:
718
cgen_div(int op, Node *nl, Node *nr, Node *res)
724
if(nr->op != OLITERAL)
726
w = nl->type->width*8;
728
// Front end handled 32-bit division. We only need to handle 64-bit.
729
// try to do division by multiply by (2^w)/d
730
// see hacker's delight chapter 10
731
switch(simtype[nl->type->etype]) {
737
m.ud = mpgetfix(nr->val.u.xval);
745
nodconst(&n2, nl->type, m.um);
746
regalloc(&n3, nl->type, res);
747
cgen_hmul(&n1, &n2, &n3);
750
// need to add numerator accounting for overflow
751
gins(optoas(OADD, nl->type), &n1, &n3);
752
nodconst(&n2, nl->type, 1);
753
gins(optoas(ORROTC, nl->type), &n2, &n3);
754
nodconst(&n2, nl->type, m.s-1);
755
gins(optoas(ORSH, nl->type), &n2, &n3);
757
nodconst(&n2, nl->type, m.s);
758
gins(optoas(ORSH, nl->type), &n2, &n3); // shift dx
768
m.sd = mpgetfix(nr->val.u.xval);
776
nodconst(&n2, nl->type, m.sm);
777
regalloc(&n3, nl->type, N);
778
cgen_hmul(&n1, &n2, &n3);
781
// need to add numerator
782
gins(optoas(OADD, nl->type), &n1, &n3);
785
nodconst(&n2, nl->type, m.s);
786
gins(optoas(ORSH, nl->type), &n2, &n3); // shift n3
788
nodconst(&n2, nl->type, w-1);
789
gins(optoas(ORSH, nl->type), &n2, &n1); // -1 iff num is neg
790
gins(optoas(OSUB, nl->type), &n1, &n3); // added
793
// this could probably be removed
794
// by factoring it into the multiplier
795
gins(optoas(OMINUS, nl->type), N, &n3);
806
// division and mod using (slow) hardware instruction
807
dodiv(op, nl, nr, res);
811
// mod using formula A%B = A-(A/B*B) but
812
// we know that there is a fast algorithm for A/B
813
regalloc(&n1, nl->type, res);
815
regalloc(&n2, nl->type, N);
816
cgen_div(ODIV, &n1, nr, &n2);
817
a = optoas(OMUL, nl->type);
819
// use 2-operand 16-bit multiply
820
// because there is no 2-operand 8-bit multiply
823
if(!smallintconst(nr)) {
824
regalloc(&n3, nl->type, N);
830
gins(optoas(OSUB, nl->type), &n2, &n1);
837
// cgen_hmul (FRAGMENT): compute the high word of nl*nr, res = (nl*nr) >> width.
// Evaluates the higher-ullman operand first (operands swapped when needed),
// uses AX/DX for the widening multiply; byte-width multiplies read the high
// half from AH/DL instead of DX.
* generate high multiply:
838
* res = (nl*nr) >> width
841
cgen_hmul(Node *nl, Node *nr, Node *res)
845
Node n1, n2, ax, dx, *tmp;
848
a = optoas(OHMUL, t);
849
if(nl->ullman < nr->ullman) {
856
nodreg(&ax, t, D_AX);
863
// byte multiply behaves differently.
864
nodreg(&ax, t, D_AH);
865
nodreg(&dx, t, D_DL);
868
nodreg(&dx, t, D_DX);
873
// cgen_shift (FRAGMENT): generate a shift. Constant counts are emitted
// directly (counts >= width become two width-1 shifts). Variable counts go
// through CX — the old CX value is spilled when live and not the destination —
// and unless `bounded`, an explicit compare against the type width patches in
// the overshoot result (sign-fill for signed ORSH, zero otherwise).
* generate shift according to op, one of:
878
cgen_shift(int op, int bounded, Node *nl, Node *nr, Node *res)
880
Node n1, n2, n3, n4, n5, cx, oldcx;
886
a = optoas(op, nl->type);
888
if(nr->op == OLITERAL) {
889
regalloc(&n1, nl->type, res);
891
sc = mpgetfix(nr->val.u.xval);
892
if(sc >= nl->type->width*8) {
893
// large shift gets 2 shifts by width-1
894
nodconst(&n3, types[TUINT32], nl->type->width*8-1);
904
if(nl->ullman >= UINF) {
905
tempname(&n4, nl->type);
909
if(nr->ullman >= UINF) {
910
tempname(&n5, nr->type);
916
nodreg(&n1, types[TUINT32], D_CX);
918
// Allow either uint32 or uint64 as shift type,
919
// to avoid unnecessary conversion from uint32 to uint64
920
// just to do the comparison.
921
tcount = types[simtype[nr->type->etype]];
922
if(tcount->etype < TUINT32)
923
tcount = types[TUINT32];
925
regalloc(&n1, nr->type, &n1); // to hold the shift type in CX
926
regalloc(&n3, tcount, &n1); // to clear high bits of CX
928
nodreg(&cx, types[TUINT64], D_CX);
929
memset(&oldcx, 0, sizeof oldcx);
930
if(rcx > 0 && !samereg(&cx, res)) {
931
regalloc(&oldcx, types[TUINT64], N);
936
if(samereg(&cx, res))
937
regalloc(&n2, nl->type, N);
939
regalloc(&n2, nl->type, res);
940
if(nl->ullman >= nr->ullman) {
951
// test and fix up large shifts
953
nodconst(&n3, tcount, nl->type->width*8);
954
gins(optoas(OCMP, tcount), &n1, &n3);
955
p1 = gbranch(optoas(OLT, tcount), T, +1);
956
if(op == ORSH && issigned[nl->type->etype]) {
957
nodconst(&n3, types[TUINT32], nl->type->width*8-1);
960
nodconst(&n3, nl->type, 0);
969
cx.type = types[TUINT64];
984
// cgen_bmul (FRAGMENT): byte multiply. x86 has no 2-operand 8-bit multiply,
// so the byte operands are widened (signedness-dependent type t), multiplied
// at full width by aliasing the same registers at type t, and the result is
// truncated back (truncation not visible in this fragment).
* generate byte multiply:
986
* there is no 2-operand byte multiply instruction so
987
* we do a full-width multiplication and truncate afterwards.
990
cgen_bmul(int op, Node *nl, Node *nr, Node *res)
992
Node n1, n2, n1b, n2b, *tmp;
996
// largest ullman on left.
997
if(nl->ullman < nr->ullman) {
1003
// generate operands in "8-bit" registers.
1004
regalloc(&n1b, nl->type, res);
1006
regalloc(&n2b, nr->type, N);
1009
// perform full-width multiplication.
1011
if(issigned[nl->type->etype])
1013
nodreg(&n1, t, n1b.val.u.reg);
1014
nodreg(&n2, t, n2b.val.u.reg);
1028
// clearfat (FRAGMENT — the function's signature line is missing; the
// dump("\nclearfat", ...) call identifies it). Zeroes a large ("fat") object:
// tries componentgen for simple types, otherwise saves DI/AX, zeroes AX, and
// uses REP STOSQ for whole quadwords plus REP STOSB / single STOSB for the
// remaining q quads / c bytes.
Node n1, oldn1, ax, oldax;
1030
/* clear a fat object */
1032
dump("\nclearfat", nl);
1035
w = nl->type->width;
1036
// Avoid taking the address for simple enough types.
1037
if(componentgen(N, nl))
1043
savex(D_DI, &n1, &oldn1, N, types[tptr]);
1046
savex(D_AX, &ax, &oldax, N, types[tptr]);
1047
gconreg(AMOVQ, 0, D_AX);
1050
gconreg(AMOVQ, q, D_CX);
1051
gins(AREP, N, N); // repeat
1052
gins(ASTOSQ, N, N); // STOQ AL,*(DI)+
1055
gins(ASTOSQ, N, N); // STOQ AL,*(DI)+
1060
gconreg(AMOVQ, c, D_CX);
1061
gins(AREP, N, N); // repeat
1062
gins(ASTOSB, N, N); // STOB AL,*(DI)+
1065
gins(ASTOSB, N, N); // STOB AL,*(DI)+
1073
// expandchecks (FRAGMENT): late pass expanding each ACHECKNIL pseudo-op into
// a real nil check — a compare of the register against 0 (to.type = D_CONST),
// a likely-taken JNE over the crash, and a deliberate faulting store through
// the nil pointer (0(reg) when the operand is a register, else address 0).
// Called after regopt and peep have run.
1074
// Expand CHECKNIL pseudo-op into actual nil pointer check.
1076
expandchecks(Prog *firstp)
1080
for(p = firstp; p != P; p = p->link) {
1081
if(p->as != ACHECKNIL)
1083
if(debug_checknil && p->lineno > 1) // p->lineno==1 in generated wrappers
1084
warnl(p->lineno, "generated nil check");
1087
// JNE 2(PC) (likely)
1089
p1 = mal(sizeof *p1);
1090
p2 = mal(sizeof *p2);
1096
p1->lineno = p->lineno;
1097
p2->lineno = p->lineno;
1101
p->to.type = D_CONST;
1104
p1->from.type = D_CONST;
1105
p1->from.offset = 1; // likely
1106
p1->to.type = D_BRANCH;
1107
p1->to.u.branch = p2->link;
1108
// crash by write to memory address 0.
1109
// if possible, since we know arg is 0, use 0(arg),
1110
// which will be shorter to encode than plain 0.
1112
p2->from.type = D_AX;
1113
if(regtyp(&p->from))
1114
p2->to.type = p->from.type + D_INDIR;
1116
p2->to.type = D_INDIR+D_NONE;