725
781
* generate byte multiply:
727
* no byte multiply instruction so have to do
728
* 16-bit multiply and take bottom half.
783
* there is no 2-operand byte multiply instruction so
784
* we do a full-width multiplication and truncate afterwards.
731
787
cgen_bmul(int op, Node *nl, Node *nr, Node *res)
733
Node n1b, n2b, n1w, n2w;
789
Node n1, n2, nt, *tmp;
737
if(nl->ullman >= nr->ullman) {
738
regalloc(&n1b, nl->type, res);
740
regalloc(&n2b, nr->type, N);
743
regalloc(&n2b, nr->type, N);
745
regalloc(&n1b, nl->type, res);
793
// copy from byte to full registers
795
if(issigned[nl->type->etype])
798
// largest ullman on left.
799
if(nl->ullman < nr->ullman) {
749
// copy from byte to short registers
751
if(issigned[nl->type->etype])
754
regalloc(&n2w, t, &n2b);
757
regalloc(&n1w, t, &n1b);
805
tempname(&nt, nl->type);
807
regalloc(&n1, t, res);
760
811
a = optoas(op, t);
772
regcmp(const void *va, const void *vb)
778
return ra->local - rb->local;
781
static Prog* throwpc;
783
// We're only going to bother inlining if we can
784
// convert all the arguments to 32 bits safely. Can we?
786
fix64(NodeList *nn, int n)
795
if(is64(r->type) && !smallintconst(r)) {
807
getargs(NodeList *nn, Node *reg, int n)
821
else if(smallintconst(r))
822
r->type = types[TUINT32];
826
if(!smallintconst(r) && !isslice(r->type)) {
827
if(i < 3) // AX CX DX
828
nodreg(reg+i, r->type, D_AX+i);
831
regalloc(reg+i, r->type, reg+i);
835
if(reg[i].local != 0)
836
yyerror("local used");
837
reg[i].local = l->n->left->xoffset;
840
qsort((void*)reg, n, sizeof(*reg), regcmp);
846
cmpandthrow(Node *nl, Node *nr)
819
* generate high multiply:
820
* res = (nl*nr) >> width
823
cgen_hmul(Node *nl, Node *nr, Node *res)
855
if(smallintconst(nl)) {
856
cl = mpgetfix(nl->val.u.xval);
859
if(smallintconst(nr))
861
// put the constant on the right
830
a = optoas(OHMUL, t);
835
regalloc(&n2, t, res);
839
nodreg(&ax, t, D_AX);
845
// byte multiply behaves differently.
846
nodreg(&ax, t, D_AH);
847
nodreg(&dx, t, D_DL);
850
nodreg(&dx, t, D_DX);
854
static void cgen_float387(Node *n, Node *res);
855
static void cgen_floatsse(Node *n, Node *res);
858
* generate floating-point operation.
861
cgen_float(Node *n, Node *res)
874
p1 = gbranch(AJMP, T, 0);
876
gmove(nodbool(1), res);
877
p3 = gbranch(AJMP, T, 0);
880
gmove(nodbool(0), res);
889
if(eqtype(n->type, nl->type) || noconv(n->type, nl->type)) {
894
tempname(&n2, n->type);
903
cgen_floatsse(n, res);
905
cgen_float387(n, res);
908
// floating-point. 387 (not SSE2)
910
cgen_float387(Node *n, Node *res)
917
nodreg(&f0, nl->type, D_F0);
918
nodreg(&f1, n->type, D_F0+1);
924
if(n->op != OCONV && n->op != OPLUS)
925
gins(foptoas(n->op, n->type, 0), N, N);
930
if(nl->ullman >= nr->ullman) {
933
gins(foptoas(n->op, n->type, 0), nr, &f0);
936
gins(foptoas(n->op, n->type, Fpop), &f0, &f1);
941
gins(foptoas(n->op, n->type, Frev), nl, &f0);
944
gins(foptoas(n->op, n->type, Frev|Fpop), &f0, &f1);
953
cgen_floatsse(Node *n, Node *res)
963
dump("cgen_floatsse", n);
964
fatal("cgen_floatsse %O", n->op);
969
nr = nodintconst(-1);
970
convlit(&nr, n->type);
971
a = foptoas(OMUL, nl->type, 0);
977
a = foptoas(n->op, nl->type, 0);
984
a = foptoas(n->op, nl->type, 0);
988
sbop: // symmetric binary
989
if(nl->ullman < nr->ullman || nl->op == OLITERAL) {
868
// Arguments are known not to be 64-bit,
869
// but they might be smaller than 32 bits.
870
// Check if we need to use a temporary.
871
// At least one of the arguments is 32 bits
872
// (the len or cap) so one temporary suffices.
875
if(nl->type->width != t->width) {
876
regalloc(&n1, t, nl);
995
abop: // asymmetric binary
996
if(nl->ullman >= nr->ullman) {
997
tempname(&nt, nl->type);
1000
regalloc(&n1, nl->type, res);
1007
regalloc(&n2, nr->type, res);
1009
regalloc(&n1, nl->type, N);
1020
bgen_float(Node *n, int true, int likely, Prog *to)
1024
Node n1, n2, n3, tmp, t1, t2, ax;
1031
// brcom is not valid on floats when NaN is involved.
1032
p1 = gbranch(AJMP, T, 0);
1033
p2 = gbranch(AJMP, T, 0);
1035
// No need to avoid re-genning ninit.
1036
bgen_float(n, 1, -likely, p2);
1037
patch(gbranch(AJMP, T, 0), to);
1048
a = brrev(a); // because the args are stacked
1049
if(a == OGE || a == OGT) {
1050
// only < and <= work right with NaN; reverse if needed
1057
nodreg(&tmp, nr->type, D_F0);
1058
nodreg(&n2, nr->type, D_F0 + 1);
1059
nodreg(&ax, types[TUINT16], D_AX);
1060
et = simsimtype(nr->type);
1061
if(et == TFLOAT64) {
1062
if(nl->ullman > nr->ullman) {
1065
gins(AFXCHD, &tmp, &n2);
1070
gins(AFUCOMIP, &tmp, &n2);
1071
gins(AFMOVDP, &tmp, &tmp); // annoying pop but still better than STSW+SAHF
1073
// TODO(rsc): The moves back and forth to memory
1074
// here are for truncating the value to 32 bits.
1075
// This handles 32-bit comparison but presumably
1076
// all the other ops have the same problem.
1077
// We need to figure out what the right general
1078
// solution is, besides telling people to use float64.
1079
tempname(&t1, types[TFLOAT32]);
1080
tempname(&t2, types[TFLOAT32]);
1084
gins(AFCOMFP, &t1, &tmp);
1085
gins(AFSTSW, N, &ax);
1093
tempname(&n1, nl->type);
879
} else if(nr->type->width != t->width) {
880
regalloc(&n1, t, nr);
884
gins(optoas(OCMP, t), nl, nr);
888
p1 = gbranch(optoas(op, t), T);
890
ginscall(panicslice, 0);
1098
tempname(&tmp, nr->type);
1102
regalloc(&n2, nr->type, N);
1106
if(nl->op != OREGISTER) {
1107
regalloc(&n3, nl->type, N);
1112
if(a == OGE || a == OGT) {
1113
// only < and <= work right with NaN; reverse if needed
1120
gins(foptoas(OCMP, nr->type, 0), nl, nr);
1121
if(nl->op == OREGISTER)
1128
p1 = gbranch(AJNE, T, -likely);
1129
p2 = gbranch(AJPS, T, -likely);
1130
patch(gbranch(AJMP, T, 0), to);
894
p1 = gbranch(optoas(op, t), T);
909
// generate inline code for
914
cgen_inline(Node *n, Node *res)
917
Node n1, n2, nres, ntemp;
921
if(n->op != OCALLFUNC)
923
if(!n->left->addable)
925
if(n->left->sym == S)
927
if(n->left->sym->pkg != runtimepkg)
929
if(strcmp(n->left->sym->name, "slicearray") == 0)
931
if(strcmp(n->left->sym->name, "sliceslice") == 0) {
935
if(strcmp(n->left->sym->name, "sliceslice1") == 0) {
944
if(!fix64(n->list, 5))
946
getargs(n->list, nodes, 5);
948
// if(hb[3] > nel[1]) goto throw
949
cmpandthrow(&nodes[3], &nodes[1]);
951
// if(lb[2] > hb[3]) goto throw
952
cmpandthrow(&nodes[2], &nodes[3]);
954
// len = hb[3] - lb[2] (destroys hb)
956
n2.xoffset += Array_nel;
957
n2.type = types[TUINT32];
959
if(smallintconst(&nodes[3]) && smallintconst(&nodes[2])) {
960
v = mpgetfix(nodes[3].val.u.xval) -
961
mpgetfix(nodes[2].val.u.xval);
962
nodconst(&n1, types[TUINT32], v);
963
gins(optoas(OAS, types[TUINT32]), &n1, &n2);
965
regalloc(&n1, types[TUINT32], &nodes[3]);
966
gmove(&nodes[3], &n1);
967
if(!smallintconst(&nodes[2]) || mpgetfix(nodes[2].val.u.xval) != 0)
968
gins(optoas(OSUB, types[TUINT32]), &nodes[2], &n1);
969
gins(optoas(OAS, types[TUINT32]), &n1, &n2);
973
// cap = nel[1] - lb[2] (destroys nel)
975
n2.xoffset += Array_cap;
976
n2.type = types[TUINT32];
978
if(smallintconst(&nodes[1]) && smallintconst(&nodes[2])) {
979
v = mpgetfix(nodes[1].val.u.xval) -
980
mpgetfix(nodes[2].val.u.xval);
981
nodconst(&n1, types[TUINT32], v);
982
gins(optoas(OAS, types[TUINT32]), &n1, &n2);
984
regalloc(&n1, types[TUINT32], &nodes[1]);
985
gmove(&nodes[1], &n1);
986
if(!smallintconst(&nodes[2]) || mpgetfix(nodes[2].val.u.xval) != 0)
987
gins(optoas(OSUB, types[TUINT32]), &nodes[2], &n1);
988
gins(optoas(OAS, types[TUINT32]), &n1, &n2);
992
// if slice could be too big, dereference to
993
// catch nil array pointer.
994
if(nodes[0].op == OREGISTER && nodes[0].type->type->width >= unmappedzero) {
998
n2.type = types[TUINT8];
999
gins(ATESTB, nodintconst(0), &n2);
1002
// ary = old[0] + (lb[2] * width[4]) (destroys old)
1004
n2.xoffset += Array_array;
1005
n2.type = types[tptr];
1007
if(smallintconst(&nodes[2]) && smallintconst(&nodes[4])) {
1008
v = mpgetfix(nodes[2].val.u.xval) *
1009
mpgetfix(nodes[4].val.u.xval);
1011
nodconst(&n1, types[tptr], v);
1012
gins(optoas(OADD, types[tptr]), &n1, &nodes[0]);
1015
regalloc(&n1, types[tptr], &nodes[2]);
1016
gmove(&nodes[2], &n1);
1017
if(!smallintconst(&nodes[4]) || mpgetfix(nodes[4].val.u.xval) != 1)
1018
gins(optoas(OMUL, types[tptr]), &nodes[4], &n1);
1019
gins(optoas(OADD, types[tptr]), &n1, &nodes[0]);
1022
gins(optoas(OAS, types[tptr]), &nodes[0], &n2);
1024
for(i=0; i<5; i++) {
1025
if(nodes[i].op == OREGISTER)
1031
if(!fix64(n->list, narg))
1033
nochk = n->etype; // skip bounds checking
1035
if(!sleasy(n->list->n->right)) {
1038
n0 = n->list->n->right;
1039
tempname(&ntemp, res->type);
1041
n->list->n->right = &ntemp;
1042
getargs(n->list, nodes, narg);
1043
n->list->n->right = n0;
1133
} else if(a == ONE) {
1135
patch(gbranch(AJNE, T, likely), to);
1136
patch(gbranch(AJPS, T, likely), to);
1045
getargs(n->list, nodes, narg);
1047
nres = *res; // result
1049
if(ntemp.op == OXXX)
1050
tempname(&ntemp, res->type);
1054
if(narg == 3) { // old[lb:]
1055
// move width to where it would be for old[lb:hb]
1056
nodes[3] = nodes[2];
1059
// if(lb[1] > old.nel[0]) goto throw;
1061
n2.xoffset += Array_nel;
1062
n2.type = types[TUINT32];
1064
cmpandthrow(&nodes[1], &n2);
1066
// ret.nel = old.nel[0]-lb[1];
1068
n2.xoffset += Array_nel;
1069
n2.type = types[TUINT32];
1071
regalloc(&n1, types[TUINT32], N);
1072
gins(optoas(OAS, types[TUINT32]), &n2, &n1);
1073
if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0)
1074
gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1);
1077
n2.xoffset += Array_nel;
1078
n2.type = types[TUINT32];
1079
gins(optoas(OAS, types[TUINT32]), &n1, &n2);
1081
} else { // old[lb:hb]
1083
n2.xoffset += Array_cap;
1084
n2.type = types[TUINT32];
1086
// if(hb[2] > old.cap[0]) goto throw;
1087
cmpandthrow(&nodes[2], &n2);
1088
// if(lb[1] > hb[2]) goto throw;
1089
cmpandthrow(&nodes[1], &nodes[2]);
1092
// ret.len = hb[2]-lb[1]; (destroys hb[2])
1094
n2.xoffset += Array_nel;
1095
n2.type = types[TUINT32];
1097
if(smallintconst(&nodes[2]) && smallintconst(&nodes[1])) {
1098
v = mpgetfix(nodes[2].val.u.xval) -
1099
mpgetfix(nodes[1].val.u.xval);
1100
nodconst(&n1, types[TUINT32], v);
1101
gins(optoas(OAS, types[TUINT32]), &n1, &n2);
1103
regalloc(&n1, types[TUINT32], &nodes[2]);
1104
gmove(&nodes[2], &n1);
1105
if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0)
1106
gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1);
1107
gins(optoas(OAS, types[TUINT32]), &n1, &n2);
1112
// ret.cap = old.cap[0]-lb[1]; (uses hb[2])
1114
n2.xoffset += Array_cap;
1115
n2.type = types[TUINT32];
1117
regalloc(&n1, types[TUINT32], &nodes[2]);
1118
gins(optoas(OAS, types[TUINT32]), &n2, &n1);
1119
if(!smallintconst(&nodes[1]) || mpgetfix(nodes[1].val.u.xval) != 0)
1120
gins(optoas(OSUB, types[TUINT32]), &nodes[1], &n1);
1123
n2.xoffset += Array_cap;
1124
n2.type = types[TUINT32];
1125
gins(optoas(OAS, types[TUINT32]), &n1, &n2);
1128
// ret.array = old.array[0]+lb[1]*width[3]; (uses lb[1])
1130
n2.xoffset += Array_array;
1131
n2.type = types[tptr];
1133
regalloc(&n1, types[tptr], &nodes[1]);
1134
if(smallintconst(&nodes[1]) && smallintconst(&nodes[3])) {
1135
gins(optoas(OAS, types[tptr]), &n2, &n1);
1136
v = mpgetfix(nodes[1].val.u.xval) *
1137
mpgetfix(nodes[3].val.u.xval);
1139
nodconst(&n2, types[tptr], v);
1140
gins(optoas(OADD, types[tptr]), &n2, &n1);
1143
gmove(&nodes[1], &n1);
1144
if(!smallintconst(&nodes[3]) || mpgetfix(nodes[3].val.u.xval) != 1)
1145
gins(optoas(OMUL, types[tptr]), &nodes[3], &n1);
1146
gins(optoas(OADD, types[tptr]), &n2, &n1);
1150
n2.xoffset += Array_array;
1151
n2.type = types[tptr];
1152
gins(optoas(OAS, types[tptr]), &n1, &n2);
1155
for(i=0; i<4; i++) {
1156
if(nodes[i].op == OREGISTER)
1138
patch(gbranch(optoas(a, nr->type), T, likely), to);