** A better solution is to keep all numbers as FP values and only narrow
** when it's beneficial to do so. LuaJIT uses predictive narrowing for
** induction variables and demand-driven narrowing for index expressions,
** integer arguments and bit operations. Additionally it can eliminate or
** hoist most of the resulting overflow checks. Regular arithmetic
** computations are never narrowed to integers.
**
** The integer type in the IR has convenient wrap-around semantics and
** ignores overflow. Extra operations have been added for
** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type.
** Apart from reducing overall complexity of the compiler, this also
** nicely solves the problem where you want to apply algebraic
** simplifications to ADD, but not to ADDOV. And the x86/x64 assembler can
** use lea instead of an add for integer ADD, but not for ADDOV (lea does
** not affect the flags, but it helps to avoid register moves).
**
** All of the above has to be reconsidered for architectures with slow FP
** operations or without a hardware FPU. The dual-number mode of LuaJIT
** addresses this issue. Arithmetic operations are performed on integers
** as far as possible and overflow checks are added as needed.
**
** This implies that narrowing for integer arguments and bit operations
** should also strip overflow checks, e.g. replace ADDOV with ADD. The
** original overflow guards are weak and can be eliminated by DCE, if
** there's no other use.
**
** A slight twist is that it's usually beneficial to use overflow-checked
** integer arithmetics if all inputs are already integers. This is the only
** change that affects the single-number mode, too.
80
92
/* Some local macros to save typing. Undef'd at the end. */
231
266
/* Check the easy cases first. */
232
267
if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) {
233
if (nc->t == IRT_I64)
234
*nc->sp++ = NARROWINS(NARROW_SEXT, ir->op1); /* Reduce to sign-ext. */
268
if ((nc->mode & IRCONV_CONVMASK) <= IRCONV_ANY)
269
narrow_stripov_backprop(nc, ir->op1, depth+1);
236
271
*nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */
272
if (nc->t == IRT_I64)
273
*nc->sp++ = NARROWINS(NARROW_SEXT, 0); /* Sign-extend integer. */
238
275
} else if (ir->o == IR_KNUM) { /* Narrow FP constant. */
239
276
lua_Number n = ir_knum(ir)->n;
240
277
if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
241
278
/* Allows a wider range of constants. */
242
279
int64_t k64 = (int64_t)n;
243
if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */
280
if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */
244
281
*nc->sp++ = NARROWINS(NARROW_INT, 0);
245
282
*nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */
249
286
int32_t k = lj_num2int(n);
250
if (n == cast_num(k)) { /* Only if constant is really an integer. */
287
if (n == (lua_Number)k) { /* Only if constant is really an integer. */
251
288
*nc->sp++ = NARROWINS(NARROW_INT, 0);
252
289
*nc->sp++ = (NarrowIns)k;
419
/* -- Narrowing of implicit conversions ----------------------------------- */
421
/* Recursively strip overflow checks. */
422
static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode)
424
IRRef ref = tref_ref(tr);
427
if (op >= IR_ADDOV && op <= lastop) {
428
BPropEntry *bp = narrow_bpc_get(J, ref, mode);
430
return TREF(bp->val, irt_t(IR(bp->val)->t));
432
IRRef op1 = ir->op1, op2 = ir->op2; /* The IR may be reallocated. */
433
op1 = narrow_stripov(J, op1, lastop, mode);
434
op2 = narrow_stripov(J, op2, lastop, mode);
435
tr = emitir(IRT(op - IR_ADDOV + IR_ADD,
436
((mode & IRCONV_DSTMASK) >> IRCONV_DSH)), op1, op2);
437
narrow_bpc_set(J, ref, tref_ref(tr), mode);
439
} else if (LJ_64 && (mode & IRCONV_SEXT) && !irt_is64(ir->t)) {
440
tr = emitir(IRT(IR_CONV, IRT_INTP), tr, mode);
445
/* Narrow array index. */
446
TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr)
449
lua_assert(tref_isnumber(tr));
450
if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
451
return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX);
452
/* Omit some overflow checks for array indexing. See comments above. */
453
ir = IR(tref_ref(tr));
454
if ((ir->o == IR_ADDOV || ir->o == IR_SUBOV) && irref_isk(ir->op2) &&
455
(uint32_t)IR(ir->op2)->i + 0x40000000u < 0x80000000u)
456
return emitir(IRTI(ir->o - IR_ADDOV + IR_ADD), ir->op1, ir->op2);
460
/* Narrow conversion to integer operand (overflow undefined). */
461
TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr)
464
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
465
if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
466
return emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
467
if (!tref_isinteger(tr))
468
lj_trace_err(J, LJ_TRERR_BADTYPE);
470
** Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV.
471
** Use IRCONV_TOBIT for the cache entries, since the semantics are the same.
473
return narrow_stripov(J, tr, IR_MULOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT);
476
/* Narrow conversion to bitop operand (overflow wrapped). */
477
TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr)
480
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
481
if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
482
return emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J));
483
if (!tref_isinteger(tr))
484
lj_trace_err(J, LJ_TRERR_BADTYPE);
486
** Wrapped overflow semantics allow stripping of ADDOV and SUBOV.
487
** MULOV cannot be stripped due to precision widening.
489
return narrow_stripov(J, tr, IR_SUBOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT);
493
/* Narrow C array index (overflow undefined). */
494
TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
496
lua_assert(tref_isnumber(tr));
498
return emitir(IRTI(IR_CONV), tr,
499
(IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY);
500
/* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
501
return narrow_stripov(J, tr, IR_MULOV,
502
LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) :
503
((IRT_INTP<<5)|IRT_INT|IRCONV_TOBIT));
380
507
/* -- Narrowing of arithmetic operators ----------------------------------- */
382
509
/* Check whether a number fits into an int32_t (-0 is ok, too). */
383
510
static int numisint(lua_Number n)
385
return (n == cast_num(lj_num2int(n)));
512
return (n == (lua_Number)lj_num2int(n));
515
/* Narrowing of arithmetic operations. */
516
TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
517
TValue *vb, TValue *vc, IROp op)
519
if (tref_isstr(rb)) {
520
rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0);
521
lj_str_tonum(strV(vb), vb);
523
if (tref_isstr(rc)) {
524
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
525
lj_str_tonum(strV(vc), vc);
527
/* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
528
if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
529
tref_isinteger(rb) && tref_isinteger(rc) &&
530
numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc),
531
(int)op - (int)IR_ADD)))
532
return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc);
533
if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT);
534
if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
535
return emitir(IRTN(op), rb, rc);
538
/* Narrowing of unary minus operator. */
539
TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
541
if (tref_isstr(rc)) {
542
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
543
lj_str_tonum(strV(vc), vc);
545
if (tref_isinteger(rc)) {
546
if ((uint32_t)numberVint(vc) != 0x80000000u)
547
return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc);
548
rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
550
return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J));
388
553
/* Narrowing of modulo operator. */
409
574
/* Narrowing of power operator or math.pow. */
410
575
TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
413
577
if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc))
414
578
lj_trace_err(J, LJ_TRERR_BADTYPE);
416
/* Limit narrowing for pow to small exponents (or for two constants). */
417
if ((tref_isk(rc) && tref_isint(rc) && tref_isk(rb)) ||
418
((J->flags & JIT_F_OPT_NARROW) &&
419
(numisint(n) && n >= -65536.0 && n <= 65536.0))) {
579
/* Narrowing must be unconditional to preserve (-x)^i semantics. */
580
if (tvisint(vc) || numisint(numV(vc))) {
582
/* Split pow is faster for bigger exponents. But do this only for (+k)^i. */
583
if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
584
int32_t k = numberVint(vc);
585
if (!(k >= -65536 && k <= 65536)) goto split_pow;
421
588
if (!tref_isinteger(rc)) {
422
589
if (tref_isstr(rc))
423
590
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
424
591
/* Guarded conversion to integer! */
425
592
rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
427
if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
428
tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536-2147483647-1));
429
emitir(IRTGI(IR_LE), tmp, lj_ir_kint(J, 2*65536-2147483647-1));
594
if (checkrange && !tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
595
TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
596
emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
431
598
return emitir(IRTN(IR_POW), rb, rc);
433
601
/* FOLD covers most cases, but some are easier to do here. */
434
602
if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb)))))
435
603
return rb; /* 1 ^ x ==> 1 */
445
613
/* -- Predictive narrowing of induction variables ------------------------- */
615
/* Narrow a single runtime value. */
616
static int narrow_forl(jit_State *J, cTValue *o)
618
if (tvisint(o)) return 1;
619
if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o));
447
623
/* Narrow the FORL index type by looking at the runtime values. */
448
IRType lj_opt_narrow_forl(cTValue *forbase)
624
IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv)
450
lua_assert(tvisnum(&forbase[FORL_IDX]) &&
451
tvisnum(&forbase[FORL_STOP]) &&
452
tvisnum(&forbase[FORL_STEP]));
626
lua_assert(tvisnumber(&tv[FORL_IDX]) &&
627
tvisnumber(&tv[FORL_STOP]) &&
628
tvisnumber(&tv[FORL_STEP]));
453
629
/* Narrow only if the runtime values of start/stop/step are all integers. */
454
if (numisint(numV(&forbase[FORL_IDX])) &&
455
numisint(numV(&forbase[FORL_STOP])) &&
456
numisint(numV(&forbase[FORL_STEP]))) {
630
if (narrow_forl(J, &tv[FORL_IDX]) &&
631
narrow_forl(J, &tv[FORL_STOP]) &&
632
narrow_forl(J, &tv[FORL_STEP])) {
457
633
/* And if the loop index can't possibly overflow. */
458
lua_Number step = numV(&forbase[FORL_STEP]);
459
lua_Number sum = numV(&forbase[FORL_STOP]) + step;
460
if (0 <= step ? sum <= 2147483647.0 : sum >= -2147483648.0)
634
lua_Number step = numberVnum(&tv[FORL_STEP]);
635
lua_Number sum = numberVnum(&tv[FORL_STOP]) + step;
636
if (0 <= step ? (sum <= 2147483647.0) : (sum >= -2147483648.0))