** A better solution is to keep all numbers as FP values and only narrow
** when it's beneficial to do so. LuaJIT uses predictive narrowing for
** induction variables and demand-driven narrowing for index expressions,
** integer arguments and bit operations. Additionally it can eliminate or
** hoist most of the resulting overflow checks. Regular arithmetic
** computations are never narrowed to integers.
**
** The integer type in the IR has convenient wrap-around semantics and
** ignores overflow. Extra operations have been added for
** overflow-checking arithmetic (ADDOV/SUBOV) instead of an extra type.
** Apart from reducing overall complexity of the compiler, this also
** nicely solves the problem where you want to apply algebraic
** simplifications to ADD, but not to ADDOV. And the x86/x64 assembler can
** use lea instead of an add for integer ADD, but not for ADDOV (lea does
** not affect the flags, but it helps to avoid register moves).
**
** All of the above has to be reconsidered for architectures with slow FP
** operations or without a hardware FPU. The dual-number mode of LuaJIT
** addresses this issue. Arithmetic operations are performed on integers
** as far as possible and overflow checks are added as needed.
**
** This implies that narrowing for integer arguments and bit operations
** should also strip overflow checks, e.g. replace ADDOV with ADD. The
** original overflow guards are weak and can be eliminated by DCE, if
** there's no other use.
**
** A slight twist is that it's usually beneficial to use overflow-checked
** integer arithmetics if all inputs are already integers. This is the only
** change that affects the single-number mode, too.
80
92
/* Some local macros to save typing. Undef'd at the end. */
231
266
/* Check the easy cases first. */
232
267
if (ir->o == IR_CONV && (ir->op2 & IRCONV_SRCMASK) == IRT_INT) {
233
if (nc->t == IRT_I64)
234
*nc->sp++ = NARROWINS(NARROW_SEXT, ir->op1); /* Reduce to sign-ext. */
268
if ((nc->mode & IRCONV_CONVMASK) <= IRCONV_ANY)
269
narrow_stripov_backprop(nc, ir->op1, depth+1);
236
271
*nc->sp++ = NARROWINS(NARROW_REF, ir->op1); /* Undo conversion. */
272
if (nc->t == IRT_I64)
273
*nc->sp++ = NARROWINS(NARROW_SEXT, 0); /* Sign-extend integer. */
238
275
} else if (ir->o == IR_KNUM) { /* Narrow FP constant. */
239
276
lua_Number n = ir_knum(ir)->n;
240
277
if ((nc->mode & IRCONV_CONVMASK) == IRCONV_TOBIT) {
241
278
/* Allows a wider range of constants. */
242
279
int64_t k64 = (int64_t)n;
243
if (n == cast_num(k64)) { /* Only if constant doesn't lose precision. */
280
if (n == (lua_Number)k64) { /* Only if const doesn't lose precision. */
244
281
*nc->sp++ = NARROWINS(NARROW_INT, 0);
245
282
*nc->sp++ = (NarrowIns)k64; /* But always truncate to 32 bits. */
249
286
int32_t k = lj_num2int(n);
250
if (n == cast_num(k)) { /* Only if constant is really an integer. */
287
if (n == (lua_Number)k) { /* Only if constant is really an integer. */
251
288
*nc->sp++ = NARROWINS(NARROW_INT, 0);
252
289
*nc->sp++ = (NarrowIns)k;
419
/* -- Narrowing of implicit conversions ----------------------------------- */
421
/* Recursively strip overflow checks. */
422
static TRef narrow_stripov(jit_State *J, TRef tr, int lastop, IRRef mode)
424
IRRef ref = tref_ref(tr);
427
if (op >= IR_ADDOV && op <= lastop) {
428
BPropEntry *bp = narrow_bpc_get(J, ref, mode);
430
return TREF(bp->val, irt_t(IR(bp->val)->t));
432
IRRef op1 = ir->op1, op2 = ir->op2; /* The IR may be reallocated. */
433
op1 = narrow_stripov(J, op1, lastop, mode);
434
op2 = narrow_stripov(J, op2, lastop, mode);
435
tr = emitir(IRT(op - IR_ADDOV + IR_ADD,
436
((mode & IRCONV_DSTMASK) >> IRCONV_DSH)), op1, op2);
437
narrow_bpc_set(J, ref, tref_ref(tr), mode);
439
} else if (LJ_64 && (mode & IRCONV_SEXT) && !irt_is64(ir->t)) {
440
tr = emitir(IRT(IR_CONV, IRT_INTP), tr, mode);
445
/* Narrow array index. */
446
TRef LJ_FASTCALL lj_opt_narrow_index(jit_State *J, TRef tr)
449
lua_assert(tref_isnumber(tr));
450
if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
451
return emitir(IRTGI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_INDEX);
452
/* Omit some overflow checks for array indexing. See comments above. */
453
ir = IR(tref_ref(tr));
454
if ((ir->o == IR_ADDOV || ir->o == IR_SUBOV) && irref_isk(ir->op2) &&
455
(uint32_t)IR(ir->op2)->i + 0x40000000u < 0x80000000u)
456
return emitir(IRTI(ir->o - IR_ADDOV + IR_ADD), ir->op1, ir->op2);
460
/* Narrow conversion to integer operand (overflow undefined). */
461
TRef LJ_FASTCALL lj_opt_narrow_toint(jit_State *J, TRef tr)
464
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
465
if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
466
return emitir(IRTI(IR_CONV), tr, IRCONV_INT_NUM|IRCONV_ANY);
467
if (!tref_isinteger(tr))
468
lj_trace_err(J, LJ_TRERR_BADTYPE);
470
** Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV.
471
** Use IRCONV_TOBIT for the cache entries, since the semantics are the same.
473
return narrow_stripov(J, tr, IR_MULOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT);
476
/* Narrow conversion to bitop operand (overflow wrapped). */
477
TRef LJ_FASTCALL lj_opt_narrow_tobit(jit_State *J, TRef tr)
480
tr = emitir(IRTG(IR_STRTO, IRT_NUM), tr, 0);
481
if (tref_isnum(tr)) /* Conversion may be narrowed, too. See above. */
482
return emitir(IRTI(IR_TOBIT), tr, lj_ir_knum_tobit(J));
483
if (!tref_isinteger(tr))
484
lj_trace_err(J, LJ_TRERR_BADTYPE);
486
** Wrapped overflow semantics allow stripping of ADDOV and SUBOV.
487
** MULOV cannot be stripped due to precision widening.
489
return narrow_stripov(J, tr, IR_SUBOV, (IRT_INT<<5)|IRT_INT|IRCONV_TOBIT);
493
/* Narrow C array index (overflow undefined). */
494
TRef LJ_FASTCALL lj_opt_narrow_cindex(jit_State *J, TRef tr)
496
lua_assert(tref_isnumber(tr));
498
return emitir(IRTI(IR_CONV), tr,
499
(IRT_INTP<<5)|IRT_NUM|IRCONV_TRUNC|IRCONV_ANY);
500
/* Undefined overflow semantics allow stripping of ADDOV, SUBOV and MULOV. */
501
return narrow_stripov(J, tr, IR_MULOV,
502
LJ_64 ? ((IRT_INTP<<5)|IRT_INT|IRCONV_SEXT) :
503
((IRT_INTP<<5)|IRT_INT|IRCONV_TOBIT));
380
507
/* -- Narrowing of arithmetic operators ----------------------------------- */
382
509
/* Check whether a number fits into an int32_t (-0 is ok, too). */
383
510
static int numisint(lua_Number n)
385
return (n == cast_num(lj_num2int(n)));
512
return (n == (lua_Number)lj_num2int(n));
515
/* Narrowing of arithmetic operations. */
516
TRef lj_opt_narrow_arith(jit_State *J, TRef rb, TRef rc,
517
TValue *vb, TValue *vc, IROp op)
519
if (tref_isstr(rb)) {
520
rb = emitir(IRTG(IR_STRTO, IRT_NUM), rb, 0);
521
lj_str_tonum(strV(vb), vb);
523
if (tref_isstr(rc)) {
524
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
525
lj_str_tonum(strV(vc), vc);
527
/* Must not narrow MUL in non-DUALNUM variant, because it loses -0. */
528
if ((op >= IR_ADD && op <= (LJ_DUALNUM ? IR_MUL : IR_SUB)) &&
529
tref_isinteger(rb) && tref_isinteger(rc) &&
530
numisint(lj_vm_foldarith(numberVnum(vb), numberVnum(vc),
531
(int)op - (int)IR_ADD)))
532
return emitir(IRTGI((int)op - (int)IR_ADD + (int)IR_ADDOV), rb, rc);
533
if (!tref_isnum(rb)) rb = emitir(IRTN(IR_CONV), rb, IRCONV_NUM_INT);
534
if (!tref_isnum(rc)) rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
535
return emitir(IRTN(op), rb, rc);
538
/* Narrowing of unary minus operator. */
539
TRef lj_opt_narrow_unm(jit_State *J, TRef rc, TValue *vc)
541
if (tref_isstr(rc)) {
542
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
543
lj_str_tonum(strV(vc), vc);
545
if (tref_isinteger(rc)) {
546
if ((uint32_t)numberVint(vc) != 0x80000000u)
547
return emitir(IRTGI(IR_SUBOV), lj_ir_kint(J, 0), rc);
548
rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
550
return emitir(IRTN(IR_NEG), rc, lj_ir_knum_neg(J));
388
553
/* Narrowing of modulo operator. */
409
574
/* Narrowing of power operator or math.pow. */
410
575
TRef lj_opt_narrow_pow(jit_State *J, TRef rb, TRef rc, TValue *vc)
413
577
if (tvisstr(vc) && !lj_str_tonum(strV(vc), vc))
414
578
lj_trace_err(J, LJ_TRERR_BADTYPE);
416
/* Limit narrowing for pow to small exponents (or for two constants). */
417
if ((tref_isk(rc) && tref_isint(rc) && tref_isk(rb)) ||
418
((J->flags & JIT_F_OPT_NARROW) &&
419
(numisint(n) && n >= -65536.0 && n <= 65536.0))) {
579
/* Narrowing must be unconditional to preserve (-x)^i semantics. */
580
if (tvisint(vc) || numisint(numV(vc))) {
582
/* Split pow is faster for bigger exponents. But do this only for (+k)^i. */
583
if (tref_isk(rb) && (int32_t)ir_knum(IR(tref_ref(rb)))->u32.hi >= 0) {
584
int32_t k = numberVint(vc);
585
if (!(k >= -65536 && k <= 65536)) goto split_pow;
421
588
if (!tref_isinteger(rc)) {
422
589
if (tref_isstr(rc))
423
590
rc = emitir(IRTG(IR_STRTO, IRT_NUM), rc, 0);
424
591
/* Guarded conversion to integer! */
425
592
rc = emitir(IRTGI(IR_CONV), rc, IRCONV_INT_NUM|IRCONV_CHECK);
427
if (!tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
428
tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536-2147483647-1));
429
emitir(IRTGI(IR_LE), tmp, lj_ir_kint(J, 2*65536-2147483647-1));
594
if (checkrange && !tref_isk(rc)) { /* Range guard: -65536 <= i <= 65536 */
595
TRef tmp = emitir(IRTI(IR_ADD), rc, lj_ir_kint(J, 65536));
596
emitir(IRTGI(IR_ULE), tmp, lj_ir_kint(J, 2*65536));
431
598
return emitir(IRTN(IR_POW), rb, rc);
433
601
/* FOLD covers most cases, but some are easier to do here. */
434
602
if (tref_isk(rb) && tvispone(ir_knum(IR(tref_ref(rb)))))
435
603
return rb; /* 1 ^ x ==> 1 */
445
613
/* -- Predictive narrowing of induction variables ------------------------- */
615
/* Narrow a single runtime value. */
616
static int narrow_forl(jit_State *J, cTValue *o)
618
if (tvisint(o)) return 1;
619
if (LJ_DUALNUM || (J->flags & JIT_F_OPT_NARROW)) return numisint(numV(o));
447
623
/* Narrow the FORL index type by looking at the runtime values. */
448
IRType lj_opt_narrow_forl(cTValue *forbase)
624
IRType lj_opt_narrow_forl(jit_State *J, cTValue *tv)
450
lua_assert(tvisnum(&forbase[FORL_IDX]) &&
451
tvisnum(&forbase[FORL_STOP]) &&
452
tvisnum(&forbase[FORL_STEP]));
626
lua_assert(tvisnumber(&tv[FORL_IDX]) &&
627
tvisnumber(&tv[FORL_STOP]) &&
628
tvisnumber(&tv[FORL_STEP]));
453
629
/* Narrow only if the runtime values of start/stop/step are all integers. */
454
if (numisint(numV(&forbase[FORL_IDX])) &&
455
numisint(numV(&forbase[FORL_STOP])) &&
456
numisint(numV(&forbase[FORL_STEP]))) {
630
if (narrow_forl(J, &tv[FORL_IDX]) &&
631
narrow_forl(J, &tv[FORL_STOP]) &&
632
narrow_forl(J, &tv[FORL_STEP])) {
457
633
/* And if the loop index can't possibly overflow. */
458
lua_Number step = numV(&forbase[FORL_STEP]);
459
lua_Number sum = numV(&forbase[FORL_STOP]) + step;
460
if (0 <= step ? sum <= 2147483647.0 : sum >= -2147483648.0)
634
lua_Number step = numberVnum(&tv[FORL_STEP]);
635
lua_Number sum = numberVnum(&tv[FORL_STOP]) + step;
636
if (0 <= step ? (sum <= 2147483647.0) : (sum >= -2147483648.0))