/* Peephole optimizations for bytecode compiler. */

#include "Python.h"
#include "Python-ast.h"
#include "opcode.h"

#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
#define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \
    || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP)
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
#define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
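
/* Each opcode that takes an argument occupies three bytes: the opcode byte
   followed by a 16-bit little-endian argument, so e.g. an argument of 260
   (0x0104) is stored as the two bytes 0x04, 0x01.  GETARG/SETARG above read
   and write that argument in place, and GETJUMPTGT resolves a jump's target
   to an absolute offset whether the jump is absolute or relative. */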
#define ISBASICBLOCK(blocks, start, bytes) \
    (blocks[start]==blocks[start+bytes-1])
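
/* The CONST_STACK_* macros below keep a bookkeeping stack for the most
   recent run of LOAD_CONST instructions seen by the optimizer: const_stack
   holds the constant objects (borrowed from the consts list) and
   load_const_stack holds the bytecode offset of each corresponding
   LOAD_CONST, so the folding passes can locate and overwrite the run. */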
#define CONST_STACK_CREATE() { \
    const_stack_size = 256; \
    const_stack = PyMem_New(PyObject *, const_stack_size); \
    load_const_stack = PyMem_New(Py_ssize_t, const_stack_size); \
    if (!const_stack || !load_const_stack) { \
        PyErr_NoMemory(); \
        goto exitError; \
    } \
    }

#define CONST_STACK_DELETE() do { \
    if (const_stack) \
        PyMem_Free(const_stack); \
    if (load_const_stack) \
        PyMem_Free(load_const_stack); \
    } while(0)

#define CONST_STACK_LEN() (const_stack_top + 1)
#define CONST_STACK_PUSH_OP(i) do { \
    PyObject *_x; \
    assert(codestr[i] == LOAD_CONST); \
    assert(PyList_GET_SIZE(consts) > GETARG(codestr, i)); \
    _x = PyList_GET_ITEM(consts, GETARG(codestr, i)); \
    if (++const_stack_top >= const_stack_size) { \
        const_stack_size *= 2; \
        PyMem_Resize(const_stack, PyObject *, const_stack_size); \
        PyMem_Resize(load_const_stack, Py_ssize_t, const_stack_size); \
        if (!const_stack || !load_const_stack) { \
            PyErr_NoMemory(); \
            goto exitError; \
        } \
    } \
    load_const_stack[const_stack_top] = i; \
    const_stack[const_stack_top] = _x; \
    in_consts = 1; \
    } while(0)

#define CONST_STACK_RESET() do { \
    const_stack_top = -1; \
    } while(0)

#define CONST_STACK_TOP() \
    const_stack[const_stack_top]

#define CONST_STACK_LASTN(i) \
    &const_stack[const_stack_top - i + 1]

#define CONST_STACK_POP(i) do { \
    assert(const_stack_top + 1 >= i); \
    const_stack_top -= i; \
    } while(0)

#define CONST_STACK_OP_LASTN(i) \
    ((const_stack_top >= i - 1) ? load_const_stack[const_stack_top - i + 1] : -1)

/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
   with    LOAD_CONST (c1, c2, ... cn).
   The consts table must still be in list form so that the
   new constant (c1, c2, ... cn) can be appended.
   Called with codestr pointing to the BUILD_TUPLE (or BUILD_LIST/BUILD_SET)
   opcode; the n constants to fold are supplied in objs.
   Bails out with no change if the new constant cannot be built or appended.
   Also works for BUILD_LIST and BUILD_SET when followed by an "in" or "not in"
   test; for BUILD_SET it assembles a frozenset rather than a tuple.
*/
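/* A rough illustration (byte offsets relative to the first LOAD_CONST;
   each LOAD_CONST and the BUILD_TUPLE are 3 bytes each):

       LOAD_CONST 1   LOAD_CONST 2   LOAD_CONST 3   BUILD_TUPLE 3

   becomes, once the caller NOPs out the original loads,

       NOP NOP NOP    NOP NOP NOP    NOP NOP NOP    LOAD_CONST (1, 2, 3)
*/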
static int
tuple_of_constants(unsigned char *codestr, Py_ssize_t n,
                   PyObject *consts, PyObject **objs)
{
    PyObject *newconst, *constant;
    Py_ssize_t i, len_consts;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));

    /* Buildup new tuple of constants */
    newconst = PyTuple_New(n);
    if (newconst == NULL)
        return 0;
    len_consts = PyList_GET_SIZE(consts);
    for (i=0 ; i<n ; i++) {
        constant = objs[i];
        Py_INCREF(constant);
        PyTuple_SET_ITEM(newconst, i, constant);
    }

    /* If it's a BUILD_SET, use the PyTuple we just built to create a
       PyFrozenSet, and use that as the constant instead: */
    if (codestr[0] == BUILD_SET) {
        PyObject *tuple = newconst;
        newconst = PyFrozenSet_New(tuple);
        Py_DECREF(tuple);
        if (newconst == NULL)
            return 0;
    }

    /* Append folded constant onto consts */
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        PyErr_Clear();
        return 0;
    }
    Py_DECREF(newconst);

    /* Write NOPs over old LOAD_CONSTS and
       add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
    codestr[0] = LOAD_CONST;
    SETARG(codestr, 0, len_consts);
    return 1;
}

/* Replace LOAD_CONST c1, LOAD_CONST c2, BINOP
   with    LOAD_CONST binop(c1,c2).
   The consts table must still be in list form so that the
   new constant can be appended.
   Called with codestr pointing to the BINOP.
   Abandons the transformation if the folding fails (i.e. 1+'a').
   If the new constant is a sequence, only folds when the size
   is below a threshold value.  That keeps pyc files from
   becoming large in the presence of code like:  (None,)*1000.
*/
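/* For illustration: the expression 2 + 3 compiles to

       LOAD_CONST 2   LOAD_CONST 3   BINARY_ADD      (3 + 3 + 1 bytes)

   and, after the caller NOPs the leading bytes, becomes

       NOP NOP NOP NOP   LOAD_CONST 5                (4 + 3 bytes)

   i.e. the new LOAD_CONST is written over the argument bytes of the second
   LOAD_CONST plus the BINOP byte. */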
static int
fold_binops_on_constants(unsigned char *codestr, PyObject *consts,
                         PyObject **objs)
{
    PyObject *newconst, *v, *w;
    Py_ssize_t len_consts, size;
    int opcode;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));

    /* Create new constant */
    v = objs[0];
    w = objs[1];
    opcode = codestr[0];
    switch (opcode) {
        case BINARY_POWER:
            newconst = PyNumber_Power(v, w, Py_None);
            break;
        case BINARY_MULTIPLY:
            newconst = PyNumber_Multiply(v, w);
            break;
        case BINARY_TRUE_DIVIDE:
            newconst = PyNumber_TrueDivide(v, w);
            break;
        case BINARY_FLOOR_DIVIDE:
            newconst = PyNumber_FloorDivide(v, w);
            break;
        case BINARY_MODULO:
            newconst = PyNumber_Remainder(v, w);
            break;
        case BINARY_ADD:
            newconst = PyNumber_Add(v, w);
            break;
        case BINARY_SUBTRACT:
            newconst = PyNumber_Subtract(v, w);
            break;
        case BINARY_SUBSCR:
            newconst = PyObject_GetItem(v, w);
            break;
        case BINARY_LSHIFT:
            newconst = PyNumber_Lshift(v, w);
            break;
        case BINARY_RSHIFT:
            newconst = PyNumber_Rshift(v, w);
            break;
        case BINARY_AND:
            newconst = PyNumber_And(v, w);
            break;
        case BINARY_XOR:
            newconst = PyNumber_Xor(v, w);
            break;
        case BINARY_OR:
            newconst = PyNumber_Or(v, w);
            break;
        default:
            /* Called with an unknown opcode */
            PyErr_Format(PyExc_SystemError,
                         "unexpected binary operation %d on a constant",
                         opcode);
            return 0;
    }
    if (newconst == NULL) {
        if (!PyErr_ExceptionMatches(PyExc_KeyboardInterrupt))
            PyErr_Clear();
        return 0;
    }
    size = PyObject_Size(newconst);
    if (size == -1) {
        if (PyErr_ExceptionMatches(PyExc_KeyboardInterrupt))
            return 0;
        PyErr_Clear();
    } else if (size > 20) {
        Py_DECREF(newconst);
        return 0;
    }

    /* Append folded constant into consts table */
    len_consts = PyList_GET_SIZE(consts);
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        PyErr_Clear();
        return 0;
    }
    Py_DECREF(newconst);

    /* Write NOP NOP NOP NOP LOAD_CONST newconst */
    codestr[-2] = LOAD_CONST;
    SETARG(codestr, -2, len_consts);
    return 1;
}
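
/* Replace LOAD_CONST c  UNARY_OP
   with    LOAD_CONST unary_op(c).
   The consts table must still be in list form so that the new constant can
   be appended.  Called with codestr pointing to the LOAD_CONST; v is that
   constant.  Abandons the transformation if the operation fails
   (e.g. -'abc'). */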
static int
fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts, PyObject *v)
{
    PyObject *newconst;
    Py_ssize_t len_consts;
    int opcode;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    assert(codestr[0] == LOAD_CONST);

    /* Create new constant */
    opcode = codestr[3];
    switch (opcode) {
        case UNARY_NEGATIVE:
            newconst = PyNumber_Negative(v);
            break;
        case UNARY_INVERT:
            newconst = PyNumber_Invert(v);
            break;
        case UNARY_POSITIVE:
            newconst = PyNumber_Positive(v);
            break;
        default:
            /* Called with an unknown opcode */
            PyErr_Format(PyExc_SystemError,
                         "unexpected unary operation %d on a constant",
                         opcode);
            return 0;
    }
    if (newconst == NULL) {
        if (!PyErr_ExceptionMatches(PyExc_KeyboardInterrupt))
            PyErr_Clear();
        return 0;
    }

    /* Append folded constant into consts table */
    len_consts = PyList_GET_SIZE(consts);
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        PyErr_Clear();
        return 0;
    }
    Py_DECREF(newconst);

    /* Write NOP LOAD_CONST newconst */
    codestr[0] = NOP;
    codestr[1] = LOAD_CONST;
    SETARG(codestr, 1, len_consts);
    return 1;
}
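
/* Scan the bytecode and return a malloc'ed array with one entry per byte:
   the basic-block number that byte belongs to.  Every jump target starts a
   new block, so ISBASICBLOCK(blocks, start, bytes) can verify that a span
   of instructions has no incoming jumps by checking that its first and last
   bytes carry the same block number. */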
static unsigned int *
markblocks(unsigned char *code, Py_ssize_t len)
{
    unsigned int *blocks = (unsigned int *)PyMem_Malloc(len*sizeof(int));
    int i,j, opcode, blockcnt = 0;

    if (blocks == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memset(blocks, 0, len*sizeof(int));

    /* Mark labels in the first pass */
    for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
        opcode = code[i];
        switch (opcode) {
            case FOR_ITER:
            case JUMP_FORWARD:
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
                j = GETJUMPTGT(code, i);
                blocks[j] = 1;
                break;
        }
    }
    /* Build block numbers in the second pass */
    for (i=0 ; i<len ; i++) {
        blockcnt += blocks[i];      /* increment blockcnt over labels */
        blocks[i] = blockcnt;
    }
    return blocks;
}

/* Perform basic peephole optimizations to components of a code object.
   The consts object should still be in list form to allow new constants
   to be appended.

   To keep the optimizer simple, it bails out (does nothing) for code that
   has a length over 32,700, and does not calculate extended arguments.
   That allows us to avoid overflow and sign issues.  Likewise, it bails when
   the lineno table has complex encoding for gaps >= 255.  EXTENDED_ARG can
   appear before MAKE_FUNCTION; in this case both opcodes are skipped.
   EXTENDED_ARG preceding any other opcode causes the optimizer to bail.

   Optimizations are restricted to simple transformations occurring within a
   single basic block.  All transformations keep the code size the same or
   smaller.  For those that reduce size, the gaps are initially filled with
   NOPs.  Later those NOPs are removed and the jump addresses retargeted in
   a single pass.  Line numbering is adjusted accordingly. */
PyObject *
PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
                PyObject *lineno_obj)
{
    Py_ssize_t i, j, codelen;
    int nops, h, adj;
    int tgt, tgttgt, opcode;
    unsigned char *codestr = NULL;
    unsigned char *lineno;
    int *addrmap = NULL;
    int new_line, cum_orig_line, last_line, tabsiz;
    PyObject **const_stack = NULL;
    Py_ssize_t *load_const_stack = NULL;
    Py_ssize_t const_stack_top = -1;
    Py_ssize_t const_stack_size = 0;
    int in_consts = 0;  /* whether we are in a LOAD_CONST sequence */
    unsigned int *blocks = NULL;

    /* Bail out if an exception is set */
    if (PyErr_Occurred())
        goto exitError;

    /* Bypass optimization when the lineno table is too complex */
    assert(PyBytes_Check(lineno_obj));
    lineno = (unsigned char*)PyBytes_AS_STRING(lineno_obj);
    tabsiz = PyBytes_GET_SIZE(lineno_obj);
    if (memchr(lineno, 255, tabsiz) != NULL)
        goto exitUnchanged;

    /* Avoid situations where jump retargeting could overflow */
    assert(PyBytes_Check(code));
    codelen = PyBytes_GET_SIZE(code);
    if (codelen > 32700)
        goto exitUnchanged;

    /* Make a modifiable copy of the code string */
    codestr = (unsigned char *)PyMem_Malloc(codelen);
    if (codestr == NULL) {
        PyErr_NoMemory();
        goto exitError;
    }
    codestr = (unsigned char *)memcpy(codestr,
                                      PyBytes_AS_STRING(code), codelen);

    /* Verify that RETURN_VALUE terminates the codestring.  This allows
       the various transformation patterns to look ahead several
       instructions without additional checks to make sure they are not
       looking beyond the end of the code string.
    */
    if (codestr[codelen-1] != RETURN_VALUE)
        goto exitUnchanged;

    /* Mapping to new jump targets after NOPs are removed */
    addrmap = (int *)PyMem_Malloc(codelen * sizeof(int));
    if (addrmap == NULL) {
        PyErr_NoMemory();
        goto exitError;
    }

    blocks = markblocks(codestr, codelen);
    if (blocks == NULL)
        goto exitError;
    assert(PyList_Check(consts));

    CONST_STACK_CREATE();

    for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
      reoptimize_current:
        opcode = codestr[i];

        if (!in_consts) {
            CONST_STACK_RESET();
        }
        in_consts = 0;

        switch (opcode) {
                /* Replace UNARY_NOT POP_JUMP_IF_FALSE
                   with    POP_JUMP_IF_TRUE */
            case UNARY_NOT:
                if (codestr[i+1] != POP_JUMP_IF_FALSE
                    || !ISBASICBLOCK(blocks,i,4))
                    continue;
                j = GETARG(codestr, i+1);
                codestr[i] = POP_JUMP_IF_TRUE;
                SETARG(codestr, i, j);
                codestr[i+3] = NOP;
                goto reoptimize_current;

                /* not a is b -->  a is not b
                   not a in b -->  a not in b
                   not a is not b -->  a is b
                   not a not in b -->  a in b
                */
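                /* COMPARE_OP arguments 6..9 select 'in', 'not in', 'is' and
                   'is not' (in that order in the cmp_op table), so flipping
                   the low bit with j^1 below swaps each operator with its
                   negation. */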
            case COMPARE_OP:
                j = GETARG(codestr, i);
                if (j < 6 || j > 9 ||
                    codestr[i+3] != UNARY_NOT ||
                    !ISBASICBLOCK(blocks,i,4))
                    continue;
                SETARG(codestr, i, (j^1));
                codestr[i+3] = NOP;
                break;

                /* Skip over LOAD_CONST trueconst
                   POP_JUMP_IF_FALSE xx.  This improves
                   "while 1" performance. */
            case LOAD_CONST:
                CONST_STACK_PUSH_OP(i);
                j = GETARG(codestr, i);
                if (codestr[i+3] != POP_JUMP_IF_FALSE ||
                    !ISBASICBLOCK(blocks,i,6) ||
                    !PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
                    continue;
                memset(codestr+i, NOP, 6);
                CONST_STACK_RESET();
                break;

                /* Try to fold tuples of constants (includes a case for lists
                   and sets which are only used for "in" and "not in" tests).
                   Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
                   Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
                   Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
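                /* Rough example: "x, y = y, x" compiles to
                       LOAD y  LOAD x  BUILD_TUPLE 2  UNPACK_SEQUENCE 2
                       STORE x  STORE y
                   and the BUILD_TUPLE 2 / UNPACK_SEQUENCE 2 pair (6 bytes)
                   is rewritten below as ROT_TWO plus five NOPs. */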
            case BUILD_TUPLE:
            case BUILD_LIST:
            case BUILD_SET:
                j = GETARG(codestr, i);
                h = CONST_STACK_OP_LASTN(j);
                assert((h >= 0 || CONST_STACK_LEN() < j));
                if (h >= 0 && j > 0 && j <= CONST_STACK_LEN() &&
                    ((opcode == BUILD_TUPLE &&
                      ISBASICBLOCK(blocks, h, i-h+3)) ||
                     ((opcode == BUILD_LIST || opcode == BUILD_SET) &&
                      codestr[i+3]==COMPARE_OP &&
                      ISBASICBLOCK(blocks, h, i-h+6) &&
                      (GETARG(codestr,i+3)==6 ||
                       GETARG(codestr,i+3)==7))) &&
                    tuple_of_constants(&codestr[i], j, consts, CONST_STACK_LASTN(j))) {
                    assert(codestr[i] == LOAD_CONST);
                    memset(&codestr[h], NOP, i - h);
                    CONST_STACK_POP(j);
                    CONST_STACK_PUSH_OP(i);
                    break;
                }
                if (codestr[i+3] != UNPACK_SEQUENCE ||
                    !ISBASICBLOCK(blocks,i,6) ||
                    j != GETARG(codestr, i+3) ||
                    opcode == BUILD_SET)
                    continue;
                if (j == 1) {
                    memset(codestr+i, NOP, 6);
                } else if (j == 2) {
                    codestr[i] = ROT_TWO;
                    memset(codestr+i+1, NOP, 5);
                    CONST_STACK_RESET();
                } else if (j == 3) {
                    codestr[i] = ROT_THREE;
                    codestr[i+1] = ROT_TWO;
                    memset(codestr+i+2, NOP, 4);
                    CONST_STACK_RESET();
                }
                break;

                /* Fold binary ops on constants.
                   LOAD_CONST c1 LOAD_CONST c2 BINOP --> LOAD_CONST binop(c1,c2) */
            case BINARY_POWER:
            case BINARY_MULTIPLY:
            case BINARY_TRUE_DIVIDE:
            case BINARY_FLOOR_DIVIDE:
            case BINARY_MODULO:
            case BINARY_ADD:
            case BINARY_SUBTRACT:
            case BINARY_SUBSCR:
            case BINARY_LSHIFT:
            case BINARY_RSHIFT:
            case BINARY_AND:
            case BINARY_XOR:
            case BINARY_OR:
                /* NOTE: LOAD_CONST is saved at `i-2` since it has an arg
                   while BINOP hasn't */
                h = CONST_STACK_OP_LASTN(2);
                assert((h >= 0 || CONST_STACK_LEN() < 2));
                if (h >= 0 &&
                    ISBASICBLOCK(blocks, h, i-h+1) &&
                    fold_binops_on_constants(&codestr[i], consts, CONST_STACK_LASTN(2))) {
                    i -= 2;
                    memset(&codestr[h], NOP, i - h);
                    assert(codestr[i] == LOAD_CONST);
                    CONST_STACK_POP(2);
                    CONST_STACK_PUSH_OP(i);
                }
                break;

                /* Fold unary ops on constants.
                   LOAD_CONST c1  UNARY_OP --> LOAD_CONST unary_op(c) */
            case UNARY_NEGATIVE:
            case UNARY_INVERT:
            case UNARY_POSITIVE:
                h = CONST_STACK_OP_LASTN(1);
                assert((h >= 0 || CONST_STACK_LEN() < 1));
                if (h >= 0 &&
                    ISBASICBLOCK(blocks, h, i-h+1) &&
                    fold_unaryops_on_constants(&codestr[i-3], consts, CONST_STACK_TOP())) {
                    i -= 2;
                    assert(codestr[i] == LOAD_CONST);
                    CONST_STACK_POP(1);
                    CONST_STACK_PUSH_OP(i);
                }
                break;

                /* Simplify conditional jump to conditional jump where the
                   result of the first test implies the success of a similar
                   test or the failure of the opposite test.

                   x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_FALSE_OR_POP z
                      -->  x:JUMP_IF_FALSE_OR_POP z
                   x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_TRUE_OR_POP z
                      -->  x:POP_JUMP_IF_FALSE y+3
                   where y+3 is the instruction following the second test.
                */
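                /* Such chains typically come from short-circuit boolean
                   expressions like "a and b or c": each operand leaves behind
                   a conditional jump, and consecutive jumps with the same
                   truth sense can be collapsed into a single one. */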
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
                tgt = GETJUMPTGT(codestr, i);
                j = codestr[tgt];
                if (CONDITIONAL_JUMP(j)) {
                    /* NOTE: all possible jumps here are
                       absolute! */
                    if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) {
                        /* The second jump will be
                           taken iff the first is. */
                        tgttgt = GETJUMPTGT(codestr, tgt);
                        /* The current opcode inherits
                           its target's stack behaviour */
                        codestr[i] = j;
                        SETARG(codestr, i, tgttgt);
                        goto reoptimize_current;
                    } else {
                        /* The second jump is not taken
                           if the first is (so jump past
                           it), and all conditional
                           jumps pop their argument when
                           they're not taken (so change
                           the first jump to pop its
                           argument when it's taken). */
                        if (JUMPS_ON_TRUE(opcode))
                            codestr[i] = POP_JUMP_IF_TRUE;
                        else
                            codestr[i] = POP_JUMP_IF_FALSE;
                        SETARG(codestr, i, (tgt + 3));
                        goto reoptimize_current;
                    }
                }
                /* Intentional fallthrough */

                /* Replace jumps to unconditional jumps */
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case FOR_ITER:
            case JUMP_FORWARD:
            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
                tgt = GETJUMPTGT(codestr, i);
                /* Replace JUMP_* to a RETURN into just a RETURN */
                if (UNCONDITIONAL_JUMP(opcode) &&
                    codestr[tgt] == RETURN_VALUE) {
                    codestr[i] = RETURN_VALUE;
                    memset(codestr+i+1, NOP, 2);
                    continue;
                }
                if (!UNCONDITIONAL_JUMP(codestr[tgt]))
                    continue;
                tgttgt = GETJUMPTGT(codestr, tgt);
                if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */
                    opcode = JUMP_ABSOLUTE;
                if (!ABSOLUTE_JUMP(opcode))
                    tgttgt -= i + 3;        /* Calc relative jump addr */
                if (tgttgt < 0)             /* No backward relative jumps */
                    continue;
                codestr[i] = opcode;
                SETARG(codestr, i, tgttgt);
                break;

            case EXTENDED_ARG:
                if (codestr[i+3] != MAKE_FUNCTION)
                    goto exitUnchanged;
                /* don't visit MAKE_FUNCTION as GETARG will be wrong */
                i += 3;
                break;

                /* Replace RETURN LOAD_CONST None RETURN with just RETURN */
                /* Remove unreachable JUMPs after RETURN */
            case RETURN_VALUE:
                if (i+4 >= codelen)
                    continue;
                if (codestr[i+4] == RETURN_VALUE &&
                    ISBASICBLOCK(blocks,i,5))
                    memset(codestr+i+1, NOP, 4);
                else if (UNCONDITIONAL_JUMP(codestr[i+1]) &&
                         ISBASICBLOCK(blocks,i,4))
                    memset(codestr+i+1, NOP, 3);
                break;
        }
    }

    /* Fixup linenotab */
    for (i=0, nops=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
        addrmap[i] = i - nops;
        if (codestr[i] == NOP)
            nops++;
    }
    cum_orig_line = 0;
    last_line = 0;
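    /* co_lnotab stores (bytecode offset increment, line number increment)
       byte pairs.  Only the even entries (the offset increments) are affected
       by dropping NOPs, so the loop below remaps each cumulative offset
       through addrmap and re-derives the increments; the line increments at
       odd indices are left untouched. */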
    for (i=0 ; i < tabsiz ; i+=2) {
        cum_orig_line += lineno[i];
        new_line = addrmap[cum_orig_line];
        assert (new_line - last_line < 255);
        lineno[i] = ((unsigned char)(new_line - last_line));
        last_line = new_line;
    }

    /* Remove NOPs and fixup jump targets */
    for (i=0, h=0 ; i<codelen ; ) {
        opcode = codestr[i];
        switch (opcode) {
            case NOP:
                i++;
                continue;

            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case POP_JUMP_IF_FALSE:
            case POP_JUMP_IF_TRUE:
            case JUMP_IF_FALSE_OR_POP:
            case JUMP_IF_TRUE_OR_POP:
                j = addrmap[GETARG(codestr, i)];
                SETARG(codestr, i, j);
                break;

            case FOR_ITER:
            case JUMP_FORWARD:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
            case SETUP_WITH:
                j = addrmap[GETARG(codestr, i) + i + 3] - addrmap[i] - 3;
                SETARG(codestr, i, j);
                break;
        }
        adj = CODESIZE(opcode);
        while (adj--)
            codestr[h++] = codestr[i++];
    }
    assert(h + nops == codelen);

    code = PyBytes_FromStringAndSize((char *)codestr, h);
    CONST_STACK_DELETE();
    PyMem_Free(addrmap);
    PyMem_Free(codestr);
    PyMem_Free(blocks);
    return code;

 exitError:
    code = NULL;

 exitUnchanged:
    CONST_STACK_DELETE();
    PyMem_Free(addrmap);
    PyMem_Free(codestr);
    PyMem_Free(blocks);
    Py_XINCREF(code);
    return code;
}