/* Peephole optimizations for bytecode compiler. */

#include "Python-ast.h"

/* Read the 16-bit little-endian argument of the instruction at arr[i]. */
#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
#define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \
    || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
    || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP)
/* Jump target address: absolute jumps carry the target directly;
   relative jumps are measured from the end of the 3-byte instruction. */
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
/* Store a 16-bit argument at arr[i].  Wrapped in do/while(0) so the
   two assignments form a single statement and the macro is safe inside
   an unbraced if/else body (CERT PRE10-C). */
#define SETARG(arr, i, val) do {                    \
        arr[i+2] = (unsigned char)((val) >> 8);     \
        arr[i+1] = (unsigned char)((val) & 255);    \
    } while (0)
#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
/* True iff bytes [start, start+bytes) all lie in one basic block,
   i.e. no jump target falls inside the range. */
#define ISBASICBLOCK(blocks, start, bytes) \
    (blocks[start]==blocks[start+bytes-1])
/* Replace LOAD_CONST c1. LOAD_CONST c2 ... LOAD_CONST cn BUILD_TUPLE n
29
with LOAD_CONST (c1, c2, ... cn).
30
The consts table must still be in list form so that the
31
new constant (c1, c2, ... cn) can be appended.
32
Called with codestr pointing to the first LOAD_CONST.
33
Bails out with no change if one or more of the LOAD_CONSTs is missing.
34
Also works for BUILD_LIST when followed by an "in" or "not in" test.
37
tuple_of_constants(unsigned char *codestr, Py_ssize_t n, PyObject *consts)
39
PyObject *newconst, *constant;
40
Py_ssize_t i, arg, len_consts;
43
assert(PyList_CheckExact(consts));
44
assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST);
45
assert(GETARG(codestr, (n*3)) == n);
47
assert(codestr[i*3] == LOAD_CONST);
49
/* Buildup new tuple of constants */
50
newconst = PyTuple_New(n);
53
len_consts = PyList_GET_SIZE(consts);
54
for (i=0 ; i<n ; i++) {
55
arg = GETARG(codestr, (i*3));
56
assert(arg < len_consts);
57
constant = PyList_GET_ITEM(consts, arg);
59
PyTuple_SET_ITEM(newconst, i, constant);
62
/* Append folded constant onto consts */
63
if (PyList_Append(consts, newconst)) {
69
/* Write NOPs over old LOAD_CONSTS and
70
add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
71
memset(codestr, NOP, n*3);
72
codestr[n*3] = LOAD_CONST;
73
SETARG(codestr, (n*3), len_consts);
77
/* Replace LOAD_CONST c1. LOAD_CONST c2 BINOP
78
with LOAD_CONST binop(c1,c2)
79
The consts table must still be in list form so that the
80
new constant can be appended.
81
Called with codestr pointing to the first LOAD_CONST.
82
Abandons the transformation if the folding fails (i.e. 1+'a').
83
If the new constant is a sequence, only folds when the size
84
is below a threshold value. That keeps pyc files from
85
becoming large in the presence of code like: (None,)*1000.
88
fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
90
PyObject *newconst, *v, *w;
91
Py_ssize_t len_consts, size;
95
assert(PyList_CheckExact(consts));
96
assert(codestr[0] == LOAD_CONST);
97
assert(codestr[3] == LOAD_CONST);
99
/* Create new constant */
100
v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
101
w = PyList_GET_ITEM(consts, GETARG(codestr, 3));
105
newconst = PyNumber_Power(v, w, Py_None);
107
case BINARY_MULTIPLY:
108
newconst = PyNumber_Multiply(v, w);
110
case BINARY_TRUE_DIVIDE:
111
newconst = PyNumber_TrueDivide(v, w);
113
case BINARY_FLOOR_DIVIDE:
114
newconst = PyNumber_FloorDivide(v, w);
117
newconst = PyNumber_Remainder(v, w);
120
newconst = PyNumber_Add(v, w);
122
case BINARY_SUBTRACT:
123
newconst = PyNumber_Subtract(v, w);
126
newconst = PyObject_GetItem(v, w);
129
newconst = PyNumber_Lshift(v, w);
132
newconst = PyNumber_Rshift(v, w);
135
newconst = PyNumber_And(v, w);
138
newconst = PyNumber_Xor(v, w);
141
newconst = PyNumber_Or(v, w);
144
/* Called with an unknown opcode */
145
PyErr_Format(PyExc_SystemError,
146
"unexpected binary operation %d on a constant",
150
if (newconst == NULL) {
154
size = PyObject_Size(newconst);
157
else if (size > 20) {
162
/* Append folded constant into consts table */
163
len_consts = PyList_GET_SIZE(consts);
164
if (PyList_Append(consts, newconst)) {
170
/* Write NOP NOP NOP NOP LOAD_CONST newconst */
171
memset(codestr, NOP, 4);
172
codestr[4] = LOAD_CONST;
173
SETARG(codestr, 4, len_consts);
178
/* Replace LOAD_CONST c1 UNARY_OP with LOAD_CONST unary_op(c1).
   The consts table must still be in list form so that the new
   constant can be appended.  Called with codestr pointing to the
   LOAD_CONST.  Abandons the transformation if the folding fails.

   Returns 1 on success (codestr rewritten in place), 0 on failure. */
static int
fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
{
    PyObject *newconst=NULL, *v;
    Py_ssize_t len_consts;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    assert(codestr[0] == LOAD_CONST);

    /* Create new constant */
    v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
    switch (codestr[3]) {
        case UNARY_NEGATIVE:
            /* Preserve the sign of -0.0 */
            if (PyObject_IsTrue(v) == 1)
                newconst = PyNumber_Negative(v);
            break;
        case UNARY_INVERT:
            newconst = PyNumber_Invert(v);
            break;
        case UNARY_POSITIVE:
            newconst = PyNumber_Positive(v);
            break;
        default:
            /* Called with an unknown opcode */
            PyErr_Format(PyExc_SystemError,
                 "unexpected unary operation %d on a constant",
                     codestr[3]);
            return 0;
    }
    if (newconst == NULL) {
        /* Folding failed (or -0.0 was skipped); leave code alone. */
        PyErr_Clear();
        return 0;
    }

    /* Append folded constant into consts table */
    len_consts = PyList_GET_SIZE(consts);
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        return 0;
    }
    Py_DECREF(newconst);

    /* Write NOP LOAD_CONST newconst */
    codestr[0] = NOP;
    codestr[1] = LOAD_CONST;
    SETARG(codestr, 1, len_consts);
    return 1;
}
static unsigned int *
228
markblocks(unsigned char *code, Py_ssize_t len)
230
unsigned int *blocks = (unsigned int *)PyMem_Malloc(len*sizeof(int));
231
int i,j, opcode, blockcnt = 0;
233
if (blocks == NULL) {
237
memset(blocks, 0, len*sizeof(int));
239
/* Mark labels in the first pass */
240
for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
245
case JUMP_IF_FALSE_OR_POP:
246
case JUMP_IF_TRUE_OR_POP:
247
case POP_JUMP_IF_FALSE:
248
case POP_JUMP_IF_TRUE:
254
j = GETJUMPTGT(code, i);
259
/* Build block numbers in the second pass */
260
for (i=0 ; i<len ; i++) {
261
blockcnt += blocks[i]; /* increment blockcnt over labels */
262
blocks[i] = blockcnt;
267
/* Helper to replace LOAD_NAME None/True/False with LOAD_CONST
268
Returns: 0 if no change, 1 if change, -1 if error */
270
load_global(unsigned char *codestr, Py_ssize_t i, char *name, PyObject *consts)
276
if (strcmp(name, "None") == 0)
278
else if (strcmp(name, "True") == 0)
280
else if (strcmp(name, "False") == 0)
284
for (j = 0; j < PyList_GET_SIZE(consts); j++) {
285
if (PyList_GET_ITEM(consts, j) == obj)
288
if (j == PyList_GET_SIZE(consts)) {
289
if (PyList_Append(consts, obj) < 0)
292
assert(PyList_GET_ITEM(consts, j) == obj);
293
codestr[i] = LOAD_CONST;
294
SETARG(codestr, i, j);
298
/* Perform basic peephole optimizations to components of a code object.
299
The consts object should still be in list form to allow new constants
302
To keep the optimizer simple, it bails out (does nothing) for code that
303
has a length over 32,700, and does not calculate extended arguments.
304
That allows us to avoid overflow and sign issues. Likewise, it bails when
305
the lineno table has complex encoding for gaps >= 255. EXTENDED_ARG can
306
appear before MAKE_FUNCTION; in this case both opcodes are skipped.
307
EXTENDED_ARG preceding any other opcode causes the optimizer to bail.
309
Optimizations are restricted to simple transformations occuring within a
310
single basic block. All transformations keep the code size the same or
311
smaller. For those that reduce size, the gaps are initially filled with
312
NOPs. Later those NOPs are removed and the jump addresses retargeted in
313
a single pass. Line numbering is adjusted accordingly. */
316
PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
317
PyObject *lineno_obj)
319
Py_ssize_t i, j, codelen;
321
int tgt, tgttgt, opcode;
322
unsigned char *codestr = NULL;
323
unsigned char *lineno;
325
int new_line, cum_orig_line, last_line, tabsiz;
326
int cumlc=0, lastlc=0; /* Count runs of consecutive LOAD_CONSTs */
327
unsigned int *blocks = NULL;
330
/* Bail out if an exception is set */
331
if (PyErr_Occurred())
334
/* Bypass optimization when the lineno table is too complex */
335
assert(PyBytes_Check(lineno_obj));
336
lineno = (unsigned char*)PyBytes_AS_STRING(lineno_obj);
337
tabsiz = PyBytes_GET_SIZE(lineno_obj);
338
if (memchr(lineno, 255, tabsiz) != NULL)
341
/* Avoid situations where jump retargeting could overflow */
342
assert(PyBytes_Check(code));
343
codelen = PyBytes_GET_SIZE(code);
347
/* Make a modifiable copy of the code string */
348
codestr = (unsigned char *)PyMem_Malloc(codelen);
351
codestr = (unsigned char *)memcpy(codestr,
352
PyBytes_AS_STRING(code), codelen);
354
/* Verify that RETURN_VALUE terminates the codestring. This allows
355
the various transformation patterns to look ahead several
356
instructions without additional checks to make sure they are not
357
looking beyond the end of the code string.
359
if (codestr[codelen-1] != RETURN_VALUE)
362
/* Mapping to new jump targets after NOPs are removed */
363
addrmap = (int *)PyMem_Malloc(codelen * sizeof(int));
367
blocks = markblocks(codestr, codelen);
370
assert(PyList_Check(consts));
372
for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
380
/* Replace UNARY_NOT POP_JUMP_IF_FALSE
381
with POP_JUMP_IF_TRUE */
383
if (codestr[i+1] != POP_JUMP_IF_FALSE
384
|| !ISBASICBLOCK(blocks,i,4))
386
j = GETARG(codestr, i+1);
387
codestr[i] = POP_JUMP_IF_TRUE;
388
SETARG(codestr, i, j);
390
goto reoptimize_current;
392
/* not a is b --> a is not b
393
not a in b --> a not in b
394
not a is not b --> a is b
395
not a not in b --> a in b
398
j = GETARG(codestr, i);
399
if (j < 6 || j > 9 ||
400
codestr[i+3] != UNARY_NOT ||
401
!ISBASICBLOCK(blocks,i,4))
403
SETARG(codestr, i, (j^1));
407
/* Replace LOAD_GLOBAL/LOAD_NAME None/True/False
408
with LOAD_CONST None/True/False */
411
j = GETARG(codestr, i);
412
name = _PyUnicode_AsString(PyTuple_GET_ITEM(names, j));
413
h = load_global(codestr, i, name, consts);
421
/* Skip over LOAD_CONST trueconst
422
POP_JUMP_IF_FALSE xx. This improves
423
"while 1" performance. */
426
j = GETARG(codestr, i);
427
if (codestr[i+3] != POP_JUMP_IF_FALSE ||
428
!ISBASICBLOCK(blocks,i,6) ||
429
!PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
431
memset(codestr+i, NOP, 6);
435
/* Try to fold tuples of constants (includes a case for lists
436
which are only used for "in" and "not in" tests).
437
Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
438
Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
439
Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
442
j = GETARG(codestr, i);
446
((opcode == BUILD_TUPLE &&
447
ISBASICBLOCK(blocks, h, 3*(j+1))) ||
448
(opcode == BUILD_LIST &&
449
codestr[i+3]==COMPARE_OP &&
450
ISBASICBLOCK(blocks, h, 3*(j+2)) &&
451
(GETARG(codestr,i+3)==6 ||
452
GETARG(codestr,i+3)==7))) &&
453
tuple_of_constants(&codestr[h], j, consts)) {
454
assert(codestr[i] == LOAD_CONST);
458
if (codestr[i+3] != UNPACK_SEQUENCE ||
459
!ISBASICBLOCK(blocks,i,6) ||
460
j != GETARG(codestr, i+3))
463
memset(codestr+i, NOP, 6);
465
codestr[i] = ROT_TWO;
466
memset(codestr+i+1, NOP, 5);
468
codestr[i] = ROT_THREE;
469
codestr[i+1] = ROT_TWO;
470
memset(codestr+i+2, NOP, 4);
474
/* Fold binary ops on constants.
475
LOAD_CONST c1 LOAD_CONST c2 BINOP --> LOAD_CONST binop(c1,c2) */
477
case BINARY_MULTIPLY:
478
case BINARY_TRUE_DIVIDE:
479
case BINARY_FLOOR_DIVIDE:
482
case BINARY_SUBTRACT:
490
ISBASICBLOCK(blocks, i-6, 7) &&
491
fold_binops_on_constants(&codestr[i-6], consts)) {
493
assert(codestr[i] == LOAD_CONST);
498
/* Fold unary ops on constants.
499
LOAD_CONST c1 UNARY_OP --> LOAD_CONST unary_op(c) */
503
ISBASICBLOCK(blocks, i-3, 4) &&
504
fold_unaryops_on_constants(&codestr[i-3], consts)) {
506
assert(codestr[i] == LOAD_CONST);
511
/* Simplify conditional jump to conditional jump where the
512
result of the first test implies the success of a similar
513
test or the failure of the opposite test.
519
x:JUMP_IF_FALSE_OR_POP y y:JUMP_IF_FALSE_OR_POP z
520
--> x:JUMP_IF_FALSE_OR_POP z
521
x:JUMP_IF_FALSE_OR_POP y y:JUMP_IF_TRUE_OR_POP z
522
--> x:POP_JUMP_IF_FALSE y+3
523
where y+3 is the instruction following the second test.
525
case JUMP_IF_FALSE_OR_POP:
526
case JUMP_IF_TRUE_OR_POP:
527
tgt = GETJUMPTGT(codestr, i);
529
if (CONDITIONAL_JUMP(j)) {
530
/* NOTE: all possible jumps here are
532
if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) {
533
/* The second jump will be
534
taken iff the first is. */
535
tgttgt = GETJUMPTGT(codestr, tgt);
536
/* The current opcode inherits
537
its target's stack behaviour */
539
SETARG(codestr, i, tgttgt);
540
goto reoptimize_current;
542
/* The second jump is not taken
543
if the first is (so jump past
544
it), and all conditional
545
jumps pop their argument when
546
they're not taken (so change
547
the first jump to pop its
548
argument when it's taken). */
549
if (JUMPS_ON_TRUE(opcode))
550
codestr[i] = POP_JUMP_IF_TRUE;
552
codestr[i] = POP_JUMP_IF_FALSE;
553
SETARG(codestr, i, (tgt + 3));
554
goto reoptimize_current;
557
/* Intentional fallthrough */
559
/* Replace jumps to unconditional jumps */
560
case POP_JUMP_IF_FALSE:
561
case POP_JUMP_IF_TRUE:
569
tgt = GETJUMPTGT(codestr, i);
570
/* Replace JUMP_* to a RETURN into just a RETURN */
571
if (UNCONDITIONAL_JUMP(opcode) &&
572
codestr[tgt] == RETURN_VALUE) {
573
codestr[i] = RETURN_VALUE;
574
memset(codestr+i+1, NOP, 2);
577
if (!UNCONDITIONAL_JUMP(codestr[tgt]))
579
tgttgt = GETJUMPTGT(codestr, tgt);
580
if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */
581
opcode = JUMP_ABSOLUTE;
582
if (!ABSOLUTE_JUMP(opcode))
583
tgttgt -= i + 3; /* Calc relative jump addr */
584
if (tgttgt < 0) /* No backward relative jumps */
587
SETARG(codestr, i, tgttgt);
591
if (codestr[i+3] != MAKE_FUNCTION)
593
/* don't visit MAKE_FUNCTION as GETARG will be wrong */
597
/* Replace RETURN LOAD_CONST None RETURN with just RETURN */
598
/* Remove unreachable JUMPs after RETURN */
602
if (codestr[i+4] == RETURN_VALUE &&
603
ISBASICBLOCK(blocks,i,5))
604
memset(codestr+i+1, NOP, 4);
605
else if (UNCONDITIONAL_JUMP(codestr[i+1]) &&
606
ISBASICBLOCK(blocks,i,4))
607
memset(codestr+i+1, NOP, 3);
612
/* Fixup linenotab */
613
for (i=0, nops=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
614
addrmap[i] = i - nops;
615
if (codestr[i] == NOP)
620
for (i=0 ; i < tabsiz ; i+=2) {
621
cum_orig_line += lineno[i];
622
new_line = addrmap[cum_orig_line];
623
assert (new_line - last_line < 255);
624
lineno[i] =((unsigned char)(new_line - last_line));
625
last_line = new_line;
628
/* Remove NOPs and fixup jump targets */
629
for (i=0, h=0 ; i<codelen ; ) {
638
case POP_JUMP_IF_FALSE:
639
case POP_JUMP_IF_TRUE:
640
case JUMP_IF_FALSE_OR_POP:
641
case JUMP_IF_TRUE_OR_POP:
642
j = addrmap[GETARG(codestr, i)];
643
SETARG(codestr, i, j);
651
j = addrmap[GETARG(codestr, i) + i + 3] - addrmap[i] - 3;
652
SETARG(codestr, i, j);
655
adj = CODESIZE(opcode);
657
codestr[h++] = codestr[i++];
659
assert(h + nops == codelen);
661
code = PyBytes_FromStringAndSize((char *)codestr, h);