1
"""A flow graph representation for Python bytecode"""
7
from compiler import misc
9
import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS
13
self.current = self.entry = Block()
14
self.exit = Block("exit")
15
self.blocks = misc.Set()
16
self.blocks.add(self.entry)
17
self.blocks.add(self.exit)
19
def startBlock(self, block):
22
print "end", repr(self.current)
23
print " next", self.current.next
24
print " ", self.current.get_children()
28
def nextBlock(self, block=None):
29
# XXX think we need to specify when there is implicit transfer
30
# from one block to the next. might be better to represent this
31
# with explicit JUMP_ABSOLUTE instructions that are optimized
32
# out when they are unnecessary.
34
# I think this strategy works: each block has a child
35
# designated as "next" which is returned as the last of the
36
# children. because the nodes in a graph are emitted in
37
# reverse post order, the "next" block will always be emitted
38
# immediately after its parent.
39
# Worry: maintaining this invariant could be tricky
41
block = self.newBlock()
43
# Note: If the current block ends with an unconditional
44
# control transfer, then it is incorrect to add an implicit
45
# transfer to the block graph. The current code requires
46
# these edges to get the blocks emitted in the right order,
47
# however. :-( If a client needs to remove these edges, call
50
self.current.addNext(block)
51
self.startBlock(block)
58
def startExitBlock(self):
59
self.startBlock(self.exit)
63
def _enable_debug(self):
66
def _disable_debug(self):
69
def emit(self, *inst):
72
if inst[0] in ['RETURN_VALUE', 'YIELD_VALUE']:
73
self.current.addOutEdge(self.exit)
74
if len(inst) == 2 and isinstance(inst[1], Block):
75
self.current.addOutEdge(inst[1])
76
self.current.emit(inst)
78
def getBlocksInOrder(self):
79
"""Return the blocks in reverse postorder
81
i.e. each node appears before all of its successors
83
# XXX make sure every node that doesn't have an explicit next
84
# is set so that next points to exit
85
for b in self.blocks.elements():
90
order = dfs_postorder(self.entry, {})
92
self.fixupOrder(order, self.exit)
94
if not self.exit in order:
95
order.append(self.exit)
99
def fixupOrder(self, blocks, default_next):
100
"""Fixup bad order introduced by DFS."""
102
# XXX This is a total mess. There must be a better way to get
103
# the code blocks in the right order.
105
self.fixupOrderHonorNext(blocks, default_next)
106
self.fixupOrderForward(blocks, default_next)
108
def fixupOrderHonorNext(self, blocks, default_next):
109
"""Fix one problem with DFS.
111
The DFS uses child block, but doesn't know about the special
112
"next" block. As a result, the DFS can order blocks so that a
113
block isn't next to the right block for implicit control
117
for i in range(len(blocks)):
120
for i in range(0, len(blocks) - 1):
123
if not b.next or b.next[0] == default_next or b.next[0] == n:
125
# The blocks are in the wrong order. Find the chain of
126
# blocks to insert where they belong.
130
while elt.next and elt.next[0] != default_next:
131
chain.append(elt.next[0])
133
# Now remove the blocks in the chain from the current
134
# block list, so that they can be re-inserted.
138
l.append((index[b], b))
143
# Insert the chain in the proper location
144
blocks[i:i + 1] = [cur] + chain
145
# Finally, re-compute the block indexes
146
for i in range(len(blocks)):
149
def fixupOrderForward(self, blocks, default_next):
150
"""Make sure all JUMP_FORWARDs jump forward"""
155
index[b] = len(chains)
157
if b.next and b.next[0] == default_next:
165
for i in range(len(chains)):
168
for c in b.get_children():
172
if inst[0] == 'JUMP_FORWARD':
177
constraints.append((index[c], i))
182
# XXX just do one for now
183
# do swaps to get things in the right order
184
goes_before, a_chain = constraints[0]
185
assert a_chain > goes_before
188
chains.insert(goes_before, c)
196
return self.blocks.elements()
199
"""Return nodes appropriate for use with dominator"""
202
def getContainedGraphs(self):
204
for b in self.getBlocks():
205
l.extend(b.getContainedGraphs())
208
def dfs_postorder(b, seen):
209
"""Depth-first search of tree rooted at b, return in postorder"""
212
for c in b.get_children():
215
order = order + dfs_postorder(c, seen)
222
def __init__(self, label=''):
224
self.inEdges = misc.Set()
225
self.outEdges = misc.Set()
227
self.bid = Block._count
229
Block._count = Block._count + 1
233
return "<block %s id=%d>" % (self.label, self.bid)
235
return "<block id=%d>" % (self.bid)
238
insts = map(str, self.insts)
239
return "<block %s %d:\n%s>" % (self.label, self.bid,
242
def emit(self, inst):
245
self.outEdges.add(inst[1])
246
self.insts.append(inst)
248
def getInstructions(self):
251
def addInEdge(self, block):
252
self.inEdges.add(block)
254
def addOutEdge(self, block):
255
self.outEdges.add(block)
257
def addNext(self, block):
258
self.next.append(block)
259
assert len(self.next) == 1, map(str, self.next)
261
_uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', 'YIELD_VALUE',
262
'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP')
265
"""Remove bogus edge for unconditional transfers
267
Each block has a next edge that accounts for implicit control
268
transfers, e.g. from a JUMP_IF_FALSE to the block that will be
269
executed if the test is true.
271
These edges must remain for the current assembler code to
272
work. If they are removed, the dfs_postorder gets things in
273
weird orders. However, they shouldn't be there for other
274
purposes, e.g. conversion to SSA form. This method will
275
remove the next edge when it follows an unconditional control
279
op, arg = self.insts[-1]
280
except (IndexError, ValueError):
282
if op in self._uncond_transfer:
285
def get_children(self):
286
if self.next and self.next[0] in self.outEdges:
287
self.outEdges.remove(self.next[0])
288
return self.outEdges.elements() + self.next
290
def getContainedGraphs(self):
291
"""Return all graphs contained within this block.
293
For example, a MAKE_FUNCTION block will contain a reference to
294
the graph for the function body.
297
for inst in self.insts:
301
if hasattr(op, 'graph'):
302
contained.append(op.graph)
305
# flags for code objects
307
# the FlowGraph is transformed in place; it exists in one of these states
313
class PyFlowGraph(FlowGraph):
314
super_init = FlowGraph.__init__
316
def __init__(self, name, filename, args=(), optimized=0, klass=None):
319
self.filename = filename
320
self.docstring = None
321
self.args = args # XXX
322
self.argcount = getArgCount(args)
325
self.flags = CO_OPTIMIZED | CO_NEWLOCALS
330
# Free variables found by the symbol table scan, including
331
# variables used only in nested scopes, are included here.
334
# The closure list is used to track the order of cell
335
# variables and free variables in the resulting code object.
336
# The offsets used by LOAD_CLOSURE/LOAD_DEREF refer to both
337
# kinds of variables.
339
self.varnames = list(args) or []
340
for i in range(len(self.varnames)):
341
var = self.varnames[i]
342
if isinstance(var, TupleArg):
343
self.varnames[i] = var.getName()
346
def setDocstring(self, doc):
349
def setFlag(self, flag):
350
self.flags = self.flags | flag
351
if flag == CO_VARARGS:
352
self.argcount = self.argcount - 1
354
def checkFlag(self, flag):
355
if self.flags & flag:
358
def setFreeVars(self, names):
359
self.freevars = list(names)
361
def setCellVars(self, names):
362
self.cellvars = names
365
"""Get a Python code object"""
366
assert self.stage == RAW
367
self.computeStackDepth()
369
assert self.stage == FLAT
371
assert self.stage == CONV
373
assert self.stage == DONE
374
return self.newCodeObject()
376
def dump(self, io=None):
383
if opname == "SET_LINENO":
386
print "\t", "%3d" % pc, opname
389
print "\t", "%3d" % pc, opname, t[1]
394
def computeStackDepth(self):
395
"""Compute the max stack depth.
397
Approach is to compute the stack effect of each basic block.
398
Then find the path through the code with the largest total
403
for b in self.getBlocks():
404
depth[b] = findDepth(b.getInstructions())
413
children = b.get_children()
415
return max([max_depth(c, d) for c in children])
417
if not b.label == "exit":
418
return max_depth(self.exit, d)
422
self.stacksize = max_depth(self.entry, 0)
424
def flattenGraph(self):
425
"""Arrange the blocks in order and resolve jumps"""
426
assert self.stage == RAW
427
self.insts = insts = []
431
for b in self.getBlocksInOrder():
433
for inst in b.getInstructions():
437
elif inst[0] != "SET_LINENO":
442
for i in range(len(insts)):
446
elif inst[0] != "SET_LINENO":
449
if self.hasjrel.has_elt(opname):
451
offset = begin[oparg] - pc
452
insts[i] = opname, offset
453
elif self.hasjabs.has_elt(opname):
454
insts[i] = opname, begin[inst[1]]
458
for i in dis.hasjrel:
459
hasjrel.add(dis.opname[i])
461
for i in dis.hasjabs:
462
hasjabs.add(dis.opname[i])
464
def convertArgs(self):
465
"""Convert arguments from symbolic to concrete form"""
466
assert self.stage == FLAT
467
self.consts.insert(0, self.docstring)
469
for i in range(len(self.insts)):
473
conv = self._converters.get(opname, None)
475
self.insts[i] = opname, conv(self, oparg)
478
def sort_cellvars(self):
479
"""Sort cellvars in the order of varnames and prune from freevars.
482
for name in self.cellvars:
484
self.cellvars = [name for name in self.varnames
485
if cells.has_key(name)]
486
for name in self.cellvars:
488
self.cellvars = self.cellvars + cells.keys()
489
self.closure = self.cellvars + self.freevars
491
def _lookupName(self, name, list):
492
"""Return index of name in list, appending if necessary
494
This routine uses a list instead of a dictionary, because a
495
dictionary can't store two different keys if the keys have the
496
same value but different types, e.g. 2 and 2L. The compiler
497
must treat these two separately, so it does an explicit type
498
comparison before comparing the values.
501
for i in range(len(list)):
502
if t == type(list[i]) and list[i] == name:
509
def _convert_LOAD_CONST(self, arg):
510
if hasattr(arg, 'getCode'):
512
return self._lookupName(arg, self.consts)
514
def _convert_LOAD_FAST(self, arg):
515
self._lookupName(arg, self.names)
516
return self._lookupName(arg, self.varnames)
517
_convert_STORE_FAST = _convert_LOAD_FAST
518
_convert_DELETE_FAST = _convert_LOAD_FAST
520
def _convert_LOAD_NAME(self, arg):
521
if self.klass is None:
522
self._lookupName(arg, self.varnames)
523
return self._lookupName(arg, self.names)
525
def _convert_NAME(self, arg):
526
if self.klass is None:
527
self._lookupName(arg, self.varnames)
528
return self._lookupName(arg, self.names)
529
_convert_STORE_NAME = _convert_NAME
530
_convert_DELETE_NAME = _convert_NAME
531
_convert_IMPORT_NAME = _convert_NAME
532
_convert_IMPORT_FROM = _convert_NAME
533
_convert_STORE_ATTR = _convert_NAME
534
_convert_LOAD_ATTR = _convert_NAME
535
_convert_DELETE_ATTR = _convert_NAME
536
_convert_LOAD_GLOBAL = _convert_NAME
537
_convert_STORE_GLOBAL = _convert_NAME
538
_convert_DELETE_GLOBAL = _convert_NAME
540
def _convert_DEREF(self, arg):
541
self._lookupName(arg, self.names)
542
self._lookupName(arg, self.varnames)
543
return self._lookupName(arg, self.closure)
544
_convert_LOAD_DEREF = _convert_DEREF
545
_convert_STORE_DEREF = _convert_DEREF
547
def _convert_LOAD_CLOSURE(self, arg):
548
self._lookupName(arg, self.varnames)
549
return self._lookupName(arg, self.closure)
551
_cmp = list(dis.cmp_op)
552
def _convert_COMPARE_OP(self, arg):
553
return self._cmp.index(arg)
555
# similarly for other opcodes...
557
for name, obj in locals().items():
558
if name[:9] == "_convert_":
560
_converters[opname] = obj
561
del name, obj, opname
563
def makeByteCode(self):
564
assert self.stage == CONV
565
self.lnotab = lnotab = LineAddrTable()
569
lnotab.addCode(self.opnum[opname])
572
if opname == "SET_LINENO":
573
lnotab.nextLine(oparg)
575
hi, lo = twobyte(oparg)
577
lnotab.addCode(self.opnum[opname], lo, hi)
580
print self.opnum[opname], lo, hi
585
for num in range(len(dis.opname)):
586
opnum[dis.opname[num]] = num
589
def newCodeObject(self):
590
assert self.stage == DONE
591
if (self.flags & CO_NEWLOCALS) == 0:
594
nlocals = len(self.varnames)
595
argcount = self.argcount
596
if self.flags & CO_VARKEYWORDS:
597
argcount = argcount - 1
598
return new.code(argcount, nlocals, self.stacksize, self.flags,
599
self.lnotab.getCode(), self.getConsts(),
600
tuple(self.names), tuple(self.varnames),
601
self.filename, self.name, self.lnotab.firstline,
602
self.lnotab.getTable(), tuple(self.freevars),
603
tuple(self.cellvars))
606
"""Return a tuple for the const slot of the code object
608
Must convert references to code (MAKE_FUNCTION) to code
612
for elt in self.consts:
613
if isinstance(elt, PyFlowGraph):
619
if opname[:4] == 'JUMP':
623
"""Helper for marking func defs with nested tuples in arglist"""
624
def __init__(self, count, names):
628
return "TupleArg(%s, %s)" % (self.count, self.names)
630
return ".%d" % self.count
632
def getArgCount(args):
636
if isinstance(arg, TupleArg):
637
numNames = len(misc.flatten(arg.names))
638
argcount = argcount - numNames
642
"""Convert an int argument into high and low bytes"""
643
assert isinstance(val, int)
644
return divmod(val, 256)
649
This class builds the lnotab, which is documented in compile.c.
650
Here's a brief recap:
652
For each SET_LINENO instruction after the first one, two bytes are
653
added to lnotab. (In some cases, multiple two-byte entries are
654
added.) The first byte is the distance in bytes between the
655
instruction for the last SET_LINENO and the current SET_LINENO.
656
The second byte is offset in line numbers. If either offset is
657
greater than 255, multiple two-byte entries are added -- see
658
compile.c for the delicate details.
669
def addCode(self, *args):
671
self.code.append(chr(arg))
672
self.codeOffset = self.codeOffset + len(args)
674
def nextLine(self, lineno):
675
if self.firstline == 0:
676
self.firstline = lineno
677
self.lastline = lineno
680
addr = self.codeOffset - self.lastoff
681
line = lineno - self.lastline
682
# Python assumes that lineno always increases with
683
# increasing bytecode address (lnotab is unsigned char).
684
# Depending on when SET_LINENO instructions are emitted
685
# this is not always true. Consider the code:
688
# In the bytecode stream, the assignment to "a" occurs
689
# after the loading of "b". This works with the C Python
690
# compiler because it only generates a SET_LINENO instruction
691
# for the assignment.
693
push = self.lnotab.append
698
push(addr); push(255)
701
if addr > 0 or line > 0:
702
push(addr); push(line)
703
self.lastline = lineno
704
self.lastoff = self.codeOffset
707
return ''.join(self.code)
710
return ''.join(map(chr, self.lnotab))
712
class StackDepthTracker:
713
# XXX 1. need to keep track of stack depth on jumps
714
# XXX 2. at least partly as a result, this code is broken
716
def findDepth(self, insts, debug=0):
723
delta = self.effect.get(opname, None)
724
if delta is not None:
725
depth = depth + delta
728
for pat, pat_delta in self.patterns:
729
if opname[:len(pat)] == pat:
731
depth = depth + delta
733
# if we still haven't found a match
735
meth = getattr(self, opname, None)
737
depth = depth + meth(i[1])
741
print depth, maxDepth
755
'DELETE_SLICE+0': -1,
756
'DELETE_SLICE+1': -2,
757
'DELETE_SLICE+2': -2,
758
'DELETE_SLICE+3': -3,
777
'LOAD_ATTR': 0, # unlike other loads
790
def UNPACK_SEQUENCE(self, count):
792
def BUILD_TUPLE(self, count):
794
def BUILD_LIST(self, count):
796
def CALL_FUNCTION(self, argc):
797
hi, lo = divmod(argc, 256)
798
return -(lo + hi * 2)
799
def CALL_FUNCTION_VAR(self, argc):
800
return self.CALL_FUNCTION(argc)-1
801
def CALL_FUNCTION_KW(self, argc):
802
return self.CALL_FUNCTION(argc)-1
803
def CALL_FUNCTION_VAR_KW(self, argc):
804
return self.CALL_FUNCTION(argc)-2
805
def MAKE_FUNCTION(self, argc):
807
def MAKE_CLOSURE(self, argc):
808
# XXX need to account for free variables too!
810
def BUILD_SLICE(self, argc):
815
def DUP_TOPX(self, argc):
818
findDepth = StackDepthTracker().findDepth