1
"""Parse a Python module and describe its classes and methods.
3
Parse enough of a Python file to recognize imports and class and
4
method definitions, and to find out the superclasses of a class.
6
The interface consists of a single function:
7
readmodule_ex(module [, path])
8
where module is the name of a Python module, and path is an optional
9
list of directories where the module is to be searched. If present,
10
path is prepended to the system search path sys.path. The return
11
value is a dictionary. The keys of the dictionary are the names of
12
the classes defined in the module (including classes that are defined
13
via the from XXX import YYY construct). The values are
14
instances of the class Class defined here. One special key/value pair
15
is present for packages: the key '__path__' has a list as its value
16
which contains the package search path.
18
A class is described by the class Class in this module. Instances
19
of this class have the following instance variables:
20
module -- the module name
21
name -- the name of the class
22
super -- a list of super classes (Class instances)
23
methods -- a dictionary of methods
24
file -- the file in which the class was defined
25
lineno -- the line in the file on which the class statement occurred
26
The dictionary of methods uses the method names as keys and the line
27
numbers on which the method was defined as values.
28
If the name of a super class is not recognized, the corresponding
29
entry in the list of super classes is not a class instance but a
30
string giving the name of the super class. Since import statements
31
are recognized and imported modules are scanned as well, this
32
shouldn't happen often.
34
A function is described by the class Function in this module.
35
Instances of this class have the following instance variables:
36
module -- the module name
37
name -- the name of the function
38
file -- the file in which the function was defined
39
lineno -- the line in the file on which the def statement occurred
45
from token import NAME, DEDENT, OP
46
from operator import itemgetter
48
__all__ = ["readmodule", "readmodule_ex", "Class", "Function"]
50
_modules = {} # cache of modules we've seen
52
# each Python class is represented by an instance of this class
54
'''Class to represent a Python class.'''
55
def __init__(self, module, name, super, file, lineno):
65
def _addmethod(self, name, lineno):
66
self.methods[name] = lineno
69
'''Class to represent a top-level Python function'''
70
def __init__(self, module, name, file, lineno):
76
def readmodule(module, path=None):
77
'''Backwards compatible interface.
79
Call readmodule_ex() and then only keep Class objects from the
80
resulting dictionary.'''
83
for key, value in _readmodule(module, path or []).items():
84
if isinstance(value, Class):
88
def readmodule_ex(module, path=None):
    '''Read a module file and return a dictionary describing it.

    Search for MODULE in PATH and sys.path, read and parse the
    module and return a dictionary with one entry for each class
    found in the module.  The dictionary is returned exactly as
    _readmodule() produced it; readmodule() is the variant that keeps
    only the Class objects.

    PATH is optional; when it is None (or otherwise false) an empty
    list is passed on to _readmodule() in its place.
    '''
    return _readmodule(module, path or [])
97
def _readmodule(module, path, inpackage=None):
98
'''Do the hard work for readmodule[_ex].
100
If INPACKAGE is given, it must be the dotted name of the package in
101
which we are searching for a submodule, and then PATH must be the
102
package search path; otherwise, we are searching for a top-level
103
module, and PATH is combined with sys.path.
105
# Compute the full module name (prepending inpackage if set)
106
if inpackage is not None:
107
fullmodule = "%s.%s" % (inpackage, module)
112
if fullmodule in _modules:
113
return _modules[fullmodule]
115
# Initialize the dict for this module's contents
118
# Check if it is a built-in module; we don't do much for these
119
if module in sys.builtin_module_names and inpackage is None:
120
_modules[module] = dict
123
# Check for a dotted module name
124
i = module.rfind('.')
127
submodule = module[i+1:]
128
parent = _readmodule(package, path, inpackage)
129
if inpackage is not None:
130
package = "%s.%s" % (inpackage, package)
131
return _readmodule(submodule, parent['__path__'], package)
133
# Search the path for the module
135
if inpackage is not None:
136
f, fname, (_s, _m, ty) = imp.find_module(module, path)
138
f, fname, (_s, _m, ty) = imp.find_module(module, path + sys.path)
139
if ty == imp.PKG_DIRECTORY:
140
dict['__path__'] = [fname]
141
path = [fname] + path
142
f, fname, (_s, _m, ty) = imp.find_module('__init__', [fname])
143
_modules[fullmodule] = dict
144
if ty != imp.PY_SOURCE:
145
# not Python source, can't do anything with this module
149
stack = [] # stack of (class, indent) pairs
151
g = tokenize.generate_tokens(f.readline)
153
for tokentype, token, start, _end, _line in g:
154
if tokentype == DEDENT:
155
lineno, thisindent = start
156
# close nested classes and defs
157
while stack and stack[-1][1] >= thisindent:
160
lineno, thisindent = start
161
# close previous nested classes and defs
162
while stack and stack[-1][1] >= thisindent:
164
tokentype, meth_name, start = g.next()[0:3]
165
if tokentype != NAME:
166
continue # Syntax error
168
cur_class = stack[-1][0]
169
if isinstance(cur_class, Class):
171
cur_class._addmethod(meth_name, lineno)
172
# else it's a nested def
175
dict[meth_name] = Function(fullmodule, meth_name,
177
stack.append((None, thisindent)) # Marker for nested fns
178
elif token == 'class':
179
lineno, thisindent = start
180
# close previous nested classes and defs
181
while stack and stack[-1][1] >= thisindent:
183
tokentype, class_name, start = g.next()[0:3]
184
if tokentype != NAME:
185
continue # Syntax error
186
# parse what follows the class name
187
tokentype, token, start = g.next()[0:3]
190
names = [] # List of superclasses
191
# there's a list of superclasses
193
super = [] # Tokens making up current superclass
195
tokentype, token, start = g.next()[0:3]
196
if token in (')', ',') and level == 1:
199
# we know this super class
204
# super class is of the form
205
# module.class: look in module for
221
elif token == ',' and level == 1:
223
# only use NAME and OP (== dot) tokens for type name
224
elif tokentype in (NAME, OP) and level == 1:
226
# expressions in the base list are not supported
228
cur_class = Class(fullmodule, class_name, inherit,
231
dict[class_name] = cur_class
232
stack.append((cur_class, thisindent))
233
elif token == 'import' and start[1] == 0:
234
modules = _getnamelist(g)
235
for mod, _mod2 in modules:
237
# Recursively read the imported module
238
if inpackage is None:
239
_readmodule(mod, path)
242
_readmodule(mod, path, inpackage)
246
# If we can't find or parse the imported module,
247
# too bad -- don't die here.
249
elif token == 'from' and start[1] == 0:
250
mod, token = _getname(g)
251
if not mod or token != "import":
253
names = _getnamelist(g)
255
# Recursively read the imported module
256
d = _readmodule(mod, path, inpackage)
258
# If we can't find or parse the imported module,
259
# too bad -- don't die here.
261
# add any classes that were defined in the imported module
262
# to our name space if they were mentioned in the list
267
# don't add names that start with _
271
except StopIteration:
278
# Helper to get a comma-separated list of dotted names plus 'as'
279
# clauses. Return a list of pairs (name, name2) where name2 is
280
# the 'as' name, or None if there is no 'as' clause.
283
name, token = _getname(g)
287
name2, token = _getname(g)
290
names.append((name, name2))
291
while token != "," and "\n" not in token:
298
# Helper to get a dotted name, return a pair (name, token) where
299
# name is the dotted name, or None if there was no dotted name,
300
# and token is the next input token.
302
tokentype, token = g.next()[0:2]
303
if tokentype != NAME and token != '*':
307
tokentype, token = g.next()[0:2]
310
tokentype, token = g.next()[0:2]
311
if tokentype != NAME:
314
return (".".join(parts), token)
317
# Main program for testing.
320
if os.path.exists(mod):
321
path = [os.path.dirname(mod)]
322
mod = os.path.basename(mod)
323
if mod.lower().endswith(".py"):
327
dict = readmodule_ex(mod, path)
329
objs.sort(lambda a, b: cmp(getattr(a, 'lineno', 0),
330
getattr(b, 'lineno', 0)))
332
if isinstance(obj, Class):
333
print "class", obj.name, obj.super, obj.lineno
334
methods = sorted(obj.methods.iteritems(), key=itemgetter(1))
335
for name, lineno in methods:
336
if name != "__path__":
337
print " def", name, lineno
338
elif isinstance(obj, Function):
339
print "def", obj.name, obj.lineno
341
if __name__ == "__main__":