1
"""Find modules used by a script, using introspection."""
3
from __future__ import generators
14
# XXX Clean up once str8's cstor matches bytes.
15
# Opcode values wrapped as one-byte bytes objects; the scan_opcodes
# methods compare these against the code they walk.
LOAD_CONST = bytes((dis.opmap['LOAD_CONST'],))
IMPORT_NAME = bytes((dis.opmap['IMPORT_NAME'],))
STORE_NAME = bytes((dis.opmap['STORE_NAME'],))
STORE_GLOBAL = bytes((dis.opmap['STORE_GLOBAL'],))
# Opcodes that bind a name at module (global) scope.
STORE_OPS = [STORE_NAME, STORE_GLOBAL]
HAVE_ARGUMENT = bytes((dis.HAVE_ARGUMENT,))
22
# Modulefinder does a good job at simulating Python's import system, but it cannot
23
# handle __path__ modifications packages make at runtime. Therefore there
24
# is a mechanism whereby you can register extra paths in this map for a
25
# package, and it will be honored.
27
# Note that this is a mapping from package names to lists of paths.
31
def AddPackagePath(packagename, path):
32
paths = packagePathMap.get(packagename, [])
34
packagePathMap[packagename] = paths
36
replacePackageMap = {}
38
# This ReplacePackage mechanism allows modulefinder to work around the
39
# way the _xmlplus package injects itself under the name "xml" into
40
# sys.modules at runtime by calling ReplacePackage("_xmlplus", "xml")
41
# before running ModuleFinder.
43
def ReplacePackage(oldname, newname):
    """Register *newname* as the replacement name for package *oldname*.

    The pair is recorded in the module-level replacePackageMap; registering
    the same *oldname* again overwrites the earlier entry.
    """
    replacePackageMap.update({oldname: newname})
49
def __init__(self, name, file=None, path=None):
54
# The set of global names that are assigned to in the module.
55
# This includes those names imported through starimports of
58
# The set of starimports this module did that could not be
59
# resolved, ie. a starimport from a non-Python module.
63
s = "Module(%r" % (self.__name__,)
64
if self.__file__ is not None:
65
s = s + ", %r" % (self.__file__,)
66
if self.__path__ is not None:
67
s = s + ", %r" % (self.__path__,)
73
def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
81
self.excludes = excludes
82
self.replace_paths = replace_paths
83
self.processed_paths = [] # Used in debugging only
85
def msg(self, level, str, *args):
86
if level <= self.debug:
87
for i in range(self.indent):
91
print(repr(arg), end=' ')
94
def msgin(self, *args):
96
if level <= self.debug:
97
self.indent = self.indent + 1
100
def msgout(self, *args):
102
if level <= self.debug:
103
self.indent = self.indent - 1
106
def run_script(self, pathname):
    """Load the script at *pathname* as the ``__main__`` module.

    The file is opened with READ_MODE and handed to load_module()
    described as Python source.  The file is closed as soon as
    load_module() returns or raises, so the handle is not leaked.
    """
    self.msg(2, "run_script", pathname)
    with open(pathname, READ_MODE) as fp:
        # (suffix, mode, type) triple, the shape load_module() unpacks.
        stuff = ("", "r", imp.PY_SOURCE)
        self.load_module('__main__', fp, pathname, stuff)
112
def load_file(self, pathname):
    """Load the source file at *pathname* as a module.

    The module name is the file's base name with its extension stripped;
    the extension becomes the suffix of the file description passed to
    load_module().  The file is closed as soon as load_module() returns
    or raises, so the handle is not leaked.
    """
    # Only the final path component matters; the directory part is unused.
    name, ext = os.path.splitext(os.path.basename(pathname))
    with open(pathname, READ_MODE) as fp:
        # (suffix, mode, type) triple, the shape load_module() unpacks.
        stuff = (ext, "r", imp.PY_SOURCE)
        self.load_module(name, fp, pathname, stuff)
119
def import_hook(self, name, caller=None, fromlist=None, level=-1):
120
self.msg(3, "import_hook", name, caller, fromlist, level)
121
parent = self.determine_parent(caller, level=level)
122
q, tail = self.find_head_package(parent, name)
123
m = self.load_tail(q, tail)
127
self.ensure_fromlist(m, fromlist)
130
def determine_parent(self, caller, level=-1):
131
self.msgin(4, "determine_parent", caller, level)
132
if not caller or level == 0:
133
self.msgout(4, "determine_parent -> None")
135
pname = caller.__name__
136
if level >= 1: # relative import
140
parent = self.modules[pname]
141
assert parent is caller
142
self.msgout(4, "determine_parent ->", parent)
144
if pname.count(".") < level:
145
raise ImportError("relative importpath too deep")
146
pname = ".".join(pname.split(".")[:-level])
147
parent = self.modules[pname]
148
self.msgout(4, "determine_parent ->", parent)
151
parent = self.modules[pname]
152
assert caller is parent
153
self.msgout(4, "determine_parent ->", parent)
158
parent = self.modules[pname]
159
assert parent.__name__ == pname
160
self.msgout(4, "determine_parent ->", parent)
162
self.msgout(4, "determine_parent -> None")
165
def find_head_package(self, parent, name):
166
self.msgin(4, "find_head_package", parent, name)
175
qname = "%s.%s" % (parent.__name__, head)
178
q = self.import_module(head, qname, parent)
180
self.msgout(4, "find_head_package ->", (q, tail))
185
q = self.import_module(head, qname, parent)
187
self.msgout(4, "find_head_package ->", (q, tail))
189
self.msgout(4, "raise ImportError: No module named", qname)
190
raise ImportError("No module named " + qname)
192
def load_tail(self, q, tail):
193
self.msgin(4, "load_tail", q, tail)
197
if i < 0: i = len(tail)
198
head, tail = tail[:i], tail[i+1:]
199
mname = "%s.%s" % (m.__name__, head)
200
m = self.import_module(head, mname, m)
202
self.msgout(4, "raise ImportError: No module named", mname)
203
raise ImportError("No module named " + mname)
204
self.msgout(4, "load_tail ->", m)
207
def ensure_fromlist(self, m, fromlist, recursive=0):
208
self.msg(4, "ensure_fromlist", m, fromlist, recursive)
212
all = self.find_all_submodules(m)
214
self.ensure_fromlist(m, all, 1)
215
elif not hasattr(m, sub):
216
subname = "%s.%s" % (m.__name__, sub)
217
submod = self.import_module(sub, subname, m)
219
raise ImportError("No module named " + subname)
221
def find_all_submodules(self, m):
225
# 'suffixes' used to be a list hardcoded to [".py", ".pyc", ".pyo"].
226
# But we must also collect Python extension modules - although
227
# we cannot separate normal dlls from Python extensions.
229
for triple in imp.get_suffixes():
230
suffixes.append(triple[0])
231
for dir in m.__path__:
233
names = os.listdir(dir)
235
self.msg(2, "can't list directory", dir)
239
for suff in suffixes:
241
if name[-n:] == suff:
244
if mod and mod != "__init__":
246
return modules.keys()
248
def import_module(self, partname, fqname, parent):
249
self.msgin(3, "import_module", partname, fqname, parent)
251
m = self.modules[fqname]
255
self.msgout(3, "import_module ->", m)
257
if fqname in self.badmodules:
258
self.msgout(3, "import_module -> None")
260
if parent and parent.__path__ is None:
261
self.msgout(3, "import_module -> None")
264
fp, pathname, stuff = self.find_module(partname,
265
parent and parent.__path__, parent)
267
self.msgout(3, "import_module ->", None)
270
m = self.load_module(fqname, fp, pathname, stuff)
274
setattr(parent, partname, m)
275
self.msgout(3, "import_module ->", m)
278
def load_module(self, fqname, fp, pathname, file_info):
279
suffix, mode, type = file_info
280
self.msgin(2, "load_module", fqname, fp and "fp", pathname)
281
if type == imp.PKG_DIRECTORY:
282
m = self.load_package(fqname, pathname)
283
self.msgout(2, "load_module ->", m)
285
if type == imp.PY_SOURCE:
286
co = compile(fp.read()+'\n', pathname, 'exec')
287
elif type == imp.PY_COMPILED:
288
if fp.read(4) != imp.get_magic():
289
self.msgout(2, "raise ImportError: Bad magic number", pathname)
290
raise ImportError("Bad magic number in %s" % pathname)
292
co = marshal.load(fp)
295
m = self.add_module(fqname)
296
m.__file__ = pathname
298
if self.replace_paths:
299
co = self.replace_paths_in_code(co)
301
self.scan_code(co, m)
302
self.msgout(2, "load_module ->", m)
305
def _add_badmodule(self, name, caller):
306
if name not in self.badmodules:
307
self.badmodules[name] = {}
309
self.badmodules[name][caller.__name__] = 1
311
self.badmodules[name]["-"] = 1
313
def _safe_import_hook(self, name, caller, fromlist, level=-1):
314
# wrapper for self.import_hook() that won't raise ImportError
315
if name in self.badmodules:
316
self._add_badmodule(name, caller)
319
self.import_hook(name, caller, level=level)
320
except ImportError as msg:
321
self.msg(2, "ImportError:", str(msg))
322
self._add_badmodule(name, caller)
326
if sub in self.badmodules:
327
self._add_badmodule(sub, caller)
330
self.import_hook(name, caller, [sub], level=level)
331
except ImportError as msg:
332
self.msg(2, "ImportError:", str(msg))
333
fullname = name + "." + sub
334
self._add_badmodule(fullname, caller)
336
def scan_opcodes(self, co,
337
unpack = struct.unpack):
338
# Scan the code, and yield 'interesting' opcode combinations
339
# Version for Python 2.4 and older
342
consts = co.co_consts
346
oparg, = unpack('<H', code[1:3])
347
yield "store", (names[oparg],)
350
if c == LOAD_CONST and code[3] == IMPORT_NAME:
351
oparg_1, oparg_2 = unpack('<xHxH', code[:6])
352
yield "import", (consts[oparg_1], names[oparg_2])
355
if c >= HAVE_ARGUMENT:
360
def scan_opcodes_25(self, co,
361
unpack = struct.unpack):
362
# Scan the code, and yield 'interesting' opcode combinations
363
# Python 2.5 version (has absolute and relative imports)
366
consts = co.co_consts
367
LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
371
oparg, = unpack('<H', code[1:3])
372
yield "store", (names[oparg],)
375
if code[:9:3] == LOAD_LOAD_AND_IMPORT:
376
oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9])
377
level = consts[oparg_1]
378
if level == 0: # absolute import
379
yield "absolute_import", (consts[oparg_2], names[oparg_3])
380
else: # relative import
381
yield "relative_import", (level, consts[oparg_2], names[oparg_3])
384
if c >= HAVE_ARGUMENT:
389
def scan_code(self, co, m):
391
if sys.version_info >= (2, 5):
392
scanner = self.scan_opcodes_25
394
scanner = self.scan_opcodes
395
for what, args in scanner(co):
398
m.globalnames[name] = 1
399
elif what == "absolute_import":
400
fromlist, name = args
402
if fromlist is not None:
405
fromlist = [f for f in fromlist if f != "*"]
406
self._safe_import_hook(name, m, fromlist, level=0)
408
# We've encountered an "import *". If it is a Python module,
409
# the code has already been parsed and we can suck out the
413
# At this point we don't know whether 'name' is a
414
# submodule of 'm' or a global module. Let's just try
415
# the full name first.
416
mm = self.modules.get(m.__name__ + "." + name)
418
mm = self.modules.get(name)
420
m.globalnames.update(mm.globalnames)
421
m.starimports.update(mm.starimports)
422
if mm.__code__ is None:
423
m.starimports[name] = 1
425
m.starimports[name] = 1
426
elif what == "relative_import":
427
level, fromlist, name = args
429
self._safe_import_hook(name, m, fromlist, level=level)
431
parent = self.determine_parent(m, level=level)
432
self._safe_import_hook(parent.__name__, None, fromlist, level=0)
434
# We don't expect anything else from the generator.
435
raise RuntimeError(what)
437
for c in co.co_consts:
438
if isinstance(c, type(co)):
441
def load_package(self, fqname, pathname):
442
self.msgin(2, "load_package", fqname, pathname)
443
newname = replacePackageMap.get(fqname)
446
m = self.add_module(fqname)
447
m.__file__ = pathname
448
m.__path__ = [pathname]
450
# As per comment at top of file, simulate runtime __path__ additions.
451
m.__path__ = m.__path__ + packagePathMap.get(fqname, [])
453
fp, buf, stuff = self.find_module("__init__", m.__path__)
454
self.load_module(fqname, fp, buf, stuff)
455
self.msgout(2, "load_package ->", m)
458
def add_module(self, fqname):
459
if fqname in self.modules:
460
return self.modules[fqname]
461
self.modules[fqname] = m = Module(fqname)
464
def find_module(self, name, path, parent=None):
465
if parent is not None:
466
# assert path is not None
467
fullname = parent.__name__+'.'+name
470
if fullname in self.excludes:
471
self.msgout(3, "find_module -> Excluded", fullname)
472
raise ImportError(name)
475
if name in sys.builtin_module_names:
476
return (None, None, ("", "", imp.C_BUILTIN))
479
return imp.find_module(name, path)
482
"""Print a report to stdout, listing the found modules with their
483
paths, as well as modules that are missing, or seem to be missing.
486
print(" %-25s %s" % ("Name", "File"))
487
print(" %-25s %s" % ("----", "----"))
488
# Print modules found
489
keys = sorted(self.modules.keys())
491
m = self.modules[key]
496
print("%-25s" % key, m.__file__ or "")
498
# Print missing modules
499
missing, maybe = self.any_missing_maybe()
502
print("Missing modules:")
504
mods = sorted(self.badmodules[name].keys())
505
print("?", name, "imported from", ', '.join(mods))
506
# Print modules that may be missing, but then again, maybe not...
509
print("Submodules thay appear to be missing, but could also be", end=' ')
510
print("global names in the parent package:")
512
mods = sorted(self.badmodules[name].keys())
513
print("?", name, "imported from", ', '.join(mods))
515
def any_missing(self):
516
"""Return a list of modules that appear to be missing. Use
517
any_missing_maybe() if you want to know which modules are
518
certain to be missing, and which *may* be missing.
520
missing, maybe = self.any_missing_maybe()
521
return missing + maybe
523
def any_missing_maybe(self):
524
"""Return two lists, one with modules that are certainly missing
525
and one with modules that *may* be missing. The latter names could
526
either be submodules *or* just global names in the package.
528
The reason it can't always be determined is that it's impossible to
529
tell which names are imported when "from module import *" is done
530
with an extension module, short of actually importing it.
534
for name in self.badmodules:
535
if name in self.excludes:
543
pkg = self.modules.get(pkgname)
545
if pkgname in self.badmodules[name]:
546
# The package tried to import this module itself and
547
# failed. It's definitely missing.
549
elif subname in pkg.globalnames:
550
# It's a global in the package: definitely not missing.
552
elif pkg.starimports:
553
# It could be missing, but the package did an "import *"
554
# from a non-Python module, so we simply can't be sure.
557
# It's not a global in the package, the package didn't
558
# do funny star imports, it's very likely to be missing.
559
# The symbol could be inserted into the package from the
560
# outside, but since that's not good style we simply list
567
return missing, maybe
569
def replace_paths_in_code(self, co):
570
new_filename = original_filename = os.path.normpath(co.co_filename)
571
for f, r in self.replace_paths:
572
if original_filename.startswith(f):
573
new_filename = r + original_filename[len(f):]
576
if self.debug and original_filename not in self.processed_paths:
577
if new_filename != original_filename:
578
self.msgout(2, "co_filename %r changed to %r" \
579
% (original_filename,new_filename,))
581
self.msgout(2, "co_filename %r remains unchanged" \
582
% (original_filename,))
583
self.processed_paths.append(original_filename)
585
consts = list(co.co_consts)
586
for i in range(len(consts)):
587
if isinstance(consts[i], type(co)):
588
consts[i] = self.replace_paths_in_code(consts[i])
590
return types.CodeType(co.co_argcount, co.co_nlocals, co.co_stacksize,
591
co.co_flags, co.co_code, tuple(consts), co.co_names,
592
co.co_varnames, new_filename, co.co_name,
593
co.co_firstlineno, co.co_lnotab,
594
co.co_freevars, co.co_cellvars)
601
opts, args = getopt.getopt(sys.argv[1:], "dmp:qx:")
602
except getopt.error as msg:
617
addpath = addpath + a.split(os.pathsep)
623
# Provide default arguments
629
# Set the path based on sys.path and the script directory
631
path[0] = os.path.dirname(script)
632
path = addpath + path
636
print(" ", repr(item))
638
# Create the module finder and turn its crank
639
mf = ModuleFinder(path, debug, exclude)
646
mf.import_hook(arg[:-2], None, ["*"])
651
mf.run_script(script)
653
return mf # for -i debugging
656
if __name__ == '__main__':
659
except KeyboardInterrupt:
660
print("\n[interrupt]")