find_modules.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. """
  2. modulegraph.find_modules - High-level module dependency finding interface
  3. =========================================================================
  4. History
  5. ........
  6. Originally (loosely) based on code in py2exe's build_exe.py by Thomas Heller.
  7. """
  8. import sys
  9. import os
  10. import imp
  11. import warnings
  12. import pkgutil
  13. from . import modulegraph
  14. from .modulegraph import Alias, Script, Extension
  15. from .util import imp_find_module
  16. __all__ = [
  17. 'find_modules', 'parse_mf_results'
  18. ]
  19. _PLATFORM_MODULES = {'posix', 'nt', 'os2', 'mac', 'ce', 'riscos'}
  20. def get_implies():
  21. result = {
  22. # imports done from builtin modules in C code
  23. # (untrackable by modulegraph)
  24. "_curses": ["curses"],
  25. "posix": ["resource"],
  26. "gc": ["time"],
  27. "time": ["_strptime"],
  28. "datetime": ["time"],
  29. "MacOS": ["macresource"],
  30. "cPickle": ["copy_reg", "cStringIO"],
  31. "parser": ["copy_reg"],
  32. "codecs": ["encodings"],
  33. "cStringIO": ["copy_reg"],
  34. "_sre": ["copy", "string", "sre"],
  35. "zipimport": ["zlib"],
  36. # Python 3.2:
  37. "_datetime": ["time", "_strptime"],
  38. "_json": ["json.decoder"],
  39. "_pickle": ["codecs", "copyreg", "_compat_pickle"],
  40. "_posixsubprocess": ["gc"],
  41. "_ssl": ["socket"],
  42. # Python 3.3:
  43. "_elementtree": ["copy", "xml.etree.ElementPath"],
  44. # mactoolboxglue can do a bunch more of these
  45. # that are far harder to predict, these should be tracked
  46. # manually for now.
  47. # this isn't C, but it uses __import__
  48. "anydbm": ["dbhash", "gdbm", "dbm", "dumbdbm", "whichdb"],
  49. # package aliases
  50. "wxPython.wx": Alias('wx'),
  51. }
  52. if sys.version_info[0] == 3:
  53. result["_sre"] = ["copy", "re"]
  54. result["parser"] = ["copyreg"]
  55. # _frozen_importlib is part of the interpreter itself
  56. result["_frozen_importlib"] = None
  57. if sys.version_info[0] == 2 and sys.version_info[1] >= 5:
  58. result.update({
  59. "email.base64MIME": Alias("email.base64mime"),
  60. "email.Charset": Alias("email.charset"),
  61. "email.Encoders": Alias("email.encoders"),
  62. "email.Errors": Alias("email.errors"),
  63. "email.Feedparser": Alias("email.feedParser"),
  64. "email.Generator": Alias("email.generator"),
  65. "email.Header": Alias("email.header"),
  66. "email.Iterators": Alias("email.iterators"),
  67. "email.Message": Alias("email.message"),
  68. "email.Parser": Alias("email.parser"),
  69. "email.quopriMIME": Alias("email.quoprimime"),
  70. "email.Utils": Alias("email.utils"),
  71. "email.MIMEAudio": Alias("email.mime.audio"),
  72. "email.MIMEBase": Alias("email.mime.base"),
  73. "email.MIMEImage": Alias("email.mime.image"),
  74. "email.MIMEMessage": Alias("email.mime.message"),
  75. "email.MIMEMultipart": Alias("email.mime.multipart"),
  76. "email.MIMENonMultipart": Alias("email.mime.nonmultipart"),
  77. "email.MIMEText": Alias("email.mime.text"),
  78. })
  79. if sys.version_info[:2] >= (2, 5):
  80. result["_elementtree"] = ["pyexpat"]
  81. import xml.etree
  82. for _, module_name, is_package in pkgutil.iter_modules(xml.etree.__path__):
  83. if not is_package:
  84. result["_elementtree"].append("xml.etree.%s" % (module_name,))
  85. if sys.version_info[:2] >= (2, 6):
  86. result['future_builtins'] = ['itertools']
  87. # os.path is an alias for a platform specific submodule,
  88. # ensure that the graph shows this.
  89. result['os.path'] = Alias(os.path.__name__)
  90. return result
  91. def parse_mf_results(mf):
  92. """
  93. Return two lists: the first one contains the python files in the graph,
  94. the second the C extensions.
  95. :param mf: a :class:`modulegraph.modulegraph.ModuleGraph` instance
  96. """
  97. # Retrieve modules from modulegraph
  98. py_files = []
  99. extensions = []
  100. for item in mf.iter_graph():
  101. # There may be __main__ modules (from mf.run_script), but
  102. # we don't need it in the zipfile we build.
  103. if item.identifier == "__main__":
  104. continue
  105. src = item.filename
  106. if src and src != '-':
  107. if isinstance(item, Script):
  108. # Scripts are python files
  109. py_files.append(item)
  110. elif isinstance(item, Extension):
  111. extensions.append(item)
  112. else:
  113. py_files.append(item)
  114. # sort on the file names, the output is nicer to read
  115. py_files.sort(key=lambda v: v.filename)
  116. extensions.sort(key=lambda v: v.filename)
  117. return py_files, extensions
  118. def plat_prepare(includes, packages, excludes):
  119. # used by Python itself
  120. includes.update(["warnings", "unicodedata", "weakref"])
  121. if not sys.platform.startswith('irix'):
  122. excludes.update([
  123. 'AL',
  124. 'sgi',
  125. 'vms_lib',
  126. ])
  127. if sys.platform not in ('mac', 'darwin'):
  128. # XXX - this doesn't look nearly complete
  129. excludes.update([
  130. 'Audio_mac',
  131. 'Carbon.File',
  132. 'Carbon.Folder',
  133. 'Carbon.Folders',
  134. 'EasyDialogs',
  135. 'MacOS',
  136. 'macfs',
  137. 'macostools',
  138. '_scproxy',
  139. ])
  140. if not sys.platform == 'win32':
  141. # only win32
  142. excludes.update([
  143. 'nturl2path',
  144. 'win32api',
  145. 'win32con',
  146. 'win32ctypes',
  147. 'win32event',
  148. 'win32evtlogutil',
  149. 'win32evtlog',
  150. 'win32file',
  151. 'win32gui',
  152. 'win32pipe',
  153. 'win32process',
  154. 'win32security',
  155. 'pywintypes',
  156. 'winsound',
  157. 'win32',
  158. '_winreg',
  159. '_winapi',
  160. 'msvcrt',
  161. 'winreg',
  162. '_subprocess',
  163. ])
  164. if not sys.platform == 'riscos':
  165. excludes.update([
  166. 'riscosenviron',
  167. 'rourl2path',
  168. ])
  169. if not sys.platform == 'dos' or sys.platform.startswith('ms-dos'):
  170. excludes.update([
  171. 'dos',
  172. ])
  173. if not sys.platform == 'os2emx':
  174. excludes.update([
  175. '_emx_link',
  176. ])
  177. excludes.update(_PLATFORM_MODULES - set(sys.builtin_module_names))
  178. # Carbon.Res depends on this, but the module hasn't been present
  179. # for a while...
  180. excludes.add('OverrideFrom23')
  181. excludes.add('OverrideFrom23._Res')
  182. # import trickery in the dummy_threading module (stdlib)
  183. excludes.add('_dummy_threading')
  184. try:
  185. imp_find_module('poll')
  186. except ImportError:
  187. excludes.update([
  188. 'poll',
  189. ])
  190. def find_needed_modules(
  191. mf=None, scripts=(), includes=(), packages=(), warn=warnings.warn):
  192. if mf is None:
  193. mf = modulegraph.ModuleGraph()
  194. # feed Modulefinder with everything, and return it.
  195. for path in scripts:
  196. mf.add_script(path)
  197. for mod in includes:
  198. try:
  199. if mod[-2:] == '.*':
  200. mf.import_hook(mod[:-2], None, ['*'])
  201. else:
  202. mf.import_hook(mod)
  203. except ImportError:
  204. warn("No module named %s" % (mod,))
  205. for f in packages:
  206. # If modulegraph has seen a reference to the package, then
  207. # we prefer to believe that (imp_find_module doesn't seem to locate
  208. # sub-packages)
  209. m = mf.find_node(f)
  210. if m is not None:
  211. path = m.packagepath[0]
  212. else:
  213. # Find path of package
  214. # TODO: use imp_find_module_or_importer
  215. try:
  216. path = imp_find_module(f, mf.path)[1]
  217. except ImportError:
  218. warn("No package named %s" % f)
  219. continue
  220. # walk the path to find subdirs containing __init__.py files
  221. # scan the results (directory of __init__.py files)
  222. # first trim the path (of the head package),
  223. # then convert directory name in package name,
  224. # finally push into modulegraph.
  225. # FIXME:
  226. # 1) Needs to be adjusted for namespace packages in python 3.3
  227. # 2) Code is fairly dodgy and needs better tests
  228. for (dirpath, dirnames, filenames) in os.walk(path):
  229. if '__init__.py' in filenames and dirpath.startswith(path):
  230. package = f + '.' + dirpath[len(path)+1:].replace(os.sep, '.')
  231. if package.endswith('.'):
  232. package = package[:-1]
  233. m = mf.import_hook(package, None, ["*"])
  234. else:
  235. # Exclude subtrees that aren't packages
  236. dirnames[:] = []
  237. return mf
  238. #
  239. # resource constants
  240. #
  241. PY_SUFFIXES = ['.py', '.pyw', '.pyo', '.pyc']
  242. C_SUFFIXES = [
  243. _triple[0] for _triple in imp.get_suffixes()
  244. if _triple[2] == imp.C_EXTENSION
  245. ]
  246. #
  247. # side-effects
  248. #
  249. def _replacePackages():
  250. REPLACEPACKAGES = {
  251. '_xmlplus': 'xml',
  252. }
  253. for k, v in REPLACEPACKAGES.items():
  254. modulegraph.replacePackage(k, v)
  255. _replacePackages()
  256. def find_modules(
  257. scripts=(), includes=(), packages=(), excludes=(), path=None, debug=0):
  258. """
  259. High-level interface, takes iterables for:
  260. scripts, includes, packages, excludes
  261. And returns a :class:`modulegraph.modulegraph.ModuleGraph` instance,
  262. python_files, and extensions
  263. python_files is a list of pure python dependencies as modulegraph.Module
  264. objects, extensions is a list of platform-specific C extension dependencies
  265. as modulegraph.Module objects
  266. """
  267. scripts = set(scripts)
  268. includes = set(includes)
  269. packages = set(packages)
  270. excludes = set(excludes)
  271. plat_prepare(includes, packages, excludes)
  272. mf = modulegraph.ModuleGraph(
  273. path=path,
  274. excludes=(excludes - includes),
  275. implies=get_implies(),
  276. debug=debug,
  277. )
  278. find_needed_modules(mf, scripts, includes, packages)
  279. return mf