pyimod02_archive.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. #-----------------------------------------------------------------------------
  2. # Copyright (c) 2005-2021, PyInstaller Development Team.
  3. #
  4. # Distributed under the terms of the GNU General Public License (version 2
  5. # or later) with exception for distributing the bootloader.
  6. #
  7. # The full license is in the file COPYING.txt, distributed with this software.
  8. #
  9. # SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
  10. #-----------------------------------------------------------------------------
  11. # TODO clean up this module
  12. # Subclasses may not need marshal or struct, but since they're
  13. # builtin, importing is safe.
  14. #
# While an Archive is really an abstraction for any "filesystem
# within a file", it is tuned for use with imputil.FuncImporter.
# This assumes it contains Python code objects, indexed by
# the internal name (i.e., no '.py').
# See pyi_carchive.py for a more general archive (contains anything)
# that can be understood by a C program.
  21. ### **NOTE** This module is used during bootstrap.
  22. ### Import *ONLY* builtin modules.
  23. import marshal
  24. import struct
  25. import sys
  26. import zlib
  27. import _thread as thread
  28. # For decrypting Python modules.
  29. CRYPT_BLOCK_SIZE = 16
  30. # content types for PYZ
  31. PYZ_TYPE_MODULE = 0
  32. PYZ_TYPE_PKG = 1
  33. PYZ_TYPE_DATA = 2
  34. PYZ_TYPE_NSPKG = 3 # PEP-420 namespace package
  35. class FilePos(object):
  36. """
  37. This class keeps track of the file object representing and current position
  38. in a file.
  39. """
  40. def __init__(self):
  41. # The file object representing this file.
  42. self.file = None
  43. # The position in the file when it was last closed.
  44. self.pos = 0
  45. class ArchiveFile(object):
  46. """
  47. File class support auto open when access member from file object
  48. This class is use to avoid file locking on windows
  49. """
  50. def __init__(self, *args, **kwargs):
  51. self.args = args
  52. self.kwargs = kwargs
  53. self._filePos = {}
  54. def local(self):
  55. """
  56. Return an instance of FilePos for the current thread. This is a crude
  57. # re-implementation of threading.local, which isn't a built-in module
  58. # and therefore isn't available.
  59. """
  60. ti = thread.get_ident()
  61. if ti not in self._filePos:
  62. self._filePos[ti] = FilePos()
  63. return self._filePos[ti]
  64. def __getattr__(self, name):
  65. """
  66. Make this class act like a file, by invoking most methods on its
  67. underlying file object.
  68. """
  69. file = self.local().file
  70. assert file
  71. return getattr(file, name)
  72. def __enter__(self):
  73. """
  74. Open file and seek to pos record from last close.
  75. """
  76. # The file shouldn't be open yet.
  77. fp = self.local()
  78. assert not fp.file
  79. # Open the file and seek to the last position.
  80. fp.file = open(*self.args, **self.kwargs)
  81. fp.file.seek(fp.pos)
  82. def __exit__(self, type, value, traceback):
  83. """
  84. Close file and record pos.
  85. """
  86. # The file should still be open.
  87. fp = self.local()
  88. assert fp.file
  89. # Close the file and record its position.
  90. fp.pos = fp.file.tell()
  91. fp.file.close()
  92. fp.file = None
  93. class ArchiveReadError(RuntimeError):
  94. pass
  95. class ArchiveReader(object):
  96. """
  97. A base class for a repository of python code objects.
  98. The extract method is used by imputil.ArchiveImporter
  99. to get code objects by name (fully qualified name), so
  100. an enduser "import a.b" would become
  101. extract('a.__init__')
  102. extract('a.b')
  103. """
  104. MAGIC = b'PYL\0'
  105. HDRLEN = 12 # default is MAGIC followed by python's magic, int pos of toc
  106. TOCPOS = 8
  107. os = None
  108. _bincache = None
  109. def __init__(self, path=None, start=0):
  110. """
  111. Initialize an Archive. If path is omitted, it will be an empty Archive.
  112. """
  113. self.toc = None
  114. self.path = path
  115. self.start = start
  116. # In Python 3 module 'imp' is no longer built-in and we cannot use it.
  117. # There is for Python 3 another way how to obtain magic value.
  118. # We cannot use at this bootstrap stage importlib directly
  119. # but its frozen variant.
  120. import _frozen_importlib
  121. self.pymagic = _frozen_importlib._bootstrap_external.MAGIC_NUMBER
  122. if path is not None:
  123. self.lib = ArchiveFile(self.path, 'rb')
  124. with self.lib:
  125. self.checkmagic()
  126. self.loadtoc()
  127. def loadtoc(self):
  128. """
  129. Overridable.
  130. Default: After magic comes an int (4 byte native) giving the
  131. position of the TOC within self.lib.
  132. Default: The TOC is a marshal-able string.
  133. """
  134. self.lib.seek(self.start + self.TOCPOS)
  135. (offset,) = struct.unpack('!i', self.lib.read(4))
  136. self.lib.seek(self.start + offset)
  137. # Use marshal.loads() since load() arg must be a file object
  138. # Convert the read list into a dict for faster access
  139. self.toc = dict(marshal.loads(self.lib.read()))
  140. ######## This is what is called by FuncImporter #######
  141. ## Since an Archive is flat, we ignore parent and modname.
  142. #XXX obsolete - imputil only code
  143. ## def get_code(self, parent, modname, fqname):
  144. ## pass
  145. def is_package(self, name):
  146. ispkg, pos = self.toc.get(name, (0, None))
  147. if pos is None:
  148. return None
  149. return bool(ispkg)
  150. ####### Core method - Override as needed #########
  151. def extract(self, name):
  152. """
  153. Get the object corresponding to name, or None.
  154. For use with imputil ArchiveImporter, object is a python code object.
  155. 'name' is the name as specified in an 'import name'.
  156. 'import a.b' will become:
  157. extract('a') (return None because 'a' is not a code object)
  158. extract('a.__init__') (return a code object)
  159. extract('a.b') (return a code object)
  160. Default implementation:
  161. self.toc is a dict
  162. self.toc[name] is pos
  163. self.lib has the code object marshal-ed at pos
  164. """
  165. ispkg, pos = self.toc.get(name, (0, None))
  166. if pos is None:
  167. return None
  168. with self.lib:
  169. self.lib.seek(self.start + pos)
  170. # use marshal.loads() sind load() arg must be a file object
  171. obj = marshal.loads(self.lib.read())
  172. return ispkg, obj
  173. ########################################################################
  174. # Informational methods
  175. def contents(self):
  176. """
  177. Return a list of the contents
  178. Default implementation assumes self.toc is a dict like object.
  179. Not required by ArchiveImporter.
  180. """
  181. return list(self.toc.keys())
  182. def checkmagic(self):
  183. """
  184. Overridable.
  185. Check to see if the file object self.lib actually has a file
  186. we understand.
  187. """
  188. self.lib.seek(self.start) # default - magic is at start of file
  189. if self.lib.read(len(self.MAGIC)) != self.MAGIC:
  190. raise ArchiveReadError("%s is not a valid %s archive file"
  191. % (self.path, self.__class__.__name__))
  192. if self.lib.read(len(self.pymagic)) != self.pymagic:
  193. raise ArchiveReadError("%s has version mismatch to dll" %
  194. (self.path))
  195. self.lib.read(4)
  196. class Cipher(object):
  197. """
  198. This class is used only to decrypt Python modules.
  199. """
  200. def __init__(self):
  201. # At build-type the key is given to us from inside the spec file, at
  202. # bootstrap-time, we must look for it ourselves by trying to import
  203. # the generated 'pyi_crypto_key' module.
  204. import pyimod00_crypto_key
  205. key = pyimod00_crypto_key.key
  206. assert type(key) is str
  207. if len(key) > CRYPT_BLOCK_SIZE:
  208. self.key = key[0:CRYPT_BLOCK_SIZE]
  209. else:
  210. self.key = key.zfill(CRYPT_BLOCK_SIZE)
  211. assert len(self.key) == CRYPT_BLOCK_SIZE
  212. import tinyaes
  213. self._aesmod = tinyaes
  214. # Issue #1663: Remove the AES module from sys.modules list. Otherwise
  215. # it interferes with using 'tinyaes' module in users' code.
  216. del sys.modules['tinyaes']
  217. def __create_cipher(self, iv):
  218. # The 'AES' class is stateful, this factory method is used to
  219. # re-initialize the block cipher class with each call to xcrypt().
  220. return self._aesmod.AES(self.key.encode(), iv)
  221. def decrypt(self, data):
  222. cipher = self.__create_cipher(data[:CRYPT_BLOCK_SIZE])
  223. return cipher.CTR_xcrypt_buffer(data[CRYPT_BLOCK_SIZE:])
  224. class ZlibArchiveReader(ArchiveReader):
  225. """
  226. ZlibArchive - an archive with compressed entries. Archive is read
  227. from the executable created by PyInstaller.
  228. This archive is used for bundling python modules inside the executable.
  229. NOTE: The whole ZlibArchive (PYZ) is compressed so it is not necessary
  230. to compress single modules with zlib.
  231. """
  232. MAGIC = b'PYZ\0'
  233. TOCPOS = 8
  234. HDRLEN = ArchiveReader.HDRLEN + 5
  235. def __init__(self, path=None, offset=None):
  236. if path is None:
  237. offset = 0
  238. elif offset is None:
  239. for i in range(len(path) - 1, - 1, - 1):
  240. if path[i] == '?':
  241. try:
  242. offset = int(path[i + 1:])
  243. except ValueError:
  244. # Just ignore any spurious "?" in the path
  245. # (like in Windows UNC \\?\<path>).
  246. continue
  247. path = path[:i]
  248. break
  249. else:
  250. offset = 0
  251. super(ZlibArchiveReader, self).__init__(path, offset)
  252. # Try to import the key module. If the key module is not available
  253. # then it means that encryption is disabled.
  254. try:
  255. import pyimod00_crypto_key
  256. self.cipher = Cipher()
  257. except ImportError:
  258. self.cipher = None
  259. def is_package(self, name):
  260. (typ, pos, length) = self.toc.get(name, (0, None, 0))
  261. if pos is None:
  262. return None
  263. return typ in (PYZ_TYPE_PKG, PYZ_TYPE_NSPKG)
  264. def is_pep420_namespace_package(self, name):
  265. (typ, pos, length) = self.toc.get(name, (0, None, 0))
  266. if pos is None:
  267. return None
  268. return typ == PYZ_TYPE_NSPKG
  269. def extract(self, name):
  270. (typ, pos, length) = self.toc.get(name, (0, None, 0))
  271. if pos is None:
  272. return None
  273. with self.lib:
  274. self.lib.seek(self.start + pos)
  275. obj = self.lib.read(length)
  276. try:
  277. if self.cipher:
  278. obj = self.cipher.decrypt(obj)
  279. obj = zlib.decompress(obj)
  280. if typ in (PYZ_TYPE_MODULE, PYZ_TYPE_PKG, PYZ_TYPE_NSPKG):
  281. obj = marshal.loads(obj)
  282. except EOFError as e:
  283. raise ImportError("PYZ entry '%s' failed to unmarshal" %
  284. name) from e
  285. return typ, obj