pyimod02_archive.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. #-----------------------------------------------------------------------------
  2. # Copyright (c) 2005-2021, PyInstaller Development Team.
  3. #
  4. # Distributed under the terms of the GNU General Public License (version 2
  5. # or later) with exception for distributing the bootloader.
  6. #
  7. # The full license is in the file COPYING.txt, distributed with this software.
  8. #
  9. # SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
  10. #-----------------------------------------------------------------------------
  11. # TODO clean up this module
  12. # Subclasses may not need marshal or struct, but since they are builtin, importing is safe.
  13. #
  14. # While an Archive is really an abstraction for any "filesystem within a file", it is tuned for use with the
  15. # imputil.FuncImporter. This assumes it contains python code objects, indexed by the internal name (i.e.,
  16. # without '.py' suffix).
  17. # See pyi_carchive.py for a more general archive (contains anything) that can be understood by a C program.
  18. # **NOTE** This module is used during bootstrap.
  19. # Import *ONLY* builtin modules.
  20. import _thread as thread
  21. import marshal
  22. import struct
  23. import sys
  24. import zlib
# For decrypting Python modules.
# Cipher normalizes the key to this many characters, and treats the first CRYPT_BLOCK_SIZE bytes of an encrypted
# entry as the IV (the remainder is the encrypted payload).
CRYPT_BLOCK_SIZE = 16
# content types for PYZ
# This is the first field of every ZlibArchiveReader TOC entry, i.e., (typ, pos, length). Entries of type MODULE,
# PKG, and NSPKG are unmarshal-ed by ZlibArchiveReader.extract(); DATA entries are returned as decompressed bytes.
PYZ_TYPE_MODULE = 0
PYZ_TYPE_PKG = 1
PYZ_TYPE_DATA = 2
PYZ_TYPE_NSPKG = 3  # PEP-420 namespace package
  32. class FilePos(object):
  33. """
  34. This class keeps track of the file object representing and current position in a file.
  35. """
  36. def __init__(self):
  37. # The file object representing this file.
  38. self.file = None
  39. # The position in the file when it was last closed.
  40. self.pos = 0
  41. class ArchiveFile(object):
  42. """
  43. File class support auto open when access member from file object This class is use to avoid file locking on windows.
  44. """
  45. def __init__(self, *args, **kwargs):
  46. self.args = args
  47. self.kwargs = kwargs
  48. self._filePos = {}
  49. def local(self):
  50. """
  51. Return an instance of FilePos for the current thread. This is a crude # re-implementation of threading.local,
  52. which isn't a built-in module # and therefore isn't available.
  53. """
  54. ti = thread.get_ident()
  55. if ti not in self._filePos:
  56. self._filePos[ti] = FilePos()
  57. return self._filePos[ti]
  58. def __getattr__(self, name):
  59. """
  60. Make this class act like a file, by invoking most methods on its underlying file object.
  61. """
  62. file = self.local().file
  63. assert file
  64. return getattr(file, name)
  65. def __enter__(self):
  66. """
  67. Open file and seek to pos record from last close.
  68. """
  69. # The file shouldn't be open yet.
  70. fp = self.local()
  71. assert not fp.file
  72. # Open the file and seek to the last position.
  73. fp.file = open(*self.args, **self.kwargs)
  74. fp.file.seek(fp.pos)
  75. def __exit__(self, type, value, traceback):
  76. """
  77. Close file and record pos.
  78. """
  79. # The file should still be open.
  80. fp = self.local()
  81. assert fp.file
  82. # Close the file and record its position.
  83. fp.pos = fp.file.tell()
  84. fp.file.close()
  85. fp.file = None
  86. class ArchiveReadError(RuntimeError):
  87. pass
  88. class ArchiveReader(object):
  89. """
  90. A base class for a repository of python code objects. The extract method is used by imputil.ArchiveImporter to
  91. get code objects by name (fully qualified name), so an end-user "import a.b" becomes:
  92. extract('a.__init__')
  93. extract('a.b')
  94. """
  95. MAGIC = b'PYL\0'
  96. HDRLEN = 12 # default is MAGIC followed by python's magic, int pos of toc
  97. TOCPOS = 8
  98. os = None
  99. _bincache = None
  100. def __init__(self, path=None, start=0):
  101. """
  102. Initialize an Archive. If path is omitted, it will be an empty Archive.
  103. """
  104. self.toc = None
  105. self.path = path
  106. self.start = start
  107. # In Python3, the MAGIC_NUMBER value is available in the importlib module. However, in the bootstrap phase
  108. # we cannot use importlib directly, but rather its frozen variant.
  109. import _frozen_importlib
  110. self.pymagic = _frozen_importlib._bootstrap_external.MAGIC_NUMBER
  111. if path is not None:
  112. self.lib = ArchiveFile(self.path, 'rb')
  113. with self.lib:
  114. self.checkmagic()
  115. self.loadtoc()
  116. def loadtoc(self):
  117. """
  118. Overridable. Default: After magic comes an int (4 byte native) giving the position of the TOC within
  119. self.lib. Default: The TOC is a marshal-able string.
  120. """
  121. self.lib.seek(self.start + self.TOCPOS)
  122. (offset,) = struct.unpack('!i', self.lib.read(4))
  123. self.lib.seek(self.start + offset)
  124. # Use marshal.loads() since load() arg must be a file object. Convert the loaded list into a dict for
  125. # faster access.
  126. self.toc = dict(marshal.loads(self.lib.read()))
  127. #------ This is what is called by FuncImporter ------
  128. def is_package(self, name):
  129. ispkg, pos = self.toc.get(name, (0, None))
  130. if pos is None:
  131. return None
  132. return bool(ispkg)
  133. #------ Core method - Override as needed ------
  134. def extract(self, name):
  135. """
  136. Get the object corresponding to name, or None. For use with imputil ArchiveImporter, object is a python code
  137. object. 'name' is the name as specified in an 'import name'. 'import a.b' becomes:
  138. extract('a') (return None because 'a' is not a code object)
  139. extract('a.__init__') (return a code object)
  140. extract('a.b') (return a code object)
  141. Default implementation:
  142. self.toc is a dict
  143. self.toc[name] is pos
  144. self.lib has the code object marshal-ed at pos
  145. """
  146. ispkg, pos = self.toc.get(name, (0, None))
  147. if pos is None:
  148. return None
  149. with self.lib:
  150. self.lib.seek(self.start + pos)
  151. # Use marshal.loads() since load() arg must be a file object.
  152. obj = marshal.loads(self.lib.read())
  153. return ispkg, obj
  154. #------ Informational methods ------
  155. def contents(self):
  156. """
  157. Return a list of the contents Default implementation assumes self.toc is a dict like object. Not required by
  158. ArchiveImporter.
  159. """
  160. return list(self.toc.keys())
  161. def checkmagic(self):
  162. """
  163. Overridable. Check to see if the file object self.lib actually has a file we understand.
  164. """
  165. self.lib.seek(self.start) # default - magic is at the start of file
  166. if self.lib.read(len(self.MAGIC)) != self.MAGIC:
  167. raise ArchiveReadError("%s is not a valid %s archive file" % (self.path, self.__class__.__name__))
  168. if self.lib.read(len(self.pymagic)) != self.pymagic:
  169. raise ArchiveReadError("%s has version mismatch to dll" % self.path)
  170. self.lib.read(4)
  171. class Cipher(object):
  172. """
  173. This class is used only to decrypt Python modules.
  174. """
  175. def __init__(self):
  176. # At build-time the key is given to us from inside the spec file. At bootstrap-time, we must look for it
  177. # ourselves, by trying to import the generated 'pyi_crypto_key' module.
  178. import pyimod00_crypto_key
  179. key = pyimod00_crypto_key.key
  180. assert type(key) is str
  181. if len(key) > CRYPT_BLOCK_SIZE:
  182. self.key = key[0:CRYPT_BLOCK_SIZE]
  183. else:
  184. self.key = key.zfill(CRYPT_BLOCK_SIZE)
  185. assert len(self.key) == CRYPT_BLOCK_SIZE
  186. import tinyaes
  187. self._aesmod = tinyaes
  188. # Issue #1663: Remove the AES module from sys.modules list. Otherwise it interferes with using 'tinyaes' module
  189. # in users' code.
  190. del sys.modules['tinyaes']
  191. def __create_cipher(self, iv):
  192. # The 'AES' class is stateful, and this factory method is used to re-initialize the block cipher class with
  193. # each call to xcrypt().
  194. return self._aesmod.AES(self.key.encode(), iv)
  195. def decrypt(self, data):
  196. cipher = self.__create_cipher(data[:CRYPT_BLOCK_SIZE])
  197. return cipher.CTR_xcrypt_buffer(data[CRYPT_BLOCK_SIZE:])
  198. class ZlibArchiveReader(ArchiveReader):
  199. """
  200. ZlibArchive - an archive with compressed entries. Archive is read from the executable created by PyInstaller.
  201. This archive is used for bundling python modules inside the executable.
  202. NOTE: The whole ZlibArchive (PYZ) is compressed, so it is not necessary to compress individual modules.
  203. """
  204. MAGIC = b'PYZ\0'
  205. TOCPOS = 8
  206. HDRLEN = ArchiveReader.HDRLEN + 5
  207. def __init__(self, path=None, offset=None):
  208. if path is None:
  209. offset = 0
  210. elif offset is None:
  211. for i in range(len(path) - 1, -1, -1):
  212. if path[i] == '?':
  213. try:
  214. offset = int(path[i + 1:])
  215. except ValueError:
  216. # Just ignore any spurious "?" in the path (like in Windows UNC \\?\<path>).
  217. continue
  218. path = path[:i]
  219. break
  220. else:
  221. offset = 0
  222. super().__init__(path, offset)
  223. # Try to import the key module. Its lack of availability indicates that the encryption is disabled.
  224. try:
  225. import pyimod00_crypto_key # noqa: F401
  226. self.cipher = Cipher()
  227. except ImportError:
  228. self.cipher = None
  229. def is_package(self, name):
  230. (typ, pos, length) = self.toc.get(name, (0, None, 0))
  231. if pos is None:
  232. return None
  233. return typ in (PYZ_TYPE_PKG, PYZ_TYPE_NSPKG)
  234. def is_pep420_namespace_package(self, name):
  235. (typ, pos, length) = self.toc.get(name, (0, None, 0))
  236. if pos is None:
  237. return None
  238. return typ == PYZ_TYPE_NSPKG
  239. def extract(self, name):
  240. (typ, pos, length) = self.toc.get(name, (0, None, 0))
  241. if pos is None:
  242. return None
  243. with self.lib:
  244. self.lib.seek(self.start + pos)
  245. obj = self.lib.read(length)
  246. try:
  247. if self.cipher:
  248. obj = self.cipher.decrypt(obj)
  249. obj = zlib.decompress(obj)
  250. if typ in (PYZ_TYPE_MODULE, PYZ_TYPE_PKG, PYZ_TYPE_NSPKG):
  251. obj = marshal.loads(obj)
  252. except EOFError as e:
  253. raise ImportError("PYZ entry '%s' failed to unmarshal" % name) from e
  254. return typ, obj