readers.py 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. #-----------------------------------------------------------------------------
  2. # Copyright (c) 2013-2021, PyInstaller Development Team.
  3. #
  4. # Distributed under the terms of the GNU General Public License (version 2
  5. # or later) with exception for distributing the bootloader.
  6. #
  7. # The full license is in the file COPYING.txt, distributed with this software.
  8. #
  9. # SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
  10. #-----------------------------------------------------------------------------
  11. """
  12. This CArchiveReader is used only by the archive_viewer utility.
  13. """
  14. # TODO clean up this module
  15. import struct
  16. import os
  17. from PyInstaller.loader.pyimod02_archive import ArchiveReader
  18. class NotAnArchiveError(Exception):
  19. pass
  20. class CTOCReader(object):
  21. """
  22. A class encapsulating the table of contents of a CArchive.
  23. When written to disk, it is easily read from C.
  24. """
  25. # (structlen, dpos, dlen, ulen, flag, typcd) followed by name
  26. ENTRYSTRUCT = '!iIIIBB'
  27. ENTRYLEN = struct.calcsize(ENTRYSTRUCT)
  28. def __init__(self):
  29. self.data = []
  30. def frombinary(self, s):
  31. """
  32. Decode the binary string into an in memory list.
  33. S is a binary string.
  34. """
  35. p = 0
  36. while p < len(s):
  37. (slen, dpos, dlen, ulen, flag, typcd) = struct.unpack(self.ENTRYSTRUCT,
  38. s[p:p + self.ENTRYLEN])
  39. nmlen = slen - self.ENTRYLEN
  40. p = p + self.ENTRYLEN
  41. (nm,) = struct.unpack('%is' % nmlen, s[p:p + nmlen])
  42. p = p + nmlen
  43. # nm may have up to 15 bytes of padding
  44. nm = nm.rstrip(b'\0')
  45. nm = nm.decode('utf-8')
  46. typcd = chr(typcd)
  47. self.data.append((dpos, dlen, ulen, flag, typcd, nm))
  48. def get(self, ndx):
  49. """
  50. Return the table of contents entry (tuple) at index NDX.
  51. """
  52. return self.data[ndx]
  53. def __getitem__(self, ndx):
  54. return self.data[ndx]
  55. def find(self, name):
  56. """
  57. Return the index of the toc entry with name NAME.
  58. Return -1 for failure.
  59. """
  60. for i, nm in enumerate(self.data):
  61. if nm[-1] == name:
  62. return i
  63. return -1
  64. class CArchiveReader(ArchiveReader):
  65. """
  66. An Archive subclass that can hold arbitrary data.
  67. This class encapsulates all files that are bundled within an executable.
  68. It can contain ZlibArchive (Python .pyc files), dlls, Python C extensions
  69. and all other data files that are bundled in --onefile mode.
  70. Easily handled from C or from Python.
  71. """
  72. # MAGIC is useful to verify that conversion of Python data types
  73. # to C structure and back works properly.
  74. MAGIC = b'MEI\014\013\012\013\016'
  75. HDRLEN = 0
  76. LEVEL = 9
  77. # Cookie - holds some information for the bootloader. C struct format
  78. # definition. '!' at the beginning means network byte order.
  79. # C struct looks like:
  80. #
  81. # typedef struct _cookie {
  82. # char magic[8]; /* 'MEI\014\013\012\013\016' */
  83. # uint32_t len; /* len of entire package */
  84. # uint32_t TOC; /* pos (rel to start) of TableOfContents */
  85. # int TOClen; /* length of TableOfContents */
  86. # int pyvers; /* new in v4 */
  87. # char pylibname[64]; /* Filename of Python dynamic library. */
  88. # } COOKIE;
  89. #
  90. _cookie_format = '!8sIIii64s'
  91. _cookie_size = struct.calcsize(_cookie_format)
  92. def __init__(self, archive_path=None, start=0, length=0, pylib_name=''):
  93. """
  94. Constructor.
  95. archive_path path name of file (create empty CArchive if path is None).
  96. start is the seekposition within PATH.
  97. len is the length of the CArchive (if 0, then read till EOF).
  98. pylib_name name of Python DLL which bootloader will use.
  99. """
  100. self.length = length
  101. self._pylib_name = pylib_name
  102. # A CArchive created from scratch starts at 0, no leading bootloader.
  103. self.pkg_start = 0
  104. super(CArchiveReader, self).__init__(archive_path, start)
  105. def checkmagic(self):
  106. """
  107. Verify that self is a valid CArchive.
  108. Magic signature is at end of the archive.
  109. This fuction is used by ArchiveViewer.py utility.
  110. """
  111. # Magic is at EOF; if we're embedded, we need to figure where that is.
  112. if self.length:
  113. self.lib.seek(self.start + self.length, 0)
  114. else:
  115. self.lib.seek(0, os.SEEK_END)
  116. end_pos = self.lib.tell()
  117. SEARCH_CHUNK_SIZE = 8192
  118. magic_offset = -1
  119. while end_pos >= len(self.MAGIC):
  120. start_pos = max(end_pos - SEARCH_CHUNK_SIZE, 0)
  121. chunk_size = end_pos - start_pos
  122. # Is the remaining chunk large enough to hold the pattern?
  123. if chunk_size < len(self.MAGIC):
  124. break
  125. # Read and scan the chunk
  126. self.lib.seek(start_pos, os.SEEK_SET)
  127. buf = self.lib.read(chunk_size)
  128. pos = buf.rfind(self.MAGIC)
  129. if pos != -1:
  130. magic_offset = start_pos + pos
  131. break
  132. # Adjust search location for next chunk; ensure proper
  133. # overlap
  134. end_pos = start_pos + len(self.MAGIC) - 1
  135. if magic_offset == -1:
  136. raise RuntimeError("%s is not a valid %s archive file" %
  137. (self.path, self.__class__.__name__))
  138. filelen = magic_offset + self._cookie_size
  139. # Read the whole cookie
  140. self.lib.seek(magic_offset, os.SEEK_SET)
  141. buf = self.lib.read(self._cookie_size)
  142. (magic, totallen, tocpos, toclen, pyvers, pylib_name) = struct.unpack(
  143. self._cookie_format, buf)
  144. if magic != self.MAGIC:
  145. raise RuntimeError("%s is not a valid %s archive file" %
  146. (self.path, self.__class__.__name__))
  147. self.pkg_start = filelen - totallen
  148. if self.length:
  149. if totallen != self.length or self.pkg_start != self.start:
  150. raise RuntimeError('Problem with embedded archive in %s' %
  151. self.path)
  152. # Verify presence of Python library name.
  153. if not pylib_name:
  154. raise RuntimeError('Python library filename not defined in archive.')
  155. self.tocpos, self.toclen = tocpos, toclen
  156. def loadtoc(self):
  157. """
  158. Load the table of contents into memory.
  159. """
  160. self.toc = CTOCReader()
  161. self.lib.seek(self.pkg_start + self.tocpos)
  162. tocstr = self.lib.read(self.toclen)
  163. self.toc.frombinary(tocstr)
  164. def extract(self, name):
  165. """
  166. Get the contents of an entry.
  167. NAME is an entry name OR the index to the TOC.
  168. Return the tuple (ispkg, contents).
  169. For non-Python resoures, ispkg is meaningless (and 0).
  170. Used by the import mechanism.
  171. """
  172. if isinstance(name, str):
  173. ndx = self.toc.find(name)
  174. if ndx == -1:
  175. return None
  176. else:
  177. ndx = name
  178. (dpos, dlen, ulen, flag, typcd, nm) = self.toc.get(ndx)
  179. with self.lib:
  180. self.lib.seek(self.pkg_start + dpos)
  181. rslt = self.lib.read(dlen)
  182. if flag == 1:
  183. import zlib
  184. rslt = zlib.decompress(rslt)
  185. if typcd == 'M':
  186. return (1, rslt)
  187. return (typcd == 'M', rslt)
  188. def contents(self):
  189. """
  190. Return the names of the entries.
  191. """
  192. rslt = []
  193. for (dpos, dlen, ulen, flag, typcd, nm) in self.toc:
  194. rslt.append(nm)
  195. return rslt
  196. def openEmbedded(self, name):
  197. """
  198. Open a CArchive of name NAME embedded within this CArchive.
  199. This fuction is used by ArchiveViewer.py utility.
  200. """
  201. ndx = self.toc.find(name)
  202. if ndx == -1:
  203. raise KeyError("Member '%s' not found in %s" % (name, self.path))
  204. (dpos, dlen, ulen, flag, typcd, nm) = self.toc.get(ndx)
  205. if typcd not in "zZ":
  206. raise NotAnArchiveError('%s is not an archive' % name)
  207. if flag:
  208. raise ValueError('Cannot open compressed archive %s in place' %
  209. name)
  210. return CArchiveReader(self.path, self.pkg_start + dpos, dlen)