pdfutils.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. #Copyright ReportLab Europe Ltd. 2000-2017
  2. #see license.txt for license details
  3. #history https://hg.reportlab.com/hg-public/reportlab/log/tip/src/reportlab/pdfbase/pdfutils.py
  4. __version__='3.3.0'
  5. __doc__=''
  6. # pdfutils.py - everything to do with images, streams,
  7. # compression, and some constants
  8. import sys
  9. import os
  10. import binascii
  11. from reportlab import rl_config
  12. from reportlab.lib.utils import getBytesIO, ImageReader, isUnicode
  13. from reportlab.lib.rl_accel import asciiBase85Encode, asciiBase85Decode
  14. def _chunker(src,dst=[],chunkSize=60):
  15. for i in range(0,len(src),chunkSize):
  16. dst.append(src[i:i+chunkSize])
  17. return dst
  18. ##########################################################
  19. #
  20. # Image compression helpers. Preprocessing a directory
  21. # of images will offer a vast speedup.
  22. #
  23. ##########################################################
# Maps an image reader's mode string (img.mode) to the colour space name
# written after /CS in the inline image header.
_mode2cs = {'RGB':'RGB', 'CMYK': 'CMYK', 'L': 'G'}
# Bytes per pixel for each mode at 8 bits per component; used to check that
# the raw pixel data has the expected length (width * height * bpp).
_mode2bpp = {'RGB': 3, 'CMYK':4, 'L':1}
  26. def makeA85Image(filename,IMG=None, detectJpeg=False):
  27. import zlib
  28. img = ImageReader(filename)
  29. if IMG is not None:
  30. IMG.append(img)
  31. if detectJpeg and img.jpeg_fh():
  32. return None
  33. imgwidth, imgheight = img.getSize()
  34. raw = img.getRGBData()
  35. code = []
  36. append = code.append
  37. # this describes what is in the image itself
  38. append('BI')
  39. append('/W %s /H %s /BPC 8 /CS /%s /F [/A85 /Fl]' % (imgwidth, imgheight,_mode2cs[img.mode]))
  40. append('ID')
  41. #use a flate filter and Ascii Base 85
  42. assert len(raw) == imgwidth * imgheight*_mode2bpp[img.mode], "Wrong amount of data for image"
  43. compressed = zlib.compress(raw) #this bit is very fast...
  44. encoded = asciiBase85Encode(compressed) #...sadly this may not be
  45. #append in blocks of 60 characters
  46. _chunker(encoded,code)
  47. append('EI')
  48. return code
  49. def makeRawImage(filename,IMG=None,detectJpeg=False):
  50. import zlib
  51. img = ImageReader(filename)
  52. if IMG is not None:
  53. IMG.append(img)
  54. if detectJpeg and img.jpeg_fh():
  55. return None
  56. imgwidth, imgheight = img.getSize()
  57. raw = img.getRGBData()
  58. code = []
  59. append = code.append
  60. # this describes what is in the image itself
  61. append('BI')
  62. append('/W %s /H %s /BPC 8 /CS /%s /F [/Fl]' % (imgwidth, imgheight,_mode2cs[img.mode]))
  63. append('ID')
  64. #use a flate filter
  65. assert len(raw) == imgwidth * imgheight*_mode2bpp[img.mode], "Wrong amount of data for image"
  66. compressed = zlib.compress(raw) #this bit is very fast...
  67. #append in blocks of 60 characters
  68. _chunker(compressed,code)
  69. append('EI')
  70. return code
  71. def cacheImageFile(filename, returnInMemory=0, IMG=None):
  72. "Processes image as if for encoding, saves to a file with .a85 extension."
  73. cachedname = os.path.splitext(filename)[0] + (rl_config.useA85 and '.a85' or '.bin')
  74. if filename==cachedname:
  75. if cachedImageExists(filename):
  76. from reportlab.lib.utils import open_for_read
  77. if returnInMemory: return filter(None,open_for_read(cachedname).read().split('\r\n'))
  78. else:
  79. raise IOError('No such cached image %s' % filename)
  80. else:
  81. if rl_config.useA85:
  82. code = makeA85Image(filename,IMG)
  83. else:
  84. code = makeRawImage(filename,IMG)
  85. if returnInMemory: return code
  86. #save it to a file
  87. f = open(cachedname,'wb')
  88. f.write('\r\n'.join(code)+'\r\n')
  89. f.close()
  90. if rl_config.verbose:
  91. print('cached image as %s' % cachedname)
  92. def preProcessImages(spec):
  93. """Preprocesses one or more image files.
  94. Accepts either a filespec ('C:\\mydir\\*.jpg') or a list
  95. of image filenames, crunches them all to save time. Run this
  96. to save huge amounts of time when repeatedly building image
  97. documents."""
  98. import glob
  99. if isinstance(spec,str):
  100. filelist = glob.glob(spec)
  101. else: #list or tuple OK
  102. filelist = spec
  103. for filename in filelist:
  104. if cachedImageExists(filename):
  105. if rl_config.verbose:
  106. print('cached version of %s already exists' % filename)
  107. else:
  108. cacheImageFile(filename)
  109. def cachedImageExists(filename):
  110. """Determines if a cached image already exists for a given file.
  111. Determines if a cached image exists which has the same name
  112. and equal or newer date to the given file."""
  113. cachedname = os.path.splitext(filename)[0] + (rl_config.useA85 and '.a85' or 'bin')
  114. if os.path.isfile(cachedname):
  115. #see if it is newer
  116. original_date = os.stat(filename)[8]
  117. cached_date = os.stat(cachedname)[8]
  118. if original_date > cached_date:
  119. return 0
  120. else:
  121. return 1
  122. else:
  123. return 0
  124. ##############################################################
  125. #
  126. # PDF Helper functions
  127. #
  128. ##############################################################
  129. def _normalizeLineEnds(text,desired='\r\n',unlikely='\x00\x01\x02\x03'):
  130. """Normalizes different line end character(s).
  131. Ensures all instances of CR, LF and CRLF end up as
  132. the specified one."""
  133. return (text
  134. .replace('\r\n', unlikely)
  135. .replace('\r', unlikely)
  136. .replace('\n', unlikely)
  137. .replace(unlikely, desired))
  138. def _AsciiHexEncode(input):
  139. """Encodes input using ASCII-Hex coding.
  140. This is a verbose encoding used for binary data within
  141. a PDF file. One byte binary becomes two bytes of ASCII.
  142. Helper function used by images."""
  143. if isUnicode(input):
  144. input = input.encode('utf-8')
  145. output = getBytesIO()
  146. output.write(binascii.b2a_hex(input))
  147. output.write(b'>')
  148. return output.getvalue()
  149. def _AsciiHexDecode(input):
  150. """Decodes input using ASCII-Hex coding.
  151. Not used except to provide a test of the inverse function."""
  152. #strip out all whitespace
  153. if not isUnicode(input):
  154. input = input.decode('utf-8')
  155. stripped = ''.join(input.split())
  156. assert stripped[-1] == '>', 'Invalid terminator for Ascii Hex Stream'
  157. stripped = stripped[:-1] #chop off terminator
  158. assert len(stripped) % 2 == 0, 'Ascii Hex stream has odd number of bytes'
  159. return ''.join([chr(int(stripped[i:i+2],16)) for i in range(0,len(stripped),2)])
  160. def _wrap(input, columns=60):
  161. "Wraps input at a given column size by inserting \r\n characters."
  162. output = []
  163. length = len(input)
  164. i = 0
  165. pos = columns * i
  166. while pos < length:
  167. output.append(input[pos:pos+columns])
  168. i = i + 1
  169. pos = columns * i
  170. #avoid HP printer problem
  171. if len(output[-1])==1:
  172. output[-2:] = [output[-2][:-1],output[-2][-1]+output[-1]]
  173. return '\r\n'.join(output)
  174. #########################################################################
  175. #
  176. # JPEG processing code - contributed by Eric Johnson
  177. #
  178. #########################################################################
  179. # Read data from the JPEG file. We should probably be using PIL to
  180. # get this information for us -- but this way is more fun!
  181. # Returns (width, height, color components) as a triple
  182. # This is based on Thomas Merz's code from GhostScript (viewjpeg.ps)
  183. def readJPEGInfo(image):
  184. "Read width, height and number of components from open JPEG file."
  185. import struct
  186. from reportlab.pdfbase.pdfdoc import PDFError
  187. #Acceptable JPEG Markers:
  188. # SROF0=baseline, SOF1=extended sequential or SOF2=progressive
  189. validMarkers = [0xC0, 0xC1, 0xC2]
  190. #JPEG markers without additional parameters
  191. noParamMarkers = \
  192. [ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0x01 ]
  193. #Unsupported JPEG Markers
  194. unsupportedMarkers = \
  195. [ 0xC3, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF ]
  196. #read JPEG marker segments until we find SOFn marker or EOF
  197. dpi = (72,72)
  198. done = 0
  199. while not done:
  200. x = struct.unpack('B', image.read(1))
  201. if x[0] == 0xFF: #found marker
  202. x = struct.unpack('B', image.read(1))
  203. #print('marker=%2x' % x[0])
  204. if x[0] in validMarkers:
  205. image.seek(2, 1) #skip segment length
  206. x = struct.unpack('B', image.read(1)) #data precision
  207. if x[0] != 8:
  208. raise PDFError('JPEG must have 8 bits per component')
  209. y = struct.unpack('BB', image.read(2))
  210. height = (y[0] << 8) + y[1]
  211. y = struct.unpack('BB', image.read(2))
  212. width = (y[0] << 8) + y[1]
  213. y = struct.unpack('B', image.read(1))
  214. color = y[0]
  215. return width, height, color, dpi
  216. elif x[0]==0xE0:
  217. x = struct.unpack('BB', image.read(2))
  218. n = (x[0] << 8) + x[1] - 2
  219. x = image.read(n)
  220. y = struct.unpack('BB', x[10:12])
  221. x = struct.unpack('BB', x[8:10])
  222. dpi = ((x[0]<<8) + x[1],(y[0]<<8)+y[1])
  223. elif x[0] in unsupportedMarkers:
  224. raise PDFError('JPEG Unsupported JPEG marker: %0.2x' % x[0])
  225. elif x[0] not in noParamMarkers:
  226. #skip segments with parameters
  227. #read length and skip the data
  228. x = struct.unpack('BB', image.read(2))
  229. image.seek( (x[0] << 8) + x[1] - 2, 1)
  230. class _fusc:
  231. def __init__(self,k, n):
  232. assert k, 'Argument k should be a non empty string'
  233. self._k = k
  234. self._klen = len(k)
  235. self._n = int(n) or 7
  236. def encrypt(self,s):
  237. return self.__rotate(asciiBase85Encode(''.join(map(chr,self.__fusc(list(map(ord,s)))))),self._n)
  238. def decrypt(self,s):
  239. return ''.join(map(chr,self.__fusc(list(map(ord,asciiBase85Decode(self.__rotate(s,-self._n)))))))
  240. def __rotate(self,s,n):
  241. l = len(s)
  242. if n<0: n = l+n
  243. n %= l
  244. if not n: return s
  245. return s[-n:]+s[:l-n]
  246. def __fusc(self,s):
  247. slen = len(s)
  248. return list(map(lambda x,y: x ^ y,s,list(map(ord,((int(slen/self._klen)+1)*self._k)[:slen]))))