pdfimages.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219
  1. #Copyright ReportLab Europe Ltd. 2000-2017
  2. #see license.txt for license details
  3. #history https://hg.reportlab.com/hg-public/reportlab/log/tip/src/reportlab/pdfgen/pdfimages.py
  4. __version__='3.3.0'
  5. __doc__="""
  6. Image functionality sliced out of canvas.py for generalization
  7. """
  8. import os
  9. import reportlab
  10. from reportlab import rl_config
  11. from reportlab.pdfbase import pdfutils
  12. from reportlab.pdfbase import pdfdoc
  13. from reportlab.lib.utils import import_zlib, haveImages, getBytesIO, isStr
  14. from reportlab.lib.rl_accel import fp_str, asciiBase85Encode
  15. from reportlab.lib.boxstuff import aspectRatioFix
  16. class PDFImage:
  17. """Wrapper around different "image sources". You can make images
  18. from a PIL Image object, a filename (in which case it uses PIL),
  19. an image we previously cached (optimisation, hardly used these
  20. days) or a JPEG (which PDF supports natively)."""
  21. def __init__(self, image, x,y, width=None, height=None, caching=0):
  22. self.image = image
  23. self.x = x
  24. self.y = y
  25. self.width = width
  26. self.height = height
  27. self.filename = None
  28. self.imageCaching = caching
  29. # the following facts need to be determined,
  30. # whatever the source. Declare what they are
  31. # here for clarity.
  32. self.colorSpace = 'DeviceRGB'
  33. self.bitsPerComponent = 8
  34. self.filters = []
  35. self.source = None # JPEG or PIL, set later
  36. self.getImageData()
  37. def jpg_imagedata(self):
  38. #directly process JPEG files
  39. #open file, needs some error handling!!
  40. fp = open(self.image, 'rb')
  41. try:
  42. result = self._jpg_imagedata(fp)
  43. finally:
  44. fp.close()
  45. return result
  46. def _jpg_imagedata(self,imageFile):
  47. info = pdfutils.readJPEGInfo(imageFile)
  48. self.source = 'JPEG'
  49. imgwidth, imgheight = info[0], info[1]
  50. if info[2] == 1:
  51. colorSpace = 'DeviceGray'
  52. elif info[2] == 3:
  53. colorSpace = 'DeviceRGB'
  54. else: #maybe should generate an error, is this right for CMYK?
  55. colorSpace = 'DeviceCMYK'
  56. imageFile.seek(0) #reset file pointer
  57. imagedata = []
  58. #imagedata.append('BI /Width %d /Height /BitsPerComponent 8 /ColorSpace /%s /Filter [/Filter [ /ASCII85Decode /DCTDecode] ID' % (info[0], info[1], colorSpace))
  59. imagedata.append('BI /W %d /H %d /BPC 8 /CS /%s /F [%s/DCT] ID' % (imgwidth, imgheight, colorSpace, rl_config.useA85 and '/A85 ' or ''))
  60. #write in blocks of (??) 60 characters per line to a list
  61. data = imageFile.read()
  62. if rl_config.useA85:
  63. data = asciiBase85Encode(data)
  64. pdfutils._chunker(data,imagedata)
  65. imagedata.append('EI')
  66. return (imagedata, imgwidth, imgheight)
  67. def cache_imagedata(self):
  68. image = self.image
  69. if not pdfutils.cachedImageExists(image):
  70. zlib = import_zlib()
  71. if not zlib: return
  72. if not haveImages: return
  73. pdfutils.cacheImageFile(image)
  74. #now we have one cached, slurp it in
  75. cachedname = os.path.splitext(image)[0] + (rl_config.useA85 and '.a85' or '.bin')
  76. imagedata = open(cachedname,'rb').readlines()
  77. #trim off newlines...
  78. imagedata = list(map(str.strip, imagedata))
  79. return imagedata
  80. def PIL_imagedata(self):
  81. image = self.image
  82. if image.format=='JPEG':
  83. fp=image.fp
  84. fp.seek(0)
  85. return self._jpg_imagedata(fp)
  86. self.source = 'PIL'
  87. zlib = import_zlib()
  88. if not zlib: return
  89. bpc = 8
  90. # Use the colorSpace in the image
  91. if image.mode == 'CMYK':
  92. myimage = image
  93. colorSpace = 'DeviceCMYK'
  94. bpp = 4
  95. elif image.mode == '1':
  96. myimage = image
  97. colorSpace = 'DeviceGray'
  98. bpp = 1
  99. bpc = 1
  100. elif image.mode == 'L':
  101. myimage = image
  102. colorSpace = 'DeviceGray'
  103. bpp = 1
  104. else:
  105. myimage = image.convert('RGB')
  106. colorSpace = 'RGB'
  107. bpp = 3
  108. imgwidth, imgheight = myimage.size
  109. # this describes what is in the image itself
  110. # *NB* according to the spec you can only use the short form in inline images
  111. imagedata=['BI /W %d /H %d /BPC %d /CS /%s /F [%s/Fl] ID' % (imgwidth, imgheight, bpc, colorSpace, rl_config.useA85 and '/A85 ' or '')]
  112. #use a flate filter and, optionally, Ascii Base 85 to compress
  113. raw = (myimage.tobytes if hasattr(myimage,'tobytes') else myimage.tostring)()
  114. rowstride = (imgwidth*bpc*bpp+7)>>3
  115. assert len(raw) == rowstride*imgheight, "Wrong amount of data for image"
  116. data = zlib.compress(raw) #this bit is very fast...
  117. if rl_config.useA85:
  118. data = asciiBase85Encode(data) #...sadly this may not be
  119. #append in blocks of 60 characters
  120. pdfutils._chunker(data,imagedata)
  121. imagedata.append('EI')
  122. return (imagedata, imgwidth, imgheight)
  123. def non_jpg_imagedata(self,image):
  124. if not self.imageCaching:
  125. imagedata = pdfutils.cacheImageFile(image,returnInMemory=1)
  126. else:
  127. imagedata = self.cache_imagedata()
  128. words = imagedata[1].split()
  129. imgwidth = int(words[1])
  130. imgheight = int(words[3])
  131. return imagedata, imgwidth, imgheight
  132. def getImageData(self,preserveAspectRatio=False):
  133. "Gets data, height, width - whatever type of image"
  134. image = self.image
  135. if isStr(image):
  136. self.filename = image
  137. if os.path.splitext(image)[1] in ['.jpg', '.JPG', '.jpeg', '.JPEG']:
  138. try:
  139. imagedata, imgwidth, imgheight = self.jpg_imagedata()
  140. except:
  141. imagedata, imgwidth, imgheight = self.non_jpg_imagedata(image) #try for normal kind of image
  142. else:
  143. imagedata, imgwidth, imgheight = self.non_jpg_imagedata(image)
  144. else:
  145. import sys
  146. if sys.platform[0:4] == 'java':
  147. #jython, PIL not available
  148. imagedata, imgwidth, imgheight = self.JAVA_imagedata()
  149. else:
  150. imagedata, imgwidth, imgheight = self.PIL_imagedata()
  151. self.imageData = imagedata
  152. self.imgwidth = imgwidth
  153. self.imgheight = imgheight
  154. self.width = self.width or imgwidth
  155. self.height = self.height or imgheight
  156. def drawInlineImage(self, canvas, preserveAspectRatio=False,anchor='sw', anchorAtXY=False, showBoundary=False):
  157. """Draw an Image into the specified rectangle. If width and
  158. height are omitted, they are calculated from the image size.
  159. Also allow file names as well as images. This allows a
  160. caching mechanism"""
  161. width = self.width
  162. height = self.height
  163. if width<1e-6 or height<1e-6: return False
  164. x,y,self.width,self.height, scaled = aspectRatioFix(preserveAspectRatio,anchor,self.x,self.y,width,height,self.imgwidth,self.imgheight,anchorAtXY)
  165. # this says where and how big to draw it
  166. if not canvas.bottomup: y = y+height
  167. canvas._code.append('q %s 0 0 %s cm' % (fp_str(self.width), fp_str(self.height, x, y)))
  168. # self._code.extend(imagedata) if >=python-1.5.2
  169. for line in self.imageData:
  170. canvas._code.append(line)
  171. canvas._code.append('Q')
  172. if showBoundary:
  173. canvas.drawBoundary(showBoundary,x,y,width,height)
  174. return True
  175. def format(self, document):
  176. """Allow it to be used within pdfdoc framework. This only
  177. defines how it is stored, not how it is drawn later."""
  178. dict = pdfdoc.PDFDictionary()
  179. dict['Type'] = '/XObject'
  180. dict['Subtype'] = '/Image'
  181. dict['Width'] = self.width
  182. dict['Height'] = self.height
  183. dict['BitsPerComponent'] = 8
  184. dict['ColorSpace'] = pdfdoc.PDFName(self.colorSpace)
  185. content = '\n'.join(self.imageData[3:-1]) + '\n'
  186. strm = pdfdoc.PDFStream(dictionary=dict, content=content)
  187. return strm.format(document)
  188. if __name__=='__main__':
  189. srcfile = os.path.join(
  190. os.path.dirname(reportlab.__file__),
  191. 'test',
  192. 'pythonpowered.gif'
  193. )
  194. assert os.path.isfile(srcfile), 'image not found'
  195. pdfdoc.LongFormat = 1
  196. img = PDFImage(srcfile, 100, 100)
  197. import pprint
  198. doc = pdfdoc.PDFDocument()
  199. print('source=',img.source)
  200. print(img.format(doc))