123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297 |
- #Copyright ReportLab Europe Ltd. 2000-2017
- #see license.txt for license details
- #history https://hg.reportlab.com/hg-public/reportlab/log/tip/src/reportlab/pdfbase/pdfutils.py
- __version__='3.3.0'
- __doc__=''
- # pdfutils.py - everything to do with images, streams,
- # compression, and some constants
- import sys
- import os
- import binascii
- from reportlab import rl_config
- from reportlab.lib.utils import getBytesIO, ImageReader, isUnicode
- from reportlab.lib.rl_accel import asciiBase85Encode, asciiBase85Decode
- def _chunker(src,dst=[],chunkSize=60):
- for i in range(0,len(src),chunkSize):
- dst.append(src[i:i+chunkSize])
- return dst
- ##########################################################
- #
- # Image compression helpers. Preprocessing a directory
- # of images will offer a vast speedup.
- #
- ##########################################################
- _mode2cs = {'RGB':'RGB', 'CMYK': 'CMYK', 'L': 'G'}
- _mode2bpp = {'RGB': 3, 'CMYK':4, 'L':1}
- def makeA85Image(filename,IMG=None, detectJpeg=False):
- import zlib
- img = ImageReader(filename)
- if IMG is not None:
- IMG.append(img)
- if detectJpeg and img.jpeg_fh():
- return None
- imgwidth, imgheight = img.getSize()
- raw = img.getRGBData()
- code = []
- append = code.append
- # this describes what is in the image itself
- append('BI')
- append('/W %s /H %s /BPC 8 /CS /%s /F [/A85 /Fl]' % (imgwidth, imgheight,_mode2cs[img.mode]))
- append('ID')
- #use a flate filter and Ascii Base 85
- assert len(raw) == imgwidth * imgheight*_mode2bpp[img.mode], "Wrong amount of data for image"
- compressed = zlib.compress(raw) #this bit is very fast...
- encoded = asciiBase85Encode(compressed) #...sadly this may not be
- #append in blocks of 60 characters
- _chunker(encoded,code)
- append('EI')
- return code
- def makeRawImage(filename,IMG=None,detectJpeg=False):
- import zlib
- img = ImageReader(filename)
- if IMG is not None:
- IMG.append(img)
- if detectJpeg and img.jpeg_fh():
- return None
- imgwidth, imgheight = img.getSize()
- raw = img.getRGBData()
- code = []
- append = code.append
- # this describes what is in the image itself
- append('BI')
- append('/W %s /H %s /BPC 8 /CS /%s /F [/Fl]' % (imgwidth, imgheight,_mode2cs[img.mode]))
- append('ID')
- #use a flate filter
- assert len(raw) == imgwidth * imgheight*_mode2bpp[img.mode], "Wrong amount of data for image"
- compressed = zlib.compress(raw) #this bit is very fast...
- #append in blocks of 60 characters
- _chunker(compressed,code)
- append('EI')
- return code
- def cacheImageFile(filename, returnInMemory=0, IMG=None):
- "Processes image as if for encoding, saves to a file with .a85 extension."
- cachedname = os.path.splitext(filename)[0] + (rl_config.useA85 and '.a85' or '.bin')
- if filename==cachedname:
- if cachedImageExists(filename):
- from reportlab.lib.utils import open_for_read
- if returnInMemory: return filter(None,open_for_read(cachedname).read().split('\r\n'))
- else:
- raise IOError('No such cached image %s' % filename)
- else:
- if rl_config.useA85:
- code = makeA85Image(filename,IMG)
- else:
- code = makeRawImage(filename,IMG)
- if returnInMemory: return code
- #save it to a file
- f = open(cachedname,'wb')
- f.write('\r\n'.join(code)+'\r\n')
- f.close()
- if rl_config.verbose:
- print('cached image as %s' % cachedname)
- def preProcessImages(spec):
- """Preprocesses one or more image files.
- Accepts either a filespec ('C:\\mydir\\*.jpg') or a list
- of image filenames, crunches them all to save time. Run this
- to save huge amounts of time when repeatedly building image
- documents."""
- import glob
- if isinstance(spec,str):
- filelist = glob.glob(spec)
- else: #list or tuple OK
- filelist = spec
- for filename in filelist:
- if cachedImageExists(filename):
- if rl_config.verbose:
- print('cached version of %s already exists' % filename)
- else:
- cacheImageFile(filename)
- def cachedImageExists(filename):
- """Determines if a cached image already exists for a given file.
- Determines if a cached image exists which has the same name
- and equal or newer date to the given file."""
- cachedname = os.path.splitext(filename)[0] + (rl_config.useA85 and '.a85' or 'bin')
- if os.path.isfile(cachedname):
- #see if it is newer
- original_date = os.stat(filename)[8]
- cached_date = os.stat(cachedname)[8]
- if original_date > cached_date:
- return 0
- else:
- return 1
- else:
- return 0
- ##############################################################
- #
- # PDF Helper functions
- #
- ##############################################################
- def _normalizeLineEnds(text,desired='\r\n',unlikely='\x00\x01\x02\x03'):
- """Normalizes different line end character(s).
- Ensures all instances of CR, LF and CRLF end up as
- the specified one."""
-
- return (text
- .replace('\r\n', unlikely)
- .replace('\r', unlikely)
- .replace('\n', unlikely)
- .replace(unlikely, desired))
- def _AsciiHexEncode(input):
- """Encodes input using ASCII-Hex coding.
- This is a verbose encoding used for binary data within
- a PDF file. One byte binary becomes two bytes of ASCII.
- Helper function used by images."""
- if isUnicode(input):
- input = input.encode('utf-8')
- output = getBytesIO()
- output.write(binascii.b2a_hex(input))
- output.write(b'>')
- return output.getvalue()
- def _AsciiHexDecode(input):
- """Decodes input using ASCII-Hex coding.
- Not used except to provide a test of the inverse function."""
- #strip out all whitespace
- if not isUnicode(input):
- input = input.decode('utf-8')
- stripped = ''.join(input.split())
- assert stripped[-1] == '>', 'Invalid terminator for Ascii Hex Stream'
- stripped = stripped[:-1] #chop off terminator
- assert len(stripped) % 2 == 0, 'Ascii Hex stream has odd number of bytes'
- return ''.join([chr(int(stripped[i:i+2],16)) for i in range(0,len(stripped),2)])
-
- def _wrap(input, columns=60):
- "Wraps input at a given column size by inserting \r\n characters."
- output = []
- length = len(input)
- i = 0
- pos = columns * i
- while pos < length:
- output.append(input[pos:pos+columns])
- i = i + 1
- pos = columns * i
- #avoid HP printer problem
- if len(output[-1])==1:
- output[-2:] = [output[-2][:-1],output[-2][-1]+output[-1]]
- return '\r\n'.join(output)
- #########################################################################
- #
- # JPEG processing code - contributed by Eric Johnson
- #
- #########################################################################
- # Read data from the JPEG file. We should probably be using PIL to
- # get this information for us -- but this way is more fun!
- # Returns (width, height, color components) as a triple
- # This is based on Thomas Merz's code from GhostScript (viewjpeg.ps)
- def readJPEGInfo(image):
- "Read width, height and number of components from open JPEG file."
- import struct
- from reportlab.pdfbase.pdfdoc import PDFError
- #Acceptable JPEG Markers:
- # SROF0=baseline, SOF1=extended sequential or SOF2=progressive
- validMarkers = [0xC0, 0xC1, 0xC2]
- #JPEG markers without additional parameters
- noParamMarkers = \
- [ 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0x01 ]
- #Unsupported JPEG Markers
- unsupportedMarkers = \
- [ 0xC3, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCD, 0xCE, 0xCF ]
- #read JPEG marker segments until we find SOFn marker or EOF
- dpi = (72,72)
- done = 0
- while not done:
- x = struct.unpack('B', image.read(1))
- if x[0] == 0xFF: #found marker
- x = struct.unpack('B', image.read(1))
- #print('marker=%2x' % x[0])
- if x[0] in validMarkers:
- image.seek(2, 1) #skip segment length
- x = struct.unpack('B', image.read(1)) #data precision
- if x[0] != 8:
- raise PDFError('JPEG must have 8 bits per component')
- y = struct.unpack('BB', image.read(2))
- height = (y[0] << 8) + y[1]
- y = struct.unpack('BB', image.read(2))
- width = (y[0] << 8) + y[1]
- y = struct.unpack('B', image.read(1))
- color = y[0]
- return width, height, color, dpi
- elif x[0]==0xE0:
- x = struct.unpack('BB', image.read(2))
- n = (x[0] << 8) + x[1] - 2
- x = image.read(n)
- y = struct.unpack('BB', x[10:12])
- x = struct.unpack('BB', x[8:10])
- dpi = ((x[0]<<8) + x[1],(y[0]<<8)+y[1])
- elif x[0] in unsupportedMarkers:
- raise PDFError('JPEG Unsupported JPEG marker: %0.2x' % x[0])
- elif x[0] not in noParamMarkers:
- #skip segments with parameters
- #read length and skip the data
- x = struct.unpack('BB', image.read(2))
- image.seek( (x[0] << 8) + x[1] - 2, 1)
- class _fusc:
- def __init__(self,k, n):
- assert k, 'Argument k should be a non empty string'
- self._k = k
- self._klen = len(k)
- self._n = int(n) or 7
- def encrypt(self,s):
- return self.__rotate(asciiBase85Encode(''.join(map(chr,self.__fusc(list(map(ord,s)))))),self._n)
- def decrypt(self,s):
- return ''.join(map(chr,self.__fusc(list(map(ord,asciiBase85Decode(self.__rotate(s,-self._n)))))))
- def __rotate(self,s,n):
- l = len(s)
- if n<0: n = l+n
- n %= l
- if not n: return s
- return s[-n:]+s[:l-n]
- def __fusc(self,s):
- slen = len(s)
- return list(map(lambda x,y: x ^ y,s,list(map(ord,((int(slen/self._klen)+1)*self._k)[:slen]))))
|