# rl_accel.py (12 KB)
  1. #this is the interface module that imports all from the C extension _rl_accel
  2. _c_funcs = {}
  3. _py_funcs = {}
  4. ### NOTE! FP_STR SHOULD PROBABLY ALWAYS DO A PYTHON STR() CONVERSION ON ARGS
  5. ### IN CASE THEY ARE "LAZY OBJECTS". ACCELLERATOR DOESN'T DO THIS (YET)
  6. __all__ = list(filter(None,'''
  7. fp_str
  8. unicode2T1
  9. instanceStringWidthT1
  10. instanceStringWidthTTF
  11. asciiBase85Encode
  12. asciiBase85Decode
  13. escapePDF
  14. sameFrag
  15. calcChecksum
  16. add32
  17. hex32
  18. '''.split()))
  19. import reportlab
  20. testing = getattr(reportlab,'_rl_testing',False)
  21. del reportlab
  22. for fn in __all__:
  23. D={}
  24. try:
  25. exec('from reportlab.lib._rl_accel import %s as f' % fn,D)
  26. _c_funcs[fn] = D['f']
  27. if testing: _py_funcs[fn] = None
  28. except ImportError:
  29. _py_funcs[fn] = None
  30. del D
  31. if _py_funcs:
  32. from reportlab.lib.utils import isBytes, isUnicode, isSeq, rawBytes, asNative, asUnicode, asBytes
  33. from math import log
  34. from struct import unpack
  35. if 'fp_str' in _py_funcs:
  36. _log_10 = lambda x,log=log,_log_e_10=log(10.0): log(x)/_log_e_10
  37. _fp_fmts = "%.0f", "%.1f", "%.2f", "%.3f", "%.4f", "%.5f", "%.6f"
  38. def fp_str(*a):
  39. '''convert separate arguments (or single sequence arg) into space separated numeric strings'''
  40. if len(a)==1 and isSeq(a[0]): a = a[0]
  41. s = []
  42. A = s.append
  43. for i in a:
  44. sa =abs(i)
  45. if sa<=1e-7: A('0')
  46. else:
  47. l = sa<=1 and 6 or min(max(0,(6-int(_log_10(sa)))),6)
  48. n = _fp_fmts[l]%i
  49. if l:
  50. j = len(n)
  51. while j:
  52. j -= 1
  53. if n[j]!='0':
  54. if n[j]!='.': j += 1
  55. break
  56. n = n[:j]
  57. A((n[0]!='0' or len(n)==1) and n or n[1:])
  58. return ' '.join(s)
  59. #hack test for comma users
  60. if ',' in fp_str(0.25):
  61. _FP_STR = _fp_str
  62. def _fp_str(*a):
  63. return _FP_STR(*a).replace(',','.')
  64. _py_funcs['fp_str'] = fp_str
  65. if 'unicode2T1' in _py_funcs:
  66. def unicode2T1(utext,fonts):
  67. '''return a list of (font,string) pairs representing the unicode text'''
  68. R = []
  69. font, fonts = fonts[0], fonts[1:]
  70. enc = font.encName
  71. if 'UCS-2' in enc:
  72. enc = 'UTF16'
  73. while utext:
  74. try:
  75. if isUnicode(utext):
  76. s = utext.encode(enc)
  77. else:
  78. s = utext
  79. R.append((font,s))
  80. break
  81. except UnicodeEncodeError as e:
  82. i0, il = e.args[2:4]
  83. if i0:
  84. R.append((font,utext[:i0].encode(enc)))
  85. if fonts:
  86. R.extend(unicode2T1(utext[i0:il],fonts))
  87. else:
  88. R.append((font._notdefFont,font._notdefChar*(il-i0)))
  89. utext = utext[il:]
  90. return R
  91. _py_funcs['unicode2T1'] = unicode2T1
  92. if 'instanceStringWidthT1' in _py_funcs:
  93. def instanceStringWidthT1(self, text, size, encoding='utf8'):
  94. """This is the "purist" approach to width"""
  95. if not isUnicode(text): text = text.decode(encoding)
  96. return sum([sum(map(f.widths.__getitem__,t)) for f, t in unicode2T1(text,[self]+self.substitutionFonts)])*0.001*size
  97. _py_funcs['instanceStringWidthT1'] = instanceStringWidthT1
  98. if 'instanceStringWidthTTF' in _py_funcs:
  99. def instanceStringWidthTTF(self, text, size, encoding='utf-8'):
  100. "Calculate text width"
  101. if not isUnicode(text):
  102. text = text.decode(encoding or 'utf-8')
  103. g = self.face.charWidths.get
  104. dw = self.face.defaultWidth
  105. return 0.001*size*sum([g(ord(u),dw) for u in text])
  106. _py_funcs['instanceStringWidthTTF'] = instanceStringWidthTTF
  107. if 'hex32' in _py_funcs:
  108. def hex32(i):
  109. return '0X%8.8X' % (int(i)&0xFFFFFFFF)
  110. _py_funcs['hex32'] = hex32
  111. if 'add32' in _py_funcs:
  112. def add32(x, y):
  113. "Calculate (x + y) modulo 2**32"
  114. return (x+y) & 0xFFFFFFFF
  115. _py_funcs['add32'] = add32
  116. if 'calcChecksum' in _py_funcs:
  117. def calcChecksum(data):
  118. """Calculates TTF-style checksums"""
  119. data = rawBytes(data)
  120. if len(data)&3: data = data + (4-(len(data)&3))*b"\0"
  121. return sum(unpack(">%dl" % (len(data)>>2), data)) & 0xFFFFFFFF
  122. _py_funcs['calcChecksum'] = calcChecksum
  123. if 'escapePDF' in _py_funcs:
  124. _ESCAPEDICT={}
  125. for c in range(256):
  126. if c<32 or c>=127:
  127. _ESCAPEDICT[c]= '\\%03o' % c
  128. elif c in (ord('\\'),ord('('),ord(')')):
  129. _ESCAPEDICT[c] = '\\'+chr(c)
  130. else:
  131. _ESCAPEDICT[c] = chr(c)
  132. del c
  133. #Michael Hudson donated this
  134. def escapePDF(s):
  135. r = []
  136. for c in s:
  137. if not type(c) is int:
  138. c = ord(c)
  139. r.append(_ESCAPEDICT[c])
  140. return ''.join(r)
  141. _py_funcs['escapePDF'] = escapePDF
  142. if 'asciiBase85Encode' in _py_funcs:
  143. def asciiBase85Encode(input):
  144. """Encodes input using ASCII-Base85 coding.
  145. This is a compact encoding used for binary data within
  146. a PDF file. Four bytes of binary data become five bytes of
  147. ASCII. This is the default method used for encoding images."""
  148. doOrd = isUnicode(input)
  149. # special rules apply if not a multiple of four bytes.
  150. whole_word_count, remainder_size = divmod(len(input), 4)
  151. cut = 4 * whole_word_count
  152. body, lastbit = input[0:cut], input[cut:]
  153. out = [].append
  154. for i in range(whole_word_count):
  155. offset = i*4
  156. b1 = body[offset]
  157. b2 = body[offset+1]
  158. b3 = body[offset+2]
  159. b4 = body[offset+3]
  160. if doOrd:
  161. b1 = ord(b1)
  162. b2 = ord(b2)
  163. b3 = ord(b3)
  164. b4 = ord(b4)
  165. if b1<128:
  166. num = (((((b1<<8)|b2)<<8)|b3)<<8)|b4
  167. else:
  168. num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4
  169. if num == 0:
  170. #special case
  171. out('z')
  172. else:
  173. #solve for five base-85 numbers
  174. temp, c5 = divmod(num, 85)
  175. temp, c4 = divmod(temp, 85)
  176. temp, c3 = divmod(temp, 85)
  177. c1, c2 = divmod(temp, 85)
  178. assert ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5 == num, 'dodgy code!'
  179. out(chr(c1+33))
  180. out(chr(c2+33))
  181. out(chr(c3+33))
  182. out(chr(c4+33))
  183. out(chr(c5+33))
  184. # now we do the final bit at the end. I repeated this separately as
  185. # the loop above is the time-critical part of a script, whereas this
  186. # happens only once at the end.
  187. #encode however many bytes we have as usual
  188. if remainder_size > 0:
  189. lastbit += (4-len(lastbit))*('\0' if doOrd else b'\000')
  190. b1 = lastbit[0]
  191. b2 = lastbit[1]
  192. b3 = lastbit[2]
  193. b4 = lastbit[3]
  194. if doOrd:
  195. b1 = ord(b1)
  196. b2 = ord(b2)
  197. b3 = ord(b3)
  198. b4 = ord(b4)
  199. num = 16777216 * b1 + 65536 * b2 + 256 * b3 + b4
  200. #solve for c1..c5
  201. temp, c5 = divmod(num, 85)
  202. temp, c4 = divmod(temp, 85)
  203. temp, c3 = divmod(temp, 85)
  204. c1, c2 = divmod(temp, 85)
  205. #print 'encoding: %d %d %d %d -> %d -> %d %d %d %d %d' % (
  206. # b1,b2,b3,b4,num,c1,c2,c3,c4,c5)
  207. lastword = chr(c1+33) + chr(c2+33) + chr(c3+33) + chr(c4+33) + chr(c5+33)
  208. #write out most of the bytes.
  209. out(lastword[0:remainder_size + 1])
  210. #terminator code for ascii 85
  211. out('~>')
  212. return ''.join(out.__self__)
  213. _py_funcs['asciiBase85Encode'] = asciiBase85Encode
  214. if 'asciiBase85Decode' in _py_funcs:
  215. def asciiBase85Decode(input):
  216. """Decodes input using ASCII-Base85 coding.
  217. This is not normally used - Acrobat Reader decodes for you
  218. - but a round trip is essential for testing."""
  219. #strip all whitespace
  220. stripped = ''.join(asNative(input).split())
  221. #check end
  222. assert stripped[-2:] == '~>', 'Invalid terminator for Ascii Base 85 Stream'
  223. stripped = stripped[:-2] #chop off terminator
  224. #may have 'z' in it which complicates matters - expand them
  225. stripped = stripped.replace('z','!!!!!')
  226. # special rules apply if not a multiple of five bytes.
  227. whole_word_count, remainder_size = divmod(len(stripped), 5)
  228. #print '%d words, %d leftover' % (whole_word_count, remainder_size)
  229. #assert remainder_size != 1, 'invalid Ascii 85 stream!'
  230. cut = 5 * whole_word_count
  231. body, lastbit = stripped[0:cut], stripped[cut:]
  232. out = [].append
  233. for i in range(whole_word_count):
  234. offset = i*5
  235. c1 = ord(body[offset]) - 33
  236. c2 = ord(body[offset+1]) - 33
  237. c3 = ord(body[offset+2]) - 33
  238. c4 = ord(body[offset+3]) - 33
  239. c5 = ord(body[offset+4]) - 33
  240. num = ((85**4) * c1) + ((85**3) * c2) + ((85**2) * c3) + (85*c4) + c5
  241. temp, b4 = divmod(num,256)
  242. temp, b3 = divmod(temp,256)
  243. b1, b2 = divmod(temp, 256)
  244. assert num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
  245. out(chr(b1))
  246. out(chr(b2))
  247. out(chr(b3))
  248. out(chr(b4))
  249. #decode however many bytes we have as usual
  250. if remainder_size > 0:
  251. while len(lastbit) < 5:
  252. lastbit = lastbit + '!'
  253. c1 = ord(lastbit[0]) - 33
  254. c2 = ord(lastbit[1]) - 33
  255. c3 = ord(lastbit[2]) - 33
  256. c4 = ord(lastbit[3]) - 33
  257. c5 = ord(lastbit[4]) - 33
  258. num = (((85*c1+c2)*85+c3)*85+c4)*85 + (c5
  259. +(0,0,0xFFFFFF,0xFFFF,0xFF)[remainder_size])
  260. temp, b4 = divmod(num,256)
  261. temp, b3 = divmod(temp,256)
  262. b1, b2 = divmod(temp, 256)
  263. assert num == 16777216 * b1 + 65536 * b2 + 256 * b3 + b4, 'dodgy code!'
  264. #print 'decoding: %d %d %d %d %d -> %d -> %d %d %d %d' % (
  265. # c1,c2,c3,c4,c5,num,b1,b2,b3,b4)
  266. #the last character needs 1 adding; the encoding loses
  267. #data by rounding the number to x bytes, and when
  268. #divided repeatedly we get one less
  269. if remainder_size == 2:
  270. lastword = chr(b1)
  271. elif remainder_size == 3:
  272. lastword = chr(b1) + chr(b2)
  273. elif remainder_size == 4:
  274. lastword = chr(b1) + chr(b2) + chr(b3)
  275. else:
  276. lastword = ''
  277. out(lastword)
  278. r = ''.join(out.__self__)
  279. return asBytes(r,enc='latin1')
  280. _py_funcs['asciiBase85Decode'] = asciiBase85Decode
  281. if 'sameFrag' in _py_funcs:
  282. def sameFrag(f,g):
  283. 'returns 1 if two ParaFrags map out the same'
  284. if (hasattr(f,'cbDefn') or hasattr(g,'cbDefn')
  285. or hasattr(f,'lineBreak') or hasattr(g,'lineBreak')): return 0
  286. for a in ('fontName', 'fontSize', 'textColor', 'rise', 'us_lines', 'link', "backColor", "nobr"):
  287. if getattr(f,a,None)!=getattr(g,a,None): return 0
  288. return 1
  289. _py_funcs['sameFrag'] = sameFrag
  290. G=globals()
  291. for fn in __all__:
  292. f = _c_funcs[fn] if fn in _c_funcs else _py_funcs[fn]
  293. if not f:
  294. raise RuntimeError('function %s is not properly defined' % fn)
  295. G[fn] = f
  296. del fn, f, G
  297. if __name__=='__main__':
  298. import sys, os, subprocess
  299. for modname in 'reportlab.lib.rl_accel','reportlab.lib._rl_accel':
  300. for cmd in (
  301. #"unicode2T1('abcde fghi . jkl ; mno',fonts)",
  302. #"unicode2T1(u'abcde fghi . jkl ; mno',fonts)",
  303. "instanceStringWidthT1(font,'abcde fghi . jkl ; mno',10)",
  304. "instanceStringWidthT1(font,u'abcde fghi . jkl ; mno',10)",
  305. ):
  306. print('%s %s' % (modname,cmd))
  307. s=';'.join((
  308. "from reportlab.pdfbase.pdfmetrics import getFont",
  309. "from %s import unicode2T1,instanceStringWidthT1" % modname,
  310. "fonts=[getFont('Helvetica')]+getFont('Helvetica').substitutionFonts""",
  311. "font=fonts[0]",
  312. ))
  313. subprocess.check_call([sys.executable,'-mtimeit','-s',s,cmd])