CompoundDoc.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. # -*- coding: windows-1252 -*-
  2. import struct
  3. from .compat import xrange
  4. # This implementation writes only 'Root Entry', 'Workbook' streams
  5. # and 2 empty streams for aligning directory stream on sector boundary
  6. #
  7. # LAYOUT:
  8. # 0 header
  9. # 76 MSAT (1st part: 109 SID)
  10. # 512 workbook stream
  11. # ... additional MSAT sectors if streams' size > about 7 Mb == (109*512 * 128)
  12. # ... SAT
  13. # ... directory stream
  14. #
  15. # NOTE: this layout is "ad hoc". It can be more general. RTFM
  16. class XlsDoc:
  17. SECTOR_SIZE = 0x0200
  18. MIN_LIMIT = 0x1000
  19. SID_FREE_SECTOR = -1
  20. SID_END_OF_CHAIN = -2
  21. SID_USED_BY_SAT = -3
  22. SID_USED_BY_MSAT = -4
  23. def __init__(self):
  24. #self.book_stream = '' # padded
  25. self.book_stream_sect = []
  26. self.dir_stream = ''
  27. self.dir_stream_sect = []
  28. self.packed_SAT = ''
  29. self.SAT_sect = []
  30. self.packed_MSAT_1st = ''
  31. self.packed_MSAT_2nd = ''
  32. self.MSAT_sect_2nd = []
  33. self.header = ''
  34. def _build_directory(self): # align on sector boundary
  35. self.dir_stream = b''
  36. dentry_name = u'Root Entry\x00'.encode('utf-16-le')
  37. dentry_name_sz = len(dentry_name)
  38. dentry_name_pad = b'\x00'*(64 - dentry_name_sz)
  39. dentry_type = 0x05 # root storage
  40. dentry_colour = 0x01 # black
  41. dentry_did_left = -1
  42. dentry_did_right = -1
  43. dentry_did_root = 1
  44. dentry_start_sid = -2
  45. dentry_stream_sz = 0
  46. self.dir_stream += struct.pack('<64s H 2B 3l 9L l L L',
  47. dentry_name + dentry_name_pad,
  48. dentry_name_sz,
  49. dentry_type,
  50. dentry_colour,
  51. dentry_did_left,
  52. dentry_did_right,
  53. dentry_did_root,
  54. 0, 0, 0, 0, 0, 0, 0, 0, 0,
  55. dentry_start_sid,
  56. dentry_stream_sz,
  57. 0
  58. )
  59. dentry_name = u'Workbook\x00'.encode('utf-16-le')
  60. dentry_name_sz = len(dentry_name)
  61. dentry_name_pad = b'\x00'*(64 - dentry_name_sz)
  62. dentry_type = 0x02 # user stream
  63. dentry_colour = 0x01 # black
  64. dentry_did_left = -1
  65. dentry_did_right = -1
  66. dentry_did_root = -1
  67. dentry_start_sid = 0
  68. dentry_stream_sz = self.book_stream_len
  69. self.dir_stream += struct.pack('<64s H 2B 3l 9L l L L',
  70. dentry_name + dentry_name_pad,
  71. dentry_name_sz,
  72. dentry_type,
  73. dentry_colour,
  74. dentry_did_left,
  75. dentry_did_right,
  76. dentry_did_root,
  77. 0, 0, 0, 0, 0, 0, 0, 0, 0,
  78. dentry_start_sid,
  79. dentry_stream_sz,
  80. 0
  81. )
  82. # padding
  83. dentry_name = b''
  84. dentry_name_sz = len(dentry_name)
  85. dentry_name_pad = b'\x00'*(64 - dentry_name_sz)
  86. dentry_type = 0x00 # empty
  87. dentry_colour = 0x01 # black
  88. dentry_did_left = -1
  89. dentry_did_right = -1
  90. dentry_did_root = -1
  91. dentry_start_sid = -2
  92. dentry_stream_sz = 0
  93. self.dir_stream += struct.pack('<64s H 2B 3l 9L l L L',
  94. dentry_name + dentry_name_pad,
  95. dentry_name_sz,
  96. dentry_type,
  97. dentry_colour,
  98. dentry_did_left,
  99. dentry_did_right,
  100. dentry_did_root,
  101. 0, 0, 0, 0, 0, 0, 0, 0, 0,
  102. dentry_start_sid,
  103. dentry_stream_sz,
  104. 0
  105. ) * 2
  106. def _build_sat(self):
  107. # Build SAT
  108. book_sect_count = self.book_stream_len >> 9
  109. dir_sect_count = len(self.dir_stream) >> 9
  110. total_sect_count = book_sect_count + dir_sect_count
  111. SAT_sect_count = 0
  112. MSAT_sect_count = 0
  113. SAT_sect_count_limit = 109
  114. while total_sect_count > 128*SAT_sect_count or SAT_sect_count > SAT_sect_count_limit:
  115. SAT_sect_count += 1
  116. total_sect_count += 1
  117. if SAT_sect_count > SAT_sect_count_limit:
  118. MSAT_sect_count += 1
  119. total_sect_count += 1
  120. SAT_sect_count_limit += 127
  121. SAT = [self.SID_FREE_SECTOR]*128*SAT_sect_count
  122. sect = 0
  123. while sect < book_sect_count - 1:
  124. self.book_stream_sect.append(sect)
  125. SAT[sect] = sect + 1
  126. sect += 1
  127. self.book_stream_sect.append(sect)
  128. SAT[sect] = self.SID_END_OF_CHAIN
  129. sect += 1
  130. while sect < book_sect_count + MSAT_sect_count:
  131. self.MSAT_sect_2nd.append(sect)
  132. SAT[sect] = self.SID_USED_BY_MSAT
  133. sect += 1
  134. while sect < book_sect_count + MSAT_sect_count + SAT_sect_count:
  135. self.SAT_sect.append(sect)
  136. SAT[sect] = self.SID_USED_BY_SAT
  137. sect += 1
  138. while sect < book_sect_count + MSAT_sect_count + SAT_sect_count + dir_sect_count - 1:
  139. self.dir_stream_sect.append(sect)
  140. SAT[sect] = sect + 1
  141. sect += 1
  142. self.dir_stream_sect.append(sect)
  143. SAT[sect] = self.SID_END_OF_CHAIN
  144. sect += 1
  145. self.packed_SAT = struct.pack('<%dl' % (SAT_sect_count*128), *SAT)
  146. MSAT_1st = [self.SID_FREE_SECTOR]*109
  147. for i, SAT_sect_num in zip(range(0, 109), self.SAT_sect):
  148. MSAT_1st[i] = SAT_sect_num
  149. self.packed_MSAT_1st = struct.pack('<109l', *MSAT_1st)
  150. MSAT_2nd = [self.SID_FREE_SECTOR]*128*MSAT_sect_count
  151. if MSAT_sect_count > 0:
  152. MSAT_2nd[- 1] = self.SID_END_OF_CHAIN
  153. i = 109
  154. msat_sect = 0
  155. sid_num = 0
  156. while i < SAT_sect_count:
  157. if (sid_num + 1) % 128 == 0:
  158. #print 'link: ',
  159. msat_sect += 1
  160. if msat_sect < len(self.MSAT_sect_2nd):
  161. MSAT_2nd[sid_num] = self.MSAT_sect_2nd[msat_sect]
  162. else:
  163. #print 'sid: ',
  164. MSAT_2nd[sid_num] = self.SAT_sect[i]
  165. i += 1
  166. #print sid_num, MSAT_2nd[sid_num]
  167. sid_num += 1
  168. self.packed_MSAT_2nd = struct.pack('<%dl' % (MSAT_sect_count*128), *MSAT_2nd)
  169. #print vars()
  170. #print zip(range(0, sect), SAT)
  171. #print self.book_stream_sect
  172. #print self.MSAT_sect_2nd
  173. #print MSAT_2nd
  174. #print self.SAT_sect
  175. #print self.dir_stream_sect
  176. def _build_header(self):
  177. doc_magic = b'\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1'
  178. file_uid = b'\x00'*16
  179. rev_num = b'\x3E\x00'
  180. ver_num = b'\x03\x00'
  181. byte_order = b'\xFE\xFF'
  182. log_sect_size = struct.pack('<H', 9)
  183. log_short_sect_size = struct.pack('<H', 6)
  184. not_used0 = b'\x00'*10
  185. total_sat_sectors = struct.pack('<L', len(self.SAT_sect))
  186. dir_start_sid = struct.pack('<l', self.dir_stream_sect[0])
  187. not_used1 = b'\x00'*4
  188. min_stream_size = struct.pack('<L', 0x1000)
  189. ssat_start_sid = struct.pack('<l', -2)
  190. total_ssat_sectors = struct.pack('<L', 0)
  191. if len(self.MSAT_sect_2nd) == 0:
  192. msat_start_sid = struct.pack('<l', -2)
  193. else:
  194. msat_start_sid = struct.pack('<l', self.MSAT_sect_2nd[0])
  195. total_msat_sectors = struct.pack('<L', len(self.MSAT_sect_2nd))
  196. self.header = b''.join([ doc_magic,
  197. file_uid,
  198. rev_num,
  199. ver_num,
  200. byte_order,
  201. log_sect_size,
  202. log_short_sect_size,
  203. not_used0,
  204. total_sat_sectors,
  205. dir_start_sid,
  206. not_used1,
  207. min_stream_size,
  208. ssat_start_sid,
  209. total_ssat_sectors,
  210. msat_start_sid,
  211. total_msat_sectors
  212. ])
  213. def save(self, file_name_or_filelike_obj, stream):
  214. # 1. Align stream on 0x1000 boundary (and therefore on sector boundary)
  215. padding = b'\x00' * (0x1000 - (len(stream) % 0x1000))
  216. self.book_stream_len = len(stream) + len(padding)
  217. self._build_directory()
  218. self._build_sat()
  219. self._build_header()
  220. f = file_name_or_filelike_obj
  221. we_own_it = not hasattr(f, 'write')
  222. if we_own_it:
  223. f = open(file_name_or_filelike_obj, 'w+b')
  224. f.write(self.header)
  225. f.write(self.packed_MSAT_1st)
  226. # There are reports of large writes failing when writing to "network shares" on Windows.
  227. # MS says in KB899149 that it happens at 32KB less than 64MB.
  228. # This is said to be alleviated by using "w+b" mode instead of "wb".
  229. # One xlwt user has reported anomalous results at much smaller sizes,
  230. # The fallback is to write the stream in 4 MB chunks.
  231. try:
  232. f.write(stream)
  233. except IOError as e:
  234. if e.errno != 22: # "Invalid argument" i.e. 'stream' is too big
  235. raise # some other problem
  236. chunk_size = 4 * 1024 * 1024
  237. for offset in xrange(0, len(stream), chunk_size):
  238. f.write(buffer(stream, offset, chunk_size))
  239. f.write(padding)
  240. f.write(self.packed_MSAT_2nd)
  241. f.write(self.packed_SAT)
  242. f.write(self.dir_stream)
  243. if we_own_it:
  244. f.close()