excel-formula.g 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. header {
  2. import struct
  3. import Utils
  4. from UnicodeUtils import upack1
  5. from ExcelMagic import *
  6. _RVAdelta = {"R": 0, "V": 0x20, "A": 0x40}
  7. _RVAdeltaRef = {"R": 0, "V": 0x20, "A": 0x40, "D": 0x20}
  8. _RVAdeltaArea = {"R": 0, "V": 0x20, "A": 0x40, "D": 0}
  9. class FormulaParseException(Exception):
  10. """
  11. An exception indicating that a Formula could not be successfully parsed.
  12. """
  13. }
  14. header "ExcelFormulaParser.__init__" {
  15. self.rpn = ""
  16. self.sheet_references = []
  17. self.xcall_references = []
  18. }
  19. options {
  20. language = "Python";
  21. }
  22. class ExcelFormulaParser extends Parser;
  23. options {
  24. k = 2;
  25. defaultErrorHandler = false;
  26. buildAST = false;
  27. }
  28. tokens {
  29. TRUE_CONST;
  30. FALSE_CONST;
  31. STR_CONST;
  32. NUM_CONST;
  33. INT_CONST;
  34. FUNC_IF;
  35. FUNC_CHOOSE;
  36. NAME;
  37. QUOTENAME;
  38. EQ;
  39. NE;
  40. GT;
  41. LT;
  42. GE;
  43. LE;
  44. ADD;
  45. SUB;
  46. MUL;
  47. DIV;
  48. POWER;
  49. PERCENT;
  50. LP;
  51. RP;
  52. LB;
  53. RB;
  54. COLON;
  55. COMMA;
  56. SEMICOLON;
  57. REF2D;
  58. REF2D_R1C1;
  59. BANG;
  60. }
  61. formula
  62. : expr["V"]
  63. ;
  64. expr[arg_type]
  65. : // {print "\n**expr %s" % arg_type}
  66. prec0_expr[arg_type]
  67. (
  68. (
  69. EQ { op = struct.pack('B', ptgEQ) }
  70. | NE { op = struct.pack('B', ptgNE) }
  71. | GT { op = struct.pack('B', ptgGT) }
  72. | LT { op = struct.pack('B', ptgLT) }
  73. | GE { op = struct.pack('B', ptgGE) }
  74. | LE { op = struct.pack('B', ptgLE) }
  75. )
  76. prec0_expr[arg_type] { self.rpn += op }
  77. )*
  78. ;
  79. prec0_expr[arg_type]
  80. : prec1_expr[arg_type]
  81. (
  82. (
  83. CONCAT { op = struct.pack('B', ptgConcat) }
  84. )
  85. prec1_expr[arg_type] { self.rpn += op }
  86. )*
  87. ;
  88. prec1_expr[arg_type]
  89. : // {print "**prec1_expr1 %s" % arg_type}
  90. prec2_expr[arg_type]
  91. // {print "**prec1_expr2 %s" % arg_type}
  92. (
  93. (
  94. ADD { op = struct.pack('B', ptgAdd) }
  95. | SUB { op = struct.pack('B', ptgSub) }
  96. )
  97. // {print "**prec1_expr3 %s" % arg_type}
  98. prec2_expr[arg_type]
  99. { self.rpn += op;
  100. // print "**prec1_expr4 %s" % arg_type
  101. }
  102. )*
  103. ;
  104. prec2_expr[arg_type]
  105. : prec3_expr[arg_type]
  106. (
  107. (
  108. MUL { op = struct.pack('B', ptgMul) }
  109. | DIV { op = struct.pack('B', ptgDiv) }
  110. )
  111. prec3_expr[arg_type] { self.rpn += op }
  112. )*
  113. ;
  114. prec3_expr[arg_type]
  115. : prec4_expr[arg_type]
  116. (
  117. (
  118. POWER { op = struct.pack('B', ptgPower) }
  119. )
  120. prec4_expr[arg_type] { self.rpn += op }
  121. )*
  122. ;
  123. prec4_expr[arg_type]
  124. : prec5_expr[arg_type]
  125. (
  126. PERCENT { self.rpn += struct.pack('B', ptgPercent) }
  127. )?
  128. ;
  129. prec5_expr[arg_type]
  130. : primary[arg_type]
  131. | SUB primary[arg_type] { self.rpn += struct.pack('B', ptgUminus) }
  132. ;
  133. primary[arg_type]
  134. : TRUE_CONST
  135. {
  136. self.rpn += struct.pack("2B", ptgBool, 1)
  137. }
  138. | FALSE_CONST
  139. {
  140. self.rpn += struct.pack("2B", ptgBool, 0)
  141. }
  142. | str_tok:STR_CONST
  143. {
  144. self.rpn += struct.pack("B", ptgStr) + upack1(str_tok.text[1:-1].replace("\"\"", "\""))
  145. }
  146. | int_tok:INT_CONST
  147. {
  148. // print "**int_const", int_tok.text
  149. int_value = int(int_tok.text)
  150. if int_value <= 65535:
  151. self.rpn += struct.pack("<BH", ptgInt, int_value)
  152. else:
  153. self.rpn += struct.pack("<Bd", ptgNum, float(int_value))
  154. }
  155. | num_tok:NUM_CONST
  156. {
  157. self.rpn += struct.pack("<Bd", ptgNum, float(num_tok.text))
  158. }
  159. | ref2d_tok:REF2D
  160. {
  161. // print "**ref2d %s %s" % (ref2d_tok.text, arg_type)
  162. r, c = Utils.cell_to_packed_rowcol(ref2d_tok.text)
  163. ptg = ptgRefR + _RVAdeltaRef[arg_type]
  164. self.rpn += struct.pack("<B2H", ptg, r, c)
  165. }
  166. | ref2d1_tok:REF2D COLON ref2d2_tok:REF2D
  167. {
  168. r1, c1 = Utils.cell_to_packed_rowcol(ref2d1_tok.text)
  169. r2, c2 = Utils.cell_to_packed_rowcol(ref2d2_tok.text)
  170. ptg = ptgAreaR + _RVAdeltaArea[arg_type]
  171. self.rpn += struct.pack("<B4H", ptg, r1, r2, c1, c2)
  172. }
  173. | sheet1 = sheet
  174. {
  175. sheet2 = sheet1
  176. }
  177. ( COLON sheet2 = sheet )? BANG ref3d_ref2d: REF2D
  178. {
  179. ptg = ptgRef3dR + _RVAdeltaRef[arg_type]
  180. rpn_ref2d = ""
  181. r1, c1 = Utils.cell_to_packed_rowcol(ref3d_ref2d.text)
  182. rpn_ref2d = struct.pack("<3H", 0x0000, r1, c1)
  183. }
  184. ( COLON ref3d_ref2d2: REF2D
  185. {
  186. ptg = ptgArea3dR + _RVAdeltaArea[arg_type]
  187. r2, c2 = Utils.cell_to_packed_rowcol(ref3d_ref2d2.text)
  188. rpn_ref2d = struct.pack("<5H", 0x0000, r1, r2, c1, c2)
  189. }
  190. )?
  191. {
  192. self.rpn += struct.pack("<B", ptg)
  193. self.sheet_references.append((sheet1, sheet2, len(self.rpn)))
  194. self.rpn += rpn_ref2d
  195. }
  196. | FUNC_IF
  197. LP expr["V"] (SEMICOLON | COMMA)
  198. {
  199. self.rpn += struct.pack("<BBH", ptgAttr, 0x02, 0) // tAttrIf
  200. pos0 = len(self.rpn) - 2
  201. }
  202. expr[arg_type] (SEMICOLON | COMMA)
  203. {
  204. self.rpn += struct.pack("<BBH", ptgAttr, 0x08, 0) // tAttrSkip
  205. pos1 = len(self.rpn) - 2
  206. self.rpn = self.rpn[:pos0] + struct.pack("<H", pos1-pos0) + self.rpn[pos0+2:]
  207. }
  208. expr[arg_type] RP
  209. {
  210. self.rpn += struct.pack("<BBH", ptgAttr, 0x08, 3) // tAttrSkip
  211. self.rpn += struct.pack("<BBH", ptgFuncVarR, 3, 1) // 3 = nargs, 1 = IF func
  212. pos2 = len(self.rpn)
  213. self.rpn = self.rpn[:pos1] + struct.pack("<H", pos2-(pos1+2)-1) + self.rpn[pos1+2:]
  214. }
  215. | FUNC_CHOOSE
  216. {
  217. arg_type = "R"
  218. rpn_chunks = []
  219. }
  220. LP expr["V"] // first argument (the selector)
  221. {
  222. rpn_start = len(self.rpn)
  223. ref_markers = [len(self.sheet_references)]
  224. }
  225. (
  226. (SEMICOLON | COMMA)
  227. { mark = len(self.rpn) }
  228. (
  229. expr[arg_type]
  230. | { self.rpn += struct.pack("B", ptgMissArg) }
  231. )
  232. {
  233. rpn_chunks.append(self.rpn[mark:])
  234. ref_markers.append(len(self.sheet_references))
  235. }
  236. )*
  237. RP
  238. {
  239. self.rpn = self.rpn[:rpn_start]
  240. nc = len(rpn_chunks)
  241. chunklens = [len(chunk) for chunk in rpn_chunks]
  242. skiplens = [0] * nc
  243. skiplens[-1] = 3
  244. for ic in xrange(nc-1, 0, -1):
  245. skiplens[ic-1] = skiplens[ic] + chunklens[ic] + 4
  246. jump_pos = [2 * nc + 2]
  247. for ic in xrange(nc):
  248. jump_pos.append(jump_pos[-1] + chunklens[ic] + 4)
  249. chunk_shift = 2 * nc + 6 // size of tAttrChoose
  250. for ic in xrange(nc):
  251. for refx in xrange(ref_markers[ic], ref_markers[ic+1]):
  252. ref = self.sheet_references[refx]
  253. self.sheet_references[refx] = (ref[0], ref[1], ref[2] + chunk_shift)
  254. chunk_shift += 4 // size of tAttrSkip
  255. choose_rpn = []
  256. choose_rpn.append(struct.pack("<BBH", ptgAttr, 0x04, nc)) // 0x04 is tAttrChoose
  257. choose_rpn.append(struct.pack("<%dH" % (nc+1), *jump_pos))
  258. for ic in xrange(nc):
  259. choose_rpn.append(rpn_chunks[ic])
  260. choose_rpn.append(struct.pack("<BBH", ptgAttr, 0x08, skiplens[ic])) // 0x08 is tAttrSkip
  261. choose_rpn.append(struct.pack("<BBH", ptgFuncVarV, nc+1, 100)) // 100 is CHOOSE fn
  262. self.rpn += "".join(choose_rpn)
  263. }
  264. | name_tok:NAME
  265. {
  266. raise Exception("[formula] found unexpected NAME token (%r)" % name_tok.txt)
  267. // #### TODO: handle references to defined names here
  268. }
  269. | func_tok:NAME
  270. {
  271. func_toku = func_tok.text.upper()
  272. if func_toku in all_funcs_by_name:
  273. (opcode,
  274. min_argc,
  275. max_argc,
  276. func_type,
  277. arg_type_str) = all_funcs_by_name[func_toku]
  278. arg_type_list = list(arg_type_str)
  279. else:
  280. raise Exception("[formula] unknown function (%s)" % func_tok.text)
  281. // print "**func_tok1 %s %s" % (func_toku, func_type)
  282. xcall = opcode < 0
  283. if xcall:
  284. // The name of the add-in function is passed as the 1st arg
  285. // of the hidden XCALL function
  286. self.xcall_references.append((func_toku, len(self.rpn) + 1))
  287. self.rpn += struct.pack("<BHHH",
  288. ptgNameXR,
  289. 0xadde, // ##PATCHME## index to REF entry in EXTERNSHEET record
  290. 0xefbe, // ##PATCHME## one-based index to EXTERNNAME record
  291. 0x0000) // unused
  292. }
  293. LP arg_count = expr_list[arg_type_list, min_argc, max_argc] RP
  294. {
  295. if arg_count > max_argc or arg_count < min_argc:
  296. raise Exception, "%d parameters for function: %s" % (arg_count, func_tok.text)
  297. if xcall:
  298. func_ptg = ptgFuncVarR + _RVAdelta[func_type]
  299. self.rpn += struct.pack("<2BH", func_ptg, arg_count + 1, 255) // 255 is magic XCALL function
  300. elif min_argc == max_argc:
  301. func_ptg = ptgFuncR + _RVAdelta[func_type]
  302. self.rpn += struct.pack("<BH", func_ptg, opcode)
  303. elif arg_count == 1 and func_tok.text.upper() == "SUM":
  304. self.rpn += struct.pack("<BBH", ptgAttr, 0x10, 0) // tAttrSum
  305. else:
  306. func_ptg = ptgFuncVarR + _RVAdelta[func_type]
  307. self.rpn += struct.pack("<2BH", func_ptg, arg_count, opcode)
  308. }
  309. | LP expr[arg_type] RP
  310. {
  311. self.rpn += struct.pack("B", ptgParen)
  312. }
  313. ;
  314. expr_list[arg_type_list, min_argc, max_argc] returns [arg_cnt]
  315. {
  316. arg_cnt = 0
  317. arg_type = arg_type_list[arg_cnt]
  318. // print "**expr_list1[%d] req=%s" % (arg_cnt, arg_type)
  319. }
  320. : expr[arg_type] { arg_cnt += 1 }
  321. (
  322. {
  323. if arg_cnt < len(arg_type_list):
  324. arg_type = arg_type_list[arg_cnt]
  325. else:
  326. arg_type = arg_type_list[-1]
  327. if arg_type == "+":
  328. arg_type = arg_type_list[-2]
  329. // print "**expr_list2[%d] req=%s" % (arg_cnt, arg_type)
  330. }
  331. (SEMICOLON | COMMA)
  332. (
  333. expr[arg_type]
  334. | { self.rpn += struct.pack("B", ptgMissArg) }
  335. )
  336. { arg_cnt += 1 }
  337. )*
  338. |
  339. ;
  340. sheet returns[ref]
  341. : sheet_ref_name: NAME
  342. { ref = sheet_ref_name.text }
  343. | sheet_ref_int: INT_CONST
  344. { ref = sheet_ref_int.text }
  345. | sheet_ref_quote: QUOTENAME
  346. { ref = sheet_ref_quote.text[1:-1].replace("''", "'") }
  347. ;