plural.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632
  1. # -*- coding: utf-8 -*-
  2. """
  3. babel.plural
  4. ~~~~~~~~~~~~
  5. CLDR Plural support. See UTS #35.
  6. :copyright: (c) 2013-2021 by the Babel Team.
  7. :license: BSD, see LICENSE for more details.
  8. """
  9. import re
  10. from babel._compat import decimal
  11. _plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')
  12. _fallback_tag = 'other'
  13. def extract_operands(source):
  14. """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.
  15. The result is a 6-tuple (n, i, v, w, f, t), where those symbols are as follows:
  16. ====== ===============================================================
  17. Symbol Value
  18. ------ ---------------------------------------------------------------
  19. n absolute value of the source number (integer and decimals).
  20. i integer digits of n.
  21. v number of visible fraction digits in n, with trailing zeros.
  22. w number of visible fraction digits in n, without trailing zeros.
  23. f visible fractional digits in n, with trailing zeros.
  24. t visible fractional digits in n, without trailing zeros.
  25. ====== ===============================================================
  26. .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Operands
  27. :param source: A real number
  28. :type source: int|float|decimal.Decimal
  29. :return: A n-i-v-w-f-t tuple
  30. :rtype: tuple[decimal.Decimal, int, int, int, int, int]
  31. """
  32. n = abs(source)
  33. i = int(n)
  34. if isinstance(n, float):
  35. if i == n:
  36. n = i
  37. else:
  38. # Cast the `float` to a number via the string representation.
  39. # This is required for Python 2.6 anyway (it will straight out fail to
  40. # do the conversion otherwise), and it's highly unlikely that the user
  41. # actually wants the lossless conversion behavior (quoting the Python
  42. # documentation):
  43. # > If value is a float, the binary floating point value is losslessly
  44. # > converted to its exact decimal equivalent.
  45. # > This conversion can often require 53 or more digits of precision.
  46. # Should the user want that behavior, they can simply pass in a pre-
  47. # converted `Decimal` instance of desired accuracy.
  48. n = decimal.Decimal(str(n))
  49. if isinstance(n, decimal.Decimal):
  50. dec_tuple = n.as_tuple()
  51. exp = dec_tuple.exponent
  52. fraction_digits = dec_tuple.digits[exp:] if exp < 0 else ()
  53. trailing = ''.join(str(d) for d in fraction_digits)
  54. no_trailing = trailing.rstrip('0')
  55. v = len(trailing)
  56. w = len(no_trailing)
  57. f = int(trailing or 0)
  58. t = int(no_trailing or 0)
  59. else:
  60. v = w = f = t = 0
  61. return n, i, v, w, f, t
  62. class PluralRule(object):
  63. """Represents a set of language pluralization rules. The constructor
  64. accepts a list of (tag, expr) tuples or a dict of `CLDR rules`_. The
  65. resulting object is callable and accepts one parameter with a positive or
  66. negative number (both integer and float) for the number that indicates the
  67. plural form for a string and returns the tag for the format:
  68. >>> rule = PluralRule({'one': 'n is 1'})
  69. >>> rule(1)
  70. 'one'
  71. >>> rule(2)
  72. 'other'
  73. Currently the CLDR defines these tags: zero, one, two, few, many and
  74. other where other is an implicit default. Rules should be mutually
  75. exclusive; for a given numeric value, only one rule should apply (i.e.
  76. the condition should only be true for one of the plural rule elements.
  77. .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
  78. """
  79. __slots__ = ('abstract', '_func')
  80. def __init__(self, rules):
  81. """Initialize the rule instance.
  82. :param rules: a list of ``(tag, expr)``) tuples with the rules
  83. conforming to UTS #35 or a dict with the tags as keys
  84. and expressions as values.
  85. :raise RuleError: if the expression is malformed
  86. """
  87. if isinstance(rules, dict):
  88. rules = rules.items()
  89. found = set()
  90. self.abstract = []
  91. for key, expr in sorted(list(rules)):
  92. if key not in _plural_tags:
  93. raise ValueError('unknown tag %r' % key)
  94. elif key in found:
  95. raise ValueError('tag %r defined twice' % key)
  96. found.add(key)
  97. ast = _Parser(expr).ast
  98. if ast:
  99. self.abstract.append((key, ast))
  100. def __repr__(self):
  101. rules = self.rules
  102. return '<%s %r>' % (
  103. type(self).__name__,
  104. ', '.join(['%s: %s' % (tag, rules[tag]) for tag in _plural_tags
  105. if tag in rules])
  106. )
  107. @classmethod
  108. def parse(cls, rules):
  109. """Create a `PluralRule` instance for the given rules. If the rules
  110. are a `PluralRule` object, that object is returned.
  111. :param rules: the rules as list or dict, or a `PluralRule` object
  112. :raise RuleError: if the expression is malformed
  113. """
  114. if isinstance(rules, cls):
  115. return rules
  116. return cls(rules)
  117. @property
  118. def rules(self):
  119. """The `PluralRule` as a dict of unicode plural rules.
  120. >>> rule = PluralRule({'one': 'n is 1'})
  121. >>> rule.rules
  122. {'one': 'n is 1'}
  123. """
  124. _compile = _UnicodeCompiler().compile
  125. return dict([(tag, _compile(ast)) for tag, ast in self.abstract])
  126. tags = property(lambda x: frozenset([i[0] for i in x.abstract]), doc="""
  127. A set of explicitly defined tags in this rule. The implicit default
  128. ``'other'`` rules is not part of this set unless there is an explicit
  129. rule for it.""")
  130. def __getstate__(self):
  131. return self.abstract
  132. def __setstate__(self, abstract):
  133. self.abstract = abstract
  134. def __call__(self, n):
  135. if not hasattr(self, '_func'):
  136. self._func = to_python(self)
  137. return self._func(n)
  138. def to_javascript(rule):
  139. """Convert a list/dict of rules or a `PluralRule` object into a JavaScript
  140. function. This function depends on no external library:
  141. >>> to_javascript({'one': 'n is 1'})
  142. "(function(n) { return (n == 1) ? 'one' : 'other'; })"
  143. Implementation detail: The function generated will probably evaluate
  144. expressions involved into range operations multiple times. This has the
  145. advantage that external helper functions are not required and is not a
  146. big performance hit for these simple calculations.
  147. :param rule: the rules as list or dict, or a `PluralRule` object
  148. :raise RuleError: if the expression is malformed
  149. """
  150. to_js = _JavaScriptCompiler().compile
  151. result = ['(function(n) { return ']
  152. for tag, ast in PluralRule.parse(rule).abstract:
  153. result.append('%s ? %r : ' % (to_js(ast), tag))
  154. result.append('%r; })' % _fallback_tag)
  155. return ''.join(result)
  156. def to_python(rule):
  157. """Convert a list/dict of rules or a `PluralRule` object into a regular
  158. Python function. This is useful in situations where you need a real
  159. function and don't are about the actual rule object:
  160. >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
  161. >>> func(1)
  162. 'one'
  163. >>> func(3)
  164. 'few'
  165. >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
  166. >>> func(11)
  167. 'one'
  168. >>> func(15)
  169. 'few'
  170. :param rule: the rules as list or dict, or a `PluralRule` object
  171. :raise RuleError: if the expression is malformed
  172. """
  173. namespace = {
  174. 'IN': in_range_list,
  175. 'WITHIN': within_range_list,
  176. 'MOD': cldr_modulo,
  177. 'extract_operands': extract_operands,
  178. }
  179. to_python_func = _PythonCompiler().compile
  180. result = [
  181. 'def evaluate(n):',
  182. ' n, i, v, w, f, t = extract_operands(n)',
  183. ]
  184. for tag, ast in PluralRule.parse(rule).abstract:
  185. # the str() call is to coerce the tag to the native string. It's
  186. # a limited ascii restricted set of tags anyways so that is fine.
  187. result.append(' if (%s): return %r' % (to_python_func(ast), str(tag)))
  188. result.append(' return %r' % _fallback_tag)
  189. code = compile('\n'.join(result), '<rule>', 'exec')
  190. eval(code, namespace)
  191. return namespace['evaluate']
  192. def to_gettext(rule):
  193. """The plural rule as gettext expression. The gettext expression is
  194. technically limited to integers and returns indices rather than tags.
  195. >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
  196. 'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2)'
  197. :param rule: the rules as list or dict, or a `PluralRule` object
  198. :raise RuleError: if the expression is malformed
  199. """
  200. rule = PluralRule.parse(rule)
  201. used_tags = rule.tags | {_fallback_tag}
  202. _compile = _GettextCompiler().compile
  203. _get_index = [tag for tag in _plural_tags if tag in used_tags].index
  204. result = ['nplurals=%d; plural=(' % len(used_tags)]
  205. for tag, ast in rule.abstract:
  206. result.append('%s ? %d : ' % (_compile(ast), _get_index(tag)))
  207. result.append('%d)' % _get_index(_fallback_tag))
  208. return ''.join(result)
  209. def in_range_list(num, range_list):
  210. """Integer range list test. This is the callback for the "in" operator
  211. of the UTS #35 pluralization rule language:
  212. >>> in_range_list(1, [(1, 3)])
  213. True
  214. >>> in_range_list(3, [(1, 3)])
  215. True
  216. >>> in_range_list(3, [(1, 3), (5, 8)])
  217. True
  218. >>> in_range_list(1.2, [(1, 4)])
  219. False
  220. >>> in_range_list(10, [(1, 4)])
  221. False
  222. >>> in_range_list(10, [(1, 4), (6, 8)])
  223. False
  224. """
  225. return num == int(num) and within_range_list(num, range_list)
  226. def within_range_list(num, range_list):
  227. """Float range test. This is the callback for the "within" operator
  228. of the UTS #35 pluralization rule language:
  229. >>> within_range_list(1, [(1, 3)])
  230. True
  231. >>> within_range_list(1.0, [(1, 3)])
  232. True
  233. >>> within_range_list(1.2, [(1, 4)])
  234. True
  235. >>> within_range_list(8.8, [(1, 4), (7, 15)])
  236. True
  237. >>> within_range_list(10, [(1, 4)])
  238. False
  239. >>> within_range_list(10.5, [(1, 4), (20, 30)])
  240. False
  241. """
  242. return any(num >= min_ and num <= max_ for min_, max_ in range_list)
  243. def cldr_modulo(a, b):
  244. """Javaish modulo. This modulo operator returns the value with the sign
  245. of the dividend rather than the divisor like Python does:
  246. >>> cldr_modulo(-3, 5)
  247. -3
  248. >>> cldr_modulo(-3, -5)
  249. -3
  250. >>> cldr_modulo(3, 5)
  251. 3
  252. """
  253. reverse = 0
  254. if a < 0:
  255. a *= -1
  256. reverse = 1
  257. if b < 0:
  258. b *= -1
  259. rv = a % b
  260. if reverse:
  261. rv *= -1
  262. return rv
  263. class RuleError(Exception):
  264. """Raised if a rule is malformed."""
  265. _VARS = 'nivwft'
  266. _RULES = [
  267. (None, re.compile(r'\s+', re.UNICODE)),
  268. ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[{0}])\b'
  269. .format(_VARS))),
  270. ('value', re.compile(r'\d+')),
  271. ('symbol', re.compile(r'%|,|!=|=')),
  272. ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE)) # U+2026: ELLIPSIS
  273. ]
  274. def tokenize_rule(s):
  275. s = s.split('@')[0]
  276. result = []
  277. pos = 0
  278. end = len(s)
  279. while pos < end:
  280. for tok, rule in _RULES:
  281. match = rule.match(s, pos)
  282. if match is not None:
  283. pos = match.end()
  284. if tok:
  285. result.append((tok, match.group()))
  286. break
  287. else:
  288. raise RuleError('malformed CLDR pluralization rule. '
  289. 'Got unexpected %r' % s[pos])
  290. return result[::-1]
  291. def test_next_token(tokens, type_, value=None):
  292. return tokens and tokens[-1][0] == type_ and \
  293. (value is None or tokens[-1][1] == value)
  294. def skip_token(tokens, type_, value=None):
  295. if test_next_token(tokens, type_, value):
  296. return tokens.pop()
  297. def value_node(value):
  298. return 'value', (value, )
  299. def ident_node(name):
  300. return name, ()
  301. def range_list_node(range_list):
  302. return 'range_list', range_list
  303. def negate(rv):
  304. return 'not', (rv,)
  305. class _Parser(object):
  306. """Internal parser. This class can translate a single rule into an abstract
  307. tree of tuples. It implements the following grammar::
  308. condition = and_condition ('or' and_condition)*
  309. ('@integer' samples)?
  310. ('@decimal' samples)?
  311. and_condition = relation ('and' relation)*
  312. relation = is_relation | in_relation | within_relation
  313. is_relation = expr 'is' ('not')? value
  314. in_relation = expr (('not')? 'in' | '=' | '!=') range_list
  315. within_relation = expr ('not')? 'within' range_list
  316. expr = operand (('mod' | '%') value)?
  317. operand = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
  318. range_list = (range | value) (',' range_list)*
  319. value = digit+
  320. digit = 0|1|2|3|4|5|6|7|8|9
  321. range = value'..'value
  322. samples = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
  323. sampleRange = decimalValue '~' decimalValue
  324. decimalValue = value ('.' value)?
  325. - Whitespace can occur between or around any of the above tokens.
  326. - Rules should be mutually exclusive; for a given numeric value, only one
  327. rule should apply (i.e. the condition should only be true for one of
  328. the plural rule elements).
  329. - The in and within relations can take comma-separated lists, such as:
  330. 'n in 3,5,7..15'.
  331. - Samples are ignored.
  332. The translator parses the expression on instanciation into an attribute
  333. called `ast`.
  334. """
  335. def __init__(self, string):
  336. self.tokens = tokenize_rule(string)
  337. if not self.tokens:
  338. # If the pattern is only samples, it's entirely possible
  339. # no stream of tokens whatsoever is generated.
  340. self.ast = None
  341. return
  342. self.ast = self.condition()
  343. if self.tokens:
  344. raise RuleError('Expected end of rule, got %r' %
  345. self.tokens[-1][1])
  346. def expect(self, type_, value=None, term=None):
  347. token = skip_token(self.tokens, type_, value)
  348. if token is not None:
  349. return token
  350. if term is None:
  351. term = repr(value is None and type_ or value)
  352. if not self.tokens:
  353. raise RuleError('expected %s but end of rule reached' % term)
  354. raise RuleError('expected %s but got %r' % (term, self.tokens[-1][1]))
  355. def condition(self):
  356. op = self.and_condition()
  357. while skip_token(self.tokens, 'word', 'or'):
  358. op = 'or', (op, self.and_condition())
  359. return op
  360. def and_condition(self):
  361. op = self.relation()
  362. while skip_token(self.tokens, 'word', 'and'):
  363. op = 'and', (op, self.relation())
  364. return op
  365. def relation(self):
  366. left = self.expr()
  367. if skip_token(self.tokens, 'word', 'is'):
  368. return skip_token(self.tokens, 'word', 'not') and 'isnot' or 'is', \
  369. (left, self.value())
  370. negated = skip_token(self.tokens, 'word', 'not')
  371. method = 'in'
  372. if skip_token(self.tokens, 'word', 'within'):
  373. method = 'within'
  374. else:
  375. if not skip_token(self.tokens, 'word', 'in'):
  376. if negated:
  377. raise RuleError('Cannot negate operator based rules.')
  378. return self.newfangled_relation(left)
  379. rv = 'relation', (method, left, self.range_list())
  380. return negate(rv) if negated else rv
  381. def newfangled_relation(self, left):
  382. if skip_token(self.tokens, 'symbol', '='):
  383. negated = False
  384. elif skip_token(self.tokens, 'symbol', '!='):
  385. negated = True
  386. else:
  387. raise RuleError('Expected "=" or "!=" or legacy relation')
  388. rv = 'relation', ('in', left, self.range_list())
  389. return negate(rv) if negated else rv
  390. def range_or_value(self):
  391. left = self.value()
  392. if skip_token(self.tokens, 'ellipsis'):
  393. return left, self.value()
  394. else:
  395. return left, left
  396. def range_list(self):
  397. range_list = [self.range_or_value()]
  398. while skip_token(self.tokens, 'symbol', ','):
  399. range_list.append(self.range_or_value())
  400. return range_list_node(range_list)
  401. def expr(self):
  402. word = skip_token(self.tokens, 'word')
  403. if word is None or word[1] not in _VARS:
  404. raise RuleError('Expected identifier variable')
  405. name = word[1]
  406. if skip_token(self.tokens, 'word', 'mod'):
  407. return 'mod', ((name, ()), self.value())
  408. elif skip_token(self.tokens, 'symbol', '%'):
  409. return 'mod', ((name, ()), self.value())
  410. return ident_node(name)
  411. def value(self):
  412. return value_node(int(self.expect('value')[1]))
  413. def _binary_compiler(tmpl):
  414. """Compiler factory for the `_Compiler`."""
  415. return lambda self, l, r: tmpl % (self.compile(l), self.compile(r))
  416. def _unary_compiler(tmpl):
  417. """Compiler factory for the `_Compiler`."""
  418. return lambda self, x: tmpl % self.compile(x)
  419. compile_zero = lambda x: '0'
  420. class _Compiler(object):
  421. """The compilers are able to transform the expressions into multiple
  422. output formats.
  423. """
  424. def compile(self, arg):
  425. op, args = arg
  426. return getattr(self, 'compile_' + op)(*args)
  427. compile_n = lambda x: 'n'
  428. compile_i = lambda x: 'i'
  429. compile_v = lambda x: 'v'
  430. compile_w = lambda x: 'w'
  431. compile_f = lambda x: 'f'
  432. compile_t = lambda x: 't'
  433. compile_value = lambda x, v: str(v)
  434. compile_and = _binary_compiler('(%s && %s)')
  435. compile_or = _binary_compiler('(%s || %s)')
  436. compile_not = _unary_compiler('(!%s)')
  437. compile_mod = _binary_compiler('(%s %% %s)')
  438. compile_is = _binary_compiler('(%s == %s)')
  439. compile_isnot = _binary_compiler('(%s != %s)')
  440. def compile_relation(self, method, expr, range_list):
  441. raise NotImplementedError()
  442. class _PythonCompiler(_Compiler):
  443. """Compiles an expression to Python."""
  444. compile_and = _binary_compiler('(%s and %s)')
  445. compile_or = _binary_compiler('(%s or %s)')
  446. compile_not = _unary_compiler('(not %s)')
  447. compile_mod = _binary_compiler('MOD(%s, %s)')
  448. def compile_relation(self, method, expr, range_list):
  449. compile_range_list = '[%s]' % ','.join(
  450. ['(%s, %s)' % tuple(map(self.compile, range_))
  451. for range_ in range_list[1]])
  452. return '%s(%s, %s)' % (method.upper(), self.compile(expr),
  453. compile_range_list)
  454. class _GettextCompiler(_Compiler):
  455. """Compile into a gettext plural expression."""
  456. compile_i = _Compiler.compile_n
  457. compile_v = compile_zero
  458. compile_w = compile_zero
  459. compile_f = compile_zero
  460. compile_t = compile_zero
  461. def compile_relation(self, method, expr, range_list):
  462. rv = []
  463. expr = self.compile(expr)
  464. for item in range_list[1]:
  465. if item[0] == item[1]:
  466. rv.append('(%s == %s)' % (
  467. expr,
  468. self.compile(item[0])
  469. ))
  470. else:
  471. min, max = map(self.compile, item)
  472. rv.append('(%s >= %s && %s <= %s)' % (
  473. expr,
  474. min,
  475. expr,
  476. max
  477. ))
  478. return '(%s)' % ' || '.join(rv)
  479. class _JavaScriptCompiler(_GettextCompiler):
  480. """Compiles the expression to plain of JavaScript."""
  481. # XXX: presently javascript does not support any of the
  482. # fraction support and basically only deals with integers.
  483. compile_i = lambda x: 'parseInt(n, 10)'
  484. compile_v = compile_zero
  485. compile_w = compile_zero
  486. compile_f = compile_zero
  487. compile_t = compile_zero
  488. def compile_relation(self, method, expr, range_list):
  489. code = _GettextCompiler.compile_relation(
  490. self, method, expr, range_list)
  491. if method == 'in':
  492. expr = self.compile(expr)
  493. code = '(parseInt(%s, 10) == %s && %s)' % (expr, expr, code)
  494. return code
  495. class _UnicodeCompiler(_Compiler):
  496. """Returns a unicode pluralization rule again."""
  497. # XXX: this currently spits out the old syntax instead of the new
  498. # one. We can change that, but it will break a whole bunch of stuff
  499. # for users I suppose.
  500. compile_is = _binary_compiler('%s is %s')
  501. compile_isnot = _binary_compiler('%s is not %s')
  502. compile_and = _binary_compiler('%s and %s')
  503. compile_or = _binary_compiler('%s or %s')
  504. compile_mod = _binary_compiler('%s mod %s')
  505. def compile_not(self, relation):
  506. return self.compile_relation(negated=True, *relation[1])
  507. def compile_relation(self, method, expr, range_list, negated=False):
  508. ranges = []
  509. for item in range_list[1]:
  510. if item[0] == item[1]:
  511. ranges.append(self.compile(item[0]))
  512. else:
  513. ranges.append('%s..%s' % tuple(map(self.compile, item)))
  514. return '%s%s %s %s' % (
  515. self.compile(expr), negated and ' not' or '',
  516. method, ','.join(ranges)
  517. )