123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632 |
- # -*- coding: utf-8 -*-
- """
- babel.numbers
- ~~~~~~~~~~~~~
- CLDR Plural support. See UTS #35.
- :copyright: (c) 2013-2021 by the Babel Team.
- :license: BSD, see LICENSE for more details.
- """
- import re
- from babel._compat import decimal
# Plural category tags accepted as rule keys.  The order of this tuple is
# the order used when numbering gettext plural forms and when rendering
# a rule's repr.
_plural_tags = ('zero', 'one', 'two', 'few', 'many', 'other')

# Tag produced when none of the explicitly defined rules matches.
_fallback_tag = 'other'
def extract_operands(source):
    """Extract operands from a decimal, a float or an int, according to `CLDR rules`_.

    The result is a 6-tuple (n, i, v, w, f, t), where those symbols are as follows:

    ====== ===============================================================
    Symbol Value
    ------ ---------------------------------------------------------------
    n      absolute value of the source number (integer and decimals).
    i      integer digits of n.
    v      number of visible fraction digits in n, with trailing zeros.
    w      number of visible fraction digits in n, without trailing zeros.
    f      visible fractional digits in n, with trailing zeros.
    t      visible fractional digits in n, without trailing zeros.
    ====== ===============================================================

    Leading zeros of the fraction count as visible digits, so ``0.05`` has
    ``v == 2`` and ``f == 5``.

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Operands

    :param source: A real number
    :type source: int|float|decimal.Decimal
    :return: A n-i-v-w-f-t tuple; ``n`` is an ``int`` for integral ``int`` or
             ``float`` input and a ``decimal.Decimal`` otherwise
    :rtype: tuple[decimal.Decimal|int, int, int, int, int, int]
    """
    n = abs(source)
    i = int(n)
    if isinstance(n, float):
        if i == n:
            # Integral floats carry no visible fraction digits.
            n = i
        else:
            # Cast the `float` to a number via the string representation.
            # It is highly unlikely that the user actually wants the lossless
            # conversion behavior (quoting the Python documentation):
            # > If value is a float, the binary floating point value is losslessly
            # > converted to its exact decimal equivalent.
            # > This conversion can often require 53 or more digits of precision.
            # Should the user want that behavior, they can simply pass in a pre-
            # converted `Decimal` instance of desired accuracy.
            n = decimal.Decimal(str(n))

    if isinstance(n, decimal.Decimal):
        dec_tuple = n.as_tuple()
        exp = dec_tuple.exponent
        if exp < 0:
            # The coefficient may hold fewer digits than there are fraction
            # places (e.g. 0.05 is digits=(5,), exponent=-2), so pad with
            # leading zeros up to the real number of fraction digits.
            fraction_digits = dec_tuple.digits[exp:]
            trailing = ''.join(str(d) for d in fraction_digits).zfill(-exp)
        else:
            trailing = ''
        no_trailing = trailing.rstrip('0')
        v = len(trailing)
        w = len(no_trailing)
        f = int(trailing or 0)
        t = int(no_trailing or 0)
    else:
        v = w = f = t = 0
    return n, i, v, w, f, t
class PluralRule(object):
    """Represents a set of language pluralization rules.  The constructor
    accepts a list of (tag, expr) tuples or a dict of `CLDR rules`_.  The
    resulting object is callable and accepts one parameter with a positive or
    negative number (both integer and float) for the number that indicates the
    plural form for a string and returns the tag for the format:

    >>> rule = PluralRule({'one': 'n is 1'})
    >>> rule(1)
    'one'
    >>> rule(2)
    'other'

    Currently the CLDR defines these tags: zero, one, two, few, many and
    other where other is an implicit default.  Rules should be mutually
    exclusive; for a given numeric value, only one rule should apply (i.e.
    the condition should only be true for one of the plural rule elements).

    .. _`CLDR rules`: https://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
    """

    __slots__ = ('abstract', '_func')

    def __init__(self, rules):
        """Initialize the rule instance.

        :param rules: a list of ``(tag, expr)`` tuples with the rules
                      conforming to UTS #35 or a dict with the tags as keys
                      and expressions as values.
        :raise RuleError: if the expression is malformed
        :raise ValueError: if a tag is unknown or defined more than once
        """
        if isinstance(rules, dict):
            rules = rules.items()
        seen = set()
        self.abstract = []
        for tag, expression in sorted(rules):
            if tag not in _plural_tags:
                raise ValueError('unknown tag %r' % tag)
            if tag in seen:
                raise ValueError('tag %r defined twice' % tag)
            seen.add(tag)
            tree = _Parser(expression).ast
            # Rules consisting only of samples parse to nothing and are
            # left out of the abstract rule list.
            if tree:
                self.abstract.append((tag, tree))

    def __repr__(self):
        compiled = self.rules
        described = [
            '%s: %s' % (tag, compiled[tag])
            for tag in _plural_tags
            if tag in compiled
        ]
        return '<%s %r>' % (type(self).__name__, ', '.join(described))

    @classmethod
    def parse(cls, rules):
        """Create a `PluralRule` instance for the given rules.  If the rules
        are a `PluralRule` object, that object is returned.

        :param rules: the rules as list or dict, or a `PluralRule` object
        :raise RuleError: if the expression is malformed
        """
        return rules if isinstance(rules, cls) else cls(rules)

    @property
    def rules(self):
        """The `PluralRule` as a dict of unicode plural rules.

        >>> rule = PluralRule({'one': 'n is 1'})
        >>> rule.rules
        {'one': 'n is 1'}
        """
        compiler = _UnicodeCompiler()
        return {tag: compiler.compile(tree) for tag, tree in self.abstract}

    @property
    def tags(self):
        """A set of explicitly defined tags in this rule.  The implicit default
        ``'other'`` rules is not part of this set unless there is an explicit
        rule for it.
        """
        return frozenset([entry[0] for entry in self.abstract])

    def __getstate__(self):
        # Only the parsed rules are pickled; the compiled function is
        # rebuilt lazily on the first call.
        return self.abstract

    def __setstate__(self, abstract):
        self.abstract = abstract

    def __call__(self, n):
        # Compile the rule to a Python function on first use and cache it.
        try:
            func = self._func
        except AttributeError:
            func = self._func = to_python(self)
        return func(n)
def to_javascript(rule):
    """Convert a list/dict of rules or a `PluralRule` object into a JavaScript
    function.  This function depends on no external library:

    >>> to_javascript({'one': 'n is 1'})
    "(function(n) { return (n == 1) ? 'one' : 'other'; })"

    Implementation detail: The function generated will probably evaluate
    expressions involved into range operations multiple times.  This has the
    advantage that external helper functions are not required and is not a
    big performance hit for these simple calculations.

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    compiler = _JavaScriptCompiler()
    # One "<condition> ? '<tag>' : " link per rule; the chain ends in the
    # fallback tag.
    branches = [
        '%s ? %r : ' % (compiler.compile(ast), tag)
        for tag, ast in PluralRule.parse(rule).abstract
    ]
    prefix = '(function(n) { return '
    suffix = '%r; })' % _fallback_tag
    return prefix + ''.join(branches) + suffix
def to_python(rule):
    """Convert a list/dict of rules or a `PluralRule` object into a regular
    Python function.  This is useful in situations where you need a real
    function and don't care about the actual rule object:

    >>> func = to_python({'one': 'n is 1', 'few': 'n in 2..4'})
    >>> func(1)
    'one'
    >>> func(3)
    'few'
    >>> func = to_python({'one': 'n in 1,11', 'few': 'n in 3..10,13..19'})
    >>> func(11)
    'one'
    >>> func(15)
    'few'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    # Helpers that the generated source refers to by name.
    namespace = {
        'IN': in_range_list,
        'WITHIN': within_range_list,
        'MOD': cldr_modulo,
        'extract_operands': extract_operands,
    }
    compiler = _PythonCompiler()
    source_lines = [
        'def evaluate(n):',
        ' n, i, v, w, f, t = extract_operands(n)',
    ]
    for tag, ast in PluralRule.parse(rule).abstract:
        # the str() call is to coerce the tag to the native string.  It's
        # a limited ascii restricted set of tags anyways so that is fine.
        source_lines.append(
            ' if (%s): return %r' % (compiler.compile(ast), str(tag)))
    source_lines.append(' return %r' % _fallback_tag)
    # The source is generated from the parsed rule tree, never taken
    # verbatim from the rule text.
    code = compile('\n'.join(source_lines), '<rule>', 'exec')
    eval(code, namespace)
    return namespace['evaluate']
def to_gettext(rule):
    """The plural rule as gettext expression.  The gettext expression is
    technically limited to integers and returns indices rather than tags.

    >>> to_gettext({'one': 'n is 1', 'two': 'n is 2'})
    'nplurals=3; plural=((n == 1) ? 0 : (n == 2) ? 1 : 2)'

    :param rule: the rules as list or dict, or a `PluralRule` object
    :raise RuleError: if the expression is malformed
    """
    rule = PluralRule.parse(rule)
    used_tags = rule.tags | {_fallback_tag}
    compiler = _GettextCompiler()
    # A tag's index is its position among the used tags, in
    # ``_plural_tags`` order.
    ordered_tags = [tag for tag in _plural_tags if tag in used_tags]

    pieces = ['nplurals=%d; plural=(' % len(used_tags)]
    pieces.extend(
        '%s ? %d : ' % (compiler.compile(ast), ordered_tags.index(tag))
        for tag, ast in rule.abstract
    )
    pieces.append('%d)' % ordered_tags.index(_fallback_tag))
    return ''.join(pieces)
def in_range_list(num, range_list):
    """Integer range list test.  This is the callback for the "in" operator
    of the UTS #35 pluralization rule language:

    >>> in_range_list(1, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3)])
    True
    >>> in_range_list(3, [(1, 3), (5, 8)])
    True
    >>> in_range_list(1.2, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4)])
    False
    >>> in_range_list(10, [(1, 4), (6, 8)])
    False
    """
    # Values with a fractional part never satisfy "in".
    if num != int(num):
        return False
    # Same inclusive-bounds check that the "within" operator performs.
    return any(num >= lower and num <= upper for lower, upper in range_list)
def within_range_list(num, range_list):
    """Float range test.  This is the callback for the "within" operator
    of the UTS #35 pluralization rule language:

    >>> within_range_list(1, [(1, 3)])
    True
    >>> within_range_list(1.0, [(1, 3)])
    True
    >>> within_range_list(1.2, [(1, 4)])
    True
    >>> within_range_list(8.8, [(1, 4), (7, 15)])
    True
    >>> within_range_list(10, [(1, 4)])
    False
    >>> within_range_list(10.5, [(1, 4), (20, 30)])
    False
    """
    # Both bounds of every (min, max) pair are inclusive.
    for lower, upper in range_list:
        if num >= lower and num <= upper:
            return True
    return False
def cldr_modulo(a, b):
    """Javaish modulo.  This modulo operator returns the value with the sign
    of the dividend rather than the divisor like Python does:

    >>> cldr_modulo(-3, 5)
    -3
    >>> cldr_modulo(-3, -5)
    -3
    >>> cldr_modulo(3, 5)
    3
    """
    dividend_negative = a < 0
    # Work on magnitudes, then give the result the dividend's sign.
    dividend = a * -1 if dividend_negative else a
    divisor = b * -1 if b < 0 else b
    remainder = dividend % divisor
    return remainder * -1 if dividend_negative else remainder
class RuleError(Exception):
    """Error signalling that a pluralization rule is malformed."""
# Single-letter operand names that may appear in a rule expression.
_VARS = 'nivwft'

# Token table of (token type, pattern) pairs.  The patterns are tried in
# this order at the current position; a type of ``None`` means the match
# is consumed without producing a token.
_RULES = [
    (None, re.compile(r'\s+', re.UNICODE)),
    ('word', re.compile(r'\b(and|or|is|(?:with)?in|not|mod|[{0}])\b'
                        .format(_VARS))),
    ('value', re.compile(r'\d+')),
    ('symbol', re.compile(r'%|,|!=|=')),
    ('ellipsis', re.compile(r'\.{2,3}|\u2026', re.UNICODE))  # U+2026: ELLIPSIS
]


def tokenize_rule(s):
    """Split a rule expression into ``(type, text)`` tokens.

    Everything from the first ``@`` on (the samples) is ignored.  The
    tokens are returned in *reverse* order, so that the next token to
    consume is always the last list element.

    :param s: the rule expression
    :return: list of ``(type, text)`` tuples, last token of the rule first
    :raise RuleError: if a character matches none of the token patterns
    """
    text = s.split('@')[0]
    tokens = []
    pos, end = 0, len(text)
    while pos < end:
        for kind, pattern in _RULES:
            match = pattern.match(text, pos)
            if match is None:
                continue
            pos = match.end()
            if kind:
                tokens.append((kind, match.group()))
            break
        else:
            # No pattern matched at this position.
            raise RuleError('malformed CLDR pluralization rule. '
                            'Got unexpected %r' % text[pos])
    tokens.reverse()
    return tokens
def test_next_token(tokens, type_, value=None):
    """Tell whether the next token has the given type (and, optionally, value).

    The token list is a stack: the next token to be consumed is the last
    element.  The list is never modified.

    :param tokens: list of ``(type, text)`` tuples, next token last
    :param type_: the token type to look for
    :param value: if not ``None``, the token text must equal this as well
    :return: ``True`` if the next token matches, ``False`` otherwise,
             including when there are no tokens left
    :rtype: bool
    """
    if not tokens:
        return False
    upcoming = tokens[-1]
    return upcoming[0] == type_ and (value is None or upcoming[1] == value)


# This is a parser helper, not a test case.  Its name matches pytest's
# default ``test_*`` pattern, so opt out of collection explicitly in case
# the name gets imported into a test module.
test_next_token.__test__ = False
def skip_token(tokens, type_, value=None):
    """Consume and return the next token if it matches, else return ``None``.

    The next token is the last element of ``tokens``.  It is popped only
    when its type equals ``type_`` and, if ``value`` is given, its text
    equals ``value``; otherwise ``tokens`` is left untouched.

    :param tokens: list of ``(type, text)`` tuples, next token last
    :param type_: the token type to look for
    :param value: if not ``None``, the token text must equal this as well
    :return: the consumed ``(type, text)`` tuple, or ``None``
    """
    if not tokens:
        return None
    candidate = tokens[-1]
    if candidate[0] != type_:
        return None
    if value is not None and candidate[1] != value:
        return None
    return tokens.pop()
def value_node(value):
    """Build the AST node ``('value', (value,))`` for a literal."""
    args = (value,)
    return ('value', args)
def ident_node(name):
    """Build the AST node ``(name, ())`` for an operand such as ``n``."""
    no_args = ()
    return (name, no_args)
def range_list_node(range_list):
    """Build the AST node ``('range_list', range_list)``; the list is stored as given."""
    return ('range_list', range_list)
def negate(rv):
    """Wrap the AST node ``rv`` in a ``('not', (rv,))`` node."""
    wrapped = (rv,)
    return ('not', wrapped)
class _Parser(object):
    """Internal parser.  This class can translate a single rule into an abstract
    tree of tuples.  It implements the following grammar::

        condition     = and_condition ('or' and_condition)*
                        ('@integer' samples)?
                        ('@decimal' samples)?
        and_condition = relation ('and' relation)*
        relation      = is_relation | in_relation | within_relation
        is_relation   = expr 'is' ('not')? value
        in_relation   = expr (('not')? 'in' | '=' | '!=') range_list
        within_relation = expr ('not')? 'within' range_list
        expr          = operand (('mod' | '%') value)?
        operand       = 'n' | 'i' | 'f' | 't' | 'v' | 'w'
        range_list    = (range | value) (',' range_list)*
        value         = digit+
        digit         = 0|1|2|3|4|5|6|7|8|9
        range         = value'..'value
        samples       = sampleRange (',' sampleRange)* (',' ('…'|'...'))?
        sampleRange   = decimalValue '~' decimalValue
        decimalValue  = value ('.' value)?

    - Whitespace can occur between or around any of the above tokens.
    - Rules should be mutually exclusive; for a given numeric value, only one
      rule should apply (i.e. the condition should only be true for one of
      the plural rule elements).
    - The in and within relations can take comma-separated lists, such as:
      'n in 3,5,7..15'.
    - Samples are ignored.

    The translator parses the expression on instantiation into an attribute
    called `ast`.
    """

    def __init__(self, string):
        """Tokenize and parse ``string``, storing the result in ``self.ast``.

        :raise RuleError: if the rule is malformed or has trailing tokens
        """
        self.tokens = tokenize_rule(string)
        if not self.tokens:
            # If the pattern is only samples, it's entirely possible
            # no stream of tokens whatsoever is generated.
            self.ast = None
            return

        self.ast = self.condition()
        if self.tokens:
            leftover = self.tokens[-1][1]
            raise RuleError('Expected end of rule, got %r' % leftover)

    def expect(self, type_, value=None, term=None):
        """Consume and return the next token, which must match.

        :param term: description of the expected token for the error
                     message; defaults to the repr of ``value``, or of
                     ``type_`` when no value is given
        :raise RuleError: if the next token does not match or none is left
        """
        token = skip_token(self.tokens, type_, value)
        if token is not None:
            return token
        if term is None:
            term = repr(type_ if value is None else value)
        if self.tokens:
            raise RuleError(
                'expected %s but got %r' % (term, self.tokens[-1][1]))
        raise RuleError('expected %s but end of rule reached' % term)

    def condition(self):
        """Parse ``and_condition ('or' and_condition)*``, nesting to the left."""
        node = self.and_condition()
        while skip_token(self.tokens, 'word', 'or'):
            node = ('or', (node, self.and_condition()))
        return node

    def and_condition(self):
        """Parse ``relation ('and' relation)*``, nesting to the left."""
        node = self.relation()
        while skip_token(self.tokens, 'word', 'and'):
            node = ('and', (node, self.relation()))
        return node

    def relation(self):
        """Parse one ``is``, ``in``, ``within``, ``=`` or ``!=`` relation."""
        left = self.expr()
        if skip_token(self.tokens, 'word', 'is'):
            op = 'isnot' if skip_token(self.tokens, 'word', 'not') else 'is'
            return op, (left, self.value())

        negated = skip_token(self.tokens, 'word', 'not')
        if skip_token(self.tokens, 'word', 'within'):
            method = 'within'
        elif skip_token(self.tokens, 'word', 'in'):
            method = 'in'
        elif negated:
            # 'not' cannot be combined with the '=' / '!=' operators.
            raise RuleError('Cannot negate operator based rules.')
        else:
            return self.newfangled_relation(left)

        rv = ('relation', (method, left, self.range_list()))
        return negate(rv) if negated else rv

    def newfangled_relation(self, left):
        """Parse the operator form ``expr ('=' | '!=') range_list``."""
        if skip_token(self.tokens, 'symbol', '='):
            negated = False
        elif skip_token(self.tokens, 'symbol', '!='):
            negated = True
        else:
            raise RuleError('Expected "=" or "!=" or legacy relation')
        rv = ('relation', ('in', left, self.range_list()))
        return negate(rv) if negated else rv

    def range_or_value(self):
        """Parse ``value`` or ``value..value`` into a ``(low, high)`` pair.

        A single value yields a pair whose two members are the same node.
        """
        start = self.value()
        if skip_token(self.tokens, 'ellipsis'):
            return start, self.value()
        return start, start

    def range_list(self):
        """Parse a comma-separated list of values and ranges."""
        ranges = [self.range_or_value()]
        while skip_token(self.tokens, 'symbol', ','):
            ranges.append(self.range_or_value())
        return range_list_node(ranges)

    def expr(self):
        """Parse an operand, optionally followed by ``mod``/``%`` and a value."""
        word = skip_token(self.tokens, 'word')
        if word is None or word[1] not in _VARS:
            raise RuleError('Expected identifier variable')
        operand = ident_node(word[1])
        if (skip_token(self.tokens, 'word', 'mod')
                or skip_token(self.tokens, 'symbol', '%')):
            return 'mod', (operand, self.value())
        return operand

    def value(self):
        """Parse an integer literal into a value node."""
        token = self.expect('value')
        return value_node(int(token[1]))
def _binary_compiler(tmpl):
    """Compiler factory for the `_Compiler`.

    Returns a method that compiles its two operand nodes with
    ``self.compile`` and interpolates the results into ``tmpl``.
    """
    def compile_binary(self, left, right):
        return tmpl % (self.compile(left), self.compile(right))
    return compile_binary
def _unary_compiler(tmpl):
    """Compiler factory for the `_Compiler`.

    Returns a method that compiles its single operand node with
    ``self.compile`` and interpolates the result into ``tmpl``.
    """
    def compile_unary(self, operand):
        return tmpl % self.compile(operand)
    return compile_unary
def compile_zero(x):
    """Compile any operand to the constant ``'0'``, ignoring the argument."""
    return '0'
class _Compiler(object):
    """The compilers are able to transform the expressions into multiple
    output formats.
    """

    def compile(self, arg):
        """Compile the node ``(op, args)`` via the ``compile_<op>`` method."""
        op, args = arg
        handler = getattr(self, 'compile_' + op)
        return handler(*args)

    # Operands compile to their own name by default.
    def compile_n(self):
        return 'n'

    def compile_i(self):
        return 'i'

    def compile_v(self):
        return 'v'

    def compile_w(self):
        return 'w'

    def compile_f(self):
        return 'f'

    def compile_t(self):
        return 't'

    def compile_value(self, v):
        return str(v)

    # Default (C-like) operator syntax; subclasses override as needed.
    compile_and = _binary_compiler('(%s && %s)')
    compile_or = _binary_compiler('(%s || %s)')
    compile_not = _unary_compiler('(!%s)')
    compile_mod = _binary_compiler('(%s %% %s)')
    compile_is = _binary_compiler('(%s == %s)')
    compile_isnot = _binary_compiler('(%s != %s)')

    def compile_relation(self, method, expr, range_list):
        """Compile an ``in``/``within`` relation; left to subclasses."""
        raise NotImplementedError()
class _PythonCompiler(_Compiler):
    """Compiles an expression to Python."""

    compile_and = _binary_compiler('(%s and %s)')
    compile_or = _binary_compiler('(%s or %s)')
    compile_not = _unary_compiler('(not %s)')
    compile_mod = _binary_compiler('MOD(%s, %s)')

    def compile_relation(self, method, expr, range_list):
        """Emit ``IN(expr, [(lo, hi),...])`` or ``WITHIN(expr, [...])``."""
        pairs = ','.join(
            '(%s, %s)' % tuple(map(self.compile, bounds))
            for bounds in range_list[1]
        )
        return '%s(%s, %s)' % (
            method.upper(), self.compile(expr), '[%s]' % pairs)
class _GettextCompiler(_Compiler):
    """Compile into a gettext plural expression."""

    # ``i`` compiles like ``n``; the fraction operands compile to '0'.
    compile_i = _Compiler.compile_n
    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Emit an ``||`` chain of equality and bounds checks."""
        subject = self.compile(expr)
        clauses = []
        for low, high in range_list[1]:
            if low == high:
                # Single value rather than a real range.
                clauses.append('(%s == %s)' % (subject, self.compile(low)))
            else:
                clauses.append('(%s >= %s && %s <= %s)' % (
                    subject, self.compile(low),
                    subject, self.compile(high),
                ))
        return '(%s)' % ' || '.join(clauses)
class _JavaScriptCompiler(_GettextCompiler):
    """Compiles the expression to plain JavaScript."""

    # XXX: presently javascript does not support any of the
    # fraction support and basically only deals with integers.
    def compile_i(self):
        return 'parseInt(n, 10)'

    compile_v = compile_zero
    compile_w = compile_zero
    compile_f = compile_zero
    compile_t = compile_zero

    def compile_relation(self, method, expr, range_list):
        """Emit the gettext-style check, with an integer guard for ``in``."""
        code = _GettextCompiler.compile_relation(
            self, method, expr, range_list)
        if method != 'in':
            return code
        subject = self.compile(expr)
        return '(parseInt(%s, 10) == %s && %s)' % (subject, subject, code)
class _UnicodeCompiler(_Compiler):
    """Returns a unicode pluralization rule again."""

    # XXX: this currently spits out the old syntax instead of the new
    # one.  We can change that, but it will break a whole bunch of stuff
    # for users I suppose.

    compile_is = _binary_compiler('%s is %s')
    compile_isnot = _binary_compiler('%s is not %s')
    compile_and = _binary_compiler('%s and %s')
    compile_or = _binary_compiler('%s or %s')
    compile_mod = _binary_compiler('%s mod %s')

    def compile_not(self, relation):
        """Compile a negated relation node as ``<expr> not <method> <ranges>``."""
        method, expr, range_list = relation[1]
        return self.compile_relation(method, expr, range_list, negated=True)

    def compile_relation(self, method, expr, range_list, negated=False):
        """Emit ``<expr> [not] <method> <comma-separated values/ranges>``."""
        rendered = []
        for low, high in range_list[1]:
            if low == high:
                rendered.append(self.compile(low))
            else:
                rendered.append(
                    '%s..%s' % (self.compile(low), self.compile(high)))
        return '%s%s %s %s' % (
            self.compile(expr),
            ' not' if negated else '',
            method,
            ','.join(rendered),
        )
|