units.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. # -- encoding: UTF-8 --
  2. from babel._compat import string_types
  3. from babel.core import Locale
  4. from babel.numbers import format_decimal, LC_NUMERIC
  class UnknownUnitError(ValueError):
      """Raised when a unit identifier cannot be resolved for a locale.

      The exception message has the form
      ``"<unit> is not a known unit in <locale>"``.

      :param unit: the unit identifier that could not be resolved.
      :param locale: the locale (object or identifier) that was searched.
      """

      def __init__(self, unit, locale):
          message = "%s is not a known unit in %s" % (unit, locale)
          super(UnknownUnitError, self).__init__(message)
  def get_unit_name(measurement_unit, length='long', locale=LC_NUMERIC):
      """
      Look up the display name of a measurement unit in the given locale.

      >>> get_unit_name("radian", locale="en")
      'radians'

      A unit that cannot be resolved raises an exception:

      >>> get_unit_name("battery", locale="fi")
      Traceback (most recent call last):
          ...
      UnknownUnitError: battery is not a known unit in fi

      :param measurement_unit: the code of a measurement unit.
                               Known units can be found in the CLDR Unit Validity XML file:
                               https://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml
      :param length: "short", "long" or "narrow"
      :param locale: the `Locale` object or locale identifier
      :return: The unit display name, or None if the locale has no display
               name stored for the resolved unit at the requested length.
      :raises UnknownUnitError: if the unit cannot be resolved in the locale.
      """
      parsed_locale = Locale.parse(locale)
      qualified_unit = _find_unit_pattern(measurement_unit, locale=parsed_locale)
      if qualified_unit:
          # A missing unit entry or a missing length both fall through to None.
          names_by_length = parsed_locale.unit_display_names.get(qualified_unit, {})
          return names_by_length.get(length)
      raise UnknownUnitError(unit=measurement_unit, locale=parsed_locale)
  def _find_unit_pattern(unit_id, locale=LC_NUMERIC):
      """
      Expand a unit identifier into its qualified form.

      Known units can be found in the CLDR Unit Validity XML file:
      https://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml

      >>> _find_unit_pattern("radian", locale="en")
      'angle-radian'

      Unknown values will return None.

      >>> _find_unit_pattern("horse", locale="en")

      An empty or missing identifier is treated as unknown as well.

      >>> _find_unit_pattern("", locale="en")

      :param unit_id: the code of a measurement unit.
      :param locale: the `Locale` object or locale identifier
      :return: A key to the `unit_patterns` mapping, or None.
      """
      locale = Locale.parse(locale)
      # Every string ends with the empty string, so without this guard an empty
      # identifier would resolve to whichever key happens to be shortest, and a
      # None identifier would make `str.endswith` raise a TypeError.
      if not unit_id:
          return None
      unit_patterns = locale._data["unit_patterns"]
      # An identifier that is already a key of the mapping is returned as-is.
      if unit_id in unit_patterns:
          return unit_id
      # Otherwise treat the identifier as a suffix. Keys are tried shortest
      # first, so the shortest key ending in `unit_id` wins.
      for unit_pattern in sorted(unit_patterns, key=len):
          if unit_pattern.endswith(unit_id):
              return unit_pattern
      return None
  def format_unit(value, measurement_unit, length='long', format=None, locale=LC_NUMERIC):
      """Render a value together with its measurement unit.

      The number is formatted with the locale's number format and the unit
      pattern is chosen with the locale's pluralization rules.

      >>> format_unit(12, 'length-meter', locale='ro_RO')
      u'12 metri'
      >>> format_unit(15.5, 'length-mile', locale='fi_FI')
      u'15,5 mailia'
      >>> format_unit(1200, 'pressure-millimeter-ofhg', locale='nb')
      u'1\\xa0200 millimeter kvikks\\xf8lv'
      >>> format_unit(270, 'ton', locale='en')
      u'270 tons'

      The number format can be overridden through the ``format`` parameter.

      >>> from babel._compat import decimal
      >>> format_unit(decimal.Decimal("-42.774"), 'temperature-celsius', 'short', format='#.0', locale='fr')
      u'-42,8\\u202f\\xb0C'

      Pluralization follows the locale's rules.

      >>> format_unit(1, 'length-meter', locale='ro_RO')
      u'1 metru'
      >>> format_unit(0, 'length-mile', locale='cy')
      u'0 mi'
      >>> format_unit(1, 'length-mile', locale='cy')
      u'1 filltir'
      >>> format_unit(3, 'length-mile', locale='cy')
      u'3 milltir'

      >>> format_unit(15, 'length-horse', locale='fi')
      Traceback (most recent call last):
          ...
      UnknownUnitError: length-horse is not a known unit in fi

      .. versionadded:: 2.2.0

      :param value: the value to format. A string is used verbatim (no number
                    formatting) and is paired with the "one" plural form.
      :param measurement_unit: the code of a measurement unit.
                               Known units can be found in the CLDR Unit Validity XML file:
                               https://unicode.org/repos/cldr/tags/latest/common/validity/unit.xml
      :param length: "short", "long" or "narrow"
      :param format: An optional format, as accepted by `format_decimal`.
      :param locale: the `Locale` object or locale identifier
      :raises UnknownUnitError: if the unit cannot be resolved in the locale.
      """
      locale = Locale.parse(locale)

      qualified_unit = _find_unit_pattern(measurement_unit, locale=locale)
      if not qualified_unit:
          raise UnknownUnitError(unit=measurement_unit, locale=locale)
      patterns_for_length = locale._data["unit_patterns"][qualified_unit].get(length, {})

      if isinstance(value, string_types):
          # Strings are taken to be already formatted, and singular.
          number_text, plural_key = value, "one"
      else:
          number_text = format_decimal(value, format, locale)
          plural_key = locale.plural_form(value)

      if plural_key not in patterns_for_length:
          # Fall back to a somewhat bad representation.
          # nb: This is marked as no-cover, as the current CLDR seemingly has no way for this to happen.
          fallback_name = get_unit_name(measurement_unit, length=length, locale=locale)  # pragma: no cover
          return '%s %s' % (number_text, fallback_name or measurement_unit)  # pragma: no cover

      return patterns_for_length[plural_key].format(number_text)
  def _find_compound_unit(numerator_unit, denominator_unit, locale=LC_NUMERIC):
      """
      Locate a predefined compound ("X per Y") unit pattern.

      This is an internal helper of format_compound_unit.

      >>> _find_compound_unit("kilometer", "hour", locale="en")
      'speed-kilometer-per-hour'

      >>> _find_compound_unit("mile", "gallon", locale="en")
      'consumption-mile-per-gallon'

      `None` is returned when there is no predefined compound pattern.

      >>> _find_compound_unit("gallon", "mile", locale="en")

      >>> _find_compound_unit("horse", "purple", locale="en")

      :param numerator_unit: The numerator unit's identifier
      :param denominator_unit: The denominator unit's identifier
      :param locale: the `Locale` object or locale identifier
      :return: A key to the `unit_patterns` mapping, or None.
      :rtype: str|None
      """
      locale = Locale.parse(locale)

      # Resolve both (possibly partial) identifiers, e.g. "kilometer" and "hour",
      # into their qualified keys such as "length-kilometer" and "duration-hour".
      qualified_numerator = _find_unit_pattern(numerator_unit, locale=locale)
      qualified_denominator = _find_unit_pattern(denominator_unit, locale=locale)

      # Without both halves there is no compound unit to look for.
      if not qualified_numerator or not qualified_denominator:
          return None

      # Compound keys look like "speed-kilometer-per-hour", so drop the leading
      # quantity ("length", "duration", ...) from each qualified key.
      bare_numerator, bare_denominator = (
          qualified.split("-", 1)[-1]
          for qualified in (qualified_numerator, qualified_denominator)
      )

      # Rebuild the compound specifier and let the regular lookup qualify it.
      compound_id = "%s-per-%s" % (bare_numerator, bare_denominator)
      return _find_unit_pattern(compound_id, locale=locale)
  def format_compound_unit(
      numerator_value, numerator_unit=None,
      denominator_value=1, denominator_unit=None,
      length='long', format=None, locale=LC_NUMERIC
  ):
      """
      Format a compound number value, i.e. "kilometers per hour" or similar.

      Both unit specifiers are optional to allow for formatting of arbitrary values still according
      to the locale's general "per" formatting specifier.

      >>> format_compound_unit(7, denominator_value=11, length="short", locale="pt")
      '7/11'

      >>> format_compound_unit(150, "kilometer", denominator_unit="hour", locale="sv")
      '150 kilometer per timme'

      >>> format_compound_unit(150, "kilowatt", denominator_unit="year", locale="fi")
      '150 kilowattia / vuosi'

      >>> format_compound_unit(32.5, "ton", 15, denominator_unit="hour", locale="en")
      '32.5 tons per 15 hours'

      >>> format_compound_unit(160, denominator_unit="square-meter", locale="fr")
      '160 par m\\xe8tre carr\\xe9'

      >>> format_compound_unit(4, "meter", "ratakisko", length="short", locale="fi")
      '4 m/ratakisko'

      >>> format_compound_unit(35, "minute", denominator_unit="fathom", locale="sv")
      '35 minuter per famn'

      >>> from babel.numbers import format_currency
      >>> format_compound_unit(format_currency(35, "JPY", locale="de"), denominator_unit="liter", locale="de")
      '35\\xa0\\xa5 pro Liter'

      See https://www.unicode.org/reports/tr35/tr35-general.html#perUnitPatterns

      :param numerator_value: The numerator value. This may be a string,
                              in which case it is considered preformatted and the unit is ignored.
      :param numerator_unit: The numerator unit. See `format_unit`.
      :param denominator_value: The denominator value. This may be a string,
                                in which case it is considered preformatted and the unit is ignored.
      :param denominator_unit: The denominator unit. See `format_unit`.
      :param length: The formatting length. "short", "long" or "narrow"
      :param format: An optional format, as accepted by `format_decimal`.
      :param locale: the `Locale` object or locale identifier
      :return: A formatted compound value.
      :raises UnknownUnitError: if a unit that has to be formatted cannot be
                                resolved in the locale (raised by `format_unit`).
      """
      locale = Locale.parse(locale)

      # Look for a specific compound unit first...

      if numerator_unit and denominator_unit and denominator_value == 1:
          compound_unit = _find_compound_unit(numerator_unit, denominator_unit, locale=locale)
          if compound_unit:
              return format_unit(numerator_value, compound_unit, length=length, format=format, locale=locale)

      # ... failing that, construct one "by hand".

      if isinstance(numerator_value, string_types):  # Numerator is preformatted
          formatted_numerator = numerator_value
      elif numerator_unit:  # Numerator has unit
          formatted_numerator = format_unit(
              numerator_value, numerator_unit, length=length, format=format, locale=locale
          )
      else:  # Unitless numerator
          formatted_numerator = format_decimal(numerator_value, format=format, locale=locale)

      if isinstance(denominator_value, string_types):  # Denominator is preformatted
          formatted_denominator = denominator_value
      elif denominator_unit:  # Denominator has unit
          if denominator_value == 1:  # support perUnitPatterns when the denominator is 1
              # Keep the qualified key in its own name. Overwriting
              # `denominator_unit` would replace an unknown unit with None, and
              # the `format_unit` call below would then fail with a TypeError
              # instead of an UnknownUnitError naming the caller's unit.
              qualified_denominator = _find_unit_pattern(denominator_unit, locale=locale)
              per_pattern = (
                  locale._data["unit_patterns"]
                  .get(qualified_denominator, {})
                  .get(length, {})
                  .get("per")
              )
              if per_pattern:
                  return per_pattern.format(formatted_numerator)
              # See TR-35's per-unit pattern algorithm, point 3.2.
              # For denominator 1, we replace the value to be formatted with the empty string;
              # this will make `format_unit` return " second" instead of "1 second".
              denominator_value = ""

          formatted_denominator = format_unit(
              denominator_value, denominator_unit, length=length, format=format, locale=locale
          ).strip()
      else:  # Bare denominator
          formatted_denominator = format_decimal(denominator_value, format=format, locale=locale)

      # TODO: this doesn't support "compound_variations" (or "prefix"), and will fall back to the "x/y" representation
      per_pattern = locale._data["compound_unit_patterns"].get("per", {}).get(length, {}).get("compound", "{0}/{1}")

      return per_pattern.format(formatted_numerator, formatted_denominator)