localedata.py 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258
  1. # -*- coding: utf-8 -*-
  2. """
  3. babel.localedata
  4. ~~~~~~~~~~~~~~~~
  5. Low-level locale data access.
  6. :note: The `Locale` class, which uses this module under the hood, provides a
  7. more convenient interface for accessing the locale data.
  8. :copyright: (c) 2013-2021 by the Babel Team.
  9. :license: BSD, see LICENSE for more details.
  10. """
  11. import os
  12. import re
  13. import sys
  14. import threading
  15. from itertools import chain
  16. from babel._compat import pickle, string_types, abc
  17. _cache = {}
  18. _cache_lock = threading.RLock()
  19. _dirname = os.path.join(os.path.dirname(__file__), 'locale-data')
  20. _windows_reserved_name_re = re.compile("^(con|prn|aux|nul|com[0-9]|lpt[0-9])$", re.I)
  21. def normalize_locale(name):
  22. """Normalize a locale ID by stripping spaces and apply proper casing.
  23. Returns the normalized locale ID string or `None` if the ID is not
  24. recognized.
  25. """
  26. if not name or not isinstance(name, string_types):
  27. return None
  28. name = name.strip().lower()
  29. for locale_id in chain.from_iterable([_cache, locale_identifiers()]):
  30. if name == locale_id.lower():
  31. return locale_id
  32. def resolve_locale_filename(name):
  33. """
  34. Resolve a locale identifier to a `.dat` path on disk.
  35. """
  36. # Clean up any possible relative paths.
  37. name = os.path.basename(name)
  38. # Ensure we're not left with one of the Windows reserved names.
  39. if sys.platform == "win32" and _windows_reserved_name_re.match(os.path.splitext(name)[0]):
  40. raise ValueError("Name %s is invalid on Windows" % name)
  41. # Build the path.
  42. return os.path.join(_dirname, '%s.dat' % name)
  43. def exists(name):
  44. """Check whether locale data is available for the given locale.
  45. Returns `True` if it exists, `False` otherwise.
  46. :param name: the locale identifier string
  47. """
  48. if not name or not isinstance(name, string_types):
  49. return False
  50. if name in _cache:
  51. return True
  52. file_found = os.path.exists(resolve_locale_filename(name))
  53. return True if file_found else bool(normalize_locale(name))
  54. def locale_identifiers():
  55. """Return a list of all locale identifiers for which locale data is
  56. available.
  57. This data is cached after the first invocation in `locale_identifiers.cache`.
  58. Removing the `locale_identifiers.cache` attribute or setting it to `None`
  59. will cause this function to re-read the list from disk.
  60. .. versionadded:: 0.8.1
  61. :return: a list of locale identifiers (strings)
  62. """
  63. data = getattr(locale_identifiers, 'cache', None)
  64. if data is None:
  65. locale_identifiers.cache = data = [
  66. stem
  67. for stem, extension in
  68. (os.path.splitext(filename) for filename in os.listdir(_dirname))
  69. if extension == '.dat' and stem != 'root'
  70. ]
  71. return data
  72. def load(name, merge_inherited=True):
  73. """Load the locale data for the given locale.
  74. The locale data is a dictionary that contains much of the data defined by
  75. the Common Locale Data Repository (CLDR). This data is stored as a
  76. collection of pickle files inside the ``babel`` package.
  77. >>> d = load('en_US')
  78. >>> d['languages']['sv']
  79. u'Swedish'
  80. Note that the results are cached, and subsequent requests for the same
  81. locale return the same dictionary:
  82. >>> d1 = load('en_US')
  83. >>> d2 = load('en_US')
  84. >>> d1 is d2
  85. True
  86. :param name: the locale identifier string (or "root")
  87. :param merge_inherited: whether the inherited data should be merged into
  88. the data of the requested locale
  89. :raise `IOError`: if no locale data file is found for the given locale
  90. identifer, or one of the locales it inherits from
  91. """
  92. name = os.path.basename(name)
  93. _cache_lock.acquire()
  94. try:
  95. data = _cache.get(name)
  96. if not data:
  97. # Load inherited data
  98. if name == 'root' or not merge_inherited:
  99. data = {}
  100. else:
  101. from babel.core import get_global
  102. parent = get_global('parent_exceptions').get(name)
  103. if not parent:
  104. parts = name.split('_')
  105. if len(parts) == 1:
  106. parent = 'root'
  107. else:
  108. parent = '_'.join(parts[:-1])
  109. data = load(parent).copy()
  110. filename = resolve_locale_filename(name)
  111. with open(filename, 'rb') as fileobj:
  112. if name != 'root' and merge_inherited:
  113. merge(data, pickle.load(fileobj))
  114. else:
  115. data = pickle.load(fileobj)
  116. _cache[name] = data
  117. return data
  118. finally:
  119. _cache_lock.release()
  120. def merge(dict1, dict2):
  121. """Merge the data from `dict2` into the `dict1` dictionary, making copies
  122. of nested dictionaries.
  123. >>> d = {1: 'foo', 3: 'baz'}
  124. >>> merge(d, {1: 'Foo', 2: 'Bar'})
  125. >>> sorted(d.items())
  126. [(1, 'Foo'), (2, 'Bar'), (3, 'baz')]
  127. :param dict1: the dictionary to merge into
  128. :param dict2: the dictionary containing the data that should be merged
  129. """
  130. for key, val2 in dict2.items():
  131. if val2 is not None:
  132. val1 = dict1.get(key)
  133. if isinstance(val2, dict):
  134. if val1 is None:
  135. val1 = {}
  136. if isinstance(val1, Alias):
  137. val1 = (val1, val2)
  138. elif isinstance(val1, tuple):
  139. alias, others = val1
  140. others = others.copy()
  141. merge(others, val2)
  142. val1 = (alias, others)
  143. else:
  144. val1 = val1.copy()
  145. merge(val1, val2)
  146. else:
  147. val1 = val2
  148. dict1[key] = val1
  149. class Alias(object):
  150. """Representation of an alias in the locale data.
  151. An alias is a value that refers to some other part of the locale data,
  152. as specified by the `keys`.
  153. """
  154. def __init__(self, keys):
  155. self.keys = tuple(keys)
  156. def __repr__(self):
  157. return '<%s %r>' % (type(self).__name__, self.keys)
  158. def resolve(self, data):
  159. """Resolve the alias based on the given data.
  160. This is done recursively, so if one alias resolves to a second alias,
  161. that second alias will also be resolved.
  162. :param data: the locale data
  163. :type data: `dict`
  164. """
  165. base = data
  166. for key in self.keys:
  167. data = data[key]
  168. if isinstance(data, Alias):
  169. data = data.resolve(base)
  170. elif isinstance(data, tuple):
  171. alias, others = data
  172. data = alias.resolve(base)
  173. return data
  174. class LocaleDataDict(abc.MutableMapping):
  175. """Dictionary wrapper that automatically resolves aliases to the actual
  176. values.
  177. """
  178. def __init__(self, data, base=None):
  179. self._data = data
  180. if base is None:
  181. base = data
  182. self.base = base
  183. def __len__(self):
  184. return len(self._data)
  185. def __iter__(self):
  186. return iter(self._data)
  187. def __getitem__(self, key):
  188. orig = val = self._data[key]
  189. if isinstance(val, Alias): # resolve an alias
  190. val = val.resolve(self.base)
  191. if isinstance(val, tuple): # Merge a partial dict with an alias
  192. alias, others = val
  193. val = alias.resolve(self.base).copy()
  194. merge(val, others)
  195. if type(val) is dict: # Return a nested alias-resolving dict
  196. val = LocaleDataDict(val, base=self.base)
  197. if val is not orig:
  198. self._data[key] = val
  199. return val
  200. def __setitem__(self, key, value):
  201. self._data[key] = value
  202. def __delitem__(self, key):
  203. del self._data[key]
  204. def copy(self):
  205. return LocaleDataDict(self._data.copy(), base=self.base)