catalog.py 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851
  1. # -*- coding: utf-8 -*-
  2. """
  3. babel.messages.catalog
  4. ~~~~~~~~~~~~~~~~~~~~~~
  5. Data structures for message catalogs.
  6. :copyright: (c) 2013-2021 by the Babel Team.
  7. :license: BSD, see LICENSE for more details.
  8. """
  9. import re
  10. import time
  11. from cgi import parse_header
  12. from collections import OrderedDict
  13. from datetime import datetime, time as time_
  14. from difflib import get_close_matches
  15. from email import message_from_string
  16. from copy import copy
  17. from babel import __version__ as VERSION
  18. from babel.core import Locale, UnknownLocaleError
  19. from babel.dates import format_datetime
  20. from babel.messages.plurals import get_plural
  21. from babel.util import distinct, LOCALTZ, FixedOffsetTimezone
  22. from babel._compat import string_types, number_types, PY2, cmp, text_type, force_text
  23. __all__ = ['Message', 'Catalog', 'TranslationError']
# Matches a single printf-style conversion specifier such as ``%s``,
# ``%(name)s`` or ``%5.2f`` (``%%`` matches as well, since ``%`` is listed as
# a conversion character).  Capture groups, in order:
#   1. the optional mapping key found inside ``%(...)``
#   2. the optional flag, width, precision and length-modifier run
#   3. the conversion type character
PYTHON_FORMAT = re.compile(r'''
\%
(?:\(([\w]*)\))?
(
[-#0\ +]?(?:\*|[\d]+)?
(?:\.(?:\*|[\d]+))?
[hlL]?
)
([diouxXeEfFgGcrs%])
''', re.VERBOSE)
  34. def _parse_datetime_header(value):
  35. match = re.match(r'^(?P<datetime>.*?)(?P<tzoffset>[+-]\d{4})?$', value)
  36. tt = time.strptime(match.group('datetime'), '%Y-%m-%d %H:%M')
  37. ts = time.mktime(tt)
  38. dt = datetime.fromtimestamp(ts)
  39. # Separate the offset into a sign component, hours, and # minutes
  40. tzoffset = match.group('tzoffset')
  41. if tzoffset is not None:
  42. plus_minus_s, rest = tzoffset[0], tzoffset[1:]
  43. hours_offset_s, mins_offset_s = rest[:2], rest[2:]
  44. # Make them all integers
  45. plus_minus = int(plus_minus_s + '1')
  46. hours_offset = int(hours_offset_s)
  47. mins_offset = int(mins_offset_s)
  48. # Calculate net offset
  49. net_mins_offset = hours_offset * 60
  50. net_mins_offset += mins_offset
  51. net_mins_offset *= plus_minus
  52. # Create an offset object
  53. tzoffset = FixedOffsetTimezone(net_mins_offset)
  54. # Store the offset in a datetime object
  55. dt = dt.replace(tzinfo=tzoffset)
  56. return dt
  57. class Message(object):
  58. """Representation of a single message in a catalog."""
  59. def __init__(self, id, string=u'', locations=(), flags=(), auto_comments=(),
  60. user_comments=(), previous_id=(), lineno=None, context=None):
  61. """Create the message object.
  62. :param id: the message ID, or a ``(singular, plural)`` tuple for
  63. pluralizable messages
  64. :param string: the translated message string, or a
  65. ``(singular, plural)`` tuple for pluralizable messages
  66. :param locations: a sequence of ``(filename, lineno)`` tuples
  67. :param flags: a set or sequence of flags
  68. :param auto_comments: a sequence of automatic comments for the message
  69. :param user_comments: a sequence of user comments for the message
  70. :param previous_id: the previous message ID, or a ``(singular, plural)``
  71. tuple for pluralizable messages
  72. :param lineno: the line number on which the msgid line was found in the
  73. PO file, if any
  74. :param context: the message context
  75. """
  76. self.id = id
  77. if not string and self.pluralizable:
  78. string = (u'', u'')
  79. self.string = string
  80. self.locations = list(distinct(locations))
  81. self.flags = set(flags)
  82. if id and self.python_format:
  83. self.flags.add('python-format')
  84. else:
  85. self.flags.discard('python-format')
  86. self.auto_comments = list(distinct(auto_comments))
  87. self.user_comments = list(distinct(user_comments))
  88. if isinstance(previous_id, string_types):
  89. self.previous_id = [previous_id]
  90. else:
  91. self.previous_id = list(previous_id)
  92. self.lineno = lineno
  93. self.context = context
  94. def __repr__(self):
  95. return '<%s %r (flags: %r)>' % (type(self).__name__, self.id,
  96. list(self.flags))
  97. def __cmp__(self, other):
  98. """Compare Messages, taking into account plural ids"""
  99. def values_to_compare(obj):
  100. if isinstance(obj, Message) and obj.pluralizable:
  101. return obj.id[0], obj.context or ''
  102. return obj.id, obj.context or ''
  103. return cmp(values_to_compare(self), values_to_compare(other))
  104. def __gt__(self, other):
  105. return self.__cmp__(other) > 0
  106. def __lt__(self, other):
  107. return self.__cmp__(other) < 0
  108. def __ge__(self, other):
  109. return self.__cmp__(other) >= 0
  110. def __le__(self, other):
  111. return self.__cmp__(other) <= 0
  112. def __eq__(self, other):
  113. return self.__cmp__(other) == 0
  114. def __ne__(self, other):
  115. return self.__cmp__(other) != 0
  116. def clone(self):
  117. return Message(*map(copy, (self.id, self.string, self.locations,
  118. self.flags, self.auto_comments,
  119. self.user_comments, self.previous_id,
  120. self.lineno, self.context)))
  121. def check(self, catalog=None):
  122. """Run various validation checks on the message. Some validations
  123. are only performed if the catalog is provided. This method returns
  124. a sequence of `TranslationError` objects.
  125. :rtype: ``iterator``
  126. :param catalog: A catalog instance that is passed to the checkers
  127. :see: `Catalog.check` for a way to perform checks for all messages
  128. in a catalog.
  129. """
  130. from babel.messages.checkers import checkers
  131. errors = []
  132. for checker in checkers:
  133. try:
  134. checker(catalog, self)
  135. except TranslationError as e:
  136. errors.append(e)
  137. return errors
  138. @property
  139. def fuzzy(self):
  140. """Whether the translation is fuzzy.
  141. >>> Message('foo').fuzzy
  142. False
  143. >>> msg = Message('foo', 'foo', flags=['fuzzy'])
  144. >>> msg.fuzzy
  145. True
  146. >>> msg
  147. <Message 'foo' (flags: ['fuzzy'])>
  148. :type: `bool`"""
  149. return 'fuzzy' in self.flags
  150. @property
  151. def pluralizable(self):
  152. """Whether the message is plurizable.
  153. >>> Message('foo').pluralizable
  154. False
  155. >>> Message(('foo', 'bar')).pluralizable
  156. True
  157. :type: `bool`"""
  158. return isinstance(self.id, (list, tuple))
  159. @property
  160. def python_format(self):
  161. """Whether the message contains Python-style parameters.
  162. >>> Message('foo %(name)s bar').python_format
  163. True
  164. >>> Message(('foo %(name)s', 'foo %(name)s')).python_format
  165. True
  166. :type: `bool`"""
  167. ids = self.id
  168. if not isinstance(ids, (list, tuple)):
  169. ids = [ids]
  170. return any(PYTHON_FORMAT.search(id) for id in ids)
class TranslationError(Exception):
    """Exception thrown by translation checkers when invalid message
    translations are encountered.

    `Message.check` catches this exception from each checker and collects
    the instances into the list it returns.
    """
# Default value of the ``header_comment`` argument of `Catalog`.  The
# upper-case words PROJECT, YEAR and ORGANIZATION are placeholders that
# `Catalog._get_header_comment` substitutes each time the header is read.
DEFAULT_HEADER = u"""\
# Translations template for PROJECT.
# Copyright (C) YEAR ORGANIZATION
# This file is distributed under the same license as the PROJECT project.
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#"""
  180. if PY2:
  181. def _parse_header(header_string):
  182. # message_from_string only works for str, not for unicode
  183. headers = message_from_string(header_string.encode('utf8'))
  184. decoded_headers = {}
  185. for name, value in headers.items():
  186. name = name.decode('utf8')
  187. value = value.decode('utf8')
  188. decoded_headers[name] = value
  189. return decoded_headers
  190. else:
  191. _parse_header = message_from_string
  192. class Catalog(object):
  193. """Representation of a message catalog."""
  194. def __init__(self, locale=None, domain=None, header_comment=DEFAULT_HEADER,
  195. project=None, version=None, copyright_holder=None,
  196. msgid_bugs_address=None, creation_date=None,
  197. revision_date=None, last_translator=None, language_team=None,
  198. charset=None, fuzzy=True):
  199. """Initialize the catalog object.
  200. :param locale: the locale identifier or `Locale` object, or `None`
  201. if the catalog is not bound to a locale (which basically
  202. means it's a template)
  203. :param domain: the message domain
  204. :param header_comment: the header comment as string, or `None` for the
  205. default header
  206. :param project: the project's name
  207. :param version: the project's version
  208. :param copyright_holder: the copyright holder of the catalog
  209. :param msgid_bugs_address: the email address or URL to submit bug
  210. reports to
  211. :param creation_date: the date the catalog was created
  212. :param revision_date: the date the catalog was revised
  213. :param last_translator: the name and email of the last translator
  214. :param language_team: the name and email of the language team
  215. :param charset: the encoding to use in the output (defaults to utf-8)
  216. :param fuzzy: the fuzzy bit on the catalog header
  217. """
  218. self.domain = domain
  219. self.locale = locale
  220. self._header_comment = header_comment
  221. self._messages = OrderedDict()
  222. self.project = project or 'PROJECT'
  223. self.version = version or 'VERSION'
  224. self.copyright_holder = copyright_holder or 'ORGANIZATION'
  225. self.msgid_bugs_address = msgid_bugs_address or 'EMAIL@ADDRESS'
  226. self.last_translator = last_translator or 'FULL NAME <EMAIL@ADDRESS>'
  227. """Name and email address of the last translator."""
  228. self.language_team = language_team or 'LANGUAGE <LL@li.org>'
  229. """Name and email address of the language team."""
  230. self.charset = charset or 'utf-8'
  231. if creation_date is None:
  232. creation_date = datetime.now(LOCALTZ)
  233. elif isinstance(creation_date, datetime) and not creation_date.tzinfo:
  234. creation_date = creation_date.replace(tzinfo=LOCALTZ)
  235. self.creation_date = creation_date
  236. if revision_date is None:
  237. revision_date = 'YEAR-MO-DA HO:MI+ZONE'
  238. elif isinstance(revision_date, datetime) and not revision_date.tzinfo:
  239. revision_date = revision_date.replace(tzinfo=LOCALTZ)
  240. self.revision_date = revision_date
  241. self.fuzzy = fuzzy
  242. self.obsolete = OrderedDict() # Dictionary of obsolete messages
  243. self._num_plurals = None
  244. self._plural_expr = None
  245. def _set_locale(self, locale):
  246. if locale is None:
  247. self._locale_identifier = None
  248. self._locale = None
  249. return
  250. if isinstance(locale, Locale):
  251. self._locale_identifier = text_type(locale)
  252. self._locale = locale
  253. return
  254. if isinstance(locale, string_types):
  255. self._locale_identifier = text_type(locale)
  256. try:
  257. self._locale = Locale.parse(locale)
  258. except UnknownLocaleError:
  259. self._locale = None
  260. return
  261. raise TypeError('`locale` must be a Locale, a locale identifier string, or None; got %r' % locale)
  262. def _get_locale(self):
  263. return self._locale
  264. def _get_locale_identifier(self):
  265. return self._locale_identifier
  266. locale = property(_get_locale, _set_locale)
  267. locale_identifier = property(_get_locale_identifier)
  268. def _get_header_comment(self):
  269. comment = self._header_comment
  270. year = datetime.now(LOCALTZ).strftime('%Y')
  271. if hasattr(self.revision_date, 'strftime'):
  272. year = self.revision_date.strftime('%Y')
  273. comment = comment.replace('PROJECT', self.project) \
  274. .replace('VERSION', self.version) \
  275. .replace('YEAR', year) \
  276. .replace('ORGANIZATION', self.copyright_holder)
  277. locale_name = (self.locale.english_name if self.locale else self.locale_identifier)
  278. if locale_name:
  279. comment = comment.replace('Translations template', '%s translations' % locale_name)
  280. return comment
  281. def _set_header_comment(self, string):
  282. self._header_comment = string
  283. header_comment = property(_get_header_comment, _set_header_comment, doc="""\
  284. The header comment for the catalog.
  285. >>> catalog = Catalog(project='Foobar', version='1.0',
  286. ... copyright_holder='Foo Company')
  287. >>> print(catalog.header_comment) #doctest: +ELLIPSIS
  288. # Translations template for Foobar.
  289. # Copyright (C) ... Foo Company
  290. # This file is distributed under the same license as the Foobar project.
  291. # FIRST AUTHOR <EMAIL@ADDRESS>, ....
  292. #
  293. The header can also be set from a string. Any known upper-case variables
  294. will be replaced when the header is retrieved again:
  295. >>> catalog = Catalog(project='Foobar', version='1.0',
  296. ... copyright_holder='Foo Company')
  297. >>> catalog.header_comment = '''\\
  298. ... # The POT for my really cool PROJECT project.
  299. ... # Copyright (C) 1990-2003 ORGANIZATION
  300. ... # This file is distributed under the same license as the PROJECT
  301. ... # project.
  302. ... #'''
  303. >>> print(catalog.header_comment)
  304. # The POT for my really cool Foobar project.
  305. # Copyright (C) 1990-2003 Foo Company
  306. # This file is distributed under the same license as the Foobar
  307. # project.
  308. #
  309. :type: `unicode`
  310. """)
  311. def _get_mime_headers(self):
  312. headers = []
  313. headers.append(('Project-Id-Version',
  314. '%s %s' % (self.project, self.version)))
  315. headers.append(('Report-Msgid-Bugs-To', self.msgid_bugs_address))
  316. headers.append(('POT-Creation-Date',
  317. format_datetime(self.creation_date, 'yyyy-MM-dd HH:mmZ',
  318. locale='en')))
  319. if isinstance(self.revision_date, (datetime, time_) + number_types):
  320. headers.append(('PO-Revision-Date',
  321. format_datetime(self.revision_date,
  322. 'yyyy-MM-dd HH:mmZ', locale='en')))
  323. else:
  324. headers.append(('PO-Revision-Date', self.revision_date))
  325. headers.append(('Last-Translator', self.last_translator))
  326. if self.locale_identifier:
  327. headers.append(('Language', str(self.locale_identifier)))
  328. if self.locale_identifier and ('LANGUAGE' in self.language_team):
  329. headers.append(('Language-Team',
  330. self.language_team.replace('LANGUAGE',
  331. str(self.locale_identifier))))
  332. else:
  333. headers.append(('Language-Team', self.language_team))
  334. if self.locale is not None:
  335. headers.append(('Plural-Forms', self.plural_forms))
  336. headers.append(('MIME-Version', '1.0'))
  337. headers.append(('Content-Type',
  338. 'text/plain; charset=%s' % self.charset))
  339. headers.append(('Content-Transfer-Encoding', '8bit'))
  340. headers.append(('Generated-By', 'Babel %s\n' % VERSION))
  341. return headers
  342. def _set_mime_headers(self, headers):
  343. for name, value in headers:
  344. name = force_text(name.lower(), encoding=self.charset)
  345. value = force_text(value, encoding=self.charset)
  346. if name == 'project-id-version':
  347. parts = value.split(' ')
  348. self.project = u' '.join(parts[:-1])
  349. self.version = parts[-1]
  350. elif name == 'report-msgid-bugs-to':
  351. self.msgid_bugs_address = value
  352. elif name == 'last-translator':
  353. self.last_translator = value
  354. elif name == 'language':
  355. value = value.replace('-', '_')
  356. self._set_locale(value)
  357. elif name == 'language-team':
  358. self.language_team = value
  359. elif name == 'content-type':
  360. mimetype, params = parse_header(value)
  361. if 'charset' in params:
  362. self.charset = params['charset'].lower()
  363. elif name == 'plural-forms':
  364. _, params = parse_header(' ;' + value)
  365. self._num_plurals = int(params.get('nplurals', 2))
  366. self._plural_expr = params.get('plural', '(n != 1)')
  367. elif name == 'pot-creation-date':
  368. self.creation_date = _parse_datetime_header(value)
  369. elif name == 'po-revision-date':
  370. # Keep the value if it's not the default one
  371. if 'YEAR' not in value:
  372. self.revision_date = _parse_datetime_header(value)
    # Reading the property builds the header list from the catalog's
    # attributes (`_get_mime_headers`); assigning an iterable of
    # ``(name, value)`` pairs parses the recognised headers back into those
    # attributes (`_set_mime_headers`).
    mime_headers = property(_get_mime_headers, _set_mime_headers, doc="""\
    The MIME headers of the catalog, used for the special ``msgid ""`` entry.

    The behavior of this property changes slightly depending on whether a locale
    is set or not, the latter indicating that the catalog is actually a template
    for actual translations.

    Here's an example of the output for such a catalog template:

    >>> from babel.dates import UTC
    >>> created = datetime(1990, 4, 1, 15, 30, tzinfo=UTC)
    >>> catalog = Catalog(project='Foobar', version='1.0',
    ...                   creation_date=created)
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE
    Last-Translator: FULL NAME <EMAIL@ADDRESS>
    Language-Team: LANGUAGE <LL@li.org>
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    And here's an example of the output when the locale is set:

    >>> revised = datetime(1990, 8, 3, 12, 0, tzinfo=UTC)
    >>> catalog = Catalog(locale='de_DE', project='Foobar', version='1.0',
    ...                   creation_date=created, revision_date=revised,
    ...                   last_translator='John Doe <jd@example.com>',
    ...                   language_team='de_DE <de@example.com>')
    >>> for name, value in catalog.mime_headers:
    ...     print('%s: %s' % (name, value))
    Project-Id-Version: Foobar 1.0
    Report-Msgid-Bugs-To: EMAIL@ADDRESS
    POT-Creation-Date: 1990-04-01 15:30+0000
    PO-Revision-Date: 1990-08-03 12:00+0000
    Last-Translator: John Doe <jd@example.com>
    Language: de_DE
    Language-Team: de_DE <de@example.com>
    Plural-Forms: nplurals=2; plural=(n != 1)
    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit
    Generated-By: Babel ...

    :type: `list`
    """)
  417. @property
  418. def num_plurals(self):
  419. """The number of plurals used by the catalog or locale.
  420. >>> Catalog(locale='en').num_plurals
  421. 2
  422. >>> Catalog(locale='ga').num_plurals
  423. 5
  424. :type: `int`"""
  425. if self._num_plurals is None:
  426. num = 2
  427. if self.locale:
  428. num = get_plural(self.locale)[0]
  429. self._num_plurals = num
  430. return self._num_plurals
  431. @property
  432. def plural_expr(self):
  433. """The plural expression used by the catalog or locale.
  434. >>> Catalog(locale='en').plural_expr
  435. '(n != 1)'
  436. >>> Catalog(locale='ga').plural_expr
  437. '(n==1 ? 0 : n==2 ? 1 : n>=3 && n<=6 ? 2 : n>=7 && n<=10 ? 3 : 4)'
  438. >>> Catalog(locale='ding').plural_expr # unknown locale
  439. '(n != 1)'
  440. :type: `string_types`"""
  441. if self._plural_expr is None:
  442. expr = '(n != 1)'
  443. if self.locale:
  444. expr = get_plural(self.locale)[1]
  445. self._plural_expr = expr
  446. return self._plural_expr
  447. @property
  448. def plural_forms(self):
  449. """Return the plural forms declaration for the locale.
  450. >>> Catalog(locale='en').plural_forms
  451. 'nplurals=2; plural=(n != 1)'
  452. >>> Catalog(locale='pt_BR').plural_forms
  453. 'nplurals=2; plural=(n > 1)'
  454. :type: `str`"""
  455. return 'nplurals=%s; plural=%s' % (self.num_plurals, self.plural_expr)
  456. def __contains__(self, id):
  457. """Return whether the catalog has a message with the specified ID."""
  458. return self._key_for(id) in self._messages
  459. def __len__(self):
  460. """The number of messages in the catalog.
  461. This does not include the special ``msgid ""`` entry."""
  462. return len(self._messages)
  463. def __iter__(self):
  464. """Iterates through all the entries in the catalog, in the order they
  465. were added, yielding a `Message` object for every entry.
  466. :rtype: ``iterator``"""
  467. buf = []
  468. for name, value in self.mime_headers:
  469. buf.append('%s: %s' % (name, value))
  470. flags = set()
  471. if self.fuzzy:
  472. flags |= {'fuzzy'}
  473. yield Message(u'', '\n'.join(buf), flags=flags)
  474. for key in self._messages:
  475. yield self._messages[key]
  476. def __repr__(self):
  477. locale = ''
  478. if self.locale:
  479. locale = ' %s' % self.locale
  480. return '<%s %r%s>' % (type(self).__name__, self.domain, locale)
  481. def __delitem__(self, id):
  482. """Delete the message with the specified ID."""
  483. self.delete(id)
  484. def __getitem__(self, id):
  485. """Return the message with the specified ID.
  486. :param id: the message ID
  487. """
  488. return self.get(id)
  489. def __setitem__(self, id, message):
  490. """Add or update the message with the specified ID.
  491. >>> catalog = Catalog()
  492. >>> catalog[u'foo'] = Message(u'foo')
  493. >>> catalog[u'foo']
  494. <Message u'foo' (flags: [])>
  495. If a message with that ID is already in the catalog, it is updated
  496. to include the locations and flags of the new message.
  497. >>> catalog = Catalog()
  498. >>> catalog[u'foo'] = Message(u'foo', locations=[('main.py', 1)])
  499. >>> catalog[u'foo'].locations
  500. [('main.py', 1)]
  501. >>> catalog[u'foo'] = Message(u'foo', locations=[('utils.py', 5)])
  502. >>> catalog[u'foo'].locations
  503. [('main.py', 1), ('utils.py', 5)]
  504. :param id: the message ID
  505. :param message: the `Message` object
  506. """
  507. assert isinstance(message, Message), 'expected a Message object'
  508. key = self._key_for(id, message.context)
  509. current = self._messages.get(key)
  510. if current:
  511. if message.pluralizable and not current.pluralizable:
  512. # The new message adds pluralization
  513. current.id = message.id
  514. current.string = message.string
  515. current.locations = list(distinct(current.locations +
  516. message.locations))
  517. current.auto_comments = list(distinct(current.auto_comments +
  518. message.auto_comments))
  519. current.user_comments = list(distinct(current.user_comments +
  520. message.user_comments))
  521. current.flags |= message.flags
  522. message = current
  523. elif id == '':
  524. # special treatment for the header message
  525. self.mime_headers = _parse_header(message.string).items()
  526. self.header_comment = '\n'.join([('# %s' % c).rstrip() for c
  527. in message.user_comments])
  528. self.fuzzy = message.fuzzy
  529. else:
  530. if isinstance(id, (list, tuple)):
  531. assert isinstance(message.string, (list, tuple)), \
  532. 'Expected sequence but got %s' % type(message.string)
  533. self._messages[key] = message
  534. def add(self, id, string=None, locations=(), flags=(), auto_comments=(),
  535. user_comments=(), previous_id=(), lineno=None, context=None):
  536. """Add or update the message with the specified ID.
  537. >>> catalog = Catalog()
  538. >>> catalog.add(u'foo')
  539. <Message ...>
  540. >>> catalog[u'foo']
  541. <Message u'foo' (flags: [])>
  542. This method simply constructs a `Message` object with the given
  543. arguments and invokes `__setitem__` with that object.
  544. :param id: the message ID, or a ``(singular, plural)`` tuple for
  545. pluralizable messages
  546. :param string: the translated message string, or a
  547. ``(singular, plural)`` tuple for pluralizable messages
  548. :param locations: a sequence of ``(filename, lineno)`` tuples
  549. :param flags: a set or sequence of flags
  550. :param auto_comments: a sequence of automatic comments
  551. :param user_comments: a sequence of user comments
  552. :param previous_id: the previous message ID, or a ``(singular, plural)``
  553. tuple for pluralizable messages
  554. :param lineno: the line number on which the msgid line was found in the
  555. PO file, if any
  556. :param context: the message context
  557. """
  558. message = Message(id, string, list(locations), flags, auto_comments,
  559. user_comments, previous_id, lineno=lineno,
  560. context=context)
  561. self[id] = message
  562. return message
  563. def check(self):
  564. """Run various validation checks on the translations in the catalog.
  565. For every message which fails validation, this method yield a
  566. ``(message, errors)`` tuple, where ``message`` is the `Message` object
  567. and ``errors`` is a sequence of `TranslationError` objects.
  568. :rtype: ``iterator``
  569. """
  570. for message in self._messages.values():
  571. errors = message.check(catalog=self)
  572. if errors:
  573. yield message, errors
  574. def get(self, id, context=None):
  575. """Return the message with the specified ID and context.
  576. :param id: the message ID
  577. :param context: the message context, or ``None`` for no context
  578. """
  579. return self._messages.get(self._key_for(id, context))
  580. def delete(self, id, context=None):
  581. """Delete the message with the specified ID and context.
  582. :param id: the message ID
  583. :param context: the message context, or ``None`` for no context
  584. """
  585. key = self._key_for(id, context)
  586. if key in self._messages:
  587. del self._messages[key]
    def update(self, template, no_fuzzy_matching=False, update_header_comment=False, keep_user_comments=True):
        """Update the catalog based on the given template catalog.

        >>> from babel.messages import Catalog
        >>> template = Catalog()
        >>> template.add('green', locations=[('main.py', 99)])
        <Message ...>
        >>> template.add('blue', locations=[('main.py', 100)])
        <Message ...>
        >>> template.add(('salad', 'salads'), locations=[('util.py', 42)])
        <Message ...>
        >>> catalog = Catalog(locale='de_DE')
        >>> catalog.add('blue', u'blau', locations=[('main.py', 98)])
        <Message ...>
        >>> catalog.add('head', u'Kopf', locations=[('util.py', 33)])
        <Message ...>
        >>> catalog.add(('salad', 'salads'), (u'Salat', u'Salate'),
        ...             locations=[('util.py', 38)])
        <Message ...>

        >>> catalog.update(template)
        >>> len(catalog)
        3

        >>> msg1 = catalog['green']
        >>> msg1.string
        >>> msg1.locations
        [('main.py', 99)]

        >>> msg2 = catalog['blue']
        >>> msg2.string
        u'blau'
        >>> msg2.locations
        [('main.py', 100)]

        >>> msg3 = catalog['salad']
        >>> msg3.string
        (u'Salat', u'Salate')
        >>> msg3.locations
        [('util.py', 42)]

        Messages that are in the catalog but not in the template are removed
        from the main collection, but can still be accessed via the `obsolete`
        member:

        >>> 'head' in catalog
        False
        >>> list(catalog.obsolete.values())
        [<Message 'head' (flags: [])>]

        :param template: the reference catalog, usually read from a POT file
        :param no_fuzzy_matching: whether to disable fuzzy matching of
                                  message IDs
        :param update_header_comment: whether to replace this catalog's header
                                      comment with the template's
        :param keep_user_comments: whether a merged message takes the user
                                   comments of the existing translation; when
                                   false it keeps the template message's own
        """
        # ``messages`` is the pre-update content and ``remaining`` tracks the
        # entries not yet matched; the catalog is rebuilt from scratch.
        messages = self._messages
        remaining = messages.copy()
        self._messages = OrderedDict()

        # Prepare for fuzzy matching
        # NOTE: this stays an empty list when fuzzy matching is disabled; it
        # is only read inside the ``not no_fuzzy_matching`` branch below.
        fuzzy_candidates = []
        if not no_fuzzy_matching:
            # Maps the bare msgid of every translated message to its context.
            fuzzy_candidates = dict([
                (self._key_for(msgid), messages[msgid].context)
                for msgid in messages if msgid and messages[msgid].string
            ])
        # Old keys that were consumed by a fuzzy match.
        fuzzy_matches = set()

        def _merge(message, oldkey, newkey):
            # Combine the template ``message`` with the existing translation
            # stored under ``oldkey`` and add the result to the catalog.
            message = message.clone()
            fuzzy = False
            if oldkey != newkey:
                # Fuzzy match: remember the ID the translation belonged to.
                fuzzy = True
                fuzzy_matches.add(oldkey)
                oldmsg = messages.get(oldkey)
                if isinstance(oldmsg.id, string_types):
                    message.previous_id = [oldmsg.id]
                else:
                    message.previous_id = list(oldmsg.id)
            else:
                # Exact match: the old entry is no longer a leftover.
                oldmsg = remaining.pop(oldkey, None)
            message.string = oldmsg.string

            if keep_user_comments:
                message.user_comments = list(distinct(oldmsg.user_comments))

            # Reconcile singular/plural shape between the template ID and the
            # translation; any mismatch marks the result as fuzzy.
            if isinstance(message.id, (list, tuple)):
                if not isinstance(message.string, (list, tuple)):
                    fuzzy = True
                    message.string = tuple(
                        [message.string] + ([u''] * (len(message.id) - 1))
                    )
                elif len(message.string) != self.num_plurals:
                    fuzzy = True
                    # NOTE(review): ``message.string`` was just set to
                    # ``oldmsg.string``, so this slice keeps every element;
                    # confirm whether truncation to ``num_plurals`` was meant.
                    message.string = tuple(message.string[:len(oldmsg.string)])
            elif isinstance(message.string, (list, tuple)):
                fuzzy = True
                message.string = message.string[0]
            message.flags |= oldmsg.flags
            if fuzzy:
                message.flags |= {u'fuzzy'}
            self[message.id] = message

        for message in template:
            # The header entry yielded first has an empty ID and is skipped.
            if message.id:
                key = self._key_for(message.id, message.context)
                if key in messages:
                    _merge(message, key, key)
                else:
                    if not no_fuzzy_matching:
                        # do some fuzzy matching with difflib
                        if isinstance(key, tuple):
                            matchkey = key[0]  # just the msgid, no context
                        else:
                            matchkey = key
                        matches = get_close_matches(matchkey.lower().strip(),
                                                    fuzzy_candidates.keys(), 1)
                        if matches:
                            newkey = matches[0]
                            newctxt = fuzzy_candidates[newkey]
                            # Rebuild the full storage key of the candidate.
                            if newctxt is not None:
                                newkey = newkey, newctxt
                            _merge(message, newkey, key)
                            continue

                    # No existing translation: take the template message.
                    self[message.id] = message

        # Whatever was neither matched exactly nor used for a fuzzy match
        # becomes obsolete.
        for msgid in remaining:
            if no_fuzzy_matching or msgid not in fuzzy_matches:
                self.obsolete[msgid] = remaining[msgid]

        if update_header_comment:
            # Allow the updated catalog's header to be rewritten based on the
            # template's header
            self.header_comment = template.header_comment

        # Make updated catalog's POT-Creation-Date equal to the template
        # used to update the catalog
        self.creation_date = template.creation_date
  708. def _key_for(self, id, context=None):
  709. """The key for a message is just the singular ID even for pluralizable
  710. messages, but is a ``(msgid, msgctxt)`` tuple for context-specific
  711. messages.
  712. """
  713. key = id
  714. if isinstance(key, (list, tuple)):
  715. key = id[0]
  716. if context is not None:
  717. key = (key, context)
  718. return key