parser.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. # Copyright (C) 2001-2007 Python Software Foundation
  2. # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
  3. # Contact: email-sig@python.org
  4. """A parser of RFC 2822 and MIME email messages."""
  5. from __future__ import unicode_literals
  6. from __future__ import division
  7. from __future__ import absolute_import
  8. __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
  9. import warnings
  10. from io import StringIO, TextIOWrapper
  11. from future.backports.email.feedparser import FeedParser, BytesFeedParser
  12. from future.backports.email.message import Message
  13. from future.backports.email._policybase import compat32
  class Parser(object):
      """Text-mode parser for RFC 2822 and MIME email messages."""

      def __init__(self, _class=Message, **_3to2kwargs):
          """Configure a parser that turns message text into an object tree.

          The resulting in-memory tree represents the email message; it can be
          manipulated and later handed to a Generator to get the textual form
          of the message back.

          Input must be a block of RFC 2822 headers and header continuation
          lines, optionally preceded by a `Unix-from' header.  The header
          block ends at the first blank line or at the end of the input.

          _class is the class instantiated whenever a new message object is
          needed.  It must be constructible with zero arguments.  Defaults to
          Message.

          The policy keyword names a policy object controlling a number of
          aspects of the parser's operation.  The default policy (compat32)
          maintains backward compatibility.
          """
          # 'policy' is taken from the catch-all keywords rather than being
          # declared keyword-only, which keeps the signature valid on
          # Python 2.  Any other keyword is accepted and ignored.
          self.policy = _3to2kwargs.pop('policy', compat32)
          self._class = _class

      def parse(self, fp, headersonly=False):
          """Build a message structure from the contents of a file.

          All data is read from fp and the root of the message structure is
          returned.  When the optional headersonly flag is true, parsing
          stops once the headers have been read; the default, False, parses
          the entire contents of the file.
          """
          builder = FeedParser(self._class, policy=self.policy)
          if headersonly:
              builder._set_headersonly()
          # Feed the input in 8 KiB pieces until read() returns nothing.
          chunk = fp.read(8192)
          while chunk:
              builder.feed(chunk)
              chunk = fp.read(8192)
          return builder.close()

      def parsestr(self, text, headersonly=False):
          """Build a message structure from a string.

          The root of the message structure is returned.  When the optional
          headersonly flag is true, parsing stops once the headers have been
          read; the default, False, parses the entire text.
          """
          source = StringIO(text)
          return self.parse(source, headersonly=headersonly)
  class HeaderParser(Parser):
      """Parser variant that always parses in headers-only mode."""

      def parse(self, fp, headersonly=True):
          """Parse the message read from fp, stopping after the headers.

          The headersonly argument exists only for signature compatibility
          with Parser.parse; its value is ignored and headers-only mode is
          always requested.
          """
          return Parser.parse(self, fp, headersonly=True)

      def parsestr(self, text, headersonly=True):
          """Parse the message in text, stopping after the headers.

          The headersonly argument exists only for signature compatibility
          with Parser.parsestr; its value is ignored and headers-only mode is
          always requested.
          """
          return Parser.parsestr(self, text, headersonly=True)
  class BytesParser(object):
      """Parser of binary RFC 2822 and MIME email messages."""

      def __init__(self, *args, **kw):
          """Parser of binary RFC 2822 and MIME email messages.

          Creates an in-memory object tree representing the email message,
          which can then be manipulated and turned over to a Generator to
          return the textual representation of the message.

          The input must be formatted as a block of RFC 2822 headers and
          header continuation lines, optionally preceded by a `Unix-from'
          header.  The header block is terminated either by the end of the
          input or by a blank line.

          All positional and keyword arguments are forwarded unchanged to
          Parser, which does the actual parsing.
          """
          self.parser = Parser(*args, **kw)

      def parse(self, fp, headersonly=False):
          """Create a message structure from the data in a binary file.

          Reads all the data from the file and returns the root of the
          message structure.  Optional headersonly is a flag specifying
          whether to stop parsing after reading the headers or not.  The
          default is False, meaning it parses the entire contents of the
          file.

          The file is decoded as ASCII with the surrogateescape error
          handler.  fp is left open: closing it remains the caller's
          responsibility.
          """
          wrapper = TextIOWrapper(fp, encoding='ascii',
                                  errors='surrogateescape')
          try:
              return self.parser.parse(wrapper, headersonly)
          finally:
              # Detach instead of closing: closing the wrapper would also
              # close the caller's binary file, which Parser.parse never does
              # to its input.
              wrapper.detach()

      def parsebytes(self, text, headersonly=False):
          """Create a message structure from a byte string.

          Returns the root of the message structure.  Optional headersonly is
          a flag specifying whether to stop parsing after reading the headers
          or not.  The default is False, meaning it parses the entire
          contents of the byte string.
          """
          # surrogateescape carries non-ASCII bytes through the text parser
          # instead of raising a decode error.
          text = text.decode('ASCII', errors='surrogateescape')
          return self.parser.parsestr(text, headersonly)
  class BytesHeaderParser(BytesParser):
      """BytesParser variant that always parses in headers-only mode."""

      def parse(self, fp, headersonly=True):
          """Parse the binary file fp, stopping after the headers.

          The headersonly argument exists only for signature compatibility
          with BytesParser.parse; its value is ignored and headers-only mode
          is always requested.
          """
          message = BytesParser.parse(self, fp, headersonly=True)
          return message

      def parsebytes(self, text, headersonly=True):
          """Parse the byte string text, stopping after the headers.

          The headersonly argument exists only for signature compatibility
          with BytesParser.parsebytes; its value is ignored and headers-only
          mode is always requested.
          """
          message = BytesParser.parsebytes(self, text, headersonly=True)
          return message