我有一封原始 RFC 2822 格式的电子邮件，我正在尝试在 Python 3.6 中将其解析为 EmailMessage 对象。这封邮件太大（637 kB），无法上传到 pastebin，因此我把它托管在了 Dropbox 上。
我正在尝试使用以下代码段将此电子邮件解析为EmailMessage对象。
from email.message import EmailMessage
from email.policy import SMTP
from email import message_from_bytes
message = message_from_bytes(open('broken_email', 'rb').read(), _class=EmailMessage, policy=SMTP)
但我得到了以下输出：
Traceback (most recent call last):
File "email_class.py", line 134, in <module>
message = message_from_bytes(open('broken_email', 'rb').read(), _class=EmailMessage, policy=SMTP)
File "/usr/lib/python3.6/email/__init__.py", line 46, in message_from_bytes
return BytesParser(*args, **kws).parsebytes(s)
File "/usr/lib/python3.6/email/parser.py", line 124, in parsebytes
return self.parser.parsestr(text, headersonly)
File "/usr/lib/python3.6/email/parser.py", line 68, in parsestr
return self.parse(StringIO(text), headersonly=headersonly)
File "/usr/lib/python3.6/email/parser.py", line 57, in parse
feedparser.feed(data)
File "/usr/lib/python3.6/email/feedparser.py", line 176, in feed
self._call_parse()
File "/usr/lib/python3.6/email/feedparser.py", line 180, in _call_parse
self._parse()
File "/usr/lib/python3.6/email/feedparser.py", line 385, in _parsegen
for retval in self._parsegen():
File "/usr/lib/python3.6/email/feedparser.py", line 256, in _parsegen
if self._cur.get_content_type() == 'message/delivery-status':
File "/usr/lib/python3.6/email/message.py", line 578, in get_content_type
value = self.get('content-type', missing)
File "/usr/lib/python3.6/email/message.py", line 471, in get
return self.policy.header_fetch_parse(k, v)
File "/usr/lib/python3.6/email/policy.py", line 162, in header_fetch_parse
return self.header_factory(name, value)
File "/usr/lib/python3.6/email/headerregistry.py", line 589, in __call__
return self[name](name, value)
File "/usr/lib/python3.6/email/headerregistry.py", line 197, in __new__
cls.parse(value, kwds)
File "/usr/lib/python3.6/email/headerregistry.py", line 446, in parse
kwds['parse_tree'] = parse_tree = cls.value_parser(value)
File "/usr/lib/python3.6/email/_header_value_parser.py", line 2503, in parse_content_type_header
ctype.append(parse_mime_parameters(value[1:]))
File "/usr/lib/python3.6/email/_header_value_parser.py", line 2423, in parse_mime_parameters
token, value = get_invalid_parameter(value)
File "/usr/lib/python3.6/email/_header_value_parser.py", line 2062, in get_invalid_parameter
token, value = get_phrase(value)
File "/usr/lib/python3.6/email/_header_value_parser.py", line 1365, in get_phrase
token, value = get_word(value)
File "/usr/lib/python3.6/email/_header_value_parser.py", line 1341, in get_word
token, value = get_quoted_string(value)
File "/usr/lib/python3.6/email/_header_value_parser.py", line 1242, in get_quoted_string
token, value = get_bare_quoted_string(value)
File "/usr/lib/python3.6/email/_header_value_parser.py", line 1171, in get_bare_quoted_string
if value[0] == '"':
IndexError: string index out of range
我可以使用旧版 Python 2 的 email API 将这封电子邮件解析为 Message 对象，但我更希望得到较新的 EmailMessage 对象。