我想从电子邮件中提取主题、发件人、收件人和日期,这部分已经完成。我还需要从同一封邮件的 HTML 正文中获取全部内容。我尝试使用 BeautifulSoup,但做法有误。请问怎样才能只取出 HTML 中的 span 元素?感谢您的帮助,我的代码如下:
import imaplib
import email
import mimetypes
from email import header
from bs4 import BeautifulSoup
def decodeHeader(headerMsg):
    """Decode an RFC 2047 encoded header value into a plain string.

    Args:
        headerMsg: Raw header value (``str`` or ``email.header.Header``),
            or ``None`` when the message does not carry that header.

    Returns:
        The decoded header text; an empty string when ``headerMsg`` is
        ``None``.
    """
    # Message.__getitem__ returns None for a missing header, and
    # decode_header() cannot handle None.
    if headerMsg is None:
        return ''

    pieces = []
    for chunk, chset in header.decode_header(headerMsg):
        if isinstance(chunk, bytes):
            try:
                # errors='replace' keeps one malformed byte from aborting
                # the whole header.
                pieces.append(chunk.decode(chset or 'utf-8', errors='replace'))
            except LookupError:
                # The charset label is not a codec Python knows
                # (e.g. "unknown-8bit"); fall back to UTF-8.
                pieces.append(chunk.decode('utf-8', errors='replace'))
        else:
            # A header without encoded words comes back as str already.
            pieces.append(chunk)
    return ''.join(pieces)
# Connection settings for the IMAP account to read.
host = 'imap.yandex.ru'
userid = 'myid'
passwd = 'mypass'

# CSS class of the <span> whose text is printed for each HTML body.
SPAN_CLASS = 'mail-ThreadSidebar-List-Item_content'


def _html_bodies(message):
    """Yield the decoded text of every text/html part found in *message*."""
    # walk() visits the message itself and all nested parts, so single-part
    # and arbitrarily nested multipart messages are handled the same way.
    for part in message.walk():
        if part.get_content_type() != 'text/html':
            continue
        # decode=True undoes the Content-Transfer-Encoding (base64 or
        # quoted-printable); without it the parser sees encoded text.
        raw = part.get_payload(decode=True)
        if raw is None:
            continue
        charset = part.get_content_charset() or 'utf-8'
        try:
            yield raw.decode(charset, errors='replace')
        except LookupError:
            # Charset label unknown to Python; fall back to UTF-8.
            yield raw.decode('utf-8', errors='replace')


imap = imaplib.IMAP4_SSL(host)
try:
    imap.login(userid, passwd)
    imap.select('INBOX')
    status, email_ids = imap.search(None, '(ALL)')
    if status != 'OK':
        # Treat a failed search as "no messages" rather than parsing an
        # error response as a list of ids.
        email_ids = [b'']

    for num in email_ids[0].split():
        type1, data = imap.fetch(num, '(RFC822)')
        # A successful fetch has a (envelope, raw_bytes) tuple in data[0];
        # anything else means there is no message to parse.
        if type1 != 'OK' or not data or not isinstance(data[0], tuple):
            continue
        email_msg = email.message_from_bytes(data[0][1])

        for html in _html_bodies(email_msg):
            soup = BeautifulSoup(html, 'lxml')
            span = soup.find('span', {'class': SPAN_CLASS})
            # find() returns None when the HTML has no matching span.
            if span is not None:
                print(span.text)

        for label in ('Subject', 'From', 'To', 'Date'):
            value = email_msg[label]
            # A missing header is None; print it as an empty value.
            print(label + ': ', decodeHeader(value) if value is not None else '')
        print('----\n\n')
finally:
    # Always end the session, even when a message fails to parse.
    imap.logout()