如何使用python将mbox文件转换为.msg文件格式?

时间:2018-12-06 06:44:12

标签: python python-2.7 outlook win32com

我想将mbox文件转换为msg格式。我已经做了一些尝试,但得到的文件格式不正确:我能够读取mbox文件,却无法生成与之对应的msg文件。目前我已经可以把mbox文件转换为eml文件,希望用同样的方式生成msg文件,但是找不到具体的做法。

下面是将mbox转换为eml的代码。

 import os
import mailbox
from email import generator
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

count = 0


def emlGenerator(body, thisemail, out_dir='xxxxx/test2'):
    """Write one message as a numbered ``.eml`` file.

    A new ``multipart/alternative`` message is built from the Subject, From,
    To, Cc, Bcc and Date headers of ``thisemail`` with ``body`` as its single
    text part, and saved as ``<count>.eml`` inside ``out_dir``. The
    module-level ``count`` is incremented on every call.

    Args:
        body: Text of the message. ``None`` is written as an empty body;
            bytes are decoded as UTF-8 with undecodable bytes replaced.
        thisemail: Source message; headers are read with ``thisemail[name]``.
        out_dir: Directory that receives the file; created when missing.
    """
    global count
    msg = MIMEMultipart('alternative')
    for header in ('Subject', 'From', 'To', 'Cc', 'Bcc', 'Date'):
        value = thisemail[header]
        # A missing header is returned as None; copying it would emit an
        # empty "Cc:" / "Bcc:" line in the generated file.
        if value is not None:
            msg[header] = value
    name = str(count) + '.eml'
    count += 1
    if body is None:
        # No text/plain part was found upstream: still produce a valid file.
        body = ''
    elif isinstance(body, bytes) and not isinstance(body, str):
        # Undecoded payload (Python 3 bytes): MIMEText needs text.
        body = body.decode('utf-8', 'replace')
    msg.attach(MIMEText(body))
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    outfile_name = os.path.join(out_dir, name)
    with open(outfile_name, 'w') as outfile:
        generator.Generator(outfile).flatten(msg)


def getcharsets(msg):
    """Return the set of charset names declared anywhere in ``msg``.

    Entries that ``msg.get_charsets()`` reports as ``None`` are left out.
    """
    return {name for name in msg.get_charsets() if name is not None}


def handleerror(errmsg, emailmsg, cs):
    """Print a short report about a decoding failure.

    Args:
        errmsg: The error to display.
        emailmsg: Message being processed; its charsets, subject and sender
            are printed.
        cs: Charset that was in use when the error happened.
    """
    print()
    print(errmsg)
    print("This error occurred while decoding with ", cs, " charset.")
    found = getcharsets(emailmsg)
    print("These charsets were found in the one email.", found)
    subject = emailmsg['subject']
    print("This is the subject:", subject)
    sender = emailmsg['From']
    print("This is the sender:", sender)


def getbodyfromemail(msg):
    """Return the text/plain body of ``msg`` as text.

    The message tree is walked once; when several ``text/plain`` parts
    exist, the last one in walk order is used. The payload is decoded with
    the charset declared on that part, then with any other charset declared
    in the message, and finally as UTF-8 with undecodable bytes replaced.

    Args:
        msg: An ``email.message.Message`` (for example an mbox entry).

    Returns:
        The decoded body, or ``None`` when there is no ``text/plain`` part.
    """
    body = None
    part_charset = None
    # walk() yields the message itself and recurses into nested multiparts,
    # so a single pass covers both the simple and the multipart case.
    for part in msg.walk():
        if part.is_multipart():
            continue
        if part.get_content_type() == 'text/plain':
            body = part.get_payload(decode=True)
            part_charset = part.get_content_charset()

    if not isinstance(body, bytes):
        # Nothing to decode: no text/plain part was found.
        return body

    # Try the part's own charset first, then the others found in the message.
    candidates = [part_charset] if part_charset else []
    for name in msg.get_charsets():
        if name is not None and name not in candidates:
            candidates.append(name)

    for charset in candidates:
        try:
            return body.decode(charset)
        except (LookupError, UnicodeDecodeError):
            # Unknown codec name or wrong charset: try the next candidate.
            continue
    # No usable charset was declared; never hand raw bytes back to callers.
    return body.decode('utf-8', 'replace')


if __name__ == "__main__":
    # Convert every message of the mbox archive into its own .eml file,
    # echoing each Message-id as progress output.
    archive = mailbox.mbox('xxxxxx/topics.mbox')
    for thisemail in archive:
        print(thisemail['Message-id'])
        emlGenerator(getbodyfromemail(thisemail), thisemail)
    # Summary: `count` is the number of files written by emlGenerator.
    print("=========== DONE ============")
    print("Total ", count, " File")

1 个答案:

答案 0 :(得分:0)

要将.mbox转换为.msg文件格式,可以使用第三方库Aspose.Email for Python via .NET

这是一个功能强大的电子邮件编程API。该API既可用于基本的电子邮件管理功能(例如邮件内容编辑和附件处理),也可用于高级功能(例如邮件存储文件的管理,以及通过POP3、IMAP和SMTP等多种协议发送和接收电子邮件)。

例如,您可以看到下面的代码。

# Convert every message of an mbox file into its own .msg file.
# NOTE(review): `dir`, `MboxrdStorageReader` and `aspose` are not defined in
# this snippet; they are assumed to be provided by the surrounding program.
reader = MboxrdStorageReader(dir + "template.mbox", False)
try:
    index = 0
    eml = reader.read_next_message()
    # Read all messages in a loop
    while eml is not None:
        # show message subject
        print("Subject: " + eml.subject)
        # Save each message under its own name in MSG format; a fixed file
        # name would be overwritten on every iteration.
        eml.save("output_" + str(index) + ".msg", aspose.email.SaveOptions.default_msg_unicode)
        index += 1
        # get the next message
        eml = reader.read_next_message()
finally:
    # Release the reader even when reading or saving fails.
    reader.dispose()

我认为这种方法可能对您有用。

我是Aspose的开发人员布道者。