我已将文件托管在远程flask服务器上。使用以下命令启动服务器:
from bs4 import BeautifulSoup
from html2text import HTML2Text
import pandas as pd
import easyimap
import getpass
import email
import base64
import os
import email
import mimetypes
from datetime import datetime
from email.utils import parsedate_to_datetime
def to_text(html, rehtml=False):
    """Convert an HTML string to plain text using html2text.

    The converter is configured to ignore links, images, anchors and
    emphasis markup, and not to wrap links.

    Args:
        html: HTML markup to convert.
        rehtml: When true, newlines in the result are turned into
            ``<br/>`` tags and backslashes are removed.

    Returns:
        The converted text with surrounding whitespace stripped.
    """
    converter = HTML2Text()
    settings = {
        'wrap_links': False,
        'skip_internal_links': True,
        'inline_links': True,
        'ignore_anchors': True,
        'ignore_images': True,
        'ignore_emphasis': True,
        'ignore_links': True,
    }
    for option, value in settings.items():
        setattr(converter, option, value)

    plain = converter.handle(html).strip(' \t\n\r')
    if not rehtml:
        return plain
    # NOTE(review): the source indentation was lost; both replacements are
    # assumed to belong to the rehtml branch -- confirm against the original.
    return plain.replace('\n', '<br/>').replace('\\', '')
# --- Connection settings ---------------------------------------------------
# The password is prompted for interactively instead of being stored here.
imap_password = getpass.getpass()
user = 'email@email.com'
host = 'outlook.office365.com'
password = imap_password

# Mailboxes to export. The inner double quotes are part of each string
# (presumably IMAP quoting for names containing spaces -- confirm).
folders = (
    '"INBOX/Americas/Not Raised"',
    '"INBOX/Americas/Raised"',
    '"INBOX/APAC/Not Raised"',
    '"INBOX/APAC/Raised"',
    '"INBOX/Consolidated/Not Raised"',
    '"INBOX/Consolidated/Raised"',
    '"INBOX/EMEA"',
    '"INBOX/EMEA/Not Raised"',
    '"INBOX/EMEA/Raised"',
)

_COLUMNS = [
    'Subject', 'Sender', 'From', 'To', 'Body', 'References', 'content_type',
    'local_date_time', 'Classification', 'in_reply_to', 'return_path',
    'mime_version', 'message_id', 'folder_name',
]


def _local_timestamp(date_header):
    """Format an RFC 2822 date header as local time 'YYYY-MM-DD HH:MM:SS'."""
    parsed = parsedate_to_datetime(date_header)
    local = datetime.fromtimestamp(parsed.timestamp())
    return local.strftime('%Y-%m-%d %H:%M:%S')


def _mail_to_row(mail, mailbox):
    """Build one DataFrame row (column name -> value) from a fetched mail."""
    # The classification comes from the Keywords header of the raw message.
    message = email.message_from_bytes(mail.raw)
    return {
        'Body': to_text(mail.body, rehtml=False),
        'Subject': mail.title,
        'Sender': mail.sender,
        'From': mail.from_addr,
        'To': mail.to,
        'References': mail.references,
        'content_type': mail.content_type,
        'local_date_time': _local_timestamp(mail.date),
        'Classification': message['Keywords'],
        'in_reply_to': mail.in_reply_to,
        'return_path': mail.return_path,
        'mime_version': mail.mime_version,
        'message_id': mail.message_id,
        'folder_name': mailbox,
    }


def _safe_name(name, fallback):
    """Reduce *name* to a single safe path component, or return *fallback*."""
    text = str(name) if name else ''
    cleaned = ''.join(
        '_' if ch in '\\/:*?"<>|' or ord(ch) < 32 else ch for ch in text
    ).strip(' .')
    return cleaned or fallback


def _dump_mail_parts(mail):
    """Write each non-multipart MIME part to ./emails/<date>/<subject>/."""
    message = email.message_from_bytes(mail.raw)
    try:
        stamp = _local_timestamp(mail.date)
    except (TypeError, ValueError):
        stamp = 'unknown-date'
    # NOTE: the date folder keeps the original 'HH:MM:SS' form; ':' is not a
    # valid path character on Windows.
    save_path = os.path.join(
        os.getcwd(), 'emails', stamp, _safe_name(mail.title, 'no-subject'))
    os.makedirs(save_path, exist_ok=True)

    counter = 1
    for part in message.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            # Nothing decodable to write for this part.
            continue
        # Attachment names come from the mail itself; never trust them as paths.
        filename = _safe_name(part.get_filename(), '')
        if not filename:
            content_type = part.get_content_type()
            ext = mimetypes.guess_extension(content_type)
            if not ext:
                # Test 'html' first: 'text/html' also contains 'text'.
                if 'html' in content_type:
                    ext = '.html'
                elif 'text' in content_type:
                    ext = '.txt'
                else:
                    ext = '.bin'
            filename = 'msg-part-%08d%s' % (counter, ext)
        counter += 1
        with open(os.path.join(save_path, filename), 'wb') as fp:
            fp.write(payload)


# --- Export ----------------------------------------------------------------
# Rows are collected in a list and turned into a DataFrame once at the end.
# Mail ids are not assumed to be unique across folders: the index may hold
# the same id more than once, and 'folder_name' tells those rows apart
# (keying rows by id alone would let later folders overwrite earlier ones).
rows = []
row_ids = []
for mailbox in folders:
    # read_only=True ensures fetching does not mark the mail as read.
    imapper = easyimap.connect(host, user, password, mailbox, read_only=True)
    try:
        # Fetch each mail up to the limit.
        for mail_id in imapper.listids(limit=5000):
            mail = None
            try:
                mail = imapper.mail(mail_id, include_raw=True)
                row = _mail_to_row(mail, mailbox)
            except Exception as exc:
                # Best effort: report the failure, save the raw parts of the
                # mail for inspection when it was fetched, and move on.
                print('Could not process mail %s in %s: %r'
                      % (mail_id, mailbox, exc))
                if mail is not None:
                    try:
                        _dump_mail_parts(mail)
                    except Exception as dump_exc:
                        print('Could not save mail %s in %s to disk: %r'
                              % (mail_id, mailbox, dump_exc))
                continue
            rows.append(row)
            row_ids.append(mail_id)
    finally:
        # Always release the connection, even if listing or fetching failed.
        imapper.quit()

df = pd.DataFrame(rows, columns=_COLUMNS, index=row_ids)
在应用内的python代码中,
flask run --host= ...
服务器启动时没有任何问题,但是我无法远程访问该页面。
我也检查了端口。没问题。
答案 0 :(得分:1)
您不应该使用Flask内置的开发服务器来托管应用,而应使用Gunicorn这样的WSGI服务器,并配合Nginx作为反向代理。
一种简便的解决方案是使用PythonAnywhere(请参阅https://help.pythonanywhere.com/pages/Flask/),但显然有局限性。
此外:解决此问题的方法不止一种。有很多,例如:https://www.digitalocean.com/community/tutorials/how-to-serve-flask-applications-with-uswgi-and-nginx-on-ubuntu-18-04 或 https://dev.to/ishankhare07/nginx-as-reverse-proxy-for-a-flask-app-using-docker-3ajg
要查看服务器的外观,请参阅: