I am trying to deploy an ML model with Flask on Heroku. These are the files I have: model.py, app.py, requirement.txt, Procfile. GitHub link: https://github.com/dee-walia20/clickbait_detector
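In case it matters, the Procfile is the usual one-line web process entry for Flask on Heroku, along the lines of:

web: gunicorn app:app

(reproduced from memory; see the repo for the exact line).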
model.py code snippet:
# Importing the libraries
import pandas as pd
import pickle
from sklearn.base import BaseEstimator, TransformerMixin
import nltk
import re
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet, stopwords
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
#Import dataset and assign Input & Output
data = pd.read_csv('clickbait_data.csv')
X=data.headline
y=data.clickbait
# Custom transformer built on sklearn for text preprocessing
class TextPreprocessor(BaseEstimator, TransformerMixin):
    def __init__(self, lemmatizer=None, stopwords=None, token=True):
        self.token = token
        self.lemmatizer = lemmatizer
        self.stopwords = stopwords

    def fit(self, X, y=None):
        return self

    def transform(self, X, y=None):
        if self.token:
            # NLTK data is downloaded on every transform call,
            # including at prediction time
            nltk.download('averaged_perceptron_tagger')
            nltk.download('wordnet')
            X = X.apply(lambda x: re.sub(r'\d+', "", x))   # strip digits
            X = X.apply(lambda x: re.sub(r'\W', " ", x))   # replace non-word chars with spaces
            X = X.apply(lambda x: re.sub(r'\s+', " ", x))  # collapse repeated whitespace
            X = X.apply(lambda x: x.lower())
            self.lemmatizer = WordNetLemmatizer()
            nltk.download("stopwords")
            self.stopwords = stopwords.words('english')
            X = X.apply(lambda x: " ".join([self.lemmatizer.lemmatize(word, self._get_wordnet_pos(word))
                                            for word in x.split() if word not in self.stopwords]))
        return X

    def _get_wordnet_pos(self, word):
        tag = nltk.pos_tag([word])[0][1][0].upper()
        tag_dict = {'J': wordnet.ADJ,
                    'N': wordnet.NOUN,
                    'V': wordnet.VERB,
                    'R': wordnet.ADV}
        return tag_dict.get(tag, wordnet.NOUN)
#Splitting Training and Test Set
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.20, random_state=10)
#Creating the pipeline for End to end ML model
tp=TextPreprocessor()
tf=TfidfVectorizer()
model=MultinomialNB()
pipeline=make_pipeline(tp,tf,model)
# Fitting model with training data
pipeline.fit(X_train, y_train)
# Saving model to disk
pickle.dump(pipeline, open('pipeline.pkl','wb'), protocol=-1)
# Loading model to check performance on the test set
if __name__ == "__main__":
    saved_pipeline = pickle.load(open('pipeline.pkl', 'rb'))
    print("Classification Report\n", classification_report(y_test, saved_pipeline.predict(X_test)))
app.py code snippet:
import pandas as pd
from flask import Flask, request, render_template
import pickle
from model import TextPreprocessor  # note: importing model also runs its top-level training code

app = Flask(__name__)
model = pickle.load(open('pipeline.pkl', 'rb'))  # load the pickled pipeline once at startup

@app.route('/')
def home():
    return render_template('index.html')

@app.route('/predict', methods=['GET', 'POST'])
def predict():
    '''
    For rendering results on the HTML GUI
    '''
    # wrap the submitted headline in a Series, since the pipeline's
    # transformer works on Series.apply
    final_features = pd.Series(data=str(request.form['headline']))
    prediction = model.predict(final_features)
    output = prediction[0]
    return render_template('result.html', prediction=output)

if __name__ == "__main__":
    app.run(debug=True)
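A minimal local check of the /predict endpoint via Flask's test client looks like this (sketch; assumes pipeline.pkl and the templates are in place, and the headline value is made up):

from app import app

# exercise the POST path the HTML form would use
with app.test_client() as client:
    resp = client.post('/predict', data={'headline': "You Won't Believe This"})
    print(resp.status_code, resp.data[:100])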
Error message:
2020-05-02T11:25:36.304845+00:00 heroku[router]: at=error code=H12 desc="Request timeout" method=GET path="/favicon.ico" host=clickbait-headline-detector.herokuapp.com request_id=3aa2f839-109a-4c41-a121-3738e440a62f fwd="180.151.118.230" dyno=web.1 connect=1ms service=30001ms status=503 bytes=0 protocol=https
I have read about the 30-second request timeout behind Heroku's H12 error and the suggested fix of moving long-running work into a background/worker task. But I don't understand which job in my app is the long-running one here, or how to set up such a worker.
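To show what I mean: the worker pattern I keep seeing in Heroku's docs uses RQ with a second Procfile entry (worker: python worker.py), roughly like the sketch below, but I don't see which part of my predict flow would move into it (all names here come from the docs' example, not from my code):

import os
import redis
from rq import Worker, Queue, Connection

# connect to the Redis add-on Heroku provisions (the add-on sets REDIS_URL)
conn = redis.from_url(os.environ.get('REDIS_URL', 'redis://localhost:6379'))

if __name__ == '__main__':
    # listen on the default queue and process jobs enqueued by the web dyno
    with Connection(conn):
        Worker([Queue('default')]).work()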