在编写了下面这个程序之后,我的一位学长建议我不要把机器学习(模型训练)的代码放在后端里。我应该如何改用 pickle 文件来实现?
import pickle

import pandas as pd
from flask import Flask,render_template,url_for,request
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

try:
    # Standalone package; sklearn.externals.joblib was removed in scikit-learn 0.23.
    import joblib
except ImportError:
    from sklearn.externals import joblib
# The Flask application object; the route decorators below register views on it.
app = Flask(import_name=__name__)
@app.route('/')
def home():
    """Render the ``home.html`` template for the site root."""
    page = render_template('home.html')
    return page
# Fitted (vectorizer, classifier, accuracy) triple. It is built on first use so
# that the CSV is not re-read and the model is not re-trained on every request.
_SPAM_MODEL = None


def _get_spam_model():
    """Return the cached ``(vectorizer, classifier, accuracy)`` triple.

    On the first call this reads ``YoutubeSpamMergedData.csv``, fits a TF-IDF
    vectorizer (unigrams and bigrams) on the ``CONTENT`` column, trains a
    ``MultinomialNB`` classifier against the ``CLASS`` column on a 67/33
    split, and scores it on the held-out third. Later calls reuse the result.
    """
    global _SPAM_MODEL
    if _SPAM_MODEL is None:
        df = pd.read_csv("YoutubeSpamMergedData.csv")
        # ngram_range is documented as a tuple; recent scikit-learn releases
        # validate the type and reject a list.
        cv = TfidfVectorizer(ngram_range=(1, 2))
        X = cv.fit_transform(df["CONTENT"])
        X_train, X_test, y_train, y_test = train_test_split(
            X, df["CLASS"], test_size=0.33, random_state=42
        )
        clf = MultinomialNB()
        clf.fit(X_train, y_train)
        _SPAM_MODEL = (cv, clf, clf.score(X_test, y_test))
    return _SPAM_MODEL


@app.route('/predict', methods=['POST'])
def predict():
    """Classify the submitted ``comment`` form field and render the result.

    Renders ``result.html`` with ``prediction`` (the classifier's output
    array for the single comment) and ``accuracy`` (the held-out score of
    the model). A request without a ``comment`` field fails on the form
    lookup, as before.
    """
    # NOTE: to drop training from the backend entirely, replace this call with
    # loading a vectorizer/classifier pair that was fitted and dumped offline.
    cv, clf, acc = _get_spam_model()
    # The route only accepts POST, so no method check is needed here.
    comment = request.form['comment']
    # MultinomialNB.predict accepts the sparse matrix directly; no .toarray().
    my_prediction = clf.predict(cv.transform([comment]))
    return render_template('result.html', prediction=my_prediction, accuracy=acc)
# Start Flask's built-in development server only when this file is executed
# directly (the module-level name is `__name__`, with double underscores).
if __name__ == '__main__':
    # debug=True is intended for local development only.
    app.run(debug=True)
答案 0 :(得分:2)
您应该离线训练MultinomialNB分类器并将其转储到文件中。然后在Flask后端加载模型文件以预测请求数据。注意:训练时用于提取特征的向量化器(例如TfidfVectorizer)也必须一并保存,并在后端加载,这样才能把请求中的文本转换成与训练时一致的特征向量。
# train classifier model (run once, offline; not part of the Flask app)
try:
    import joblib
except ImportError:
    # Older scikit-learn releases (< 0.23) still shipped a vendored copy.
    from sklearn.externals import joblib
from sklearn.naive_bayes import MultinomialNB

# X (feature matrix) and y (labels) are expected to be prepared before this
# point; the snippet does not show how they are built.
clf = MultinomialNB()
clf.fit(X, y)
# NOTE(review): the vectorizer that produced X presumably has to be dumped as
# well, otherwise the backend cannot turn request text into matching features.
joblib.dump(clf, 'filename.pkl')
# flask backend
# Load the persisted classifier once at import time rather than per request.
classifier = joblib.load("filename.pkl")


@app.route("/predict", methods=["POST"])
def predict():
    """Predict the class of the request data with the pre-trained classifier.

    Returns the predicted label of the single sample as a string.
    """
    # get vect
    # NOTE(review): `vect` is a placeholder in this snippet. It presumably
    # stands for the request text transformed by the same vectorizer that was
    # fitted at training time; confirm and fill in before use.
    result = classifier.predict(vect)
    # A Flask view cannot return a NumPy array, so return the single predicted
    # label as text instead.
    return str(result[0])