如何使用pickle序列化

时间:2018-05-17 07:55:42

标签: python machine-learning flask

写完下面这个程序之后,我的一位前辈建议我不要把机器学习(训练)部分放在后端里。我该如何使用pickle文件来做到这一点?

from flask import Flask,render_template,url_for,request
import pandas as pd

import pickle

from sklearn.naive_bayes import MultinomialNB
from sklearn.externals import joblib

# Flask application object; the @app.route decorators below register views on it.
app = Flask(__name__)

@app.route('/')
def home():
    """Serve the landing page by rendering the ``home.html`` template."""
    landing_page = render_template('home.html')
    return landing_page

# Holds the fitted (vectorizer, classifier, accuracy) triple so the CSV is
# read and the model trained once per process instead of on every request.
_SPAM_MODEL_CACHE = {}


def _get_spam_model():
    """Return the fitted ``(vectorizer, classifier, accuracy)`` triple.

    On the first call this reads ``YoutubeSpamMergedData.csv``, fits a TF-IDF
    vectorizer (unigrams and bigrams) on the ``CONTENT`` column, trains a
    ``MultinomialNB`` classifier on a 67/33 train/test split of the data and
    scores it on the held-out part. Later calls return the cached result.
    """
    if 'model' not in _SPAM_MODEL_CACHE:
        # Imported locally: TfidfVectorizer was never imported by this file,
        # which made every request fail with NameError.
        from sklearn.feature_extraction.text import TfidfVectorizer
        from sklearn.model_selection import train_test_split
        from sklearn.naive_bayes import MultinomialNB

        df = pd.read_csv("YoutubeSpamMergedData.csv")
        corpus = df['CONTENT']
        labels = df['CLASS']

        # ngram_range is documented as a (min_n, max_n) tuple, not a list.
        cv = TfidfVectorizer(ngram_range=(1, 2))
        X = cv.fit_transform(corpus)
        X_train, X_test, y_train, y_test = train_test_split(
            X, labels, test_size=0.33, random_state=42)

        clf = MultinomialNB()
        clf.fit(X_train, y_train)
        acc = clf.score(X_test, y_test)

        _SPAM_MODEL_CACHE['model'] = (cv, clf, acc)
    return _SPAM_MODEL_CACHE['model']


@app.route('/predict',methods=['POST'])
def predict():
    """Classify the posted ``comment`` form field and render the result.

    Renders ``result.html`` with ``prediction`` (the classifier's output for
    the single comment) and ``accuracy`` (the hold-out score of the model).
    """
    cv, clf, acc = _get_spam_model()

    # The route only accepts POST, so no request.method check is needed; the
    # old check would have left my_prediction unbound had it ever been false.
    comment = request.form['comment']
    vect = cv.transform([comment]).toarray()
    my_prediction = clf.predict(vect)
    return render_template('result.html',prediction = my_prediction, accuracy = acc)

# Start Flask's development server only when this file is run as a script.
# The module attribute is ``__name__`` (double underscores); the previous
# single-underscore ``_name_`` was an undefined name and raised NameError.
if __name__ == '__main__':
    app.run(debug=True)

1 个答案:

答案 0 :(得分:2)

您应该离线训练MultinomialNB分类器并将其转储到文件中。然后在Flask后端加载模型文件以预测请求数据。

# train classifier model (run offline, not inside the Flask app)

try:
    # Standalone joblib package; sklearn.externals.joblib no longer exists
    # in current scikit-learn releases.
    import joblib
except ImportError:
    # Fallback for old scikit-learn versions that bundled joblib.
    from sklearn.externals import joblib
from sklearn.naive_bayes import MultinomialNB

# X, y: feature matrix and labels prepared beforehand (not shown here).
clf = MultinomialNB()
clf.fit(X, y)

# NOTE(review): the vectorizer fitted to produce X presumably has to be
# dumped as well, so the backend can transform request text the same way —
# confirm against how X is built.
joblib.dump(clf, 'filename.pkl')


# flask backend

# Load the classifier once at start-up instead of training per request.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
classifier = joblib.load("filename.pkl")

@app.route("/predict", methods=["POST"])
def predict():
    """Predict a label for the request data with the pre-trained classifier.

    Returns the first predicted label as text.
    """

    # get vect: placeholder from the original answer — build the feature
    # vector for the request data here before calling predict().

    result = classifier.predict(vect)

    # predict() returns an array, which is not a valid Flask response;
    # return the single label as a string instead.
    return str(result[0])