flask _pickle.PicklingError:

时间:2019-02-12 16:23:38

标签: python flask scikit-learn

我是flask的新手,我正在尝试在python中实现我的文本(词包)分类器模型,并通过flask Web应用程序进行部署。但是通过以下代码进入其他页面时出现错误:

通过使用朴素贝叶斯分类器来实现,它将在新闻页面上呈现它,从而给出pos或neg的结果。     从烧瓶导入烧瓶,render_template,url_for,请求     将熊猫作为pd导入     进口泡菜     从sklearn.externals导入joblib

import matplotlib.pyplot as plt
import csv
from textblob import TextBlob
import sklearn
import _pickle as cPickle
import numpy as np
from scipy.sparse.csr import csr_matrix
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import classification_report, f1_score, accuracy_score, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split,GridSearchCV,learning_curve
from sklearn.tree import DecisionTreeClassifier 
from sklearn.model_selection import StratifiedKFold
from sklearn.externals import joblib

app = Flask(__name__)



@app.route('/')
def home():
    return render_template('home.html')

@app.route('/predict',methods=['POST'])
def predict():
    messages = pd.read_csv('bitcoin_reddit.csv', usecols=["title","class"])
    messages['length'] = messages['title'].map(lambda text: len(text))

    def split_into_tokens(title):
        return TextBlob(title).words

    def split_into_lemmas(title):
        words = TextBlob(title).words.lower()
        # for each word, take its "base form" = lemma 
        return [word.lemma for word in words]

    bow_transformer = CountVectorizer(analyzer=split_into_lemmas).fit(messages['title'])
    messages_bow = bow_transformer.transform(messages['title'])

    tfidf_transformer = TfidfTransformer().fit(messages_bow) #normalization can be done with TF-IDF
    messages_tfidf = tfidf_transformer.transform(messages_bow)

    #Training Model NB
    spam_detector = MultinomialNB().fit(messages_tfidf, messages['class'])
    all_predictions = spam_detector.predict(messages_tfidf)

    msg_train, msg_test, label_train, label_test = \
    train_test_split(messages['title'], messages['class'], test_size=0.2)

    pipeline = Pipeline([
        ('bow', CountVectorizer(analyzer=split_into_lemmas)),  # strings to token integer counts
        ('tfidf', TfidfTransformer()),  # integer counts to weighted TF-IDF scores
        ('classifier', MultinomialNB())]) # train on TF-IDF vectors w/ Naive Bayes classifier


    scores = cross_val_score(pipeline,  # steps to convert raw messages into models
                             msg_train,  # training data
                             label_train,  # training labels
                             cv=10,  # split data randomly into 10 parts: 9 for training, 1 for scoring
                             scoring='accuracy')  # which scoring metric? 

    params = {
    'tfidf__use_idf': (True, False),
    'bow__analyzer': (split_into_lemmas, split_into_tokens),
    }

    grid = GridSearchCV(
    pipeline,  # pipeline from above
    params,  # parameters to tune via cross validation
    refit=True,  # fit using all available data at the end, on the best found param combination
    scoring='accuracy',  # what score are we optimizing?
    cv=StratifiedKFold(n_splits=5))  # what type of cross validation to use

    nb_detector = grid.fit(msg_train, label_train)
    predictions = nb_detector.predict(msg_test)

    joblib.dump(nb_detector, 'NB_model_bow.pkl')

    if request.method == 'POST':
        message = request.form['message']
        data = [message]
        vect = cv.transform(data).toarray()
        my_prediction = nb_detector.predict(vect)
    return render_template('result.html',prediction = my_prediction)



if __name__ == '__main__':
    app.run(debug=True)

但是我遇到了这种错误

_pickle.PicklingError: Can't pickle <function predict.<locals>.split_into_lemmas at 0x000001ABF618AE18>: it's not found as __main__.predict.<locals>.split_into_lemmas

0 个答案:

没有答案