我是flask的新手,我正在尝试在python中实现我的文本(词包)分类器模型,并通过flask Web应用程序进行部署。但是通过以下代码进入其他页面时出现错误:
通过使用朴素贝叶斯分类器来实现,它将在新闻页面上呈现它,从而给出pos或neg的结果。 从烧瓶导入烧瓶,render_template,url_for,请求 将熊猫作为pd导入 进口泡菜 从sklearn.externals导入joblib
import matplotlib.pyplot as plt
import csv
from textblob import TextBlob
import sklearn
import _pickle as cPickle
import numpy as np
from scipy.sparse.csr import csr_matrix
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer, TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC, LinearSVC
from sklearn.metrics import classification_report, f1_score, accuracy_score, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split,GridSearchCV,learning_curve
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.externals import joblib
app = Flask(__name__)
@app.route('/')
def home():
return render_template('home.html')
@app.route('/predict',methods=['POST'])
def predict():
messages = pd.read_csv('bitcoin_reddit.csv', usecols=["title","class"])
messages['length'] = messages['title'].map(lambda text: len(text))
def split_into_tokens(title):
return TextBlob(title).words
def split_into_lemmas(title):
words = TextBlob(title).words.lower()
# for each word, take its "base form" = lemma
return [word.lemma for word in words]
bow_transformer = CountVectorizer(analyzer=split_into_lemmas).fit(messages['title'])
messages_bow = bow_transformer.transform(messages['title'])
tfidf_transformer = TfidfTransformer().fit(messages_bow) #normalization can be done with TF-IDF
messages_tfidf = tfidf_transformer.transform(messages_bow)
#Training Model NB
spam_detector = MultinomialNB().fit(messages_tfidf, messages['class'])
all_predictions = spam_detector.predict(messages_tfidf)
msg_train, msg_test, label_train, label_test = \
train_test_split(messages['title'], messages['class'], test_size=0.2)
pipeline = Pipeline([
('bow', CountVectorizer(analyzer=split_into_lemmas)), # strings to token integer counts
('tfidf', TfidfTransformer()), # integer counts to weighted TF-IDF scores
('classifier', MultinomialNB())]) # train on TF-IDF vectors w/ Naive Bayes classifier
scores = cross_val_score(pipeline, # steps to convert raw messages into models
msg_train, # training data
label_train, # training labels
cv=10, # split data randomly into 10 parts: 9 for training, 1 for scoring
scoring='accuracy') # which scoring metric?
params = {
'tfidf__use_idf': (True, False),
'bow__analyzer': (split_into_lemmas, split_into_tokens),
}
grid = GridSearchCV(
pipeline, # pipeline from above
params, # parameters to tune via cross validation
refit=True, # fit using all available data at the end, on the best found param combination
scoring='accuracy', # what score are we optimizing?
cv=StratifiedKFold(n_splits=5)) # what type of cross validation to use
nb_detector = grid.fit(msg_train, label_train)
predictions = nb_detector.predict(msg_test)
joblib.dump(nb_detector, 'NB_model_bow.pkl')
if request.method == 'POST':
message = request.form['message']
data = [message]
vect = cv.transform(data).toarray()
my_prediction = nb_detector.predict(vect)
return render_template('result.html',prediction = my_prediction)
if __name__ == '__main__':
app.run(debug=True)
但是我遇到了这种错误
_pickle.PicklingError: Can't pickle <function predict.<locals>.split_into_lemmas at 0x000001ABF618AE18>: it's not found as __main__.predict.<locals>.split_into_lemmas