我的项目采用典型的 Flask 项目结构。一切都工作正常,直到我尝试在 Flask 应用中加载一个 pickle 序列化的对象。这个 pickle 对象是我用另一个 Python 脚本创建的,并且依赖于某些自定义类。我认为问题在于:对象是在 `__main__` 模块中被序列化的,因此反序列化时会期望这些类也位于 `__main__` 中,但我还没有想出如何解决。我尝试把这些类放到 pipeline_classes.py 中并导入它们,但不起作用。任何想法都将不胜感激。
这是产生pickle对象的脚本:
train.py
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.multiclass import OneVsRestClassifier
from sklearn.pipeline import Pipeline
import pickle
from sklearn.externals import joblib
from sklearn.pipeline import FeatureUnion
from sklearn.feature_extraction import DictVectorizer
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.base import BaseEstimator, TransformerMixin
class ItemSelector(BaseEstimator, TransformerMixin):
    """Transformer that picks one entry out of its input by key.

    The key is given at construction time and used to index the input in
    ``transform``; fitting does nothing.
    """

    def __init__(self, column):
        # Key used to index the input passed to ``transform``.
        self.column = column

    def fit(self, X, y=None, **fit_params):
        """Do nothing and return ``self``; all arguments are ignored."""
        return self

    def transform(self, X):
        """Return ``X[self.column]``."""
        selected = X[self.column]
        return selected
class TextStats(BaseEstimator, TransformerMixin):
    """Wrap each incoming item in a one-key dict for DictVectorizer."""

    def fit(self, x, y=None):
        """Do nothing and return ``self``; the arguments are ignored."""
        return self

    def transform(self, posts):
        """Return a list with one ``{'REPORT_M': item}`` dict per item of *posts*."""
        records = []
        for text in posts:
            records.append({'REPORT_M': text})
        return records
def train():
    """Fit the text/category pipeline and dump the fitted model to ``et.pkl``.

    Reads the training data from ``data_df.pkl`` (path relative to the
    current working directory), fits the pipeline on it with ``data.y`` as
    the target, and writes the fitted pipeline to ``et.pkl`` with joblib.

    NOTE: the pipeline contains instances of ``ItemSelector`` and
    ``TextStats``. Pickle stores classes by module path, so when this file
    is executed as a script they are recorded as ``__main__.ItemSelector``
    and ``__main__.TextStats`` -- the loader must be able to resolve those
    names (this is the AttributeError shown in the traceback).
    """
    # Presumably a pandas DataFrame with 'TEXT' and 'CATEGORY' columns and a
    # ``y`` attribute -- inferred from the usages below; confirm.
    data = joblib.load('data_df.pkl')

    # Build the model: two feature branches joined by a FeatureUnion,
    # followed by an ExtraTrees classifier.
    classifier = Pipeline([
        ('union', FeatureUnion([
            ('text', Pipeline([
                ('selector', ItemSelector(column='TEXT')),
                # The original line lacked both closing parentheses, which
                # made the whole pipeline literal a syntax error.
                ('tfidf_vec', TfidfVectorizer(max_df=0.8)),
            ])),
            ('category', Pipeline([
                ('selector', ItemSelector(column='CATEGORY')),
                ('stats', TextStats()),
                ('vect', DictVectorizer())
            ]))
        ])),
        ('clf', ExtraTreesClassifier(n_estimators=30, max_depth=300, min_samples_split=6, class_weight='balanced'))])

    classifier.fit(data, data.y)
    joblib.dump(classifier, 'et.pkl')
# Train only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    train()
然后是我的 Flask 应用程序,我尝试在其中加载该 pickle 对象。
__init__.py
from flask import Flask
# Bind the custom pipeline classes as names in this package's namespace.
# NOTE(review): this does not make them attributes of ``__main__``, which is
# where the traceback shows pickle looking up ``ItemSelector``.
from .pipeline_classes import ItemSelector
from .pipeline_classes import TextStats
# Application object shared by the rest of the package.
app = Flask(__name__)
# Load settings from the object/module named 'config'.
app.config.from_object('config')
# Imported last on purpose: views.py does ``from app import app``, so ``app``
# must already be defined when this import runs.
from app import views
run.py
# Entry-point script: import the Flask application object and start it.
from app import app
# Starts the server with debug mode enabled.
# NOTE(review): when launched directly, this file is presumably the
# ``__main__`` module in which the unpickler searches for ``ItemSelector``
# -- confirm against the traceback.
app.run(debug=True)
views.py
from app import app
from flask import render_template
from .load import load
@app.before_first_request
def load_classifier():
    """Store the result of ``load()`` in the module-level ``loaded``.

    Registered with ``app.before_first_request``; prints a message before
    and after the load.
    """
    global loaded
    print("data loading")
    loaded = load()
    print("data loaded")
load.py
import pickle
import pandas as pd
def load():
    """Unpickle the object stored in ``et.pkl`` and return it.

    The path is relative to the current working directory. The original
    version assigned the result to a local and fell off the end of the
    function, so callers always received ``None``.

    Unpickling resolves every class by the module path recorded when the
    object was dumped, so those modules and classes must be importable
    under the same names here. Only load pickle files from a trusted
    source: unpickling can execute arbitrary code.

    Returns:
        The unpickled object.
    """
    clf_ = pd.read_pickle('et.pkl')
    return clf_
我收到以下错误:
builtins.AttributeError
AttributeError: module '__main__' has no attribute 'ItemSelector'
完整的 Traceback 如下:
Traceback (most recent call last) File
"/usr/local/lib/python3.5/dist-packages/flask/app.py", line 1836, in
__call__ return self.wsgi_app(environ, start_response) File "/usr/local/lib/python3.5/dist-packages/flask/app.py", line 1820, in
wsgi_app response = self.make_response(self.handle_exception(e)) File
"/usr/local/lib/python3.5/dist-packages/flask/app.py", line 1403, in
handle_exception reraise(exc_type, exc_value, tb) File
"/usr/local/lib/python3.5/dist-packages/flask/_compat.py", line 33, in
reraise raise value File
"/usr/local/lib/python3.5/dist-packages/flask/app.py", line 1817, in
wsgi_app response = self.full_dispatch_request() File
"/usr/local/lib/python3.5/dist-packages/flask/app.py", line 1470, in
full_dispatch_request
self.try_trigger_before_first_request_functions() File
"/usr/local/lib/python3.5/dist-packages/flask/app.py", line 1497, in
try_trigger_before_first_request_functions func() File
"/home/q423446/server/app/views.py", line 17, in load_classifier
loaded = load() File "/home/q423446/server/app/load.py", line 11, in
load clf_ = pd.read_pickle('app/ml/et_30.pkl') File
"/usr/local/lib/python3.5/dist-packages/pandas/io/pickle.py", line 68,
in read_pickle return try_read(path, encoding='latin1') File
"/usr/local/lib/python3.5/dist-packages/pandas/io/pickle.py", line 62,
in try_read return pc.load(fh, encoding=encoding, compat=True) File
"/usr/local/lib/python3.5/dist-packages/pandas/compat/pickle_compat.py",
line 117, in load return up.load() File
"/usr/lib/python3.5/pickle.py", line 1039, in load
dispatch[key[0]](self) File "/usr/lib/python3.5/pickle.py", line 1334,
in load_global klass = self.find_class(module, name) File
"/usr/lib/python3.5/pickle.py", line 1388, in find_class return
getattr(sys.modules[module], name) AttributeError: module '__main__'
has no attribute 'ItemSelector'
答案 0 :(得分:1)
尝试在第一个文件(生成 pickle 的脚本)中,在调用 train() 之前把类的 `__module__` 指向 pipeline_classes:
if __name__ == "__main__":
    # Classes defined in a script are pickled by reference as
    # ``__main__.<ClassName>``. Re-home every custom class used in the
    # pipeline so the pickle records ``pipeline_classes.<ClassName>``.
    # The original snippet only handled ItemSelector; TextStats is part of
    # the same pipeline and would still be recorded under ``__main__``.
    # NOTE(review): a module named exactly "pipeline_classes" must be
    # importable and expose these classes wherever the pickle is dumped and
    # loaded; the Flask app imports it as ``.pipeline_classes`` inside the
    # ``app`` package -- verify the name matches.
    ItemSelector.__module__ = "pipeline_classes"
    TextStats.__module__ = "pipeline_classes"
    train()