我有一个名为 app.py 的脚本,它在 Docker 容器中运行,内容如下:
from my_ml.flask_blueprints import construct_ml_blueprint
from my_ml.pipeline import PredictPipeline
from flask import Flask, jsonify, Response, request
import re
import json
import os
import operator
from operator import itemgetter
import numpy as np
import pandas as pd
import joblib
from scipy.sparse import csr_matrix, hstack
import pickle
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.validation import check_is_fitted, column_or_1d
class LabelEncoderExtension(LabelEncoder):
    """LabelEncoder that can tolerate labels/codes not seen during fitting.

    With ``ignore_unknown=False`` an unknown value raises ``ValueError``.
    With ``ignore_unknown=True`` unknown labels are encoded as
    ``unknown_encoded_value`` and unknown codes are decoded as
    ``unknown_original_value``.

    NOTE: pickle stores a reference to the module this class is defined in,
    not the class body. A pickled instance can only be loaded where the class
    is reachable under that same module name.
    """

    def __init__(self, ignore_unknown=False,
                 unknown_original_value='NA',
                 unknown_encoded_value=-1):
        """Store the unknown-handling configuration.

        Args:
            ignore_unknown: If true, substitute placeholders for unknown
                values instead of raising.
            unknown_original_value: Placeholder returned by
                ``inverse_transform`` for codes that map to no class.
            unknown_encoded_value: Placeholder returned by ``transform`` for
                labels that are not in ``classes_``.
        """
        self.ignore_unknown = ignore_unknown
        self.unknown_original_value = unknown_original_value
        self.unknown_encoded_value = unknown_encoded_value

    def transform(self, y):
        """Encode labels as integer positions in ``classes_``.

        Args:
            y: Array-like of labels; flattened to 1-D by ``column_or_1d``.

        Returns:
            Integer array of codes; labels not in ``classes_`` become
            ``unknown_encoded_value`` when ``ignore_unknown`` is true.

        Raises:
            ValueError: If ``y`` has labels not in ``classes_`` and
                ``ignore_unknown`` is false.
        """
        check_is_fitted(self, 'classes_')
        y = column_or_1d(y, warn=True)
        known = np.isin(y, self.classes_)
        if not self.ignore_unknown and not np.all(known):
            raise ValueError("y contains new labels: %s"
                             % str(np.setdiff1d(y, self.classes_)))
        # searchsorted assumes classes_ is sorted (presumably guaranteed by
        # LabelEncoder.fit); positions computed for unknown labels are
        # meaningless and are overwritten on the next line.
        encoded = np.searchsorted(self.classes_, y)
        encoded[~known] = self.unknown_encoded_value
        return encoded

    def inverse_transform(self, y):
        """Decode integer codes back to the original labels.

        Args:
            y: Array-like of integer codes.

        Returns:
            Object-dtype array of labels; codes outside
            ``0 .. len(classes_) - 1`` become ``unknown_original_value`` when
            ``ignore_unknown`` is true.

        Raises:
            ValueError: If ``y`` has codes outside the valid range and
                ``ignore_unknown`` is false.
        """
        check_is_fitted(self, 'classes_')
        y = np.asarray(y)
        labels = np.arange(len(self.classes_))
        known = np.isin(y, labels)
        if not self.ignore_unknown and not np.all(known):
            # Compare against the valid codes, not the class labels, so the
            # message lists the offending codes.
            raise ValueError("y contains new labels: %s"
                             % str(np.setdiff1d(y, labels)))
        # Start from the placeholder and index classes_ only with known codes,
        # so an out-of-range code cannot raise IndexError.
        decoded = np.full(y.shape, self.unknown_original_value, dtype=object)
        decoded[known] = self.classes_[y[known].astype(int)]
        return decoded
# ...
# More code here
def run_app():
with open('./user-config.json', 'r') as f:
user_config = json.load(f)
pipeline = PredictPipeline(user_config=user_config)
# Load the models
model_predictor = joblib.load('model.pkl')
encoders = joblib.load('encoders.pkl')
@pipeline.step('predict')
def predict(data, model, params):
# ...
# More code here
ml_api = construct_ml_blueprint(pipeline)
app = Flask(__name__)
app.register_blueprint(ml_api)
print('App loaded')
return app
def main():
    """Build the application via ``run_app``; the returned app is discarded."""
    _unused_app = run_app()
if __name__ != '__main__':
    # Imported as a module (e.g. by a WSGI server): expose the app object.
    gunicorn_app = run_app()
else:
    # Executed directly as a script.
    test = True
    if not test:
        main()
    else:
        # Run Flask's built-in server in debug mode on all interfaces.
        test_app = run_app()
        test_app.debug = True
        test_app.run(host='0.0.0.0')
encoders 是一个标签编码器列表,其中每个编码器都是通过扩展 scikit-learn 的 LabelEncoder 得到的——我从 LabelEncoder 继承出的新自定义类名为 LabelEncoderExtension。
运行脚本时,出现以下错误:
File "/app.py", line 708, in <module>
gunicorn_app = run_app()
File "/app.py", line 569, in run_app
encoders = joblib.load('encoders.pkl')
File "/usr/local/lib/python3.8/site-packages/joblib/numpy_pickle.py", line 605, in load
obj = _unpickle(fobj, filename, mmap_mode)
File "/usr/local/lib/python3.8/site-packages/joblib/numpy_pickle.py", line 529, in _unpickle
obj = unpickler.load()
File "/usr/local/lib/python3.8/pickle.py", line 1210, in load
dispatch[key[0]](self)
File "/usr/local/lib/python3.8/pickle.py", line 1526, in load_global
klass = self.find_class(module, name)
File "/usr/local/lib/python3.8/pickle.py", line 1581, in find_class
return getattr(sys.modules[module], name)
AttributeError: module '__main__' has no attribute 'LabelEncoderExtension'
我该如何解决?
我在网上和 StackOverflow 上找到了类似的帖子,也尝试了其中的一些建议,但都没有奏效。
其中写得最好的两个是: