我在代码中使用了 sklearn 管道(Pipeline),并保存了管道对象,以便在另一个环境中部署。管道中包含一个用于删除特征的自定义类 DropFeatures。模型可以成功保存,但当我在另一个安装了相同版本 sklearn 的环境中加载该管道对象时,会引发错误。如果管道中不包含自定义类 DropFeatures,则一切运行正常。代码如下:
import pandas as pd
from sklearn import svm
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble import RandomForestClassifier
# NOTE: sklearn.externals.joblib was deprecated in scikit-learn 0.21 and
# removed in 0.23; on newer versions use the standalone `joblib` package.
from sklearn.externals import joblib
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.pipeline import Pipeline
# Load the Iris dataset and split the target column off the feature frame.
df = pd.read_csv('Iris.csv')
label = 'Species'
# pop() returns the target column and removes it from df in one step, so the
# column name comes from `label` instead of being repeated as a literal.
labels = df.pop(label)
# Set up a pipeline that first drops unwanted columns, then uses a
# feature-selection preprocessor to keep the single best feature (k=1).
# The pipeline then uses a RandomForestClassifier to train the model.
class DropFeatures(BaseEstimator, TransformerMixin):
    """Pipeline step that removes a fixed set of columns from a DataFrame.

    Parameters
    ----------
    features_to_drop : list of column labels, optional
        Columns removed by :meth:`transform`. ``None`` (the default) or an
        empty list leaves the input untouched.

    Notes
    -----
    joblib/pickle store only a reference to this class (its module and
    name), not its code. Any environment that loads a dumped pipeline
    containing this step must be able to import ``DropFeatures`` from the
    same module path that was used when the pipeline was dumped.
    """

    def __init__(self, features_to_drop=None):
        # scikit-learn expects every constructor argument to be stored,
        # unmodified, on an attribute of the same name: get_params(),
        # clone() and the estimator repr all look it up that way.
        self.features_to_drop = features_to_drop

    @property
    def features(self):
        """Backward-compatible alias for ``features_to_drop``."""
        return self.features_to_drop

    @features.setter
    def features(self, value):
        self.features_to_drop = value

    def fit(self, X, y=None):
        """Do nothing (there is no state to learn) and return ``self``."""
        return self

    def transform(self, X):
        """Return ``X`` without the configured columns.

        Parameters
        ----------
        X : object exposing ``drop(labels, axis=1)``
            Input data; presumably a pandas DataFrame.

        Returns
        -------
        ``X`` itself when there is nothing to drop, otherwise the new
        object returned by ``X.drop``; the input is not modified.
        """
        to_drop = self.features_to_drop
        # Guard against the None default before calling len().
        if to_drop is None or len(to_drop) == 0:
            return X
        # drop() without inplace=True already returns a new object, so no
        # explicit copy of X is needed to protect the caller's data.
        return X.drop(to_drop, axis=1)
# Chain the steps: drop the 'Id' column, keep the single best feature by
# chi-squared score, then classify with a random forest.
pipeline = Pipeline([
    ('drop_features', DropFeatures(['Id'])),
    ('feature_selection', SelectKBest(chi2, k=1)),
    ('classification', RandomForestClassifier()),
])
pipeline.fit(df, labels)

# Smoke-test the fitted pipeline on a few training rows. Any query passed to
# predict() must have the same columns as the training frame (including
# 'Id', which the first step removes). Replace with real data as needed.
query = df.head()
print(pipeline.predict(query))

# Export the fitted pipeline to a file. The dump references DropFeatures by
# module path and name only, so the loading environment must be able to
# import that class from the same location.
joblib.dump(pipeline, 'model.joblib')
在另一个环境中加载 model.joblib 时出现错误。下面是加载模型的代码,错误信息见截图:
from sklearn.externals import joblib
# The dumped pipeline contains a DropFeatures step, which pickle restores by
# importing the class from the module path recorded at dump time. That class
# must therefore be importable here before joblib.load() is called.
# Load the same file name that the training script wrote with joblib.dump().
model = joblib.load('model.joblib')
print(model)