我目前正在处理音频数据,目标是按情感对音频进行分类。我采用的策略是把每一种情感分别与其余情感区分开(一对其余)。针对“开心”这一情感,我使用了下面这段代码,得到了不错的结果,然后保存了模型:
from sklearn.base import BaseEstimator
class MyClassifier(BaseEstimator):
    """Estimator that delegates to one of several classifiers chosen by name.

    The concrete classifier is built inside :meth:`fit` from
    ``classifier_type``, so a parameter search can vary the algorithm the
    same way it varies any other hyper-parameter.

    :param classifier_type: name of the classifier to build; must be one of
        the keys of ``_FACTORIES``.

    NOTE: pickle stores an instance as a reference to its class (module path
    plus class name), not the class source. To load a pickled object that
    contains a ``MyClassifier``, the loading process must be able to import
    this class from the same module path it had when the object was saved;
    otherwise unpickling fails with "Can't get attribute 'MyClassifier'".
    Defining the class in its own importable module (rather than in the
    script that is run directly) avoids that.
    """

    # Name -> zero-argument factory. The lambdas defer looking up each
    # classifier class until it is actually requested, so only the selected
    # classifier needs to be imported.
    _FACTORIES = {
        'XGBoost': lambda: XGBClassifier(),
        'RandomForest': lambda: RandomForestClassifier(),
        'CART': lambda: DecisionTreeClassifier(),
        'KNearestNeighbord': lambda: KNeighborsClassifier(),
        'GaussianNB': lambda: GaussianNB(),
        'SupportVectorMachine': lambda: SVC(kernel="linear"),
        'LogisticRegression': lambda: LogisticRegression(),
        'SGDClassifier': lambda: SGDClassifier(),
    }

    def __init__(self, classifier_type: str = 'XGBoost'):
        self.classifier_type = classifier_type

    def fit(self, X, y=None):
        """Build the classifier named by ``classifier_type`` and fit it.

        :param X: training samples, passed through to the wrapped classifier.
        :param y: training targets, passed through to the wrapped classifier.
        :returns: ``self``.
        :raises ValueError: if ``classifier_type`` is not a known name.
        """
        try:
            build = self._FACTORIES[self.classifier_type]
        except (KeyError, TypeError):
            # TypeError covers unhashable values, which are just as invalid.
            raise ValueError(
                f'Unknown classifier type: {self.classifier_type!r}. '
                f'Expected one of: {sorted(self._FACTORIES)}.'
            ) from None
        self.classifier_ = build()
        self.classifier_.fit(X, y)
        return self

    def predict(self, X, y=None):
        """Return the wrapped classifier's predictions for ``X``.

        ``y`` is accepted but not used.
        """
        return self.classifier_.predict(X)
# Feature selection: fit an ExtraTreesClassifier and let SelectFromModel
# reduce X to the selected columns.
# NOTE(review): the selector is fitted on the full X, y *before* the
# train/test split below, so the held-out rows influence which features are
# kept. Consider fitting the selector on the training portion only.
clf = ExtraTreesClassifier()
clf = clf.fit(X, y)
model = SelectFromModel(clf, prefit=True)
X_selected = model.transform(X)

# Split train and test.
training_size = 0.7  # 70% of the data set for training
testsize = 1 - training_size
seed = 30
X_train, X_test, y_train, y_test = train_test_split(
    X_selected, y,
    train_size=training_size, random_state=seed, test_size=testsize)

# Scale the features, then classify with whichever algorithm the search picks.
pipeline = Pipeline([
    ('scale', StandardScaler()),
    ('clf', MyClassifier()),
])

# The only searched parameter is which classifier MyClassifier wraps.
parameter_space = {
    'clf__classifier_type': [
        'XGBoost', 'RandomForest',
        'CART', 'KNearestNeighbord',
        'GaussianNB', 'SupportVectorMachine', 'LogisticRegression',
        'SGDClassifier',
    ],
}

from sklearn.model_selection import GridSearchCV

# Rank candidates by micro-averaged precision over 5 cross-validation folds.
prec_scorer = make_scorer(precision_score, average="micro")
search = GridSearchCV(pipeline, parameter_space, n_jobs=-1, cv=5,
                      scoring=prec_scorer)
search.fit(X_train, y_train)
print('Best model:\n', search.best_params_, search.best_score_)

# Save the model to disk.
# NOTE: the saved pipeline contains a MyClassifier instance, and pickle
# records only a reference to that class (module path + name). Any script
# that loads this file must be able to import MyClassifier from the same
# module path, otherwise loading fails with
# "Can't get attribute 'MyClassifier'".
filename = 'happy_MODEL.pkl'
with open(filename, 'wb') as model_file:  # closes the file even on error
    pickle.dump(search.best_estimator_, model_file)
但问题是,当我用保存的模型在新数据上评估准确率时,遇到了这个错误:“无法获取属性 'MyClassifier'”(Can't get attribute 'MyClassifier')。我不明白问题出在哪里。为了测试我的模型,我使用了以下代码:
# Load the saved pipeline and score it.
# NOTE: unpickling resolves classes by module path + name, so MyClassifier
# must be importable here under the same module path it had when the model
# was saved (e.g. import it from a shared module before loading); otherwise
# pickle raises "Can't get attribute 'MyClassifier'".
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
filename = 'happy_MODEL.pkl'
with open(filename, 'rb') as model_file:  # closes the file even on error
    loaded_model = pickle.load(model_file)
result = loaded_model.predict(X_selected)
print('\n', accuracy_score(y, result))
有人可以帮我吗?