为什么使用AveragingModels
时cross_val_score
的“权重”不起作用?
无论“权重”输入是什么,结果都是相同的。(score1 = score2)但是我的lasso
,ENet
,KRR
有不同的预测。有什么问题吗?
import numpy as np
from sklearn.linear_model import Lasso,ElasticNet
from sklearn.kernel_ridge import KernelRidge
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.preprocessing import RobustScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
lasso = make_pipeline(RobustScaler(), Lasso(alpha =0.00035, random_state=1))
ENet = make_pipeline(RobustScaler(), ElasticNet(alpha=0.0004, l1_ratio=0.885))
KRR = KernelRidge(alpha=[0.004], kernel='polynomial', degree= 0.39, coef0=2.5)
class AveragingModels(BaseEstimator, RegressorMixin, TransformerMixin):
def __init__(self, models ,weight=[]):
self.models = models
if weight:
self.weig=(np.array([weight])/sum(weight)).T
else:
self.weig=np.array([[1/len(models)]]*len(models))
def fit(self, X, y):
self.models_ = [clone(model) for model in self.models]
for model in self.models_:
model.fit(X, y)
return self
def predict(self, X):
self.multi_predictions = np.column_stack([model.predict(X) for model in self.models_])
return (self.multi_predictions@self.weig).squeeze()
def rmsle_cv(model):
rmse= np.sqrt(-cross_val_score(model, train.values, y.values, scoring="neg_mean_squared_error", cv = 5))
return rmse
averaged_models1 = AveragingModels(models = (ridge,lasso,ENet),weight=[1,0,0])
score1 = rmsle_cv(averaged_models1)
averaged_models2 = AveragingModels(models = (ridge,lasso,ENet),weight=[1,1,1])
score2 = rmsle_cv(averaged_models2)
print(score1.mean(),score1.std(),score2.mean(),score2.std())