我想创建一个使用两个Regressor(这里是Ridge和RandomForestRegressor)的Residual类,我希望能够将此类提供给GridSearchCV,但是设置这两个Regressor的参数对我来说是个问题。我试图将这些参数设置为残差类的默认参数,以便能够使用它们,但它不起作用。
我尝试使用self.parameter等。此外,如果我在类中设置参数(直接给它们提供数字)也很好
class Residual(base.BaseEstimator, base.RegressorMixin):
def __init__(self, alpha_ = 5, max_depth_ = 10, n_estimators_ = 10):
self.base = Ridge(alpha = alpha_)
self.residual = RandomForestRegressor(max_depth = max_depth_, n_estimators = n_estimators_)
def fit(self, X, y=None):
self.base = self.base.fit(X, y)
self.residual = self.residual.fit(X, np.array(y) - np.array(self.base.predict(X)))
return self
def predict(self, X):
return np.array(self.base.predict(X))+np.array(self.residual.predict(X))
pipline = Pipeline([('CST', ColumnSelectTransformer(['attributes'])),
('DF', DictFlatter()),
('DV', DictVectorizer(sparse = False)),
('res', Residual())])
param_grid = [{'res__alpha_': [.001, .005, .01, .05, .1],{'res__n_estimators_': [200],'res__max_depth_': [10]}]
GSCV = GridSearchCV(pipline, param_grid, cv = 10)
GSCV.fit(data, star_ratings)
我得到的是一个错误告诉我:
ValueError: array must not contain infs or NaNs
如果我执行以下操作,一切都很好:
class Residual(base.BaseEstimator, base.TransformerMixin, base.RegressorMixin):
def __init__(self, alpha_ = .01, max_depth_ = 10, n_estimators_ = 150):
self.base = Ridge(.01)
self.residual = RandomForestRegressor(max_depth = 10, n_estimators = 150)
def fit(self, X, y=None):
self.base = self.base.fit(X, y)
self.residual = self.residual.fit(X, np.array(y) - np.array(self.base.predict(X)))
return self
def predict(self, X):
return np.array(self.base.predict(X))+np.array(self.residual.predict(X))