我正在尝试执行贝叶斯优化来优化XGBoost Regressor的参数,遵循以下代码:
def xgboostcv(max_depth,
learning_rate,
n_estimators,
gamma,
min_child_weight,
max_delta_step,
subsample,
colsample_bytree,
reg_alpha,
reg_lambda,
silent=True,
nthread=-1,
random_state=1):
return cross_val_score(xgb.XGBRegressor(max_depth=int(max_depth),
learning_rate=learning_rate,
n_estimators=int(n_estimators),
silent=silent,
nthread=nthread,
gamma=gamma,
min_child_weight=min_child_weight,
max_delta_step=max_delta_step,
subsample=subsample,
colsample_bytree=colsample_bytree,
reg_alpha=reg_alpha,
reg_lambda = reg_lambda),
train,
y_train,
"mean_squared_error",
cv=5).mean()
xgboostBO = BayesianOptimization(xgboostcv,
{'max_depth': (2, 5),
'learning_rate': (0.01, 0.3),
'n_estimators': (1000, 2500),
'gamma': (1., 0.01),
'min_child_weight': (1, 10),
'max_delta_step': (0, 0.1),
'subsample': (0.5, 0.8),
'colsample_bytree' :(0.1, 0.99),
'reg_alpha':(0.1, 0.5),
'reg_lambda':(0.1, 0.9)
})
xgboostBO.maximize()
print('Final Results')
print('XGBOOST: %f' % xgboostBO.res['max']['max_val'])
此优化代替GridSearchCV和RandomizedSearchCV用于查找学习模型的最佳参数。查找最佳参数时,它必须优于GridSearchCV。但是我收到一个错误,无法找出问题所在。有人有建议吗?:
Initialization
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Step | Time | Value | colsample_bytree | gamma | learning_rate | max_delta_step | max_depth | min_child_weight | n_estimators | reg_alpha | reg_lambda | subsample |
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-90-397f04e73c8f> in <module>()
12 })
13
---> 14 xgboostBO.maximize()
15
16 print('Final Results')
/usr/local/lib/python2.7/dist-packages/bayes_opt/bayesian_optimization.pyc in maximize(self, init_points, n_iter, acq, kappa, xi, **gp_params)
241 if self.verbose:
242 self.plog.print_header()
--> 243 self.init(init_points)
244
245 y_max = self.space.Y.max()
/usr/local/lib/python2.7/dist-packages/bayes_opt/bayesian_optimization.pyc in init(self, init_points)
87 # Evaluate target function at all initialization points
88 for x in self.init_points:
---> 89 y = self._observe_point(x)
90
91 # Add the points from `self.initialize` to the observations
/usr/local/lib/python2.7/dist-packages/bayes_opt/bayesian_optimization.pyc in _observe_point(self, x)
102
103 def _observe_point(self, x):
--> 104 y = self.space.observe_point(x)
105 if self.verbose:
106 self.plog.print_step(x, y)
/usr/local/lib/python2.7/dist-packages/bayes_opt/target_space.pyc in observe_point(self, x)
137 # measure the target function
138 params = dict(zip(self.keys, x))
--> 139 y = self.target_func(**params)
140 self.add_observation(x, y)
141 return y
<ipython-input-89-3d7f26b78f91> in xgboostcv(max_depth, learning_rate, n_estimators, gamma, min_child_weight, max_delta_step, subsample, colsample_bytree, reg_alpha, reg_lambda, silent, nthread, random_state)
27 y_train,
28 "root_mean_squared_error",
---> 29 cv=5).mean()
/usr/local/lib/python2.7/dist-packages/sklearn/model_selection/_validation.pyc in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch)
340 n_jobs=n_jobs, verbose=verbose,
341 fit_params=fit_params,
--> 342 pre_dispatch=pre_dispatch)
343 return cv_results['test_score']
344
/usr/local/lib/python2.7/dist-packages/sklearn/model_selection/_validation.pyc in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score)
190
191 """
--> 192 X, y, groups = indexable(X, y, groups)
193
194 cv = check_cv(cv, y, classifier=is_classifier(estimator))
/usr/local/lib/python2.7/dist-packages/sklearn/utils/validation.pyc in indexable(*iterables)
227 else:
228 result.append(np.array(X))
--> 229 check_consistent_length(*result)
230 return result
231
/usr/local/lib/python2.7/dist-packages/sklearn/utils/validation.pyc in check_consistent_length(*arrays)
202 if len(uniques) > 1:
203 raise ValueError("Found input variables with inconsistent numbers of"
--> 204 " samples: %r" % [int(l) for l in lengths])
205
206
ValueError: Found input variables with inconsistent numbers of samples: [1456, 1456, 23]
答案 0 :(得分:0)
从代码中删除“ mean_squared_error”