我正在尝试使用hyperopt查找RandomForestClassifier的参数。这是我的代码:
# Load the wine data; the two values returned by load_wine are unpacked into
# X and y, which are later passed to cross_val_score as X= and y=.
X, y = load_wine(return_X_y=True)
def rf_neg_score(params):
    """Return the negated mean cross-validation score for one parameter set.

    Args:
        params: Mapping that must contain the keys ``'X'``, ``'y'``, ``'cv'``
            and ``'scoring'``.  Every remaining entry is forwarded to
            ``RandomForestClassifier`` as a keyword argument.

    Returns:
        The mean of the cross-validation scores, negated so that a
        minimizer such as ``fmin`` ends up maximizing the score.

    Raises:
        KeyError: If one of the four required keys is missing.
    """
    # Pop from a shallow copy so the caller's mapping is left untouched.
    params = dict(params)
    X, y = params.pop('X'), params.pop('y')
    cv = params.pop('cv')
    scoring = params.pop('scoring')
    # Whatever is left after the pops are the classifier's own keyword args.
    rf_clf = RandomForestClassifier(**params)
    score = cross_val_score(rf_clf, X=X, y=y, n_jobs=-1, scoring=scoring,
                            cv=cv).mean()
    return -score
# Search space handed to fmin: three hp.choice hyperparameters plus constant
# entries that rf_neg_score pops or forwards to the classifier.
rf_search_space = {
'n_estimators': hp.choice('n_estimators', np.arange(10, 1000, dtype=int)),
'max_depth': hp.choice('max_depth', np.arange(2, 8, dtype=int)),
'max_leaf_nodes': hp.choice('max_leaf_nodes', np.arange(2, 65, dtype=int)),
'n_jobs': -1,
# NOTE(review): X and y are placed in the space as raw data objects. Per the
# traceback ("truth value of an array ... is ambiguous") this appears to be
# what makes fmin raise ValueError -- confirm against hyperopt's literal check.
'X': X,
'y': y,
'cv': StratifiedKFold(n_splits=5),
'scoring': 'f1_micro'
}
# Minimize rf_neg_score over rf_search_space using the TPE algorithm, with at
# most 100 evaluations. This is the call reported to raise the ValueError.
rf_best_params = fmin(fn=rf_neg_score, space=rf_search_space, max_evals=100,
algo=tpe.suggest)
运行后立即引发 ValueError:
/usr/local/lib/python3.6/dist-packages/hyperopt/utils.py in use_obj_for_literal_in_memo(expr, obj, lit, memo)
167 for node in pyll.dfs(expr):
168 try:
--> 169 if node.obj == lit:
170 memo[node] = obj
171 except AttributeError:
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
您认为我哪里做错了?
答案 0 :(得分:0)
找到解决办法了。似乎 hyperopt 会检查搜索空间中的每一项是否为 hyperopt.hp.* 表达式,而在对数组进行这项检查时引发了 ValueError。因此,无法通过搜索空间以这种方式传入数据。以下是正确的代码:
def rf_neg_score(params):
    """Return the negated mean cross-validation score for one parameter set.

    The training data ``X`` and ``y`` are read from the enclosing module
    scope rather than from ``params``.

    Args:
        params: Mapping that must contain the keys ``'scoring'`` and
            ``'cv'``.  Every remaining entry is forwarded to
            ``RandomForestClassifier`` as a keyword argument.

    Returns:
        The mean of the cross-validation scores, negated so that a
        minimizer such as ``fmin`` ends up maximizing the score.

    Raises:
        KeyError: If ``'scoring'`` or ``'cv'`` is missing from ``params``.
    """
    # Pop from a shallow copy so the caller's mapping is left untouched.
    params = dict(params)
    scoring = params.pop('scoring')
    cv = params.pop('cv')
    rf_clf = RandomForestClassifier(**params)
    # X and y are provided out of function
    # Use the scoring/cv taken from params instead of hard-coded values, so
    # the settings declared in the search space are the ones actually applied.
    score = cross_val_score(rf_clf, X=X, y=y, n_jobs=-1,
                            scoring=scoring, cv=cv).mean()
    return -score
# Hyperparameter space for the random forest. The three hp.choice entries are
# the tunable dimensions; 'scoring' and 'cv' are constant entries that
# rf_neg_score pops before constructing the classifier.
rf_search_space = {
    'n_estimators': hp.choice(
        'n_estimators', np.arange(10, 1000, dtype=int)
    ),
    'max_depth': hp.choice(
        'max_depth', np.arange(2, 8, dtype=int)
    ),
    'max_leaf_nodes': hp.choice(
        'max_leaf_nodes', np.arange(2, 65, dtype=int)
    ),
    'scoring': 'f1_micro',
    'cv': StratifiedKFold(n_splits=5),
}
from hyperopt import space_eval

# Minimize rf_neg_score over rf_search_space using the TPE algorithm, with at
# most 100 evaluations.
rf_best_params = fmin(fn=rf_neg_score, space=rf_search_space, max_evals=100,
                      algo=tpe.suggest)
# For hp.choice dimensions fmin reports the *index* of the selected option,
# not the option itself (e.g. index 0 of np.arange(10, 1000) means 10 trees).
# space_eval substitutes those indices back into the space to obtain the
# concrete values; rf_best_params is left as returned by fmin.
rf_best_config = space_eval(rf_search_space, rf_best_params)