这是我的代码
# Candidate hyper-parameter values, one dict per classifier. Each key is
# written as `<pipeline step name>__<parameter name>`.
param_grid = [
    dict(kneighborsclassifier__n_neighbors=[3, 4, 5, 6, 7]),
    dict(svc__C=[0.1, 1, 10, 100], svc__gamma=[0.001, 0.01, 0.1, 1, 10]),
    dict(randomforestclassifier__n_estimators=[50, 100, 200, 300, 400]),
    dict(decisiontreeclassifier__max_depth=[12, 25, 50, 75, 100]),
    dict(adaboostclassifier__n_estimators=[50, 100, 200, 300, 400]),
    dict(baggingclassifier__n_estimators=[50, 100, 200, 300, 400]),
]
# Splitters for the inner (grid-search) and outer (nested scoring) loops.
# Both are configured identically: 5 stratified shuffle splits,
# 20% train / 80% test, random_state=0.
inner_cv = StratifiedShuffleSplit(test_size=.8, train_size=.2, n_splits=5, random_state=0)
outer_cv = StratifiedShuffleSplit(test_size=.8, train_size=.2, n_splits=5, random_state=0)
# Classifier instances (defined outside this snippet) and their display
# names, listed in the same order so they can be zipped together.
models = [knn, svc, forest, dtc, ada, bag]
model_names = ['knn', 'svc','forest', 'dtc', 'ada', 'bag']
# NOTE(review): indentation was lost in this paste; the statements from
# `pipe = ...` down to `print('\n')` presumably form the loop body.
for m, mname in zip(models, model_names):
# Each pipeline holds the three preprocessing steps plus exactly ONE
# classifier, `m`.
pipe = make_pipeline(VarianceThreshold(threshold=1),
MinMaxScaler(),
SelectKBest(f_classif, k=20),
m)
# NOTE(review): the full `param_grid` list (grids for all six classifiers) is
# passed to every pipeline. A pipeline that only contains, e.g., the
# `kneighborsclassifier` step has no `svc` step, so the `svc__*` keys are
# rejected -- this is the "Invalid parameter svc" ValueError quoted below.
grid = GridSearchCV(pipe, param_grid=param_grid, cv=inner_cv)
grid.fit(X_train_test, y_train_test)
# NOTE(review): `nested_score` is computed but never printed or stored.
nested_score = cross_val_score(grid, X=X_train_test, y=y_train_test.values.ravel(), cv=outer_cv)
print(mname)
print(grid.best_params_)
print(grid.best_score_)
print('\n')
这是错误:
ValueError: Invalid parameter svc for estimator Pipeline(memory=None,
steps=[('variancethreshold', VarianceThreshold(threshold=1)),
('minmaxscaler', MinMaxScaler(copy=True, feature_range=(0, 1))),
('selectkbest',
SelectKBest(k=20,
score_func=<function f_classif at 0x0000019E0A485AF8>)),
('kneighborsclassifier',
KNeighborsClassifier(algorithm='auto', leaf_size=30,
metric='minkowski', metric_params=None,
n_jobs=None, n_neighbors=5, p=2,
weights='uniform'))],
verbose=False). Check the list of available parameters with `estimator.get_params().keys()`.
我不知道哪里出了问题。参数名和模型名都是我从管道的 named_steps 中复制出来的。如果不传参数网格,代码可以正常运行,所以问题很可能出在参数网格上。
答案 0 :(得分:0)
把每个模型与它自己的参数网格一一配对后就可以运行了,不过我不太喜欢这种写法。原来的写法把全部六个网格都传给了每一个管道,而每个管道只包含一个分类器,所以像 svc__C 这样的参数在 KNN 管道中是无效的。
# One search space per classifier, in the same order as the model list it is
# zipped with. Keys are written as `<pipeline step name>__<parameter name>`.
param_grid_list = [
    {'kneighborsclassifier__n_neighbors': [3, 4, 5, 6, 7]},
    {
        'svc__C': [0.1, 1, 10, 100],
        'svc__gamma': [0.001, 0.01, 0.1, 1, 10],
    },
    {'randomforestclassifier__n_estimators': [50, 100, 200, 300, 400]},
    {'decisiontreeclassifier__max_depth': [12, 25, 50, 75, 100]},
    {'adaboostclassifier__n_estimators': [50, 100, 200, 300, 400]},
    {'baggingclassifier__n_estimators': [50, 100, 200, 300, 400]},
]
# Keep the individual names available; each one is the very same dict object
# that sits in the list.
pg1, pg2, pg3, pg4, pg5, pg6 = param_grid_list
然后循环变为:
# Tune and evaluate every classifier against ITS OWN parameter grid.
#
# `models`, `param_grid_list` and `model_names` are zipped together, so the
# i-th grid is only ever applied to the pipeline that contains the i-th
# classifier. This avoids the "Invalid parameter ... for estimator Pipeline"
# error raised when a grid names a step the pipeline does not have.

# Flatten the targets once so that both the plain fit and the nested
# cross-validation receive the same 1-D label array.
y_flat = y_train_test.values.ravel()

for m, p, mname in zip(models, param_grid_list, model_names):
    # Preprocessing steps followed by the classifier under test.
    pipe = make_pipeline(
        VarianceThreshold(threshold=1),
        MinMaxScaler(),
        SelectKBest(f_classif, k=20),
        m,
    )

    # Inner loop: hyper-parameter search over this classifier's grid only.
    grid = GridSearchCV(pipe, param_grid=p, cv=inner_cv)
    grid.fit(X_train_test, y_flat)

    # Outer loop: score the whole search procedure (nested cross-validation).
    nested_score = cross_val_score(grid, X=X_train_test, y=y_flat, cv=outer_cv)

    print(mname)
    print(grid.best_params_)
    print(grid.best_score_)
    # Report the nested estimate as well; it was previously computed and
    # then discarded.
    print(nested_score.mean())
    print('\n')