假设X_train的形状为(751,411),Y_train的形状为(751L,)。我想使用带交叉验证的网格搜索(GridSearchCV)来为梯度提升回归器(GBR)寻找最佳参数。我使用了以下代码,但没有成功。
from sklearn.grid_search import GridSearchCV
# Hyper-parameter grid to search for the gradient-boosting regressor.
param_grid = dict(
    n_estimators=[100, 500],
    learning_rate=[0.1, 0.05, 0.02],
    max_depth=[4],
    min_samples_leaf=[3],
    max_features=[1.0],
)
# Number of parallel jobs handed to the grid search.
n_jobs = 4
def GradientBooster(param_grid, n_jobs):
    """Grid-search a GradientBoostingRegressor with 5-fold cross-validation.

    The search is fitted on the module-level ``X_train`` / ``Y_train``.

    Args:
        param_grid: Mapping of estimator parameter names to the lists of
            candidate values to try.
        n_jobs: Number of parallel jobs passed to ``GridSearchCV``.

    Returns:
        A ``(search, best_estimator)`` tuple: the fitted ``GridSearchCV``
        object and its ``best_estimator_``.
    """
    # Imported locally because the surrounding snippet never brings
    # GradientBoostingRegressor into scope.
    from sklearn.ensemble import GradientBoostingRegressor

    estimator = GradientBoostingRegressor()
    search = GridSearchCV(estimator=estimator, cv=5, param_grid=param_grid,
                          n_jobs=n_jobs)
    search.fit(X_train, Y_train)
    # Function-call form of print works on both Python 2 and Python 3.
    print(search.best_estimator_)
    # Return the fitted search object as the first element (instead of an
    # undefined name) so callers can inspect the cross-validation results
    # or call ``search.predict(X_test)`` directly.
    return search, search.best_estimator_
# Run the search and unpack the two values GradientBooster returns.
cv, best_est = GradientBooster(param_grid=param_grid, n_jobs=n_jobs)
它给了我以下错误:
51 from pandas.core.config import get_option
52
53 encoding = get_option("display.encoding")
---> 54 return self.__unicode__().encode(encoding, 'replace')
self.__unicode__.encode = undefined
encoding = 'cp0'
55
56 def __repr__(self):
57 """
58 Return a string representation for a particular object.
LookupError: unknown encoding: cp0
然后,我想用找到的最佳参数,通过predict函数对X_test进行预测。
我对以下代码也有同样的问题:
# Candidate hyper-parameters for the random-forest grid search
# (1 * 4 * 2 * 3 * 3 * 4 = 288 combinations).
param_grid = dict(
    bootstrap=[True],
    max_depth=[80, 90, 100, 110],
    max_features=[2, 3],
    min_samples_leaf=[3, 4, 5],
    min_samples_split=[8, 10, 12],
    n_estimators=[100, 200, 300, 1000],
)

# Evaluate every combination with 3-fold cross-validation; verbose=2 asks
# the search to report progress while it runs.
rf = RandomForestRegressor()
grid_search = GridSearchCV(rf, param_grid, cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X_train, Y_train)
答案 0 :(得分:0)
以下是一个在测试数据集上可以正常运行的示例:
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
# Load a small built-in regression dataset as (features, target).
# NOTE: load_boston was removed in scikit-learn 1.2; on newer releases a
# different regression dataset has to be substituted here.
X, y = load_boston(return_X_y=True)

# Same random-forest grid as in the question.
param_grid = dict(
    bootstrap=[True],
    max_depth=[80, 90, 100, 110],
    max_features=[2, 3],
    min_samples_leaf=[3, 4, 5],
    min_samples_split=[8, 10, 12],
    n_estimators=[100, 200, 300, 1000],
)

# 3-fold cross-validated search over the whole grid, with progress output.
rf = RandomForestRegressor()
grid_search = GridSearchCV(rf, param_grid, cv=3, n_jobs=-1, verbose=2)
grid_search.fit(X, y)
您的数据很可能存在问题。