我在运行下面这段代码时,遇到了一个自己无法解决的错误:
import xgboost as xgb1
from sklearn.grid_search import GridSearchCV
# Hyper-parameter grid explored by the cross-validated search.
cv_params = {'max_depth': [3, 5, 7], 'min_child_weight': [1, 3, 5]}

# Booster settings held constant for every candidate in the grid.
# NOTE(review): 'count:poisson' is meant for non-negative count targets. If
# train_y can contain negative values (e.g. early departures), the model's
# predictions may come back as NaN, which would match the
# "Input contains NaN" failure raised while scoring -- confirm against the
# data and consider a squared-error objective instead.
ind_params = {
    'learning_rate': 0.1,
    'n_estimators': 1000,
    'seed': 0,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'objective': 'count:poisson',
}

optimized_GBM = GridSearchCV(
    xgb1.XGBRegressor(**ind_params),
    cv_params,
    # The bare 'mean_squared_error' scorer name was deprecated in
    # scikit-learn 0.18 and later removed; the supported name is the negated
    # one. Scores are negated MSE, so "greater is better" still holds.
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1,
)
optimized_GBM.fit(train_X, train_y)
print(optimized_GBM.best_score_)
print(optimized_GBM.best_params_)
错误:
validation.py in _assert_all_finite(X=array([nan, nan, nan, ..., nan, nan, nan], dtype=float32))
39 # everything is finite; fall back to O(n) space np.isfinite to prevent
40 # false positives from overflow in sum method.
41 if (X.dtype.char in np.typecodes['AllFloat'] and not np.isfinite(X.sum())
42 and not np.isfinite(X).all()):
43 raise ValueError("Input contains NaN, infinity"
---> 44 " or a value too large for %r." % X.dtype)
X.dtype = dtype('float32')
45
46
47 def assert_all_finite(X):
48 """Throw a ValueError if X contains NaN or infinity.
ValueError: Input contains NaN, infinity or a value too large for dtype('float32').
X 的所有特征的类型均为 float64,而 y 是整数。我还尝试过将 X 转换为 float32。
我已经确认数据中不存在 NaN 和 inf:
# Turn +/-inf into NaN, then drop every row that still has a missing value.
# how="any" is needed for that: how="all" would only remove rows in which
# *every* column is NaN and would leave partially-missing rows in place.
XY = XY.replace([np.inf, -np.inf], np.nan).dropna(how="any").round(2)

# Feature columns are everything except the regression target.
features = XY.drop("DEPARTURE_DELAY_MIN", axis=1)

# Rescale the features with MinMaxScaler. Keep XY's index on the result so
# that X and y stay aligned by label after rows were dropped above.
scaler = preprocessing.MinMaxScaler()
standardized_df = pd.DataFrame(
    scaler.fit_transform(features),
    columns=features.columns,
    index=features.index,
)

X = standardized_df.astype("double")
y = XY.DEPARTURE_DELAY_MIN.astype("int")

# Split the data set into train and test sets
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.25)
我不明白到底是哪里出了问题。