我训练(拟合)了一个多类分类模型,但训练完成后却无法用它进行预测,而是收到一条我看不懂的错误消息。
有关可重现的示例,请从网上下载此小型数据集(只需将地址粘贴到浏览器中,csv文件将自动下载):
下载完成后,运行以下代码:
from sklearn.model_selection import train_test_split

# Load the customer data set; the CSV is expected in the working directory.
df = pd.read_csv('teleCust1000t.csv')

# Columns used as model inputs; 'custcat' is the target column.
feature_columns = [
    'region', 'tenure', 'age', 'marital', 'address', 'income',
    'ed', 'employ', 'retire', 'gender', 'reside',
]
X = df[feature_columns].values
y = df['custcat'].values

# Standardize the features: fit the scaler on the raw values, then
# transform a float copy of them.
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X.astype(float))

# Hold out 20% of the rows for evaluation, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=4
)
def f1_eval(preds, dtrain):
    """Custom evaluation metric for ``xgb.train``: weighted F1 score.

    Args:
        preds: Array of predicted scores; values ``>= 0.5`` are treated as
            the positive prediction.
        dtrain: Object exposing ``get_label()`` that returns the true labels
            (this script passes ``xgb.DMatrix`` instances).

    Returns:
        A ``(name, value)`` tuple: ``'f1_score'`` and the weighted F1 score.
    """
    from sklearn import metrics

    # NOTE(review): thresholding at 0.5 produces only two predicted values,
    # while the surrounding text describes a multi-class problem -- confirm
    # this metric is what is intended.
    predicted = preds >= 0.5
    score = metrics.f1_score(dtrain.get_label(), predicted, average='weighted')
    return 'f1_score', score
def logregobj(preds, dtrain):
    """Custom logistic-loss objective for ``xgb.train``.

    Args:
        preds: Array of raw scores; the sigmoid is applied to them here.
        dtrain: Object exposing ``get_label()`` that returns the true labels
            (this script passes ``xgb.DMatrix`` instances).

    Returns:
        A ``(grad, hess)`` tuple: ``sigmoid(preds) - labels`` and
        ``sigmoid(preds) * (1 - sigmoid(preds))``, element-wise.
    """
    labels = dtrain.get_label()
    # Squash the raw scores into (0, 1) with the sigmoid.
    probs = 1.0 / (1.0 + np.exp(-preds))
    grad = probs - labels
    hess = probs * (1.0 - probs)
    return grad, hess
# Hyperparameters. They are assigned before ``param`` is built because the
# dict literal below reads them immediately; defining them afterwards raises
# NameError when the script is run top to bottom.
max_depth = 6
eta = 0.1
silent = 0
gamma = 1
min_child_weight = 1
subsample = 0.8
colsample_bytree = 0.8
colsample_bylevel = 1
reg_alpha = 0
reg_lambda = 1
early_stopping_rounds = 5
num_boost_round = 20

# Wrap the train / test splits in DMatrix objects for xgboost.
dtrain = xgb.DMatrix(X_train, y_train)
dtest = xgb.DMatrix(X_test, y_test)

param = {
    'max_depth': max_depth,
    'eta': eta,
    'silent': silent,
    'objective': 'binary:logistic',
    'gamma': gamma,
    'min_child_weight': min_child_weight,
    'subsample': subsample,
    'colsample_bytree': colsample_bytree,
    'colsample_bylevel': colsample_bylevel,
    'reg_alpha': reg_alpha,
    'reg_lambda': reg_lambda,
}

# Data sets whose metric is reported during training.
watchlist = [(dtrain, 'train'), (dtest, 'eval')]

# Train with the custom objective and the custom F1 metric (maximized).
bst = xgb.train(
    param,
    dtrain,
    num_boost_round,
    watchlist,
    obj=logregobj,
    feval=f1_eval,
    maximize=True,
    early_stopping_rounds=early_stopping_rounds,
)

# This is the call that raises the AttributeError in the traceback below:
# X_train is a numpy array, not an xgb.DMatrix.
bst.predict(X_train)
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-203-959b9ab4d35d> in <module>()
----> 1 bst.predict(X_train)
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\core.py in predict(self, data, output_margin, ntree_limit, pred_leaf, pred_contribs, approx_contribs, pred_interactions, validate_features)
1053
1054 if validate_features:
-> 1055 self._validate_features(data)
1056
1057 length = c_bst_ulong()
C:\ProgramData\Anaconda3\lib\site-packages\xgboost\core.py in _validate_features(self, data)
1296 else:
1297 # Booster can't accept data with different feature names
-> 1298 if self.feature_names != data.feature_names:
1299 dat_missing = set(self.feature_names) - set(data.feature_names)
1300 my_missing = set(data.feature_names) - set(self.feature_names)
AttributeError: 'numpy.ndarray' object has no attribute 'feature_names'
您的建议将不胜感激。
答案 0 :(得分:0)
Booster.predict 接受的输入应当是 xgboost.DMatrix 对象,而不是 numpy 数组。
# Wrong: X_train is a numpy.ndarray, which has no `feature_names`
# attribute, so the feature validation inside predict() raises
# AttributeError.
bst.predict(X_train)
# Right: pass the xgb.DMatrix that was built from the training data.
bst.predict(dtrain)
# Or pass the DMatrix that was built from the test data.
bst.predict(dtest)