我正在使用以下函数来存储对目标列的预测。
# define target and Id columns
target = 'is_promoted'
# Kept as a list (not a bare string): the code below concatenates it with
# [target] and calls .append() on it, and neither works on a str.
IDcol = ['employee_id']
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score
def modelfit(alg, dtrain, dtest, pred, target, IDcol, filename):
    """Fit a classifier, print a train/CV report and write a submission CSV.

    Args:
        alg: estimator exposing ``fit`` and ``predict``; it is also handed to
            ``cross_val_score``.
        dtrain: training data indexable by column name (e.g. a pandas
            DataFrame) holding the ``pred`` columns and the ``target`` column.
        dtest: test data indexable by column name holding the ``pred`` and ID
            columns. A ``target`` column with the predictions is written into
            it in place.
        pred: list of feature column names.
        target: name of the label column.
        IDcol: a single ID column name, or a list of ID column names. The
            argument itself is never modified.
        filename: path of the CSV file to write.

    Returns:
        None. The report is printed and the submission is written to disk.
    """
    alg.fit(dtrain[pred], dtrain[target])

    # predict training set
    dtrain_pred = alg.predict(dtrain[pred])

    # perform cross validation using the same metric as the training report
    # (F1). The scores are reported as returned: taking sqrt(abs(.)) only
    # makes sense for negated-MSE regression scores, not for classification.
    cv_score = cross_val_score(alg, dtrain[pred], dtrain[target],
                               cv=20, scoring='f1')

    print('\nmodel report')
    print(f1_score(dtrain[target], dtrain_pred))
    print("CV Score : Mean - %.4g | Std - %.4g | Min - %.4g | Max - %.4g" % (
        np.mean(cv_score), np.std(cv_score),
        np.min(cv_score), np.max(cv_score)))

    # predict on testing data
    dtest[target] = alg.predict(dtest[pred])

    # export submission file
    # Build the output column list locally instead of appending to IDcol:
    # that would mutate the caller's list on every call and fails outright
    # when IDcol is a plain string.
    id_cols = [IDcol] if isinstance(IDcol, str) else list(IDcol)
    out_cols = id_cols if target in id_cols else id_cols + [target]
    submission = pd.DataFrame({col: dtest[col] for col in out_cols})
    submission.to_csv(filename, index=False)
from sklearn.linear_model import LogisticRegression
# IDcol may be a single column name or a list of names; normalise it to a
# list first, because `[target] + IDcol` raises TypeError for a plain string.
id_cols = [IDcol] if isinstance(IDcol, str) else list(IDcol)
excluded = {target, *id_cols}
# Features are every training column except the label and the ID column(s).
pred = [x for x in train.columns if x not in excluded]
alg1 = LogisticRegression()
# Pass the list form so modelfit always receives a list of ID columns.
modelfit(alg1, train, test, pred, target, id_cols, 'sub_log.csv')
有人可以解释为什么会发生这种情况，以及如何解决吗？