我正在使用包含 SMOTE(imblearn 库)的管道并结合交叉验证,对一个由欺诈客户和非欺诈客户组成的不平衡数据集进行评估:
# Evaluate a SMOTE + gradient-boosting pipeline with stratified 3-fold
# cross-validation, accumulating an averaged ROC curve, a summed confusion
# matrix and the per-fold F1 scores.

# Classifier under evaluation; the seed is fixed so runs are repeatable.
gbm0 = GradientBoostingClassifier(random_state=10)
# [name, sampler] pairs, one per resampling strategy to compare.
# NOTE(review): `ratio` and `kind` are accepted only by older imbalanced-learn
# releases; newer ones use `sampling_strategy` and `BorderlineSMOTE` instead --
# confirm against the installed version.
samplers = [['SMOTE', SMOTE(random_state=RANDOM_STATE, ratio=0.5, kind='borderline1')]]
classifier = ['gbm', gbm0]
# One [label, pipeline] entry per sampler; the label is "<sampler>-<classifier>".
# NOTE(review): presumably `make_pipeline` is imblearn's, since the first step
# is a sampler -- verify the import.
pipelines = [
    ['{}-{}'.format(sampler[0], classifier[0]),
     make_pipeline(sampler[1], classifier[1])]
    for sampler in samplers
]
stdsc = StandardScaler()
cv = StratifiedKFold(n_splits=3)
mean_tpr = 0.0
mean_fpr = np.linspace(0, 1, 100)
# NOTE(review): the scaler is fitted on all of X before splitting, so the
# held-out folds influence the scaling statistics -- confirm this is intended.
Xstd = stdsc.fit_transform(X)
scores = []
confusion = np.array([[0, 0], [0, 0]])
for name, pipeline in pipelines:
    # Reset the ROC accumulators for each pipeline.
    mean_tpr = 0.0
    mean_fpr = np.linspace(0, 1, 100)
    for tr, ts in cv.split(Xstd, y):
        xtrain = Xstd[tr]
        ytrain = y[tr]
        test = y[ts]
        xtest = Xstd[ts]
        # Refit on the training fold only, then score the held-out fold.
        pipeline.fit(xtrain, ytrain)
        probas_ = pipeline.predict_proba(xtest)
        fpr, tpr, thresholds = roc_curve(test, probas_[:, 1])
        # Resample this fold's ROC curve onto the shared FPR grid and add it
        # to the running sum.
        mean_tpr += interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0
        roc_auc = auc(fpr, tpr)
        predictions = pipeline.predict(xtest)
        # Confusion counts are summed over folds; F1 is kept per fold.
        confusion += confusion_matrix(test, predictions)
        score = f1_score(test, predictions)
        scores.append(score)
    # Turn the summed TPR into a mean over folds and pin the curve's end at 1.
    mean_tpr /= cv.get_n_splits(Xstd, y)
    mean_tpr[-1] = 1.0

我已经能够得到混淆矩阵和 ROC 曲线,但我还需要整体的精确率(precision)和召回率(recall),应该怎么做呢?
编辑
我知道 scikit-learn 中有 classification_report,但是如何把它用在交叉验证各折的预测结果上呢?
答案 0 :(得分:0)
所以我最终使用
# Collect per-fold precision and recall alongside the F1 scores.
# NOTE(review): the alias `score` collides with `score = f1_score(...)` in the
# question's loop; if both are used together, the second fold would call a
# float. Rename one of them when merging this snippet into that loop.
from sklearn.metrics import precision_recall_fscore_support as score
# Accumulators: create these once, before the cross-validation loop.
scores = []
recalls = []
precisions = []
# Per fold (inside the loop, after `predictions` is computed): each returned
# value holds one entry per class label.
precision, recall, fscore, support = score(test, predictions)
# Append each metric exactly once per fold.
recalls.append(recall)
precisions.append(precision)
接着是
# Report each metric averaged over the cross-validation folds.
# Each list must hold at least one fold's value, otherwise the division fails.
print('Score:', sum(scores) / len(scores))
print('Recall:', sum(recalls) / len(recalls))
print('Precision:', sum(precisions) / len(precisions))