我正在训练3个模型。他们将预测什么是欺诈交易(二进制分类)。 Logistic回归,knn和哑元。但是我为logreg计算的每个指标都等于1!我不相信而且我几乎没有开始寻找错误。但是:
我的火车和测试数据分开了
仅在火车数据上拟合模型,并且仅在测试中进行预测。
所以,然后我尝试用Google搜索它。但是没有找到像我这样的情况。 这是代码:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.dummy import DummyClassifier
from sklearn.metrics import accuracy_score, precision_score, \
recall_score, f1_score, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
df = pd.read_csv("creditcard.csv").drop('Time', axis=1) # read the dataset except of the column time(when transaction was committed), because this column is not valuable.
x = df.iloc[:,0:30].values
y = df.Class.values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, rando
m_state=437)
logistic_regression_model = LogisticRegression()
logistic_regression_model.fit(x_train, y_train)
knn_model = KNeighborsClassifier()
knn_model.fit(x_train, y_train)
dummy_model = DummyClassifier(constant=0)
dummy_model.fit(x_train, y_train)
y_logreg_pred = logistic_regression_model.predict(x_test)
y_knn_pred = knn_model.predict(x_test)
y_dummy_pred = dummy_model.predict(x_test)
print("ACCURACY")
print("\tlogreg:", accuracy_score(y_logreg_pred, y_test))
print("\tkngg:", accuracy_score(y_knn_pred, y_test))
print("\tdummy:", accuracy_score(y_dummy_pred, y_test))
print("PRESICION")
print("\tlogreg:", precision_score(y_logreg_pred, y_test))
print("\tknn:", precision_score(y_knn_pred, y_test))
print("\tdummy:", precision_score(y_dummy_pred, y_test))
print("RECALL")
print("\tlogreg:", recall_score(y_logreg_pred, y_test))
print("\tknn:", recall_score(y_knn_pred, y_test))
print("\tdummy:", recall_score(y_dummy_pred, y_test))
print("F1")
print("\tlogreg:", f1_score(y_logreg_pred, y_test))
print("\tknn:", f1_score(y_knn_pred, y_test))
print("\tdummy:", f1_score(y_dummy_pred, y_test))
print("ROC AUC")
print("\tlogreg:", roc_auc_score(y_logreg_pred, y_test))
print("\tknn:", roc_auc_score(y_knn_pred, y_test))
print("\tdummy:", roc_auc_score(y_dummy_pred, y_test))
logreg_y_pred_proba = logistic_regression_model.predict_proba(x_test)[:, 1]
knn_y_pred_proba = knn_model.predict_proba(x_test)[:, 1]
dummy_y_pred_proba = dummy_model.predict_proba(x_test)[:, 1]
fpr, tpr, tresholds = roc_curve(y_test, logreg_y_pred_proba)
plt.figure(figsize=(7, 7))
plt.plot(fpr, tpr)
输出:
ACCURACY
logreg: 1.0
kngg: 0.9992275552122467
dummy: 0.9967346652154068
PRESICION
logreg: 1.0
knn: 0.5957446808510638
dummy: 0.0
RECALL
logreg: 1.0
knn: 0.9032258064516129
dummy: 0.0
F1
logreg: 1.0
knn: 0.7179487179487178
dummy: 0.0
ROC AUC
logreg: 1.0
knn: 0.9512789840693917
dummy: 0.4991735537190083
我在做什么错了?