在二进制分类中获得零精度,召回率和f1-分数吗?

时间:2018-09-23 17:52:59

标签: python binary classification precision

我正在研究必须在二进制类之间进行分类的分类。 但是我总是得到这些结果,即第二级的精度,召回率和F1-socre为0。 我曾尝试调整分类器的参数,但仍无任何改进。 这是代码(Python)和结果。预先感谢.... !!

from sklearn.cross_validation import train_test_split
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix , classification_report
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix , classification_report
from sklearn.neural_network import MLPClassifier
import itertools
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
#Code for Plotting Confusion Matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
#Code for Live plot of Loss and Traningn score
    #Code for Live Graphs Score and Loss
import keras
from matplotlib import pyplot as plt
from IPython.display import clear_output
class PlotLearning(keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []
        self.acc = []
        self.val_acc = []
        self.fig = plt.figure()

        self.logs = []

    def on_epoch_end(self, epoch, logs={}):

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.acc.append(logs.get('acc'))
        self.val_acc.append(logs.get('val_acc'))
        self.i += 1
        f, (ax1, ax2) = plt.subplots(1, 2, sharex=True)

        clear_output(wait=True)

        ax1.set_yscale('log')
        ax1.plot(self.x, self.losses, label="loss")
        ax1.plot(self.x, self.val_losses, label="val_loss")
        ax1.legend()

        ax2.plot(self.x, self.acc, label="accuracy")
        ax2.plot(self.x, self.val_acc, label="validation accuracy")
        ax2.legend()

        plt.show();

plot = PlotLearning()

#Loading Data
#____________Loading Traning
dataset = pd.read_csv('DOS2012.csv')
#dataset = dataset.astype('float32')
#scaler = MinMaxScaler(feature_range=(0, 1))
#dataset = scaler.fit_transform(dataset)
#dataset = pd.DataFrame(dataset)
x_train = dataset.iloc[: , 0:11].values
x_train = pd.DataFrame(x_train)
y_train = dataset.iloc[: , 11:12].values
y_train = pd.DataFrame(y_train)

look_back = 11

#
x_train, x_test, y_train, y_test = train_test_split(x_train, y_train, random_state=0, test_size = 0.2)



#PrePorcessing
x_train = np.array(x_train)
x_test = np.array(x_test)
x_train = np.reshape(x_train, (x_train.shape[0], 1, x_train.shape[1]))
x_test = np.reshape(x_test, (x_test.shape[0], 1, x_test.shape[1]))




# create and fit the LSTM network
model = Sequential()
model.add(LSTM(11, input_shape=(1, look_back)))
model.add(Dense(8,activation='relu'))
model.add(Dense(3,activation='relu'))
model.add(Dense(1,activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

score = model.fit(x_train, y_train, epochs=40, batch_size=32, verbose=1, callbacks=[plot])

#************
#Ploting Traning Score
#************
print(score.history.keys())
plt.plot(score.history['loss'])
plt.plot(score.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

#Plot summarize history for loss
plt.plot(score.history['loss'])
plt.plot(score.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()


# make predictions
predictions = model.predict(x_test)



#Accuracy
print ("Accuracy is ", accuracy_score(y_test,np.round(predictions))*100)


# Compute confusion matrix Test
cnf_matrix = confusion_matrix(y_test, np.round(predictions))
np.set_printoptions(precision=2)
print(cnf_matrix)

# Plot non-normalized confusion matrix
classes = ['A','B']


#*******************************
#Ploting Confusion Matrix 
#*******************************
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=classes,
                      title='Confusion matrix, Test')
plt.show()

#Printing table values
print(classification_report(y_test,np.round(predictions)))


#***************************************************************
#*********ROC CURVE*8=******************************************
#***************************************************************
from sklearn.metrics import roc_curve, auc
false_positive_rate, true_positive_rate, thresholds = roc_curve(y_test, predictions)
roc_auc = auc(false_positive_rate, true_positive_rate)
plt.title('ROC LSTM')
plt.plot(false_positive_rate, true_positive_rate, 'blue', label='AUC = %0.2f'% roc_auc)
plt.legend(loc='lower right')
plt.plot([0,1],[0,1],'m--')
plt.xlim([0,1])
plt.ylim([0,1.1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

结果: 混淆矩阵 A [[33511 0] B [765 0]]      A B

分类报告              精确召回f1得分支持

      0       0.98      1.00      0.99     33511
      1       0.00      0.00      0.00       765

平均/总计0.96 0.98 0.97 34276

0 个答案:

没有答案