使用 SVM 进行图像分类时,如何根据混淆矩阵把假阳性(误报)和假阴性(漏报)的图像分别保存下来?

时间:2018-01-02 12:13:10

标签: python image machine-learning svm confusion-matrix

我使用支持向量机成功分类了两类图像,并正确计算了混淆矩阵。这是我的代码:

import sklearn
from sklearn import metrics
from sklearn import svm
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
import numpy as np
import PIL
from PIL import Image
import os


# Presumably destination folders for correctly / incorrectly classified
# images; nothing in the visible script reads them yet -- TODO confirm.
savecorrect = "C:/correct"
saveincorrect = "C:/incorrect"

# Size passed to Image.resize() in image_to_matrix, so every image ends up
# with the same number of pixels.
COMMON_SIZE = (50, 50)
def image_to_matrix(filename):
    """Load an image file as a fixed-size array of RGB pixels.

    Args:
        filename: Path of the image file to open with PIL.

    Returns:
        A numpy array holding the image resized to COMMON_SIZE, with
        three colour channels on the last axis.
    """
    # The context manager closes the underlying file handle, which matters
    # when many images are loaded in a loop.
    with Image.open(filename) as img:
        # Force three channels so that grayscale, palette and RGBA files
        # yield the same array shape as RGB ones; flatten_matrix assumes
        # exactly 3 channels per pixel.
        img = img.convert("RGB").resize(COMMON_SIZE)
        return np.asarray(img)

def flatten_matrix(matrix):
    """Flatten an image array into a one-dimensional pixel vector.

    Args:
        matrix: numpy array containing shape[0] * shape[1] * 3 elements,
            e.g. an (m, n, 3) RGB image.

    Returns:
        A 1-D array of length m * n * 3.
    """
    # The factor 3 is the hard-coded number of colour channels per pixel.
    total = matrix.shape[0] * matrix.shape[1] * 3
    row_vector = matrix.reshape(1, total)
    # Drop the leading axis of length 1 to get a flat vector.
    return row_vector[0]


# Class names, listed in the same order as the entries of the directory lists.
classes = ['positive', 'negative']

train_image_dirs = ["C:/potholes_50/train/positive/", "C:/potholes_50/train/negative/"]
train_images = []
train_labels = []
# Collect the path and the class label of every picture in the "train" folders.
# zip() pairs each directory with its class by position, replacing the
# list.index() lookup that was repeated on every iteration.
for label, directory in zip(classes, train_image_dirs):
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # sample order reproducible between runs and machines.
    temp_list = [os.path.join(directory, f) for f in sorted(os.listdir(directory))]
    train_labels.extend([label] * len(temp_list))
    train_images.extend(temp_list)

# Build the whole training dataset: one flattened image per row.
train_data = np.array(
    [flatten_matrix(image_to_matrix(path)) for path in train_images]
)

# Creation of the whole test set
test_labels = []
test_image_dirs = ["C:/potholes_50/model_test/positive/", "C:/potholes_50/model_test/negative/"]
test_images = []
# zip() pairs each directory with its class by position, replacing the
# list.index() lookup that was repeated on every iteration.
for label, directory in zip(classes, test_image_dirs):
    # os.listdir() returns entries in arbitrary order; sorting keeps
    # test_images[i] stable between runs, so a prediction at index i can be
    # traced back to the same file every time.
    temp_list = [os.path.join(directory, f) for f in sorted(os.listdir(directory))]
    test_labels.extend([label] * len(temp_list))
    test_images.extend(temp_list)

# One flattened image per row, in the same order as test_images / test_labels.
test_data = np.array(
    [flatten_matrix(image_to_matrix(path)) for path in test_images]
)


# gamma is the kernel coefficient of the 'rbf', 'poly' and 'sigmoid' kernels
# only; the linear kernel ignores it, so it is no longer passed.
clf = svm.SVC(kernel='linear', C=1)

clf.fit(train_data, train_labels)

# Predict the test set once and reuse the result for every metric below.
predicted_label = clf.predict(test_data)

Accuracy_Score = accuracy_score(test_labels, predicted_label)
Precision_Score = precision_score(test_labels, predicted_label, average="macro")
Recall_Score = recall_score(test_labels, predicted_label, average="macro")
F1_Score = f1_score(test_labels, predicted_label, average="macro")

# For a classifier, clf.score() is the mean accuracy on the given data, i.e.
# the value already stored in Accuracy_Score, so no second prediction pass
# over the test set is needed.
print("SVM average score: %f" % Accuracy_Score)

# np.mean / np.std work whether the metric functions return NumPy scalars or
# plain Python floats (which have no .mean()/.std() methods).
# NOTE: each metric is a single number here, so the "+/-" part is always 0.0;
# it only becomes meaningful with several scores (e.g. cross-validation).
for metric_name, metric_value in (
    ("Accuracy", Accuracy_Score),
    ("Precision", Precision_Score),
    ("Recall", Recall_Score),
    ("F1-Score", F1_Score),
):
    print('Average %s: %0.2f +/- (%0.1f) %%'
          % (metric_name, np.mean(metric_value) * 100, np.std(metric_value) * 100))

print("")
# confusion_matrix(y_true, y_pred): rows are the true classes, columns the
# predicted classes.
CM = confusion_matrix(test_labels, predicted_label)

print(CM)

这是我到目前为止的结果

    SVM average score: 0.832000

    Average Accuracy: 83.20 +/- (0.0) %
    Average Precision: 86.78 +/- (0.0) %
    Average Recall: 83.20 +/- (0.0) %
    Average F1-Score: 82.78 +/- (0.0) %

    [[247   3]
     [ 81 169]]

现在我想从混淆矩阵中单独保存假阳性和假阴性,这样我就可以很容易地找出哪些图像被预测为假阳性和假阴性。

我现在卡在这一步了,如果有人能帮忙解决这个问题,我将非常感激。谢谢。

0 个答案:

没有答案