尝试使用损失函数作为categorical_crossentropy构建CXR分类器

时间:2019-12-20 13:10:42

标签: tensorflow machine-learning cross-entropy

作为一个有趣的自我项目,我一直试图在CheXNet数据库上构建分类器。我将图像加载为“ diagnosis-x.png ”,其中诊断是类别,x是计数,即“ Pneumonia-5.png ”。

但是,虽然我可以使用100张左右图像的测试文件夹进行编译,但是当我尝试使用2000张以上图像的较大文件夹时,却遇到了此错误:

  

ValueError:检查目标时出错:预期 dense_2 具有形状(14,)的数组,但得到形状为(1,)的数组

我有14个类别,损失函数为categorical_crossentropy。对于如何解决这个问题的任何建议,我将不胜感激!

import csv
from PIL import Image, ImageFile # used for loading images
import numpy as np
import os # used for navigating to image path
import imageio # used for writing images
from random import shuffle
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers. normalization import BatchNormalization
import numpy as np

# Allow Pillow to load images whose file data is truncated instead of raising IOError.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# NOTE(review): '\d' is not a recognized escape, so this literal evaluates to
# 'D:\Python\data' -- it works, but mixing '\\' and '\' in one path is fragile;
# a raw string r'D:\Python\data' would be clearer.
os.chdir('D:\\Python\data')

# Ordered class names; class at index i maps to a one-hot vector with a 1 at i.
_CLASS_NAMES = (
    'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass',
    'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema',
    'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia',
)

def label_img(name):
  """Map a filename like 'Pneumonia-5.png' to a 14-class one-hot np.array.

  The class is the filename prefix before the first '-'. Returns None when
  the prefix is not a known class (e.g. '.DS_Store'); callers MUST skip such
  entries -- mixing None into the labels makes the label array ragged, which
  is what produces Keras' "expected shape (14,) but got array with shape
  (1,)" error with categorical_crossentropy.
  """
  word_label = name.split('-')[0]
  try:
      idx = _CLASS_NAMES.index(word_label)
  except ValueError:
      return None  # unknown prefix -- caller must filter these out
  # Row idx of the identity matrix is exactly the one-hot vector for idx.
  return np.eye(len(_CLASS_NAMES), dtype=int)[idx]

IMG_SIZE = 300  # every image is resized to IMG_SIZE x IMG_SIZE grayscale before training

def load_training_data():
    """Load grayscale training images and one-hot labels from 'labeled_train'.

    Returns a shuffled list of [image_array (IMG_SIZE x IMG_SIZE), one-hot
    label] pairs. Files whose name prefix is not a known class (label_img
    returns None, e.g. '.DS_Store') are skipped -- including them would make
    the label array ragged and break model.fit with categorical_crossentropy.
    """
    DIR = 'labeled_train'
    train_data = []
    for fname in os.listdir(DIR):
        label = label_img(fname)
        if label is None:  # OS metadata / unrecognized files -- skip
            continue
        img = Image.open(os.path.join(DIR, fname))
        img = img.convert('L')  # grayscale
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        img = img.resize((IMG_SIZE, IMG_SIZE), Image.LANCZOS)
        train_data.append([np.array(img), label])

    shuffle(train_data)
    return train_data


#loading data
train_data = load_training_data()
plt.imshow(train_data[1][0], cmap = 'gist_gray')
#plt.show()

trainImages = np.array([i[0] for i in train_data]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
trainLabels = np.array([i[1] for i in train_data])


#model
# Five conv blocks (Conv2D -> MaxPooling2D -> BatchNormalization), then a
# small dense head ending in a 14-way softmax for the 14 diagnosis classes.
model = Sequential()
conv_filters = (32, 64, 64, 96, 32)
for block_index, n_filters in enumerate(conv_filters):
    if block_index == 0:
        # First conv layer declares the grayscale input shape.
        model.add(Conv2D(n_filters, kernel_size=(3, 3), activation='relu',
                         input_shape=(IMG_SIZE, IMG_SIZE, 1)))
    else:
        model.add(Conv2D(n_filters, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(BatchNormalization())
model.add(Dropout(0.2))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
#model.add(Dropout(0.3))
model.add(Dense(14, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(trainImages, trainLabels, batch_size=50, epochs=5, verbose=1)



#test data
TEST_DIR = 'test_dir'

def load_test_data():
    """Load grayscale test images and one-hot labels from TEST_DIR.

    Mirrors load_training_data: files whose name prefix is not a known class
    (label_img returns None -- this covers '.DS_Store' and any other stray
    files) are skipped so every label keeps the uniform (14,) shape.
    """
    test_data = []
    for fname in os.listdir(TEST_DIR):
        label = label_img(fname)
        if label is None:  # subsumes the old "DS_Store" path check
            continue
        img = Image.open(os.path.join(TEST_DIR, fname))
        img = img.convert('L')  # grayscale
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        img = img.resize((IMG_SIZE, IMG_SIZE), Image.LANCZOS)
        test_data.append([np.array(img), label])
    shuffle(test_data)
    return test_data


test_data = load_test_data()    
#plt.imshow(test_data[10][0], cmap = 'gist_gray')
testImages = np.array([i[0] for i in test_data]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
testLabels = np.array([i[1] for i in test_data])

loss, acc = model.evaluate(testImages, testLabels, verbose = 0)
print(acc * 100)

0 个答案:

没有答案