图像分类 Cnn 模型总是给出相同的结果

时间:2020-12-27 17:29:30

标签: python machine-learning keras deep-learning conv-neural-network

import numpy as np
import pickle
import cv2
import tensorflow.keras
import tensorflow.core
import tensorflow.python
from os import listdir
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.models import Sequential
from tensorflow.python.keras.layers.normalization import BatchNormalization
from tensorflow.python.keras.layers.convolutional import Conv2D
from tensorflow.python.keras.layers.convolutional import MaxPooling2D
from tensorflow.python.keras.layers.core import Activation, Flatten, Dropout, Dense
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

EPOCHS = 25
INIT_LR = 1e-3
BS = 32
default_image_size = tuple((256, 256))
image_size = 0
directory_root = 'C:/Users/Btl/Desktop/PlantVillage'
width=256
height=256
depth=3

def convert_image_to_array(image_dir):
    try:
        image = cv2.imread(image_dir)
        if image is not None :
            image = cv2.resize(image, default_image_size)
            return img_to_array(image)
        else :
            return np.array([])
    except Exception as e:
        print(f"Error : {e}")
        return None


image_list, label_list = [], []
try:
        print("[INFO] Loading images ...")
        root_dir = listdir(directory_root)
        for directory in root_dir:
                # remove .DS_Store from list
                if directory == ".DS_Store":
                        root_dir.remove(directory)

        for plant_folder in root_dir:
                plant_disease_folder_list = listdir(f"{directory_root}/{plant_folder}")

                for disease_folder in plant_disease_folder_list:
                        # remove .DS_Store from list
                        if disease_folder == ".DS_Store":
                                plant_disease_folder_list.remove(disease_folder)

                for plant_disease_folder in plant_disease_folder_list:
                        print(f"[INFO] Processing {plant_disease_folder} ...")
                        plant_disease_image_list = listdir(f"{directory_root}/{plant_folder}/{plant_disease_folder}/")

                        for single_plant_disease_image in plant_disease_image_list:
                                if single_plant_disease_image == ".DS_Store":
                                        plant_disease_image_list.remove(single_plant_disease_image)

                        for image in plant_disease_image_list[:200]:
                                image_directory = f"{directory_root}/{plant_folder}/{plant_disease_folder}/{image}"
                                if image_directory.endswith(".jpg") == True or image_directory.endswith(".JPG") == True:
                                        image_list.append(convert_image_to_array(image_directory))
                                        label_list.append(plant_disease_folder)
        print("[INFO] Image loading completed")
except Exception as e:
        print(f"Error : {e}")

image_size = len(image_list)

label_binarizer = LabelBinarizer()
image_labels = label_binarizer.fit_transform(label_list)
pickle.dump(label_binarizer,open('label_transform.pkl', 'wb'))
n_classes = len(label_binarizer.classes_)

print(label_binarizer.classes_)

np_image_list = np.array(image_list, dtype=np.float16) / 225.0

print("[INFO] Spliting data to train, test")
x_train, x_test, y_train, y_test = train_test_split(np_image_list, image_labels, test_size=0.2, random_state = 42)

aug = ImageDataGenerator(
    rotation_range=25, width_shift_range=0.1,
    height_shift_range=0.1, shear_range=0.2,
    zoom_range=0.2,horizontal_flip=True,
    fill_mode="nearest")

model = Sequential()
inputShape = (height, width, depth)
chanDim = -1
if K.image_data_format() == "channels_first":
    inputShape = (depth, height, width)
    chanDim = 1
model.add(Conv2D(32, (3, 3), padding="same",input_shape=inputShape))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(3, 3)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(Conv2D(128, (3, 3), padding="same"))
model.add(Activation("relu"))
model.add(BatchNormalization(axis=chanDim))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(1024))
model.add(Activation("relu"))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(n_classes))
model.add(Activation("softmax"))

model.summary()

opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
# distribution
model.compile(loss="binary_crossentropy", optimizer=opt,metrics=["accuracy"])
# train the network
print("[INFO] training network...")

history = model.fit_generator(
    aug.flow(x_train, y_train, batch_size=BS),
    validation_data=(x_test, y_test),
    steps_per_epoch=len(x_train) // BS,
    epochs=EPOCHS, verbose=1
    )

acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
#Train and validation accuracy
plt.plot(epochs, acc, 'b', label='Training accurarcy')
plt.plot(epochs, val_acc, 'r', label='Validation accurarcy')
plt.title('Training and Validation accurarcy')
plt.legend()

plt.figure()
#Train and validation loss
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()

print("[INFO] Calculating model accuracy")
scores = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {scores[1]*100}")

filename = 'label_transform.pkl'
image_labels = pickle.load(open(filename, 'rb'))

def predict_disease(image_path):
    image_array = convert_image_to_array(image_path)
    np_image = np.array(image_array, dtype=np.float16) / 225.0
    np_image = np.expand_dims(np_image,0)
    plt.imshow(plt.imread(image_path))
    result = np.argmax(model.predict_classes(np_image))
    print((image_labels.classes_[result][0]))

predict_disease('C:/Users/Betul/Desktop/PlantVillage/Pepper__bell___/Pepper__bell___healthy/0a3f2927-4410-46a3-bfda-5f4769a5aaf8___JR_HL 8275.JPG')

问题是无论我做什么,输出结果都是相同的值,相同的类。我是初学者,所以我不知道这个模型有什么问题。请帮帮我。

我尝试更改最后一层、损失函数(二进制、分类、稀疏)、纪元计数但没有。

它对 1 个 epoch 或 50 个 epoch 给出相同的分类。相同的输出。

我使用这个数据集并像图片中一样改变了它们。

Dataset directories sample

Dataset

1 个答案:

答案 0 :(得分:0)

我这是因为你的模型是一个有偏差的模型,即它倾向于预测一个类,即它大部分时间或所有时间预测同一个类。这是由于数据不平衡而发生的。例如,假设您正在构建一个模型,该模型对图像是狗还是猫进行分类,并说数据集的 80% 包含狗的图像,那么在您训练模型后,您的模型将始终预测图像是狗。

因此,您可以检查您的数据集是否平衡,或者您可以使用成本敏感学习,以防万一您无法平衡您的数据集。虽然平衡肯定会更好。