我正在使用 Keras(TensorFlow 后端)的 VGG16 模型和 Food-101 图像数据集做一个食品分类项目。但是,我遇到了验证准确率方面的问题(我认为原因是过拟合):验证准确率一直无法提高,始终停留在 48–51% 左右。我有 40 个类别(40 种不同的食物),每种食物有 700 张训练图像和 300 张验证图像。我还用一批随机的食物图像评估了我的模型。我试过了:
虽然这些做法有一定帮助,但并没有大幅提高验证准确率。我听说有人使用 preprocess_input() 函数来提高验证准确率,但我不确定这是否有效。
这是我的代码:
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications
from keras.utils import to_categorical
from keras import optimizers
# Input size (pixels) every image is resized to before entering VGG16.
img_width, img_height = 150, 150

# Where the trained classifier-head weights are written.
top_model_weights_path = 'test2_classes.h5'

# Dataset roots. Raw strings keep the Windows backslashes literal: in a
# normal string '\i' and '\d' are invalid escape sequences, which Python
# only tolerates with a warning.
train_data_dir = r'D:\intallation\dataset\dataset-101/food/train'
validation_data_dir = r'D:\intallation\dataset\dataset-101/food/validation'

# Expected image counts: 40 classes x 700 training / 300 validation images.
nb_train_samples = 28000
nb_validation_samples = 12000

# Training hyper-parameters for the classifier head.
epochs = 80
batch_size = 32
def _save_features(model, datagen, directory, out_path):
    """Predict features for every image under *directory* and save them.

    Images are streamed unshuffled and without labels, so row ``i`` of the
    saved array corresponds to the ``i``-th file the generator discovered.
    """
    generator = datagen.flow_from_directory(
        directory,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
    # Ceiling division over the number of images actually found: a floor
    # division over a hard-coded count would silently drop the final partial
    # batch (or wrap around and duplicate images) whenever the directory does
    # not hold exactly that many files, leaving features and labels unaligned.
    steps = (generator.samples + batch_size - 1) // batch_size
    features = model.predict_generator(generator, steps)
    np.save(out_path, features)


def save_bottlebeck_features():
    """Cache the VGG16 convolutional-base outputs for both data splits.

    Writes ``test_trained.npy`` (from ``train_data_dir``) and
    ``test_validation.npy`` (from ``validation_data_dir``) to the current
    working directory.
    """
    datagen = ImageDataGenerator(rescale=1. / 255)
    # NOTE(review): the ImageNet VGG16 weights are documented as expecting
    # inputs prepared by keras.applications.vgg16.preprocess_input rather
    # than [0, 1]-scaled pixels. Switching would change the cached features,
    # so it is left as-is here -- confirm and evaluate separately.

    # Build the VGG16 network without its fully-connected classifier.
    model = applications.VGG16(include_top=False, weights='imagenet')

    _save_features(model, datagen, train_data_dir, 'test_trained.npy')
    _save_features(model, datagen, validation_data_dir, 'test_validation.npy')
def _scan_labels(directory):
    """Return ``(classes, class_indices)`` for the images under *directory*.

    Only the directory listing is needed, so a plain ``ImageDataGenerator``
    is used: rescaling or augmentation settings would have no effect because
    no image is ever read from this generator.
    """
    generator = ImageDataGenerator().flow_from_directory(
        directory,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False)
    return generator.classes, generator.class_indices


def train_top_model():
    """Train a small dense classifier on the cached bottleneck features.

    Loads ``test_trained.npy`` / ``test_validation.npy``, derives one-hot
    labels from the dataset directory structure, fits
    Flatten -> Dense(4096, relu) -> Dropout(0.5) -> Dense(num_classes, softmax)
    with Nesterov SGD, and saves the weights to ``top_model_weights_path``.
    The class-name-to-index mapping is saved to ``test_class_indices.npy``.

    Raises:
        ValueError: if a cached feature array does not have one row per
            labelled image in the corresponding directory.
    """
    # Class labels for the training data.
    train_classes, class_indices = _scan_labels(train_data_dir)
    np.save('test_class_indices.npy', class_indices)
    num_classes = len(class_indices)

    train_data = np.load('test_trained.npy')
    train_labels = to_categorical(train_classes, num_classes=num_classes)

    # Class labels for the validation data.
    validation_classes, _ = _scan_labels(validation_data_dir)
    validation_data = np.load('test_validation.npy')
    validation_labels = to_categorical(
        validation_classes, num_classes=num_classes)

    # The features were cached in an earlier step; fail early with a clear
    # message if they no longer line up with the images on disk.
    for split, data, labels in (
            ('training', train_data, train_labels),
            ('validation', validation_data, validation_labels)):
        if len(data) != len(labels):
            raise ValueError(
                '%s features have %d rows but %d labelled images were found'
                % (split, len(data), len(labels)))

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))

    sgd = optimizers.SGD(lr=1e-4, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd,
                  loss='categorical_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)
if __name__ == '__main__':
    # Run only when executed as a script, so importing this module does not
    # start feature extraction and training as a side effect.
    # Order matters: the first step writes the .npy feature files that the
    # second step loads.
    save_bottlebeck_features()
    train_top_model()