I first trained a small network, shown below:
from keras import models, layers

model = models.Sequential()
model.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', input_shape=(256, 256, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.2))
model.add(layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.2))
model.add(layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.2))
model.add(layers.Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.2))
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))  # Dropout for regularization
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))  # Sigmoid output because there are just two classes
It reached 70-75% accuracy with no overfitting.
Then I decided to fine-tune VGG16 with weights pre-trained on ImageNet, freezing all of the conv layers except the last ones, then adding two FC layers on top and training the model.
I tried optimizing the model with the Adam optimizer (could it perform worse even with a very small learning rate?). I used ReduceLROnPlateau and EarlyStopping, and the network reaches an accuracy of about 80-82%, but it overfits badly: the validation loss keeps increasing, and there is a huge gap between training and validation accuracy.
The network is as follows:
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

epochs = 50
decay = 0.0
earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
mcp_save = ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, epsilon=1e-5, mode='min')  # 'epsilon' is named 'min_delta' in newer Keras versions
from keras import applications, optimizers
from keras.models import Model
from keras.layers import Flatten, Dense
from keras.regularizers import l2

# build the VGG16 network
base_model = applications.VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
print('Model loaded.')

# freeze everything up to the last conv block
for layer in base_model.layers[:15]:
    layer.trainable = False
#for layer in base_model.layers:
#    layer.trainable = False

# add new classifier layers
flat1 = Flatten()(base_model.layers[-1].output)
class1 = Dense(128, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), activation='relu', kernel_initializer='he_uniform')(flat1)
output = Dense(1, kernel_regularizer=l2(0.01), bias_regularizer=l2(0.01), activation='sigmoid')(class1)

# define new model
model = Model(inputs=base_model.inputs, outputs=output)
model.compile(loss='binary_crossentropy',
              optimizer=optimizers.Adam(lr=1e-4, decay=decay),
              metrics=['accuracy'])
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
train_dir = '/home/d/Desktop/s/data/train'
eval_dir = '/home/d/Desktop/s/data/eval'
test_dir = '/home/d/Desktop/s/data/test'
# create a data generator
train_datagen = ImageDataGenerator(rescale=1./255, #Scale the image between 0 and 1
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,)
val_datagen = ImageDataGenerator(rescale=1./255)  # No augmentation for validation data; only rescaling
test_datagen = ImageDataGenerator(rescale=1./255)  # Likewise, only rescale the test data
# load and iterate training dataset
train_generator = train_datagen.flow_from_directory(train_dir, target_size=(224, 224), class_mode='binary', batch_size=16, shuffle=True, seed=42)
# load and iterate validation dataset
val_generator = val_datagen.flow_from_directory(eval_dir, target_size=(224, 224), class_mode='binary', batch_size=16, shuffle=True, seed=42)
# load and iterate test dataset
test_generator = test_datagen.flow_from_directory(test_dir, target_size=(224, 224), class_mode=None, batch_size=1, shuffle=False, seed=42)  # shuffle takes a boolean; the string 'False' is truthy and would shuffle
#The training part
#We train for 50 epochs, with steps_per_epoch derived from the dataset size
history = model.fit_generator(train_generator,
                              steps_per_epoch=train_generator.n // train_generator.batch_size,
                              epochs=epochs,
                              validation_data=val_generator,
                              validation_steps=val_generator.n // val_generator.batch_size,
                              callbacks=[earlyStopping, mcp_save, reduce_lr_loss])
My dataset is small and very different from ImageNet, and I already apply a lot of image augmentation.
So the only explanation I can come up with is that the VGG16 network is too complex for my dataset. I would like to use a smaller network while still transferring the pretrained weights into its initial layers. How can I do that? Any other suggestions are welcome as well. A rough sketch of what I mean is shown below.
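For concreteness, here is a minimal sketch of the kind of thing I am trying to do (the cut point 'block3_pool' and the head sizes are just placeholders, not a working solution): keep only the early VGG16 blocks, so their ImageNet weights initialize the smaller network, and attach a fresh classifier head.

from keras import applications
from keras.models import Model
from keras.layers import Flatten, Dense

# Load VGG16 with ImageNet weights, without the top classifier
vgg = applications.VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Cut the network after an early block ('block3_pool' is a placeholder choice),
# keeping only the first conv blocks and their pretrained weights
backbone_output = vgg.get_layer('block3_pool').output

# Attach a small, randomly initialized classifier head
x = Flatten()(backbone_output)
x = Dense(128, activation='relu')(x)
out = Dense(1, activation='sigmoid')(x)
small_model = Model(inputs=vgg.input, outputs=out)

Is cutting a pretrained model like this the right way to reuse only its early layers, or is there a better approach?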