我正在尝试训练CNN以识别汽车。 我正在使用this数据集,而我的CNN使用传递学习(具有ImageNet权重的VGG19)。 我正在使用AWS的SageMaker ml.m4.2xlarge实例对其进行训练,并且在经过10个时间段(即10小时)后,准确性没有提高一点(与默认准确性相同,大约为0.0050)。
这又带我到下一个问题:这是什么问题? 这是我第一次尝试CNN,所以我想我一定做错了什么,但我看不到它是什么...
无论如何,这是我的代码(它来自Jupyter笔记本,因此每行'########'代表一个新单元格。
########
# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import optimizers
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils
from keras.applications import VGG19
# Helper libraries
import numpy as np
import os
from PIL import Image
import scipy.io as sio
########
# Download data
!wget -N http://imagenet.stanford.edu/internal/car196/cars_train.tgz
########
# Extract files
!tar -xvzf cars_train.tgz
!unzip -o cars_train.zip
########
# Download devkit
!wget -N https://ai.stanford.edu/~jkrause/cars/car_devkit.tgz
########
# Extract files
!tar -xvzf car_devkit.tgz
########
# load data
train_labels = sio.loadmat('devkit/cars_train_annos.mat')['annotations']['class'][0]
class_names = sio.loadmat('devkit/cars_meta.mat')['class_names'][0]
########
# We must substract 1 to make the value an index in the range [0, 196)
for i, label in enumerate(train_labels):
train_labels[i] = train_labels[i] - 1
train_labels = keras.utils.to_categorical(train_labels, len(class_names))
########
# Preprocess images
train_images = np.zeros((8144,227,227,3))
bbox = sio.loadmat('devkit/cars_train_annos.mat')['annotations'] # bounding boxes
xmin = bbox['bbox_x1'][0]
xmax = bbox['bbox_x2'][0]
ymin = bbox['bbox_y1'][0]
ymax = bbox['bbox_y2'][0]
for i, fname in enumerate(os.listdir('cars_train')):
print(i)
image = Image.open('cars_train/' + fname)
image = image.crop((xmin[i][0][0], ymin[i][0][0], xmax[i][0][0], ymax[i][0][0])) # cropping using bboxes
image = image.resize((227, 227), Image.ANTIALIAS) # we want 227*227 images
if np.array(image).ndim == 2: # a few images are grayscale, must be converted to 3-channel (fake RGB)
train_images[i] = np.asarray(np.dstack((image, image, image)) / 255.0).reshape(-1,227,227,3)
else:
train_images[i] = (np.array(image) / 255.0).reshape(-1,227,227,3) # we want values between 0 and 1
image.close() # saves memory
########
# Build model using pre-trained VGG19 to save time (transfer learning)
base_model = VGG19(weights='imagenet',
include_top=False,
input_shape=(227, 227, 3))
x = base_model.output
x = Flatten()(x)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.25)(x)
x = Dense(1024, activation="relu")(x)
predictions = Dense(len(class_names), activation="softmax")(x)
head_model = Model(inputs=base_model.input, output=predictions)
########
# Compile model
head_model.compile(optimizer=optimizers.SGD(lr=0.0001, momentum=0.9),
loss='categorical_crossentropy',
metrics=['categorical_accuracy'])
########
# Train model using checkpoints (in case there's a crash)
checkpoint = ModelCheckpoint('weights.h5', monitor='loss', verbose=1, save_best_only=True, save_weights_only=True, period=1)
head_model.fit(np.array(train_images), train_labels, epochs=10, callbacks=[checkpoint])
########
感谢您的帮助!