I am training a CNN model in Keras (TensorFlow backend) using fit_generator()
with on-the-fly augmentation. The model takes an image as input and should predict the steering angle of a self-driving car. Training freezes at the end of the first epoch and never continues. I have tried changing the batch size, learning rate, etc., but it did not help.
Please help!
BATCH_SIZE=32
INPUT_IMAGE_ROWS=160
INPUT_IMAGE_COLS=320
INPUT_IMAGE_CHANNELS=3
AUGMENTATION_NUM_BINS=200
NUM_EPOCHS=3
AUGMENTATION_BIN_MAX_PERC=0.5
AUGMENTATION_FACTOR=3
import csv
import cv2
import numpy as np
from random import shuffle
from sklearn.model_selection import train_test_split
import keras
from keras.callbacks import Callback
import math
from keras.preprocessing.image import *
print("\nLoading the dataset from file ...")
def load_dataset(file_path):
    dataset = []
    with open(file_path) as csvfile:
        reader = csv.reader(csvfile)
        for line in reader:
            try:
                dataset.append({'center':line[0], 'left':line[1], 'right':line[2], 'steering':float(line[3]),
                                'throttle':float(line[4]), 'brake':float(line[5]), 'speed':float(line[6])})
            except:
                continue # some images throw error during loading
    return dataset
dataset = load_dataset('C:\\Users\\kiit1\\Documents\\steering angle prediction\\dataset_coldivision\\data\\driving_log.csv')
print("Loaded {} samples from file {}".format(len(dataset),'C:\\Users\\kiit1\\Documents\\steering angle prediction\\dataset_coldivision\\data\\driving_log.csv'))
print("Partioning the dataset:")
shuffle(dataset)
#partitioning data into 80% training, 19% validation and 1% testing
X_train,X_validation=train_test_split(dataset,test_size=0.2)
X_validation,X_test=train_test_split(X_validation,test_size=0.05)
print("X_train has {} elements.".format(len(X_train)))
print("X_validation has {} elements.".format(len(X_validation)))
print("X_test has {} elements.".format(len(X_test)))
print("Partitioning the dataset complete.")
def generate_batch_data(dataset, batch_size = 32):
    global augmented_steering_angles
    global epoch_steering_count
    global epoch_bin_hits
    batch_images = np.zeros((batch_size, INPUT_IMAGE_ROWS, INPUT_IMAGE_COLS, INPUT_IMAGE_CHANNELS))
    batch_steering_angles = np.zeros(batch_size)
    while 1:
        for batch_index in range(batch_size):
            # select a random image from the dataset
            image_index = np.random.randint(len(dataset))
            image_data = dataset[image_index]
            while 1:
                try:
                    image, steering_angle = load_and_augment_image(image_data)
                except:
                    continue
                bin_idx = int(steering_angle * AUGMENTATION_NUM_BINS / 2)
                if( epoch_bin_hits[bin_idx] < epoch_steering_count/AUGMENTATION_NUM_BINS*AUGMENTATION_BIN_MAX_PERC
                        or epoch_steering_count<500 ):
                    batch_images[batch_index] = image
                    batch_steering_angles[batch_index] = steering_angle
                    augmented_steering_angles.append(steering_angle)
                    epoch_bin_hits[bin_idx] = epoch_bin_hits[bin_idx] + 1
                    epoch_steering_count = epoch_steering_count + 1
                    break
        yield batch_images, batch_steering_angles
print("\nTraining the model ...")
class LifecycleCallback(keras.callbacks.Callback):
    def on_epoch_begin(self, epoch, logs={}):
        pass
    def on_epoch_end(self, epoch, logs={}):
        global epoch_steering_count
        global epoch_bin_hits
        global bin_range
        epoch_steering_count = 0
        epoch_bin_hits = {k:0 for k in range(-bin_range, bin_range)}
    def on_batch_begin(self, batch, logs={}):
        pass
    def on_batch_end(self, batch, logs={}):
        self.losses.append(logs.get('loss'))
    def on_train_begin(self, logs={}):
        print('Beginning training')
        self.losses = []
    def on_train_end(self, logs={}):
        print('Ending training')
# Compute the correct number of samples per epoch based on batch size
def compute_samples_per_epoch(array_size, batch_size):
    num_batches = array_size / batch_size
    samples_per_epoch = math.ceil(num_batches)
    samples_per_epoch = samples_per_epoch * batch_size
    return samples_per_epoch
def load_and_augment_image(image_data, side_camera_offset=0.2):
    # select a value between 0 and 2 to switch between center, left and right image
    index = np.random.randint(3)
    if (index==0):
        image_file = image_data['left'].strip()
        angle_offset = side_camera_offset
    elif (index==1):
        image_file = image_data['center'].strip()
        angle_offset = 0.
    elif (index==2):
        image_file = image_data['right'].strip()
        angle_offset = - side_camera_offset
    steering_angle = image_data['steering'] + angle_offset
    image = cv2.imread(image_file)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # apply a mixture of several augmentation methods
    image, steering_angle = random_transform(image, steering_angle)
    return image, steering_angle
augmented_steering_angles = []
epoch_steering_count = 0
bin_range = int(AUGMENTATION_NUM_BINS / 4 * 3)
epoch_bin_hits = {k:0 for k in range(-bin_range, bin_range)}
# flips image about y-axis
def horizontal_flip(image,steering_angle):
    flipped_image=cv2.flip(image,1)
    steering_angle=-steering_angle
    return flipped_image,steering_angle
def translate(image,steering_angle,width_shift_range=50.0,height_shift_range=5.0):
    tx = width_shift_range * np.random.uniform() - width_shift_range / 2
    ty = height_shift_range * np.random.uniform() - height_shift_range / 2
    # new steering angle
    steering_angle += tx / width_shift_range * 2 * 0.2
    transformed_matrix=np.float32([[1,0,tx],[0,1,ty]])
    rows,cols=(image.shape[0],image.shape[1])
    translated_image=cv2.warpAffine(image,transformed_matrix,(cols,rows))
    return translated_image,steering_angle
def brightness(image,bright_increase=None):
    if(image.shape[2]>1):
        image_hsv=cv2.cvtColor(image,cv2.COLOR_RGB2HSV)
    else:
        image_hsv=image
    if bright_increase:
        image_hsv[:,:,2] += bright_increase
    else:
        bright_increase = int(30 * np.random.uniform(-0.3,1))
        image_hsv[:,:,2] = image[:,:,2] + bright_increase
    image = cv2.cvtColor(image_hsv, cv2.COLOR_HSV2RGB)
    return image
def rotation(image,rotation_range=5):
    image=random_rotation(image,rotation_range)
    return image

# Shift range for each channel
def channel_shift(image, intensity=30, channel_axis=2):
    image = random_channel_shift(image, intensity, channel_axis)
    return image
# Crop and resize the image
def crop_resize_image(image, cols=INPUT_IMAGE_COLS, rows=INPUT_IMAGE_ROWS, top_crop_perc=0.1, bottom_crop_perc=0.2):
    height = image.shape[0]
    width = image.shape[1]
    # crop top and bottom
    top_rows = int(height*top_crop_perc)
    bottom_rows = int(height*bottom_crop_perc)
    image = image[top_rows:height-bottom_rows, 0:width]
    # resize to the final size even if the aspect ratio is not preserved
    image = cv2.resize(image, (cols, rows), interpolation=cv2.INTER_LINEAR)
    return image
# Apply a sequence of random transformations for better generalization and to prevent overfitting
def random_transform(image, steering_angle):
    # all further transformations are done on the smaller image to reduce the processing time
    image = crop_resize_image(image)
    # every second image is flipped horizontally
    if np.random.random() < 0.5:
        image, steering_angle = horizontal_flip(image, steering_angle)
    image, steering_angle = translate(image, steering_angle)
    image = rotation(image)
    image = brightness(image)
    image = channel_shift(image)
    return img_to_array(image), steering_angle
from keras.models import Sequential, Model
from keras.layers.core import Lambda, Dense, Activation, Flatten, Dropout
from keras.layers.convolutional import Cropping2D, Convolution2D
from keras.layers.advanced_activations import ELU
from keras.layers.noise import GaussianNoise
from keras.optimizers import Adam
print("\nBuilding and compiling the model ...")
model = Sequential()
model.add(Lambda(lambda x: (x / 127.5) - 1.0, input_shape=(INPUT_IMAGE_ROWS, INPUT_IMAGE_COLS, INPUT_IMAGE_CHANNELS)))
# Conv layer 1: 16 filters of size (8, 8) with strides (4, 4)
model.add(Convolution2D(16, 8, 8, subsample=(4, 4), border_mode="same"))
model.add(ELU())
# Conv layer 2: 32 filters of size (5, 5) with strides (2, 2)
model.add(Convolution2D(32, 5, 5, subsample=(2, 2), border_mode="same"))
model.add(ELU())
# Conv layer 3: 64 filters of size (5, 5) with strides (2, 2)
model.add(Convolution2D(64, 5, 5, subsample=(2, 2), border_mode="same"))
model.add(Flatten())
model.add(Dropout(.5))
model.add(ELU())
model.add(Dense(512))
model.add(Dropout(.5))
model.add(ELU())
model.add(Dense(1))
model.summary()
adam = Adam(lr=0.0001)
model.compile(loss='mse', optimizer=adam)
lifecycle_callback = LifecycleCallback()
train_generator = generate_batch_data(X_train, BATCH_SIZE)
validation_generator = generate_batch_data(X_validation, BATCH_SIZE)
samples_per_epoch = compute_samples_per_epoch((len(X_train)*AUGMENTATION_FACTOR), BATCH_SIZE)
nb_val_samples = compute_samples_per_epoch((len(X_validation)*AUGMENTATION_FACTOR), BATCH_SIZE)
history = model.fit_generator(train_generator,
                              validation_data = validation_generator,
                              samples_per_epoch = ((len(X_train) // BATCH_SIZE) * BATCH_SIZE) * 2,
                              nb_val_samples = ((len(X_validation) // BATCH_SIZE) * BATCH_SIZE) * 2,
                              nb_epoch = NUM_EPOCHS, verbose=1)
print("\nTraining the model ended.")
Answer 0 (score: 0)
You have a weird data generator structure, which is most likely causing this problem, although I cannot be completely sure. Your structure is as follows:
while 1:
    ....
    for _ in range(batch_size):
        randomly select an image   # this is inefficient, see below for comments
        while 1:
            process image
            if epoch is not done:
                collect images in a list
                break
    yield ...
Now,

Do not randomly select an image at every iteration. Instead, shuffle the dataset once at the beginning of each epoch and then pick images sequentially (see the sketch after the generator structure below).
As far as I can tell, `if epoch is not done` followed by `break` is a typo. Did you mean `if epoch is not done` then `collect images`, otherwise `break`? Your `break` sits inside the `if`, which means that the first time the `if` is entered, execution drops out of the innermost `while 1` loop. That is surely not what you intended, right?
The `yield` is outside the `for` loop. You should `yield` every batch, so if the `for` loop is the one producing batches, the `yield` should sit inside that `for`. The basic structure of a data generator should be:
while 1:
    shuffle entire dataset once   # not applicable for massive datasets
    for _ in range(n_batches_per_epoch):
        get a data batch
        # Optionally, do some preprocessing: preferably on the entire batch,
        # not one by one; you could also preprocess the entire dataset if it's
        # simple enough, such as mean subtraction.
        yield batches, labels
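As a rough sketch only (not your exact pipeline), a generator following this structure could look like the code below; it reuses your load_and_augment_image and assumes dataset is the list of dicts loaded from the CSV, with the bin-balancing logic left out:

import numpy as np
from random import shuffle

def simple_generator(dataset, batch_size=32):
    n_batches_per_epoch = len(dataset) // batch_size
    while 1:
        shuffle(dataset)                                          # shuffle once per epoch
        for b in range(n_batches_per_epoch):
            batch = dataset[b * batch_size:(b + 1) * batch_size]  # sequential slice, no random picks
            images, angles = zip(*[load_and_augment_image(d) for d in batch])
            # yield once per batch, inside the for loop
            yield np.array(images), np.array(angles)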
I suggest that you write your data generator again. You can look at the `myGenerator()` function on this page for a basic data generator. Once your generator is written, test it as a standalone function to make sure it outputs data indefinitely and keeps track of the epochs.
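For example, a quick standalone check (using the simple_generator sketch above, or your own rewritten generator) could be:

# Pull a few batches manually before handing the generator to fit_generator;
# if this loop hangs, the problem is in the generator itself, not in Keras.
gen = simple_generator(X_train, BATCH_SIZE)
for i in range(5):
    images, angles = next(gen)
    print(i, images.shape, angles.shape, angles.min(), angles.max())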
Answer 1 (score: 0)
In short, it is hard to say which part is at fault; it could be the data, it could be the model, or something else. Be patient, and you will sort the problem out eventually.
First, you can train a `baseLine` model without any data augmentation. If your data augmentation is helpful, you should see a performance boost after applying the same augmentation to a new `augmLine` model.
If `baseLine` behaves similarly to `augmLine`, you may want to reconsider the network design. For example, in your current design: 1) Conv2D layers without any activation are very rare, and you may want to use `relu` or `tanh`; and 2) `ELU(alpha)` is known to be sensitive to the `alpha` value.
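For example (a sketch only, using the same Keras 1.x-style API as your code; the layer sizes are your values, with an explicit activation and an explicit alpha added purely as illustrations):

from keras.models import Sequential
from keras.layers.convolutional import Convolution2D
from keras.layers.advanced_activations import ELU

model = Sequential()
# Option 1: give each Conv2D an explicit activation
model.add(Convolution2D(16, 8, 8, subsample=(4, 4), border_mode="same", activation='relu',
                        input_shape=(INPUT_IMAGE_ROWS, INPUT_IMAGE_COLS, INPUT_IMAGE_CHANNELS)))
# Option 2: keep a separate ELU layer but choose alpha deliberately
model.add(Convolution2D(32, 5, 5, subsample=(2, 2), border_mode="same"))
model.add(ELU(alpha=0.3))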
If `baseLine` actually works fine, it indicates that your `augmLine` data is problematic. To make sure the augmented data is correct, you had better plot both the image data and the target values and verify them manually. One common mistake in image data augmentation is that when the target value depends on the input image, new target values must be generated from the augmented image, and sometimes this task is not trivial.
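A minimal way to eyeball the augmented data (assuming matplotlib is available, and reusing your load_and_augment_image) might be:

import matplotlib.pyplot as plt

# Draw a few augmented samples with their adjusted steering angles, so that
# flips/translations and the matching angle changes can be checked by eye.
fig, axes = plt.subplots(1, 5, figsize=(15, 3))
for ax in axes:
    sample = X_train[np.random.randint(len(X_train))]
    image, angle = load_and_augment_image(sample)
    ax.imshow(image.astype('uint8'))
    ax.set_title("steering: {:.3f}".format(angle))
    ax.axis('off')
plt.show()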
Note that, for a fair comparison, you need to keep the validation data unchanged across the two experiments.
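For instance (a sketch only; baseline_model and augmented_model are assumed to be two trained instances of your network, and the names are illustrative, not from your code):

# Build one fixed set of validation arrays and evaluate both models on it, so
# the baseLine/augmLine comparison is not skewed by different validation samples.
val_images = np.array([crop_resize_image(
    cv2.cvtColor(cv2.imread(d['center'].strip()), cv2.COLOR_BGR2RGB))
    for d in X_validation])
val_angles = np.array([d['steering'] for d in X_validation])

print("baseLine val MSE:", baseline_model.evaluate(val_images, val_angles, verbose=0))
print("augmLine val MSE:", augmented_model.evaluate(val_images, val_angles, verbose=0))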