Question

我正在从事的项目涉及培训深度学习网络，以识别可变长度（5到8个字符）的摩洛哥车牌 ，到目前为止，我能够检测它们并使用Tensorflow Object Detection API裁剪它们，现在我想识别边界框中的文本。所以我选择了Keras Functional API。

解决了一些问题后，我可以使用Keras开始训练过程，但是我损失了0.0556。

这是main.py（original）的代码：

import os
import codecs
import cv2
import numpy as np
from keras import backend as K
from keras.layers import Input, Dense, Activation, Conv2D, Reshape
from keras.layers import BatchNormalization, Lambda, MaxPooling2D, Dropout
from keras.layers.merge import add, concatenate
from keras.callbacks import EarlyStopping,Callback
from keras.layers.recurrent import GRU
from keras.models import Model
from keras import optimizers 
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.models import load_model
from keras.utils.vis_utils import plot_model
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

CHARS = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
         'A', 'B', 'J', 'D', 'H', 'O', 'W'
        ]
CHARS_DICT = {char:i for i, char in enumerate(CHARS)}
NUM_CHARS = len(CHARS)
# The GPU used 1060, and is not selected will automatically call the set display
# os.environ['CUDA_VISIBLE_DEVICES'] = '1'
#Dynamic application memory
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
set_session(tf.Session(config=config))


#The necessary parameters
num_channels = 3
ti = '../car_pic/image/train/' #Training picture directory
tl = '../car_pic/image/train_labels.txt' #Training label file
vi = '../car_pic/image/val/'  #Verify image directory
vl = '../car_pic/image/val_labels.txt' #Verify that the label file
img_size = [230,50] #Training picture width and height
label_len = 8 #The length of the label
dir_log = './logs/'
c = '../car_pic/image/' #checkpoints format string
num_epochs = 200 #number of epochs
start_of_epoch = 0

#Network parameters
conv_filters = 16
kernel_size = (3, 3)
pool_size = 2
time_dense_size = 32
rnn_size = 512
batch_size = 16

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    #Why is from 2 to start?
    y_pred = y_pred[:, 2:, :]  
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

############The model structure############
input_tensor = Input(name='the_input', shape=(img_size[0], img_size[1], num_channels), dtype='float32')
x = input_tensor
base_conv = 32
#Convolutional layer 1
x = Conv2D(base_conv * 1, (3,3), padding="same",name='conv1')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
#Convolution layer 2
x = Conv2D(base_conv * 2, (3,3), padding="same",name='conv2')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
#Convolution layer 3
x = Conv2D(base_conv * 4, (3,3), padding="same",name='conv3')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
# Parameter view
# conv_shape = x.get_shape().as_list()
# rnn_length = conv_shape[1]
# rnn_dimen = conv_shape[2]*conv_shape[3]
# print(conv_shape, rnn_length, rnn_dimen)
#Dimension conversion
conv_to_rnn_dims = (img_size[0]//(2**3),(img_size[1]//(2**3))*128)
x = Reshape(target_shape=conv_to_rnn_dims,name='reshape')(x)
x =Dense(time_dense_size,activation='relu',name='dense1')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)

# x = Dropout(0.2)(x)
#Two layersbidirecitonal GRUs
gru_1 = GRU(rnn_size,return_sequences=True,kernel_initializer='he_normal',name='gru_1')(x)
gru_1b = GRU(rnn_size,return_sequences=True,go_backwards=True,kernel_initializer='he_normal',name='gru_1b')(x)
gru1_merged = add([gru_1,gru_1b])
gru_2 = GRU(rnn_size,return_sequences=True,kernel_initializer='he_normal',name='gru_2')(gru1_merged)
gru_2b = GRU(rnn_size,return_sequences=True,go_backwards=True,kernel_initializer='he_normal',name='gru_2b')(gru1_merged)

# transforms RNN output to character activations:  
x = Dense(NUM_CHARS+1,kernel_initializer='he_normal',name='dense2')(concatenate([gru_2,gru_2b]))
x = Activation('softmax',name='softmax')(x)

#Print Out model profile
base_model = Model(inputs=input_tensor, outputs=x)
base_model.summary()
#ComputingctcThe necessary parameters
pred_length = int(x.shape[1])  #Why would minus the 2 before you can run it???
labels = Input(name='the_labels', shape=[label_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int32')
label_length = Input(name='label_length', shape=[1], dtype='int32')

loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])

model = Model(inputs=[input_tensor, labels, input_length, label_length], outputs=[loss_out])

plot_model(model,to_file=" gru_model.png",show_shapes=True) #show_shapes 带参数显示

# adam = optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08) 

model.compile(loss={'ctc': lambda y_true, y_pred: x}, optimizer='adam')

#The license plate corresponding to thelables
def encode_label(s):
    label = np.zeros([len(s)])
    for i, c in enumerate(s):
        label[i] = CHARS_DICT[c]
    return label

# def encode_label(text):
#     return list(map(lambda x: CHARS.index(x), text))

def labels_to_text(labels):
    return ''.join(list(map(lambda x: CHARS[int(x)], labels)))

def parse_line(line):
    parts = line.split('.')
    filename = parts[0]
    label = encode_label(parts[0].strip().upper())
    return filename, label

class TextImageGenerator:
    def __init__(self, img_dir, label_file, batch_size, img_size, input_length, num_channels=3, label_len=8):
        self._img_dir = img_dir
        self._label_file = label_file
        self._batch_size = batch_size
        self._num_channels = num_channels
        self._label_len = label_len
        self._input_len = input_length
        self._img_w, self._img_h = img_size
        self._num_examples = 0
        self._next_index = 0
        self._num_epoches = 0
        self.filenames = []
        self.labels = None
        self.init()

    def init(self):
        #self.labels = []
        with open(self._label_file) as f:
            for c, l in enumerate(f):
                pass
        self.labels = np.ones([c+1, self._label_len]) * -1
        with open(self._label_file) as f:
            for i, line in enumerate(f):
                filename, label = parse_line(line)
                self.filenames.append(filename+".jpg")
                self.labels[i,0:len(label)] = label
                #self.labels.append(label)
                self._num_examples += 1

        #self.labels = np.array([v + [26.] * (self._label_len - len(v)) for v in self.labels])
        self.labels = np.float32(self.labels)
        # self.labels = [[np.float32(v) for v in lll] for lll in self.labels]

    def next_batch(self):
        # Shuffle the data
        if self._next_index == 0:
            perm = np.arange(self._num_examples)
            np.random.shuffle(perm)
            self._filenames = [self.filenames[i] for i in perm]
            self._labels = self.labels[perm]

        batch_size = self._batch_size
        start = self._next_index
        end = self._next_index + batch_size
        if end >= self._num_examples:
            self._next_index = 0
            self._num_epoches += 1
            end = self._num_examples
            batch_size = self._num_examples - start
        else:
            self._next_index = end
        images = np.zeros([batch_size, self._img_h, self._img_w, self._num_channels])
        # labels = np.zeros([batch_size, self._label_len])
        for j, i in enumerate(range(start, end)):
            fname = self._filenames[i]
            img = cv2.imread(os.path.join(self._img_dir, fname))
            images[j, ...] = img
        images = np.transpose(images, axes=[0, 2, 1, 3])
        labels = self._labels[start:end]
        # print("HHHHHHHHHHHHHHHHHHHHHHH")
        # print("++++",start, "+++",end)
        # print(self._labels)
        # print("HHHHHHHHHHHHHHHHHHHHHHH")
        # print(labels)
        # print("HHHHHHHHHHHHHHHHHHHHHHH")
        input_length = np.zeros([batch_size, 1])
        label_length = np.zeros([batch_size, 1])
        input_length[:] = self._input_len
        label_length[:] = self._label_len
        outputs = {'ctc': np.zeros([batch_size])}
        inputs = {'the_input': images,
                  'the_labels': labels,
                  'input_length': input_length,
                  'label_length': label_length,
                  }
        return inputs, outputs

    def get_data(self):
        while True:
            yield self.next_batch()



#Generated data
train_gen = TextImageGenerator(img_dir=ti,
                                 label_file=tl,
                                 batch_size=batch_size,
                                 img_size=img_size,
                                 input_length=pred_length,
                                 num_channels=num_channels,
                                 label_len=label_len)

val_gen = TextImageGenerator(img_dir=vi,
                                 label_file=vl,
                                 batch_size=batch_size,
                                 img_size=img_size,
                                 input_length=pred_length,
                                 num_channels=num_channels,
                                 label_len=label_len)

# # Model evaluation
def evaluate(steps=10):
    batch_acc = 0
    generator = train_gen
    for i in range(steps):
        x_test, y_test = next(generator)
        y_pred = base_model.predict(x_test)
        shape = y_pred[:,2:,:].shape
        ctc_decode = K.ctc_decode(y_pred[:,2:,:], input_length=np.ones(shape[0])*shape[1])[0][0]
        out = K.get_value(ctc_decode)[:, :label_len]
        if out.shape[1] == label_len:
            batch_acc += (y_test == out).all(axis=1).mean()
    return batch_acc / steps

class Evaluator(Callback):
    def __init__(self):
        self.accs = []

    def on_epoch_end(self, epoch, logs=None):
        acc = evaluate(steps=20)*100
        self.accs.append(acc)
        print('')
        print('acc: %f%%' % acc)

evaluator = Evaluator()        
# #The callback function will in eachepochAfter saving the model to the path
# checkpoints_cb = ModelCheckpoint(c, period=1)
# cbs = [checkpoints_cb]

# #tensorboard 
# if dir_log != '':
# tfboard_cb = TensorBoard(log_dir=dir_log, write_images=True)
# cbs.append(tfboard_cb)
import matplotlib.pyplot as plt

checkpoints_cb = ModelCheckpoint("../checkpoints/'weights.{epoch:02d}-{val_loss:.2f}.h5'", period=1)
cbs = [checkpoints_cb]

h = model.fit_generator(generator=train_gen.get_data(),
                    steps_per_epoch=100,
                    epochs=20,
                    validation_data=val_gen.get_data(),
                    validation_steps=20,
                    callbacks=cbs,
                    initial_epoch=0)
                    # callbacks=[EarlyStopping(patience=10)])

# Save the model to save the weight value
model = Model(inputs=input_tensor, outputs=x)
# model.save(save_name)
model.save_weights('my_model_weight.h5')
print('model saved to {}'.format('my_model_weight.h5'))

这是我遇到的问题，固定损失：

Epoch 1/20
100/100 [==============================] - 209s 2s/step - loss: 0.0556 - val_loss: 0.0556
Epoch 2/20
100/100 [==============================] - 203s 2s/step - loss: 0.0556 - val_loss: 0.0556
Epoch 3/20
100/100 [==============================] - 217s 2s/step - loss: 0.0556 - val_loss: 0.0556
Epoch 4/20
100/100 [==============================] - 215s 2s/step - loss: 0.0556 - val_loss: 0.0556

此外，当我尝试使用导出的权重进行预测时，得到的矩阵为0.0556。

固定训练Keras变长车牌时的固定损失（CNN，GRU，CTC损失）

0 个答案: