我正在从事的项目涉及培训深度学习网络,以识别可变长度(5到8个字符)的摩洛哥车牌 ,到目前为止,我能够检测它们并使用Tensorflow Object Detection API裁剪它们,现在我想识别边界框中的文本。所以我选择了Keras Functional API
。
解决了一些问题后,我可以使用Keras开始训练过程,但是我损失了0.0556。
这是main.py
(original)的代码:
import os
import codecs
import cv2
import numpy as np
from keras import backend as K
from keras.layers import Input, Dense, Activation, Conv2D, Reshape
from keras.layers import BatchNormalization, Lambda, MaxPooling2D, Dropout
from keras.layers.merge import add, concatenate
from keras.callbacks import EarlyStopping,Callback
from keras.layers.recurrent import GRU
from keras.models import Model
from keras import optimizers
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.models import load_model
from keras.utils.vis_utils import plot_model
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
CHARS = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
'A', 'B', 'J', 'D', 'H', 'O', 'W'
]
CHARS_DICT = {char:i for i, char in enumerate(CHARS)}
NUM_CHARS = len(CHARS)
# The GPU used 1060, and is not selected will automatically call the set display
# os.environ['CUDA_VISIBLE_DEVICES'] = '1'
#Dynamic application memory
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
set_session(tf.Session(config=config))
#The necessary parameters
num_channels = 3
ti = '../car_pic/image/train/' #Training picture directory
tl = '../car_pic/image/train_labels.txt' #Training label file
vi = '../car_pic/image/val/' #Verify image directory
vl = '../car_pic/image/val_labels.txt' #Verify that the label file
img_size = [230,50] #Training picture width and height
label_len = 8 #The length of the label
dir_log = './logs/'
c = '../car_pic/image/' #checkpoints format string
num_epochs = 200 #number of epochs
start_of_epoch = 0
#Network parameters
conv_filters = 16
kernel_size = (3, 3)
pool_size = 2
time_dense_size = 32
rnn_size = 512
batch_size = 16
def ctc_lambda_func(args):
y_pred, labels, input_length, label_length = args
#Why is from 2 to start?
y_pred = y_pred[:, 2:, :]
return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
############The model structure############
input_tensor = Input(name='the_input', shape=(img_size[0], img_size[1], num_channels), dtype='float32')
x = input_tensor
base_conv = 32
#Convolutional layer 1
x = Conv2D(base_conv * 1, (3,3), padding="same",name='conv1')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
#Convolution layer 2
x = Conv2D(base_conv * 2, (3,3), padding="same",name='conv2')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
#Convolution layer 3
x = Conv2D(base_conv * 4, (3,3), padding="same",name='conv3')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
# Parameter view
# conv_shape = x.get_shape().as_list()
# rnn_length = conv_shape[1]
# rnn_dimen = conv_shape[2]*conv_shape[3]
# print(conv_shape, rnn_length, rnn_dimen)
#Dimension conversion
conv_to_rnn_dims = (img_size[0]//(2**3),(img_size[1]//(2**3))*128)
x = Reshape(target_shape=conv_to_rnn_dims,name='reshape')(x)
x =Dense(time_dense_size,activation='relu',name='dense1')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
# x = Dropout(0.2)(x)
#Two layersbidirecitonal GRUs
gru_1 = GRU(rnn_size,return_sequences=True,kernel_initializer='he_normal',name='gru_1')(x)
gru_1b = GRU(rnn_size,return_sequences=True,go_backwards=True,kernel_initializer='he_normal',name='gru_1b')(x)
gru1_merged = add([gru_1,gru_1b])
gru_2 = GRU(rnn_size,return_sequences=True,kernel_initializer='he_normal',name='gru_2')(gru1_merged)
gru_2b = GRU(rnn_size,return_sequences=True,go_backwards=True,kernel_initializer='he_normal',name='gru_2b')(gru1_merged)
# transforms RNN output to character activations:
x = Dense(NUM_CHARS+1,kernel_initializer='he_normal',name='dense2')(concatenate([gru_2,gru_2b]))
x = Activation('softmax',name='softmax')(x)
#Print Out model profile
base_model = Model(inputs=input_tensor, outputs=x)
base_model.summary()
#ComputingctcThe necessary parameters
pred_length = int(x.shape[1]) #Why would minus the 2 before you can run it???
labels = Input(name='the_labels', shape=[label_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int32')
label_length = Input(name='label_length', shape=[1], dtype='int32')
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])
model = Model(inputs=[input_tensor, labels, input_length, label_length], outputs=[loss_out])
plot_model(model,to_file=" gru_model.png",show_shapes=True) #show_shapes 带参数显示
# adam = optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
model.compile(loss={'ctc': lambda y_true, y_pred: x}, optimizer='adam')
#The license plate corresponding to thelables
def encode_label(s):
label = np.zeros([len(s)])
for i, c in enumerate(s):
label[i] = CHARS_DICT[c]
return label
# def encode_label(text):
# return list(map(lambda x: CHARS.index(x), text))
def labels_to_text(labels):
return ''.join(list(map(lambda x: CHARS[int(x)], labels)))
def parse_line(line):
parts = line.split('.')
filename = parts[0]
label = encode_label(parts[0].strip().upper())
return filename, label
class TextImageGenerator:
def __init__(self, img_dir, label_file, batch_size, img_size, input_length, num_channels=3, label_len=8):
self._img_dir = img_dir
self._label_file = label_file
self._batch_size = batch_size
self._num_channels = num_channels
self._label_len = label_len
self._input_len = input_length
self._img_w, self._img_h = img_size
self._num_examples = 0
self._next_index = 0
self._num_epoches = 0
self.filenames = []
self.labels = None
self.init()
def init(self):
#self.labels = []
with open(self._label_file) as f:
for c, l in enumerate(f):
pass
self.labels = np.ones([c+1, self._label_len]) * -1
with open(self._label_file) as f:
for i, line in enumerate(f):
filename, label = parse_line(line)
self.filenames.append(filename+".jpg")
self.labels[i,0:len(label)] = label
#self.labels.append(label)
self._num_examples += 1
#self.labels = np.array([v + [26.] * (self._label_len - len(v)) for v in self.labels])
self.labels = np.float32(self.labels)
# self.labels = [[np.float32(v) for v in lll] for lll in self.labels]
def next_batch(self):
# Shuffle the data
if self._next_index == 0:
perm = np.arange(self._num_examples)
np.random.shuffle(perm)
self._filenames = [self.filenames[i] for i in perm]
self._labels = self.labels[perm]
batch_size = self._batch_size
start = self._next_index
end = self._next_index + batch_size
if end >= self._num_examples:
self._next_index = 0
self._num_epoches += 1
end = self._num_examples
batch_size = self._num_examples - start
else:
self._next_index = end
images = np.zeros([batch_size, self._img_h, self._img_w, self._num_channels])
# labels = np.zeros([batch_size, self._label_len])
for j, i in enumerate(range(start, end)):
fname = self._filenames[i]
img = cv2.imread(os.path.join(self._img_dir, fname))
images[j, ...] = img
images = np.transpose(images, axes=[0, 2, 1, 3])
labels = self._labels[start:end]
# print("HHHHHHHHHHHHHHHHHHHHHHH")
# print("++++",start, "+++",end)
# print(self._labels)
# print("HHHHHHHHHHHHHHHHHHHHHHH")
# print(labels)
# print("HHHHHHHHHHHHHHHHHHHHHHH")
input_length = np.zeros([batch_size, 1])
label_length = np.zeros([batch_size, 1])
input_length[:] = self._input_len
label_length[:] = self._label_len
outputs = {'ctc': np.zeros([batch_size])}
inputs = {'the_input': images,
'the_labels': labels,
'input_length': input_length,
'label_length': label_length,
}
return inputs, outputs
def get_data(self):
while True:
yield self.next_batch()
#Generated data
train_gen = TextImageGenerator(img_dir=ti,
label_file=tl,
batch_size=batch_size,
img_size=img_size,
input_length=pred_length,
num_channels=num_channels,
label_len=label_len)
val_gen = TextImageGenerator(img_dir=vi,
label_file=vl,
batch_size=batch_size,
img_size=img_size,
input_length=pred_length,
num_channels=num_channels,
label_len=label_len)
# # Model evaluation
def evaluate(steps=10):
batch_acc = 0
generator = train_gen
for i in range(steps):
x_test, y_test = next(generator)
y_pred = base_model.predict(x_test)
shape = y_pred[:,2:,:].shape
ctc_decode = K.ctc_decode(y_pred[:,2:,:], input_length=np.ones(shape[0])*shape[1])[0][0]
out = K.get_value(ctc_decode)[:, :label_len]
if out.shape[1] == label_len:
batch_acc += (y_test == out).all(axis=1).mean()
return batch_acc / steps
class Evaluator(Callback):
def __init__(self):
self.accs = []
def on_epoch_end(self, epoch, logs=None):
acc = evaluate(steps=20)*100
self.accs.append(acc)
print('')
print('acc: %f%%' % acc)
evaluator = Evaluator()
# #The callback function will in eachepochAfter saving the model to the path
# checkpoints_cb = ModelCheckpoint(c, period=1)
# cbs = [checkpoints_cb]
# #tensorboard
# if dir_log != '':
# tfboard_cb = TensorBoard(log_dir=dir_log, write_images=True)
# cbs.append(tfboard_cb)
import matplotlib.pyplot as plt
checkpoints_cb = ModelCheckpoint("../checkpoints/'weights.{epoch:02d}-{val_loss:.2f}.h5'", period=1)
cbs = [checkpoints_cb]
h = model.fit_generator(generator=train_gen.get_data(),
steps_per_epoch=100,
epochs=20,
validation_data=val_gen.get_data(),
validation_steps=20,
callbacks=cbs,
initial_epoch=0)
# callbacks=[EarlyStopping(patience=10)])
# Save the model to save the weight value
model = Model(inputs=input_tensor, outputs=x)
# model.save(save_name)
model.save_weights('my_model_weight.h5')
print('model saved to {}'.format('my_model_weight.h5'))
这是我遇到的问题,固定损失:
Epoch 1/20
100/100 [==============================] - 209s 2s/step - loss: 0.0556 - val_loss: 0.0556
Epoch 2/20
100/100 [==============================] - 203s 2s/step - loss: 0.0556 - val_loss: 0.0556
Epoch 3/20
100/100 [==============================] - 217s 2s/step - loss: 0.0556 - val_loss: 0.0556
Epoch 4/20
100/100 [==============================] - 215s 2s/step - loss: 0.0556 - val_loss: 0.0556
此外,当我尝试使用导出的权重进行预测时,得到的矩阵为0.0556
。