我正在开发一个验证码识别项目,可以识别最多 5 位数字的验证码。为此,我生成了验证码图像(宽度 = 160,高度 = 60,通道数 = 1),
然后将这些验证码图像转换为 TFRecords:
def decode_label(label):
    """Encode a string of decimal digits as a 5 x 10 one-hot matrix.

    Row ``i`` describes the ``i``-th character of ``label``; the column equal
    to that digit is set to 1. Labels shorter than 5 characters leave the
    remaining rows all-zero.

    Args:
        label: string of at most 5 decimal digits, e.g. ``"04217"``.

    Returns:
        ``numpy.ndarray`` of shape ``(5, 10)`` and dtype ``uint8``.

    Raises:
        ValueError: if ``label`` has more than 5 characters or contains a
            character that ``int()`` cannot parse.
    """
    max_digits, num_classes = 5, 10
    # Explicit integer dtype so an empty label still yields a valid index array.
    digits = np.array([int(char) for char in label], dtype=np.intp)
    if digits.size > max_digits:
        raise ValueError(
            "label %r has %d digits, at most %d are supported"
            % (label, digits.size, max_digits))
    one_hot_label = np.zeros([max_digits, num_classes], dtype=np.uint8)
    # Pair each row position with its digit column. Building the index with
    # list.extend() yields None, and indexing with None marks every cell.
    one_hot_label[np.arange(digits.size), digits] = 1
    return one_hot_label
def bytes_feature(value):
    """Wrap one bytes value in a ``tf.train.Feature``.

    Args:
        value: the bytes payload; it is stored as the only element of a
            ``tf.train.BytesList``.

    Returns:
        A ``tf.train.Feature`` whose ``bytes_list`` holds ``value``.
    """
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)
def convert_to(dir, name, path):
    """Serialize every image in ``dir`` into ``<path>/<name>.tfrecords``.

    Each record holds two bytes features: ``'image'`` (the raw bytes of the
    array returned by ``imread``, exactly as read) and ``'label'`` (the raw
    bytes of ``decode_label`` applied to the text between the first ``_`` and
    the following ``.`` of the file name).

    Args:
        dir: directory whose entries are all read as images.
        name: base name of the output file, without extension.
        path: directory in which the ``.tfrecords`` file is created.

    Raises:
        IndexError: if a file name contains no ``_<label>.`` part.
    """
    basenames = os.listdir(dir)
    tfrecords_name = os.path.join(path, name + ".tfrecords")
    label_pattern = re.compile(r"_(.*?)\.")
    writer = tf.python_io.TFRecordWriter(tfrecords_name)
    try:
        for basename in basenames:
            filename = os.path.join(dir, basename)
            image = imread(filename)
            print(filename)
            # Match against the base name only, so an underscore in ``dir``
            # cannot be mistaken for the label separator.
            label = decode_label(label_pattern.findall(basename)[0])
            example = tf.train.Example(features=tf.train.Features(feature={
                'label': bytes_feature(label.tobytes()),
                'image': bytes_feature(image.tobytes()),
            }))
            writer.write(example.SerializeToString())
    finally:
        # Close the writer even when one of the files fails to convert.
        writer.close()
    print("successfully convert data to tfrecords!")
def read_records(filename_queue):
    """Read one serialized example from a TFRecords queue and decode it.

    Args:
        filename_queue: queue of TFRecords file names passed to
            ``tf.TFRecordReader.read``.

    Returns:
        A pair ``(image, label)`` of float32 tensors: ``image`` has shape
        ``[WIDTH, HEIGHT, CHANNELS]`` and ``label`` has shape
        ``[NUMBERS, CLASSES]`` (constants taken from ``captcha_model``).
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={'image': tf.FixedLenFeature([], tf.string),
                  'label': tf.FixedLenFeature([], tf.string)})

    image = tf.decode_raw(features['image'], tf.uint8)
    image.set_shape([captcha_model.HEIGHT * captcha_model.WIDTH * captcha_model.CHANNELS])
    label = tf.decode_raw(features['label'], tf.uint8)
    label.set_shape([captcha_model.NUMBERS * captcha_model.CLASSES])

    # NOTE(review): assumes the writer stored the bytes of a row-major
    # (height, width, channels) array, as common imread implementations
    # return -- confirm against the code that wrote the records.
    # Reshaping those bytes straight to [WIDTH, HEIGHT, CHANNELS] would
    # scramble the pixels, so restore the stored layout first and then swap
    # the two spatial axes to keep the [WIDTH, HEIGHT, CHANNELS] output.
    image_hwc = tf.reshape(image, shape=[captcha_model.HEIGHT, captcha_model.WIDTH, captcha_model.CHANNELS])
    reshape_image = tf.transpose(image_hwc, perm=[1, 0, 2])
    reshape_label = tf.reshape(label, shape=[captcha_model.NUMBERS, captcha_model.CLASSES])
    return tf.cast(reshape_image, tf.float32), tf.cast(reshape_label, tf.float32)
def records_inputs(image, label, min_after_dequeue):
    """Build shuffled mini-batches from a single (image, label) example.

    Args:
        image: image tensor for one example.
        label: label tensor for one example.
        min_after_dequeue: value forwarded to ``tf.train.shuffle_batch``;
            the queue capacity is this value plus three batches.

    Returns:
        A pair ``(images, labels)`` of batched tensors.
    """
    batch_size = captcha_model.BATCH_SIZE
    queue_capacity = min_after_dequeue + 3 * batch_size
    batched = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        capacity=queue_capacity,
        min_after_dequeue=min_after_dequeue)
    images, labels = batched
    return images, labels
读取记录:
# Shuffle-queue sizing: keep FRACTION of one epoch buffered after each dequeue.
NUM_EXAMPLES_PER_EPOCH = captcha_model.NUM_EXAMPLES_PER_EPOCH
FRACTION = 0.4
VALIDATION_MIN_AFTER_DEQUEUE = 4000
MIN_AFTER_DEQUEUE = int(NUM_EXAMPLES_PER_EPOCH * FRACTION)

# Input pipeline: file-name queue -> single decoded example -> shuffled batch.
filename_queue = tf.train.string_input_producer(
    string_tensor=["./images/train.tfrecords"])
image, label = captcha_inputs.read_records(filename_queue=filename_queue)
images, labels = captcha_inputs.records_inputs(
    image=image,
    label=label,
    min_after_dequeue=MIN_AFTER_DEQUEUE)
然而,在尝试从记录中读取数据时,我遇到了以下错误:
InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 38400 values, but the requested shape has 9600 [[Node: Reshape = Reshape[T=DT_UINT8, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](DecodeRaw, Reshape/shape)]]
该错误来自 read_records 函数中的以下这一行:
# The requested shape holds WIDTH * HEIGHT * CHANNELS elements; the reported
# error says the decoded tensor had 38400 values against a requested 9600.
reshape_image = tf.reshape(image, shape=[captcha_model.WIDTH, captcha_model.HEIGHT, captcha_model.CHANNELS])
下面是我的模型模块(captcha_model):
import tensorflow as tf
# Number of examples per batch.
BATCH_SIZE = 128
# Number of training examples in one epoch.
NUM_EXAMPLES_PER_EPOCH = 50000
# Not referenced in the visible code; presumably the number of validation
# examples -- confirm.
VALIDATION_SIZE = 10000
# Captcha image geometry: WIDTH * HEIGHT * CHANNELS = 9600 values per image.
# NOTE(review): the reported reshape error mentions 38400 values, i.e. 4x this
# product -- presumably the stored images have 4 channels; verify against the
# generated files.
WIDTH = 160
HEIGHT = 60
CHANNELS = 1
# Number of classes per character position.
CLASSES = 10
# Number of character positions per captcha.
NUMBERS = 5
def _conv_pool(inputs, in_channels, out_channels, pool_window):
    """Apply a 5x5 SAME convolution, bias add, ReLU and SAME max-pooling.

    Must be called inside the caller's ``tf.variable_scope`` so that the
    ``kernel`` and ``biases`` variables are created under that scope.

    Args:
        inputs: 4-D float32 tensor fed to ``tf.nn.conv2d``.
        in_channels: size of the last dimension of ``inputs``.
        out_channels: number of convolution filters.
        pool_window: 4-element list used as both ``ksize`` and ``strides``
            of the max-pool.

    Returns:
        The pooled activation tensor.
    """
    kernel = tf.get_variable(name="kernel",
                             shape=[5, 5, in_channels, out_channels],
                             initializer=tf.truncated_normal_initializer(stddev=0.05),
                             dtype=tf.float32)
    biases = tf.get_variable(name="biases",
                             shape=[out_channels],
                             initializer=tf.constant_initializer(value=0.),
                             dtype=tf.float32)
    conv = tf.nn.conv2d(input=inputs,
                        filter=kernel,
                        strides=[1, 1, 1, 1],
                        padding="SAME")
    conv_bias = tf.nn.bias_add(value=conv,
                               bias=biases,
                               name="add_biases")
    relu = tf.nn.relu(conv_bias)
    return tf.nn.max_pool(value=relu,
                          ksize=pool_window,
                          strides=pool_window,
                          padding="SAME",
                          name="pooling")


def _dense(inputs, in_dims, out_dims):
    """Compute ``inputs * weights + biases`` with variables in the current scope.

    Args:
        inputs: 2-D float32 tensor whose second dimension is ``in_dims``.
        in_dims: number of input features.
        out_dims: number of output features.

    Returns:
        2-D tensor with ``out_dims`` columns (no activation applied).
    """
    weights = tf.get_variable(name="weights",
                              shape=[in_dims, out_dims],
                              initializer=tf.truncated_normal_initializer(stddev=0.05),
                              dtype=tf.float32)
    biases = tf.get_variable(name="biases",
                             shape=[out_dims],
                             initializer=tf.constant_initializer(value=0.),
                             dtype=tf.float32)
    return tf.nn.xw_plus_b(x=inputs,
                           weights=weights,
                           biases=biases)


def inference(inputs):
    """Build the captcha network and return per-position class logits.

    Three conv/ReLU/max-pool stages (48, 64 and 128 filters) are followed by
    a 2048-unit ReLU layer and a linear output layer.

    Args:
        inputs: 4-D float32 tensor with ``CHANNELS`` channels in the last
            dimension. The spatial dimensions must be statically known; the
            batch dimension may be any size.

    Returns:
        Tensor of shape ``[batch, NUMBERS, CLASSES]`` holding the logits.

    Raises:
        ValueError: if the pooled feature map has an unknown static dimension.
    """
    with tf.variable_scope("conv_pool_1"):
        pool = _conv_pool(inputs, CHANNELS, 48, [1, 2, 2, 1])
    with tf.variable_scope("conv_pool_2"):
        # This stage halves only the first spatial dimension.
        pool = _conv_pool(pool, 48, 64, [1, 2, 1, 1])
    with tf.variable_scope("conv_pool_3"):
        pool = _conv_pool(pool, 64, 128, [1, 2, 2, 1])

    feature_dims = pool.get_shape().as_list()[1:]
    if None in feature_dims:
        raise ValueError(
            "inference() needs statically known spatial dimensions, got %s"
            % feature_dims)
    dims = 1
    for size in feature_dims:
        dims *= size
    # Flatten with an inferred batch dimension so the graph is not tied to
    # BATCH_SIZE and also accepts batches of another size.
    reshape = tf.reshape(pool,
                         shape=[-1, dims],
                         name="reshape")

    with tf.variable_scope("fully_conn"):
        conn = tf.nn.relu(_dense(reshape, dims, 2048))
    with tf.variable_scope("output"):
        logits = _dense(conn, 2048, NUMBERS * CLASSES)
    return tf.reshape(logits, shape=[-1, NUMBERS, CLASSES])
编辑:TFRecords 中的标签不正确。我打印了从记录中读取的标签的形状,结果显示类似 (?,_) 的内容。我认为问题出在 decode_label 函数上 -