Question

我正在开发一个验证码识别项目，可以识别最多 5 位数字的验证码。为此，我生成了验证码图像（宽度 = 160，高度 = 60，通道数 = 1）。

然后，我用下面的代码将这些验证码图像转换为 TFRecords：

def decode_label(label):
    """Encode a captcha digit string as a (5, 10) one-hot ``uint8`` matrix.

    Row ``i`` describes the ``i``-th character of ``label``; the column equal
    to that digit is set to 1. Labels shorter than five characters leave the
    remaining rows all-zero.

    Args:
        label: String of decimal digits, at most five characters long.

    Returns:
        ``numpy.ndarray`` of shape (5, 10) and dtype ``uint8``.

    Raises:
        ValueError: If ``label`` contains a non-digit character.
        IndexError: If ``label`` has more than five characters.
    """
    one_hot_label = np.zeros([5, 10])
    # ``list.extend`` returns None, so building the index with it made the
    # assignment below hit every cell. Pair each position with its digit.
    digits = np.array([int(ch) for ch in label], dtype=np.intp)
    positions = np.arange(digits.size)
    one_hot_label[positions, digits] = 1.0
    return one_hot_label.astype(np.uint8)


def bytes_feature(value):
    """Wrap one bytes value in a ``tf.train.Feature`` holding a BytesList."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)


def convert_to(dir, name, path):
    """Serialize every image in ``dir`` into ``<path>/<name>.tfrecords``.

    Each file becomes one ``tf.train.Example`` with two bytes features:
    ``'image'`` (the raw bytes of the array returned by ``imread``) and
    ``'label'`` (the raw bytes of ``decode_label`` applied to the text found
    between the first ``_`` and the following ``.`` of the file name).

    Args:
        dir: Directory containing the captcha image files.
        name: Base name of the output file (``.tfrecords`` is appended).
        path: Directory in which the output file is created.

    Raises:
        IndexError: If a file name contains no ``_<label>.`` section.
    """
    entries = os.listdir(dir)
    tfrecords_name = os.path.join(path, name + ".tfrecords")
    writer = tf.python_io.TFRecordWriter(tfrecords_name)
    try:
        for entry in entries:
            filename = os.path.join(dir, entry)
            image = imread(filename)
            print(filename)
            # Match on the bare file name: an underscore in a directory
            # component of the full path would otherwise be taken as the
            # start of the label.
            label = decode_label(re.findall(r"_(.*?)\.", entry)[0])
            # NOTE(review): the reader expects HEIGHT * WIDTH * CHANNELS bytes
            # per image; if imread yields a multi-channel array (e.g. RGBA)
            # the record is larger than that -- confirm the loaded shape.
            example = tf.train.Example(features=tf.train.Features(feature={
                'label': bytes_feature(label.tobytes()),
                'image': bytes_feature(image.tobytes())}))
            writer.write(example.SerializeToString())
    finally:
        # Close the writer even when a file fails to load or parse.
        writer.close()
    print("successfully convert data to tfrecords!")


def read_records(filename_queue):
    """Read one serialized example from the queue and decode it.

    Parses the ``'image'`` and ``'label'`` bytes features, decodes both as
    ``uint8`` and returns them as float32 tensors.

    Args:
        filename_queue: Queue of TFRecords file names, as accepted by
            ``tf.TFRecordReader.read``.

    Returns:
        Tuple ``(image, label)`` of float32 tensors shaped
        ``[WIDTH, HEIGHT, CHANNELS]`` and ``[NUMBERS, CLASSES]``.
    """
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(
        serialized_example,
        features={'image': tf.FixedLenFeature([], tf.string),
                  'label': tf.FixedLenFeature([], tf.string)})
    image = tf.decode_raw(features['image'], tf.uint8)
    image.set_shape([captcha_model.HEIGHT * captcha_model.WIDTH * captcha_model.CHANNELS])
    label = tf.decode_raw(features['label'], tf.uint8)
    label.set_shape([captcha_model.NUMBERS * captcha_model.CLASSES])
    # The bytes were written from an image array, presumably laid out
    # row-major as (height, width, channels) -- verify against the writer.
    # Reshaping them directly to [WIDTH, HEIGHT, CHANNELS] would scramble the
    # pixels, so restore the stored layout first and then swap the two
    # spatial axes to keep the [WIDTH, HEIGHT, CHANNELS] shape callers expect.
    stored_image = tf.reshape(image, shape=[captcha_model.HEIGHT, captcha_model.WIDTH, captcha_model.CHANNELS])
    reshape_image = tf.transpose(stored_image, perm=[1, 0, 2])
    reshape_label = tf.reshape(label, shape=[captcha_model.NUMBERS, captcha_model.CLASSES])
    return tf.cast(reshape_image, tf.float32), tf.cast(reshape_label, tf.float32)


def records_inputs(image, label, min_after_dequeue):
    """Group single examples into shuffled batches of ``BATCH_SIZE``.

    Args:
        image: Tensor for a single image example.
        label: Tensor for the matching label.
        min_after_dequeue: Minimum number of elements kept in the shuffle
            queue after a dequeue.

    Returns:
        Tuple ``(images, labels)`` of batched tensors.
    """
    batch_size = captcha_model.BATCH_SIZE
    # Queue capacity: the shuffle buffer plus room for three more batches.
    queue_capacity = min_after_dequeue + 3 * batch_size
    images, labels = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        capacity=queue_capacity,
        min_after_dequeue=min_after_dequeue)
    return images, labels

读取记录：

   # Share of one epoch's examples used as the shuffle queue's minimum fill.
   FRACTION = 0.4
    NUM_EXAMPLES_PER_EPOCH = captcha_model.NUM_EXAMPLES_PER_EPOCH
    # Passed below as min_after_dequeue for the training batches.
    MIN_AFTER_DEQUEUE = int(FRACTION * NUM_EXAMPLES_PER_EPOCH)
    # Not used in this snippet; presumably the validation counterpart.
    VALIDATION_MIN_AFTER_DEQUEUE = 4000

  # Queue the training TFRecords file, decode single examples from it, then
  # batch them with shuffling.
  filename_queue = tf.train.string_input_producer(["./images/train.tfrecords"])
  image, label = captcha_inputs.read_records(filename_queue)
  images, labels = captcha_inputs.records_inputs(image, label, MIN_AFTER_DEQUEUE)

然而，在尝试从记录中读取数据时，我遇到了以下错误：

InvalidArgumentError (see above for traceback): Input to reshape is a tensor with 38400 values, but the requested shape has 9600 [[Node: Reshape = Reshape[T=DT_UINT8, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/cpu:0"](DecodeRaw, Reshape/shape)]]

该错误来自 read_records 方法中的下面这一行：

reshape_image = tf.reshape(image, shape=[captcha_model.WIDTH, captcha_model.HEIGHT, captcha_model.CHANNELS])

另外，这是我的模型代码：

import tensorflow as tf

# Number of examples per batch; also hard-wired into the reshapes in inference().
BATCH_SIZE = 128
# Size of one training epoch.
NUM_EXAMPLES_PER_EPOCH = 50000
# Presumably the number of validation examples -- not used in the code shown here.
VALIDATION_SIZE = 10000
# Captcha image dimensions.
WIDTH = 160
HEIGHT = 60
CHANNELS = 1
# Number of classes per character position.
CLASSES = 10
# Number of character positions per captcha.
NUMBERS = 5


def _conv_relu_pool(scope, inputs, in_channels, out_channels, pool_window):
    """Build one 5x5 conv -> bias -> ReLU -> max-pool stage under ``scope``."""
    with tf.variable_scope(scope):
        kernel = tf.get_variable(name="kernel",
                                 shape=[5, 5, in_channels, out_channels],
                                 initializer=tf.truncated_normal_initializer(stddev=0.05),
                                 dtype=tf.float32)
        biases = tf.get_variable(name="biases",
                                 shape=[out_channels],
                                 initializer=tf.constant_initializer(value=0.),
                                 dtype=tf.float32)
        conv = tf.nn.conv2d(input=inputs,
                            filter=kernel,
                            strides=[1, 1, 1, 1],
                            padding="SAME")
        conv_bias = tf.nn.bias_add(value=conv,
                                   bias=biases,
                                   name="add_biases")
        relu = tf.nn.relu(conv_bias)
        # The same window is used as pooling size and stride.
        return tf.nn.max_pool(value=relu,
                              ksize=pool_window,
                              strides=pool_window,
                              padding="SAME",
                              name="pooling")


def _affine(inputs, in_units, out_units):
    """Create ``weights``/``biases`` in the current scope and return xW + b."""
    weights = tf.get_variable(name="weights",
                              shape=[in_units, out_units],
                              initializer=tf.truncated_normal_initializer(stddev=0.05),
                              dtype=tf.float32)
    biases = tf.get_variable(name="biases",
                             shape=[out_units],
                             initializer=tf.constant_initializer(value=0.),
                             dtype=tf.float32)
    return tf.nn.xw_plus_b(x=inputs,
                           weights=weights,
                           biases=biases)


def inference(inputs):
    """Build the captcha network and return per-position class logits.

    The graph is three conv/ReLU/max-pool stages (48, 64 and 128 filters),
    a 2048-unit ReLU layer and a linear output layer.

    Args:
        inputs: 4-D float32 image batch with one channel in the last axis
            (the first kernel is ``[5, 5, 1, 48]``). The batch dimension must
            equal ``BATCH_SIZE`` because the reshapes below hard-code it.

    Returns:
        Logits tensor of shape ``[BATCH_SIZE, NUMBERS, CLASSES]``.
    """
    pool = _conv_relu_pool("conv_pool_1", inputs, 1, 48, [1, 2, 2, 1])
    # The second stage pools (and strides) along axis 1 only.
    pool = _conv_relu_pool("conv_pool_2", pool, 48, 64, [1, 2, 1, 1])
    pool = _conv_relu_pool("conv_pool_3", pool, 64, 128, [1, 2, 2, 1])
    reshape = tf.reshape(pool,
                         shape=[BATCH_SIZE, -1],
                         name="reshape")
    # Flattened feature width, taken from the static shape.
    dims = reshape.get_shape().as_list()[-1]
    with tf.variable_scope("fully_conn"):
        conn = tf.nn.relu(_affine(reshape, dims, 2048))
    with tf.variable_scope("output"):
        logits = _affine(conn, 2048, NUMBERS * CLASSES)
        reshape = tf.reshape(logits, shape=[BATCH_SIZE, NUMBERS, CLASSES])
    return reshape

编辑：TFRecords 中的标签不正确。我打印了从记录中读取到的标签的形状，显示的是类似 (?, _) 的内容。我认为问题出在 decode_label 函数上。

尝试从TFRecords读取时形状不匹配

0 个答案: