Question

我想使用TensorFlow在我的自定义数据集上执行图像分类。我已经导入了我自己的数据集但是仍然停留在训练步骤（不确定它是否导入了完整的数据集或单个批次的50个图像，尽管列表包含所有文件名）。

数据集信息：图像分辨率= 88 * 128（单通道），批量大小= 50。

以下是我要执行的操作列表：

导入完整数据集（如果只创建一批50张图像，则更改代码）
使用我自己的数据集训练模型（训练图像和测试图像）
正确创建批次的方式。

到目前为止，这是完整的代码：

import tensorflow as tf
import os


def init_weights(shape):
    init_random_dist = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(init_random_dist)

def init_bias(shape):
    init_bias_vals = tf.constant(0.1, shape=shape)
    return tf.Variable(init_bias_vals)


def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2by2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1], padding='SAME')


def convolutional_layer(input_x, shape):
    W = init_weights(shape)
    b = init_bias([shape[3]])

    return tf.nn.relu(conv2d(input_x, W) + b)


def normal_full_layer(input_layer, size):
    input_size = int(input_layer.get_shape()[1])
    W = init_weights([input_size, size])
    b = init_bias([size])

    return tf.matmul(input_layer, W) + b


def get_labels(path):
    return os.listdir(path)


def files_list(path):
    return [val for sublist in [[os.path.join(j) for j in i[2]] for i in os.walk(path)] for val in sublist]


def image_tensors(filesQueue): 
    reader = tf.WholeFileReader()
    filename, content = reader.read(filesQueue)
    image = tf.image.decode_jpeg(content, channels=1)
    image = tf.cast(image, tf.float32)
    resized_image = tf.image.resize_images(image, [88, 128])

    return resized_image


path = './data/train'
trainLabels = get_labels(path)
trainingFiles = files_list(path)

trainQueue = tf.train.string_input_producer(trainingFiles)
trainBatch = tf.train.batch([image_tensors(trainQueue)], batch_size=50)
# ^^^^^^^^ a complete dataset or only a single batch? How to check?

path = './data/test'
testLabels = get_labels(path)
testingFiles = files_list(path)

testQueue = tf.train.string_input_producer(testingFiles)
testBatch = tf.train.batch([image_tensors(testQueue)], batch_size=50)
# ^^^^^^^ same here

x = tf.placeholder(tf.float32,shape=[88, 128])
y_true = tf.placeholder(tf.float32,shape=[None,len(trainLabels)])

x_image = tf.reshape(x,[-1,88,128,1])

convo_1 = convolutional_layer(x_image,shape=[6,6,1,32])
convo_1_pooling = max_pool_2by2(convo_1)

convo_2 = convolutional_layer(convo_1_pooling,shape=[6,6,32,64])
convo_2_pooling = max_pool_2by2(convo_2)


convo_2_flat = tf.reshape(convo_2_pooling,[-1,22*32*64])
full_layer_one = tf.nn.relu(normal_full_layer(convo_2_flat,1024))

hold_prob = tf.placeholder(tf.float32)
full_one_dropout = tf.nn.dropout(full_layer_one,keep_prob=hold_prob)


y_pred = normal_full_layer(full_one_dropout,10)

cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true,logits=y_pred))

optimizer = tf.train.AdamOptimizer(learning_rate=0.0001)
train = optimizer.minimize(cross_entropy)

init = tf.global_variables_initializer()



steps = 4000

with tf.Session() as sess:
    sess.run(init)
    for i in range(steps):
        batch_x , batch_y = tf.train.batch(trainBatch, batch_size=50)
        #                                  ^^^^^^^^^^^ Error
        sess.run(train,feed_dict={x:batch_x,y_true:batch_y,hold_prob:0.5})

        if i%400 == 0:
            print('Currently on step {}'.format(i))
            print('Accuracy is:')

            matches = tf.equal(tf.argmax(y_pred,1),tf.argmax(y_true,1))
            acc = tf.reduce_mean(tf.cast(matches,tf.float32))
            print(sess.run(acc,feed_dict={x:testBatch,y_true:testLabels,hold_prob:1.0}))
            #                             ^^^^^^^^^^^^ Test Images?
            print('\n')

这是我得到的错误：

TypeError                                 Traceback (most recent call last)
<ipython-input-24-5d0dac5724cd> in <module>()
      5     sess.run(init)
      6     for i in range(steps):
----> 7         batch_x , batch_y = tf.train.batch([trainBatch], batch_size=50)
      8         sess.run(train,feed_dict={x:batch_x,y_true:batch_y,hold_prob:0.5})
      9 

c:\users\TF_User\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in __iter__(self)
    503       TypeError: when invoked.
    504     """
--> 505     raise TypeError("'Tensor' object is not iterable.")
    506 
    507   def __bool__(self):

TypeError: 'Tensor' object is not iterable.

似乎输入错误的类型而不是 Tensor 或列表，但无法弄清楚。请更正，并帮助我解决上面列出的问题。

Answer 1

看起来你正在使用tf.train.batch的不必要的第二次调用。

通常你会做类似的事情：

...     
images, labels = tf.train.batch([images, labels], batch_size=50)

with tf.Session() as sess:
    sess.run(init)
    for i in range(steps):
        sess.run(train, feed_dict={x:images,y_true:labels,hold_prob:0.5})
...

我认为TensorFlow: does tf.train.batch automatically load the next batch when the batch has finished training?应该让您更好地了解tf.train.batch正在做什么以及如何使用它。此外，Reading Data上的文档也应该有所帮助。

在TensorFlow中训练自定义数据集会产生错误

1 个答案: