我在为BiRNN网络构建输入数据时遇到了问题。
我正在创建如下所述的车牌检测系统:https://arxiv.org/pdf/1601.05610v1.pdf
我需要“4.2.3序列标记”部分,我需要训练BiRNN的数据集为(total_count_of_images,None,256)shape,None,因为它是图像的长度,并且它对于数据集中的每个图片都是不同的
假设我有3000张图片。然后形状看起来像:
train.shape:(3000,)但实际上是(3000,无,256)!?
所以我从
获得了示例代码所以即使开始训练我的RNN,我也在苦苦挣扎。我不明白我需要如何构建输入数据/模型,输入占位符,变量等来实现任何培训过程。
据我所知,一切都应该有效。我的代码:
reset_graph()
'''
Dataset : (10000, 784)
Labels : (10000, 10)
To classify images using a bidirectional reccurent neural network, we consider
every image row as a sequence of pixels. Because MNIST image shape is 28*28px,
we will then handle 28 sequences of 28 steps for every sample.
'''
# Parameters
learning_rate = 0.001
training_iters = 100 # 100000
display_step = 10
batch_size = 40
# Network Parameters
n_input = 256 # data inpit size/256D
n_steps = 256 # timesteps
n_hidden = 200 # hidden layer num of features
n_classes = 36 # MNIST total classes (0-9 digits and a-z letters)
# tf Graph input
x = tf.placeholder("float", [batch_size, None , n_input], name='input_placeholder')
y = tf.placeholder("float", [batch_size, None, n_classes], name='labels_placeholder')
# Define weights
weights = {
# Hidden layer weights => 2*n_hidden because of foward + backward cells
'out': tf.Variable(tf.random_normal([2*n_hidden, n_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([n_classes]))
}
def BiRNN(x, weights, biases):
print('Input x',x.get_shape().as_list())
print('weights[\'out\']', weights['out'].get_shape().as_list())
print('biases[\'out\']', biases['out'].get_shape().as_list())
# Prepare data shape to match `bidirectional_rnn` function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
# Permuting batch_size and n_steps
#x = tf.transpose(x, [1, 0, 2])
#print('Transposed x',x.get_shape().as_list())
# Reshape to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, n_steps])
print('Reshaped x',x.get_shape().as_list())
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, n_input, x)
print(len(x),'of [ ',x[0],' ] kinds')
# Define lstm cells with tensorflow
# Forward direction cell
lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
# Backward direction cell
lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden, forget_bias=1.0, state_is_tuple=True)
# Get lstm cell output
outputs, _, _ = rnn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, x, dtype=tf.float32)
print( len(outputs),'of [ ',outputs[0],' ] kinds' )
# Linear activation, using rnn inner loop last output
ret = tf.matmul(outputs[-1], weights['out']) + biases['out']
print('ret', ret.get_shape().as_list())
return ret
pred = BiRNN(x, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred,1), tf.argmax(y,1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
输出:
Input x [40, None, 256]
weights['out'] [400, 36]
biases['out'] [36]
Reshaped x [None, 256]
256 of [ Tensor("split:0", shape=(?, 256), dtype=float32) ] kinds
256 of [ Tensor("concat:0", shape=(?, 400), dtype=float32) ] kinds
ret [None, 36]
一切就在那里。 问题从会话部分开始:
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
batch_data = batch_gen(batch_size)
# Keep training until reach max iterations
while step * batch_size < training_iters:
batch_x, batch_y = next(batch_data)
print(batch_x.shape)
print(batch_y.shape)
#m[:,0, None, None].shape
#Run optimization op (backprop)
print('Optimizer')
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
if step % display_step == 0:
print('Display')
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y})
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y})
print("Iter " + str(step * batch_size) + ", Minibatch Loss= " + \
"{:.6f}".format(loss) + ", Training Accuracy= " + \
"{:.5f}".format(acc))
step += 1
print("Optimization Finished!")
# Calculate accuracy for 128 mnist test images
test_len = 128
test_data = mnist.test.images[:test_len].reshape((-1, n_steps, n_input))
test_label = mnist.test.labels[:test_len]
print("Testing Accuracy:", \
sess.run(accuracy, feed_dict={x: test_data, y: test_label}))
我遇到以下错误:
(40,)
(40,)
Optimizer
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-96-a53814db8181> in <module>()
14 #Run optimization op (backprop)
15 print('Optimizer')
---> 16 sess.run(optimizer, feed_dict={x: batch_x, y: batch_y})
17
18 if step % display_step == 0:
/home/nauris/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
715 try:
716 result = self._run(None, fetches, feed_dict, options_ptr,
--> 717 run_metadata_ptr)
718 if run_metadata:
719 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/home/nauris/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
886 ' to a larger type (e.g. int64).')
887
--> 888 np_val = np.asarray(subfeed_val, dtype=subfeed_dtype)
889
890 if not subfeed_t.get_shape().is_compatible_with(np_val.shape):
/home/nauris/anaconda3/lib/python3.5/site-packages/numpy/core/numeric.py in asarray(a, dtype, order)
480
481 """
--> 482 return array(a, dtype, copy=False, order=order)
483
484 def asanyarray(a, dtype=None, order=None):
ValueError: setting an array element with a sequence.
任何帮助都将受到高度赞赏。提前谢谢大家。
意识到发生错误是因为你不能在我的情况下为不一致的维度提供numpy ndarray,例如(3000,None,256)。还没有找到任何解决方案。