I'm getting an unexpected error when running the code below:
with tf.Session(graph=graph) as session:
    tf.initialize_all_variables().run()
    print('Initialized')
    mean_loss = 0
    for step in xrange(num_steps):
        print("Train data ", len(train_data))
        batch_inputs, batch_labels = generate_batches(train_dataset, batch_size=64, unrollings=5)
        feed_dict = dict()
        for i in range(unrollings):
            batch_labels[i] = np.reshape(batch_labels[i], (batch_size, 1))
            batch_inputs[i] = np.array(batch_inputs[i]).astype('int32')
            batch_labels[i] = batch_labels[i].astype('float32')
            print(train_inputs[i], train_labels[i])
            feed_dict = {train_inputs[i]: batch_inputs[i], train_labels[i]: batch_labels[i]}
        _, l, predictions, lr = session.run([optimizer, loss, train_prediction, learning_rate], feed_dict=feed_dict)
        mean_loss += l
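For reference, my understanding of feed_dict is that every placeholder the fetched ops depend on must be fed in the same session.run call. A minimal standalone sketch with toy placeholders (not my real graph), written against the same 0.x API:

import tensorflow as tf

g = tf.Graph()
with g.as_default():
    p0 = tf.placeholder(tf.float32, shape=[2, 1])
    p1 = tf.placeholder(tf.float32, shape=[2, 1])
    total = p0 + p1  # depends on BOTH placeholders

with tf.Session(graph=g) as s:
    # Both placeholders must appear as keys in the dict; leaving one out
    # raises "You must feed a value for placeholder tensor ...".
    print(s.run(total, feed_dict={p0: [[1.0], [2.0]], p1: [[3.0], [4.0]]}))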
Here is the code that generates the batches, the LSTM cell, and the loss computation:
def generate_batches(raw_data, batch_size, unrollings):
    global data_index
    data_len = len(raw_data)
    num_batches = data_len // batch_size
    inputs = []
    labels = []
    label = np.zeros(shape=(batch_size, 1), dtype=np.float)
    print(num_batches, data_len, batch_size)
    for j in xrange(unrollings):
        inputs.append([])
        labels.append([])
        for i in xrange(batch_size):
            inputs[j].append(raw_data[i + data_index])
            label[i, 0] = raw_data[i + data_index + 1]
            data_index = (data_index + 1) % len(raw_data)
        print(len(inputs), len(inputs[0]), len(labels), label.shape)
        labels[j].append(label.tolist())
    return inputs, labels
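To sanity-check what this returns, here is a quick toy run of the generator (hypothetical integer data, data_index reset to 0):

data_index = 0
toy_inputs, toy_labels = generate_batches(list(range(1000)), batch_size=64, unrollings=5)
print(len(toy_inputs), len(toy_inputs[0]))             # 5 unrollings, 64 word ids each
print(len(toy_labels), np.array(toy_labels[0]).shape)  # 5, (1, 64, 1) -- note the extra leading axis

Each labels[j] ends up as a list holding one (64, 1) list-of-lists, so as a numpy array it is rank 3.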
embedding_size = 128
num_nodes = 32

graph = tf.Graph()
with graph.as_default():
    # Parameters:
    # Input, Forget, Candidate, Output gates: input, previous output, and bias.
    ifcox = tf.Variable(tf.truncated_normal([embedding_size, num_nodes*4], -0.1, 0.1))
    ifcom = tf.Variable(tf.truncated_normal([num_nodes, num_nodes*4], -0.1, 0.1))
    ifcob = tf.Variable(tf.zeros([1, num_nodes*4]))
    # Variables saving state across unrollings.
    saved_output = tf.Variable(tf.zeros([batch_size, num_nodes]), trainable=False)
    saved_state = tf.Variable(tf.zeros([batch_size, num_nodes]), trainable=False)
    # Classifier weights and biases.
    w = tf.Variable(tf.truncated_normal([num_nodes, 1], -0.1, 0.1))
    b = tf.Variable(tf.zeros([1]))

    # Definition of the cell computation.
    def lstm_cell(i, o, state):
        embeddings = tf.Variable(
            tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
        embed = tf.nn.embedding_lookup(embeddings, i)
        i = tf.to_float(embed)
        print(i.get_shape())
        combined = tf.matmul(i, ifcox) + tf.matmul(o, ifcom) + ifcob
        input_gate = tf.sigmoid(combined[:, 0:num_nodes])
        forget_gate = tf.sigmoid(combined[:, num_nodes:2*num_nodes])
        update = tf.sigmoid(combined[:, 2*num_nodes:3*num_nodes])
        state = forget_gate * state + input_gate * tf.tanh(update)
        output_gate = tf.sigmoid(combined[:, 3*num_nodes:4*num_nodes])
        return output_gate * tf.tanh(state), state

    train_data = list()
    train_label = list()
    for _ in range(unrollings):
        train_data.append(tf.placeholder(shape=[batch_size], dtype=tf.int32))
        train_label.append(tf.placeholder(shape=[batch_size, 1], dtype=tf.float32))
    train_inputs = train_data[:unrollings]
    train_labels = train_label[:unrollings]
    print(train_inputs, train_labels)

    outputs = list()
    output = saved_output
    state = saved_state
    for i in train_inputs:
        output, state = lstm_cell(i, output, state)
        outputs.append(output)

    # State saving across unrollings.
    with tf.control_dependencies([saved_output.assign(output), saved_state.assign(state)]):
        # Classifier.
        logits = tf.nn.xw_plus_b(tf.concat(0, outputs), w, b)
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits, tf.to_float(tf.concat(0, train_labels))))

    # Optimizer.
    global_step = tf.Variable(0)
    learning_rate = tf.train.exponential_decay(10.0, global_step, 5000, 0.1, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate)
    gradients, v = zip(*optimizer.compute_gradients(loss))
    gradients, _ = tf.clip_by_global_norm(gradients, 1.25)
    optimizer = optimizer.apply_gradients(zip(gradients, v), global_step=global_step)
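A side note on the learning-rate schedule above: as I read the docs, with staircase=True tf.train.exponential_decay multiplies the base rate by decay_rate once per whole decay_steps interval, i.e. roughly:

def decayed_lr(step, base=10.0, decay_steps=5000, decay_rate=0.1):
    # staircase=True -> integer division, so the rate drops in discrete jumps
    return base * decay_rate ** (step // decay_steps)

print(decayed_lr(0), decayed_lr(4999), decayed_lr(5000))  # 10.0 10.0 1.0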
The error:
tensorflow/core/framework/op_kernel.cc:940] Invalid argument: You must feed a value for placeholder tensor 'Placeholder_3' with dtype float and shape [64,1]
[[Node: Placeholder_3 = Placeholder[dtype=DT_FLOAT, shape=[64,1], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Traceback (most recent call last):
  File "ptb_rnn.py", line 232, in <module>
    _, l, predictions, lr = session.run([optimizer, loss, train_prediction, learning_rate], feed_dict=feed_dict)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 710, in run
    run_metadata_ptr)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 908, in _run
    feed_dict_string, options, run_metadata)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 958, in _do_run
    target_list, options, run_metadata)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.py", line 978, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder_3' with dtype float and shape [64,1]
  [[Node: Placeholder_3 = Placeholder[dtype=DT_FLOAT, shape=[64,1], _device="/job:localhost/replica:0/task:0/cpu:0"]()]]
Caused by op u'Placeholder_3', defined at:
  File "ptb_rnn.py", line 163, in <module>
    train_label.append(tf.placeholder(shape=[batch_size, 1], dtype=tf.float32))
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/array_ops.py", line 1212, in placeholder
    name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 1530, in _placeholder
    name=name)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/op_def_library.py", line 703, in apply_op
    op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 2317, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python2.7/dist-packages/tensorflow/python/framework/ops.py", line 1239, in __init__
    self._traceback = _extract_stack()
It looks as if I am not feeding a value through feed_dict to the tensor train_labels[i] of shape (64, 1). But when I print batch_labels[i].shape I get (64, 1), and both have dtype float32. Likewise, train_inputs[i] has shape (64,), batch_inputs[i] has shape (64,), and their dtypes match as well.
So where is the error in my code?
P.S. I think the error lies where I reshape batch_labels: batch_labels[i] = np.reshape(batch_labels[i], (batch_size, 1)). Does the dimension or rank change there? That could be one reason train_labels[i] does not accept batch_labels[i] of size (64, 1): their dimensions and ranks may not match. When the 3-D batch_labels[i] is converted to the 2-D (batch_size, 1), does the rank change? Below is the batch generator's output for one unrolling (see the toy reproduction after it):
([9976, 9980, 9981, 9982, 9983, 9984, 9986, 9987, 9988, 9989, 9991, 9992, 9993, 9994, 9995, 9996, 9997, 9998, 9999, 2, 9256, 1, 3, 72, 393, 33, 2133, 0, 146, 19, 6, 9207, 276, 407, 3, 2, 23, 1, 13, 141, 4, 1, 5465, 0, 3081, 1596, 96, 2, 7682, 1, 3, 72, 393, 8, 337, 141, 4, 2477, 657, 2170, 955, 24, 521, 6], [[[9980.0], [9981.0], [9982.0], [9983.0], [9984.0], [9986.0], [9987.0], [9988.0], [9989.0], [9991.0], [9992.0], [9993.0], [9994.0], [9995.0], [9996.0], [9997.0], [9998.0], [9999.0], [2.0], [9256.0], [1.0], [3.0], [72.0], [393.0], [33.0], [2133.0], [0.0], [146.0], [19.0], [6.0], [9207.0], [276.0], [407.0], [3.0], [2.0], [23.0], [1.0], [13.0], [141.0], [4.0], [1.0], [5465.0], [0.0], [3081.0], [1596.0], [96.0], [2.0], [7682.0], [1.0], [3.0], [72.0], [393.0], [8.0], [337.0], [141.0], [4.0], [2477.0], [657.0], [2170.0], [955.0], [24.0], [521.0], [6.0], [9207.0]]])
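To check that suspicion, here is a toy reproduction of the reshape (made-up values standing in for one element of batch_labels):

import numpy as np

raw = [[[float(k)] for k in range(64)]]  # mimics labels[j]: a list holding one (64, 1) list
print(np.array(raw).shape)               # (1, 64, 1) -- rank 3
reshaped = np.reshape(raw, (64, 1))
print(reshaped.shape, reshaped.dtype)    # (64, 1) float64 -- rank 2 after the reshape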