我正在参照 https://www.tensorflow.org/tutorials/seq2seq 中的 seq2seq 示例编写代码,区别在于我的输入和输出都是连续的实数值,而不是示例中的单词。
我有两个张量 X 和 Y,作为输入和输出占位符的简单示例数据。这是我目前的代码:
import tensorflow as tf
from tensorflow.contrib import rnn
import numpy as np
from tensorflow.python.layers.core import Dense
# Minimal reproduction: an LSTM encoder/decoder over continuous-valued
# sequences, built with the TF 1.x tf.contrib.seq2seq API.
# NOTE(review): the indentation of the `with` body was lost in this listing;
# the statements after the `with` line presumably belong inside it.
# Number of hidden units used by both the encoder and the decoder LSTM cell.
N_HIDDEN = 50
sess = tf.Session()
# Toy data as NumPy float64 arrays of shape (3, 10, 8); given X_lengths and
# BATCH_SIZE below, the layout is (batch, time, features), i.e. batch-major.
X = np.random.randn(3, 10, 8)
# Targets are the inputs plus small Gaussian noise, same shape as X.
Y = X + 0.1*np.random.randn(3, 10, 8)
# Every one of the 3 sequences uses all 10 time steps.
X_lengths = [10, 10, 10]
BATCH_SIZE = 3
with tf.variable_scope("myrnn", reuse=tf.AUTO_REUSE) as scope:
encoder_cell = rnn.BasicLSTMCell(N_HIDDEN)
encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
encoder_cell, X, sequence_length=X_lengths, dtype=tf.float64)
# encoder_outputs: shape=(3, 10, 50)
# encoder_state: presumably an LSTM (c, h) state pair, each of shape (3, 50);
# the traceback below shows the `h` part entering the concat as [3, 50].
decoder_cell = rnn.BasicLSTMCell(N_HIDDEN)
# Helper: supplies the decoder's input for each time step during training.
# NOTE(review): Y is batch-major (3, 10, 8), but time_major=True makes the
# helper treat axis 0 as time, so each step receives a [10, 8] slice. That is
# the [10, 8] vs [3, 50] mismatch reported in the error below; dropping
# time_major=True (or transposing Y to (10, 3, 8)) should resolve it.
helper = tf.contrib.seq2seq.TrainingHelper(
Y, sequence_length=X_lengths, time_major=True)
# Decoder: runs decoder_cell, starting from the encoder's final state.
basic_decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, encoder_state)
# Dynamic decoding: unrolls the decoder until every sequence is finished.
# NOTE(review): the answer below unpacks three return values from
# dynamic_decode; this two-value unpacking will likely fail once the shape
# error is fixed - confirm against the installed TF version.
outputs, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder)
但我在最后一行收到以下错误:
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
685 graph_def_version, node_def_str, input_shapes, input_tensors,
--> 686 input_tensors_as_shapes, status)
687 except errors.InvalidArgumentError as err:
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
515 compat.as_text(c_api.TF_Message(self.status.status)),
--> 516 c_api.TF_GetCode(self.status.status))
517 # Delete the underlying status object from memory otherwise it stays alive
InvalidArgumentError: Dimension 0 in both shapes must be equal, but are 10 and 3. Shapes are [10] and [3]. for 'myrnn_14/decoder/while/BasicDecoderStep/basic_lstm_cell/concat' (op: 'ConcatV2') with input shapes: [10,8], [3,50], [] and with computed input tensors: input[2] = <1>.
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-20-5d7a4b10734c> in <module>()
37
38 # Dynamic decoding
---> 39 outputs, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder)
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
307 ],
308 parallel_iterations=parallel_iterations,
--> 309 swap_memory=swap_memory)
310
311 final_outputs_ta = res[1]
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name, maximum_iterations)
3094 swap_memory=swap_memory)
3095 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
-> 3096 result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
3097 if maximum_iterations is not None:
3098 return result[1]
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
2872 self.Enter()
2873 original_body_result, exit_vars = self._BuildLoop(
-> 2874 pred, body, original_loop_vars, loop_vars, shape_invariants)
2875 finally:
2876 self.Exit()
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2812 flat_sequence=vars_for_body_with_tensor_arrays)
2813 pre_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access
-> 2814 body_result = body(*packed_vars_for_body)
2815 post_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access
2816 if not nest.is_sequence(body_result):
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths)
252 """
253 (next_outputs, decoder_state, next_inputs,
--> 254 decoder_finished) = decoder.step(time, inputs, state)
255 if decoder.tracks_own_finished:
256 next_finished = decoder_finished
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\basic_decoder.py in step(self, time, inputs, state, name)
136 """
137 with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
--> 138 cell_outputs, cell_state = self._cell(inputs, state)
139 if self._output_layer is not None:
140 cell_outputs = self._output_layer(cell_outputs)
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py in __call__(self, inputs, state, scope, *args, **kwargs)
294 # method. See the class docstring for more details.
295 return base_layer.Layer.__call__(self, inputs, state, scope=scope,
--> 296 *args, **kwargs)
297
298
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\layers\base.py in __call__(self, inputs, *args, **kwargs)
694
695 if not in_deferred_mode:
--> 696 outputs = self.call(inputs, *args, **kwargs)
697 if outputs is None:
698 raise ValueError('A layer\'s `call` method should return a Tensor '
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py in call(self, inputs, state)
575
576 gate_inputs = math_ops.matmul(
--> 577 array_ops.concat([inputs, h], 1), self._kernel)
578 gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
579
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\array_ops.py in concat(values, axis, name)
1173 tensor_shape.scalar())
1174 return identity(values[0], name=scope)
-> 1175 return gen_array_ops._concat_v2(values=values, axis=axis, name=name)
1176
1177
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\gen_array_ops.py in _concat_v2(values, axis, name)
775 if _ctx.in_graph_mode():
776 _, _, _op = _op_def_lib._apply_op_helper(
--> 777 "ConcatV2", values=values, axis=axis, name=name)
778 _result = _op.outputs[:]
779 _inputs_flat = _op.inputs
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
785 op = g.create_op(op_type_name, inputs, output_types, name=scope,
786 input_types=input_types, attrs=attr_protos,
--> 787 op_def=op_def)
788 return output_structure, op_def.is_stateful, op
789
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)
3271 op_def=op_def)
3272 self._create_op_helper(ret, compute_shapes=compute_shapes,
-> 3273 compute_device=compute_device)
3274 return ret
3275
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in _create_op_helper(self, op, compute_shapes, compute_device)
3311 # compute_shapes argument.
3312 if op._c_op or compute_shapes: # pylint: disable=protected-access
-> 3313 set_shapes_for_outputs(op)
3314 # TODO(b/XXXX): move to Operation.__init__ once _USE_C_API flag is removed.
3315 self._add_op(op)
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in set_shapes_for_outputs(op)
2499 return _set_shapes_for_outputs_c_api(op)
2500 else:
-> 2501 return _set_shapes_for_outputs(op)
2502
2503
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in _set_shapes_for_outputs(op)
2472 shape_func = _call_cpp_shape_fn_and_require_op
2473
-> 2474 shapes = shape_func(op)
2475 if shapes is None:
2476 raise RuntimeError(
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in call_with_requiring(op)
2402
2403 def call_with_requiring(op):
-> 2404 return call_cpp_shape_fn(op, require_shape_fn=True)
2405
2406 _call_cpp_shape_fn_and_require_op = call_with_requiring
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in call_cpp_shape_fn(op, require_shape_fn)
625 res = _call_cpp_shape_fn_impl(op, input_tensors_needed,
626 input_tensors_as_shapes_needed,
--> 627 require_shape_fn)
628 if not isinstance(res, dict):
629 # Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op).
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
689 missing_shape_fn = True
690 else:
--> 691 raise ValueError(err.message)
692
693 if missing_shape_fn:
ValueError: Dimension 0 in both shapes must be equal, but are 10 and 3. Shapes are [10] and [3]. for 'myrnn_14/decoder/while/BasicDecoderStep/basic_lstm_cell/concat' (op: 'ConcatV2') with input shapes: [10,8], [3,50], [] and with computed input tensors: input[2] = <1>.
如你所见,这是一个维度不匹配的问题,但我找不出原因。我究竟哪里做错了?
另外,我不太理解 Helper 类的作用(这可能正是我出错的地方)。如能解释,不胜感激。
答案 0 :(得分:0)
我找到了一个类似的问题《How to use tf.contrib.seq2seq.Helper for non-embedding data?》,并据此对我的代码做了一些修改,现在看起来可以运行了。我不确定在实际喂入训练数据时它是否仍然有效,但到目前为止,下面的代码在我这里可以运行。已在 TensorFlow 1.6.0 中测试。
import tensorflow as tf
import numpy as np
from tensorflow.python.layers import core as layers_core
# Sequence-to-sequence regression model (continuous inputs and outputs, no
# embeddings) built with the TF 1.x tf.contrib.seq2seq API.
#
# Hyper-parameters.
input_seq_len = 10   # number of time steps in every input sequence
input_dim = 8        # number of features per input time step
output_seq_len = 10  # number of time steps in every target sequence
output_dim = 8       # number of features per target time step
encoder_units = 50   # LSTM units in the encoder cell
decoder_units = 50   # LSTM units in the decoder cell; must equal
                     # encoder_units because the encoder's final state is
                     # used directly as the decoder's initial state
batch_size = 3       # batch size to use when feeding data; the graph itself
                     # reads the batch size from the fed tensors

graph = tf.Graph()
with graph.as_default():
    # Learning rate, fed at run time.
    learning_ = tf.placeholder(tf.float32)

    with tf.variable_scope('Seq2Seq'):
        # Encoder input: [batch, input_seq_len, input_dim].
        enc_input = tf.placeholder(
            tf.float32, [None, input_seq_len, input_dim])
        # Regression targets: [batch, output_seq_len, output_dim].
        target = tf.placeholder(
            tf.float32, [None, output_seq_len, output_dim])

        # Take the batch size from the data so that the `None` batch
        # dimension of the placeholders is honoured, instead of hard-wiring
        # the graph to `batch_size` examples.
        dynamic_batch = tf.shape(enc_input)[0]

        ### THE ENCODER
        encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(encoder_units)
        initial_state = encoder_cell.zero_state(
            dynamic_batch, dtype=tf.float32)
        # encoder_outputs: [batch, input_seq_len, encoder_units]
        # encoder_state:   LSTM (c, h) state, each [batch, encoder_units]
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell, enc_input, initial_state=initial_state)

        ### THE DECODER
        # Dense layer projecting the cell output (decoder_units) down to the
        # desired output_dim at every time step.
        projection = layers_core.Dense(
            output_dim, use_bias=True, name="output_projection")

        # Teacher forcing: at step t the decoder must see the target of step
        # t-1, not the value it is asked to predict. Feeding `target`
        # unshifted would let the network simply copy its input. Shift the
        # targets right by one step and start with an all-zero "GO" frame.
        go_frame = tf.zeros_like(target[:, :1, :])
        decoder_input = tf.concat([go_frame, target[:, :-1, :]], axis=1)

        # TrainingHelper hands `decoder_input` to the decoder one time step
        # at a time; every sequence in the batch has the full length.
        helper = tf.contrib.seq2seq.TrainingHelper(
            decoder_input,
            sequence_length=tf.fill([dynamic_batch], output_seq_len))
        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(decoder_units)
        decoder = tf.contrib.seq2seq.BasicDecoder(
            decoder_cell,
            initial_state=encoder_state,
            helper=helper,
            output_layer=projection)
        # dynamic_decode returns (final_outputs, final_state,
        # final_sequence_lengths); only the outputs are needed here.
        outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder=decoder)

    # Half squared error summed over the features, averaged over time steps
    # and batch. outputs.rnn_output: [batch, output_seq_len, output_dim].
    diff = tf.square(outputs.rnn_output - target)
    loss = tf.reduce_mean(0.5 * tf.reduce_sum(diff, axis=-1))
    optimizer = tf.train.AdamOptimizer(learning_).minimize(loss)