TensorFlow seq2seq decoder not working

Date: 2018-04-25 15:50:00

Tags: python tensorflow seq2seq

I am trying to follow the seq2seq example at https://www.tensorflow.org/tutorials/seq2seq, the difference being that my inputs and outputs are real-valued continuous values rather than words as in the example.
I have two tensors X and Y that stand in for the input and output placeholders in this basic example. Here is my code so far:

import tensorflow as tf
from tensorflow.contrib import rnn 
import numpy as np
from tensorflow.python.layers.core import Dense

N_HIDDEN = 50

sess = tf.Session()

X = np.random.randn(3, 10, 8)
Y = X + 0.1*np.random.randn(3, 10, 8)

X_lengths = [10, 10, 10] 
BATCH_SIZE = 3

with tf.variable_scope("myrnn", reuse=tf.AUTO_REUSE) as scope:

    encoder_cell = rnn.BasicLSTMCell(N_HIDDEN) 

    encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
        encoder_cell, X, sequence_length=X_lengths, dtype=tf.float64)


    # encoder_outputs: shape=(3, 10, 50)
    # encoder_state: shape=(3, 50)    

    decoder_cell = rnn.BasicLSTMCell(N_HIDDEN)

    # Helper
    helper = tf.contrib.seq2seq.TrainingHelper(
        Y, sequence_length=X_lengths, time_major=True)

    # Decoder
    basic_decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, helper, encoder_state)

    # Dynamic decoding
    outputs, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder)

But I get the following error on the last line:

---------------------------------------------------------------------------
InvalidArgumentError                      Traceback (most recent call last)
c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
    685           graph_def_version, node_def_str, input_shapes, input_tensors,
--> 686           input_tensors_as_shapes, status)
    687   except errors.InvalidArgumentError as err:

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\errors_impl.py in __exit__(self, type_arg, value_arg, traceback_arg)
    515             compat.as_text(c_api.TF_Message(self.status.status)),
--> 516             c_api.TF_GetCode(self.status.status))
    517     # Delete the underlying status object from memory otherwise it stays alive

InvalidArgumentError: Dimension 0 in both shapes must be equal, but are 10 and 3. Shapes are [10] and [3]. for 'myrnn_14/decoder/while/BasicDecoderStep/basic_lstm_cell/concat' (op: 'ConcatV2') with input shapes: [10,8], [3,50], [] and with computed input tensors: input[2] = <1>.

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<ipython-input-20-5d7a4b10734c> in <module>()
     37 
     38     # Dynamic decoding
---> 39     outputs, _ = tf.contrib.seq2seq.dynamic_decode(basic_decoder)

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
    307         ],
    308         parallel_iterations=parallel_iterations,
--> 309         swap_memory=swap_memory)
    310 
    311     final_outputs_ta = res[1]

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name, maximum_iterations)
   3094         swap_memory=swap_memory)
   3095     ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
-> 3096     result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
   3097     if maximum_iterations is not None:
   3098       return result[1]

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
   2872       self.Enter()
   2873       original_body_result, exit_vars = self._BuildLoop(
-> 2874           pred, body, original_loop_vars, loop_vars, shape_invariants)
   2875     finally:
   2876       self.Exit()

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
   2812         flat_sequence=vars_for_body_with_tensor_arrays)
   2813     pre_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
-> 2814     body_result = body(*packed_vars_for_body)
   2815     post_summaries = ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION)  # pylint: disable=protected-access
   2816     if not nest.is_sequence(body_result):

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths)
    252       """
    253       (next_outputs, decoder_state, next_inputs,
--> 254        decoder_finished) = decoder.step(time, inputs, state)
    255       if decoder.tracks_own_finished:
    256         next_finished = decoder_finished

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\basic_decoder.py in step(self, time, inputs, state, name)
    136     """
    137     with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
--> 138       cell_outputs, cell_state = self._cell(inputs, state)
    139       if self._output_layer is not None:
    140         cell_outputs = self._output_layer(cell_outputs)

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py in __call__(self, inputs, state, scope, *args, **kwargs)
    294     # method.  See the class docstring for more details.
    295     return base_layer.Layer.__call__(self, inputs, state, scope=scope,
--> 296                                      *args, **kwargs)
    297 
    298 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\layers\base.py in __call__(self, inputs, *args, **kwargs)
    694 
    695         if not in_deferred_mode:
--> 696           outputs = self.call(inputs, *args, **kwargs)
    697           if outputs is None:
    698             raise ValueError('A layer\'s `call` method should return a Tensor '

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py in call(self, inputs, state)
    575 
    576     gate_inputs = math_ops.matmul(
--> 577         array_ops.concat([inputs, h], 1), self._kernel)
    578     gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)
    579 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\array_ops.py in concat(values, axis, name)
   1173               tensor_shape.scalar())
   1174       return identity(values[0], name=scope)
-> 1175   return gen_array_ops._concat_v2(values=values, axis=axis, name=name)
   1176 
   1177 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\ops\gen_array_ops.py in _concat_v2(values, axis, name)
    775   if _ctx.in_graph_mode():
    776     _, _, _op = _op_def_lib._apply_op_helper(
--> 777         "ConcatV2", values=values, axis=axis, name=name)
    778     _result = _op.outputs[:]
    779     _inputs_flat = _op.inputs

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
    785         op = g.create_op(op_type_name, inputs, output_types, name=scope,
    786                          input_types=input_types, attrs=attr_protos,
--> 787                          op_def=op_def)
    788       return output_structure, op_def.is_stateful, op
    789 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in create_op(self, op_type, inputs, dtypes, input_types, name, attrs, op_def, compute_shapes, compute_device)
   3271         op_def=op_def)
   3272     self._create_op_helper(ret, compute_shapes=compute_shapes,
-> 3273                            compute_device=compute_device)
   3274     return ret
   3275 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in _create_op_helper(self, op, compute_shapes, compute_device)
   3311     # compute_shapes argument.
   3312     if op._c_op or compute_shapes:  # pylint: disable=protected-access
-> 3313       set_shapes_for_outputs(op)
   3314     # TODO(b/XXXX): move to Operation.__init__ once _USE_C_API flag is removed.
   3315     self._add_op(op)

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in set_shapes_for_outputs(op)
   2499     return _set_shapes_for_outputs_c_api(op)
   2500   else:
-> 2501     return _set_shapes_for_outputs(op)
   2502 
   2503 

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in _set_shapes_for_outputs(op)
   2472       shape_func = _call_cpp_shape_fn_and_require_op
   2473 
-> 2474   shapes = shape_func(op)
   2475   if shapes is None:
   2476     raise RuntimeError(

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\ops.py in call_with_requiring(op)
   2402 
   2403   def call_with_requiring(op):
-> 2404     return call_cpp_shape_fn(op, require_shape_fn=True)
   2405 
   2406   _call_cpp_shape_fn_and_require_op = call_with_requiring

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in call_cpp_shape_fn(op, require_shape_fn)
    625     res = _call_cpp_shape_fn_impl(op, input_tensors_needed,
    626                                   input_tensors_as_shapes_needed,
--> 627                                   require_shape_fn)
    628     if not isinstance(res, dict):
    629       # Handles the case where _call_cpp_shape_fn_impl calls unknown_shape(op).

c:\users\mntiv\anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\framework\common_shapes.py in _call_cpp_shape_fn_impl(op, input_tensors_needed, input_tensors_as_shapes_needed, require_shape_fn)
    689       missing_shape_fn = True
    690     else:
--> 691       raise ValueError(err.message)
    692 
    693   if missing_shape_fn:

ValueError: Dimension 0 in both shapes must be equal, but are 10 and 3. Shapes are [10] and [3]. for 'myrnn_14/decoder/while/BasicDecoderStep/basic_lstm_cell/concat' (op: 'ConcatV2') with input shapes: [10,8], [3,50], [] and with computed input tensors: input[2] = <1>.

As you can see, it is a dimension problem that I cannot figure out. What am I doing wrong?

Also, I don't really understand the purpose of the Helper class (that may well be where my mistake is). Any explanation is appreciated.

1 Answer:

Answer 0 (score: 0)

I found a similar question, How to use tf.contrib.seq2seq.Helper for non-embedding data?, made some changes to my code, and it now seems to work. The main change is that the TrainingHelper is no longer built with time_major=True, since my targets are batch-major, which appears to be what caused the shape mismatch above; I also added a Dense output layer to project the decoder output back to output_dim. I am not sure this is the right way to feed training data, but so far the code below works for me. This was tested on TensorFlow 1.6.0.

import tensorflow as tf
import numpy as np
from tensorflow.python.layers import core as layers_core

input_seq_len = 10 # Sequence length as input
input_dim = 8 # Nb of features in input

output_seq_len = 10 # Sequence length of the output
output_dim = 8 # nb of features in output

encoder_units = 50 # nb of units in each cell for the encoder
decoder_units = 50 # nb of units in each cell for the decoder

batch_size = 3

graph = tf.Graph()
with graph.as_default():
    learning_ = tf.placeholder(tf.float32)

    with tf.variable_scope('Seq2Seq'):

        # Placeholder for encoder input
        enc_input = \
        tf.placeholder(tf.float32, [None, input_seq_len, input_dim])

        # Placeholder for decoder output - Targets
        target = \
        tf.placeholder(tf.float32, [None, output_seq_len, output_dim])


        ### THE ENCODER

        # Build RNN cell
        encoder_cell = tf.nn.rnn_cell.BasicLSTMCell(encoder_units)

        initial_state = \
        encoder_cell.zero_state(batch_size, dtype=tf.float32)

        # Run Dynamic RNN
        #   encoder_outputs: [batch_size, seq_size, num_units]
        #   encoder_state: [batch_size, num_units]
        encoder_outputs, encoder_state = \
        tf.nn.dynamic_rnn(encoder_cell, enc_input, initial_state=initial_state)

        ### THE DECODER

        # Simple Dense layer to project from rnn_dim to the desired output_dim
        projection = \
        layers_core.Dense(output_dim, use_bias=True, name="output_projection")

        helper = \
        tf.contrib.seq2seq.TrainingHelper(target, sequence_length=[output_seq_len for _ in range(batch_size)])

        decoder_cell = tf.nn.rnn_cell.BasicLSTMCell(decoder_units)                        

        decoder = \
        tf.contrib.seq2seq.BasicDecoder(decoder_cell, initial_state=encoder_state, helper=helper, output_layer=projection)

        outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder=decoder)

        # Could be reduced into fewer lines
        diff = tf.square( outputs[0] - target )
        loss = 0.5*tf.reduce_sum( diff, -1 )
        loss = tf.reduce_mean(loss, 1)
        loss = tf.reduce_mean(loss)

        optimizer = tf.train.AdamOptimizer(learning_).minimize(loss)
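
Below is a minimal training-loop sketch showing how training data could be fed to this graph with a feed_dict. It only reuses names already defined above (graph, batch_size, input_seq_len, input_dim, output_seq_len, output_dim, enc_input, target, learning_, loss, optimizer, outputs); the synthetic X_batch/Y_batch arrays, the learning rate of 1e-3, and the 100 training steps are arbitrary choices for illustration, not part of the original answer.

# Minimal training sketch (assumes the graph definition above).
with graph.as_default():
    init_op = tf.global_variables_initializer()

# Synthetic data, generated the same way as in the question.
X_batch = np.random.randn(batch_size, input_seq_len, input_dim).astype(np.float32)
Y_batch = (X_batch + 0.1 * np.random.randn(batch_size, output_seq_len, output_dim)).astype(np.float32)

with tf.Session(graph=graph) as sess:
    sess.run(init_op)
    for step in range(100):
        _, loss_val = sess.run(
            [optimizer, loss],
            feed_dict={enc_input: X_batch, target: Y_batch, learning_: 1e-3})
        if step % 20 == 0:
            print(step, loss_val)

    # outputs.rnn_output holds the projected decoder outputs,
    # shape [batch_size, output_seq_len, output_dim].
    preds = sess.run(outputs.rnn_output,
                     feed_dict={enc_input: X_batch, target: Y_batch})

Note that with a TrainingHelper the decoder always reads the targets as its inputs (teacher forcing), so this setup only covers training; decoding at inference time on continuous data would need a different helper (for example a custom one), which is what the linked question discusses.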