Using the attention mechanism in the TensorFlow Seq2Seq native API (v1.3)

Asked: 2017-08-23 14:40:38

Tags: python machine-learning tensorflow artificial-intelligence

I have been working with TensorFlow (version 1.3) and Seq2Seq models for a while. I have a working Seq2Seq model, but when I try to add attention to it I get the following error:

    ---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-118-a6d1c9e78e5d> in <module>()
     26                                                    rnn_size,
     27                                                    num_layers,
---> 28                                                    target_vocab_to_int)
     29 
     30 

<ipython-input-116-41a4ee1f8ac2> in seq2seq_model(input_data, target_data, keep_prob, batch_size, source_sequence_length, target_sequence_length, max_target_sentence_length, source_vocab_size, target_vocab_size, enc_embedding_size, dec_embedding_size, rnn_size, num_layers, target_vocab_to_int)
     32                    2*rnn_size,
     33                    num_layers, target_vocab_to_int, target_vocab_size,
---> 34                    batch_size, keep_prob, dec_embedding_size , enc_out)
     35 
     36     return training_decoder_output, inference_decoder_output

<ipython-input-115-3a03827107f3> in decoding_layer(dec_input, encoder_state, target_sequence_length, max_target_sequence_length, rnn_size, num_layers, target_vocab_to_int, target_vocab_size, batch_size, keep_prob, decoding_embedding_size, encoder_outputs)
     55     with tf.variable_scope("decode"):
     56         train_decoder_out = decoding_layer_train(new_state, attn_cell, dec_embed_input, 
---> 57                          target_sequence_length, max_target_sequence_length, output_layer, keep_prob)
     58 
     59     with tf.variable_scope("decode", reuse=True):

<ipython-input-12-484f2d84b18e> in decoding_layer_train(encoder_state, dec_cell, dec_embed_input, target_sequence_length, max_summary_length, output_layer, keep_prob)
     23     training_decoder_output = tf.contrib.seq2seq.dynamic_decode(training_decoder,
     24                                                                 impute_finished=True,
---> 25                                                                 maximum_iterations=max_summary_length)[0]
     26     return training_decoder_output
     27 

/Users/alsulaimi/Documents/AI/TensorFlow/workSpace/lib/python2.7/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.pyc in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
    284         ],
    285         parallel_iterations=parallel_iterations,
--> 286         swap_memory=swap_memory)
    287 
    288     final_outputs_ta = res[1]

/Users/alsulaimi/Documents/AI/TensorFlow/workSpace/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
   2773     context = WhileContext(parallel_iterations, back_prop, swap_memory, name)
   2774     ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context)
-> 2775     result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
   2776     return result
   2777 

/Users/alsulaimi/Documents/AI/TensorFlow/workSpace/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in BuildLoop(self, pred, body, loop_vars, shape_invariants)
   2602       self.Enter()
   2603       original_body_result, exit_vars = self._BuildLoop(
-> 2604           pred, body, original_loop_vars, loop_vars, shape_invariants)
   2605     finally:
   2606       self.Exit()

/Users/alsulaimi/Documents/AI/TensorFlow/workSpace/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
   2493     # Let the context know the loop variables so the loop variables
   2494     # would be added in the outer contexts properly.
-> 2495     self._InitializeValues(loop_vars)
   2496     real_vars = loop_vars
   2497     if self._outer_context:

/Users/alsulaimi/Documents/AI/TensorFlow/workSpace/lib/python2.7/site-packages/tensorflow/python/ops/control_flow_ops.pyc in _InitializeValues(self, values)
   2475         self._values.add(x.name)
   2476       else:
-> 2477         self._values.add(x.values.name)
   2478         self._values.add(x.indices.name)
   2479         if isinstance(x, ops.IndexedSlices):

AttributeError: 'NoneType' object has no attribute 'values'.

I tried to google a fix but could not find any solution. A similar issue was reported for BeamSearchDecoder, but it seems to be a bug (here).

I am not sure whether I am doing something wrong in my code or whether I have simply hit a bug. Below are the relevant parts of my code:

def decoding_layer(dec_input, encoder_state,
                   target_sequence_length, max_target_sequence_length,
                   rnn_size,
                   num_layers, target_vocab_to_int, target_vocab_size,
                   batch_size, keep_prob, decoding_embedding_size , encoder_outputs):
    """
    Create decoding layer
    :param dec_input: Decoder input
    :param encoder_state: Encoder state
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_target_sequence_length: Maximum length of target sequences
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :param target_vocab_size: Size of target vocabulary
    :param batch_size: The size of the batch
    :param keep_prob: Dropout keep probability
    :param decoding_embedding_size: Decoding embedding size
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    # 1. Decoder Embedding
    dec_embeddings = tf.Variable(tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)

    # 2. Construct the decoder cell
    def create_cell(rnn_size):
        lstm_cell = tf.contrib.rnn.LSTMCell(rnn_size,
                                            initializer=tf.random_uniform_initializer(-0.1,0.1,seed=2))
        drop = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
        return drop


    dec_cell = tf.contrib.rnn.MultiRNNCell([create_cell(rnn_size) for _ in range(num_layers)])
    #dec_cell = tf.contrib.rnn.MultiRNNCell(cells_a)  

    #attention details
    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units=rnn_size, memory=encoder_outputs)

    attn_cell = tf.contrib.seq2seq.AttentionWrapper(dec_cell, attention_mechanism, attention_layer_size=rnn_size/2)

    attn_zero = attn_cell.zero_state(batch_size, tf.float32)

    attn_zero = attn_zero.clone(cell_state=encoder_state)

    new_state = tf.contrib.seq2seq.AttentionWrapperState(cell_state=encoder_state, attention=attn_zero, time=0, alignments=None, alignment_history=())

    """out_cell = tf.contrib.rnn.OutputProjectionWrapper(
            attn_cell, target_vocab_size, reuse=True
        )"""

    #end of attention 

    output_layer = Dense(target_vocab_size,
                         kernel_initializer = tf.truncated_normal_initializer(mean = 0.0, stddev=0.1))

    with tf.variable_scope("decode"):
        train_decoder_out = decoding_layer_train(new_state, attn_cell, dec_embed_input, 
                         target_sequence_length, max_target_sequence_length, output_layer, keep_prob)

    with tf.variable_scope("decode", reuse=True):
        infer_decoder_out = decoding_layer_infer(new_state, attn_cell, dec_embeddings, 
                             target_vocab_to_int['<GO>'], target_vocab_to_int['<EOS>'], max_target_sequence_length, 
                             target_vocab_size, output_layer, batch_size, keep_prob)

    return (train_decoder_out, infer_decoder_out)

"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
#tests.test_decoding_layer(decoding_layer)



def decoding_layer_train(encoder_state, dec_cell, dec_embed_input, 
                         target_sequence_length, max_summary_length, 
                         output_layer, keep_prob):
    """
    Create a decoding layer for training
    :param encoder_state: Encoder State
    :param dec_cell: Decoder RNN Cell
    :param dec_embed_input: Decoder embedded input
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_summary_length: The length of the longest sequence in the batch
    :param output_layer: Function to apply the output layer
    :param keep_prob: Dropout keep probability
    :return: BasicDecoderOutput containing training logits and sample_id
    """

    training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=dec_embed_input,
                                                        sequence_length=target_sequence_length,
                                                        time_major=False)

    training_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell, training_helper, encoder_state, output_layer)

    training_decoder_output = tf.contrib.seq2seq.dynamic_decode(training_decoder,
                                                                impute_finished=True,
                                                                maximum_iterations=max_summary_length)[0]
    return training_decoder_output



"""
DON'T MODIFY ANYTHING IN THIS CELL THAT IS BELOW THIS LINE
"""
#tests.test_decoding_layer_train(decoding_layer_train)
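

For context, decoding_layer_infer is called above but its body is not shown. It mirrors decoding_layer_train, only replacing teacher forcing with greedy decoding. The following is a minimal sketch of what it might look like, assuming tf.contrib.seq2seq.GreedyEmbeddingHelper and the argument order of the call sites above; it is an illustration, not the asker's exact code:

def decoding_layer_infer(encoder_state, dec_cell, dec_embeddings, start_of_sequence_id,
                         end_of_sequence_id, max_target_sequence_length,
                         vocab_size, output_layer, batch_size, keep_prob):
    """Create a decoding layer for inference (greedy decoding).
    vocab_size and keep_prob are only kept to match the call signature above."""
    # Tile the <GO> token id so every sequence in the batch starts from the same token
    start_tokens = tf.tile(tf.constant([start_of_sequence_id], dtype=tf.int32),
                           [batch_size], name='start_tokens')

    # Greedy helper: embeds the previous prediction and feeds it back as the next input
    inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(dec_embeddings,
                                                                start_tokens,
                                                                end_of_sequence_id)

    inference_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell,
                                                        inference_helper,
                                                        encoder_state,
                                                        output_layer)

    inference_decoder_output = tf.contrib.seq2seq.dynamic_decode(inference_decoder,
                                                                 impute_finished=True,
                                                                 maximum_iterations=max_target_sequence_length)[0]
    return inference_decoder_output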

I would really appreciate your help, thanks in advance.

1 Answer:

Answer 0 (score: 0)

OK, so I was doing something wrong. It turns out that in the decoding_layer function I should not have built the AttentionWrapperState explicitly (presumably its alignments=None field is the None that while_loop trips over); cloning the wrapper's zero_state with the encoder state is enough. Below is the updated code:

def decoding_layer(dec_input, encoder_state,
                   target_sequence_length, max_target_sequence_length,
                   rnn_size,
                   num_layers, target_vocab_to_int, target_vocab_size,
                   batch_size, keep_prob, decoding_embedding_size, encoder_outputs):
    """
    Create decoding layer
    :param dec_input: Decoder input
    :param encoder_state: Encoder state
    :param target_sequence_length: The lengths of each sequence in the target batch
    :param max_target_sequence_length: Maximum length of target sequences
    :param rnn_size: RNN Size
    :param num_layers: Number of layers
    :param target_vocab_to_int: Dictionary to go from the target words to an id
    :param target_vocab_size: Size of target vocabulary
    :param batch_size: The size of the batch
    :param keep_prob: Dropout keep probability
    :param decoding_embedding_size: Decoding embedding size
    :return: Tuple of (Training BasicDecoderOutput, Inference BasicDecoderOutput)
    """
    # 1. Decoder Embedding
    dec_embeddings = tf.Variable(tf.random_uniform([target_vocab_size, decoding_embedding_size]))
    dec_embed_input = tf.nn.embedding_lookup(dec_embeddings, dec_input)

    # 2. Construct the decoder cell
    def create_cell(rnn_size):
        lstm_cell = tf.contrib.rnn.LSTMCell(rnn_size,
                                            initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
        drop = tf.contrib.rnn.DropoutWrapper(lstm_cell, output_keep_prob=keep_prob)
        return drop

    dec_cell = tf.contrib.rnn.MultiRNNCell([create_cell(rnn_size) for _ in range(num_layers)])
    #dec_cell = tf.contrib.rnn.MultiRNNCell(cells_a)

    #attention details
    attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(num_units=rnn_size, memory=encoder_outputs)

    attn_cell = tf.contrib.seq2seq.AttentionWrapper(dec_cell, attention_mechanism, attention_layer_size=rnn_size/2)

    attn_zero = attn_cell.zero_state(batch_size, tf.float32)

    attn_zero = attn_zero.clone(cell_state=encoder_state)

    #new_state = tf.contrib.seq2seq.AttentionWrapperState(cell_state=encoder_state, attention=attn_zero, time=0, alignments=None, alignment_history=())

    """out_cell = tf.contrib.rnn.OutputProjectionWrapper(
            attn_cell, target_vocab_size, reuse=True
        )"""
    #end of attention
    #tensor_util.make_tensor_proto(attn_cell)
    output_layer = Dense(target_vocab_size,
                         kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))

    with tf.variable_scope("decode"):
        train_decoder_out = decoding_layer_train(attn_zero, attn_cell, dec_embed_input,
                                                 target_sequence_length, max_target_sequence_length, output_layer, keep_prob)

    with tf.variable_scope("decode", reuse=True):
        infer_decoder_out = decoding_layer_infer(attn_zero, attn_cell, dec_embeddings,
                                                 target_vocab_to_int['<GO>'], target_vocab_to_int['<EOS>'], max_target_sequence_length,
                                                 target_vocab_size, output_layer, batch_size, keep_prob)

    return (train_decoder_out, infer_decoder_out)
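
To spell out why this works: AttentionWrapper.zero_state returns an AttentionWrapperState whose attention, time, alignments and alignment_history fields are all initialised to real tensors, and clone only overrides cell_state, so the while_loop inside dynamic_decode never sees a None loop variable. The essential change, restated as a sketch with the same names as above:

# Ask the wrapper for a fully initialised state (no None fields) ...
attn_zero = attn_cell.zero_state(batch_size, tf.float32)

# ... then replace only its cell_state with the final encoder state before decoding
attn_zero = attn_zero.clone(cell_state=encoder_state)

As a side note, if the encoder outputs are padded, BahdanauAttention also accepts a memory_sequence_length argument so that attention weights over the padding are masked out.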

I hope this helps someone else.

Thanks