我试图使用tensorflow来编码RDD编码器和解码器以及不同长度的序列输入,因此希望编码器和解码器都是动态的。另外,解码器输入由编码器最终隐藏状态(上下文矢量)决定,这类似于第3页中的Related Paper参见图片a。解码器试图在训练期间完全推断以前输出和上下文矢量作为每一步的输入。
import tensorflow as tf
import copy
import math
from tensorflow.python.layers.core import Dense
class RNNEncoder_Decoder(object):
def __init__(self,input_dim,
context_dim,output_dim,hidden_dim,
layers_stacked_count,learning_rate):
self.graph = tf.get_default_graph()
self.input_dim = input_dim
self.output_dim = output_dim
self.context_dim = context_dim
self.hidden_dim = hidden_dim
self.layers_stacked_count = layers_stacked_count
self.learning_rate = learning_rate
self.sampling_probability = tf.constant(dtype=tf.float32,value=1.0)
# [batch_size,sequence_length,input_dimension]
self.enc_inp = tf.placeholder(tf.float32, [None,None,self.input_dim], name='encoder_inputs')
self.expected_out = tf.placeholder(tf.float32, [None,None,self.input_dim], name='expected_outs')
# fullly inference during trianing
self.dec_inp = tf.zeros_like(self.expected_out,dtype=tf.float32,name='decoder_inputs')
seq_length = tf.reduce_sum(tf.sign(tf.reduce_max(tf.abs(self.enc_inp), 2)), 1)
self.seq_length = tf.cast(seq_length, tf.int32)
with tf.variable_scope('RNNEncoderDecoder'):
with tf.variable_scope("Enocder") as encoder_varscope:
# create encoder LSTM cell
encoder_cells = []
for i in range(self.layers_stacked_count):
with tf.variable_scope('EncoderCell_{}'.format(i)):
encoder_cells.append(tf.nn.rnn_cell.LSTMCell(self.hidden_dim,
use_peepholes=True))
self.encoder_cell = tf.nn.rnn_cell.MultiRNNCell(encoder_cells)
# ruuning dynamic rnn encoder
_, enc_state = tf.nn.dynamic_rnn(cell = self.encoder_cell,
initial_state=None,
dtype=tf.float32,
inputs = self.enc_inp,
sequence_length = self.seq_length
)
# extract top layer hidden state as feature representation
self.context_vector = enc_state[-1].h
cell_state0 = tf.zeros_like(enc_state[0].c,dtype=tf.float32)
hidden_state0 = tf.zeros_like(enc_state[0].h,dtype=tf.float32)
dec_init_state = (enc_state[1], # pass the top layer state of enocder to the bottom layer of decoder
tf.nn.rnn_cell.LSTMStateTuple(cell_state0, hidden_state0))
# condition extracted features on decoder inputs
# with a shape that matches decoder inputs in all but (potentially) the final dimension.
# tile context vector from [batch_size,context_dim] to [batch_size,decoder_sequence_length,context_dim]
context_vector_shape = tf.shape(self.context_vector)
context_vector_reshaped = tf.reshape(self.context_vector,
[context_vector_shape[0], 1, context_vector_shape[1]]
)
enc_inp_shape = tf.shape(self.enc_inp)
self.auxiliary_inputs = tf.tile(context_vector_reshaped,
multiples=[1,enc_inp_shape[1],1]
)
with tf.variable_scope("Deocder") as decoder_varscope:
# create decoder LSTM cell
decoder_cells = []
for i in range(self.layers_stacked_count):
with tf.variable_scope('DecoderCell_{}'.format(i)):
decoder_cells.append(tf.nn.rnn_cell.LSTMCell(self.hidden_dim,
use_peepholes=True))
self.decoder_cell = tf.nn.rnn_cell.MultiRNNCell(decoder_cells)
dec_out_dense = Dense(units = self.output_dim,
activation = None,
use_bias = False,
kernel_initializer = tf.truncated_normal_initializer(
dtype=tf.float32,
stddev = 1.0 / math.sqrt(float(self.hidden_dim))
),
name = 'dec_outp_linear_projection'
)
training_helper = tf.contrib.seq2seq.ScheduledOutputTrainingHelper(
inputs = self.dec_inp,
sequence_length = self.seq_length,
auxiliary_inputs = self.auxiliary_inputs, # condtional on inputs
sampling_probability = 1.0, # for fullly inference
name = 'feeding_conditional_input'
)
decoder = tf.contrib.seq2seq.BasicDecoder(
cell = self.decoder_cell,
helper = training_helper,
initial_state = dec_init_state,
output_layer = dec_out_dense
)
outputs, _ , final_seq_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,
impute_finished = True
)
self.outputs = outputs
### optimize loss part
def get_decoder_prediction(self,X,session):
feed_dict = {
self.enc_inp:X
}
feed_dict.update({self.expected_out:X})
run = [self.outputs]
return session.run(run,feed_dict=feed_dict)
context_dim = 32
output_dim = input_dim = 1
hidden_dim = 32
layers_stacked_count = 2
learning_rate = 0.01
test = RNNEncoder_Decoder(input_dim=input_dim,
context_dim=context_dim,
output_dim=output_dim,
hidden_dim=hidden_dim,
layers_stacked_count=layers_stacked_count,
learning_rate=learning_rate
)
没有" auxiliary_inputs = self.auxiliary_inputs",它成功运行,
但是对于auxiliary_inputs = self.auxiliary_inputs,我收到了以下错误:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-3-02522a01f0d8> in <module>()
9 hidden_dim=hidden_dim,
10 layers_stacked_count=layers_stacked_count,
---> 11 learning_rate=learning_rate
12 )
<ipython-input-2-86494b8d99fa> in __init__(self, input_dim, context_dim, output_dim, hidden_dim, layers_stacked_count, learning_rate)
98
99 outputs, _ , final_seq_lengths = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,
--> 100 impute_finished = True
101 )
102 self.outputs = outputs
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in dynamic_decode(decoder, output_time_major, impute_finished, maximum_iterations, parallel_iterations, swap_memory, scope)
284 ],
285 parallel_iterations=parallel_iterations,
--> 286 swap_memory=swap_memory)
287
288 final_outputs_ta = res[1]
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in while_loop(cond, body, loop_vars, shape_invariants, parallel_iterations, back_prop, swap_memory, name)
2773 context = WhileContext(parallel_iterations, back_prop, swap_memory, name)
2774 ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, context)
-> 2775 result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
2776 return result
2777
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in BuildLoop(self, pred, body, loop_vars, shape_invariants)
2602 self.Enter()
2603 original_body_result, exit_vars = self._BuildLoop(
-> 2604 pred, body, original_loop_vars, loop_vars, shape_invariants)
2605 finally:
2606 self.Exit()
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py in _BuildLoop(self, pred, body, original_loop_vars, loop_vars, shape_invariants)
2552 structure=original_loop_vars,
2553 flat_sequence=vars_for_body_with_tensor_arrays)
-> 2554 body_result = body(*packed_vars_for_body)
2555 if not nest.is_sequence(body_result):
2556 body_result = [body_result]
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/decoder.py in body(time, outputs_ta, state, inputs, finished, sequence_lengths)
232 """
233 (next_outputs, decoder_state, next_inputs,
--> 234 decoder_finished) = decoder.step(time, inputs, state)
235 next_finished = math_ops.logical_or(decoder_finished, finished)
236 if maximum_iterations is not None:
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py in step(self, time, inputs, state, name)
137 """
138 with ops.name_scope(name, "BasicDecoderStep", (time, inputs, state)):
--> 139 cell_outputs, cell_state = self._cell(inputs, state)
140 if self._output_layer is not None:
141 cell_outputs = self._output_layer(cell_outputs)
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
178 with vs.variable_scope(vs.get_variable_scope(),
179 custom_getter=self._rnn_get_variable):
--> 180 return super(RNNCell, self).__call__(inputs, state)
181
182 def _rnn_get_variable(self, getter, *args, **kwargs):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
448 # Check input assumptions set after layer building, e.g. input shape.
449 self._assert_input_compatibility(inputs)
--> 450 outputs = self.call(inputs, *args, **kwargs)
451
452 # Apply activity regularization.
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
936 [-1, cell.state_size])
937 cur_state_pos += cell.state_size
--> 938 cur_inp, new_state = cell(cur_inp, cur_state)
939 new_states.append(new_state)
940
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in __call__(self, inputs, state, scope)
178 with vs.variable_scope(vs.get_variable_scope(),
179 custom_getter=self._rnn_get_variable):
--> 180 return super(RNNCell, self).__call__(inputs, state)
181
182 def _rnn_get_variable(self, getter, *args, **kwargs):
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/layers/base.py in __call__(self, inputs, *args, **kwargs)
448 # Check input assumptions set after layer building, e.g. input shape.
449 self._assert_input_compatibility(inputs)
--> 450 outputs = self.call(inputs, *args, **kwargs)
451
452 # Apply activity regularization.
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py in call(self, inputs, state)
554 input_size = inputs.get_shape().with_rank(2)[1]
555 if input_size.value is None:
--> 556 raise ValueError("Could not infer input size from inputs.get_shape()[-1]")
557 scope = vs.get_variable_scope()
558 with vs.variable_scope(scope, initializer=self._initializer) as unit_scope:
ValueError: Could not infer input size from inputs.get_shape()[-1]
我刚刚开始使用tensforflow,所以任何人都可以帮助我: 这是解决解码器输入上编码器的最后隐藏状态的正确方法吗? 以及在将auxiliary_inputs作为错误提供后,解码器输入为什么变为无?
答案 0 :(得分:0)
找到我犯的错误:
使用&#34; context_vector_shape&#34;定义auxiliary_inputs的形状张量将导致所有维度大小(?,?,?),这导致&#34; ValueError:无法从inputs.get_shape()[ - 1]&#34推断输入大小;,
直接定义auxiliary_inputs张量的形状为(?,?,context_dim)将解决这个问题。