我正在尝试基于本文实现卷积lstm网络:https://arxiv.org/abs/1506.04214, 我已经实现了这样的编码器:
def new_convLSTM_layer(input,               # The previous layer.
                       num_input_channels,  # Num. channels in prev. layer.
                       filter_size,         # Width and height of each filter.
                       num_filters,         # Number of filters.
                       img_size,            # Size of image (width or height).
                       hid_st,              # Hidden state from the previous step.
                       Cell,                # Cell state from the previous step.
                       use_pooling):        # Apply 2x2 max-pooling to the outputs.
    """One step of a ConvLSTM cell (Shi et al., 2015, arXiv:1506.04214).

    Computes the gated update

        i  = sigmoid(W_xi * x + W_hi * h + W_ci . c     + b_i)
        f  = sigmoid(W_xf * x + W_hf * h + W_cf . c     + b_f)
        c' = f . c + i . tanh(W_xc * x + W_hc * h + b_c)
        o  = sigmoid(W_xo * x + W_ho * h + W_co . c'    + b_o)
        h' = o . tanh(c')

    where '*' is a SAME-padded stride-1 convolution and '.' is the
    elementwise (Hadamard) product.

    Returns:
        (relu(h'), relu(o), c') — the new hidden state through a ReLU,
        the output gate through a ReLU, and the new cell state.

    NOTE(review): fresh weights are created on every call, so invoking this
    function once per time step does NOT share parameters across steps —
    confirm the caller wraps it in a reuse-enabled variable scope if
    weight sharing across time is intended.
    """
    # Kernel shapes: input-to-gate and hidden-to-gate convolutions.
    shape_x = [filter_size, filter_size, num_input_channels, num_filters]
    shape_h = [filter_size, filter_size, num_filters, num_filters]

    # Gate kernels (creation order kept identical to the original code so
    # graph-mode random initialization is unaffected).
    W_xi = new_weights(shape=shape_x)   # input gate, from x(t)
    W_hi = new_weights(shape=shape_h)   # input gate, from h(t-1)
    W_xf = new_weights(shape=shape_x)   # forget gate
    W_hf = new_weights(shape=shape_h)
    W_xc = new_weights(shape=shape_x)   # candidate cell
    W_hc = new_weights(shape=shape_h)
    W_xo = new_weights(shape=shape_x)   # output gate
    W_ho = new_weights(shape=shape_h)

    # Cell-to-gate (peephole) weights are diagonal per the paper: element m
    # of a gate vector only receives input from element m of the cell.
    # One diagonal map per filter.
    W_ci = new_weights_dia(num_filters=num_filters, img_size=img_size)
    W_cf = new_weights_dia(num_filters=num_filters, img_size=img_size)
    W_co = new_weights_dia(num_filters=num_filters, img_size=img_size)

    # One bias per filter for each gate.
    b_i = new_biases(length=num_filters)
    b_f = new_biases(length=num_filters)
    b_c = new_biases(length=num_filters)
    b_o = new_biases(length=num_filters)

    def _conv(x, kernel):
        # Stride-1, SAME-padded 2-D convolution shared by every gate.
        return tf.nn.conv2d(input=x, filter=kernel,
                            strides=[1, 1, 1, 1], padding='SAME')

    # W * x(t)
    Wxi_x = _conv(input, W_xi)
    Wxf_x = _conv(input, W_xf)
    Wxc_x = _conv(input, W_xc)
    Wxo_x = _conv(input, W_xo)
    # W * h(t-1)
    Whi_h = _conv(hid_st, W_hi)
    Whf_h = _conv(hid_st, W_hf)
    Whc_h = _conv(hid_st, W_hc)
    Who_h = _conv(hid_st, W_ho)

    # Input and forget gates peek at the incoming cell state.
    i_gate = tf.sigmoid(Wxi_x + Whi_h + tf.multiply(W_ci, Cell) + b_i)
    f_gate = tf.sigmoid(Wxf_x + Whf_h + tf.multiply(W_cf, Cell) + b_f)
    # New cell state: keep what the forget gate allows, add the gated candidate.
    Cell = tf.multiply(f_gate, Cell) + \
        tf.multiply(i_gate, tf.tanh(Wxc_x + Whc_h + b_c))
    # The output gate peeks at the *updated* cell state.
    o_gate = tf.sigmoid(Wxo_x + Who_h + tf.multiply(W_co, Cell) + b_o)
    hid_st = tf.multiply(o_gate, tf.tanh(Cell))

    if use_pooling:
        # 2x2 pooling windows with stride 1 and SAME padding, so spatial
        # size is unchanged and the states stay shape-compatible with Cell.
        hid_st = tf.nn.max_pool(value=hid_st, ksize=[1, 2, 2, 1],
                                strides=[1, 1, 1, 1], padding='SAME')
        o_gate = tf.nn.max_pool(value=o_gate, ksize=[1, 2, 2, 1],
                                strides=[1, 1, 1, 1], padding='SAME')

    return tf.nn.relu(hid_st), tf.nn.relu(o_gate), Cell
对于解码器我所做的只是使用上面的函数并提供了一个零的张量和编码器的隐藏状态和单元状态(在读取最后一个输入之后),之后我将预测输出作为下一次输入。 但是当我在每个时间步绘制解码器的单元状态和隐藏状态时，我看到它没有学习：解码器的隐藏状态和单元状态在每个时间步都保持同样的值（与传入的初始编码器状态相同）。
如果有人告诉我哪里错了,我会非常感激! 这是我的解码器:def deconvLSTM(input, # Output of the previous layer.
num_input_channels, # Num. channels in prev. layer.
filter_size, # Width and height of each filter.
num_filters, # Number of filters.
img_size, #size of image (width or height)
hid_st, #Hidden state of previous level
Cell, # Cell-state of previous layer
use_pooling):
De_INPUT = input
De_OUTPUT = tf.zeros([0, 64, 64, 16])
De_Hidden = tf.zeros([0, 64, 64, 16])
De_CELL = tf.zeros([0, 64, 64, 16])
De_Hidden = tf.concat([De_Hidden, hid_st], 0)
De_CELL = tf.concat([De_CELL, Cell], 0)
for i in range(10):
deconv_Hidden_State_1, deconv_Output_1, deconv_Cell_1 = new_convLSTM_layer(input=De_INPUT, # The previous layer.
num_input_channels=num_input_channels, # Num. channels in prev. layer.
filter_size=filter_size, # Width and height of each filter.
num_filters=num_filters, # Number of filters.
img_size=img_size, #size of image (width or height)
hid_st=hid_st, #Hidden state of previous level
Cell=Cell,
use_pooling=True)
De_OUTPUT = tf.concat([De_OUTPUT, deconv_Output_1], 0)
De_Hidden = tf.concat([De_Hidden, deconv_Hidden_State_1], 0)
De_CELL = tf.concat([De_CELL, deconv_Cell_1], 0)
De_INPUT = deconv_Output_1
return De_OUTPUT, De_Hidden, De_CELL