I built a convolutional LSTM model using TensorFlow's ConvLSTMCell(), tf.nn.dynamic_rnn(), and tf.contrib.legacy_seq2seq.rnn_decoder(). The model has a 3-layer encoder and a 3-layer decoder, and the decoder's initial states come from the encoder's final states. Layers 1, 2, and 3 use 128, 64, and 64 filters respectively. Finally, I concatenate the decoder outputs and pass them through a convolutional layer to reduce the number of channels to 1, then apply the loss function. My dataset is Moving MNIST, where each sequence has 20 frames; with this model I am trying to predict frames 11 to 20 from the first 10 frames. But the predicted 10-frame sequence is far from the ground truth and essentially just reproduces the last input frame, i.e. frame 10. I am putting the code here; thanks for your help.
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
import random
from random import getrandbits
from tensorflow.contrib.rnn.python.ops.rnn_cell import ConvLSTMCell
from tensorflow.python.ops.rnn_cell import LSTMStateTuple
tf.reset_default_graph()
# cell = ConvLSTMCell()
num_channels = 1
img_size = 64
#filter sizes
filter_size1 = 5
filter_size2 = 5
filter_size3 = 5
#number of filters in each layer
num_filters1 = 128
num_filters2 = 64
num_filters3 = 64
img_size_flat = img_size * img_size
y = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='y')
y_image = tf.reshape(y, [-1, img_size, img_size, num_channels], name='y_image')
z = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='z')
z_image = tf.reshape(z, [-1, img_size, img_size, num_channels], name='z_image')
x = tf.placeholder(tf.float32, shape=[None,None,img_size,img_size,num_channels],
name='x')
with tf.variable_scope("Encoder"):
with tf.variable_scope("Encoder_Layer1"):
InputShape = [img_size, img_size, num_channels]
encoder_1_KernelShape = [filter_size1, filter_size1]
rnn_cell = ConvLSTMCell(2, InputShape, num_filters1, encoder_1_KernelShape,
use_bias=True, forget_bias=1.0, name='Encoder_1')
# defining initial state
#initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)
initial_state = rnn_cell.zero_state(1, dtype=tf.float32)
encoder_1_outputs, encoder_1_state = tf.nn.dynamic_rnn(rnn_cell, x,
initial_state=initial_state,
dtype=tf.float32)
with tf.variable_scope("Encoder_Layer2"):
Encoder_2_InputShape = [img_size, img_size, num_filters1]
encoder_2_KernelShape = [filter_size2, filter_size2]
encoder_2_cell = ConvLSTMCell(2, Encoder_2_InputShape, num_filters2, encoder_2_KernelShape,
use_bias=True, forget_bias=1.0, name='Encoder_2')
initial_state_2 = encoder_2_cell.zero_state(1, dtype=tf.float32)
encoder_2_outputs, encoder_2_state = tf.nn.dynamic_rnn(encoder_2_cell, encoder_1_outputs,
initial_state=initial_state_2,
dtype=tf.float32)
with tf.variable_scope("Encoder_Layer3"):
Encoder_3_InputShape = [img_size, img_size, num_filters2]
encoder_3_KernelShape = [filter_size3, filter_size3]
encoder_3_cell = ConvLSTMCell(2, Encoder_3_InputShape, num_filters3, encoder_3_KernelShape,
use_bias=True, forget_bias=1.0, name='Encoder_3')
initial_state_3 = encoder_3_cell.zero_state(1, dtype=tf.float32)
encoder_3_outputs, encoder_3_state = tf.nn.dynamic_rnn(encoder_3_cell, encoder_2_outputs,
initial_state=initial_state_3,
dtype=tf.float32)
#Weights function
def new_weights(shape, name):
    return tf.get_variable(name, shape, initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.05))
#Convolution function
def conv_layer(input,               # The previous layer.
               num_input_channels,  # Num. channels in prev. layer.
               filter_size,         # Width and height of each filter.
               num_filters):        # Number of filters.
    #with tf.variable_scope("ConvLayer") as Conv_Layer:
    filter_shape = [filter_size, filter_size, num_input_channels, num_filters]
    w = new_weights(shape=filter_shape, name='ConvLayer_Weights')
    conv_output = tf.nn.conv2d(input=input,
                               filter=w,
                               strides=[1, 1, 1, 1],
                               padding='SAME')
    #relu_output = tf.nn.relu(conv_output)
    return conv_output
#Loss function
def loss(prediction, label):
    #with tf.variable_scope("Loss") as Loss_scope:
    log_pred = tf.log(tf.clip_by_value(prediction, 1e-10, 1.0), name='Prediction_Log')
    log_pred_2 = tf.log(tf.clip_by_value(1 - prediction, 1e-10, 1.0), name='1-Prediction_Log')
    cross_entropy = -tf.multiply(label, log_pred) - tf.multiply((1 - label), log_pred_2)
    return cross_entropy
labels = tf.reshape(y_image, [1, 10, 64, 64, 1])
w = tf.get_variable(name="decoder_1_weights", shape=[10, 5, 5, 1, num_filters1], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.05))
# 3-D convolution over the ground-truth frames; note the kernel depth of 10 spans the whole target sequence
true_label = tf.nn.conv3d(input=labels,
                          filter=w,
                          strides=[1, 1, 1, 1, 1],
                          padding='SAME')
true_label = tf.reshape(true_label, [1, 10, 64, 64, num_filters1])
true_label = tf.unstack(true_label, num=10, axis=1)
START = np.zeros((1, 10, 64, 64, num_filters1), dtype=np.float32)
GO = tf.unstack(START, num=10, axis=1)
def loop_fn(previous_output, time):
    if previous_output is None:  # time == 0
        START = tf.placeholder(tf.float32, shape=[None, 1, img_size, img_size, 1], name='START')
        return START
    else:
        return previous_output
#Loop function for the first decoder during training; the ground truth is fed back at random time steps
def loop_fn_train_1(previous_output, time):
    if previous_output is None:  # time == 0
        START = tf.placeholder(tf.float32, shape=[None, 1, img_size, img_size, 1], name='START')
        return START
    else:
        if bool(random.getrandbits(1)):
            return previous_output
        else:
            return true_label[time]
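# Note: rnn_decoder invokes the loop function in a Python loop while the graph is being
# built, so each random.getrandbits(1) draw above is evaluated once per time step at
# graph-construction time; the previous-output vs. ground-truth choice is then frozen
# into the graph and never re-randomized across training iterations (the same applies
# to loop_fn_train_2 and loop_fn_train_3 below).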
with tf.variable_scope("Decoder"):
with tf.variable_scope("Decoder_Layer1"):
decoder_1_InputShape = [img_size,img_size, num_filters1]
decoder_1_KernelShape = [filter_size1,filter_size1]
decoder_1_rnn_cell = ConvLSTMCell(2, decoder_1_InputShape, num_filters1, decoder_1_KernelShape,
use_bias=True, forget_bias=1.0, name='Decoder_1')
decoder_1_outputs, decoder_1_states = tf.contrib.legacy_seq2seq.rnn_decoder(true_label, encoder_1_state,
decoder_1_rnn_cell, loop_fn_train_1)
with tf.variable_scope("Decoder_Layer2"):
decoder_2_InputShape = [img_size,img_size, num_filters2]
decoder_2_KernelShape = [filter_size2,filter_size2]
decoder_2_rnn_cell = ConvLSTMCell(2, decoder_2_InputShape, num_filters2, decoder_2_KernelShape,
use_bias=True, forget_bias=1.0, name='Decoder_2')
w = tf.get_variable(name = "decoder_2_weights", shape =[10, 5, 5, num_filters1, num_filters2], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.05))
decoder_2_inputs = tf.nn.conv3d(input=decoder_1_outputs,
filter=w,
strides=[1, 1, 1, 1, 1],
padding='SAME')
decoder_2_inputs = tf.reshape(decoder_2_inputs, [1, 10, 64, 64, num_filters2])
decoder_2_inputs = tf.unstack(decoder_2_inputs, num = 10, axis = 1)
#loop function for the second decoder in the training phase, we are randomly feeding the ground truth
def loop_fn_train_2(previous_output, time):
if previous_output is None: # time == 0
START = tf.placeholder(tf.float32, shape=[None,1, img_size, img_size, 1], name='START')
return START
else:
if(bool(random.getrandbits(1))):
return previous_output
else:
return decoder_2_inputs[time]
decoder_2_outputs, decoder_2_states = tf.contrib.legacy_seq2seq.rnn_decoder(decoder_2_inputs, encoder_2_state,
decoder_2_rnn_cell, loop_fn_train_2)
with tf.variable_scope("Decoder_Layer3"):
decoder_3_InputShape = [img_size,img_size, num_filters3]
decoder_3_KernelShape = [filter_size3,filter_size3]
decoder_3_rnn_cell = ConvLSTMCell(2, decoder_3_InputShape, num_filters3, decoder_3_KernelShape,
use_bias=True, forget_bias=1.0, name='Decoder_3')
w = tf.get_variable(name = "decoder_3_weights", shape =[10, 5, 5, num_filters2, num_filters3], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.05))
decoder_3_inputs = tf.nn.conv3d(input=decoder_2_outputs,
filter=w,
strides=[1, 1, 1, 1, 1],
padding='SAME')
decoder_3_inputs = tf.reshape(decoder_3_inputs, [1, 10, 64, 64, num_filters3])
decoder_3_inputs = tf.unstack(decoder_3_inputs, num = 10, axis = 1)
#loop function for the second decoder in the training phase, we are randomly feeding the ground truth
def loop_fn_train_3(previous_output, time):
if previous_output is None: # time == 0
START = tf.placeholder(tf.float32, shape=[None,1, img_size, img_size, 1], name='START')
return START
else:
if(bool(random.getrandbits(1))):
return previous_output
else:
return decoder_3_inputs[time]
decoder_3_outputs, decoder_3_states = tf.contrib.legacy_seq2seq.rnn_decoder(decoder_3_inputs, encoder_3_state,
decoder_3_rnn_cell, loop_fn_train_3)
with tf.variable_scope("Decoder", reuse=True):
with tf.variable_scope("Decoder_Layer1"):
decoder_1_InputShape = [img_size,img_size, num_filters1]
decoder_1_KernelShape = [filter_size1,filter_size1]
decoder_1_rnn_cell = ConvLSTMCell(2, decoder_1_InputShape, num_filters1, decoder_1_KernelShape,
use_bias=True, forget_bias=1.0, name='Decoder_1')
Test_decoder_1_outputs, Test_decoder_1_states = tf.contrib.legacy_seq2seq.rnn_decoder(GO, encoder_1_state,
decoder_1_rnn_cell, loop_fn)
with tf.variable_scope("Decoder_Layer2"):
decoder_2_InputShape = [img_size,img_size, num_filters2]
decoder_2_KernelShape = [filter_size2,filter_size2]
decoder_2_rnn_cell = ConvLSTMCell(2, decoder_2_InputShape, num_filters2, decoder_2_KernelShape,
use_bias=True, forget_bias=1.0, name='Decoder_2')
w = tf.get_variable(name = "decoder_2_weights", shape =[10, 5, 5, num_filters1, num_filters2], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.05))
decoder_2_inputs = tf.nn.conv3d(input=Test_decoder_1_outputs,
filter=w,
strides=[1, 1, 1, 1, 1],
padding='SAME')
decoder_2_inputs = tf.reshape(decoder_2_inputs, [1, 10, 64, 64, num_filters2])
decoder_2_inputs = tf.unstack(decoder_2_inputs, num = 10, axis = 1)
Test_decoder_2_outputs, Test_decoder_2_states = tf.contrib.legacy_seq2seq.rnn_decoder(decoder_2_inputs, encoder_2_state,
decoder_2_rnn_cell,
loop_fn)
with tf.variable_scope("Decoder_Layer3"):
decoder_3_InputShape = [img_size,img_size, num_filters3]
decoder_3_KernelShape = [filter_size3,filter_size3]
decoder_3_rnn_cell = ConvLSTMCell(2, decoder_3_InputShape, num_filters3, decoder_3_KernelShape,
use_bias=True, forget_bias=1.0, name='Decoder_3')
w = tf.get_variable(name = "decoder_3_weights", shape =[10, 5, 5, num_filters2, num_filters3], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.05))
decoder_3_inputs = tf.nn.conv3d(input=Test_decoder_2_outputs,
filter=w,
strides=[1, 1, 1, 1, 1],
padding='SAME')
decoder_3_inputs = tf.reshape(decoder_3_inputs, [1, 10, 64, 64, num_filters3])
decoder_3_inputs = tf.unstack(decoder_3_inputs, num = 10, axis = 1)
Test_decoder_3_outputs, Test_decoder_3_states = tf.contrib.legacy_seq2seq.rnn_decoder(decoder_3_inputs, encoder_3_state,
decoder_3_rnn_cell, loop_fn)
Conv_inputs = tf.concat([decoder_1_outputs,decoder_2_outputs, decoder_3_outputs], 4)
Conv_inputs = tf.reshape(Conv_inputs, [10, 64, 64, num_filters1 + num_filters2 + num_filters3])
Test_Conv_inputs = tf.concat([Test_decoder_1_outputs, Test_decoder_2_outputs, Test_decoder_3_outputs], 4)
Test_Conv_inputs = tf.reshape(Test_Conv_inputs, [10, 64, 64, num_filters1 + num_filters2 + num_filters3])
with tf.variable_scope("ConvLayer"):
with tf.variable_scope("ConvLayer_Pred"):
pred_1 = conv_layer(input=Conv_inputs, # The previous layer.
num_input_channels=num_filters1 + num_filters2 + num_filters3, # Num. channels in prev. layer.
filter_size=1, # Width and height of each filter.
num_filters=1)
with tf.variable_scope("ConvLayer", reuse=True):
with tf.variable_scope("ConvLayer_Pred"):
Test_pred_1 = conv_layer(input=Test_Conv_inputs, # The previous layer.
num_input_channels=num_filters1 + num_filters2 + num_filters3, # Num. channels in prev. layer.
filter_size=1, # Width and height of each filter.
num_filters=1)
with tf.variable_scope("Training_Loss"):
with tf.variable_scope("Loss_Pred"):
Pdistance = loss(prediction=pred_1, label=y_image)
#cost = tf.reduce_sum(distance)
with tf.variable_scope("Training_Loss", reuse=True):
with tf.variable_scope("Loss_Pred"):
Test_Pdistance = loss(prediction=Test_pred_1, label=y_image)
#cost = tf.reduce_sum(distance)
cost = tf.reduce_sum(Pdistance)
Test_cost = tf.reduce_sum(Test_Pdistance)
#batch_cost += cost
with tf.variable_scope("Optimizer"):
optimizer = tf.train.AdamOptimizer(1e-3).minimize(cost)
session = tf.Session()
session.run(tf.global_variables_initializer())
cwd = '/Users/maryamr/Tensorflow/'
data = np.load(cwd+'mnist_test_seq.npy')
data_2 = data.reshape([20*10000,64*64])
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
scaler = MinMaxScaler()
scaler.fit(data_2)
print(scaler.data_max_)
data_3 = scaler.transform(data_2)
data_3 = data_3.reshape([20, 10000, 64, 64])
cost_record = np.zeros(10000)
for i in range(10000):
    x_train = data_3[0:10, i, :, :]
    x_train = x_train.flatten()
    x_train = x_train.reshape([1, 10, img_size, img_size, 1])
    x_train = np.float32(x_train)
    y_train = data_3[10:20, i, :, :]
    #print("true_y_sum: {}".format(np.sum(y_train)))
    y_train = y_train.flatten()
    y_train = y_train.reshape([10, img_size * img_size])
    y_train = np.float32(y_train)
    x_2 = np.reshape(x_train, [10, 64, 64, 1])
    x_train_reverse = np.flip(x_2, 0)
    z_train = np.reshape(x_train_reverse, [10, 64*64])
    feed_dict_train = {x: x_train,
                       y: y_train,
                       z: z_train}
    if i < 9990:
        session.run(optimizer, feed_dict=feed_dict_train)
        cost_out = session.run(cost, feed_dict=feed_dict_train)
        cost_record[i] = cost_out
    else:
        final_pred_1 = session.run(Test_pred_1, feed_dict=feed_dict_train)
        # note: this rebinds the Python name true_label (previously the list of decoder-input tensors)
        true_label = session.run(y_image, feed_dict=feed_dict_train)
        #Hid = session.run(encoder_1_state.h, feed_dict=feed_dict_train)
        Cell_1, Cell_2, Cell_3 = session.run([encoder_1_state.c, encoder_2_state.c, encoder_3_state.c], feed_dict=feed_dict_train)
        cost_out = session.run(Test_cost, feed_dict=feed_dict_train)
        print("cost: {}".format(cost_out))
        cost_record[i] = cost_out
plt.plot(cost_record)
plt.xlabel('number of iterations')
plt.ylabel('loss')
#plt.show()
plt.savefig('/Users/maryamr/Loss_plot.png', bbox_inches = 'tight')
plt.imsave('/Users/maryamr/Cell_1.png', Cell_1[0,:,:,15], cmap='gray')
plt.imsave('/Users/maryamr/Cell_2.png', Cell_2[0,:,:,15], cmap='gray')
plt.imsave('/Users/maryamr/Cell_3.png', Cell_3[0,:,:,15], cmap='gray')
f, axarr = plt.subplots(2, 5)
m = 0
for i in range(2):
    for j in range(5):
        axarr[i, j].imshow(final_pred_1[m, :, :, 0], cmap='gray')
        axarr[i, j].get_xaxis().set_visible(False)  # hide axis ticks
        axarr[i, j].get_yaxis().set_visible(False)
        m += 1
plt.savefig('/Users/maryamr/final_pred_1_10.png', bbox_inches='tight')
f, axarr = plt.subplots(2, 5)
m = 0
for i in range(2):
    for j in range(5):
        axarr[i, j].imshow(true_label[m, :, :, 0], cmap='gray')
        axarr[i, j].get_xaxis().set_visible(False)  # hide axis ticks
        axarr[i, j].get_yaxis().set_visible(False)
        m += 1
plt.savefig('/Users/maryamr/true_label_10.png', bbox_inches='tight')
These are the input, the output, and the loss plot (the first 10 images are the input; the next 10 images are the ground truth for the prediction). I train the model on the first 9990 sequences and test on sequences 9990 to 10000, which is why you see the jump in the loss plot. The results shown are also for the 10000th sequence:
Answer 0 (score: 0):
You have not saved the model anywhere; if you save it, you can restore it later and use it to make predictions.
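A minimal sketch of that workflow with tf.train.Saver, assuming the graph and `session` built in the question (the checkpoint path below is only a hypothetical example):

saver = tf.train.Saver()  # by default, covers all saveable variables in the graph

# during or after training: write the current variable values to a checkpoint
save_path = saver.save(session, '/Users/maryamr/Tensorflow/convlstm_model.ckpt')

# later, in a fresh process: rebuild the same graph, then restore the weights
# instead of running tf.global_variables_initializer()
with tf.Session() as sess:
    saver.restore(sess, save_path)
    predictions = sess.run(Test_pred_1, feed_dict=feed_dict_train)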