I built a convolutional LSTM model using TensorFlow's ConvLSTMCell(), tf.nn.dynamic_rnn(), and tf.contrib.legacy_seq2seq.rnn_decoder(). The model has a 3-layer encoder and a 3-layer decoder, and the decoder's initial states come from the encoder's final states. Layers 1, 2, and 3 use 128, 64, and 64 filters respectively. Finally, I concatenate the decoder outputs and pass them through a convolutional layer to reduce the number of channels to 1, then apply the loss function. My dataset is Moving MNIST, where each sequence has 20 frames; with this model I am trying to predict frames 11 to 20 from the first 10 frames. But the predicted 10-frame sequence is far from the ground truth and essentially just reproduces the last input frame, i.e. frame 10. I am putting the code here; thanks for your help.
import matplotlib
matplotlib.use('Agg')
from matplotlib import pyplot as plt
import tensorflow as tf
import numpy as np
from sklearn.metrics import confusion_matrix
import time
from datetime import timedelta
import math
import random
from random import getrandbits
from tensorflow.contrib.rnn.python.ops.rnn_cell import ConvLSTMCell
from tensorflow.python.ops.rnn_cell import LSTMStateTuple
tf.reset_default_graph()
# cell = ConvLSTMCell()
num_channels = 1
img_size = 64
#filter sizes
filter_size1 = 5
filter_size2 = 5
filter_size3 = 5
#number of filters in each layer
num_filters1 = 128
num_filters2 = 64
num_filters3 = 64
img_size_flat = img_size * img_size
y = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='y')
y_image = tf.reshape(y, [-1, img_size, img_size, num_channels], name='y_image')
z = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='z')
z_image = tf.reshape(z, [-1, img_size, img_size, num_channels], name='z_image')
x = tf.placeholder(tf.float32, shape=[None,None,img_size,img_size,num_channels],
name='x')
with tf.variable_scope("Encoder"):
with tf.variable_scope("Encoder_Layer1"):
InputShape = [img_size, img_size, num_channels]
encoder_1_KernelShape = [filter_size1, filter_size1]
rnn_cell = ConvLSTMCell(2, InputShape, num_filters1, encoder_1_KernelShape,
use_bias=True, forget_bias=1.0, name='Encoder_1')
# defining initial state
#initial_state = rnn_cell.zero_state(batch_size, dtype=tf.float32)
initial_state = rnn_cell.zero_state(1, dtype=tf.float32)
encoder_1_outputs, encoder_1_state = tf.nn.dynamic_rnn(rnn_cell, x,
initial_state=initial_state,
dtype=tf.float32)
with tf.variable_scope("Encoder_Layer2"):
Encoder_2_InputShape = [img_size, img_size, num_filters1]
encoder_2_KernelShape = [filter_size2, filter_size2]
encoder_2_cell = ConvLSTMCell(2, Encoder_2_InputShape, num_filters2, encoder_2_KernelShape,
use_bias=True, forget_bias=1.0, name='Encoder_2')
initial_state_2 = encoder_2_cell.zero_state(1, dtype=tf.float32)
encoder_2_outputs, encoder_2_state = tf.nn.dynamic_rnn(encoder_2_cell, encoder_1_outputs,
initial_state=initial_state_2,
dtype=tf.float32)
with tf.variable_scope("Encoder_Layer3"):
Encoder_3_InputShape = [img_size, img_size, num_filters2]
encoder_3_KernelShape = [filter_size3, filter_size3]
encoder_3_cell = ConvLSTMCell(2, Encoder_3_InputShape, num_filters3, encoder_3_KernelShape,
use_bias=True, forget_bias=1.0, name='Encoder_3')
initial_state_3 = encoder_3_cell.zero_state(1, dtype=tf.float32)
encoder_3_outputs, encoder_3_state = tf.nn.dynamic_rnn(encoder_3_cell, encoder_2_outputs,
initial_state=initial_state_3,
dtype=tf.float32)
#Weights function
def new_weights(shape, name):
    return tf.get_variable(name, shape, initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.05))
#Convolution function
def conv_layer(input,               # The previous layer.
               num_input_channels,  # Num. channels in prev. layer.
               filter_size,         # Width and height of each filter.
               num_filters):        # Number of filters.
    #with tf.variable_scope("ConvLayer") as Conv_Layer:
    filter_shape = [filter_size, filter_size, num_input_channels, num_filters]
    w = new_weights(shape=filter_shape, name='ConvLayer_Weights')
    conv_output = tf.nn.conv2d(input=input,
                               filter=w,
                               strides=[1, 1, 1, 1],
                               padding='SAME')
    #relu_output = tf.nn.relu(conv_output)
    return conv_output
#Loss function
def loss(prediction, label):
    #with tf.variable_scope("Loss") as Loss_scope:
    log_pred = tf.log(tf.clip_by_value(prediction, 1e-10, 1.0), name='Prediction_Log')
    log_pred_2 = tf.log(tf.clip_by_value(1 - prediction, 1e-10, 1.0), name='1-Prediction_Log')
    cross_entropy = -tf.multiply(label, log_pred) - tf.multiply((1 - label), log_pred_2)
    return cross_entropy
labels = tf.reshape(y_image, [1, 10, 64, 64, 1])
w = tf.get_variable(name="decoder_1_weights", shape=[10, 5, 5, 1, num_filters1], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.05))
# 3-D convolution over the ground-truth frames; note the kernel depth of 10 spans the whole target sequence
true_label = tf.nn.conv3d(input=labels,
                          filter=w,
                          strides=[1, 1, 1, 1, 1],
                          padding='SAME')
true_label = tf.reshape(true_label, [1, 10, 64, 64, num_filters1])
true_label = tf.unstack(true_label, num=10, axis=1)
START = np.zeros((1, 10, 64, 64, num_filters1), dtype=np.float32)
GO = tf.unstack(START, num=10, axis=1)
def loop_fn(previous_output, time):
    if previous_output is None:  # time == 0
        START = tf.placeholder(tf.float32, shape=[None, 1, img_size, img_size, 1], name='START')
        return START
    else:
        return previous_output
#Loop function for the first decoder during training; the ground truth is fed back at random time steps
def loop_fn_train_1(previous_output, time):
    if previous_output is None:  # time == 0
        START = tf.placeholder(tf.float32, shape=[None, 1, img_size, img_size, 1], name='START')
        return START
    else:
        if bool(random.getrandbits(1)):
            return previous_output
        else:
            return true_label[time]
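# Note: rnn_decoder invokes the loop function in a Python loop while the graph is being
# built, so each random.getrandbits(1) draw above is evaluated once per time step at
# graph-construction time; the previous-output vs. ground-truth choice is then frozen
# into the graph and never re-randomized across training iterations (the same applies
# to loop_fn_train_2 and loop_fn_train_3 below).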
with tf.variable_scope("Decoder"):
with tf.variable_scope("Decoder_Layer1"):
decoder_1_InputShape = [img_size,img_size, num_filters1]
decoder_1_KernelShape = [filter_size1,filter_size1]
decoder_1_rnn_cell = ConvLSTMCell(2, decoder_1_InputShape, num_filters1, decoder_1_KernelShape,
use_bias=True, forget_bias=1.0, name='Decoder_1')
decoder_1_outputs, decoder_1_states = tf.contrib.legacy_seq2seq.rnn_decoder(true_label, encoder_1_state,
decoder_1_rnn_cell, loop_fn_train_1)
with tf.variable_scope("Decoder_Layer2"):
decoder_2_InputShape = [img_size,img_size, num_filters2]
decoder_2_KernelShape = [filter_size2,filter_size2]
decoder_2_rnn_cell = ConvLSTMCell(2, decoder_2_InputShape, num_filters2, decoder_2_KernelShape,
use_bias=True, forget_bias=1.0, name='Decoder_2')
w = tf.get_variable(name = "decoder_2_weights", shape =[10, 5, 5, num_filters1, num_filters2], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.05))
decoder_2_inputs = tf.nn.conv3d(input=decoder_1_outputs,
filter=w,
strides=[1, 1, 1, 1, 1],
padding='SAME')
decoder_2_inputs = tf.reshape(decoder_2_inputs, [1, 10, 64, 64, num_filters2])
decoder_2_inputs = tf.unstack(decoder_2_inputs, num = 10, axis = 1)
#loop function for the second decoder in the training phase, we are randomly feeding the ground truth
def loop_fn_train_2(previous_output, time):
if previous_output is None: # time == 0
START = tf.placeholder(tf.float32, shape=[None,1, img_size, img_size, 1], name='START')
return START
else:
if(bool(random.getrandbits(1))):
return previous_output
else:
return decoder_2_inputs[time]
decoder_2_outputs, decoder_2_states = tf.contrib.legacy_seq2seq.rnn_decoder(decoder_2_inputs, encoder_2_state,
decoder_2_rnn_cell, loop_fn_train_2)
with tf.variable_scope("Decoder_Layer3"):
decoder_3_InputShape = [img_size,img_size, num_filters3]
decoder_3_KernelShape = [filter_size3,filter_size3]
decoder_3_rnn_cell = ConvLSTMCell(2, decoder_3_InputShape, num_filters3, decoder_3_KernelShape,
use_bias=True, forget_bias=1.0, name='Decoder_3')
w = tf.get_variable(name = "decoder_3_weights", shape =[10, 5, 5, num_filters2, num_filters3], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.05))
decoder_3_inputs = tf.nn.conv3d(input=decoder_2_outputs,
filter=w,
strides=[1, 1, 1, 1, 1],
padding='SAME')
decoder_3_inputs = tf.reshape(decoder_3_inputs, [1, 10, 64, 64, num_filters3])
decoder_3_inputs = tf.unstack(decoder_3_inputs, num = 10, axis = 1)
#loop function for the second decoder in the training phase, we are randomly feeding the ground truth
def loop_fn_train_3(previous_output, time):
if previous_output is None: # time == 0
START = tf.placeholder(tf.float32, shape=[None,1, img_size, img_size, 1], name='START')
return START
else:
if(bool(random.getrandbits(1))):
return previous_output
else:
return decoder_3_inputs[time]
decoder_3_outputs, decoder_3_states = tf.contrib.legacy_seq2seq.rnn_decoder(decoder_3_inputs, encoder_3_state,
decoder_3_rnn_cell, loop_fn_train_3)
with tf.variable_scope("Decoder", reuse=True):
with tf.variable_scope("Decoder_Layer1"):
decoder_1_InputShape = [img_size,img_size, num_filters1]
decoder_1_KernelShape = [filter_size1,filter_size1]
decoder_1_rnn_cell = ConvLSTMCell(2, decoder_1_InputShape, num_filters1, decoder_1_KernelShape,
use_bias=True, forget_bias=1.0, name='Decoder_1')
Test_decoder_1_outputs, Test_decoder_1_states = tf.contrib.legacy_seq2seq.rnn_decoder(GO, encoder_1_state,
decoder_1_rnn_cell, loop_fn)
with tf.variable_scope("Decoder_Layer2"):
decoder_2_InputShape = [img_size,img_size, num_filters2]
decoder_2_KernelShape = [filter_size2,filter_size2]
decoder_2_rnn_cell = ConvLSTMCell(2, decoder_2_InputShape, num_filters2, decoder_2_KernelShape,
use_bias=True, forget_bias=1.0, name='Decoder_2')
w = tf.get_variable(name = "decoder_2_weights", shape =[10, 5, 5, num_filters1, num_filters2], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.05))
decoder_2_inputs = tf.nn.conv3d(input=Test_decoder_1_outputs,
filter=w,
strides=[1, 1, 1, 1, 1],
padding='SAME')
decoder_2_inputs = tf.reshape(decoder_2_inputs, [1, 10, 64, 64, num_filters2])
decoder_2_inputs = tf.unstack(decoder_2_inputs, num = 10, axis = 1)
Test_decoder_2_outputs, Test_decoder_2_states = tf.contrib.legacy_seq2seq.rnn_decoder(decoder_2_inputs, encoder_2_state,
decoder_2_rnn_cell,
loop_fn)
with tf.variable_scope("Decoder_Layer3"):
decoder_3_InputShape = [img_size,img_size, num_filters3]
decoder_3_KernelShape = [filter_size3,filter_size3]
decoder_3_rnn_cell = ConvLSTMCell(2, decoder_3_InputShape, num_filters3, decoder_3_KernelShape,
use_bias=True, forget_bias=1.0, name='Decoder_3')
w = tf.get_variable(name = "decoder_3_weights", shape =[10, 5, 5, num_filters2, num_filters3], initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.05))
decoder_3_inputs = tf.nn.conv3d(input=Test_decoder_2_outputs,
filter=w,
strides=[1, 1, 1, 1, 1],
padding='SAME')
decoder_3_inputs = tf.reshape(decoder_3_inputs, [1, 10, 64, 64, num_filters3])
decoder_3_inputs = tf.unstack(decoder_3_inputs, num = 10, axis = 1)
Test_decoder_3_outputs, Test_decoder_3_states = tf.contrib.legacy_seq2seq.rnn_decoder(decoder_3_inputs, encoder_3_state,
decoder_3_rnn_cell, loop_fn)
Conv_inputs = tf.concat([decoder_1_outputs,decoder_2_outputs, decoder_3_outputs], 4)
Conv_inputs = tf.reshape(Conv_inputs, [10, 64, 64, num_filters1 + num_filters2 + num_filters3])
Test_Conv_inputs = tf.concat([Test_decoder_1_outputs, Test_decoder_2_outputs, Test_decoder_3_outputs], 4)
Test_Conv_inputs = tf.reshape(Test_Conv_inputs, [10, 64, 64, num_filters1 + num_filters2 + num_filters3])
with tf.variable_scope("ConvLayer"):
with tf.variable_scope("ConvLayer_Pred"):
pred_1 = conv_layer(input=Conv_inputs, # The previous layer.
num_input_channels=num_filters1 + num_filters2 + num_filters3, # Num. channels in prev. layer.
filter_size=1, # Width and height of each filter.
num_filters=1)
with tf.variable_scope("ConvLayer", reuse=True):
with tf.variable_scope("ConvLayer_Pred"):
Test_pred_1 = conv_layer(input=Test_Conv_inputs, # The previous layer.
num_input_channels=num_filters1 + num_filters2 + num_filters3, # Num. channels in prev. layer.
filter_size=1, # Width and height of each filter.
num_filters=1)
with tf.variable_scope("Training_Loss"):
with tf.variable_scope("Loss_Pred"):
Pdistance = loss(prediction=pred_1, label=y_image)
#cost = tf.reduce_sum(distance)
with tf.variable_scope("Training_Loss", reuse=True):
with tf.variable_scope("Loss_Pred"):
Test_Pdistance = loss(prediction=Test_pred_1, label=y_image)
#cost = tf.reduce_sum(distance)
cost = tf.reduce_sum(Pdistance)
Test_cost = tf.reduce_sum(Test_Pdistance)
#batch_cost += cost
with tf.variable_scope("Optimizer"):
optimizer = tf.train.AdamOptimizer(1e-3).minimize(cost)
session = tf.Session()
session.run(tf.global_variables_initializer())
cwd = '/Users/maryamr/Tensorflow/'
data = np.load(cwd+'mnist_test_seq.npy')
data_2 = data.reshape([20*10000,64*64])
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
scaler = MinMaxScaler()
scaler.fit(data_2)
print(scaler.data_max_)
data_3 = scaler.transform(data_2)
data_3 = data_3.reshape([20, 10000, 64, 64])
cost_record = np.zeros(10000)
for i in range(10000):
    x_train = data_3[0:10, i, :, :]
    x_train = x_train.flatten()
    x_train = x_train.reshape([1, 10, img_size, img_size, 1])
    x_train = np.float32(x_train)
    y_train = data_3[10:20, i, :, :]
    #print("true_y_sum: {}".format(np.sum(y_train)))
    y_train = y_train.flatten()
    y_train = y_train.reshape([10, img_size * img_size])
    y_train = np.float32(y_train)
    x_2 = np.reshape(x_train, [10, 64, 64, 1])
    x_train_reverse = np.flip(x_2, 0)
    z_train = np.reshape(x_train_reverse, [10, 64*64])
    feed_dict_train = {x: x_train,
                       y: y_train,
                       z: z_train}
    if i < 9990:
        session.run(optimizer, feed_dict=feed_dict_train)
        cost_out = session.run(cost, feed_dict=feed_dict_train)
        cost_record[i] = cost_out
    else:
        final_pred_1 = session.run(Test_pred_1, feed_dict=feed_dict_train)
        # note: this rebinds the Python name true_label (previously the list of decoder-input tensors)
        true_label = session.run(y_image, feed_dict=feed_dict_train)
        #Hid = session.run(encoder_1_state.h, feed_dict=feed_dict_train)
        Cell_1, Cell_2, Cell_3 = session.run([encoder_1_state.c, encoder_2_state.c, encoder_3_state.c], feed_dict=feed_dict_train)
        cost_out = session.run(Test_cost, feed_dict=feed_dict_train)
        print("cost: {}".format(cost_out))
        cost_record[i] = cost_out
plt.plot(cost_record)
plt.xlabel('number of iterations')
plt.ylabel('loss')
#plt.show()
plt.savefig('/Users/maryamr/Loss_plot.png', bbox_inches = 'tight')
plt.imsave('/Users/maryamr/Cell_1.png', Cell_1[0,:,:,15], cmap='gray')
plt.imsave('/Users/maryamr/Cell_2.png', Cell_2[0,:,:,15], cmap='gray')
plt.imsave('/Users/maryamr/Cell_3.png', Cell_3[0,:,:,15], cmap='gray')
f, axarr = plt.subplots(2, 5)
m = 0
for i in range(2):
    for j in range(5):
        axarr[i, j].imshow(final_pred_1[m, :, :, 0], cmap='gray')
        axarr[i, j].get_xaxis().set_visible(False)  # hide axis ticks
        axarr[i, j].get_yaxis().set_visible(False)
        m += 1
plt.savefig('/Users/maryamr/final_pred_1_10.png', bbox_inches='tight')
f, axarr = plt.subplots(2, 5)
m = 0
for i in range(2):
    for j in range(5):
        axarr[i, j].imshow(true_label[m, :, :, 0], cmap='gray')
        axarr[i, j].get_xaxis().set_visible(False)  # hide axis ticks
        axarr[i, j].get_yaxis().set_visible(False)
        m += 1
plt.savefig('/Users/maryamr/true_label_10.png', bbox_inches='tight')
These are the input, the output, and the loss plot (the first 10 images are the input; the next 10 images are the ground truth for the prediction). I train the model on the first 9990 sequences and test on sequences 9990 to 10000, which is why you see the jump in the loss plot. The results shown are also for the 10000th sequence:
Answer 0 (score: 0):
You have not saved the model anywhere; if you save it, you can restore it later and use it to make predictions.
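A minimal sketch of that workflow with tf.train.Saver, assuming the graph and `session` built in the question (the checkpoint path below is only a hypothetical example):

saver = tf.train.Saver()  # by default, covers all saveable variables in the graph

# during or after training: write the current variable values to a checkpoint
save_path = saver.save(session, '/Users/maryamr/Tensorflow/convlstm_model.ckpt')

# later, in a fresh process: rebuild the same graph, then restore the weights
# instead of running tf.global_variables_initializer()
with tf.Session() as sess:
    saver.restore(sess, save_path)
    predictions = sess.run(Test_pred_1, feed_dict=feed_dict_train)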