I am trying to build an LSTM with an RNN. I built an LSTM model, followed by two DNN layers and a regression output layer. I trained it on my data, and the final training loss was about 0.009. However, when I applied the model to the test data, the loss was about 0.5.
The training loss at epoch 1 was also about 0.5, so I think the trained variables are not being used by the test model. The only difference between the training and test models is the batch size: training batch size = 100~200, test batch size = 1.
In the main function I create the LSTM instances. The model is built in the LSTM initializer:
def __init__(self, config, train_model=None):
    self.sess = sess = tf.Session()

    # hyper-parameters pulled from the config
    self.num_steps = num_steps = config.num_steps
    self.lstm_size = lstm_size = config.lstm_size
    self.num_features = num_features = config.num_features
    self.num_layers = num_layers = config.num_layers
    self.num_hiddens = num_hiddens = config.num_hiddens
    self.batch_size = batch_size = config.batch_size
    self.train = train = config.train
    self.epoch = config.epoch
    self.learning_rate = learning_rate = config.learning_rate

    with tf.variable_scope('model') as scope:
        self.lstm_cell = lstm_cell = tf.nn.rnn_cell.LSTMCell(
            lstm_size, initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        self.cell = cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)

    with tf.name_scope('placeholders'):
        self.x = tf.placeholder(tf.float32, [self.batch_size, num_steps, num_features],
                                name='input-x')
        self.y = tf.placeholder(tf.float32, [self.batch_size, num_features], name='input-y')
        self.init_state = cell.zero_state(self.batch_size, tf.float32)

    with tf.variable_scope('model'):
        # two fully connected layers plus the regression output layer
        self.W1 = tf.Variable(tf.truncated_normal([lstm_size * num_steps, num_hiddens], stddev=0.1), name='W1')
        self.b1 = tf.Variable(tf.truncated_normal([num_hiddens], stddev=0.1), name='b1')
        self.W2 = tf.Variable(tf.truncated_normal([num_hiddens, num_hiddens], stddev=0.1), name='W2')
        self.b2 = tf.Variable(tf.truncated_normal([num_hiddens], stddev=0.1), name='b2')
        self.W3 = tf.Variable(tf.truncated_normal([num_hiddens, num_features], stddev=0.1), name='W3')
        self.b3 = tf.Variable(tf.truncated_normal([num_features], stddev=0.1), name='b3')

    self.output, self.loss = self.inference()
    tf.initialize_all_variables().run(session=sess)
    tf.initialize_variables([self.b2]).run(session=sess)

    if train_model is None:
        self.train_step = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss)
Using the LSTM __init__ above, I create the LSTM instances below:
with tf.variable_scope("model", reuse=None):
    train_model = LSTM(main_config)
with tf.variable_scope("model", reuse=True):
    predict_model = LSTM(predict_config)
After creating the two LSTM instances, I trained train_model and then fed the test set into predict_model. Why are the variables not being reused?
Answer (score: 2)
The problem is that you should be using tf.get_variable() to create your variables, instead of tf.Variable(), if you are reusing a scope. Take a look at this tutorial on sharing variables, and you will understand it better.
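To make the difference concrete, here is a minimal standalone sketch (the scope and variable names are illustrative, not from your code): tf.get_variable() looks a variable up by name in the current variable scope, so under reuse=True it returns the existing variable, whereas tf.Variable() unconditionally creates a new one.

import tensorflow as tf

def make_weight():
    # get_variable() participates in variable sharing; Variable() never does
    return tf.get_variable('W', shape=[2, 2],
                           initializer=tf.truncated_normal_initializer(stddev=0.1))

with tf.variable_scope('model', reuse=None):
    w_train = make_weight()    # creates model/W
with tf.variable_scope('model', reuse=True):
    w_predict = make_weight()  # returns the existing model/W

print(w_train is w_predict)    # True: both names refer to the same variable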
Also, you don't need to use a session here, because you don't have to initialize the variables when you are defining your model; the variables should be initialized when you are about to train it.
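As a sketch of what that looks like (assuming the __init__ below, with the tf.Session() and the initialize calls removed, and assuming batch_x/batch_y come from your data pipeline):

with tf.variable_scope("model", reuse=None):
    train_model = LSTM(main_config)
with tf.variable_scope("model", reuse=True):
    predict_model = LSTM(predict_config)

sess = tf.Session()
sess.run(tf.initialize_all_variables())  # initialize once, right before training

# one training step; predict_model shares the trained weights
sess.run(train_model.train_step,
         feed_dict={train_model.x: batch_x, train_model.y: batch_y})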
The code to reuse the variables is the following:
def __init__(self, config, train_model=None):
    self.num_steps = num_steps = config.num_steps
    self.lstm_size = lstm_size = config.lstm_size
    self.num_features = num_features = config.num_features
    self.num_layers = num_layers = config.num_layers
    self.num_hiddens = num_hiddens = config.num_hiddens
    self.batch_size = batch_size = config.batch_size
    self.train = train = config.train
    self.epoch = config.epoch
    self.learning_rate = learning_rate = config.learning_rate

    with tf.variable_scope('model') as scope:
        self.lstm_cell = lstm_cell = tf.nn.rnn_cell.LSTMCell(
            lstm_size, initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        self.cell = cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)

    with tf.name_scope('placeholders'):
        self.x = tf.placeholder(tf.float32, [self.batch_size, num_steps, num_features],
                                name='input-x')
        self.y = tf.placeholder(tf.float32, [self.batch_size, num_features], name='input-y')
        self.init_state = cell.zero_state(self.batch_size, tf.float32)

    with tf.variable_scope('model'):
        # tf.get_variable() looks each weight up by name, so the second
        # instance (built with reuse=True) shares these variables
        self.W1 = tf.get_variable(initializer=tf.truncated_normal([lstm_size * num_steps, num_hiddens], stddev=0.1), name='W1')
        self.b1 = tf.get_variable(initializer=tf.truncated_normal([num_hiddens], stddev=0.1), name='b1')
        self.W2 = tf.get_variable(initializer=tf.truncated_normal([num_hiddens, num_hiddens], stddev=0.1), name='W2')
        self.b2 = tf.get_variable(initializer=tf.truncated_normal([num_hiddens], stddev=0.1), name='b2')
        self.W3 = tf.get_variable(initializer=tf.truncated_normal([num_hiddens, num_features], stddev=0.1), name='W3')
        self.b3 = tf.get_variable(initializer=tf.truncated_normal([num_features], stddev=0.1), name='b3')

    self.output, self.loss = self.inference()

    if train_model is None:
        self.train_step = tf.train.GradientDescentOptimizer(self.learning_rate).minimize(self.loss)
To see which variables are created after building train_model and predict_model, use the following code:
for v in tf.all_variables():
    print(v.name)
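If reuse is working, each weight is listed exactly once. With the nested scopes above, the output would look roughly like this (the exact prefixes depend on your scope nesting, so treat these names as an assumption), plus the LSTM cell's own weights once inference() has built the RNN:

model/model/W1:0
model/model/b1:0
model/model/W2:0
model/model/b2:0
model/model/W3:0
model/model/b3:0

If the variables were being created rather than reused, each weight would show up a second time under a uniquified name.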