When I train my LSTM with TensorFlow, the loss starts increasing again after a few epochs. I went through the code but cannot find the cause:
def build_lstm(lstm_size, num_layers, batch_size, keep_prob):
    # Stack num_layers GRU cells. Dropout is currently disabled, so
    # keep_prob is accepted but not actually used here.
    stack_drop = []
    for i in range(num_layers):
        lstm = tf.nn.rnn_cell.GRUCell(lstm_size)
        # drop = tf.nn.rnn_cell.DropoutWrapper(lstm_size, output_keep_prob=keep_prob)
        # stack_drop.append(drop)
        stack_drop.append(lstm)
    cell = tf.nn.rnn_cell.MultiRNNCell(stack_drop, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    return cell, initial_state
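One thing I noticed while checking: keep_prob is passed into build_lstm but never used, because the DropoutWrapper lines are commented out (and the commented call passes lstm_size instead of the cell object). If I wanted dropout back, I think it would look roughly like this (just my sketch, assuming TF 1.x tf.nn.rnn_cell):

def build_lstm_with_dropout(lstm_size, num_layers, batch_size, keep_prob):
    stack_drop = []
    for i in range(num_layers):
        lstm = tf.nn.rnn_cell.GRUCell(lstm_size)
        # Wrap the cell object itself, not lstm_size
        drop = tf.nn.rnn_cell.DropoutWrapper(lstm, output_keep_prob=keep_prob)
        stack_drop.append(drop)
    cell = tf.nn.rnn_cell.MultiRNNCell(stack_drop, state_is_tuple=True)
    initial_state = cell.zero_state(batch_size, tf.float32)
    return cell, initial_state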
def build_output(lstm_output, in_size, out_size):
    # in_size: size of the LSTM layer output
    # out_size: number of output classes (should be 2 here)
    # seq_output = tf.concat(1, lstm_output)  # old tf.concat(concat_dim, values) signature
    seq_output = tf.concat(lstm_output, axis=1)
    # Reshape so each row is one time step: [batch_size * num_steps, in_size]
    x = tf.reshape(seq_output, [-1, in_size])
    with tf.variable_scope('softmax'):
        softmax_w = tf.Variable(tf.truncated_normal([in_size, out_size], stddev=1.))
        softmax_b = tf.Variable(tf.zeros(out_size))
    logits = tf.matmul(x, softmax_w) + softmax_b
    out = tf.nn.softmax(logits, name='predictions')
    return out, logits
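To convince myself that the reshape in build_output does what I think, I ran a tiny NumPy check (not part of the model, the shapes are just an example):

import numpy as np

# Example shapes: batch_size=2, num_steps=3, lstm_size=4
out = np.arange(2 * 3 * 4).reshape(2, 3, 4)   # what dynamic_rnn returns
flat = out.reshape(-1, 4)                     # same idea as tf.reshape(seq_output, [-1, in_size])
print(flat.shape)                             # (6, 4): one row per (batch, step) pair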
def build_loss(logits, targets, lstm_size, num_classes):
    # One-hot encode the targets and reshape them to match the logits
    y_one_hot = tf.one_hot(targets, num_classes)
    y_reshaped = tf.reshape(y_one_hot, logits.get_shape())
    # Softmax cross-entropy loss, averaged over the batch
    loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped)
    loss = tf.reduce_mean(loss)
    return loss
def build_optimizer(loss, learning_rate, grad_clip):
    # Clip gradients by global norm, then apply them with Adam
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate)
    optimizer = train_op.apply_gradients(zip(grads, tvars))
    return optimizer
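Since the loss keeps going up, one thing I considered (not in the code above) is also returning the unclipped global gradient norm so I can print it next to the batch loss and see whether the gradients are actually exploding. A rough sketch of that variant:

def build_optimizer_debug(loss, learning_rate, grad_clip):
    # Same as build_optimizer, but also expose the pre-clip global gradient norm
    tvars = tf.trainable_variables()
    grads, global_norm = tf.clip_by_global_norm(tf.gradients(loss, tvars), grad_clip)
    train_op = tf.train.AdamOptimizer(learning_rate).apply_gradients(zip(grads, tvars))
    return train_op, global_norm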
class CharRNN:
    def __init__(self, num_classes, batch_size=64, num_steps=50,
                 lstm_size=2*len_arr, num_layers=1, learning_rate=0.001,
                 grad_clip=5, sampling=False):
        # When sampling, feed one step at a time
        if sampling:
            batch_size, num_steps = 1, 1

        tf.reset_default_graph()
        self.inputs, self.targets, self.keep_prob = build_inputs(batch_size, num_steps)
        cell, self.initial_state = build_lstm(lstm_size, num_layers, batch_size, self.keep_prob)

        # One-hot encode the inputs and run them through the RNN
        x_one_hot = tf.one_hot(self.inputs, num_classes)
        outputs, state = tf.nn.dynamic_rnn(cell, x_one_hot, initial_state=self.initial_state)
        self.final_state = state

        self.prediction, self.logits = build_output(outputs, lstm_size, num_classes)
        self.loss = build_loss(self.logits, self.targets, lstm_size, num_classes)
        self.optimizer = build_optimizer(self.loss, learning_rate, grad_clip)
batch_size = 500 # Sequences per batch
num_steps = len_arr # Number of sequence steps per batch
lstm_size = 100 # Size of hidden layers in LSTMs
num_layers = 3 # Number of LSTM layers
learning_rate = 0.001 # Learning rate
keep_prob = 0.8 # Dropout keep probability
epochs = 300
save_every_n = 50
model = CharRNN(len(spin), batch_size=batch_size, num_steps=num_steps,
lstm_size=lstm_size, num_layers=num_layers,
learning_rate=learning_rate)
saver = tf.train.Saver(max_to_keep=2)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    counter = 0
    for e in range(epochs):
        # Train network
        new_state = sess.run(model.initial_state)
        loss = 0
        for x, y in get_batches(encoded, batch_size, num_steps):
            counter += 1
            start = time.time()
            feed = {model.inputs: x,
                    model.targets: y,
                    model.keep_prob: keep_prob,
                    model.initial_state: new_state}
            batch_loss, new_state, _ = sess.run([model.loss,
                                                 model.final_state,
                                                 model.optimizer],
                                                feed_dict=feed)
            end = time.time()
            # control the print lines
            if counter % 1000 == 0:
                print('epoch: {}/{}... '.format(e+1, epochs),
                      'steps: {}... '.format(counter),
                      'loss: {:.4f}... '.format(batch_loss),
                      '{:.4f} sec/batch'.format((end-start)))
            #if (counter % save_every_n == 0):
            #    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
    saver.save(sess, "checkpoints/i{}_l{}.ckpt".format(counter, lstm_size))
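I also thought about evaluating the same loss op on a held-out split once per epoch, to tell overfitting apart from real divergence. This is only a sketch; encoded_val (a validation split) does not exist in my script, and the block would go inside the epoch loop above:

# Hypothetical per-epoch validation pass
val_state = sess.run(model.initial_state)
val_losses = []
for vx, vy in get_batches(encoded_val, batch_size, num_steps):
    feed = {model.inputs: vx,
            model.targets: vy,
            model.keep_prob: 1.0,
            model.initial_state: val_state}
    v_loss, val_state = sess.run([model.loss, model.final_state], feed_dict=feed)
    val_losses.append(v_loss)
print('epoch {} val loss: {:.4f}'.format(e + 1, sum(val_losses) / len(val_losses)))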
The gradients of the loss are clipped with grad_clip and I use softmax_cross_entropy. Could there be a problem with how I run the session, or should I just try other hyperparameters and look at the loss again?