Edit:
The original title of this post was
"Loss value jumps and then slowly decreases when the learning rate is lowered"
However, I now think this happens whenever a saved model is restored, regardless of whether the learning rate changes.
After 10 epochs at a learning rate of 0.001, the loss had reached ~10. I then saved the model, restored it, and resumed training; the loss in epoch 11 was roughly 15. Five epochs later it still had not come back down to 10.
I built an autoencoder model, shown below. It uses tf.train.Saver() to save the trained model and restore it later. I made the learning rate a tf.placeholder so that it can be specified through the feed_dict.

When training the model, I start with a relatively high learning rate and let it train until the loss begins to plateau; I then save the model, manually lower the learning rate, restore, and resume training. However, whenever I do this, the loss always jumps to a higher value than before the change and then decreases slowly but steadily. I don't understand why this jump happens, unless something is wrong with my implementation.
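To make that mechanism concrete, here is a stripped-down sketch of the pattern I am relying on; it is not my model, just dummy data and a single weight matrix. The learning-rate placeholder is wired into the optimizer once, and each training step feeds whatever value I want through feed_dict.

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 784])   # input batch
lr = tf.placeholder(tf.float32, [])           # learning rate fed in per call
w = tf.Variable(tf.random_normal([784, 784]))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - x))
train_op = tf.train.AdamOptimizer(lr).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch = np.random.rand(32, 784).astype(np.float32)   # dummy data, for illustration only
    # Same graph and optimizer; the learning rate is chosen purely via feed_dict
    sess.run(train_op, feed_dict={x: batch, lr: 0.001})
    sess.run(train_op, feed_dict={x: batch, lr: 0.0001})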
Here is the model and how it is trained:
import os
import pickle as pk
import tensorflow as tf


class Autoencoder:
    def __init__(self, encoderDims, sparseInput=False, tiedWeights=False, denoise=False):
        self.encoderDims = encoderDims
        self.decoderDims = list(reversed(encoderDims))
        self.sparseInput = sparseInput
        self.tiedWeights = tiedWeights
        self.denoise = denoise  # Only works for greyscale image data

        self.input = tf.placeholder(tf.float32, [None, encoderDims[0]])
        self.learningRate = tf.placeholder(tf.float32, [])
        self.activationFunction = tf.nn.sigmoid  # TO DO: Allow to be specified by user
        # self.activationFunction = tf.tanh
        # self.activationFunction = tf.nn.selu
        self.SGD = tf.train.AdamOptimizer(self.learningRate)

        if self.denoise:
            self.__addNoise()
        self.__buildNetwork()          # Constructs Encoder & Decoder
        self.__buildTensorFlowGraph()  # Creates sequential TensorFlow operations

        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())  # Initialise weights & biases
        self.saver = tf.train.Saver()
        self.session.graph.finalize()  # Avoids memory leaks through duplicating graph nodes

    def __addNoise(self):
        # Create a tensor of random numbers with unit variance
        # Then sets pixels to black where values of random tensor > 1
        # (i.e. all values outside the std dev -> ~32% of pixels)
        random = tf.random_normal(tf.shape(self.input))
        mask = tf.greater(random, 1.0)
        self.noisyInput = tf.where(
            mask, tf.ones_like(self.input) * 255, self.input)

    def __buildNetwork(self):
        # Lists of weights and biases per layer of encoder and decoder
        self.encoderWeights, self.encoderBiases = [], []
        self.decoderWeights, self.decoderBiases = [], []
        for layer in range(len(self.encoderDims) - 1):
            self.encoderWeights.append(
                tf.Variable(tf.random_normal(
                    [self.encoderDims[layer], self.encoderDims[layer + 1]]))
            )
            self.encoderBiases.append(
                tf.Variable(tf.zeros([self.encoderDims[layer + 1]]))
            )
            # if layer != len(self.decoderDims) - 2:  # BIAS IN OUTPUT LAYER????
            self.decoderBiases.append(
                tf.Variable(tf.zeros([self.decoderDims[layer + 1]]))
            )
            if not self.tiedWeights:
                self.decoderWeights.append(
                    tf.Variable(tf.random_normal(
                        [self.decoderDims[layer], self.decoderDims[layer + 1]]))
                )
        if self.tiedWeights:
            self.decoderWeights = [tf.transpose(
                i) for i in reversed(self.encoderWeights)]

    def __buildTensorFlowGraph(self):
        self.encoded = self.encode()  # Encoded/compressed data
        self.decoded = self.decode()  # Decoded/reconstructed data
        self.loss = self.__calculateLoss()
        self.train = self.SGD.minimize(self.loss)

    def encode(self):
        if self.denoise:
            encoded = self.noisyInput
        else:
            encoded = self.input
        for layer in range(len(self.encoderDims) - 1):
            encoded = tf.matmul(encoded, self.encoderWeights[layer])
            encoded = tf.add(encoded, self.encoderBiases[layer])
            # if layer != len(self.encoderDims) - 2:  # KEEP LAST LINEAR?
            encoded = self.activationFunction(encoded)
        return encoded

    def decode(self):
        decoded = self.encoded
        for layer in range(len(self.decoderDims) - 1):
            decoded = tf.matmul(decoded, self.decoderWeights[layer])
            # if layer != len(self.decoderDims) - 2:  # BIAS IN OUTPUT LAYER????
            decoded = tf.add(decoded, self.decoderBiases[layer])
            if layer != len(self.decoderDims) - 2:  # Keep output layer linear
                decoded = self.activationFunction(decoded)
        return decoded

    def __calculateLoss(self):
        # TO DO: ADD REGULARISATION
        if self.sparseInput:
            nonZeros = tf.where(tf.greater(self.input, 0))
            input = tf.gather(self.input, nonZeros)
            output = tf.gather(self.decoded, nonZeros)
        else:
            input = self.input
            output = self.decoded
        return tf.sqrt(
            tf.losses.mean_squared_error(
                labels=input,
                predictions=output
            )
        )

    def setBatch(self, input, learningRate=0.0):
        self.batchDict = {
            self.input: input,
            self.learningRate: learningRate
        }

    def run(self, operations=None, train=False):
        # Returns values of specified list of operations
        # Trains network's parameters if specified
        if not type(operations) is list:
            operations = [operations]
        if train:
            ops = [self.train]
        else:
            ops = []
        if operations is not None:
            for op in operations:
                if op == 'input':
                    ops.append(self.input)
                if op == 'noisyInput':
                    ops.append(self.noisyInput)
                if op == 'encoded':
                    ops.append(self.encoded)
                if op == 'decoded':
                    ops.append(self.decoded)
                if op == 'loss':
                    ops.append(self.loss)
        if (train and len(ops) == 2) or (not train and len(ops) == 1):
            return self.session.run(ops, self.batchDict)[-1]
        elif train:
            return self.session.run(ops, self.batchDict)[1:]
        else:
            return self.session.run(ops, self.batchDict)

    def save(self, epoch, modelName="Autoencoder"):
        modelName += '.ckpt'
        dir = os.path.dirname(os.path.realpath(__file__)) + '/SavedModels/'
        self.saver.save(self.session, dir + modelName)
        loss = self.session.run(self.loss, self.batchDict)
        with open(dir + modelName + '_epoch.pk', 'wb') as epochFile:
            pk.dump(epoch, epochFile)
        with open(dir + modelName + '_loss.pk', 'wb') as lossFile:
            pk.dump(loss, lossFile)

    def restore(self, modelName="Autoencoder"):
        modelName += '.ckpt'
        dir = os.path.dirname(os.path.realpath(__file__)) + '/SavedModels/'
        self.saver.restore(self.session, dir + modelName)
        with open(dir + modelName + '_epoch.pk', 'rb') as epochFile:
            epoch = pk.load(epochFile)
        with open(dir + modelName + '_loss.pk', 'rb') as lossFile:
            loss = pk.load(lossFile)
        return epoch, loss

    def kill(self):
        self.session.close()
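And this is roughly how I drive the class. The random data, layer sizes, and batch size below are placeholders for my real pipeline; the point is the cycle described above: train at a higher rate, save, lower the rate, restore, resume.

import numpy as np

data = np.random.rand(1000, 784).astype(np.float32)   # stand-in for my real dataset
ae = Autoencoder(encoderDims=[784, 256, 64])

def runEpochs(numEpochs, learningRate, startEpoch=0):
    for epoch in range(startEpoch, startEpoch + numEpochs):
        epochLoss = 0.0
        for start in range(0, len(data), 100):
            ae.setBatch(data[start:start + 100], learningRate)
            epochLoss += ae.run('loss', train=True)  # one gradient step, returns batch loss
        print(epoch, epochLoss)
    return startEpoch + numEpochs

epoch = runEpochs(10, 0.001)       # higher learning rate until the loss plateaus
ae.setBatch(data[:100], 0.001)     # save() reads self.batchDict to record the loss
ae.save(epoch)

# ... later, or in a fresh process after rebuilding the graph ...
epoch, lastLoss = ae.restore()
runEpochs(10, 0.0001, startEpoch=epoch)   # resume with the lowered learning rate
ae.kill()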
At a learning rate of 0.001, the loss reached ~1.0 after 100 epochs. I then lowered it to 0.0001, and in the very first epoch the loss immediately jumped to 3.9; after that it kept decreasing, but very slowly.