Loss value jumps when saving and restoring a trained model with tf.train.Saver()

Date: 2018-07-25 12:12:12

Tags: python tensorflow machine-learning neural-network deep-learning

Edit:

The original title of this post was

"Loss value jumps and then slowly decreases when the learning rate is lowered"

In any case, I now believe this happens whenever a saved model is restored, regardless of whether the learning rate changes.

After 10 epochs at a learning rate of 0.001, the loss reached ~10. Then I saved and restored the model and resumed training, and the loss in the 11th epoch was about 15. After another 5 epochs it still had not gotten back down to 10.


I built an autoencoder model, shown below. It uses tf.train.Saver() to save the trained model and restore it. I made the learning rate a tf.placeholder, so that the learning rate can be specified through the feed_dict.
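In isolation, that learning-rate pattern looks like this (a minimal, self-contained TF1 sketch; the toy variable and data are illustrative only, not part of the model below):

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 1])
learningRate = tf.placeholder(tf.float32, [])   # Learning rate is a graph input
w = tf.Variable([[1.0]])                        # Toy parameter for illustration
loss = tf.reduce_mean(tf.square(tf.matmul(x, w)))
train = tf.train.AdamOptimizer(learningRate).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # The rate can be changed between run() calls without rebuilding the graph
    sess.run(train, feed_dict={x: [[2.0]], learningRate: 0.001})
    sess.run(train, feed_dict={x: [[2.0]], learningRate: 0.0001})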

When training the model, I first start with a relatively high learning rate and let it train until the loss begins to plateau; then I save the model, manually lower the learning rate, restore, and resume training (a minimal driver sketching this cycle follows the class code below).

However, whenever this change happens, the loss always jumps to a higher level than it was before the change, and then decreases slowly but steadily. I don't understand why this jump occurs, unless there is something wrong with my implementation.
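A quick way to check whether the optimizer state makes it into the checkpoint is to list the saved variables (a diagnostic sketch; the path below is a hypothetical example, substitute the one written by save()):

import tensorflow as tf

# Hypothetical checkpoint path; use the one actually written by save()
ckpt = './SavedModels/Autoencoder.ckpt'

# Prints every variable stored in the checkpoint as (name, shape).
# Adam's state appears as '<var>/Adam' and '<var>/Adam_1' slots plus
# 'beta1_power' / 'beta2_power'; if these are absent, the optimizer
# statistics are re-initialised on restore.
for name, shape in tf.train.list_variables(ckpt):
    print(name, shape)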

Here is how the model is trained:

import os
import pickle as pk

import tensorflow as tf


class Autoencoder:

    def __init__(self, encoderDims, sparseInput=False, tiedWeights=False, denoise=False):
        self.encoderDims = encoderDims
        self.decoderDims = list(reversed(encoderDims))
        self.sparseInput = sparseInput
        self.tiedWeights = tiedWeights
        self.denoise = denoise          # Only works for greyscale image data

        self.input = tf.placeholder(tf.float32, [None, encoderDims[0]])
        self.learningRate = tf.placeholder(tf.float32, [])

        self.activationFunction = tf.nn.sigmoid             # TO DO: Allow to be specified by user
        # self.activationFunction = tf.tanh
        # self.activationFunction = tf.nn.selu
        self.SGD = tf.train.AdamOptimizer(self.learningRate)

        if self.denoise:
            self.__addNoise()
        self.__buildNetwork()           # Constructs Encoder & Decoder
        self.__buildTensorFlowGraph()   # Creates sequential TensorFlow operations

        self.session = tf.Session()
        self.session.run(tf.global_variables_initializer())  # Initialise weights & biases
        self.saver = tf.train.Saver()
        self.session.graph.finalize()   # Avoids memory leaks through duplicating graph nodes

    def __addNoise(self):
        # Create a tensor of random numbers with unit variance,
        # then set pixels to 255 where the random tensor > 1
        # (i.e. values more than one std dev above the mean -> ~16% of pixels)
        random = tf.random_normal(tf.shape(self.input))
        mask = tf.greater(random, 1.0)
        self.noisyInput = tf.where(
            mask, tf.ones_like(self.input) * 255, self.input)

    def __buildNetwork(self):
        # Lists of weights and biases per layer of encoder and decoder
        self.encoderWeights, self.encoderBiases = [], []
        self.decoderWeights, self.decoderBiases = [], []
        for layer in range(len(self.encoderDims) - 1):
            self.encoderWeights.append(
                tf.Variable(tf.random_normal(
                    [self.encoderDims[layer], self.encoderDims[layer + 1]]))
            )
            self.encoderBiases.append(
                tf.Variable(tf.zeros([self.encoderDims[layer + 1]]))
            )
            # if layer != len(self.decoderDims) - 2:  # BIAS IN OUTPUT LAYER????
            self.decoderBiases.append(
                tf.Variable(tf.zeros([self.decoderDims[layer + 1]]))
            )
            if not self.tiedWeights:
                self.decoderWeights.append(
                    tf.Variable(tf.random_normal(
                        [self.decoderDims[layer], self.decoderDims[layer + 1]]))
                )
        if self.tiedWeights:
            self.decoderWeights = [tf.transpose(
                i) for i in reversed(self.encoderWeights)]

    def __buildTensorFlowGraph(self):
        self.encoded = self.encode()        # Encoded/compressed data
        self.decoded = self.decode()        # Decoded/reconstructed data
        self.loss = self.__calculateLoss()
        self.train = self.SGD.minimize(self.loss)

    def encode(self):
        if self.denoise:
            encoded = self.noisyInput
        else:
            encoded = self.input
        for layer in range(len(self.encoderDims) - 1):
            encoded = tf.matmul(encoded, self.encoderWeights[layer])
            encoded = tf.add(encoded, self.encoderBiases[layer])
            # if layer != len(self.encoderDims) - 2:    # KEEP LAST LINEAR?
            encoded = self.activationFunction(encoded)
        return encoded

    def decode(self):
        decoded = self.encoded
        for layer in range(len(self.decoderDims) - 1):
            decoded = tf.matmul(decoded, self.decoderWeights[layer])
            # if layer != len(self.decoderDims) - 2:  # BIAS IN OUTPUT LAYER????
            decoded = tf.add(decoded, self.decoderBiases[layer])
            if layer != len(self.decoderDims) - 2:  # Keep output layer linear
                decoded = self.activationFunction(decoded)
        return decoded

    def __calculateLoss(self):
        # TO DO: ADD REGULARISATION
        if self.sparseInput:
            # tf.where on a 2-D tensor yields [N, 2] coordinates, so
            # tf.gather_nd (not tf.gather) is needed to select elements
            nonZeros = tf.where(tf.greater(self.input, 0))
            input = tf.gather_nd(self.input, nonZeros)
            output = tf.gather_nd(self.decoded, nonZeros)
        else:
            input = self.input
            output = self.decoded

        return tf.sqrt(
            tf.losses.mean_squared_error(
                labels=input,
                predictions=output
            )
        )

    def setBatch(self, input, learningRate=0.0):
        self.batchDict = {
            self.input: input,
            self.learningRate: learningRate
        }

    def run(self, operations=None, train=False):
        # Returns the values of the specified operations
        # and trains the network's parameters if specified
        if operations is not None and not isinstance(operations, list):
            operations = [operations]

        if train:
            ops = [self.train]
        else:
            ops = []

        if operations is not None:
            for op in operations:
                if op == 'input':
                    ops.append(self.input)
                if op == 'noisyInput':
                    ops.append(self.noisyInput)
                if op == 'encoded':
                    ops.append(self.encoded)
                if op == 'decoded':
                    ops.append(self.decoded)
                if op == 'loss':
                    ops.append(self.loss)

        if (train and len(ops) == 2) or (not train and len(ops) == 1):
            return self.session.run(ops, self.batchDict)[-1]
        elif train:
            return self.session.run(ops, self.batchDict)[1:]
        else:
            return self.session.run(ops, self.batchDict)

    def save(self, epoch, modelName="Autoencoder"):
        modelName += '.ckpt'
        dir = os.path.dirname(os.path.realpath(__file__)) + '/SavedModels/'
        os.makedirs(dir, exist_ok=True)  # Ensure the checkpoint directory exists
        self.saver.save(self.session, dir + modelName)
        loss = self.session.run(self.loss, self.batchDict)
        with open(dir + modelName + '_epoch.pk', 'wb') as epochFile:
            pk.dump(epoch, epochFile)
        with open(dir + modelName + '_loss.pk', 'wb') as lossFile:
            pk.dump(loss, lossFile)

    def restore(self, modelName="Autoencoder"):
        modelName += '.ckpt'
        dir = os.path.dirname(os.path.realpath(__file__)) + '/SavedModels/'
        self.saver.restore(self.session, dir + modelName)
        with open(dir + modelName + '_epoch.pk', 'rb') as epochFile:
            epoch = pk.load(epochFile)
        with open(dir + modelName + '_loss.pk', 'rb') as lossFile:
            loss = pk.load(lossFile)
        return epoch, loss

    def kill(self):
        self.session.close()

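For reference, a minimal driver for this class might look like the following sketch (loadData, the layer sizes, and the single full-batch update per epoch are placeholder assumptions, not code from the original post):

import numpy as np

def loadData():
    # Placeholder loader: 1000 flattened 28x28 greyscale images
    return np.random.rand(1000, 784).astype(np.float32)

data = loadData()
ae = Autoencoder([784, 256, 64])

learningRate = 0.001
for epoch in range(10):
    ae.setBatch(data, learningRate)
    loss = ae.run('loss', train=True)
    print(epoch, loss)

ae.save(epoch)                  # Checkpoint weights plus epoch/loss pickles
epoch, loss = ae.restore()      # Reload variables into the same live session
learningRate = 0.0001           # Lower the rate before resuming
for epoch in range(epoch + 1, 20):
    ae.setBatch(data, learningRate)
    loss = ae.run('loss', train=True)
    print(epoch, loss)

Note that restore() loads variables by name back into the same live session, so the graph must match the one that was saved.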
Here is the data I am using.

At a learning rate of 0.001, the loss reached ~1.0 after 100 epochs. I then reduced it to 0.0001, and the loss immediately jumped to 3.9 in the first epoch after the change, then kept decreasing, but very slowly.

0 Answers