Why is my validation loss just a straight line that follows the training loss, as shown in the screenshot? My validation loss is the blue curve.

Asked: 2019-12-13 06:45:08

Tags: python validation tensorflow tensorboard

[TensorBoard screenshot: training and validation loss curves; validation loss shown in blue]

Here is my code. As far as I can tell, when I try to print the validation loss it is constant, but I don't know why. I have looked at plenty of code examples, and I do not pass the training op to the TensorFlow session during validation, so the weights should not be updated there. What am I missing? (A minimal sketch of this evaluation-only call is included after the code listing.)

import tensorflow as tf
import numpy as np
import glob, random, os

# tf.logging.set_verbosity(tf.logging.ERROR)

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

model_path = "saved_models/"
model_name = model_path + 'model'
training_dataset_path = "Dataset/training"
validation_dataset_path = "Dataset/validation"
training_iter = 100000
validation_iter = 1000
total_t = 0
validation_period = 5000


class Network(object):
    # Create model
    def __init__(self):
        self.image = tf.placeholder(tf.float32, [None, 300, 300, 3], name='image')
        self.resized_image = tf.image.resize_images(self.image, [256, 256])
        self.normalized_image = tf.image.per_image_standardization(self.resized_image)
        tf.summary.image('resized', self.normalized_image, 20)
        self.save_path = 'AE_model/ae.ckpt'

        self.z_mu = self.encoder(self.normalized_image)
        self.reconstructions = self.decoder(self.z_mu)
        tf.summary.image('reconstructed_normalized_image', self.reconstructions, 20)
        tf.summary.histogram("reconstructed", self.reconstructions)

        self.loss = self.compute_loss()
        tf.summary.scalar('loss', self.loss)

        self.merged = tf.summary.merge_all()

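    # Encoder: strided 3x3 convolutions that compress the normalized 256x256x3 image down to a 512-d latent vector z.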
    def encoder(self, x):
        conv_1 = tf.layers.conv2d(x, filters=4, kernel_size=3, strides=2, padding='same',
                                  activation=tf.nn.leaky_relu,
                                  kernel_initializer=tf.initializers.he_normal())

        conv_1_1 = tf.layers.conv2d(conv_1, filters=4, kernel_size=3, strides=1, padding='same',
                                    activation=tf.nn.leaky_relu,
                                    kernel_initializer=tf.initializers.he_normal())

        conv_2 = tf.layers.conv2d(conv_1_1, filters=8, kernel_size=3, strides=2, padding='same',
                                  activation=tf.nn.leaky_relu,
                                  kernel_initializer=tf.initializers.he_normal())
        conv_2_1 = tf.layers.conv2d(conv_2, filters=8, kernel_size=3, strides=1, padding='same',
                                    activation=tf.nn.leaky_relu,
                                    kernel_initializer=tf.initializers.he_normal())
        conv_2_2 = tf.layers.conv2d(conv_2_1, filters=8, kernel_size=3, strides=1, padding='same',
                                    activation=tf.nn.leaky_relu,
                                    kernel_initializer=tf.initializers.he_normal())

        conv_3 = tf.layers.conv2d(conv_2_2, filters=16, kernel_size=3, strides=2, padding='same',
                                  activation=tf.nn.leaky_relu,
                                  kernel_initializer=tf.initializers.he_normal())
        conv_3_1 = tf.layers.conv2d(conv_3, filters=16, kernel_size=3, strides=1, padding='same',
                                    activation=tf.nn.leaky_relu,
                                    kernel_initializer=tf.initializers.he_normal())

        conv_4 = tf.layers.conv2d(conv_3_1, filters=32, kernel_size=3, strides=2, padding='same',
                                  activation=tf.nn.leaky_relu,
                                  kernel_initializer=tf.initializers.he_normal())
        conv_4_1 = tf.layers.conv2d(conv_4, filters=32, kernel_size=3, strides=1, padding='same',
                                    activation=tf.nn.leaky_relu,
                                    kernel_initializer=tf.initializers.he_normal())
        conv_4_2 = tf.layers.conv2d(conv_4_1, filters=32, kernel_size=3, strides=1, padding='same',
                                    activation=tf.nn.leaky_relu,
                                    kernel_initializer=tf.initializers.he_normal())

        conv_5 = tf.layers.conv2d(conv_4_2, filters=64, kernel_size=3, strides=2, padding='same',
                                  activation=tf.nn.leaky_relu,
                                  kernel_initializer=tf.initializers.he_normal())
        conv_5_1 = tf.layers.conv2d(conv_5, filters=64, kernel_size=3, strides=1, padding='same',
                                    activation=tf.nn.leaky_relu,
                                    kernel_initializer=tf.initializers.he_normal())

        conv_6 = tf.layers.conv2d(conv_5_1, filters=128, kernel_size=3, strides=1, padding='same',
                                  activation=tf.nn.leaky_relu,
                                  kernel_initializer=tf.initializers.he_normal())
        conv_6_1 = tf.layers.conv2d(conv_6, filters=128, kernel_size=3, strides=1, padding='same',
                                    activation=tf.nn.leaky_relu,
                                    kernel_initializer=tf.initializers.he_normal())

        x = tf.layers.flatten(conv_6_1)
        z = tf.layers.dense(x, units=512, name='z_mu')
        return z

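    # Decoder: dense layer reshaped to 8x8x16, then a stack of transposed convolutions that upsamples back to a 256x256x3 reconstruction.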
    def decoder(self, z):
        x = tf.layers.dense(z, 1024, activation=tf.nn.leaky_relu)  # 1024 = 8*8*16, reshaped below
        x = tf.reshape(x, [-1, 8, 8, 16])
        conv_8 = tf.layers.conv2d_transpose(x, filters=256, kernel_size=3, strides=2, padding='same',
                                            activation=tf.nn.leaky_relu,
                                            kernel_initializer=tf.initializers.he_normal())
        conv_8_1 = tf.layers.conv2d_transpose(conv_8, filters=256, kernel_size=3, strides=1, padding='same',
                                              activation=tf.nn.leaky_relu,
                                              kernel_initializer=tf.initializers.he_normal())

        conv_9 = tf.layers.conv2d_transpose(conv_8_1, filters=128, kernel_size=3, strides=2, padding='same',
                                            activation=tf.nn.leaky_relu,
                                            kernel_initializer=tf.initializers.he_normal())
        conv_9_1 = tf.layers.conv2d_transpose(conv_9, filters=128, kernel_size=3, strides=1, padding='same',
                                              activation=tf.nn.leaky_relu,
                                              kernel_initializer=tf.initializers.he_normal())
        conv_9_2 = tf.layers.conv2d_transpose(conv_9_1, filters=128, kernel_size=3, strides=1, padding='same',
                                              activation=tf.nn.leaky_relu,
                                              kernel_initializer=tf.initializers.he_normal())

        conv_10 = tf.layers.conv2d_transpose(conv_9_2, filters=64, kernel_size=3, strides=2, padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=tf.initializers.he_normal())

        conv_10_1 = tf.layers.conv2d_transpose(conv_10, filters=64, kernel_size=3, strides=1, padding='same',
                                               activation=tf.nn.leaky_relu,
                                               kernel_initializer=tf.initializers.he_normal())

        conv_11 = tf.layers.conv2d_transpose(conv_10_1, filters=32, kernel_size=3, strides=2, padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=tf.initializers.he_normal())
        conv_11_1 = tf.layers.conv2d_transpose(conv_11, filters=32, kernel_size=3, strides=1, padding='same',
                                               activation=tf.nn.leaky_relu,
                                               kernel_initializer=tf.initializers.he_normal())
        conv_11_2 = tf.layers.conv2d_transpose(conv_11_1, filters=32, kernel_size=3, strides=1, padding='same',
                                               activation=tf.nn.leaky_relu,
                                               kernel_initializer=tf.initializers.he_normal())

        conv_12 = tf.layers.conv2d_transpose(conv_11_2, filters=16, kernel_size=3, strides=2, padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=tf.initializers.he_normal())
        conv_12_1 = tf.layers.conv2d_transpose(conv_12, filters=16, kernel_size=3, strides=1, padding='same',
                                               activation=tf.nn.leaky_relu,
                                               kernel_initializer=tf.initializers.he_normal())
        conv_13 = tf.layers.conv2d_transpose(conv_12_1, filters=3, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.leaky_relu,
                                             kernel_initializer=tf.initializers.he_normal())
        conv_13_1 = tf.layers.conv2d_transpose(conv_13, filters=3, kernel_size=3, strides=1, padding='same',
                                               activation=None,
                                               kernel_initializer=tf.initializers.he_normal())
        return conv_13_1

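    # Reconstruction loss: per-image sum of squared differences between reconstruction and normalized input, averaged over the batch.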
    def compute_loss(self):
        batch_shape = tf.shape(self.normalized_image)[0]
        logits_flat = tf.reshape(self.reconstructions, [batch_shape, -1])
        labels_flat = tf.reshape(self.normalized_image, [batch_shape, -1])
        reconstruction_loss = tf.reduce_sum(tf.square(logits_flat - labels_flat), axis=1)
        vae_loss = tf.reduce_mean(reconstruction_loss)

        return vae_loss

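    # Restore the latest checkpoint from the AE_model/ directory if one exists; otherwise keep the freshly initialized weights.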
    def load(self, sess):
        self.saver = tf.train.Saver(tf.global_variables())
        load_was_success = True
        try:
            save_dir = '/'.join(self.save_path.split('/')[:-1])
            ckpt = tf.train.get_checkpoint_state(save_dir)
            load_path = ckpt.model_checkpoint_path
            self.saver.restore(sess, load_path)
        except:
            print("no saved model to load. starting new session")
            load_was_success = False
        else:
            print("loaded model: {}".format(load_path))
            saver = tf.train.Saver(tf.global_variables())
            episode_number = int(load_path.split('-')[-1])

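    # Write a checkpoint tagged with the current step.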
    def save(self, sess, n):
        self.saver.save(sess, self.save_path, global_step=n)
        print("SAVED MODEL #{}".format(n))


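# Yield random contiguous batches of size batch_size from a randomly chosen VAE_FloorPlan* file found under the given path.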
def data_iterator(batch_size, path):
    data_files = glob.glob(path + '/**/VAE_FloorPlan*', recursive=True)
    while True:
        data = np.load(random.sample(data_files, 1)[0])
        np.random.shuffle(data)
        np.random.shuffle(data)
        N = data.shape[0]
        start = np.random.randint(0, N - batch_size)
        yield data[start:start + batch_size]


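# Build the network, train it with Adam, write training and validation summaries to logdir_AE/, and checkpoint periodically.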
def train_vae():
    global_step = tf.Variable(0, name='global_step', trainable=False)

    with tf.Session() as sess:

        loss_writer = tf.summary.FileWriter('logdir_AE/train', sess.graph)
        validation_writer = tf.summary.FileWriter('logdir_AE/validation')

        network = Network()
        train_op = tf.train.AdamOptimizer(1e-3).minimize(network.loss, global_step=global_step)
        sess.run(tf.global_variables_initializer())

        saver = tf.train.Saver(max_to_keep=1)
        step = global_step.eval()
        print(step)
        training_data = data_iterator(batch_size=8, path=training_dataset_path)
        validation_data = data_iterator(batch_size=4, path=validation_dataset_path)

        try:
            network.load(sess=sess)
        except:
            print("Could not restore saved model")

        for step in range(training_iter):
            training_images = next(training_data)
            _, training_loss, loss_summary = sess.run([train_op, network.loss, network.merged],
                                                      feed_dict={network.image: training_images})
            # print("reconstructed",network.reconstructions.eval(feed_dict={network.image: training_images}))
            # print("input resized",network.normalized_image.eval(feed_dict={network.image: training_images}))
            # print("input",network.image.eval(feed_dict={network.image: training_images}))

            loss_writer.add_summary(loss_summary, step)

            if np.isnan(training_loss):
                raise ValueError('Loss value is NaN')
            if step % 10 == 0 and step > 0:
                print('step {}: training loss {:.6f}'.format(step, training_loss))
            if step % 1000 == 0 and step > 0:
                network.save(sess=sess, n=step)
            if step % 1000 == 0:  # validation
                print("validation")
                for i in range(validation_iter):
                    validation_images = next(validation_data)
                    validation_loss, validation_summary = sess.run([network.loss, network.merged],
                                                                   feed_dict={network.image: validation_images})
                    validation_writer.add_summary(validation_summary, step)
                    print('step {}: Validation loss {:.6f}'.format(step, validation_loss))
            if training_loss <= 35:
                print('step {}: training loss {:.6f}'.format(step, training_loss))
                network.save(sess=sess, n=step)
                break
            step += 1


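# Rebuild the graph in a fresh session and restore the latest checkpoint from saved_models/ for inference.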
def load_vae():
    graph = tf.Graph()
    with graph.as_default():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config, graph=graph)

        network = Network()
        init = tf.global_variables_initializer()
        sess.run(init)

        saver = tf.train.Saver(max_to_keep=1)
        training_data = data_iterator(batch_size=32, path=training_dataset_path)  # data_iterator needs a path; training path assumed here

        try:
            saver.restore(sess, tf.train.latest_checkpoint(model_path))
        except:
            raise ImportError("Could not restore saved model")

        return sess, network


if __name__ == "__main__":
    train_vae()

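Side note on the evaluation-only call mentioned above, as a minimal sketch (the toy model and variable names are hypothetical, not taken from the code in this question): fetching only the loss tensor in sess.run leaves all variables unchanged, while also fetching the train op applies a gradient update.

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 1], name='x')
y = tf.placeholder(tf.float32, [None, 1], name='y')
pred = tf.layers.dense(x, 1)                               # tiny toy model
loss = tf.reduce_mean(tf.square(pred - y))
train_op = tf.train.AdamOptimizer(1e-2).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    data_x = np.random.rand(8, 1).astype(np.float32)
    data_y = 2 * data_x

    # validation-style call: train_op is not fetched, so no variable changes
    val_loss_a = sess.run(loss, feed_dict={x: data_x, y: data_y})
    val_loss_b = sess.run(loss, feed_dict={x: data_x, y: data_y})
    print(val_loss_a == val_loss_b)  # True: weights untouched

    # training-style call: fetching train_op applies the gradient update
    _, train_loss = sess.run([train_op, loss], feed_dict={x: data_x, y: data_y})
    print(train_loss)
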
0 Answers