具有常量初始化器的TensorFlow实验/估算器

时间:2017-12-25 17:28:45

标签: python tensorflow machine-learning neural-network

我在使用带有constant_initializer的Estimator API时遇到了一些麻烦。最初,我试图从.npy文件加载模型权重,但评估损失似乎根本没有移动。

我做了一个似乎有同样问题的小例子。当我用任何其他随机初始化程序替换constant_initializer时,它似乎工作。谁能解释一下发生了什么?

以下是代码的主要部分:

# Big thanks to https://medium.com/onfido-tech/higher-level-apis-in-tensorflow-67bfb602e6c0
import os
import tensorflow as tf
from tensorflow.contrib.learn import ModeKeys
from tensorflow.contrib.learn import learn_runner
from fcn import fcn32_vgg
from fcn import loss as fcn_loss
import voc_dataset
from voc_to_tfrecord import load_voc_dataset
from test_model import SimpleNet, WeightInitializerHook

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string(
    flag_name='weights_dir', default_value='...',
    docstring='Top-level directory where the input data will be looked for.')
tf.app.flags.DEFINE_string(
    flag_name='model_dir', default_value='...',
    docstring='Output directory for model and training stats.')
tf.app.flags.DEFINE_string(
    flag_name='data_dir', default_value='...',
    docstring='Directory containing the "voc_segmentation_{train|val}.tfrecord" files.')


def run_experiment(argv=None):
    # Define model parameters
    params = tf.contrib.training.HParams(
        learning_rate=0.002,
        n_classes=22,
        train_steps=100,
        eval_steps=1,
        min_eval_frequency=10,
        eval_delay_secs=0
    )

    # Set the run_config and the directory to save the model and stats
    run_config = tf.contrib.learn.RunConfig()
    run_config = run_config.replace(model_dir=FLAGS.model_dir)
    run_config = run_config.replace(tf_random_seed=42)

    learn_runner.run(
        experiment_fn=experiment_fn,
        run_config=run_config,  # RunConfig
        schedule="train_and_evaluate",  # What to run
        hparams=params  # HParams
    )


def experiment_fn(run_config, params):
    # You can change a subset of the run_config properties as
    run_config = run_config.replace(
        save_checkpoints_steps=params.min_eval_frequency)

    estimator = tf.estimator.Estimator(
        model_fn=model_fn,  # First-class function
        params=params,  # HParams
        config=run_config  # RunConfig
    )

    # Setup data loaders
    train_input_fn, train_input_hook = voc_dataset.get_inputs(
        batch_size=64,
        tfrecords_path=os.path.join(FLAGS.data_dir,'voc_segmentation_train.tfrecords'),
        name_scope='train_data',
        shuffle_and_repeat=True)

    eval_input_fn, eval_input_hook = voc_dataset.get_inputs(
        batch_size=64,
        tfrecords_path=os.path.join(FLAGS.data_dir, 'voc_segmentation_val.tfrecords'),
        name_scope='eval_data',
        shuffle_and_repeat=False)

    # Define the experiment
    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,  # Estimator
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=params.train_steps,
        eval_steps=params.eval_steps,
        min_eval_frequency=params.min_eval_frequency,  # Eval frequency
        train_monitors=[train_input_hook],  # Hooks for training
        eval_hooks=[eval_input_hook],  # Hooks for evaluation
        eval_delay_secs=params.eval_delay_secs,
    )
    return experiment


def model_fn(features, labels, mode, params):
    is_training = mode == ModeKeys.TRAIN
    net = SimpleNet()
    net.build(features, is_training=is_training)

    logits = net.logits
    predictions = net.predictions
    loss = None
    train_op = None
    eval_metric_ops = {}

    if mode != ModeKeys.INFER:
        loss = fcn_loss.loss(logits, labels, params.n_classes)
    if mode == ModeKeys.TRAIN:
        train_op = get_train_op_fn(loss, params)

    tf.summary.image('INPUT' + str(is_training), features, max_outputs=64)
    tf.summary.image('OUTPUT' + str(is_training), tf.expand_dims(tf.argmax(predictions, -1) / 22, -1), max_outputs=64)
    tf.summary.image('LABELS' + str(is_training), tf.expand_dims(tf.argmax(labels, -1) / 22, -1), max_outputs=64)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions={'result': predictions},
        loss=loss,
        train_op=train_op,
        # eval_metric_ops=eval_metric_ops
    )


def get_train_op_fn(loss, params):
    return tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.train.get_global_step(),
        optimizer=tf.train.AdamOptimizer,
        learning_rate=params.learning_rate,
        name='optimize_loss',
        summaries=['loss']
    )


# Run script ##############################################
if __name__ == "__main__":
    tf.app.run(
        main=train_manual
    )

这是架构:

class SimpleNet:

    def __init__(self, vgg16_npy_path=None):
        pass

    def build(self, rgb, is_training=False, debug=False):
        k_init = None
        if is_training:
            k_init = tf.constant_initializer(0.1)

        self.conv_1 = tf.layers.conv2d(rgb, 5, (5, 5), activation=tf.nn.elu, padding='same', name='conv1', kernel_initializer=k_init)
        self.conv_2 = tf.layers.conv2d(self.conv_1, 10, (5, 5), activation=tf.nn.elu, padding='same', name='conv2', kernel_initializer=k_init)
        self.conv_3 = tf.layers.conv2d(self.conv_2, 15, (5, 5), activation=tf.nn.elu, padding='same', name='conv3', kernel_initializer=k_init)
        self.conv_4 = tf.layers.conv2d(self.conv_3, 20, (5, 5), activation=tf.nn.elu, padding='same', name='conv4', kernel_initializer=k_init)
        self.logits = tf.layers.conv2d(self.conv_4, 22, (5, 5), activation=None, padding='same', name='logits', kernel_initializer=k_init)
        with tf.name_scope('softmax'):
            self.predictions = tf.nn.softmax(self.logits)

如果我将is_training标志设置为False,那么评估损失似乎会下降。否则,它是完全平坦的。关于为什么会出现这种情况的任何想法?

0 个答案:

没有答案