sess.run()期间tf.layers.batch_normalization冻结(1.5.0-dev20171031)

时间:2017-11-01 01:59:23

标签: tensorflow batch-normalization

计算图构建阶段没有报错,但程序在第一个 epoch 的第一个小批量(mini-batch)上执行 sess.run() 时卡住不动(没有硬盘读取,内存占用没有变化,什么都没有……)。如果我删除这一层,或者用 tf.contrib.layers.layer_norm 替换它,程序就能正常运行。

我传入 tf.layers.batch_normalization 的张量(x)的形状为 [#batches, 200]。大多数参数我都使用默认值,但关闭了 center 和 scale(即 center=False、scale=False)。

# Batch-normalise `x` with every option spelled out.  The keywords are grouped
# by purpose; keyword order has no effect on the call.
x_BN = tf.layers.batch_normalization(
    x,
    # -- settings that differ from the library defaults --
    epsilon=1e-10,          # library default: 0.001
    center=False,           # library default: True (beta offset disabled)
    scale=False,            # library default: True (gamma scaling disabled)
    training=Flg_training,  # library default: False
    fused=False,
    # -- normalised axis and moving statistics --
    axis=-1,
    momentum=0.99,
    moving_mean_initializer=tf.zeros_initializer(),
    moving_variance_initializer=tf.ones_initializer(),
    # -- beta / gamma options --
    beta_initializer=tf.zeros_initializer(),
    gamma_initializer=tf.ones_initializer(),
    beta_regularizer=None,
    gamma_regularizer=None,
    beta_constraint=None,
    gamma_constraint=None,
    # -- variable handling --
    trainable=True,
    name=None,
    reuse=None,
    # -- batch renormalisation (switched off) --
    renorm=False,
    renorm_clipping=None,
    renorm_momentum=0.99,
    # -- remaining options --
    virtual_batch_size=None,
    adjustment=None,
)

我使用的tensorflow版本是tf-nightly-gpu(1.5.0-dev20171031或1.5.0-dev20171023)。有没有人遇到类似的问题?

更新

如果tf.layers.batch_normalization的输入来自tf.nn.bidirectional_dynamic_rnn,则会发生这种情况,请参阅简化代码以重现此问题:

import tensorflow as tf
import numpy as np

# Hyper-parameters of the reproduction script.

# Initial learning rate handed to tf.train.exponential_decay.
starter_learning_rate = 0.001
# decay_steps argument of the exponential-decay schedule.
decay_steps = 100
# decay_rate argument of the exponential-decay schedule.
decay_rate = 0.96
# Number of LSTM cells stacked inside the MultiRNNCell.
num_RNN_layers = 3
# num_units of every LSTMCell.
LSTM_CELL_SIZE = 100
# output_keep_prob of the DropoutWrapper placed around each LSTM cell.
keep_prob = 0.95

with tf.name_scope('Inputs'):
    # Feature rows and regression targets: 200 values per example.
    x = tf.placeholder(tf.float32, [None, 200])
    y = tf.placeholder(tf.float32, [None, 200])
    # One sequence length per example, consumed by the bidirectional RNN.
    length = tf.placeholder(tf.int32, [None])
    # Scalar boolean fed to the batch-normalisation layer's `training` argument.
    Flg_training = tf.placeholder(tf.bool, [])

    # Append a trailing axis of size 1: [batch, 200] -> [batch, 200, 1].
    x_1 = tf.expand_dims(x, axis=-1)

def _build_stacked_lstm():
    """Return a new MultiRNNCell of `num_RNN_layers` dropout-wrapped LSTM cells."""
    wrapped_cells = [
        tf.nn.rnn_cell.DropoutWrapper(
            cell=tf.nn.rnn_cell.LSTMCell(num_units=LSTM_CELL_SIZE, state_is_tuple=True),
            output_keep_prob=keep_prob,
        )
        for _ in range(num_RNN_layers)
    ]
    return tf.nn.rnn_cell.MultiRNNCell(cells=wrapped_cells, state_is_tuple=True)


with tf.name_scope('BiLSTM'):
    # Build one cell stack per direction so that the forward and backward
    # passes do not share a single cell object.
    MultiLyr_cell_fw = _build_stacked_lstm()
    MultiLyr_cell_bw = _build_stacked_lstm()

    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=MultiLyr_cell_fw,
        cell_bw=MultiLyr_cell_bw,
        dtype=tf.float32,
        sequence_length=length,
        inputs=x_1,
        scope="BiLSTM",
    )

    # `states` is the pair (forward final state, backward final state); each
    # holds one LSTM state tuple per stacked layer.
    states_fw, states_bw = states

    # Only the hidden state `h` of the last layer is used downstream.
    h_fw_lstLyr = states_fw[-1].h
    h_bw_lstLyr = states_bw[-1].h

    # Concatenate both directions along the feature axis:
    # [batch, LSTM_CELL_SIZE] x 2 -> [batch, 2 * LSTM_CELL_SIZE].
    states_concat1 = tf.concat([h_fw_lstLyr, h_bw_lstLyr], axis=1, name='states_concat')

with tf.name_scope("cs_BN1"):
    # Batch-normalise the concatenated final hidden states.  The keywords are
    # grouped by purpose; keyword order has no effect on the call.
    x_BN = tf.layers.batch_normalization(
        states_concat1,
        # -- settings that differ from the library defaults --
        epsilon=1e-10,          # library default: 0.001
        center=False,           # library default: True (beta offset disabled)
        scale=False,            # library default: True (gamma scaling disabled)
        training=Flg_training,  # library default: False
        fused=False,
        name="test_BN",         # library default: None
        # -- normalised axis and moving statistics --
        # axis to normalise: the feature axis, here the concatenated hidden vectors
        axis=-1,
        momentum=0.99,
        moving_mean_initializer=tf.zeros_initializer(),
        moving_variance_initializer=tf.ones_initializer(),
        # -- beta / gamma options --
        beta_initializer=tf.zeros_initializer(),
        gamma_initializer=tf.ones_initializer(),
        beta_regularizer=None,
        gamma_regularizer=None,
        beta_constraint=None,
        gamma_constraint=None,
        # -- variable handling --
        trainable=True,
        reuse=None,
        # -- batch renormalisation (switched off) --
        renorm=False,
        renorm_clipping=None,
        renorm_momentum=0.99,
        # -- remaining options --
        virtual_batch_size=None,
        adjustment=None,
    )

# Trainable slope `a` (initialised to 1.0) and intercept `b` (initialised to 0.0).
with tf.name_scope("Regression"):
    a = tf.get_variable(
        "a",
        shape=[1],
        dtype=tf.float32,
        initializer=tf.constant_initializer(1.0),
    )
    b = tf.get_variable(
        "b",
        shape=[1],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0),
    )

# Affine map of the normalised states: y_pred = x_BN * a + b.
with tf.name_scope("Prediction"):
    y_pred = tf.add(tf.multiply(x_BN, a), b)

with tf.name_scope('Loss'):
    # Element-wise squared errors (no reduction applied) ...
    losses = tf.losses.mean_squared_error(y, y_pred, reduction=tf.losses.Reduction.NONE)
    # ... and their mean, the scalar that the training loop reports.
    mean_loss = tf.reduce_mean(losses)

with tf.name_scope('Training'):
    # Step counter incremented by the optimiser; excluded from training.
    global_step = tf.Variable(0, trainable=False)
    # Staircase schedule built from starter_learning_rate, decay_steps and decay_rate.
    learning_rate = tf.train.exponential_decay(
        starter_learning_rate,
        global_step,
        decay_steps,
        decay_rate,
        staircase=True,
    )

    # Ops registered under UPDATE_OPS (batch normalisation registers its
    # moving-average updates there) must run with every training step.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        # Minimise the scalar mean loss, i.e. the same quantity that is logged,
        # instead of the unreduced per-element loss tensor.
        train_step = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
            mean_loss, global_step=global_step)


#x_mean = tf.reduce_mean(x_BN, axis=0)

# Run 2000 training steps on synthetic data (y = 1.5 * x + 3.0) and print the
# mean loss together with the regression parameters `a` and `b`.
sess = tf.InteractiveSession()
try:
    # Write the graph definition for inspection.
    train_writer = tf.summary.FileWriter("G:\\Surface_Ozone\\Temp\\", sess.graph)
    try:
        sess.run(tf.global_variables_initializer())

        # Every example uses the full sequence length, so build this once.
        length_in = np.full([20], 200, dtype=np.int32)

        for ii in range(2000):
            x_in = np.random.rand(20, 200)
            y_in = x_in * 1.5 + 3.0

            _, mean_loss_val, a_val, b_val = sess.run(
                [train_step, mean_loss, a, b],
                feed_dict={
                    x: x_in,
                    Flg_training: True,
                    y: y_in,
                    length: length_in,
                },
            )

            # Log each of the first 50 steps, then every 100th step.
            if ii < 50 or ii % 100 == 0:
                print("step {}: {} | a: {} | b: {}".format(ii, mean_loss_val, a_val, b_val))

        print("Normal End.")
    finally:
        # Flush and release the event file even if a step fails.
        train_writer.close()
finally:
    # Release the session's resources even if a step fails.
    sess.close()

0 个答案:

没有答案