Tensorflow difference between using tf.layers and specifying variables via tf.get_variable

Time: 2017-12-20 15:57:28

Tags: tensorflow

My code consists of a common part (not shown) and an interchangeable part (see the code blocks). When the following block is used, everything runs fine:

def generator(x, reuse=False):
    with tf.variable_scope('generator', reuse=reuse):
        # initializers
        w_init = tf.truncated_normal_initializer(mean=0, stddev=0.02)
        b_init = tf.constant_initializer(0.)
        # 1st hidden layer
        w0 = tf.get_variable('G_w0', [x.get_shape()[1], 256], initializer=w_init)
        b0 = tf.get_variable('G_b0', [256], initializer=b_init)
        h0 = tf.nn.relu(tf.matmul(x, w0) + b0)
        # 2nd hidden layer
        w1 = tf.get_variable('G_w1', [h0.get_shape()[1], 512], initializer=w_init)
        b1 = tf.get_variable('G_b1', [512], initializer=b_init)
        h1 = tf.nn.relu(tf.matmul(h0, w1) + b1)
        # 3rd hidden layer
        w2 = tf.get_variable('G_w2', [h1.get_shape()[1], 1024], initializer=w_init)
        b2 = tf.get_variable('G_b2', [1024], initializer=b_init)
        h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)
        # output hidden layer
        w3 = tf.get_variable('G_w3', [h2.get_shape()[1], 784], initializer=w_init)
        b3 = tf.get_variable('G_b3', [784], initializer=b_init)
        o = tf.nn.tanh(tf.matmul(h2, w3) + b3)
    return o

def discriminator(x, drop_out, reuse=False):
    with tf.variable_scope('discriminator', reuse=reuse):
        x = tf.reshape(x, [-1, 784])
        # initializers
        w_init = tf.truncated_normal_initializer(mean=0, stddev=0.02)
        b_init = tf.constant_initializer(0.)
        # 1st hidden layer
        w0 = tf.get_variable('D_w0', [x.get_shape()[1], 1024], initializer=w_init)
        b0 = tf.get_variable('D_b0', [1024], initializer=b_init)
        h0 = tf.nn.relu(tf.matmul(x, w0) + b0)
        h0 = tf.nn.dropout(h0, 1-drop_out)
        # 2nd hidden layer
        w1 = tf.get_variable('D_w1', [h0.get_shape()[1], 512], initializer=w_init)
        b1 = tf.get_variable('D_b1', [512], initializer=b_init)
        h1 = tf.nn.relu(tf.matmul(h0, w1) + b1)
        h1 = tf.nn.dropout(h1, 1-drop_out)
        # 3rd hidden layer
        w2 = tf.get_variable('D_w2', [h1.get_shape()[1], 256], initializer=w_init)
        b2 = tf.get_variable('D_b2', [256], initializer=b_init)
        h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)
        h2 = tf.nn.dropout(h2, 1-drop_out)
        # output layer
        w3 = tf.get_variable('D_w3', [h2.get_shape()[1], 1], initializer=w_init)
        b3 = tf.get_variable('D_b3', [1], initializer=b_init)
        o = tf.sigmoid(tf.matmul(h2, w3) + b3)
    return o

When the following block is used instead (it should be exactly equivalent to the previous one, right?), training no longer works:

def generator(x, reuse=False):
    with tf.variable_scope('generator', reuse=reuse):
        w_init = tf.truncated_normal_initializer(0., 0.02)
        b_init = tf.constant_initializer(0.)
        l1 = tf.layers.dense(x, 256, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.relu)
        l2 = tf.layers.dense(l1, 512, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.relu)
        l3 = tf.layers.dense(l2, 1024, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.relu)
        o = tf.layers.dense(l3, 784, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.tanh)
    return o

def discriminator(x, drop_out, reuse=False):
    with tf.variable_scope('discriminator', reuse=reuse):
        x = tf.reshape(x, [-1, 784])
        w_init = tf.truncated_normal_initializer(0., 0.02)
        b_init = tf.constant_initializer(0.)
        l1 = tf.layers.dense(x, 1024, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.relu)
        l1_ = tf.layers.dropout(l1, drop_out)
        l2 = tf.layers.dense(l1_, 512, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.relu)
        l2_ = tf.layers.dropout(l2, drop_out)
        l3 = tf.layers.dense(l2_, 256, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.relu)
        l3_ = tf.layers.dropout(l3, drop_out)
        o = tf.layers.dense(l3_, 1, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.sigmoid)
    return o

I don't see where the difference is, but when training and looking at the generated images there clearly is one. The first block always works perfectly. The second one only works for a few iterations.

Please help, thanks!

1 Answer:

Answer 0 (score: 1)

Well, I can't comment yet... The only thing that comes to mind is that tf.nn.dropout takes a keep probability (keep_prob), while tf.layers.dropout takes a dropout rate. Please check whether that rate is too high.
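
For reference, a minimal standalone sketch of that API difference in TF 1.x (not the asker's code; the placeholder h and the value 0.3 are made up for illustration):

import tensorflow as tf

# Hypothetical input tensor, only to show the two dropout calls side by side.
h = tf.placeholder(tf.float32, [None, 1024])
drop_out = 0.3  # fraction of units intended to be dropped

# tf.nn.dropout takes the probability of KEEPING a unit,
# which is why the first block passes 1 - drop_out.
a = tf.nn.dropout(h, keep_prob=1 - drop_out)

# tf.layers.dropout takes the fraction to DROP, and it only applies
# dropout when training=True (its training argument defaults to False).
b = tf.layers.dropout(h, rate=drop_out, training=True)

So whether the same drop_out value is right for both calls depends on whether it was meant as a keep probability or as a drop rate.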