My code consists of a general part (not shown) and a variable part (see the code blocks). When I use the following block, everything trains fine:
def generator(x, reuse=False):
    with tf.variable_scope('generator', reuse=reuse):
        # initializers
        w_init = tf.truncated_normal_initializer(mean=0, stddev=0.02)
        b_init = tf.constant_initializer(0.)

        # 1st hidden layer
        w0 = tf.get_variable('G_w0', [x.get_shape()[1], 256], initializer=w_init)
        b0 = tf.get_variable('G_b0', [256], initializer=b_init)
        h0 = tf.nn.relu(tf.matmul(x, w0) + b0)

        # 2nd hidden layer
        w1 = tf.get_variable('G_w1', [h0.get_shape()[1], 512], initializer=w_init)
        b1 = tf.get_variable('G_b1', [512], initializer=b_init)
        h1 = tf.nn.relu(tf.matmul(h0, w1) + b1)

        # 3rd hidden layer
        w2 = tf.get_variable('G_w2', [h1.get_shape()[1], 1024], initializer=w_init)
        b2 = tf.get_variable('G_b2', [1024], initializer=b_init)
        h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)

        # output layer
        w3 = tf.get_variable('G_w3', [h2.get_shape()[1], 784], initializer=w_init)
        b3 = tf.get_variable('G_b3', [784], initializer=b_init)
        o = tf.nn.tanh(tf.matmul(h2, w3) + b3)

        return o
def discriminator(x, drop_out, reuse=False):
    with tf.variable_scope('discriminator', reuse=reuse):
        x = tf.reshape(x, [-1, 784])

        # initializers
        w_init = tf.truncated_normal_initializer(mean=0, stddev=0.02)
        b_init = tf.constant_initializer(0.)

        # 1st hidden layer
        w0 = tf.get_variable('D_w0', [x.get_shape()[1], 1024], initializer=w_init)
        b0 = tf.get_variable('D_b0', [1024], initializer=b_init)
        h0 = tf.nn.relu(tf.matmul(x, w0) + b0)
        h0 = tf.nn.dropout(h0, 1 - drop_out)

        # 2nd hidden layer
        w1 = tf.get_variable('D_w1', [h0.get_shape()[1], 512], initializer=w_init)
        b1 = tf.get_variable('D_b1', [512], initializer=b_init)
        h1 = tf.nn.relu(tf.matmul(h0, w1) + b1)
        h1 = tf.nn.dropout(h1, 1 - drop_out)

        # 3rd hidden layer
        w2 = tf.get_variable('D_w2', [h1.get_shape()[1], 256], initializer=w_init)
        b2 = tf.get_variable('D_b2', [256], initializer=b_init)
        h2 = tf.nn.relu(tf.matmul(h1, w2) + b2)
        h2 = tf.nn.dropout(h2, 1 - drop_out)

        # output layer
        w3 = tf.get_variable('D_w3', [h2.get_shape()[1], 1], initializer=w_init)
        b3 = tf.get_variable('D_b3', [1], initializer=b_init)
        o = tf.sigmoid(tf.matmul(h2, w3) + b3)

        return o
When I use the following block instead (which should be exactly equivalent to the previous one, right?), training no longer works:
def generator(x, reuse=False):
    with tf.variable_scope('generator', reuse=reuse):
        # initializers
        w_init = tf.truncated_normal_initializer(0., 0.02)
        b_init = tf.constant_initializer(0.)

        # fully connected hidden layers
        l1 = tf.layers.dense(x, 256, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.relu)
        l2 = tf.layers.dense(l1, 512, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.relu)
        l3 = tf.layers.dense(l2, 1024, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.relu)

        # output layer
        o = tf.layers.dense(l3, 784, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.tanh)

        return o

def discriminator(x, drop_out, reuse=False):
    with tf.variable_scope('discriminator', reuse=reuse):
        x = tf.reshape(x, [-1, 784])

        # initializers
        w_init = tf.truncated_normal_initializer(0., 0.02)
        b_init = tf.constant_initializer(0.)

        # fully connected hidden layers with dropout
        l1 = tf.layers.dense(x, 1024, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.relu)
        l1_ = tf.layers.dropout(l1, drop_out)
        l2 = tf.layers.dense(l1_, 512, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.relu)
        l2_ = tf.layers.dropout(l2, drop_out)
        l3 = tf.layers.dense(l2_, 256, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.relu)
        l3_ = tf.layers.dropout(l3, drop_out)

        # output layer
        o = tf.layers.dense(l3_, 1, kernel_initializer=w_init, bias_initializer=b_init, activation=tf.nn.sigmoid)

        return o
I can't see where the difference is, but it clearly shows up during training and in the generated images: the first block always works perfectly, while the second one only works for a few iterations.
Please help, thanks!
Answer 0 (score: 1)
Well, I can't comment yet... The only thing that comes to mind is that tf.nn.dropout uses keep_prob, whereas tf.layers.dropout uses the dropout rate. Please check whether that rate is too high.
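To illustrate that point, here is a minimal sketch (assuming TensorFlow 1.x; the placeholder x and the value drop_rate are hypothetical stand-ins, not taken from the question): tf.nn.dropout expects the probability of keeping a unit, while tf.layers.dropout expects the probability of dropping one, and it only drops units when its training argument is True.

    import tensorflow as tf

    x = tf.placeholder(tf.float32, [None, 256])  # hypothetical activations
    drop_rate = 0.3                              # hypothetical drop probability

    # tf.nn.dropout takes the probability of KEEPING a unit,
    # which is why the question's first block passes 1 - drop_out:
    h_nn = tf.nn.dropout(x, keep_prob=1 - drop_rate)

    # tf.layers.dropout takes the probability of DROPPING a unit (the rate),
    # and it only applies dropout when training=True (the default is False):
    h_layers = tf.layers.dropout(x, rate=drop_rate, training=True)

With the same value and training enabled, both calls drop the same fraction of units; mixing up the two conventions (passing a keep probability where a rate is expected, or the other way around) effectively inverts the dropout strength.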