我正在尝试重现最近paper deep video portrait(2018, Standford)
中引入的cGAN网络架构我按照论文的注释将Generator
定义为T(x)
。
T(x)是指上面列出的operation blocks
,例如conv_down(), conv_upsample(), biLinearDown() and finalTanH()
。
我已经用'with tf.variable_scope()'语法标记了它们的范围。
当我包括一个损失和优化器时,发现我需要将所有与Generator相关的变量收集在一起,因为我们将训练两个不同的优化器,一个用于鉴别器,一个用于生成器。
歧视者是我的同事,所以这不是我关心的问题,因此我将其保留为psheudo。
但是,我想在我的代码中列出T(x)
中定义的变量。
我该怎么做?有帮助吗?
import tensorflow as tf
import numpy as np
# hyper-params
learning_rate = 0.0002
epochs = 250
batch_size = 16
N_w = 11 #number of frames concatenated together
channels = 9*N_w
drop_out = [0.5, 0.5, 0.5, 0, 0, 0, 0, 0]
lambda_ = 100 #for Weighting of T_loss
tf.reset_default_graph()
with tf.Graph().as_default():
def conv_down(x, N, count): #Conv [4x4, str_2] > Batch_Normalization > Leaky_ReLU
with tf.variable_scope("conv_down_{}_count{}".format(N, count)): #N == depth of tensor
x = tf.layers.conv2d(x, N, kernel_size=4, strides=2, padding='same', kernel_initializer=tf.truncated_normal_initializer(stddev=np.sqrt(0.2)))
x = tf.contrib.layers.batch_norm(x)
x = tf.nn.leaky_relu(x) #for conv_down, implement leakyReLU
return x
def conv_upsample(x, N, drop_rate, count):
with tf.variable_scope("conv_upsamp_{}_count{}".format(N,count)) :
#up
with tf.variable_scope("conv_up_count{}".format(count)):
x = tf.layers.conv2d_transpose(x, N, kernel_size=4, strides=2, padding='same', kernel_initializer=tf.truncated_normal_initializer(stddev=np.sqrt(0.2)))
x = tf.contrib.layers.batch_norm(x)
with tf.variable_scope("convdrop_{}".format(count)):
if drop_rate is not 0:
x = tf.nn.dropout(x, keep_prob=drop_rate)
x = tf.nn.relu(x)
#refine1
with tf.variable_scope("refine1"):
x = tf.layers.conv2d(x, N, kernel_size=3, strides=1, padding='same', kernel_initializer=tf.truncated_normal_initializer(stddev=np.sqrt(0.2)))
x = tf.contrib.layers.batch_norm(x)
with tf.variable_scope("rf1drop_out_{}".format(count)):
if drop_rate is not 0:
x = tf.nn.dropout(x, keep_prob=drop_rate)
x = tf.nn.relu(x)
#refine2
with tf.variable_scope("refine2"):
x = tf.layers.conv2d(x, N, kernel_size=3, strides=1, padding='same', kernel_initializer=tf.truncated_normal_initializer(stddev=np.sqrt(0.2)))
x = tf.contrib.layers.batch_norm(x)
with tf.variable_scope("rf2drop_out{}".format(count)):
if drop_rate is not 0:
x = tf.nn.dropout(x, keep_prob=drop_rate)
x = tf.nn.relu(x)
return x
def biLinearDown(x, N):
return tf.image.resize_images(x, [N, N])
def finalTanH(x):
with tf.variable_scope("tanh"):
x = tf.nn.tanh(x)
return x
def T(x):
#channel_output_structure
down_channel_output = [64, 128, 256, 512, 512, 512, 512, 512]
up_channel_output= [512, 512, 512, 512, 256, 128, 64, 3]
biLinearDown_output= [32, 64, 128] #for skip-connection
#down_sampling
conv1 = conv_down(x, down_channel_output[0], 1)
conv2 = conv_down(conv1, down_channel_output[1], 2)
conv3 = conv_down(conv2, down_channel_output[2], 3)
conv4 = conv_down(conv3, down_channel_output[3], 4)
conv5 = conv_down(conv4, down_channel_output[4], 5)
conv6 = conv_down(conv5, down_channel_output[5], 6)
conv7 = conv_down(conv6, down_channel_output[6], 7)
conv8 = conv_down(conv7, down_channel_output[7], 8)
#upsampling
dconv1 = conv_upsample(conv8, up_channel_output[0], drop_out[0], 1)
dconv2 = conv_upsample(dconv1, up_channel_output[1], drop_out[1], 2)
dconv3 = conv_upsample(dconv2, up_channel_output[2], drop_out[2], 3)
dconv4 = conv_upsample(dconv3, up_channel_output[3], drop_out[3], 4)
dconv5 = conv_upsample(dconv4, up_channel_output[4], drop_out[4], 5)
dconv6 = conv_upsample(tf.concat([dconv5, biLinearDown(x, biLinearDown_output[0])], axis=3), up_channel_output[5], drop_out[5], 6)
dconv7 = conv_upsample(tf.concat([dconv6, biLinearDown(x, biLinearDown_output[1])], axis=3), up_channel_output[6], drop_out[6], 7)
dconv8 = conv_upsample(tf.concat([dconv7, biLinearDown(x, biLinearDown_output[2])], axis=3), up_channel_output[7], drop_out[7], 8)
#final_tanh
T_x = finalTanH(dconv8)
return T_x
# input_tensor x : to feed as Fake
x = tf.placeholder(tf.float32, [batch_size, 256, 256, channels]) # batch_size x Height x Width x N_w
# generated tensor T(x)
T_x = T(x)
# Ground_truth tensor Y : to feed as Real
Y = tf.placeholder(tf.float32, [batch_size, 256, 256, 3]) # just a capture of video frame
# define sheudo Discriminator
def D(x, to_be_discriminated): #truth is either T(x) or GroudnTruth with a shape [256 x 256 x 3]
sheudo_prob = np.float32(np.random.uniform(low=0., high=1.))
return sheudo_prob
theta_D = [] #tf.Variables of Discriminator
# Discrminated Result
D_real = D(Y)
D_fake = D(T_x)
# Define loss
E_cGAN = tf.reduce_mean(tf.log(D_real)+ tf.log(1. - D_fake))
E_l1 = tf.reduce_mean(tf.norm((Y-T_x)))
Loss = EcGAN + lambda_*E_l1
# Optimizer
D_solver = tf.train.AdamOptimizer().minimize(-Loss, var_list=theta_D) # Only update D(X)'s parameters, so var_list = theta_D
T_solver = tf.train.AdamOptimizer().minimize(Loss, var_list=theta_T) # Only update G(X)'s parameters, so var_list = theta_T
####TEST####
# define sheudo_input for testing
sheudo_x = np.float32(np.random.uniform(low=-1., high=1., size=[16, 256,256, 99]))
sheudo_Y = np.float32(np.random.uniform(low=-1., high=1., size=[16, 256,256, 3]))
####Run####
init_g = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init_g)
sess.run(output_tensor, feed_dict={x: sheudo_input Y: sheudo_Y})