import numpy as np
import tensorflow as tf


class Conv_VAE():
    def __init__(self, image_list, number_of_filters, z_dim=2, filter_size=3, filter_stride=2, conv_after_pool=None, pool_size=2, pool_stride=2,
                 filter_pad='SAME', pool_pad='SAME', residual_block=None, bernoulli=False, gaussian_cov=False):
        # image_list is [height, width, channels]; placeholders for the input images and the latent code z
        self.he, self.wid, self.cha = image_list
        self.output = self.he
        self.num_filter = number_of_filters
        self.X = tf.placeholder('float', [None, self.he, self.wid, self.cha])
        self.z_dim = z_dim
        self.bernoulli = bernoulli
        self.covariance = gaussian_cov
        self.z = tf.placeholder('float', [None, self.z_dim])
        self.num_output = number_of_filters[-1]
        if type(filter_size) == int:
            print('All filter_size allocated %d' % filter_size)
            self.size_filter = [filter_size] * len(number_of_filters)
        else:
            self.size_filter = filter_size
        if type(filter_stride) == int:
            print('All filter_stride allocated %d' % filter_stride)
            self.filter_stride = [filter_stride] * len(number_of_filters)
        else:
            self.filter_stride = filter_stride
        self.size_pool = pool_size
        self.pool_stride = pool_stride
        if filter_pad == 'SAME' or filter_pad == 'VALID':
            print('All filter_pad allocated ' + str(filter_pad))
            self.pad_filter = [filter_pad] * len(number_of_filters)
        else:
            self.pad_filter = filter_pad
        self.pad_pool = pool_pad
        if conv_after_pool is None:
            if not residual_block:
                print('conv_after_pool is None, so every layer is treated as a conv_after_pool layer')
                self.when_pool = [i for i in range(len(number_of_filters))]
        else:
            self.when_pool = conv_after_pool
        self.is_train = tf.placeholder(tf.bool)
        self.residual_block = residual_block
        if residual_block:
            self.res_interval = residual_block[1] - residual_block[0]
    def gaussian_MLP_encoder(self, x):
        # Convolution stack, then two fully connected layers whose 2*z_dim outputs are split into mean and stddev
        L1 = tf.identity(x)
        for i in range(len(self.num_filter) + 1):
            if i == len(self.num_filter):
                _, self.H, self.W, self.C = L1.shape
                L1 = tf.reshape(L1, [-1, int(self.H * self.W * self.C)])
                L1 = tf.contrib.layers.fully_connected(L1, num_outputs=256, activation_fn=tf.nn.relu,
                                                       weights_initializer=tf.contrib.layers.xavier_initializer())
                L1 = tf.contrib.layers.fully_connected(L1, num_outputs=self.z_dim * 2, activation_fn=None,
                                                       weights_initializer=tf.contrib.layers.xavier_initializer())
            else:
                L1 = tf.layers.conv2d(L1, filters=self.num_filter[i], kernel_size=[self.size_filter[i], self.size_filter[i]],
                                      strides=(self.filter_stride[i], self.filter_stride[i]),
                                      padding=self.pad_filter[i], kernel_initializer=tf.contrib.layers.xavier_initializer())
                L1 = tf.nn.relu(L1)
        print('After Encoding: ', L1.shape)
        # The mean parameter is unconstrained
        mean = L1[:, :self.z_dim]
        # The standard deviation must be positive. Parametrize with a softplus and
        # add a small epsilon for numerical stability
        stddev = 1e-6 + tf.nn.softplus(L1[:, self.z_dim:])
        # sampling by the re-parameterization trick: z = mean + stddev * eps, eps ~ N(0, I)
        self.z = mean + stddev * tf.random_normal(tf.shape(mean), 0, 1, dtype=tf.float32)
        return self.z, mean, stddev
    def bernoulli_MLP_decoder(self, z):
        # initializers
        L1 = tf.identity(z)
        L1 = tf.contrib.layers.fully_connected(L1, num_outputs=256, activation_fn=tf.nn.relu,
                                               weights_initializer=tf.contrib.layers.xavier_initializer())
        L1 = tf.contrib.layers.fully_connected(L1, num_outputs=int(self.H * self.W * 16), activation_fn=tf.nn.relu,
                                               weights_initializer=tf.contrib.layers.xavier_initializer())
        L1 = tf.reshape(L1, [-1, int(self.H), int(self.W), 16])
        decode_layer = self.num_filter[::-1]
        # Decoder hidden layers: transposed convolutions with ReLU, then a final sigmoid layer for the Bernoulli means
        for i in range(len(decode_layer)):
            L1 = tf.layers.conv2d_transpose(L1, filters=decode_layer[i], kernel_size=2, strides=2, activation=tf.nn.relu,
                                            kernel_initializer=tf.contrib.layers.xavier_initializer())
        out = tf.layers.conv2d_transpose(L1, filters=self.cha, kernel_size=1, strides=1, activation=tf.sigmoid,
                                         kernel_initializer=tf.contrib.layers.xavier_initializer())
        return out
    def Gaussian_MLP_decoder(self, z):
        # initializers
        if self.covariance:
            # decoder that outputs both a mean and a stddev per pixel
            L1 = tf.identity(z)
            L1 = tf.contrib.layers.fully_connected(L1, num_outputs=256, activation_fn=tf.nn.relu,
                                                   weights_initializer=tf.contrib.layers.xavier_initializer())
            L1 = tf.contrib.layers.fully_connected(L1, num_outputs=int(self.H * self.W * 16), activation_fn=tf.nn.relu,
                                                   weights_initializer=tf.contrib.layers.xavier_initializer())
            L1 = tf.reshape(L1, [-1, int(self.H), int(self.W), 16])
            decode_layer = self.num_filter[::-1]
            # Decoder hidden layers: transposed convolutions with ReLU, then a final sigmoid layer
            for i in range(len(decode_layer)):
                L1 = tf.layers.conv2d_transpose(L1, filters=decode_layer[i], kernel_size=2, strides=2, activation=tf.nn.relu,
                                                kernel_initializer=tf.contrib.layers.xavier_initializer())
            out = tf.layers.conv2d_transpose(L1, filters=self.cha * 2, kernel_size=1, strides=1, activation=tf.sigmoid,
                                             kernel_initializer=tf.contrib.layers.xavier_initializer())
            # The mean parameter is unconstrained
            mean = out[:, :, :, :self.cha]
            # The standard deviation must be positive. Parametrize with a softplus and
            # add a small epsilon for numerical stability
            stddev = 1e-6 + tf.nn.softplus(out[:, :, :, self.cha:])
            return mean, stddev
        else:
            # decoder that outputs only a mean; the stddev is fixed to 1
            L1 = tf.identity(z)
            L1 = tf.contrib.layers.fully_connected(L1, num_outputs=256, activation_fn=tf.nn.relu,
                                                   weights_initializer=tf.contrib.layers.xavier_initializer())
            L1 = tf.contrib.layers.fully_connected(L1, num_outputs=int(self.H * self.W * 16), activation_fn=tf.nn.relu,
                                                   weights_initializer=tf.contrib.layers.xavier_initializer())
            L1 = tf.reshape(L1, [-1, int(self.H), int(self.W), 16])
            decode_layer = self.num_filter[::-1]
            for i in range(len(decode_layer)):
                L1 = tf.layers.conv2d_transpose(L1, filters=decode_layer[i], kernel_size=2, strides=2, activation=tf.nn.relu,
                                                kernel_initializer=tf.contrib.layers.xavier_initializer())
            out = tf.layers.conv2d_transpose(L1, filters=self.cha, kernel_size=1, strides=1, activation=tf.sigmoid,
                                             kernel_initializer=tf.contrib.layers.xavier_initializer())
            return out, tf.constant(1.0)
    def loss(self, lr):
        # encoding (sampling by the re-parameterization technique happens inside the encoder)
        self.z, mu_E, sigma_E = self.gaussian_MLP_encoder(self.X)
        print('z shape: ', self.z.shape)
        # decoding and loss
        if self.bernoulli:
            self.y = self.bernoulli_MLP_decoder(self.z)
            self.marginal_likelihood = tf.reduce_sum(self.X * tf.log(self.y + 1e-6) + (1 - self.X) * tf.log(1 + 1e-6 - self.y), 1)
            self.KL = 0.5 * tf.reduce_sum(tf.square(mu_E) + tf.square(sigma_E) - tf.log(1e-6 + tf.square(sigma_E)) - 1, 1)
            self.marginal_likelihood = tf.reduce_mean(self.marginal_likelihood)
            self.KL = tf.reduce_mean(self.KL)
            self.ELBO = self.marginal_likelihood - self.KL
        else:
            self.mu_D, self.sigma_D = self.Gaussian_MLP_decoder(self.z)
            # here marginal_likelihood holds a negative log-likelihood, so the ELBO subtracts it
            self.marginal_likelihood = tf.reduce_sum(0.5 * tf.log(tf.square(self.sigma_D)) + tf.square(self.X - self.mu_D) / (2 * tf.square(self.sigma_D)), 1)
            self.KL = 0.5 * tf.reduce_sum(tf.square(mu_E) + tf.square(sigma_E) - tf.log(1e-6 + tf.square(sigma_E)) - 1, 1)
            self.marginal_likelihood = tf.reduce_mean(self.marginal_likelihood)
            self.KL = tf.reduce_mean(self.KL)
            self.ELBO = -self.marginal_likelihood - self.KL
        self.cost = -self.ELBO
        self.train_op = tf.train.AdamOptimizer(lr).minimize(self.cost)
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())
    def predict(self, z):
        # decode a batch of latent codes by feeding them into the sampled-z tensor
        if self.bernoulli:
            predicted = self.sess.run(self.y, feed_dict={self.z: z})
        else:
            predicted = self.sess.run(self.mu_D, feed_dict={self.z: z})
        return predicted

    def training(self, total_training_batch, batch_xs, avg_cost, avg_n, avg_K):
        # one optimization step; accumulate running averages of the cost and its two terms
        if self.bernoulli:
            _, y_p, z_p, loss_p, n_p, K_p = self.sess.run([self.train_op, self.y, self.z, self.cost, -self.marginal_likelihood, self.KL],
                                                          feed_dict={self.X: batch_xs})
            avg_cost += loss_p / total_training_batch
            avg_n += n_p / total_training_batch
            avg_K += K_p / total_training_batch
            return y_p, z_p, avg_cost, avg_n, avg_K
        else:
            _, mu, sigma, z_p, loss_p, n_p, K_p = self.sess.run([self.train_op, self.mu_D, self.sigma_D, self.z, self.cost, self.marginal_likelihood, self.KL],
                                                                feed_dict={self.X: batch_xs})
            avg_cost += loss_p / total_training_batch
            avg_n += n_p / total_training_batch
            avg_K += K_p / total_training_batch
            return mu, sigma, z_p, avg_cost, avg_n, avg_K
# collect only the MNIST digits labeled "2" as 28x28x1 images
mnist_2list = []
for i in range(len(batch_ys)):
    if np.argmax(batch_ys[i]) == 2:
        mnist_2list.append(batch_xs[i].reshape([28, 28, 1]))

model = Conv_VAE([28, 28, 1], number_of_filters=[64, 32], bernoulli=True)
model.loss(1e-3)
for epoch in range(0, 151):
    avg_cost = 0
    avg_n = 0
    avg_K = 0
    for train in range(30):
        y_p, z_p, avg_cost, avg_n, avg_K = model.training(30, mnist_2list, avg_cost, avg_n, avg_K)
    if epoch % 50 == 0:
        print('loss: ', avg_cost, '\n',
              'neg_likelihood: ', avg_n, '\n',
              'KL_divergence: ', avg_K)
My convolutional variational autoencoder does not work, and I don't know what I am doing wrong. Is there a problem with my cost function?
Actually, my cost prints as a negative value, but I assume that by itself does not hurt how well the model trains, and I don't think I wrote anything incorrectly.
However, I would like to know why the KL value keeps increasing while the neg_likelihood keeps decreasing. I suspect something must be wrong:
I thought that when the KL divergence is at its minimum, the negative log-likelihood would have to be at its maximum.
What is going wrong here?
Please help me.
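For reference, this is how I understand the two terms of my cost. The snippet below is only a small NumPy illustration of the formulas with made-up shapes and values (it is not my actual TensorFlow graph), just to show what I mean by the reconstruction term and the KL term:

import numpy as np

# toy batch: 4 "images" in [0, 1] and toy encoder outputs for z_dim = 2
x = np.random.rand(4, 28, 28, 1)
y = np.clip(np.random.rand(4, 28, 28, 1), 1e-6, 1 - 1e-6)   # pretend reconstructions
mu = np.random.randn(4, 2)
sigma = np.abs(np.random.randn(4, 2)) + 1e-6

# Bernoulli reconstruction log-likelihood, summed over every pixel of each example
log_lik = np.sum(x * np.log(y) + (1 - x) * np.log(1 - y), axis=(1, 2, 3))

# KL(q(z|x) || N(0, I)) in closed form, summed over the latent dimensions
kl = 0.5 * np.sum(mu ** 2 + sigma ** 2 - np.log(sigma ** 2) - 1, axis=1)

# cost = -ELBO = negative log-likelihood + KL, averaged over the batch
cost = np.mean(-log_lik + kl)
print('neg_likelihood:', np.mean(-log_lik), 'KL:', np.mean(kl), 'cost:', cost)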