What is wrong with my ConvVAE (variational autoencoder)?

Date: 2018-09-03 20:45:58

Tags: python tensorflow autoencoder

import numpy as np
import tensorflow as tf


class Conv_VAE():
    def __init__(self, image_list, number_of_filters, z_dim=2, filter_size=3, filter_stride=2, conv_after_pool=None, pool_size=2, pool_stride=2,
                 filter_pad='SAME', pool_pad='SAME', residual_block=None, bernoulli=False, gaussian_cov=False):
        # image_list is [height, width, channels] of the input images
        self.he, self.wid, self.cha = image_list
        self.output = self.he
        self.num_filter = number_of_filters
        self.X = tf.placeholder('float', [None, self.he, self.wid, self.cha])

        self.z_dim = z_dim

        self.bernoulli = bernoulli
        self.covariance = gaussian_cov

        self.z = tf.placeholder('float', [None, self.z_dim])
        self.num_output = number_of_filters[-1]

        if type(filter_size) == int:
            print('All filter_size allocated %d' % filter_size)
            self.size_filter = [filter_size] * len(number_of_filters)
        else:
            self.size_filter = filter_size
        if type(filter_stride) == int:
            print('All filter_stride allocated %d' % filter_stride)
            self.filter_stride = [filter_stride] * len(number_of_filters)
        else:
            self.filter_stride = filter_stride

        self.size_pool = pool_size
        self.pool_stride = pool_stride

        if filter_pad == 'SAME' or filter_pad == 'VALID':
            print('All filter_pad allocated ' + str(filter_pad))
            self.pad_filter = ['SAME'] * len(number_of_filters)
        else:
            self.pad_filter = filter_pad
        self.pad_pool = pool_pad
        if conv_after_pool is None:
            if not residual_block:
                print('conv_after_pool is None, so pooling is applied after the conv in every layer')
                self.when_pool = [i for i in range(len(number_of_filters))]
        else:
            self.when_pool = conv_after_pool
        self.is_train = tf.placeholder(tf.bool)
        self.residual_block = residual_block
        if residual_block:
            self.res_interval = residual_block[1] - residual_block[0]

    def gaussian_MLP_encoder(self, x):

        L1 = tf.identity(x)
        for i in range(len(self.num_filter) + 1):
            if i == len(self.num_filter):
                # Flatten the conv features, then map to 2 * z_dim outputs (mean and stddev)
                _, self.H, self.W, self.C = L1.shape
                L1 = tf.reshape(L1, [-1, self.H * self.W * self.C])
                L1 = tf.contrib.layers.fully_connected(L1, num_outputs=256, activation_fn=tf.nn.relu,
                                                       weights_initializer=tf.contrib.layers.xavier_initializer())
                L1 = tf.contrib.layers.fully_connected(L1, num_outputs=self.z_dim * 2, activation_fn=None,
                                                       weights_initializer=tf.contrib.layers.xavier_initializer())
            else:
                L1 = tf.layers.conv2d(L1, filters=self.num_filter[i],
                                      kernel_size=[self.size_filter[i], self.size_filter[i]],
                                      strides=(self.filter_stride[i], self.filter_stride[i]),
                                      padding=self.pad_filter[i],
                                      kernel_initializer=tf.contrib.layers.xavier_initializer())
                L1 = tf.nn.relu(L1)
        print('After Encoding: ', L1.shape)

        # The mean parameter is unconstrained
        mean = L1[:, :self.z_dim]

        # The standard deviation must be positive. Parametrize with a softplus and
        # add a small epsilon for numerical stability
        stddev = 1e-6 + tf.nn.softplus(L1[:, self.z_dim:])

        # Reparameterization trick: z = mean + stddev * epsilon, epsilon ~ N(0, 1)
        self.z = mean + stddev * tf.random_normal(tf.shape(mean), 0, 1, dtype=tf.float32)

        return self.z, mean, stddev

    def bernoulli_MLP_decoder(self, z):
        L1 = tf.identity(z)
        L1 = tf.contrib.layers.fully_connected(L1, num_outputs=256, activation_fn=tf.nn.relu,
                                               weights_initializer=tf.contrib.layers.xavier_initializer())
        L1 = tf.contrib.layers.fully_connected(L1, num_outputs=int(self.H * self.W * 16), activation_fn=tf.nn.relu,
                                               weights_initializer=tf.contrib.layers.xavier_initializer())
        L1 = tf.reshape(L1, [-1, self.H, self.W, 16])
        decode_layer = self.num_filter[::-1]
        # Transposed-convolution decoder layers (ReLU), upsampling by 2 at each step
        for i in range(len(decode_layer)):
            L1 = tf.layers.conv2d_transpose(L1, filters=decode_layer[i], kernel_size=2, strides=2, activation=tf.nn.relu,
                                            kernel_initializer=tf.contrib.layers.xavier_initializer())

        # Final 1x1 transposed conv with sigmoid gives per-pixel Bernoulli parameters
        out = tf.layers.conv2d_transpose(L1, filters=self.cha, kernel_size=1, strides=1, activation=tf.sigmoid,
                                         kernel_initializer=tf.contrib.layers.xavier_initializer())
        return out

    def Gaussian_MLP_decoder(self, z):
        if self.covariance:
            L1 = tf.identity(z)
            L1 = tf.contrib.layers.fully_connected(L1, num_outputs=256, activation_fn=tf.nn.relu,
                                                   weights_initializer=tf.contrib.layers.xavier_initializer())
            L1 = tf.contrib.layers.fully_connected(L1, num_outputs=int(self.H * self.W * 16), activation_fn=tf.nn.relu,
                                                   weights_initializer=tf.contrib.layers.xavier_initializer())
            L1 = tf.reshape(L1, [-1, self.H, self.W, 16])
            decode_layer = self.num_filter[::-1]
            # Transposed-convolution decoder layers (ReLU), upsampling by 2 at each step
            for i in range(len(decode_layer)):
                L1 = tf.layers.conv2d_transpose(L1, filters=decode_layer[i], kernel_size=2, strides=2, activation=tf.nn.relu,
                                                kernel_initializer=tf.contrib.layers.xavier_initializer())

            # Twice as many output channels: per-pixel mean and stddev
            out = tf.layers.conv2d_transpose(L1, filters=self.cha * 2, kernel_size=1, strides=1, activation=tf.sigmoid,
                                             kernel_initializer=tf.contrib.layers.xavier_initializer())

            # The mean parameter is unconstrained
            mean = out[:, :, :, :self.cha]

            # The standard deviation must be positive. Parametrize with a softplus and
            # add a small epsilon for numerical stability
            stddev = 1e-6 + tf.nn.softplus(out[:, :, :, self.cha:])

            return mean, stddev

        else:
            L1 = tf.identity(z)
            L1 = tf.contrib.layers.fully_connected(L1, num_outputs=256, activation_fn=tf.nn.relu,
                                                   weights_initializer=tf.contrib.layers.xavier_initializer())
            L1 = tf.contrib.layers.fully_connected(L1, num_outputs=int(self.H * self.W * 16), activation_fn=tf.nn.relu,
                                                   weights_initializer=tf.contrib.layers.xavier_initializer())
            L1 = tf.reshape(L1, [-1, self.H, self.W, 16])
            decode_layer = self.num_filter[::-1]
            # Transposed-convolution decoder layers (ReLU), upsampling by 2 at each step
            for i in range(len(decode_layer)):
                L1 = tf.layers.conv2d_transpose(L1, filters=decode_layer[i], kernel_size=2, strides=2, activation=tf.nn.relu,
                                                kernel_initializer=tf.contrib.layers.xavier_initializer())

            # Single set of channels for the mean; stddev fixed to 1
            out = tf.layers.conv2d_transpose(L1, filters=self.cha, kernel_size=1, strides=1, activation=tf.sigmoid,
                                             kernel_initializer=tf.contrib.layers.xavier_initializer())

            return out, tf.constant(1.0)

    def loss(self, lr):
        # Encode; z is sampled inside the encoder via the reparameterization trick
        self.z, mu_E, sigma_E = self.gaussian_MLP_encoder(self.X)

        print('z shape: ', self.z.shape)

        # Decode and build the loss
        if self.bernoulli:
            self.y = self.bernoulli_MLP_decoder(self.z)

            self.marginal_likelihood = tf.reduce_sum(self.X * tf.log(self.y + 1e-6) + (1 - self.X) * tf.log(1 + 1e-6 - self.y), 1)
            self.KL = 0.5 * tf.reduce_sum(tf.square(mu_E) + tf.square(sigma_E) - tf.log(1e-6 + tf.square(sigma_E)) - 1, 1)
            self.marginal_likelihood = tf.reduce_mean(self.marginal_likelihood)
            self.KL = tf.reduce_mean(self.KL)
            self.ELBO = self.marginal_likelihood - self.KL
        else:
            self.mu_D, self.sigma_D = self.Gaussian_MLP_decoder(self.z)
            self.marginal_likelihood = tf.reduce_sum(0.5 * tf.log(tf.square(self.sigma_D)) + tf.square(self.X - self.mu_D) / (2 * tf.square(self.sigma_D)), 1)
            self.KL = 0.5 * tf.reduce_sum(tf.square(mu_E) + tf.square(sigma_E) - tf.log(1e-6 + tf.square(sigma_E)) - 1, 1)
            self.marginal_likelihood = tf.reduce_mean(self.marginal_likelihood)
            self.KL = tf.reduce_mean(self.KL)
            self.ELBO = -self.marginal_likelihood - self.KL

        self.cost = -self.ELBO
        self.train_op = tf.train.AdamOptimizer(lr).minimize(self.cost)
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    def predict(self, z):
        # Decode latent codes by feeding them in place of self.z
        if self.bernoulli:
            predicted = self.sess.run(self.y, feed_dict={self.z: z})
        else:
            predicted = self.sess.run(self.mu_D, feed_dict={self.z: z})
        return predicted

    def training(self, total_training_batch, batch_xs, avg_cost, avg_n, avg_K):
        # Run one optimization step and accumulate running averages of the loss terms
        if self.bernoulli:
            _, y_p, z_p, loss_p, n_p, K_p = self.sess.run(
                [self.train_op, self.y, self.z, self.cost, -self.marginal_likelihood, self.KL],
                feed_dict={self.X: batch_xs})
            avg_cost += loss_p / total_training_batch
            avg_n += n_p / total_training_batch
            avg_K += K_p / total_training_batch
            return y_p, z_p, avg_cost, avg_n, avg_K
        else:
            _, mu, sigma, z_p, loss_p, n_p, K_p = self.sess.run(
                [self.train_op, self.mu_D, self.sigma_D, self.z, self.cost, self.marginal_likelihood, self.KL],
                feed_dict={self.X: batch_xs})
            avg_cost += loss_p / total_training_batch
            avg_n += n_p / total_training_batch
            avg_K += K_p / total_training_batch
            return mu, sigma, z_p, avg_cost, avg_n, avg_K



# batch_xs / batch_ys are assumed here to be a batch of MNIST images and one-hot labels
# loaded elsewhere; only the digit "2" is kept for training.
mnist_2list = []
for i in range(len(batch_ys)):
    if np.argmax(batch_ys[i]) == 2:
        mnist_2list.append(batch_xs[i].reshape([28, 28, 1]))

model = Conv_VAE([28, 28, 1], number_of_filters=[64, 32], bernoulli=True)
model.loss(1e-3)
for epoch in range(0, 151):
    avg_cost = 0
    avg_n = 0
    avg_K = 0
    for train in range(30):
        y_p, z_p, avg_cost, avg_n, avg_K = model.training(30, mnist_2list, avg_cost, avg_n, avg_K)
    if epoch % 50 == 0:
        print('loss: ', avg_cost, '\n',
              'neg_likelihood: ', avg_n, '\n',
              'KL_divergence: ', avg_K)

My convolutional variational autoencoder does not work, and I don't know what I did wrong. Is there a problem with my cost function?

In fact, my cost prints as a negative value, though I assumed that would not affect how well the model trains. Beyond that, I don't think I have written anything incorrectly.
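
As a sanity check on the signs, here is a minimal sketch in plain NumPy with made-up toy numbers (x_toy, y_toy, mu_toy, sigma_toy are purely illustrative, not my model's outputs), using the same Bernoulli reconstruction and KL formulas as in loss() above. The reconstruction term should be non-positive and the KL term non-negative, so the cost should come out non-negative:

# Sign check with toy values only (all *_toy arrays are made up for illustration)
import numpy as np

x_toy = np.array([0., 1., 1., 0.])        # binary "pixels"
y_toy = np.array([0.1, 0.8, 0.9, 0.2])    # decoder probabilities in (0, 1)
mu_toy = np.array([0.5, -0.3])            # encoder mean
sigma_toy = np.array([0.9, 1.1])          # encoder stddev (positive)

log_lik = np.sum(x_toy * np.log(y_toy + 1e-6) + (1 - x_toy) * np.log(1 - y_toy + 1e-6))
kl = 0.5 * np.sum(mu_toy ** 2 + sigma_toy ** 2 - np.log(sigma_toy ** 2 + 1e-6) - 1)

print(log_lik, kl, -(log_lik - kl))       # log_lik <= 0, kl >= 0, so cost = -(log_lik - kl) >= 0

So I don't understand where the negative cost values I see during training come from.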

However, I would like to know why the KL value is being maximized while the Neg_likelihood is being minimized. I suspect something must be wrong.

My understanding was that the negative likelihood should be maximized while the KL divergence is minimized.
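
To make the question concrete, this is the quantity the Bernoulli branch of my loss() above minimizes (assuming I have expanded the signs correctly):

cost = -ELBO
     = -(marginal_likelihood - KL)
     = neg_likelihood + KL

Both printed terms (neg_likelihood and KL) enter this sum with a plus sign, and that is what confuses me about the behaviour I am seeing.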

What is going on here?

Please help me.

0 Answers