Question

我在 Keras 中实现了一个 3D CVAE，想用它来重建 3D CT 体数据。模型已经搭建完毕，训练也能正常运行，但损失值很不理想。

我尝试过更改潜在空间的维度、隐藏层的数量以及先验的方差，但都无济于事。

所有像素值都归一化到 0-1 之间，体数据的尺寸为 256x256x16x1，即 256px x 256px、16 张切片、1 个通道。我使用二元交叉熵 + KL 散度损失和 Adam 优化器，学习率为 0.001。

这是我的网络的样子：

def sampling(args):
    """Reparameterization trick by sampling from an isotropic unit Gaussian.

    # Arguments
        args (tensor): mean and log of variance of Q(z|X)

    # Returns
        z (tensor): sampled latent vector
    """
    z_mean, z_log_var = args
    batch = K.shape(z_mean)[0]
    dim = K.int_shape(z_mean)[1]
    # by default, random_normal has mean=0 and std=1.0
    epsilon = K.random_normal(shape=(batch, dim))

    # exp(0.5 * log_var) turns the log-variance into a standard deviation,
    # so the result is z_mean + std * epsilon.
    return z_mean + K.exp(0.5 * z_log_var) * epsilon



# Compute VAE loss
def my_vae_loss(y_true, y_pred):
    """Return the VAE loss: scaled reconstruction term plus KL term.

    # Arguments
        y_true (tensor): target volumes
        y_pred (tensor): reconstructed volumes produced by the decoder

    # Returns
        vae_loss (tensor): mean of (reconstruction loss + KL loss)

    Reads the module-level tensors `z_mean` and `z_log_var` and the
    module-level value `image_size`.
    """
    # Reconstruction term: binary cross-entropy over the flattened tensors,
    # scaled by image_size ** 2.
    # NOTE(review): `image_size` is not defined in the visible script, and
    # the volumes are described as 256x256x16 -- confirm that this factor
    # is meant to ignore the depth axis.
    flat_true = K.flatten(y_true)
    flat_pred = K.flatten(y_pred)
    reconstruction = binary_crossentropy(flat_true, flat_pred) * (image_size * image_size)

    # KL term: -0.5 * sum(1 + log_var - mean^2 - exp(log_var)) over the
    # last axis.
    kl_inner = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
    kl_divergence = K.sum(kl_inner, axis=-1) * -0.5

    return K.mean(reconstruction + kl_divergence)
# network parameters

# Shape of one input volume as passed to Input(): 256 x 256 pixels,
# 16 slices, 1 channel.
input_shape = (256, 256, 16, 1)

# Architecture settings.
latent_dim = 256   # width of the z_mean / z_log_var Dense layers
filters = 16       # starting filter count; doubled before each encoder conv
kernel_size = 3    # kernel size passed to the encoder Conv3D layers

# Training settings (not referenced by the code shown in this script).
batch_size = 2
epochs = 1000


# VAE model = encoder + decoder
# build encoder model: four Conv3D -> MaxPooling3D stages; the filter count
# is doubled before every convolution and each pooling uses a 2x2x2 window
inputs = Input(shape=input_shape, name='encoder_input')
x = inputs
for _ in range(4):
    filters *= 2
    conv_out = Conv3D(filters=filters,
                      kernel_size=kernel_size,
                      activation='relu',
                      padding='same')(x)
    x = MaxPooling3D(pool_size=(2, 2, 2))(conv_out)

# shape info needed to build decoder model (static shape of the last
# pooled feature map)
shape = K.int_shape(x)

# generate latent vector Q(z|X): flatten, one hidden Dense layer, then two
# parallel Dense heads for the mean and the log-variance
x = Dense(1024, activation='relu')(Flatten()(x))
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)

# use reparameterization trick to push the sampling out as input
# note that "output_shape" isn't necessary with the TensorFlow backend
z = Lambda(sampling,
           output_shape=(latent_dim,),
           name='z')([z_mean, z_log_var])

# instantiate encoder model; it outputs [z_mean, z_log_var, z]
encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
encoder.summary()
#plot_model(encoder, to_file='vae_cnn_encoder.png', show_shapes=True)

# build decoder model
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')

# Project the latent vector to the flattened size of the last encoder
# feature map, then restore that feature map's layout.
x = Dense(shape[1] * shape[2] * shape[3] * shape[4], activation='relu')(latent_inputs)
x = Reshape((shape[1], shape[2], shape[3], shape[4]))(x)

# Mirror the encoder: each stage doubles every spatial dimension and then
# applies a learnable convolution. `filters` still holds the value left by
# the encoder loop and is halved after every stage, so it ends at its
# starting value.
for _ in range(4):
    x = UpSampling3D((2, 2, 2))(x)
    x = Conv3D(filters=filters,
               kernel_size=kernel_size,
               activation='relu',
               padding='same')(x)
    filters //= 2

# The reconstruction must have the same number of channels as the input
# volume so that y_pred and y_true have matching shapes in the loss.
# Sigmoid keeps the output in [0, 1].
outputs = Conv3D(filters=input_shape[-1],
                 kernel_size=kernel_size,
                 activation='sigmoid',
                 padding='same',
                 name='decoder_output')(x)

# instantiate decoder model
decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()
#plot_model(decoder, to_file='vae_cnn_decoder.png', show_shapes=True)

# instantiate VAE model: the encoder outputs [z_mean, z_log_var, z], and
# only the sampled z (index 2) is passed to the decoder
latent_sample = encoder(inputs)[2]
outputs = decoder(latent_sample)
vae = Model(inputs, outputs, name='vae')

预先感谢您的任何建议，

祝好

迈克尔

训练3D卷积变分自动编码器

0 个答案: