我的变化性AE丢失有问题。当我在一步之内进行计算时,它就可以工作,但是当我在训练循环中尝试将其最小化时,它就行不通了。 我不明白为什么函数没有被最小化并且等于零: 我的损失
def compute_loss(model, x):
mean, logvar = model.encode(x)
z = model.reparameterize(mean, logvar)
x_logit = model.decode(z)
cross_ent = tf.nn.sigmoid_cross_entropy_with_logits(logits=x_logit, labels=x)
logpx_z = -tf.reduce_sum(cross_ent, axis=-1)
logpz = log_normal_pdf(z, 0., 0.)
logqz_x = log_normal_pdf(z, mean, logvar)
return -tf.reduce_mean(logpx_z + logpz - logqz_x)
我的火车功能:
def train_step(model, x, optimizer):
with tf.GradientTape() as tape:
loss = compute_loss(model, x)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(gradients, model.trainable_variables))
我的网络课程:
class CVAE(tf.keras.Model):
def __init__(self, latent_dim):
super(CVAE, self).__init__()
self.latent_dim = latent_dim
self.initializer = tf.keras.initializers.LecunNormal()
self.regularizer =tf.keras.regularizers.L1L2(l1=0.01, l2=0.01) # L1 + L2 penalties
self.encoder = tf.keras.Sequential(
[
tfkl.InputLayer(input_shape=(321,)),
tfkl.Dense(512,activation="relu",activity_regularizer=self.regularizer ,kernel_initializer =self.initializer ),
tfkl.Dense(latent_dim + latent_dim,activation="relu",activity_regularizer=self.regularizer,kernel_initializer =self.initializer,),
]
)
self.decoder = tf.keras.Sequential(
[
tfkl.Dense(512,activation="relu",activity_regularizer=self.regularizer,kernel_initializer =self.initializer),
tfkl.Dense(321,activation="sigmoid")]
)
def sample(self,x, eps=None):
mean ,encoded_logvar = self.encode(x)
encoder_std = tf.exp(0.5 * encoder_logvar)
if eps is None:
eps = tf.random.normal(tf.shape(encoder_std))
return self.decode(eps, apply_sigmoid=True)
def encode(self, x):
mean, logvar = tf.split(self.encoder(x), num_or_size_splits=2, axis=1)
return mean, logvar
def reparameterize(self, mean, logvar):
eps = tf.random.normal(shape=mean.shape) ## The distribution represents also a hyperparameter to optimize
return eps * logvar + mean
def decode(self, z, apply_sigmoid=False):
logits = self.decoder(z)
if apply_sigmoid:
probs = tf.sigmoid(logits)
return probs
return logits
这是我手动计算损失时的结果:
latent_dim = 3 # set the dimensionality of the latent space ,important for the embed function
model = CVAE(latent_dim)
loss = compute_loss(model,data)
loss
结果:
<tf.Tensor: shape=(), dtype=float32, numpy=233.07153>
有解决方案吗?