我正在使用 TensorFlow Probability 做文本分类。这是一个二分类任务,我已将输出做了独热(one-hot)编码。这是我的网络:
def neg_log_likelihood_with_logits(y_true, y_pred):
    """Negative log-likelihood loss for one-hot targets and raw logits.

    Args:
        y_true: one-hot encoded labels, shape (batch, num_classes).
        y_pred: raw (un-softmaxed) logits, shape (batch, num_classes).

    Returns:
        Scalar tensor: the batch-mean negative log-probability of the true
        class under a Categorical distribution parameterized by the logits.
    """
    # Recover integer class ids from the one-hot encoding.
    labels = tf.argmax(y_true, axis=-1)
    dist = tfp.distributions.Categorical(logits=y_pred)
    return -tf.reduce_mean(dist.log_prob(labels))
def add_kl_weight(layer, train_size, w_value=1.0):
    """Attach a non-trainable scalar KL weight to a Flipout layer.

    Creates a scalar weight initialized to ``w_value`` on ``layer`` and wires
    the layer's kernel divergence function via ``get_kernel_divergence_fn``
    so the KL term can be scaled during training.

    Args:
        layer: a tfp DenseFlipout layer (not yet built).
        train_size: number of training examples, used to scale the KL term.
        w_value: initial value of the KL weight (default 1.0).

    Returns:
        The same layer, with ``kernel_divergence_fn`` set.
    """
    kl_w = layer.add_weight(
        name=layer.name + '/kl_loss_weight',
        shape=(),
        initializer=tf.initializers.constant(w_value),
        trainable=False,
    )
    layer.kernel_divergence_fn = get_kernel_divergence_fn(train_size, kl_w)
    return layer
def build_bayesian_bcnn_model(input_shape, train_size):
    """Build a Bayesian feed-forward classifier from DenseFlipout layers.

    Architecture: three Flipout hidden layers (1000, 500, 50 units, ReLU),
    each followed by Dropout(0.5), then a 2-unit Flipout output layer that
    emits raw logits (no softmax). Every Flipout layer gets a KL weight via
    ``add_kl_weight``.

    Args:
        input_shape: shape of a single input sample (excluding batch dim).
        train_size: number of training examples, used to scale KL terms.

    Returns:
        An uncompiled Keras ``Model`` mapping inputs to 2-class logits.
    """
    model_in = Input(shape=input_shape)

    x = model_in
    # (units, activation) for each hidden Flipout block.
    for units, activation in [(1000, 'relu'), (500, 'relu'), (50, 'relu')]:
        hidden = DenseFlipout(units, activation=activation,
                              kernel_divergence_fn=None)
        hidden = add_kl_weight(hidden, train_size)
        x = hidden(x)
        x = Dropout(0.5)(x)

    # Output head: raw logits for 2 classes (loss expects logits).
    head = DenseFlipout(2, activation=None, kernel_divergence_fn=None)
    head = add_kl_weight(head, train_size)
    model_out = head(x)

    return Model(model_in, model_out)
# Build the Bayesian network sized to the training set, then compile it with
# the custom NLL loss (the model outputs logits, not probabilities).
bcnn_model = build_bayesian_bcnn_model(X_train.shape[1:], X_train.shape[0])
bcnn_model.compile(
    optimizer=Adam(1e-3),
    loss=neg_log_likelihood_with_logits,
    metrics=['acc'],
    # Needed so the per-layer KL weights are honored in TF 2.x eager mode —
    # NOTE(review): presumably; verify against the TF version in use.
    experimental_run_tf_function=False,
)
当我训练模型时,损失从大约 4000 开始,并且下降得非常缓慢。起初准确率有所提高,但随后几乎保持不变。我是不是哪里做错了?我刚开始接触贝叶斯神经网络,有人能帮忙吗?