I am trying to use the `log_prob` method of the Gamma distribution from tfp inside a custom Keras loss function, but the function always returns nan at the start of training.

I have tested the loss function, and it seems to work fine:
```python
import tensorflow as tf
import tensorflow_probability as tfp

tf.enable_eager_execution()


def gamma_loss(y_true, alpha, beta):
    gamma_distr = tfp.distributions.Gamma(concentration=alpha, rate=beta)
    log_lik_gamma = gamma_distr.log_prob(y_true)
    return -tf.reduce_mean(log_lik_gamma)


gamma_loss(100, 2, 2).numpy()
# 194.00854
```
The problem is probably related to the parameters (`alpha` and `beta`) that I pass to the function, which are produced by the final (custom) layer of the model I am using.

Here is the full code snippet:
```python
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input, Dense, Layer, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.initializers import glorot_normal
import tensorflow_probability as tfp
from sklearn.datasets import make_regression


class GammaLayer(Layer):
    def __init__(self, output_dim, **kwargs):
        self.output_dim = output_dim
        super(GammaLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        n_weight_rows = 4
        self.kernel_2 = self.add_weight(name='kernel_2',
                                        shape=(n_weight_rows, self.output_dim),
                                        initializer=glorot_normal(),
                                        trainable=True)
        self.kernel_3 = self.add_weight(name='kernel_3',
                                        shape=(n_weight_rows, self.output_dim),
                                        initializer=glorot_normal(),
                                        trainable=True)
        self.bias_2 = self.add_weight(name='bias_2',
                                      shape=(self.output_dim,),
                                      initializer=glorot_normal(),
                                      trainable=True)
        self.bias_3 = self.add_weight(name='bias_3',
                                      shape=(self.output_dim,),
                                      initializer=glorot_normal(),
                                      trainable=True)
        super(GammaLayer, self).build(input_shape)

    def call(self, x):
        # Here I use softplus to make the parameters strictly positive
        alpha = tf.math.softplus(K.dot(x, self.kernel_2) + self.bias_2)
        beta = tf.math.softplus(K.dot(x, self.kernel_3) + self.bias_3)
        return [alpha, beta]

    def compute_output_shape(self, input_shape):
        """
        The assumption is that the output is always one-dimensional
        """
        return [(input_shape[0], self.output_dim), (input_shape[0], self.output_dim)]


def gamma_loss(y_true, y_pred):
    alpha, beta = y_pred[0], y_pred[1]
    gamma_distr = tfp.distributions.Gamma(concentration=alpha, rate=beta)
    return -tf.reduce_mean(gamma_distr.log_prob(y_true))


X, y = make_regression(n_samples=1000, n_features=3, noise=0.1)

inputs = Input(shape=(3,))
x = Dense(6, activation='relu')(inputs)
x = Dense(4, activation='relu')(x)
x = GammaLayer(1, name='main_output')(x)
output_params = Concatenate(1, name="pvec")(x)

model = Model(inputs, output_params)
model.compile(loss=gamma_loss, optimizer='adam')
model.fit(X, y, epochs=30, batch_size=10)
```
Answer 0 (score: 1)
Can you try adding an extra 1e-6 or so outside of the softplus? For very negative values, softplus becomes very close to zero.
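A minimal sketch of that suggestion applied to the layer's `call` method (the 1e-6 floor is the value mentioned above; any comparably small constant would do):

```python
def call(self, x):
    # Add a small floor so the Gamma parameters stay strictly positive even
    # when softplus underflows to 0.0 in float32 for very negative inputs.
    alpha = tf.math.softplus(K.dot(x, self.kernel_2) + self.bias_2) + 1e-6
    beta = tf.math.softplus(K.dot(x, self.kernel_3) + self.bias_3) + 1e-6
    return [alpha, beta]
```

To see why this matters, here is an illustrative check (the -200.0 probe value is an assumption; the exact underflow cutoff depends on the softplus implementation and dtype):

```python
import tensorflow as tf
import tensorflow_probability as tfp

tf.enable_eager_execution()

# A sufficiently negative pre-activation underflows float32 softplus to 0.0.
param = tf.math.softplus(-200.0)
print(param.numpy())  # 0.0

# A Gamma with zero concentration/rate produces nan log-probabilities.
print(tfp.distributions.Gamma(concentration=param, rate=param).log_prob(100.).numpy())  # nan

# With the 1e-6 floor, the parameters stay positive and log_prob stays finite.
param_safe = param + 1e-6
print(tfp.distributions.Gamma(concentration=param_safe, rate=param_safe).log_prob(100.).numpy())
```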