我正在尝试在Keras中实现一个softmax交叉熵(softmax_crossentropy)损失。该损失应只考虑标签为1或0的样本,而忽略标签为-1的样本(即标签缺失的样本)。我找到了一个实现了这种屏蔽功能的binary_crossentropy函数,但没能写出对应的softmax版本。
这是binary_crossentropy:
def binary_crossentropy(y_true, y_pred):
    """Binary cross-entropy in which entries labelled -1 are zeroed out.

    Entries whose label equals -1 are not removed: both the label and the
    prediction at those positions are multiplied by zero before the
    element-wise loss is taken, and the result is averaged over the last
    axis (so zeroed entries still count in the mean's denominator).

    Args:
        y_true: label tensor; -1 marks a missing label.
        y_pred: prediction tensor, multiplied element-wise with the mask.

    Returns:
        The element-wise loss averaged over the last axis.
    """
    # 1.0 where a label is present, 0.0 where y_true == -1.
    keep = tf.cast(tf.not_equal(y_true, -1), tf.float32)
    masked_pred = tf.multiply(y_pred, keep)
    masked_true = tf.multiply(y_true, keep)
    # NOTE(review): the masked prediction is passed first and the masked label
    # second, exactly as in the original snippet -- confirm this matches the
    # argument order K.binary_crossentropy expects in the Keras version in use.
    return K.mean(K.binary_crossentropy(masked_pred, masked_true), axis=-1)
我尝试把其中的K.binary_crossentropy()替换为K.categorical_crossentropy,但这样计算出来的损失始终是“nan”。
如何在Keras(Tensorflow后端)上实现此功能?
编辑:
@mujjiga建议使用sparse_categorical_crossentropy和boolean_mask(代码如下),但是在编译模型时遇到错误:
def sparse_crossentropy_masked(y_true, y_pred):
    """Mean sparse categorical cross-entropy over samples whose label is not -1.

    Args:
        y_true: tensor of integer class labels; -1 marks a sample to ignore.
        y_pred: tensor of per-class predictions for the same samples.

    Returns:
        The mean of the per-sample losses of the samples that were kept.
    """
    # NOTE(review): the mask has the shape of y_true and is applied to y_pred
    # as well, so it must line up with y_pred's leading dimensions -- TODO
    # confirm the shape Keras passes for y_true when the model is compiled.
    has_label = tf.not_equal(y_true, -1)
    kept_labels = tf.boolean_mask(y_true, has_label)
    kept_preds = tf.boolean_mask(y_pred, has_label)
    per_sample = K.sparse_categorical_crossentropy(kept_labels, kept_preds)
    return K.mean(per_sample)
# Sanity check: a batch containing one ignored sample (label -1) must yield
# the same loss as the same batch with that sample removed.
y_true = tf.constant(np.array([0., 1., 2., -1]))
y_pred = tf.constant(np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.], [0., 0., 1.]]))
# Fixed: the call previously used the undefined names `y` and `y_hat`
# (NameError); the tensors built above are `y_true` and `y_pred`.
loss_op = sparse_crossentropy_masked(y_true, y_pred)

# Reference batch: the same first three samples, without the ignored one.
y_true_1 = tf.constant(np.array([0., 1., 2.]))
y_pred_1 = tf.constant(np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]))
loss_1_op = sparse_crossentropy_masked(y_true_1, y_pred_1)

# Evaluate both loss tensors in one session run and compare them.
with tf.Session() as sess:
    loss, loss_1 = sess.run([loss_op, loss_1_op])
    assert loss == loss_1
# Passing the custom loss to compile() is the step that raises the error
# below (`model` is defined outside this snippet).
model.compile(loss=sparse_crossentropy_masked)
### TypeError: __int__ returned non-int (type NoneType)
答案 0 :(得分:0)
使用sparse_categorical_crossentropy和boolean_mask:
def sparse_crossentropy_masked(y_true, y_pred):
    """Mean sparse categorical cross-entropy over samples whose label is not -1.

    Args:
        y_true: tensor of integer class labels; -1 marks a sample to ignore.
        y_pred: tensor of per-class predictions for the same samples.

    Returns:
        The mean of the per-sample losses of the samples that were kept.
    """
    # True for every sample that carries a real label (anything but -1).
    has_label = tf.not_equal(y_true, -1)
    kept_labels = tf.boolean_mask(y_true, has_label)
    kept_preds = tf.boolean_mask(y_pred, has_label)
    per_sample = K.sparse_categorical_crossentropy(kept_labels, kept_preds)
    return K.mean(per_sample)
# Sanity check: a batch containing one ignored sample (label -1) must yield
# the same loss as the same batch with that sample removed.
y_true = tf.constant(np.array([0., 1., 2., -1]))
y_pred = tf.constant(np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.], [0., 0., 1.]]))
# Fixed: the call previously used the undefined names `y` and `y_hat`
# (NameError); the tensors built above are `y_true` and `y_pred`.
loss_op = sparse_crossentropy_masked(y_true, y_pred)

# Reference batch: the same first three samples, without the ignored one.
y_true_1 = tf.constant(np.array([0., 1., 2.]))
y_pred_1 = tf.constant(np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]))
loss_1_op = sparse_crossentropy_masked(y_true_1, y_pred_1)

# Evaluate both loss tensors in one session run and compare them.
with tf.Session() as sess:
    loss, loss_1 = sess.run([loss_op, loss_1_op])
    assert loss == loss_1
sparse_categorical_crossentropy似乎存在bug(参见一个类似的问题,原文此处为链接)。因此,我们只能改用categorical_crossentropy,但这样就需要把真实标签(ground truth)转换为独热(one-hot)编码。对于不参与损失计算的样本,我们把它的整行标签都设为-1(如果您感到困惑,请在下面的代码中打印y查看)。
工作示例:
def categorical_crossentropy_masked(y_true, y_pred):
    """Mean categorical cross-entropy over rows that carry a real label.

    A row of `y_true` is kept when at least one of its entries along axis 1
    differs from -1; rows consisting only of -1 are dropped from both
    `y_true` and `y_pred` before the loss is computed.

    Args:
        y_true: target tensor; a row filled with -1 marks an ignored sample.
        y_pred: prediction tensor with one row per sample.

    Returns:
        The mean of the per-row losses of the rows that were kept.
    """
    # NOTE(review): if every row of a batch is masked, the mean is taken over
    # an empty tensor -- presumably NaN; confirm if such batches can occur.
    row_has_label = tf.reduce_any(tf.not_equal(y_true, -1), 1)
    kept_targets = tf.boolean_mask(y_true, row_has_label)
    kept_preds = tf.boolean_mask(y_pred, row_has_label)
    return K.mean(K.categorical_crossentropy(kept_targets, kept_preds))
# Small fully connected classifier: 3 inputs -> 32 -> 16 -> 3-way softmax.
inputs = Input(shape=(3,))
hidden = Dense(32, activation='relu')(inputs)
hidden = Dense(16, activation='relu')(hidden)
outputs = Dense(3, activation='softmax')(hidden)
model = Model(inputs, outputs)
model.compile(optimizer='adam', loss=[categorical_crossentropy_masked])

# Synthetic data: 100 random 3-feature samples with random classes in {0, 1, 2},
# converted to one-hot rows.
x = np.random.randn(100, 3)
y = tf.keras.utils.to_categorical(np.random.randint(0, 3, size=(100)))

# Overwrite 15 randomly drawn rows (indices may repeat) with -1 in every
# column, so the masked loss ignores them.
masked_rows = np.random.randint(0, 100, size=(15))
y[masked_rows] = -1.
model.fit(x, y)