在this comment中,@ eliadl在下面的代码中发布以用成本矩阵加权交叉熵。
import tensorflow.keras.backend as K
from tensorflow.keras.losses import CategoricalCrossentropy
class WeightedCategoricalCrossentropy(CategoricalCrossentropy):
def __init__(self, cost_mat, name='weighted_categorical_crossentropy', **kwargs):
assert(cost_mat.ndim == 2)
assert(cost_mat.shape[0] == cost_mat.shape[1])
super().__init__(name=name, **kwargs)
self.cost_mat = K.cast_to_floatx(cost_mat)
def __call__(self, y_true, y_pred):
return super().__call__(
y_true=y_true,
y_pred=y_pred,
sample_weight=get_sample_weights(y_true, y_pred, self.cost_mat),
)
def get_sample_weights(y_true, y_pred, cost_m):
num_classes = len(cost_m)
y_pred.shape.assert_has_rank(2)
y_pred.shape[1].assert_is_compatible_with(num_classes)
y_pred.shape.assert_is_compatible_with(y_true.shape)
y_pred = K.one_hot(K.argmax(y_pred), num_classes)
y_true_nk1 = K.expand_dims(y_true, 2)
y_pred_n1k = K.expand_dims(y_pred, 1)
cost_m_1kk = K.expand_dims(cost_m, 0)
sample_weights_nkk = cost_m_1kk * y_true_nk1 * y_pred_n1k
sample_weights_n = K.sum(sample_weights_nkk, axis=[1, 2])
return sample_weights_n
我正尝试使用相同的想法通过以下代码对焦点损失进行加权:
class WeightedFocalLoss(tf.keras.losses.Loss):
def __init__(self, gamma=2.0, alpha=1.0, cost_mat=np.ones((10,10)),
reduction=tf.keras.losses.Reduction.AUTO, name='weighted_focal_loss', **kwargs):
assert(cost_mat.ndim == 2)
assert(cost_mat.shape[0] == cost_mat.shape[1])
super(WeightedFocalLoss, self).__init__(reduction=reduction,
name=name, **kwargs)
self.cost_mat = K.cast_to_floatx(cost_mat)
self.gamma = float(gamma)
self.alpha = float(alpha)
def call(self, y_true, y_pred):
epsilon = 1.e-9
#sw = get_sample_weights(y_true, y_pred, self.cost_mat)
y_true = tf.convert_to_tensor(y_true, tf.float32)
y_pred = tf.convert_to_tensor(y_pred, tf.float32)
sw = get_sample_weights(y_true, y_pred, self.cost_mat)
model_out = tf.add(y_pred, epsilon)
ce = tf.multiply(y_true, -tf.math.log(model_out))
weight = tf.multiply(y_true, tf.pow(
tf.subtract(1., model_out), self.gamma))
fl = tf.multiply(self.alpha, tf.multiply(weight, ce))
wfl = tf.multiply(fl, sw)
reduced_wfl = tf.reduce_max(wfl, axis=1)
return reduced_wfl
def get_sample_weights(y_true, y_pred, cost_m):
num_classes = len(cost_m)
y_pred.shape.assert_has_rank(2)
y_pred.shape[1].assert_is_compatible_with(num_classes)
y_pred.shape.assert_is_compatible_with(y_true.shape)
y_pred = K.one_hot(K.argmax(y_pred), num_classes)
y_true_nk1 = K.expand_dims(y_true, 2)
y_pred_n1k = K.expand_dims(y_pred, 1)
cost_m_1kk = K.expand_dims(cost_m, 0)
sample_weights_nkk = cost_m_1kk * y_true_nk1 * y_pred_n1k
sample_weights_n = K.sum(sample_weights_nkk, axis=[1, 2])
return sample_weights_n
这给了我一个y_pred.shape[1].assert_is_compatible_with(num_classes)
行的错误信息
关于 int没有归因assert_is_compatible_with
那么,这是加权聚焦损失的正确方法吗?
用法:
w_array = np.ones((10, 10))
w_array[1, 7] = 1.5
w_array[7, 1] = 1.5
w_array[3, 8] = 1.5
w_array[8, 3] = 1.5
w_array[5, 6] = 1.5
w_array[6, 5] = 1.5
model.compile(optimizer=tf.keras.optimizers.Adam(lr=0.0001),
loss=WeightedFocalLoss(gamma=2.0, alpha=1.0, cost_mat=w_array),
metrics=['categorical_accuracy'])