我正在focal loss
中尝试lightGBM
中的this implementation,但不断损失:
[10] valid_0's binary_logloss: 0.206824 valid_0's focal_loss: 0.258893
[20] valid_0's binary_logloss: 0.206824 valid_0's focal_loss: 0.258893
[30] valid_0's binary_logloss: 0.206824 valid_0's focal_loss: 0.258893
当我尝试常规目标:“ binary”和指标:“ binary_logloss”时,它工作正常:
[10] valid_0's binary_logloss: 0.0362688
[20] valid_0's binary_logloss: 0.0348938
[30] valid_0's binary_logloss: 0.033937
[40] valid_0's binary_logloss: 0.0327498
[50] valid_0's binary_logloss: 0.0322498
[60] valid_0's binary_logloss: 0.0315371
[70] valid_0's binary_logloss: 0.0310556
[80] valid_0's binary_logloss: 0.030378
[90] valid_0's binary_logloss: 0.0300144
[100] valid_0's binary_logloss: 0.0297439
[110] valid_0's binary_logloss: 0.0292017
这是损失函数和目标函数:
def focal_loss_lgb(y_pred, dtrain, alpha, gamma):
a,g = alpha, gamma
y_true = dtrain.label
def fl(x,t):
p = 1/(1+np.exp(-x))
return -( a*t + (1-a)*(1-t) ) * (( 1 - ( t*p + (1-t)*(1-p)) )**g) * ( t*np.log(p)+(1-t)*np.log(1-p) )
partial_fl = lambda x: fl(x, y_true)
grad = derivative(partial_fl, y_pred, n=1, dx=1e-6)
hess = derivative(partial_fl, y_pred, n=2, dx=1e-6)
return grad, hess
def focal_loss_lgb_eval_error(y_pred, dtrain, alpha, gamma):
a,g = alpha, gamma
y_true = dtrain.label
p = 1/(1+np.exp(-y_pred))
loss = -( a*y_true + (1-a)*(1-y_true) ) * (( 1 - ( y_true*p + (1-y_true)*(1-p)) )**g) * ( y_true*np.log(p)+(1-y_true)*np.log(1-p) )
# (eval_name, eval_result, is_higher_better)
return 'focal_loss', np.mean(loss), False
def focal_loss_lgb_f1_score(preds, lgbDataset):
preds = sigmoid(preds)
binary_preds = [int(p>0.5) for p in preds]
y_true = lgbDataset.get_label()
return 'f1', f1_score(y_true, binary_preds), True
focal_loss = lambda x,y: focal_loss_lgb(x, y, alpha=0.25, gamma=1.)
focal_loss_eval = lambda x,y: focal_loss_lgb_eval_error(x, y, alpha=0.25, gamma=1.)
clf = lgbm.train(lgb_params,
dtrn_x,
num_boost_round,
valid_sets=[dval_x],
verbose_eval=verbose,
early_stopping_rounds=50,
fobj=focal_loss,
feval=focal_loss_eval
)
更新10/12/2019:
我设法获得this version working,但是它没有使用Alpha超参数:
import numpy as np
class Focal_Binary_Loss:
'''
The class of focal loss, allows the users to change the gamma parameter
'''
def __init__(self, gamma_indct):
'''
:param gamma_indct: The parameter to specify the gamma indicator
'''
self.gamma_indct = gamma_indct
def robust_pow(self, num_base, num_pow):
# numpy does not permit negative numbers to fractional power
# use this to perform the power algorithmic
return np.sign(num_base) * (np.abs(num_base)) ** (num_pow)
def focal_binary_object(self, pred, dtrain):
gamma_indct = self.gamma_indct
# retrieve data from dtrain matrix
label = dtrain.get_label()
# compute the prediction with sigmoid
sigmoid_pred = 1.0 / (1.0 + np.exp(-pred))
# gradient
# complex gradient with different parts
g1 = sigmoid_pred * (1 - sigmoid_pred)
g2 = label + ((-1) ** label) * sigmoid_pred
g3 = sigmoid_pred + label - 1
g4 = 1 - label - ((-1) ** label) * sigmoid_pred
g5 = label + ((-1) ** label) * sigmoid_pred
# combine the gradient
grad = gamma_indct * g3 * self.robust_pow(g2, gamma_indct) * np.log(g4 + 1e-9) + \
((-1) ** label) * self.robust_pow(g5, (gamma_indct + 1))
# combine the gradient parts to get hessian components
hess_1 = self.robust_pow(g2, gamma_indct) + \
gamma_indct * ((-1) ** label) * g3 * self.robust_pow(g2, (gamma_indct - 1))
hess_2 = ((-1) ** label) * g3 * self.robust_pow(g2, gamma_indct) / g4
# get the final 2nd order derivative
hess = ((hess_1 * np.log(g4 + 1e-9) - hess_2) * gamma_indct +
(gamma_indct + 1) * self.robust_pow(g5, gamma_indct)) * g1
return grad, hess
focal_loss = lambda x,y: Focal_Binary_Loss(gamma_indct=1).focal_binary_object(x,y)