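我的 total_loss 定义为：$L_{total} = \mathrm{mean}\big((\gamma_1 L_{cos} + \gamma_2 L_{mag}) \odot \mathrm{boundary\_mask}\big)$，其中 $L_{cos} = \big(1 - |\hat{v}_{pred} \cdot \hat{v}_{target}|\big)\, M_{pred}$，$L_{mag} = \max(\lambda M_{target} - M_{pred},\, 0)$。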
我面临的问题是：在处理完第一个批次之后，部分权重被更新为 nan，导致所有输出均为 nan。如果去掉梯度损失，训练可以正常进行。我发现梯度损失中的分母变为 0，从而引发了这个问题。为了解决它，我把所有为 0 的分母（梯度幅值）替换为 1。但这样做只能在第一次反向传播时得到有效的损失值，之后的结果仍然是 nan。
def refineLoss(pred, target, boundary_mask, cuda, lambd=1.5, gamma_1=0.5,
               gamma_2=0.5, eps=1e-6):
    """Return the mean Sobel-gradient loss between ``pred`` and ``target``.

    The loss combines two per-pixel terms, weighted by ``boundary_mask``:

    * ``Lcos = (1 - |cos(angle)|) * M_pred`` penalises a mismatch between the
      gradient directions of the prediction and the target.
    * ``Lmag = max(lambd * M_target - M_pred, 0)`` penalises predicted
      gradients that are weaker than ``lambd`` times the target gradient.

    Numerical stability: the gradient magnitude is computed as
    ``sqrt(Gx**2 + Gy**2 + eps)``. Without ``eps`` the square root is
    evaluated at exactly 0 on flat regions, where its derivative is infinite;
    the backward pass then produces ``inf * 0 = NaN`` and the weights become
    NaN after the first optimiser step. Patching the zero magnitudes *after*
    the square root does not help, because the NaN originates in the backward
    pass of ``sqrt`` itself.

    Args:
        pred: Predicted map. The ``(1, 1, 3, 3)`` Sobel kernels require a
            single-channel input accepted by ``F.conv2d``.
        target: Target map; must be on the same device and of the same dtype
            as ``pred`` because both are convolved with the same kernels.
        boundary_mask: Per-pixel weight multiplied into the loss; it is moved
            to ``pred``'s device.
        cuda: Kept for backward compatibility. The Sobel kernels and the mask
            now follow ``pred.device``, so the flag is no longer consulted.
        lambd: Scale applied to the target magnitude in ``Lmag``.
        gamma_1: Weight of the direction term ``Lcos``.
        gamma_2: Weight of the magnitude term ``Lmag``.
        eps: Small positive constant added inside the square root.

    Returns:
        A scalar tensor: the mean of the masked, weighted loss map.
    """
    device, dtype = pred.device, pred.dtype

    # Sobel filters for the x and y directions, created directly on the
    # device/dtype of the input so no separate transfer step is needed.
    sobel_x = torch.tensor(
        [[1, 0, -1], [2, 0, -2], [1, 0, -1]], dtype=dtype, device=device
    ).view(1, 1, 3, 3)
    sobel_y = torch.tensor(
        [[1, 2, 1], [0, 0, 0], [-1, -2, -1]], dtype=dtype, device=device
    ).view(1, 1, 3, 3)
    boundary_mask = boundary_mask.to(device)

    # Gradients in the x and y direction for prediction and target.
    G_x_pred = F.conv2d(pred, sobel_x, padding=1)
    G_y_pred = F.conv2d(pred, sobel_y, padding=1)
    G_x_target = F.conv2d(target, sobel_x, padding=1)
    G_y_target = F.conv2d(target, sobel_y, padding=1)

    # Gradient magnitudes. eps keeps the argument of sqrt strictly positive,
    # so both the later division and the derivative of sqrt stay finite.
    M_pred = torch.sqrt(G_x_pred ** 2 + G_y_pred ** 2 + eps)
    M_target = torch.sqrt(G_x_target ** 2 + G_y_target ** 2 + eps)

    # Debug dumps of the intermediate maps (saveimage is defined elsewhere).
    saveimage(M_pred[0], "pred_magnitude")
    saveimage(pred[0], "pred_confidence")
    saveimage(M_target[0], "image_magnitude")

    # Cosine of the angle between the two gradient vectors; on flat regions
    # the numerator is 0, so the term contributes (1 - 0) * sqrt(eps) ~= 0.
    cos_angle = (
        G_x_pred * G_x_target + G_y_pred * G_y_target
    ) / (M_pred * M_target)
    Lcos = (1 - torch.abs(cos_angle)) * M_pred

    # Lmag = max(lambd * M_target - M_pred, 0), computed out-of-place.
    Lmag = torch.clamp(lambd * M_target - M_pred, min=0)

    # Total gradient loss, restricted/weighted by the boundary mask.
    total_loss = (gamma_1 * Lcos + gamma_2 * Lmag) * boundary_mask
    return total_loss.mean()