我有一个我在tensorflow上实现的自定义损失代码,出于技术原因,我希望将此代码传递给Pytorch。但是我似乎无法使其工作,而且我也不知道为什么。 火炬的损失似乎并没有训练网络
目标是将网络的原始线性输出(在softmax之前)与真实概率进行比较。 所需损失类似于this video中13:10的损失
Tensorflow代码:
import numpy as np
from scipy.special import softmax
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Softmax
from tensorflow.keras import Input, Model
delta = tf.Variable([[1.]], trainable=False)
main_input = Input(shape=(10,))
output = Dense(4, activation='linear')(main_input)
def custom_loss(delta):
def loss(y_true, y_pred):
y_pred_softmax = Softmax()(y_pred)
y_pred_softmax_clipped = K.clip(y_pred_softmax, 1e-8, 1 - 1e-8)
log_likelihood = y_true * K.log(y_pred_softmax_clipped)
return K.sum(-log_likelihood * delta)
return loss
model = Model(inputs=[main_input], outputs=output)
model.compile(optimizer=Adam(lr=0.01), loss=custom_loss(delta))
print(model.predict(np.ones((1,10))))
print(softmax(model.predict(np.ones((1,10)))[0]))
delta.assign([[1.0]])
model.fit(np.ones((1000,10), dtype='float'),np.asarray(1000*[[0.7, 0.3, 0.0, 0.0]], dtype='float'))
print(model.predict(np.ones((1,10))))
print(softmax(model.predict(np.ones((1,10)))[0]))
火炬代码:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc = nn.Linear(10, 4)
def forward(self, x):
x = self.fc(x)
return x
def custom_loss(delta):
def loss(y_pred, y_true):
y_pred_softmax = nn.Softmax(dim=1)(y_pred)
y_pred_softmax_clipped = torch.clamp(y_pred, 1e-8, 1 - 1e-8)
log_likelihood = y_true * torch.log(y_pred_softmax_clipped)
return torch.sum(-log_likelihood * delta)
return loss
delta = 1
batch_size = 32
n_sample = 1000
network = Net()
loss_function = custom_loss(delta)
optimizer = optim.Adam(network.parameters(), lr=0.001)
x = torch.ones((1,10))
print(network(x))
print(nn.Softmax(dim=1)(network(x)), '\n')
target = [0.7, 0.3, 0.0, 0.0]
for i in range(int(n_sample/batch_size)):
optimizer.zero_grad()
inputs = torch.ones((batch_size,10))
targets = torch.FloatTensor(batch_size*[target])
outputs = network(inputs)
loss = loss_function(outputs, targets)
loss.backward()
optimizer.step()
x = torch.ones((1,10), dtype=torch.float)
print(network(x))
print(nn.Softmax(dim=1)(network(x)))
使用pytorch的两个输出示例:
tensor([[-0.5445, 1.1960, -0.1856, 0.3652]], grad_fn=<AddmmBackward>)
tensor([[0.0942, 0.5370, 0.1349, 0.2340]], grad_fn=<SoftmaxBackward>)
tensor([[-0.5445, 1.1960, -0.1856, 0.3652]], grad_fn=<AddmmBackward>)
tensor([[0.0942, 0.5370, 0.1349, 0.2340]], grad_fn=<SoftmaxBackward>)
tensor([[-0.1185, 0.5827, 0.7683, 0.4973]], grad_fn=<AddmmBackward>)
tensor([[0.1371, 0.2764, 0.3327, 0.2538]], grad_fn=<SoftmaxBackward>)
tensor([[-0.1185, 1.8670, 0.7683, 0.4973]], grad_fn=<AddmmBackward>)
tensor([[0.0796, 0.5798, 0.1932, 0.1474]], grad_fn=<SoftmaxBackward>)
使用TF代码输出:
[[ 0.0284583 0.35027373 0.4858752 -0.61461353]]
[0.2229509 0.30758977 0.35225958 0.11719974]
Train on 1000 samples
1000/1000 [==============================] - 0s 302us/sample - loss: 25.1664
[[ 2.3004684 1.4706173 -1.8008366 -2.9013252]]
[0.68579024 0.29908288 0.01135056 0.00377643]