我使用 nn.Transformer 预测长度为 2 的目标序列，数据集如下所示
import torch, torch.nn as nn
class A(nn.Module):
    """Toy seq2seq model: embeds 0/1 token sequences, runs them through
    nn.Transformer, and projects each target position to 2-class logits.
    """

    def __init__(self):
        super().__init__()
        # Vocabulary of 2 tokens (0/1); embedding dim 10 == transformer d_model.
        self.embed_src = nn.Embedding(2, 10)
        self.embed_target = nn.Embedding(2, 10)
        self.transformer = nn.Transformer(10, 2)  # d_model=10, nhead=2
        self.lin = nn.Linear(10, 2)
        # Kept only for inspecting probabilities by hand. CrossEntropyLoss
        # applies log-softmax internally, so forward() must return RAW logits
        # and never pass them through this module before the loss.
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, inp, tgt):
        """Compute per-position class logits.

        inp: 1-D LongTensor of source tokens, shape (src_len,).
        tgt: LongTensor of target tokens, shape (tgt_len, 1).
        Returns logits of shape (tgt_len, 2, 1) — the class dimension moved
        to position 1 so the result matches nn.CrossEntropyLoss's expected
        (N, C, d1) layout against a (N, d1) class-index target.
        """
        embed_src = self.embed_src(inp)        # (src_len, 10)
        embed_target = self.embed_target(tgt)  # (tgt_len, 1, 10)
        # nn.Transformer (batch_first=False default) wants (seq, batch, d_model);
        # batch size is fixed at 1 here.
        output = self.transformer(
            embed_src.view(len(inp), 1, -1),
            embed_target.view(len(tgt), 1, -1),
        )
        output = self.lin(output)              # (tgt_len, 1, 2)
        return output.permute(0, 2, 1)         # (tgt_len, 2, 1)
# Model, optimizer, and a tiny hand-built dataset of (source, target) pairs.
model = A()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Each sample: a length-6 source sequence of 0/1 tokens and a (2, 1)
# class-index target.
dataset = [
    [[0, 1, 0, 1, 0, 1], [[0], [1]]],
    [[1, 0, 1, 0, 1, 0], [[1], [0]]],
    [[0, 0, 0, 0, 0, 0], [[0], [0]]],
    [[1, 1, 1, 1, 1, 1], [[1], [1]]],
    [[1, 1, 0, 0, 1, 1], [[0], [0]]],
    [[0, 0, 1, 1, 0, 0], [[1], [1]]],
]

# Convert once to LongTensors; iterate the dataset itself instead of
# hard-coding range(6), so adding/removing samples needs no other change.
tensor_dataset = [[torch.tensor(src), torch.tensor(tgt)] for src, tgt in dataset]
criterion = nn.CrossEntropyLoss()

# 1000 SGD steps, cycling through the samples round-robin.
for step in range(1000):
    src, tgt = tensor_dataset[step % len(tensor_dataset)]
    optimizer.zero_grad()
    # model(...) returns logits of shape (tgt_len, 2, 1); CrossEntropyLoss
    # matches that (N, C, d1) layout against the (tgt_len, 1) index target.
    loss = criterion(model(src, tgt), tgt)
    loss.backward()
    optimizer.step()
    # Log periodically instead of printing every step/sample.
    if step % 100 == 0:
        print(f"step {step}: loss {loss.item():.4f}")
我需要将 self.embed_target 的 requires_grad 设置为 False 吗？将 output.permute(0, 2, 1) 与 nn.CrossEntropyLoss 一起使用是正确的方法吗？