Is this the correct way to use nn.Transformer?

Asked: 2019-11-30 17:44:57

Tags: python, pytorch

I am using nn.Transformer to predict sequences of length 2, as described by the dataset below.

import torch, torch.nn as nn


class A(nn.Module):
  def __init__(self):
    super().__init__()
    self.embed_src = nn.Embedding(2, 10)      # 2-token vocabulary, d_model = 10
    self.embed_target = nn.Embedding(2, 10)
    self.transformer = nn.Transformer(10, 2)  # d_model = 10, nhead = 2
    self.lin = nn.Linear(10, 2)               # project back to the 2-token vocabulary
    self.softmax = nn.Softmax(dim=-1)

  def forward(self, inp, tgt):
    embed_src = self.embed_src(inp)
    embed_target = self.embed_target(tgt)
    # nn.Transformer expects (seq_len, batch, d_model) for both src and tgt
    output = self.transformer(embed_src.view(len(inp), 1, -1), embed_target.view(len(tgt), 1, -1))
    output = self.lin(output)
    print('output.shape', output.shape, 'output', output)
    print('embed_target', embed_target.view(2, 1, -1))
    print('softmax probabilities', self.softmax(output))
    # move the class dimension to position 1 for nn.CrossEntropyLoss
    return output.permute(0, 2, 1)

model = A()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

dataset = [
    [[0, 1, 0, 1, 0, 1], [[0], [1]]],
    [[1, 0, 1, 0, 1, 0], [[1], [0]]],
    [[0, 0, 0, 0, 0, 0], [[0], [0]]],
    [[1, 1, 1, 1, 1, 1], [[1], [1]]],
    [[1, 1, 0, 0, 1, 1], [[0], [0]]],
    [[0, 0, 1, 1, 0, 0], [[1], [1]]],
]
# each src tensor has shape (6,), each target tensor shape (2, 1)
tensor_dataset = [[torch.tensor(src), torch.tensor(tgt)] for src, tgt in dataset]

criterion = nn.CrossEntropyLoss()

for i in range(1000):
  optimizer.zero_grad()
  print(tensor_dataset[i%6][0], tensor_dataset[i%6][1])
  loss = criterion(model(tensor_dataset[i%6][0], tensor_dataset[i%6][1]), tensor_dataset[i%6][1])
  print(loss)
  loss.backward()
  optimizer.step()

Do I need to set requires_grad to False on self.embed_target? And is output.permute(0, 2, 1) the correct way to use the output with nn.CrossEntropyLoss?
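For context on the second question, here is a minimal standalone shape check (my own sketch, not part of the original post); the tensor sizes mirror what the model above produces:

import torch, torch.nn as nn

# nn.CrossEntropyLoss accepts logits of shape (N, C, d1, ...) with targets of
# shape (N, d1, ...). The model returns (seq_len=2, batch=1, classes=2) from
# the Linear layer, so permute(0, 2, 1) yields (2, 2, 1), which lines up with
# the (2, 1) target tensor as N = seq_len, C = classes, d1 = batch.
criterion = nn.CrossEntropyLoss()
logits = torch.randn(2, 1, 2)                      # (seq_len, batch, classes)
target = torch.tensor([[0], [1]])                  # (seq_len, batch)
loss = criterion(logits.permute(0, 2, 1), target)  # logits become (N, C, d1)
print(loss)

# Freezing the target embedding, if that turned out to be necessary, would be:
# model.embed_target.weight.requires_grad_(False)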

0 Answers