I can't get my PyTorch model for machine translation to train on the GPU in Google Colab.
This is the error:
RuntimeError: Expected object of backend CPU but got backend CUDA for argument #4 'mat1'
It appears right after this line:
self.attn_weights = F.softmax(self.attn(self.attn_weights), dim=1).to(device)
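If I understand the message correctly, it means a layer whose weights are still on the CPU is being fed a CUDA tensor ('mat1' is the input to the underlying matrix multiply). A toy snippet like the one below (not from my notebook, just to illustrate the mismatch) raises the same kind of error:

import torch
import torch.nn as nn

device = torch.device("cuda")            # same idea as the device variable in my notebook
layer = nn.Linear(256, 256)              # never moved with .to(device), so it stays on the CPU
x = torch.randn(64, 256, device=device)  # input lives on the GPU
layer(x)                                 # raises a CPU/CUDA backend mismatch error like the one above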
The problem is that I have already moved both the Attention class and the model to the device. I also tried using the device variable inside Attention.forward(), where the error occurs, but it didn't help. Here is the code that defines the model:
input_dim = len(SRC.vocab)
output_dim = len(TRG.vocab)
src_embd_dim = tgt_embd_dim = 256
#hidden_dim = 512
hidden_dim = 256
#num_layers = 2
num_layers = 1
dropout_prob = 0.2
batch_size = 64
PAD_IDX = TRG.vocab.stoi['<pad>']
iterators = BucketIterator.splits((train_data, valid_data, test_data),
                                  batch_size=batch_size, device=device)
train_iterator, valid_iterator, test_iterator = iterators
attention = Attention(batch_size, hidden_dim, "dot").to(device)
enc = Encoder(input_dim, src_embd_dim, hidden_dim, num_layers, dropout_prob)
dec = DecoderAttn(output_dim, tgt_embd_dim, hidden_dim, num_layers, attention, dropout_prob)
model = Seq2Seq(enc, dec, device).to(device)
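To double-check that the weights really end up on the GPU, a generic check like this can be run after building the model (standard PyTorch, not part of the assignment code):

# print the device of every registered parameter; I expect cuda:0 for all of them
for name, p in model.named_parameters():
    print(name, p.device)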
Here are the Attention class and the decoder with attention, in case they are useful:
class Attention(nn.Module):
    def __init__(self, batch_size, hidden_size, method="cat"): # add parameters needed for your type of attention
        super(Attention, self).__init__()
        self.method = method # attention method you'll use. e.g. "cat", "one-layer-net", "dot", ...
        #<YOUR CODE HERE>
        self.batch_size = batch_size
        self.hidden_size = hidden_size

    def forward(self, embedded, last_hidden, encoder_outputs, seq_len=None):
        self.max_length = encoder_outputs.shape[0]
        if self.method == "cat":
            self.attn = nn.Linear(self.hidden_size * 2, self.hidden_size)
        if self.method == "dot":
            self.attn = nn.Linear(self.hidden_size, self.hidden_size)
        #self.attn_combine = nn.Linear(self.batch_size * 2, self.hidden_size)
        if self.method == "cat" or self.method == "dot":
            print(device)
            if self.method == "cat":
                self.attn_weights = torch.cat((embedded[0], last_hidden[0]), 1).to(device)
            elif self.method == "dot":
                self.attn_weights = torch.mul(embedded[0], last_hidden[0]).to(device)
            '''print(attn_weights.unsqueeze(1).shape, encoder_outputs.transpose(0, 1).shape)
            attn_applied = torch.bmm(attn_weights.unsqueeze(1),
                                     encoder_outputs.transpose(0, 1))
            print(embedded.shape, attn_applied.shape)
            output = torch.cat((embedded.transpose(0, 1),
                                attn_applied), 1)
            print(output.shape)
            output = self.attn_combine(output)
            return output'''
            self.attn_weights = F.softmax(self.attn(self.attn_weights), dim=1).to(device)
            self.attn_weights = self.attn_weights.unsqueeze(0)
            return self.attn_weights
        else:
            raise NotImplementedError
class DecoderAttn(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, attention, dropout=0.1):
        super(DecoderAttn, self).__init__()
        self.emb_dim = emb_dim
        self.hid_dim = hid_dim
        self.output_dim = output_dim
        self.n_layers = n_layers
        self.attention = attention # instance of Attention class
        self.dropout = dropout
        # define layers
        self.embedding = nn.Embedding(self.output_dim, self.emb_dim)
        #self.rnn = nn.LSTM(self.hid_dim, self.hid_dim, self.n_layers) # (lstm: embd, hid, layers, dropout)
        self.rnn = nn.GRU(self.hid_dim, self.hid_dim)
        self.out = nn.Linear(self.hid_dim, self.output_dim) # projection: hid_dim x output_dim
        self.dropout = nn.Dropout(dropout)
        # more layers you'll need for attention
        #<YOUR CODE HERE>

    def forward(self, input_, hidden, encoder_output):
        # make decoder with attention
        # use code from seminar notebook as base and add attention to it
        #<YOUR CODE HERE>
        input_ = input_.unsqueeze(0)
        # (1 x batch_size x emb_dim)
        embedded = self.embedding(input_) # embed the input and apply dropout
        embedded = self.dropout(embedded)
        output = self.attention.forward(embedded, hidden, encoder_output)
        output = F.relu(output)
        output, hidden = self.rnn(output, hidden)
        prediction = F.log_softmax(self.out(output[0]), dim=1)
        #prediction = self.out(output.squeeze(0)) # project the rnn output onto the output dim
        return prediction, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)
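If it helps with debugging, here is a standalone call into Attention.forward with dummy CUDA tensors (sizes made up from the hyperparameters above, reusing the same device variable); it goes through the same path as the decoder and should reproduce the same RuntimeError:

# minimal repro: feed dummy GPU tensors straight into the attention module
attn = Attention(batch_size, hidden_dim, "dot").to(device)
embedded = torch.randn(1, batch_size, hidden_dim, device=device)          # (1, batch, hidden)
last_hidden = torch.randn(1, batch_size, hidden_dim, device=device)       # (1, batch, hidden)
encoder_outputs = torch.randn(10, batch_size, hidden_dim, device=device)  # 10 is a dummy source length
attn_weights = attn(embedded, last_hidden, encoder_outputs)               # fails on the softmax line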
So here is my notebook on Colab (you can open it if you have a Google account): https://colab.research.google.com/drive/18CQ0jHGsiK8ValtgI3EIKAQDRF_-y-cA