我正在尝试训练GRU模型,但在loss.backward()处遇到此断言错误。这是我的错误代码。任何帮助表示赞赏。
class AttendResistance(nn.Module):
def __init__(self, nb_classes, nb_tokens, embedding_matrix,
embed_dropout_rate=0, final_dropout_rate=0, return_attention=False):
super(AttendResistance, self).__init__()
embedding_dim = 20
hidden_size = 32
self.embed_dropout_rate = embed_dropout_rate
self.final_dropout_rate = final_dropout_rate
self.return_attention = return_attention
self.hidden_size = hidden_size
self.nb_classes = nb_classes
self.embed = nn.Embedding(nb_tokens, embedding_dim)
self.embed.weight = nn.Parameter(embedding_matrix)
self.embed_dropout = nn.Dropout2d(embed_dropout_rate)
self.gru = nn.GRU(embedding_dim, hidden_size, num_layers = 1, batch_first=True, dropout = 0.5,
bidirectional=False)
self.final_drop = nn.Dropout(final_dropout_rate)
self.linear = nn.Linear(hidden_size, nb_classes)
self.softmax = nn.Softmax(dim = 1)
def forward(self, input_seqs):
print (input_seqs.size())
x = self.embed(input_seqs)
print (x.size())
x = nn.Tanh()(x)
print (x.size())
x = self.embed_dropout(x)
print (x.size())
x, _ = self.gru(x)
print (x.size())
x = self.final_drop(x)
print (x.size())
x = self.linear(x[:, -1, :].float())
print (x.size())
outputs = self.softmax(x)
print (outputs.size())
if self.return_attention:
return outputs, att_weights
else:
return outputs
attn_res = AttendResistance(268, 20, embedding_matrix, 0.5, 0.3, True)
attn_res = attn_res.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(attn_res.parameters())
num_epochs = 10
for epoch in range(num_epochs):
for i, (prot_seqs, labels) in enumerate(train_loader):
prot_seqs = Variable(prot_seqs.long()).cuda()
labels = Variable(labels.long()).cuda()
#print (prot_seqs)
#print (labels)
# Forward + Backward + Optimize
optimizer.zero_grad()
outputs, att_weights = attn_res(prot_seqs)
print (outputs)
loss = criterion(outputs, torch.max(labels, 1)[1])
print (loss)
loss.backward()
optimizer.step()
if (i+1) % 100 == 0:
print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
%(epoch+1, num_epochs, i+1, len(X_train)//batch_size, loss.data[0]))
以下是打印输出的错误:
torch.Size([64, 1602])
torch.Size([64, 1602, 20])
torch.Size([64, 1602, 20])
torch.Size([64, 1602, 20])
torch.Size([64, 1602, 32])
torch.Size([64, 1602, 32])
torch.Size([64, 268])
torch.Size([64, 268])
Variable containing:
1.00000e-03 *
3.5743 3.7436 4.2370 ... 3.9607 4.2058 4.2674
3.5743 3.7436 4.2370 ... 3.9607 4.2058 4.2674
3.5743 3.7436 4.2370 ... 3.9607 4.2058 4.2674
... ⋱ ...
3.5743 3.7436 4.2370 ... 3.9607 4.2058 4.2674
3.5743 3.7436 4.2370 ... 3.9607 4.2058 4.2674
3.5743 3.7436 4.2370 ... 3.9607 4.2058 4.2674
[torch.cuda.FloatTensor of size 64x268 (GPU 0)]
Variable containing:
5.5909
[torch.cuda.FloatTensor of size 1 (GPU 0)]
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
<ipython-input-89-a32cf2edb4cc> in <module>()
17 print (torch.sum(att_weights))
18 print (loss)
---> 19 loss.backward()
20 optimizer.step()
21
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/autograd/variable.py in backward(self, gradient, retain_graph, create_graph, retain_variables)
165 Variable.
166 """
--> 167 torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables)
168
169 def register_hook(self, hook):
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/autograd/__init__.py in backward(variables, grad_variables, retain_graph, create_graph, retain_variables)
97
98 Variable._execution_engine.run_backward(
---> 99 variables, grad_variables, retain_graph)
100
101
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/autograd/function.py in _do_backward(self, gradients, retain_variables)
333 def _do_backward(self, gradients, retain_variables):
334 self.retain_variables = retain_variables
--> 335 result = super(NestedIOFunction, self)._do_backward(gradients, retain_variables)
336 if not retain_variables:
337 del self._nested_output
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/autograd/function.py in backward(self, *gradients)
341 def backward(self, *gradients):
342 nested_gradients = _unflatten(gradients, self._nested_output)
--> 343 result = self.backward_extended(*nested_gradients)
344 return tuple(_iter_None_tensors(result))
345
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/nn/_functions/rnn.py in backward_extended(self, grad_output, grad_hy)
333 output,
334 weight,
--> 335 grad_weight)
336 else:
337 grad_weight = [(None,) * len(layer_weight) for layer_weight in weight]
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/backends/cudnn/rnn.py in backward_weight(fn, input, hx, output, weight, grad_weight)
466
467 # copy the weights from the weight_buf into grad_weight
--> 468 grad_params = get_parameters(fn, handle, dw)
469 _copyParams(grad_params, grad_weight)
470 return grad_weight
/home/nafizh/anaconda3/lib/python3.6/site-packages/torch/backends/cudnn/rnn.py in get_parameters(fn, handle, weight_buf)
169 layer_params.append(param)
170 else:
--> 171 assert cur_offset == offset
172
173 cur_offset = offset + filter_dim_a[0]
AssertionError:
我是pytorch的新手。因为错误没有给我任何明确的信息,我不知道我在这里做错了什么。我知道这是一个正在发生的断言错误。但我不知道这些cur_offset和offset变量是什么。继续,
0.3.0.post4
Cuda编译工具,8.0版,V8.0.61
答案 0 :(得分:0)
@nafizh,我有同样的问题,并通过添加修复此问题
像embedding_matrix.float()
这样的内容,并删除了所有float(), double()
次调用。你可以在类似的吹气中找到详细信息。