Question

在尝试使用LSTM（RNN）建立多类文本分类网络时遇到此错误。该代码对于代码的训练部分似乎运行良好，而对于验证部分则抛出错误。以下是网络架构和培训代码。感谢您的帮助。

我尝试使用现有的使用RNN预测情绪的代码，并在最终和损失函数（从BCE损失到NLLLoss（））中将sigmoid替换为softmax函数

  def forward(self, x, hidden):
    """
    Perform a forward pass of our model on some input and hidden state.
    """
    batch_size = x.size(0)
    embeds = self.embedding(x)
    lstm_out,hidden= self.lstm(embeds,hidden)

     # stack up lstm outputs
    lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)

    # dropout and fully-connected layer
    out = self.dropout(lstm_out)
    out = self.fc(out)

    # softmax function
    soft_out = self.sof(out)

    # reshape to be batch_size first
    soft_out = soft_out.view(batch_size, -1)
#         soft_out = soft_out[:, -1] # get last batch of labels

    # return last sigmoid output and hidden state
    return soft_out, hidden


def init_hidden(self, batch_size):
    ''' Initializes hidden state '''
    # Create two new tensors with sizes n_layers x batch_size x hidden_dim,
    # initialized to zero, for hidden state and cell state of LSTM
    weight = next(self.parameters()).data

    if (train_on_gpu):
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda(),
              weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda())
    else:
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())

    return hidden

# Instantiate the model w/ hyperparams
vocab_size = len(vocab_to_int)+1
output_size = 44
embedding_dim = 100
hidden_dim = 256
n_layers = 2

net = ClassificationRNN(vocab_size, output_size, embedding_dim, hidden_dim, n_layers)

print(net)

# loss and optimization functions
lr=0.001

criterion = nn.NLLLoss()

optimizer = torch.optim.Adam(net.parameters(), lr=lr)

# training params

epochs = 4 # 3-4 is approx where I noticed the validation loss stop decreasing

counter = 0
print_every = 100
clip=5 # gradient clipping

# move model to GPU, if available
if(train_on_gpu):
    net.cuda()

net.train()
# train for some number of epochs
for e in range(epochs):
    # initialize hidden state
    h = net.init_hidden(batch_size)

    # batch loop
    for inputs, labels in train_loader:
        counter += 1

        if(train_on_gpu):
            inputs, labels = inputs.cuda(), labels.cuda()

        # Creating new variables for the hidden state, otherwise
        # we'd backprop through the entire training history
        h = tuple([each.data for each in h])

        # zero accumulated gradients
        net.zero_grad()

        # get the output from the model
        output, h = net(inputs, h)

#         print('output:',output.squeeze())
#         print('labels:',labels.float())

        # calculate the loss and perform backprop
        loss = criterion(output, labels)
        loss.backward()
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        nn.utils.clip_grad_norm_(net.parameters(), clip)
        optimizer.step()

        # loss stats
        if counter % print_every == 0:
            # Get validation loss
            val_h = net.init_hidden(batch_size)
            val_losses = []
            net.eval()
            for inputs, labels in valid_loader:

                # Creating new variables for the hidden state, otherwise
                # we'd backprop through the entire training history
                val_h = tuple([each.data for each in val_h])

                if(train_on_gpu):
                    inputs, labels = inputs.cuda(), labels.cuda()

                output, val_h = net(inputs, val_h)

                val_loss = criterion(output, labels)

                val_losses.append(val_loss.item())

            net.train()
            print("Epoch: {}/{}...".format(e+1, epochs),
                  "Step: {}...".format(counter),
                  "Loss: {:.6f}...".format(loss.item()),
                  "Val Loss: {:.6f}".format(np.mean(val_losses)))

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-41-805ed880b453> in <module>()
     58                     inputs, labels = inputs.cuda(), labels.cuda()
     59 
---> 60                 output, val_h = net(inputs, val_h)
     61 
     62                 val_loss = criterion(output, labels)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487             result = self._slow_forward(*input, **kwargs)
    488         else:
--> 489             result = self.forward(*input, **kwargs)
    490         for hook in self._forward_hooks.values():
    491             hook_result = hook(self, input, result)

<ipython-input-38-dbfb8d384231> in forward(self, x, hidden)
     34         batch_size = x.size(0)
     35         embeds = self.embedding(x)
---> 36         lstm_out,hidden= self.lstm(embeds,hidden)
     37 
     38          # stack up lstm outputs

Answer 1

尝试在使用DataLoader加载数据的代码行中添加droplast=True，例如用于从数据集train_data中加载训练数据：

train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size, drop_last=True)

Answer 2

似乎hidden的大小与您的模型不匹配。 3D张量的第二个暗角通常是序列长度。我建议您在评估阶段打印embeds和hidden的大小。

RuntimeError：预期隐藏的[0]大小（2，20，256），得到（2，50，256）

2 个答案: