Question

我正在尝试建立一个lstm模型。我的模型代码如下。

我的输入有4个功能，序列长度为5，批量大小为32。

        class RNN(nn.Module):

            def __init__(self, feature_dim, output_size, hidden_dim, n_layers, dropout=0.5):
                """
                Initialize the PyTorch RNN Module
                :param feature_dim: The number of input dimensions of the neural network
                :param output_size: The number of output dimensions of the neural network      
                :param hidden_dim: The size of the hidden layer outputs
                :param dropout: dropout to add in between LSTM/GRU layers
                """
                super(RNN, self).__init__()

                # set class variables
                self.output_size = output_size
                self.n_layers = n_layers
                self.hidden_dim = hidden_dim

                # define model layers
                self.lstm = nn.LSTM(feature_dim, hidden_dim, n_layers, batch_first=True)

                self.fc = nn.Linear(hidden_dim, output_size)

                self.dropout = nn.Dropout(dropout)


            def forward(self, nn_input, hidden):
                """
                Forward propagation of the neural network
                :param nn_input: The input to the neural network
                :param hidden: The hidden state        
                :return: Two Tensors, the output of the neural network and the latest hidden state
                """

                # Get Batch Size  
                batch_size = nn_input.size(0)

                # Pass through LSTM layer
                lstm_out, hidden = self.lstm(nn_input, hidden)
                # Stack up LSTM outputs
                lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
                # Add dropout and pass through fully connected layer
                x = self.dropout(lstm_out)
                x = self.fc(lstm_out)

                # reshape to be batch_size first
                output = x.view(batch_size, -1, self.output_size)
                # get last batch of labels
                out = output[:, -1]

                # return one batch of output word scores and the hidden state
                return out, hidden


            def init_hidden(self, batch_size):
                '''
                Initialize the hidden state of an LSTM/GRU
                :param batch_size: The batch_size of the hidden state
                :return: hidden state of dims (n_layers, batch_size, hidden_dim)
                '''
                # Implement function

                # initialize   state with zero weights, and move to GPU if available
                weight = next(self.parameters()).data

                if is_gpu_available:
                    hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device),
                          weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device))
                else:
                    hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                              weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
                return hidden

我训练时遇到错误

RuntimeError                              Traceback (most recent call last)
    /usr/local/bin/kernel-launchers/python/scripts/launch_ipykernel.py in <module>
          3 
          4 # training the model
    ----> 5 trained_rnn = train_rnn(rnn, batch_size, optimizer, num_epochs, show_every_n_batches)
          6 
          7 # saving the trained model

    /usr/local/bin/kernel-launchers/python/scripts/launch_ipykernel.py in train_rnn(rnn, batch_size, optimizer, n_epochs, show_every_n_batches)
         18 
         19             # forward, back prop
    ---> 20             loss, hidden = forward_back_prop(rnn, optimizer, inputs, labels, hidden)
         21             # record loss
         22             batch_losses.append(loss)

    /usr/local/bin/kernel-launchers/python/scripts/launch_ipykernel.py in forward_back_prop(rnn, optimizer, inp, target, hidden)
         22 
         23     # get the output from the model
    ---> 24     output, h = rnn(inp, h)
         25 
         26    # calculate the loss and perform backprop

    /usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
        491             result = self._slow_forward(*input, **kwargs)
        492         else:
    --> 493             result = self.forward(*input, **kwargs)
        494         for hook in self._forward_hooks.values():
        495             hook_result = hook(self, input, result)

    /usr/local/bin/kernel-launchers/python/scripts/launch_ipykernel.py in forward(self, nn_input, hidden)
         36 
         37         # Pass through LSTM layer
    ---> 38         lstm_out, hidden = self.lstm(nn_input, hidden)
         39         # Stack up LSTM outputs
         40         lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)

    /usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
        491             result = self._slow_forward(*input, **kwargs)
        492         else:
    --> 493             result = self.forward(*input, **kwargs)
        494         for hook in self._forward_hooks.values():
        495             hook_result = hook(self, input, result)

    /usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
        557             return self.forward_packed(input, hx)
        558         else:
    --> 559             return self.forward_tensor(input, hx)
        560 
        561 

    /usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in forward_tensor(self, input, hx)
        537         unsorted_indices = None
        538 
    --> 539         output, hidden = self.forward_impl(input, hx, batch_sizes, max_batch_size, sorted_indices)
        540 
        541         return output, self.permute_hidden(hidden, unsorted_indices)

    /usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in forward_impl(self, input, hx, batch_sizes, max_batch_size, sorted_indices)
        520         if batch_sizes is None:
        521             result = _VF.lstm(input, hx, self._get_flat_weights(), self.bias, self.num_layers,
    --> 522                               self.dropout, self.training, self.bidirectional, self.batch_first)
        523         else:
        524             result = _VF.lstm(input, batch_sizes, hx, self._get_flat_weights(), self.bias,

    RuntimeError: Expected object of scalar type Float but got scalar type Double for argument #4 'mat1'

我无法确定此错误的原因。如何解决？请帮忙。

还有，这是实现LSTM的正确方法还是有更好的方法来实现相同的目的？

Answer 1

torch.nn.LSTM不需要任何初始化，因为默认情况下它已初始化为零（请参见documentation）。此外，torch.nn.Module已经具有预定义的cuda()方法，因此可以将模块简单地移动到GPU，因此您可以安全地删除init_hidden(self, batch_size)。

出现此错误的原因是您的input的类型为torch.Double，而默认情况下，模块使用torch.Float（因为它足够准确，更快并且比torch.Double小）。您可以通过调用.float()来转换输入张量，在您的情况下，它看起来可能像这样：

def forward(self, nn_input, hidden):
    nn_input = nn_input.float()
    ... # rest of your code

最后，如果hidden参数始终为零，则无需使用，只需使用：

lstm_out, hidden = self.lstm(nn_input) # no hidden here

因为hidden在默认情况下也是零。

如何修复标量类型Float的预期对象但参数＃4'mat1'的标量类型为Double？

1 个答案: