我正在尝试建立一个lstm模型。我的模型代码如下。
我的输入有4个功能,序列长度为5,批量大小为32。
class RNN(nn.Module):
def __init__(self, feature_dim, output_size, hidden_dim, n_layers, dropout=0.5):
"""
Initialize the PyTorch RNN Module
:param feature_dim: The number of input dimensions of the neural network
:param output_size: The number of output dimensions of the neural network
:param hidden_dim: The size of the hidden layer outputs
:param dropout: dropout to add in between LSTM/GRU layers
"""
super(RNN, self).__init__()
# set class variables
self.output_size = output_size
self.n_layers = n_layers
self.hidden_dim = hidden_dim
# define model layers
self.lstm = nn.LSTM(feature_dim, hidden_dim, n_layers, batch_first=True)
self.fc = nn.Linear(hidden_dim, output_size)
self.dropout = nn.Dropout(dropout)
def forward(self, nn_input, hidden):
"""
Forward propagation of the neural network
:param nn_input: The input to the neural network
:param hidden: The hidden state
:return: Two Tensors, the output of the neural network and the latest hidden state
"""
# Get Batch Size
batch_size = nn_input.size(0)
# Pass through LSTM layer
lstm_out, hidden = self.lstm(nn_input, hidden)
# Stack up LSTM outputs
lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
# Add dropout and pass through fully connected layer
x = self.dropout(lstm_out)
x = self.fc(lstm_out)
# reshape to be batch_size first
output = x.view(batch_size, -1, self.output_size)
# get last batch of labels
out = output[:, -1]
# return one batch of output word scores and the hidden state
return out, hidden
def init_hidden(self, batch_size):
'''
Initialize the hidden state of an LSTM/GRU
:param batch_size: The batch_size of the hidden state
:return: hidden state of dims (n_layers, batch_size, hidden_dim)
'''
# Implement function
# initialize state with zero weights, and move to GPU if available
weight = next(self.parameters()).data
if is_gpu_available:
hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device),
weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().to(device))
else:
hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())
return hidden
我训练时遇到错误
RuntimeError Traceback (most recent call last)
/usr/local/bin/kernel-launchers/python/scripts/launch_ipykernel.py in <module>
3
4 # training the model
----> 5 trained_rnn = train_rnn(rnn, batch_size, optimizer, num_epochs, show_every_n_batches)
6
7 # saving the trained model
/usr/local/bin/kernel-launchers/python/scripts/launch_ipykernel.py in train_rnn(rnn, batch_size, optimizer, n_epochs, show_every_n_batches)
18
19 # forward, back prop
---> 20 loss, hidden = forward_back_prop(rnn, optimizer, inputs, labels, hidden)
21 # record loss
22 batch_losses.append(loss)
/usr/local/bin/kernel-launchers/python/scripts/launch_ipykernel.py in forward_back_prop(rnn, optimizer, inp, target, hidden)
22
23 # get the output from the model
---> 24 output, h = rnn(inp, h)
25
26 # calculate the loss and perform backprop
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
/usr/local/bin/kernel-launchers/python/scripts/launch_ipykernel.py in forward(self, nn_input, hidden)
36
37 # Pass through LSTM layer
---> 38 lstm_out, hidden = self.lstm(nn_input, hidden)
39 # Stack up LSTM outputs
40 lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in forward(self, input, hx)
557 return self.forward_packed(input, hx)
558 else:
--> 559 return self.forward_tensor(input, hx)
560
561
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in forward_tensor(self, input, hx)
537 unsorted_indices = None
538
--> 539 output, hidden = self.forward_impl(input, hx, batch_sizes, max_batch_size, sorted_indices)
540
541 return output, self.permute_hidden(hidden, unsorted_indices)
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/rnn.py in forward_impl(self, input, hx, batch_sizes, max_batch_size, sorted_indices)
520 if batch_sizes is None:
521 result = _VF.lstm(input, hx, self._get_flat_weights(), self.bias, self.num_layers,
--> 522 self.dropout, self.training, self.bidirectional, self.batch_first)
523 else:
524 result = _VF.lstm(input, batch_sizes, hx, self._get_flat_weights(), self.bias,
RuntimeError: Expected object of scalar type Float but got scalar type Double for argument #4 'mat1'
我无法确定此错误的原因。如何解决?请帮忙。
还有,这是实现LSTM的正确方法还是有更好的方法来实现相同的目的?
答案 0 :(得分:0)
torch.nn.LSTM
不需要任何初始化,因为默认情况下它已初始化为零(请参见documentation)。
此外,torch.nn.Module
已经具有预定义的cuda()
方法,因此可以将模块简单地移动到GPU,因此您可以安全地删除init_hidden(self, batch_size)
。
出现此错误的原因是您的input
的类型为torch.Double
,而默认情况下,模块使用torch.Float
(因为它足够准确,更快并且比torch.Double
小)。
您可以通过调用.float()
来转换输入张量,在您的情况下,它看起来可能像这样:
def forward(self, nn_input, hidden):
nn_input = nn_input.float()
... # rest of your code
最后,如果hidden
参数始终为零,则无需使用,只需使用:
lstm_out, hidden = self.lstm(nn_input) # no hidden here
因为hidden
在默认情况下也是零。