I have searched a lot of Stack Overflow and pytorch.org forum threads, and this seems to be a common error. However, the solutions I've read in those threads are hard to follow, and I haven't been able to use them to get my code working. I do know the error has to do with the shape of the tensor being fed into the model, but I'm not sure how to modify the code to fix it. I'm still quite new to PyTorch, so I can't really make sense of the explanations I've found. I'm trying to use the bidirectional LSTM from this PyTorch tutorial: https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/02-intermediate/bidirectional_recurrent_neural_network/main.py. The tutorial uses the MNIST image dataset, but I'm trying to use a financial text dataset instead. I suspect that is where the problem lies: I know text and images may need to be fed in differently, but I'm not sure what to change. I've also included the function I use to build the dataloader.
import random

import torch
import torch.nn as nn


def dataloader(messages, labels, sequence_length=30, batch_size=32, shuffle=False):
    """
    Build a dataloader.
    """
    if shuffle:
        indices = list(range(len(messages)))
        random.shuffle(indices)
        messages = [messages[idx] for idx in indices]
        labels = [labels[idx] for idx in indices]

    total_sequences = len(messages)  # total number of twits

    for ii in range(0, total_sequences, batch_size):
        batch_messages = messages[ii: ii+batch_size]

        # First initialize a tensor of all zeros
        batch = torch.zeros((sequence_length, len(batch_messages)), dtype=torch.int64)

        for batch_num, tokens in enumerate(batch_messages):
            token_tensor = torch.tensor(tokens)
            # Left pad! start_idx is 0 if len(token_tensor) >= sequence_length
            start_idx = max(sequence_length - len(token_tensor), 0)
            # Fill this message's column with its (truncated) tokens
            batch[start_idx:, batch_num] = token_tensor[:sequence_length]

        label_tensor = torch.tensor(labels[ii: ii+len(batch_messages)])

        yield batch, label_tensor
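Just to show what the model is actually receiving: if I run the generator on some toy token lists (toy_messages and toy_labels below are made-up placeholders, not my real data), each batch comes out as a 2-D tensor of token ids with shape (sequence_length, batch_size):

toy_messages = [[5, 8, 2], [7, 1, 9, 4, 3], [6]]  # stand-ins for tokenized twits
toy_labels = [0, 2, 1]

batch, label_tensor = next(dataloader(toy_messages, toy_labels,
                                      sequence_length=6, batch_size=3))
print(batch.shape)         # torch.Size([6, 3]) -> (sequence_length, batch_size): only 2 dims
print(label_tensor.shape)  # torch.Size([3])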
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper-parameters (mostly carried over from the MNIST tutorial, where each
# 28x28 image is treated as a sequence of 28 rows with 28 features per row)
sequence_length = 28
input_size = 28
hidden_size = 128
num_layers = 2
num_classes = 3
batch_size = 100
num_epochs = 2
learning_rate = 0.003
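For comparison, the linked tutorial makes its input 3-D explicitly: in its training loop, each batch of 28x28 MNIST images is reshaped into (batch, seq_len, input_size) before being fed to the model. If I'm reading it correctly, this is the relevant line from the tutorial (not from my code):

images = images.reshape(-1, sequence_length, input_size).to(device)  # (batch, 28, 28)

My text batches never go through anything like this, which may be part of what I need to change.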
# Bidirectional recurrent neural network (many-to-one)
class BiRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(BiRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers,
                            batch_first=True, bidirectional=True)
        self.fc = nn.Linear(hidden_size * 2, num_classes)  # 2 for bidirection

    def forward(self, x):
        # Set initial states
        h0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)  # 2 for bidirection
        c0 = torch.zeros(self.num_layers * 2, x.size(0), self.hidden_size).to(device)

        # Forward propagate LSTM
        out, _ = self.lstm(x, (h0, c0))  # out: tensor of shape (batch_size, seq_length, hidden_size*2)

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out
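If I understand the docs, with batch_first=True the nn.LSTM inside this model wants a 3-D float input of shape (batch, seq_len, input_size). A quick sanity check I put together (the sizes below are just illustrative, not my real data) runs fine on a random 3-D tensor but, in my environment, raises the exact error shown at the bottom when given a 2-D tensor like the one my dataloader yields:

model_check = BiRNN(input_size, hidden_size, num_layers, num_classes).to(device)

# 3-D float input of shape (batch, seq_len, input_size): works
out = model_check(torch.randn(4, sequence_length, input_size).to(device))
print(out.shape)  # torch.Size([4, 3]) -> (batch, num_classes)

# 2-D int64 input of shape (sequence_length, batch), like my dataloader yields:
model_check(torch.zeros(sequence_length, 4, dtype=torch.int64).to(device))
# RuntimeError: input must have 3 dimensions, got 2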
model_2 = BiRNN(input_size, hidden_size, num_layers, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model_2.parameters(), lr=learning_rate)
# Train the model
total_step = 200
for epoch in range(num_epochs):
    # Rebuild the loader each epoch: dataloader() is a generator, so a single
    # instance would be exhausted after the first pass over the data
    train_loader = dataloader(
        train_features, train_labels, batch_size=batch_size,
        sequence_length=20, shuffle=True)
    for i, (text_batch, labels) in enumerate(train_loader):
        text_batch = text_batch.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model_2(text_batch)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, total_step, loss.item()))
Here is the full error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-74-8a935e97b39c> in <module>
10
11 # Forward pass
---> 12 outputs = model_2(text_batch)
13 loss = criterion(outputs, labels)
14
~\Anaconda3\envs\thesis\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
545 result = self._slow_forward(*input, **kwargs)
546 else:
--> 547 result = self.forward(*input, **kwargs)
548 for hook in self._forward_hooks.values():
549 hook_result = hook(self, input, result)
<ipython-input-64-21fa163d5c93> in forward(self, x)
18
19 # Forward propagate LSTM
---> 20 out, _ = self.lstm(x, (h0, c0)) # out: tensor of shape (batch_size, seq_length, hidden_size*2)
21
22 # Decode the hidden state of the last time step
~\Anaconda3\envs\thesis\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
545 result = self._slow_forward(*input, **kwargs)
546 else:
--> 547 result = self.forward(*input, **kwargs)
548 for hook in self._forward_hooks.values():
549 hook_result = hook(self, input, result)
~\Anaconda3\envs\thesis\lib\site-packages\torch\nn\modules\rnn.py in forward(self, input, hx)
562 return self.forward_packed(input, hx)
563 else:
--> 564 return self.forward_tensor(input, hx)
565
566 class GRU(RNNBase):
~\Anaconda3\envs\thesis\lib\site-packages\torch\nn\modules\rnn.py in forward_tensor(self, input, hx)
541 unsorted_indices = None
542
--> 543 output, hidden = self.forward_impl(input, hx, batch_sizes, max_batch_size, sorted_indices)
544
545 return output, self.permute_hidden(hidden, unsorted_indices)
~\Anaconda3\envs\thesis\lib\site-packages\torch\nn\modules\rnn.py in forward_impl(self, input, hx, batch_sizes, max_batch_size, sorted_indices)
521 hx = self.permute_hidden(hx, sorted_indices)
522
--> 523 self.check_forward_args(input, hx, batch_sizes)
524 if batch_sizes is None:
525 result = _VF.lstm(input, hx, self._get_flat_weights(), self.bias, self.num_layers,
~\Anaconda3\envs\thesis\lib\site-packages\torch\nn\modules\rnn.py in check_forward_args(self, input, hidden, batch_sizes)
494 def check_forward_args(self, input, hidden, batch_sizes):
495 # type: (Tensor, Tuple[Tensor, Tensor], Optional[Tensor]) -> None
--> 496 self.check_input(input, batch_sizes)
497 expected_hidden_size = self.get_expected_hidden_size(input, batch_sizes)
498
~\Anaconda3\envs\thesis\lib\site-packages\torch\nn\modules\rnn.py in check_input(self, input, batch_sizes)
143 raise RuntimeError(
144 'input must have {} dimensions, got {}'.format(
--> 145 expected_input_dim, input.dim()))
146 if self.input_size != input.size(-1):
147 raise RuntimeError(
RuntimeError: input must have 3 dimensions, got 2