I am writing a program built around an LSTM in PyTorch, but the loss always hovers around the same value and never decreases significantly. My model looks like this:
import torch
import torch.nn as nn

class LSTM(nn.Module):
    def __init__(self, input_size, output_size, hidden_dim, n_layers):
        super(LSTM, self).__init__()
        # Model dimensions
        self.hidden_dim = hidden_dim
        self.n_layers = n_layers
        # LSTM layer
        self.lstm = nn.LSTM(input_size, hidden_dim, n_layers, batch_first=True)
        for name, param in self.lstm.named_parameters():
            if 'weight_ih' in name:
                torch.nn.init.xavier_uniform_(param.data)
            elif 'weight_hh' in name:
                torch.nn.init.orthogonal_(param.data)
            elif 'bias' in name:
                param.data.fill_(0)
        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_size)
        torch.nn.init.xavier_uniform_(self.fc.weight)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        batch_size = x.size(0)
        # Initialize the hidden state for the first input using the method defined below
        hidden = self.init_hidden(batch_size)
        # Pass the input and hidden state through the model and collect the outputs
        out, hidden = self.lstm(x, hidden)
        # Reshape the outputs so they can be fed into the fully connected layer
        out = out.contiguous().view(-1, self.hidden_dim)
        out = self.fc(out)
        out = self.softmax(out)
        return out, hidden

    def init_hidden(self, batch_size):
        # Generate the initial hidden state of zeros used in the forward pass,
        # placed on the device the model runs on
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        hidden = (torch.zeros(self.n_layers, batch_size, self.hidden_dim, dtype=torch.double, device=device),
                  torch.zeros(self.n_layers, batch_size, self.hidden_dim, dtype=torch.double, device=device))
        return hidden
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

lstm = LSTM(300, 2, 2, 7)  # input_size=300, output_size=2, hidden_dim=2, n_layers=7
lstm = lstm.double()
lstm.to(device)

criterion = nn.NLLLoss()
learning_rate = 0.6
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
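As a sanity check (not part of the original post), a dummy forward pass shows the shapes this model produces; the batch size of 4 and sequence length of 5 below are arbitrary assumptions:

# Sketch only: dummy input of shape (batch=4, seq_len=5, features=300),
# in double precision to match lstm.double() above.
x = torch.randn(4, 5, 300, dtype=torch.double, device=device)
out, (h, c) = lstm(x)
print(out.shape)  # torch.Size([20, 2]) -- batch and time are flattened by the view()
print(h.shape)    # torch.Size([7, 4, 2]) -- (n_layers, batch, hidden_dim)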
Here is the function that runs for each training sample:
def epoch(x, y):
    global lstm, criterion, learning_rate, optimizer
    optimizer.zero_grad()
    # Insert a dimension at position 1 to make the input 3-D
    x = torch.unsqueeze(x, 1)
    output, hidden = lstm(x)
    # Take the last row of the flattened output as the prediction
    output = torch.unsqueeze(output[-1], 0)
    loss = criterion(output, y)
    loss.backward()
    optimizer.step()
    return output, loss.item()
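For context, a minimal sketch of how this function might be driven; the train_samples iterable and the per-sample shapes are assumptions, not from the original post:

# Hypothetical driving loop: each x is a (seq_len, 300) double tensor and
# y is a length-1 LongTensor holding the class label.
for e in range(10):
    total_loss = 0.0
    for x, y in train_samples:
        _, loss = epoch(x.to(device), y.to(device))
        total_loss += loss
    print(f"epoch {e}: mean loss {total_loss / len(train_samples):.4f}")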
The loss during training looks like this:

Is there something wrong with this code? Please help me. Thank you.
Answer 0 (score: 0):
Thanks @Roni. The problem turned out to be a misunderstanding of how the batch size and the other input dimensions are defined for nn.LSTM. It is working now.
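For anyone hitting the same wall, a minimal illustration of the shape convention that is easy to misread (the concrete sizes here are arbitrary): with batch_first=True, nn.LSTM expects input of shape (batch, seq_len, input_size), so the first dimension is the batch, not the sequence.

import torch
import torch.nn as nn

rnn = nn.LSTM(input_size=300, hidden_size=2, num_layers=7, batch_first=True)
x = torch.randn(4, 5, 300)  # (batch=4, seq_len=5, input_size=300)
out, (h, c) = rnn(x)
print(out.shape)  # torch.Size([4, 5, 2]) -- last layer's output at every time step
print(h.shape)    # torch.Size([7, 4, 2]) -- final hidden state for each layer

Note that unsqueezing a (seq_len, features) tensor at dimension 1 therefore yields (seq_len, 1, features), which this convention reads as seq_len independent batches of length-1 sequences.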