I'm no stranger to PyTorch, and I'm currently trying to implement the network from this paper: https://arxiv.org/pdf/1811.06621.pdf?fbclid=IwAR3Ya9ZfBNN40UO0wct7dGupjlBFEpU47IRHK-wXmejI4U2UQGf03sXHMlw
Below is my class for the network, along with some training code that uses dummy data. The code compiles and runs, but the loss printed at every iteration is always the same (8.371). That leads me to believe there is a problem with how I implemented the network. Are there any obvious mistakes in my implementation?
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable

torch.manual_seed(1)

# Hyper Parameters
sequence_length = 1
input_size = 320
hidden_size = 2048
recurrent_size = 640
num_layers = 8
num_classes = 10
batch_size = 10
num_epochs = 2
learning_rate = 0.01


# RNNT Model
class RNNTModel(nn.Module):
    def __init__(self, input_size, hidden_size, recurrent_size, bias=True):
        super(RNNTModel, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.recurrent_size = recurrent_size
        self.bias = bias

        self.downsample_fc = nn.Linear(self.recurrent_size * 2, self.recurrent_size)
        self.joint_fc = nn.Linear(self.recurrent_size * 2, self.recurrent_size)
        self.out_fc = nn.Linear(640, 4096)
        self.softmax = nn.LogSoftmax(dim=1)

        self.encoder_1 = nn.ModuleDict({
            'lstm1': nn.LSTM(self.input_size, self.hidden_size, num_layers=1, bias=bias, batch_first=True),
            'proj1': nn.Linear(self.hidden_size, self.recurrent_size),
            'lstm2': nn.LSTM(self.recurrent_size, self.hidden_size, num_layers=1, bias=bias, batch_first=True),
            'proj2': nn.Linear(self.hidden_size, self.recurrent_size)
        })

        self.encoder_2 = nn.ModuleDict({
            'lstm3': nn.LSTM(self.recurrent_size, self.hidden_size, num_layers=1, bias=bias, batch_first=True),
            'proj3': nn.Linear(self.hidden_size, self.recurrent_size),
            'lstm4': nn.LSTM(self.recurrent_size, self.hidden_size, num_layers=1, bias=bias, batch_first=True),
            'proj4': nn.Linear(self.hidden_size, self.recurrent_size),
            'lstm5': nn.LSTM(self.recurrent_size, self.hidden_size, num_layers=1, bias=bias, batch_first=True),
            'proj5': nn.Linear(self.hidden_size, self.recurrent_size),
            'lstm6': nn.LSTM(self.recurrent_size, self.hidden_size, num_layers=1, bias=bias, batch_first=True),
            'proj6': nn.Linear(self.hidden_size, self.recurrent_size),
            'lstm7': nn.LSTM(self.recurrent_size, self.hidden_size, num_layers=1, bias=bias, batch_first=True),
            'proj7': nn.Linear(self.hidden_size, self.recurrent_size),
            'lstm8': nn.LSTM(self.recurrent_size, self.hidden_size, num_layers=1, bias=bias, batch_first=True),
            'proj8': nn.Linear(self.hidden_size, self.recurrent_size)
        })

        self.prediction_net = nn.ModuleDict({
            'fc1': nn.Linear(4096, 76),
            'lstm1': nn.LSTM(76, self.hidden_size, num_layers=1, bias=bias, batch_first=True),
            'proj1': nn.Linear(self.hidden_size, self.recurrent_size),
            'lstm2': nn.LSTM(self.recurrent_size, self.hidden_size, num_layers=1, bias=bias, batch_first=True),
            'proj2': nn.Linear(self.hidden_size, self.recurrent_size)
        })

    def forward(self, x):
        y = [torch.zeros(1, x.size(1), 4096)]
        for i in range(x.size(0) // 2):
            # Unrolled loop of encoder 1
            enc_out, (h1, c1) = self.encoder_1['lstm1'](torch.stack([x[2 * i], x[2 * i + 1]]))
            enc_out = self.encoder_1['proj1'](enc_out)
            enc_out, _ = self.encoder_1['lstm2'](enc_out)
            enc_out = self.encoder_1['proj2'](enc_out)

            # Downsample by halving the frame rate
            enc_out = enc_out.view(1, -1, 2 * self.recurrent_size)
            enc_out = self.downsample_fc(enc_out)

            # Unrolled loop of encoder 2
            enc_out, _ = self.encoder_2['lstm3'](enc_out)
            enc_out = self.encoder_2['proj3'](enc_out)
            enc_out, _ = self.encoder_2['lstm4'](enc_out)
            enc_out = self.encoder_2['proj4'](enc_out)
            enc_out, _ = self.encoder_2['lstm5'](enc_out)
            enc_out = self.encoder_2['proj5'](enc_out)
            enc_out, _ = self.encoder_2['lstm6'](enc_out)
            enc_out = self.encoder_2['proj6'](enc_out)
            enc_out, _ = self.encoder_2['lstm7'](enc_out)
            enc_out = self.encoder_2['proj3'](enc_out)
            enc_out, _ = self.encoder_2['lstm7'](enc_out)
            enc_out = self.encoder_2['proj3'](enc_out)
            enc_out, _ = self.encoder_2['lstm8'](enc_out)
            enc_out = self.encoder_2['proj8'](enc_out)

            # Unrolled loop of prediction net
            pred_out = self.prediction_net['fc1'](y[i])
            pred_out, _ = self.prediction_net['lstm1'](pred_out)
            pred_out = self.prediction_net['proj1'](pred_out)
            pred_out, _ = self.prediction_net['lstm2'](pred_out)
            pred_out = self.prediction_net['proj2'](pred_out)

            # Unrolled loop of joint layers
            joint_out = torch.cat([enc_out, pred_out], dim=-1)
            joint_out = self.joint_fc(joint_out)
            joint_out = self.out_fc(joint_out)
            joint_out = self.softmax(joint_out)
            y.append(joint_out)

        return torch.stack(y[1:])


rnnt = RNNTModel(input_size, hidden_size, recurrent_size, bias=True)
# y = rnnt(torch.rand(batch_size, sequence_length, input_size))
training_data = [(torch.rand(batch_size, sequence_length, input_size), torch.ones(batch_size // 2, 1, 4096).long()) for _ in range(100)]

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(rnnt.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    for i, (x, y) in enumerate(training_data):
        x = Variable(x)
        y = Variable(y)

        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = rnnt(x).view(-1, 4096)
        loss = criterion(outputs, torch.max(y, 2)[1].squeeze())
        loss.backward()
        optimizer.step

        if (i + 1) % 1 == 0:
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch + 1, num_epochs, i + 1, len(training_data) // batch_size, loss.item()))
Answer 0 (score: 2):
I think you want to use optimizer.step() instead of optimizer.step.
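For illustration, here is a minimal, self-contained sketch using a hypothetical tiny linear model (not the RNNT from the question) that reproduces the symptom: with a bare optimizer.step the printed loss never changes, while optimizer.step() makes it decrease.

import torch

# Toy model and optimizer, purely to demonstrate the step vs. step() difference
model = torch.nn.Linear(4, 2)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
x, target = torch.rand(8, 4), torch.rand(8, 2)

for _ in range(5):
    optimizer.zero_grad()
    loss = torch.nn.functional.mse_loss(model(x), target)
    loss.backward()
    optimizer.step       # bug: only references the bound method, weights never change
    # optimizer.step()   # fix: actually applies the Adam update
    print(loss.item())

Without the parentheses, the line merely looks up the bound method and discards it, so no parameter update is ever applied; that matches the constant loss reported in the question.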