我正在尝试通过PyTorch实施非线性回归任务。输入是一些句子,输出是它们的分数(这些分数是一些浮点数)。为了通过计算机理解句子,我通过torch.nn.RNN(递归神经网络)层将每个句子转换为50d实向量(这些是嵌入向量)。然后,我使用torch.nn.Linear层来预测得分。
这是我的模特
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence
import text_processing as tp
import torch.nn.functional as F
class RNNModel(nn.Module):
################## __init__ ####################
def __init__(self, vocab_size, input_size,hidden_size, output_size,my_embeddings,n_layers,bidirectional=True):
super(RNNModel, self).__init__()
self.hidden_size = hidden_size
self.n_layers = n_layers
self.n_directions = int(bidirectional) + 1
self.embedding = nn.Embedding(vocab_size, hidden_size)
self.rnn = nn.RNN(input_size,hidden_size,n_layers,bidirectional=bidirectional)
self.fc = nn.Linear(hidden_size, output_size)
#################### forward #################
def forward(self, batch_of_sentences, batch_of_len):
"""
input shape: B x S ([num of sentences in this batch] x [max sent len in this batch] )
transpose to make S x B
"""
input=batch_of_sentences
input = input.t()
batch_size = input.size(1)
# Make a hidden
hidden = self._init_hidden(batch_size)
# Embedding S x B -> S x B x I (embedding size)
embedded = self.embedding(input)
# Pack them up nicely
rnn_input = pack_padded_sequence(embedded, batch_of_len.data.cpu().numpy())
# To compact weights again call flatten_parameters().
self.rnn.flatten_parameters()
output, hidden = self.rnn(rnn_input, hidden)
fc_output = self.fc(hidden[-1])
return fc_output
################## _init_hidden ##################
def _init_hidden(self, batch_size):
hidden = torch.zeros(self.n_layers * self.n_directions,
batch_size, self.hidden_size)
return tp.create_variable(hidden)
我已经在主程序中调用了模型:
my_rnn_model = RNNModel(VOCAB_SIZE,INPUT_SIZE, HIDDEN_SIZE,OUTPUT_SIZE, tp.my_embeddings_train_dev,N_LAYERS, bidirectional=True)
criterion = torch.nn.MSELoss(size_average=False)
LR = 0.001
optimizer = torch.optim.Adam(my_rnn_model.parameters(), lr=LR, betas=(0.9, 0.99)
for epoch in range(1, N_EPOCHS + 1):
loss=train()
这是train()函数:
def train():
#Loading Data From TrainLoader
for i, (batch_sents, batch_target_scores) in enumerate(train_loader, 1):
#Zero Padding
zero_padded_batch_sent, sorted_batch_len, sorted_batch_target_scores = tp.make_variables(batch_sents, batch_target_scores,"train")
#Compute Output for Each Batch of Sentences
output = my_rnn_model(zero_padded_batch_sent, sorted_batch_len)
#Compute Loss
loss = criterion(output, sorted_batch_target_scores)
#Updating Parameters by Backward Propagation
my_rnn_model.zero_grad()
loss.backward()
optimizer.step()
但是我认为我的模型无法学习数据,因为运行30个历元后,训练和测试数据的损耗曲线非常振动。尽管我希望该错误随着时间的流逝而减慢,并且它没有振荡现象。
我认为该模型无法正常运行。我的代码的哪一部分是错误的? 预先感谢。