Question

我正在尝试通过PyTorch实施非线性回归任务。输入是一些句子，输出是它们的分数（这些分数是一些浮点数）。为了通过计算机理解句子，我通过torch.nn.RNN（递归神经网络）层将每个句子转换为50d实向量（这些是嵌入向量）。然后，我使用torch.nn.Linear层来预测得分。

这是我的模特

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence
import text_processing as tp
import torch.nn.functional as F


class RNNModel(nn.Module):
##################        __init__          ####################
    def __init__(self, vocab_size, input_size,hidden_size, output_size,my_embeddings,n_layers,bidirectional=True):
        super(RNNModel, self).__init__()
        self.hidden_size = hidden_size
        self.n_layers = n_layers
        self.n_directions = int(bidirectional) + 1

        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.rnn = nn.RNN(input_size,hidden_size,n_layers,bidirectional=bidirectional)
        self.fc = nn.Linear(hidden_size, output_size)
####################          forward          #################
    def forward(self, batch_of_sentences, batch_of_len):
        """
        input shape: B x S ([num of sentences in this batch] x [max sent len in this batch] )
        transpose to make S x B
        """
        input=batch_of_sentences
        input = input.t()
        batch_size = input.size(1)

        # Make a hidden
        hidden = self._init_hidden(batch_size)

        # Embedding S x B -> S x B x I (embedding size)
        embedded = self.embedding(input)
        # Pack them up nicely
        rnn_input = pack_padded_sequence(embedded, batch_of_len.data.cpu().numpy())

        # To compact weights again call flatten_parameters().
        self.rnn.flatten_parameters()
        output, hidden = self.rnn(rnn_input, hidden)
        fc_output = self.fc(hidden[-1])
        return fc_output

##################          _init_hidden          ##################
    def _init_hidden(self, batch_size):
        hidden = torch.zeros(self.n_layers * self.n_directions,
                             batch_size, self.hidden_size)
        return tp.create_variable(hidden)

我已经在主程序中调用了模型：

my_rnn_model = RNNModel(VOCAB_SIZE,INPUT_SIZE, HIDDEN_SIZE,OUTPUT_SIZE, tp.my_embeddings_train_dev,N_LAYERS, bidirectional=True)
criterion = torch.nn.MSELoss(size_average=False)
LR = 0.001
optimizer = torch.optim.Adam(my_rnn_model.parameters(), lr=LR, betas=(0.9, 0.99)
for epoch in range(1, N_EPOCHS + 1):
   loss=train()

这是train（）函数：

def train():

    #Loading Data From TrainLoader
    for i, (batch_sents, batch_target_scores) in enumerate(train_loader, 1):
        #Zero Padding
        zero_padded_batch_sent, sorted_batch_len, sorted_batch_target_scores = tp.make_variables(batch_sents, batch_target_scores,"train")

        #Compute Output for Each Batch of Sentences
        output = my_rnn_model(zero_padded_batch_sent, sorted_batch_len)

        #Compute Loss
        loss = criterion(output, sorted_batch_target_scores)

        #Updating Parameters by Backward Propagation
        my_rnn_model.zero_grad()
        loss.backward()
        optimizer.step()

但是我认为我的模型无法学习数据，因为运行30个历元后，训练和测试数据的损耗曲线非常振动。尽管我希望该错误随着时间的流逝而减慢，并且它没有振荡现象。

这是30个时间段内火车和测试数据的损耗曲线

我认为该模型无法正常运行。我的代码的哪一部分是错误的？预先感谢。

Pytorch进行非线性回归

0 个答案: