不含pack_padded_sequence

时间:2018-09-17 05:16:03

标签: python lstm

我想使用定制的LSTM进行图像说明。 我的问题是，在批量处理多个不同长度的句子时，大多数LSTM模型都使用带有填充的pack_padded_sequence()函数，并使用batch_size进行训练。 我想知道在自定义LSTM中不使用pack_padded_sequence()的情况下，如何输入相同的参数。

这是我的模型

class FactoredLSTM(nn.Module):
    """Factored LSTM cell for image captioning, unrolled manually.

    Each gate's input-to-hidden transform is factored as U·S·V, where the
    S matrices are style-specific (applied here only in "factual" mode) —
    a StyleNet-style factored LSTM.  Because the recurrence is unrolled
    one time step at a time in ``forward``, no ``pack_padded_sequence`` is
    needed: padded positions are simply computed too, and the caller
    should mask them out of the loss using ``lengths``.
    """

    def __init__(self, emb_dim, hidden_dim, factored_dim, vocab_size, max_len=30):
        """Set the hyper-parameters and build the layers.

        Args:
            emb_dim: word/image embedding size.
            hidden_dim: LSTM hidden/cell state size.
            factored_dim: inner dimension of the U·S·V factorization.
            vocab_size: output vocabulary size.
            max_len: maximum caption length (stored; not used in forward).
        """
        super().__init__()

        self.hidden_dim = hidden_dim
        self.factored_dim = factored_dim
        self.vocab_size = vocab_size
        self.max_len = max_len
        self.embed = nn.Embedding(vocab_size, emb_dim)
        self.linear = nn.Linear(hidden_dim, vocab_size)

        # Factored LSTM weights, per gate g in {i, f, o, c}:
        #   V_g: emb_dim      -> factored_dim  (input projection)
        #   U_g: factored_dim -> hidden_dim
        #   W_g: hidden_dim   -> hidden_dim    (recurrent)
        # Created in the same i, f, o, c order as the original code so the
        # random parameter-initialization draws stay reproducible.
        for gate in ("i", "f", "o", "c"):
            setattr(self, "U_" + gate, nn.Linear(factored_dim, hidden_dim))
            setattr(self, "V_" + gate, nn.Linear(emb_dim, factored_dim))
            setattr(self, "W_" + gate, nn.Linear(hidden_dim, hidden_dim))

        # Style-specific factor matrices S_g (factored_dim -> factored_dim).
        for gate in ("i", "f", "o", "c"):
            setattr(self, "S_" + gate, nn.Linear(factored_dim, factored_dim))

    def forward_factor(self, embedded, h_0, c_0, mode):
        """Run one LSTM time step.

        Args:
            embedded: inputs for this step, [batch, emb_dim].
            h_0: previous hidden state, [batch, hidden_dim].
            c_0: previous cell state, [batch, hidden_dim].
            mode: "factual" applies the S factor matrices; any other
                value skips them.

        Returns:
            (hiddens, h_t, c_t); ``hiddens`` is the same tensor as
            ``h_t``, kept so the original 3-tuple interface is unchanged.
        """
        i = self.V_i(embedded)
        f = self.V_f(embedded)
        o = self.V_o(embedded)
        c = self.V_c(embedded)

        if mode == "factual":
            i = self.S_i(i)
            f = self.S_f(f)
            o = self.S_o(o)
            c = self.S_c(c)

        i_t = torch.sigmoid(self.U_i(i) + self.W_i(h_0))
        f_t = torch.sigmoid(self.U_f(f) + self.W_f(h_0))
        o_t = torch.sigmoid(self.U_o(o) + self.W_o(h_0))
        c_tilda = torch.tanh(self.U_c(c) + self.W_c(h_0))

        c_t = f_t * c_0 + i_t * c_tilda
        # Bug fix: the standard LSTM output is o_t * tanh(c_t); the
        # original computed o_t * c_t, which lets the hidden state grow
        # unbounded over long sequences.
        h_t = o_t * torch.tanh(c_t)

        return h_t, h_t, c_t

    def forward(self, features, captions, lengths, mode="factual"):
        """Unroll the factored LSTM over a batch of captions.

        Args:
            features: fixed image feature vectors, [batch, emb_dim].
            captions: padded token ids, [batch, seq_len].
            lengths: true caption lengths; only len(lengths) (the batch
                size) is consumed here — use it to mask the loss outside.
            mode: caption style, forwarded to ``forward_factor``.

        Returns:
            Vocabulary logits for every step, [batch, seq_len, vocab_size].
        """
        embedded = self.embed(captions)                       # [batch, seq_len, emb_dim]
        # Prepend the image feature as the first "token" of the sequence.
        embedded = torch.cat((features.unsqueeze(1), embedded), 1)

        h_t, c_t = self.init_hidden_states(len(lengths))
        outputs = []

        # Manual unroll over the padded max length — this replaces
        # pack_padded_sequence (the original built a packed sequence here
        # and never used it).  The last position is not fed because
        # nothing follows it to predict.
        for index in range(embedded.size(1) - 1):
            emb = embedded[:, index, :]
            hiddens, h_t, c_t = self.forward_factor(emb, h_t, c_t, mode=mode)
            outputs.append(self.linear(hiddens))

        return torch.stack(outputs, 1)

    def init_hidden_states(self, batch_size):
        """Return uniformly initialized (h0, c0), each [batch_size, hidden_dim].

        Fixes vs. the original: tensors go to the GPU only when one is
        available (the hard-coded .cuda() crashed on CPU-only machines);
        the deprecated no-op Variable wrapper is dropped; and the tensors
        are created uninitialized instead of zero-filled, since uniform_
        overwrote the zeros anyway.
        """
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        h0 = torch.empty(batch_size, self.hidden_dim, device=device).uniform_()
        c0 = torch.empty(batch_size, self.hidden_dim, device=device).uniform_()
        return h0, c0

对于这部分内容我是否将正确的参数输入到定制的LSTM中感到困惑。 如下:

# iterate for length of captions
    for index in range(embedded.size(1)-1):
        emb = embedded[:, index, :]                 
        hiddens, h_t, c_t = self. forward_factor(emb, h_t, c_t, mode=mode)
        outs = self.linear(hiddens)
        outputs.append(outs)

有人可以给我一些建议吗? 非常感谢!!!

0 个答案:

没有答案