I want to do image captioning with a custom LSTM. My problem is that, when training on a batch of sentences of different lengths, most LSTM models pad the captions and feed them through pack_padded_sequence(), then train with the whole batch at once. I would like to know how to feed the same inputs (the padded captions and their lengths) into my custom LSTM without using pack_padded_sequence().
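For reference, this is roughly the usual pattern I mean with the built-in nn.LSTM (the dimensions here are just placeholders, not from my model):

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

lstm = nn.LSTM(input_size=256, hidden_size=512, batch_first=True)
embedded = torch.randn(4, 10, 256)     # [batch, max_seq_len, emb_dim]
lengths = torch.tensor([10, 7, 5, 3])  # true lengths, sorted descending

packed = pack_padded_sequence(embedded, lengths, batch_first=True)
packed_out, (h_n, c_n) = lstm(packed)  # the LSTM skips the padded steps
outputs, _ = pad_packed_sequence(packed_out, batch_first=True)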
Here is my model:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pack_padded_sequence


class FactoredLSTM(nn.Module):
    def __init__(self, emb_dim, hidden_dim, factored_dim, vocab_size, max_len=30):
        """Set the hyper-parameters and build the layers."""
        super(FactoredLSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.factored_dim = factored_dim
        self.vocab_size = vocab_size
        self.max_len = max_len
        self.embed = nn.Embedding(vocab_size, emb_dim)
        self.linear = nn.Linear(hidden_dim, vocab_size)

        # factored LSTM weights U, V, W for each gate
        self.U_i = nn.Linear(factored_dim, hidden_dim)
        self.V_i = nn.Linear(emb_dim, factored_dim)
        self.W_i = nn.Linear(hidden_dim, hidden_dim)

        self.U_f = nn.Linear(factored_dim, hidden_dim)
        self.V_f = nn.Linear(emb_dim, factored_dim)
        self.W_f = nn.Linear(hidden_dim, hidden_dim)

        self.U_o = nn.Linear(factored_dim, hidden_dim)
        self.V_o = nn.Linear(emb_dim, factored_dim)
        self.W_o = nn.Linear(hidden_dim, hidden_dim)

        self.U_c = nn.Linear(factored_dim, hidden_dim)
        self.V_c = nn.Linear(emb_dim, factored_dim)
        self.W_c = nn.Linear(hidden_dim, hidden_dim)

        # factor matrices S for each gate
        self.S_i = nn.Linear(factored_dim, factored_dim)
        self.S_f = nn.Linear(factored_dim, factored_dim)
        self.S_o = nn.Linear(factored_dim, factored_dim)
        self.S_c = nn.Linear(factored_dim, factored_dim)

    def forward_factor(self, embedded, h_0, c_0, mode):
        # one LSTM step on a single timestep of the batch
        i = self.V_i(embedded)
        f = self.V_f(embedded)
        o = self.V_o(embedded)
        c = self.V_c(embedded)

        if mode == "factual":
            i = self.S_i(i)
            f = self.S_f(f)
            o = self.S_o(o)
            c = self.S_c(c)

        i_t = torch.sigmoid(self.U_i(i) + self.W_i(h_0))
        f_t = torch.sigmoid(self.U_f(f) + self.W_f(h_0))
        o_t = torch.sigmoid(self.U_o(o) + self.W_o(h_0))
        c_tilda = torch.tanh(self.U_c(c) + self.W_c(h_0))

        c_t = f_t * c_0 + i_t * c_tilda
        h_t = o_t * c_t
        hiddens = h_t

        return hiddens, h_t, c_t

    def forward(self, features, captions, lengths, mode="factual"):
        """
        Args:
            features: fixed vectors from images, [batch, emb_dim]
            captions: [batch, seq_len]
            lengths: true caption lengths, [batch]
            mode: type of caption to generate
        """
        embedded = self.embed(captions)
        embedded = torch.cat((features.unsqueeze(1), embedded), 1)
        packed = pack_padded_sequence(embedded, lengths, batch_first=True)  # currently unused

        # initialize hidden state
        h_t, c_t = self.init_hidden_states(len(lengths))
        outputs = []

        # iterate over the timesteps of the (padded) captions
        for index in range(embedded.size(1) - 1):
            emb = embedded[:, index, :]
            hiddens, h_t, c_t = self.forward_factor(emb, h_t, c_t, mode=mode)
            outs = self.linear(hiddens)
            outputs.append(outs)

        outputs = torch.stack(outputs, 1)
        return outputs

    def init_hidden_states(self, batch_size):
        hidden_dim = self.hidden_dim
        h0 = Variable(torch.zeros(batch_size, hidden_dim)).cuda()
        c0 = Variable(torch.zeros(batch_size, hidden_dim)).cuda()
        nn.init.uniform_(h0)
        nn.init.uniform_(c0)
        return h0, c0
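From what I understand, pack_padded_sequence() mainly records how many sequences are still "alive" at each timestep, so a manual loop could in principle slice the batch instead of packing. A small illustration of what I mean (the values are just an example, assuming lengths sorted in descending order):

lengths = torch.tensor([5, 3, 2])
packed = pack_padded_sequence(torch.zeros(3, 5, 8), lengths, batch_first=True)
print(packed.batch_sizes)  # tensor([3, 3, 2, 1, 1])
# i.e. at timestep t, only the first packed.batch_sizes[t] rows of the
# length-sorted batch would still need to go through forward_factor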
I'm confused about whether I am feeding the right inputs into the custom LSTM in this part:
# iterate over the timesteps of the (padded) captions
for index in range(embedded.size(1) - 1):
    emb = embedded[:, index, :]
    hiddens, h_t, c_t = self.forward_factor(emb, h_t, c_t, mode=mode)
    outs = self.linear(hiddens)
    outputs.append(outs)
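Would it be enough to keep looping over every timestep like this and then use lengths to mask out the padded positions, for example when computing the loss? A rough sketch of what I mean (the mask logic is just my guess, not tested, and the shapes are placeholders):

import torch
import torch.nn.functional as F

outputs = torch.randn(4, 10, 1000)         # [batch, seq_len, vocab_size]
targets = torch.randint(0, 1000, (4, 10))  # [batch, seq_len], padded
lengths = torch.tensor([10, 7, 5, 3])      # true caption lengths

seq_len = outputs.size(1)
mask = torch.arange(seq_len).unsqueeze(0) < lengths.unsqueeze(1)  # [batch, seq_len]

loss = F.cross_entropy(outputs.reshape(-1, outputs.size(-1)),
                       targets.reshape(-1), reduction="none")
loss = (loss * mask.reshape(-1).float()).sum() / mask.sum()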
Can anyone give me some advice? Thanks a lot!!!