我使用受http://mlexplained.com/2019/02/15/building-an-lstm-from-scratch-in-pytorch-lstms-in-depth-part-1/启发的非常定制的LSTM单元。
我用它来查看中间门控值。我的问题是,如何扩展此类以提供添加更多层和添加双向性的选项?应该将其包装在新类中还是在当前类中添加?
class Dim(IntEnum):
batch = 0
seq = 1
class simpleLSTM(nn.Module):
def __init__(self, input_sz: int, hidden_sz: int):
super().__init__()
self.input_size = input_sz
self.hidden_size = hidden_sz
# input gate
self.W_ii = Parameter(torch.Tensor(input_sz, hidden_sz))
self.W_hi = Parameter(torch.Tensor(hidden_sz, hidden_sz))
self.b_i = Parameter(torch.Tensor(hidden_sz))
# forget gate
self.W_if = Parameter(torch.Tensor(input_sz, hidden_sz))
self.W_hf = Parameter(torch.Tensor(hidden_sz, hidden_sz))
self.b_f = Parameter(torch.Tensor(hidden_sz))
# ???
self.W_ig = Parameter(torch.Tensor(input_sz, hidden_sz))
self.W_hg = Parameter(torch.Tensor(hidden_sz, hidden_sz))
self.b_g = Parameter(torch.Tensor(hidden_sz))
# output gate
self.W_io = Parameter(torch.Tensor(input_sz, hidden_sz))
self.W_ho = Parameter(torch.Tensor(hidden_sz, hidden_sz))
self.b_o = Parameter(torch.Tensor(hidden_sz))
self.init_weights()
self.out = nn.Linear(hidden_sz, len(TRG.vocab))
def init_weights(self):
for p in self.parameters():
if p.data.ndimension() >= 2:
nn.init.xavier_uniform_(p.data)
else:
nn.init.zeros_(p.data)
def forward(self, x, init_states=None ):
"""Assumes x is of shape (batch, sequence, feature)"""
seq_sz, bs, = x.size()
hidden_seq = []
prediction = []
if init_states is None:
h_t, c_t = torch.zeros(self.hidden_size).to(x.device), torch.zeros(self.hidden_size).to(x.device)
else:
h_t, c_t = init_states
for t in range(seq_sz): # iterate over the time steps
x_t = x[t, :].float()
#LOOK HERE!!!
i_t = torch.sigmoid(x_t @ self.W_ii + h_t @ self.W_hi + self.b_i)
f_t = torch.sigmoid(x_t @ self.W_if + h_t @ self.W_hf + self.b_f)
g_t = torch.tanh(x_t @ self.W_ig + h_t @ self.W_hg + self.b_g)
o_t = torch.sigmoid(x_t @ self.W_io + h_t @ self.W_ho + self.b_o)
c_t = f_t * c_t + i_t * g_t
h_t = o_t * torch.tanh(c_t)
hidden_seq.append(h_t.unsqueeze(Dim.batch))
pred_t = self.out(h_t.unsqueeze(Dim.batch))
#pred_t = F.softmax(pred_t)
prediction.append(pred_t)
hidden_seq = torch.cat(hidden_seq, dim=Dim.batch)
prediction = torch.cat(prediction, dim=Dim.batch)
# reshape from shape (sequence, batch, feature) to (batch, sequence, feature)
hidden_seq = hidden_seq.transpose(Dim.batch, Dim.seq).contiguous()
prediction = prediction.transpose(Dim.batch, Dim.seq).contiguous()
return prediction, hidden_seq, (h_t, c_t)
我将其称为并使用以下示例进行训练。
lstm = simpleLSTM(1, 100)
hidden_size = lstm.hidden_size
optimizer = optim.Adam(lstm.parameters())
h_0, c_0 = (torch.zeros(hidden_size, requires_grad=True),
torch.zeros(hidden_size, requires_grad=True))
grads = []
h_t, c_t = h_0, c_0
N_EPOCHS = 10
for epoch in range(N_EPOCHS):
epoch_loss = 0
for i, batch in enumerate(train):
optimizer.zero_grad()
src, src_len = batch.src
trg = batch.trg
trg = trg.view(-1)
predict, output, hidden_states = lstm(src)
predict = predict.t().unsqueeze(1)
predict= predict.view(-1, predict.shape[-1])
loss = criterion(predict,trg)
loss.backward()
optimizer.step()
epoch_loss += loss.item()
print(epoch_loss)
答案 0 :(得分:0)
最简单的方法是创建另一个模块(例如Bidirectional
)并将所需的任何单元格传递给它。
实施本身很容易做到。请注意,我正在使用concat
操作来连接双向输出,您可能想要指定其他模式,例如求和等。
请阅读下面代码中的注释,您可能需要适当地对其进行更改。
import torch
class Bidirectional(torch.nn.Module):
def __init__(self, cell):
super().__init__()
self.cell = cell
def __call__(self, x, init_states=None):
prediction, hidden_seq, (h_t, c_t) = self.cell(x, init_states)
backward_prediction, backward_hidden_seq, (
backward_h_t,
backward_c_t,
# Assuming sequence is first dimension, otherwise change 0 appropriately
# Reverses sequences so the LSTM cell acts on the reversed sequence
) = self.cell(torch.flip(x, (0,)), init_states)
return (
# Assuming you transpose so it has (batch, seq, features) dimensionality
torch.cat((prediction, backward_prediction), 2),
torch.cat((hidden_seq, backward_hidden_seq), 2),
# Assuming it has (batch, features) dimensionality
torch.cat((h_t, backward_ht), 1),
torch.cat((c_t, backward_ct), 1),
)
涉及多层时,原则上可以做类似的事情:
import torch
class Multilayer(torch.nn.Module):
def __init__(self, *cells):
super().__init__()
self.cells = torch.nn.ModuleList(cells)
def __call__(self, x, init_states=None):
inputs = x
for cell in self.cells:
prediction, hidden_seq, (h_t, c_t) = cell(inputs, init_states)
inputs = hidden_seq
return prediction, hidden_seq, (h_t, c_t)
请注意,您必须将创建的单元格对象传递到Multilayer
中,例如:
# For three layers of LSTM, each needs features to be set up correctly
multilayer_LSTM = Multilayer(LSTM(), LSTM(), LSTM())
您也可以将类而不是实例传递给构造函数,并在Multilayer
内创建那些类(因此hidden_size
自动匹配),但是这些想法应该可以帮助您入门。