我是PyTorch框架的新手(主要来自Theano和Tensorflow): 我已经按照介绍教程阅读使用字符级RNN分类名称。 我现在尝试将其调整为char级LSTM模型,以获得该框架的一些实践经验。 基本上,我输入char索引的模型序列,并将相同序列作为目标给予模型,但将来会换一个。 但是我不能过度训练一个简单的训练例子,我也看不出我做错了什么。 如果有人能发现我的错误,那将非常有帮助。 这是我的代码:
class LSTMTxtGen(nn.Module):
def __init__(self, hidden_dim, n_layer, vocab_size):
super(LSTMTxtGen, self).__init__()
self.n_layer = n_layer
self.hidden_dim = hidden_dim
self.vocab_size = vocab_size
self.lstm = nn.LSTM(vocab_size, hidden_dim, n_layer, batch_first=True)
# The linear layer that maps from hidden state space to tag space
#self.hidden = self.init_hidden()
def init_hidden(self, batch_size):
# Before we've done anything, we dont have any hidden state.
# Refer to the Pytorch documentation to see exactly
# why they have this dimensionality.
# The axes semantics are (num_layers, minibatch_size, hidden_dim)
return (autograd.Variable(torch.zeros(self.n_layer, batch_size,
self.hidden_dim)),
autograd.Variable(torch.zeros(self.n_layer, batch_size,
self.hidden_dim)))
def forward(self, seqs):
self.hidden = self.init_hidden(seqs.size()[0])
lstm_out, self.hidden = self.lstm(seqs, self.hidden)
lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim)
lstm_out = nn.Linear(lstm_out.size(1), self.vocab_size)(lstm_out)
return lstm_out
model = LSTMTxtGen (
hidden_dim = 50,
n_layer = 3,
vocab_size = 44,
)
print(Model)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adamax(model.parameters())
G = Data.batch_generator(5,100)
batch_per_epoch, to_idx, to_char = next(G)
X, Y = next(G)
for epoch in range(10):
losses = []
for batch_count in range(batch_per_epoch):
model.zero_grad()
#mode.hidden = model.init_hidden()
#X, Y = next(G)
X = autograd.Variable(torch.from_numpy(X))
Y = autograd.Variable(torch.from_numpy(Y))
preds = model(X)
loss = criterion(preds.view(-1, model.vocab_size), Y.view(-1))
loss.backward()
optimizer.step()
losses.append(loss)
if (batch_count % 20 == 0):
print('Loss: ', losses[-1])