In a chatbot built with an encoder-decoder model, the average training error (cross-entropy) does not decrease, even when training on only a single data point. All of the model's data comes from the Cornell Movie Dialogs Corpus. To encode a word, each word is assigned a number, which is used as the index of a sparse tensor that is passed to the model.
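In other words, each word becomes a sparse one-hot vector over the vocabulary. A minimal sketch of that encoding, assuming a vocab object with a getIndex method and the vocabulary size of 4453 used in the code below:

import torch

VOCAB_SIZE = 4453  # vocabulary size used in the training loop below

def encode_word(word_index, vocab_size=VOCAB_SIZE):
    # Sparse one-hot vector: a single 1 at the word's index, zeros elsewhere.
    return torch.sparse_coo_tensor(
        torch.tensor([[word_index]]),  # indices, shape (ndim=1, nnz=1)
        torch.tensor([1]),             # values
        [vocab_size])                  # dense shape

# e.g. encode_word(vocab.getIndex("hello")) gives a 4453-dimensional sparse one-hot tensor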
Training loop:
for epochcounter in range(10000):
    print("=======================EPOCH "+str(epochcounter)+"==================================")
    for k, batch in enumerate(iterate_batches(data)):
        batchid += 1
        averageloss = 0
        losscounter = 0
        for sample, label in batch:
            sample_t = []
            label_t = []
            embedded_label = []
            label = ['begline'] + label
            # Encode the input sentence as a list of sparse one-hot tensors
            for rawword in sample:
                rawword = rawword.lower()
                word = "".join(list(filter(lambda x: x in "qwertyuiopasdfghjklzxcvbnm", list(rawword))))
                i = vocab.getIndex(word)
                sample_t.append(torch.sparse_coo_tensor(torch.tensor([[i]]), torch.tensor([1]), [4453]))
            # Encode the target sentence as class indices (for the loss) and
            # as sparse one-hot tensors (decoder inputs)
            for rawword in label:
                rawword = rawword.lower()
                word = "".join(list(filter(lambda x: x in "qwertyuiopasdfghjklzxcvbnm", list(rawword))))
                i = vocab.getIndex(word)
                embedded_label.append(i)
                label_t.append(torch.sparse_coo_tensor(torch.tensor([[i]]), torch.tensor([1]), [4453]))
            embedded_label.append(vocab.getIndex("endline"))
            out_t = net(sample_t, label_t)
            # One cross-entropy term per decoder output
            for n, out in enumerate(out_t):
                loss_t = loss_function(out.view(1, 4453), torch.tensor([embedded_label[n]]).view(1))
                loss_t.backward(retain_graph=True)
                loss = loss_t.item()
                averageloss = (losscounter*averageloss + loss)/(losscounter + 1)
                losscounter += 1
        print("Epoch "+str(epochcounter)+", Batch "+str(k)+", Average Loss: "+str(averageloss))
        # _ = nn.utils.clip_grad_norm_(net.parameters(), clip)
        optimizer.step()
        optimizer.zero_grad()
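For context, the loop assumes iterate_batches(data) yields batches of (sample, label) pairs where both elements are lists of token strings, and that vocab maps words to integer indices via getIndex. Neither helper is shown above; a purely hypothetical sketch consistent with that usage:

class Vocab:
    # Hypothetical vocabulary: maps words to integer indices, with a
    # fallback index for unknown words (the real implementation is not shown).
    def __init__(self, words):
        self.index = {w: i for i, w in enumerate(words)}
        self.unk = self.index.get("unk", 0)

    def getIndex(self, word):
        return self.index.get(word, self.unk)

def iterate_batches(data, batch_size=32):
    # Hypothetical batching: data is a list of (sample, label) pairs,
    # where sample and label are lists of token strings.
    for start in range(0, len(data), batch_size):
        yield data[start:start + batch_size]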
Here is the custom model (the class of the net object):
class Seq2SeqXentropy(nn.Module):
    def __init__(self, emb_size, hid_size, output_size):
        super(Seq2SeqXentropy, self).__init__()
        self.encoder = Encoder(emb_size, hid_size)
        self.decoder = Decoder(emb_size, hid_size, output_size)

    def forward(self, input_sentence, target_sentence):
        # Encode the input sentence into a single hidden state, then decode
        # it conditioned on the target sentence (teacher forcing).
        hidden_state = self.encoder(input_sentence)
        output_sentence = self.decoder(hidden_state, target_sentence)
        return output_sentence

class Encoder(nn.Module):
    def __init__(self, emb_size, hid_size):
        super(Encoder, self).__init__()
        self.hid_size = hid_size
        self.rnn = EncoderRNNCell(emb_size, hid_size)

    def forward(self, sentence):
        # Run the RNN cell over the sentence, starting from a random hidden state.
        hidden_state = torch.rand(self.hid_size)
        for word in sentence:
            hidden_state = self.rnn(word, hidden_state)
        return hidden_state

class Decoder(nn.Module):
    def __init__(self, emb_size, hid_size, output_size):
        super(Decoder, self).__init__()
        self.rnn = DecoderRNNCell(emb_size, hid_size, output_size)
        self.output = []

    def forward(self, hidden_state, targetsentence):
        # Produce one output distribution per target word.
        output = []
        for x in targetsentence:
            hidden_state, y = self.rnn(x, hidden_state)
            output.append(y)
        return output

class EncoderRNNCell(nn.Module):
    def __init__(self, emb_size, hid_size):
        super(EncoderRNNCell, self).__init__()
        self.w = nn.Parameter(torch.rand(hid_size, emb_size))
        self.u = nn.Parameter(torch.rand(hid_size, hid_size))

    def forward(self, x, h):
        # h_f = relu(W x + U h)
        h_f = nn.functional.relu(torch.mv(self.w, x.to_dense().type(torch.FloatTensor)) + torch.mv(self.u, h))
        return h_f

class DecoderRNNCell(nn.Module):
    def __init__(self, emb_size, hid_size, output_size):
        super(DecoderRNNCell, self).__init__()
        self.w = nn.Parameter(torch.rand(hid_size, emb_size))
        self.u = nn.Parameter(torch.rand(hid_size, hid_size))
        self.v = nn.Parameter(torch.rand(output_size, hid_size))

    def forward(self, x, h):
        # h_f = relu(W x + U h); y = softmax(relu(V h_f))
        h_f = nn.functional.relu(torch.mv(self.w, x.to_dense().type(torch.FloatTensor)) + torch.mv(self.u, h))
        y = nn.functional.softmax(nn.functional.relu(torch.mv(self.v, h_f)), dim=0)
        return h_f, y
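The snippet does not show how net, loss_function, and optimizer are constructed. A plausible setup consistent with how they are used in the training loop (the specific loss class, optimizer, and hidden size here are assumptions, not taken from the original code):

import torch.nn as nn
import torch.optim as optim

EMB_SIZE = 4453     # one-hot input width, matching the sparse tensors above
HID_SIZE = 256      # assumed hidden size; not given in the question
OUTPUT_SIZE = 4453  # vocabulary size, matching out.view(1, 4453)

net = Seq2SeqXentropy(EMB_SIZE, HID_SIZE, OUTPUT_SIZE)
# nn.CrossEntropyLoss expects raw, unnormalized scores of shape (N, C) and
# class indices of shape (N,), which matches the out.view(1, 4453) /
# embedded_label[n] pairing in the training loop.
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-3)  # optimizer and lr are assumptions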