I am implementing code from the paper "Labeled Data Generation with Encoder-Decoder LSTM for Semantic Slot Filling" (https://pdfs.semanticscholar.org/7ffe/83d7dd3a474e15ccc2aef412009f100a5802.pdf) for natural language understanding (NLU). My architecture is a simple encoder-decoder LSTM, but since the sentences I generated (for both words and labels) were incorrect, as a first step I am just trying to generate exactly the same sentence as the input (words only). Unfortunately, this is not working correctly.
I am using word2vec for the word embeddings, with the embedding dimension set to 64 (as suggested in the paper). The encoder LSTM receives the sequence in reverse order and uses a dropout rate of 0.5. The decoder LSTM also uses a dropout rate of 0.5, with a softmax layer on each output of the sequence to map to the most probable word. The input is exactly the same as the target (the same sentence), because for now I only want to reproduce the input sentence.
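For context, this is roughly how the pretrained matrix I pass in as w_emb is built from the word2vec model (a minimal sketch; the model path 'w2v.model' and the w2i word-to-index mapping are placeholders for my own data):

    import numpy as np
    from gensim.models import Word2Vec

    # Sketch only: 'w2v.model' and w2i are placeholders for my actual
    # word2vec model and word-to-index vocabulary mapping.
    w2v = Word2Vec.load('w2v.model')      # 64-dimensional vectors
    w_emb = np.zeros((len(w2i), 64))
    for word, idx in w2i.items():
        if word in w2v.wv:                # out-of-vocabulary rows stay zero
            w_emb[idx] = w2v.wv[word]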
For training I use the Adam optimizer with categorical_crossentropy as the loss. For inference I use beam search (B = 3) when generating sequences.
My training code:
from keras.layers import Input, LSTM, Dense, Embedding
from keras.models import Model
from keras import backend as K

def pretrained_embedding_layer(emb):
    # Embedding layer initialized with the pretrained word2vec matrix,
    # frozen during training.
    vocab_len = len(emb)
    emb_dim = len(emb[0])
    emb_layer = Embedding(vocab_len, emb_dim, trainable=False)
    emb_layer.build((None,))
    emb_layer.set_weights([emb])
    return emb_layer
LSTM_encoder = LSTM(1024, dropout=0.5, return_state=True, go_backwards=True, name='lstm_encoder')
LSTM_decoder = LSTM(1024, dropout=0.5, return_sequences=True, return_state=True, name='lstm_decoder')
dense_w = Dense(vocab_w_size, activation='softmax', name="word_output")
K.set_learning_phase(1)
def model1_enc_dec(input_shape, w_emb):
    words_indices = Input(shape=input_shape, dtype='int32')
    wemb_layer = pretrained_embedding_layer(w_emb)
    wemb = wemb_layer(words_indices)
    enc_out, enc_state_h, enc_state_c = LSTM_encoder(wemb)
    encoder_states = [enc_state_h, enc_state_c]
    dec_out, dec_state_h, dec_state_c = LSTM_decoder(wemb,
                                                     initial_state=encoder_states)
    dec_out = dense_w(dec_out)
    model1 = Model(inputs=[words_indices], outputs=[dec_out])
    return model1
model = model1_enc_dec((maxlen,), w_emb)
model.summary()
model.compile(loss="categorical_crossentropy", optimizer='adam', metrics=['accuracy'])
model.fit(train_w, train_lab_w, validation_data=(val_w, val_lab_w), epochs=epochs, verbose=1, shuffle=True)
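Since categorical_crossentropy expects one-hot targets, train_lab_w is just the one-hot encoding of the same word indices as the input (a sketch, assuming train_w has shape (num_samples, maxlen)):

    from keras.utils import to_categorical

    # Targets: one-hot encoding of the input itself,
    # shaped (num_samples, maxlen, vocab_w_size).
    train_lab_w = to_categorical(train_w, num_classes=vocab_w_size)
    val_lab_w = to_categorical(val_w, num_classes=vocab_w_size)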
My inference code:
wemb_layer = Embedding(len(w_emb), len(w_emb[0]), trainable=False)
wemb_layer.build((None,))
LSTM_encoder = LSTM(1024, return_state=True, go_backwards=True, name='lstm_encoder')
LSTM_decoder = LSTM(1024, return_sequences=True, return_state=True, name='lstm_decoder')
dense_w = Dense(vocab_w_size, activation='softmax', name="word_output")
def target_model(input_shape):
    words_indices = Input(shape=input_shape, dtype='int32')
    wemb = wemb_layer(words_indices)
    enc_out, enc_state_h, enc_state_c = LSTM_encoder(wemb)
    encoder_states = [enc_state_h, enc_state_c]
    dec_out, dec_state_h, dec_state_c = LSTM_decoder(wemb,
                                                     initial_state=encoder_states)
    dec_out = dense_w(dec_out)
    model = Model(inputs=[words_indices], outputs=[dec_out])
    return model
target_model = target_model((maxlen,))
# Copy the trained weights into the inference layers.
wemb_layer.set_weights(model.layers[1].get_weights())  # layer 0: input
LSTM_encoder.set_weights(model.layers[2].get_weights())
LSTM_decoder.set_weights(model.layers[3].get_weights())
dense_w.set_weights(model.layers[4].get_weights())
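Since the layers have explicit names, fetching them by name instead of by position might be less fragile (a small alternative sketch):

    # Alternative: look the trained layers up by name instead of position.
    LSTM_encoder.set_weights(model.get_layer('lstm_encoder').get_weights())
    LSTM_decoder.set_weights(model.get_layer('lstm_decoder').get_weights())
    dense_w.set_weights(model.get_layer('word_output').get_weights())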
def model1_enco_infe(input_shape):
    words_indices = Input(shape=input_shape, dtype='int32')
    wemb = wemb_layer(words_indices)
    enc_out, enc_state_h, enc_state_c = LSTM_encoder(wemb)
    encoder_model = Model(inputs=[words_indices],
                          outputs=[enc_state_h, enc_state_c])
    return encoder_model
def model1_deco_infe(input_shape):
    dec_word_input = Input(shape=input_shape, dtype='int32')
    dec_state_input_h = Input(shape=(1024,))
    dec_state_input_c = Input(shape=(1024,))
    wemb = wemb_layer(dec_word_input)
    dec_states_input = [dec_state_input_h, dec_state_input_c]
    dec_out, dec_state_h, dec_state_c = LSTM_decoder(wemb,
                                                     initial_state=dec_states_input)
    dec_states_output = [dec_state_h, dec_state_c]
    deco_out = dense_w(dec_out)
    decoder_model = Model(inputs=[dec_word_input] + dec_states_input,
                          outputs=[deco_out] + dec_states_output)
    return decoder_model
encoder_model = model1_enco_infe((maxlen,))
decoder_model = model1_deco_infe((1,))
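Before the beam search, I also sanity-check the two inference models with a plain greedy decoding loop (a minimal sketch; w2i and i2w are my word/index mappings):

    import numpy as np

    def greedy_decode(word_seq, encoder_model, decoder_model, w2i, i2w, max_len=50):
        # Feed the encoder states into the decoder and always pick the
        # argmax word; if this already degenerates, beam search will too.
        states = encoder_model.predict(word_seq)
        target = np.array([[w2i['BOS']]])
        decoded = []
        for _ in range(max_len):
            dec_out, h, c = decoder_model.predict([target] + states)
            idx = int(np.argmax(dec_out[0, 0, :]))
            if i2w[idx] == 'EOS':
                break
            decoded.append(i2w[idx])
            target = np.array([[idx]])
            states = [h, c]
        return ' '.join(decoded)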
from math import log
import numpy as np

def beamsearch_B(deco_w_out, beam):
    # Return the `beam` most probable word indices by repeatedly
    # taking the argmax and zeroing that entry out.
    words_index = []
    dw = deco_w_out.copy()
    for i in range(beam):
        word_index = np.argmax(dw, axis=-1)
        dw[0][0][word_index[0][0]] = 0
        words_index.append(word_index[0][0])
    return words_index
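An equivalent way to get the top-`beam` indices without mutating a copy of the distribution would be np.argpartition (an alternative I considered, not what I currently run):

    def beamsearch_B_topk(deco_w_out, beam):
        # Same result as beamsearch_B: indices of the `beam` largest
        # probabilities, sorted from most to least probable.
        flat = deco_w_out[0, 0, :]
        top = np.argpartition(flat, -beam)[-beam:]
        return top[np.argsort(-flat[top])].tolist()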
def generate_model1_add(word_seq, encoder_model, decoder_model, dec_word_input, id2word, beam):
    # Encode the input sentence; the encoder states seed the decoder.
    [enc_state_h, enc_state_c] = encoder_model.predict(word_seq)
    states = [enc_state_h, enc_state_c]
    word_sentence = ''
    probs_word = []
    word_sentences = []
    dec_word_inputs = []
    states_beam = []
    stop_condition = False
    # First decoding step: expand the BOS token into `beam` hypotheses.
    [dec_w_out, dec_state_h, dec_state_c] = \
        decoder_model.predict([dec_word_input] + states)
    words_index = beamsearch_B(dec_w_out, beam)
    for i in range(beam):
        probs_word.append(-log(dec_w_out[0][0][words_index[i]]))
        word_sentences.append(id2word[words_index[i]])
        dec_word_inputs.append(np.array([[words_index[i]]]))
        states_beam.append([dec_state_h, dec_state_c])
    n_words = 1
    endgame = []
    while not stop_condition:
        words_indexes, words_sentences, probs_words, states_b = [], [], [], []
        for k in range(beam):
            [dec_w_out, dec_state_h, dec_state_c] = \
                decoder_model.predict([dec_word_inputs[k]] + states_beam[k])
            words_index = beamsearch_B(dec_w_out, beam)
            states = [dec_state_h, dec_state_c]
            for j in range(beam):
                words_indexes.append(words_index[j])
                # Accumulate the score as a sum of negative log-probabilities.
                probs_words.append(probs_word[k] +
                                   -log(dec_w_out[0][0][words_index[j]] + 1e-7))
                words_sentences.append(word_sentences[k] + ' ' +
                                       id2word[words_index[j]])
                states_b.append(states)
        # Keep the `beam` hypotheses with the lowest accumulated score.
        probs = []
        for i in range(len(probs_words)):
            probs.append(1 / probs_words[i])
        indexes = []
        for i in range(beam):
            index = np.argmax(probs, axis=-1)
            probs[index] = 0
            indexes.append(index)
        for i in range(beam):
            probs_word[i] = probs_words[indexes[i]]
            word_sentences[i] = words_sentences[indexes[i]]
            dec_word_inputs[i] = np.array([[words_indexes[indexes[i]]]])
            states_beam[i] = states_b[indexes[i]]
            if id2word[words_indexes[indexes[i]]] == 'EOS':
                endgame.append(i)
        if len(endgame) == 1:
            word_sentence = word_sentences[endgame[0]]
            stop_condition = True
        elif len(endgame) > 1:
            word_sentence = word_sentences[np.min(endgame)]
            stop_condition = True
        n_words += 1
        if n_words > 50:
            word_sentence = word_sentences[0]
            stop_condition = True
    return word_sentence
word_sentence = generate_model1_add(np.reshape(train_w[i], (1, maxlen)),
                                    encoder_model, decoder_model,
                                    np.array([[w2i['BOS']]]), i2w, 3)
An example of the sequences I generate:
Input sentence: BOS what is the fourth flight from boston to atlanta EOS PAD PAD PAD ...
Generated sentence: BOS from from from from from from from from from from from from from from from from from from from ...
The trained weights seem to be wrong, yet during training I get loss: 0.0032 - acc: 0.9990 - val_loss: 0.0794 - val_acc: 0.9888.
All I want is to generate exactly the same sentence as the input. I hope you can help me. Thanks in advance!