我有一个基于注意力机制的编码器-解码器模型(我是按照这个链接中的教程创建的:https://wanasit.github.io/attention-based-sequence-to-sequence-in-keras.html),现在需要把编码器和解码器拆分开,以便预测任意长度的序列。但是我在测试解码器模型时收到了错误消息:“检查模型时出错:传递给模型的 Numpy 数组列表的大小与模型期望的不符。期望看到 3 个数组,但得到了以下包含 4 个数组的列表”。
# --- Training graph: attention-based encoder-decoder ------------------------

# Encoder: token ids -> embeddings -> LSTM outputs for every timestep.
encoder_inputs = Input(shape=(None,), name="Encoder_input", dtype='float64')
# The layer instance must not be bound to the name `Embedding`: doing so
# shadows the Keras class, and the second `Embedding(...)` construction below
# would then call this layer instead of creating a new one.
encoder_embedding = Embedding(
    vocab_size,
    latent_dim,
    weights=[embedding_matrix],
    mask_zero=True,
    trainable=True,
    name="Embedding",
)
word_embedding_context = encoder_embedding(encoder_inputs)
encoder_lstm = LSTM(neurons, return_sequences=True, name='Encoder_lstm')
encoder_outputs = encoder_lstm(word_embedding_context)
# The last-timestep encoder output is used for BOTH initial states of the
# decoder LSTM (the same tensor is passed twice).
encoder_last1 = encoder_outputs[:, -1, :]
encoder_last2 = encoder_outputs[:, -1, :]
encoder_states = [encoder_last1, encoder_last2]

# Decoder: embeds the decoder input tokens and runs an LSTM seeded with the
# encoder states; only the output sequence is used during training.
decoder_inputs = Input(shape=(None,), dtype='float64', name="decoder_inputs")
decoder_embedding = Embedding(
    vocab_size,
    latent_dim,
    weights=[embedding_matrix],
    mask_zero=True,
    trainable=True,
    name="Embedding1",
)
word_embedding_answer = decoder_embedding(decoder_inputs)
decoder_lstm = LSTM(neurons, return_sequences=True, return_state=True, name="Decoder_lstm")
decoder, _, _ = decoder_lstm(word_embedding_answer, initial_state=encoder_states)

# Attention: dot-product scores between decoder and encoder outputs, passed
# through softmax, then used to combine the encoder outputs into a context.
attention = dot([decoder, encoder_outputs], axes=[2, 2])
attention = Activation('softmax')(attention)
context = dot([attention, encoder_outputs], axes=[2, 1])
decoder_combined_context = concatenate([context, decoder])

# Output head: the two dense layers are kept in variables because the
# inference decoder reuses them (and therefore their trained weights).
decoder_dense1 = TimeDistributed(Dense(latent_dim, activation="tanh"))
output = decoder_dense1(decoder_combined_context)
decoder_dense2 = TimeDistributed(Dense(vocab_size, activation="softmax"))
output = decoder_dense2(output)

model = Model([encoder_inputs, decoder_inputs], output)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
history = model.fit(
    [encoder_input_data, decoder_input_data],
    decoder_target_data,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.2,
)
Inference Part:
# --- Inference graphs: stand-alone encoder and decoder ----------------------

# The encoder model returns the encoder LSTM outputs for every timestep.
encoder_model = Model(encoder_inputs, encoder_outputs)

# Decoder LSTM states are supplied by the caller at prediction time.
state1 = Input(shape=(neurons,))
state2 = Input(shape=(neurons,))
decoder_states_inputs = [state1, state2]

# The encoder outputs must enter the decoder model through a real placeholder.
# `Input(tensor=encoder_outputs)` wraps an existing graph tensor instead, so
# Keras does not expect data for it and the model ends up with only three
# feedable inputs -- which is what makes `predict` reject a list of 4 arrays.
output_states = Input(shape=(None, neurons))

# Reuse the trained decoder LSTM, now driven by externally supplied states.
decoder_outputs, state_h, state_c = decoder_lstm(word_embedding_answer, initial_state=decoder_states_inputs)
decoder_states = [state_h, state_c]

# Same attention computation as in the training graph, but over the
# encoder outputs that are fed in through `output_states`.
attention = dot([decoder_outputs, output_states], axes=[2, 2])
attention = Activation('softmax')(attention)
context = dot([attention, output_states], axes=[2, 1])
decoder_combined_context = concatenate([context, decoder_outputs])

# Reuse the trained dense layers for the output distribution.
output = decoder_dense1(decoder_combined_context)
output = decoder_dense2(output)

# Inputs:  [decoder tokens, encoder outputs, state1, state2]
# Outputs: [output distribution, state_h, state_c]
decoder_model = Model([decoder_inputs, output_states] + decoder_states_inputs, [output] + decoder_states)
I receive an error when I call decoder_model.predict:
def decode_sequence(input_seq):
    """Encode ``input_seq`` and run one decoder prediction step.

    The encoder model is run on ``input_seq``; the last-timestep slice of its
    output is used for both decoder states, and the decoder model is then
    called once with a length-1 target sequence holding the start token.

    Args:
        input_seq: Input passed unchanged to ``encoder_model.predict``.
    """
    # Encode the input; the full encoder output is also fed to the decoder.
    states_value = encoder_model.predict(input_seq)
    # The same last-timestep slice is used for both decoder states.
    states_value1 = states_value[:, -1, :]
    states_value2 = states_value[:, -1, :]
    states = [states_value1, states_value2]
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1))
    # Populate the first character of target sequence with the start character.
    target_seq[0, 0] = target_token_index['\t']
    # Four arrays: decoder tokens, encoder outputs, and the two states.
    # NOTE(review): the visible excerpt ends at this call; any sampling loop
    # or return value is not shown here.
    output_tokens, h, c = decoder_model.predict([target_seq, states_value] + states)