基本上，我尝试使用 RNN 层和 GRU 单元实现 Seq2Seq 模型。在训练过程中一切似乎都正常（loss 和 val_loss 都在下降）。
这是我的实现方式
编码器
class Encoder(object):
    """Encoder half of the Seq2Seq model: an embedding followed by stacked GRU cells.

    The instance is a plain callable that wires its layers onto an input
    tensor; it is not itself a Keras layer or model.

    Args:
        num_words: Passed to the embedding layer as ``input_dim``.
        embedding_size: Passed to the embedding layer as ``output_dim``.
        state_size: Number of units given to every ``GRUCell``.
        layers: How many ``GRUCell`` instances are stacked in the RNN.
        dropout_rate: Used for both ``dropout`` and ``recurrent_dropout``
            of every cell.
    """

    def __init__(self, num_words, embedding_size, state_size, layers=2, dropout_rate=0.1):
        self.encoder_embedding = Embedding(
            input_dim=num_words,
            output_dim=embedding_size,
            name='encoder_embedding',
        )

        # One GRU cell per requested layer, all sharing the same settings.
        stacked_cells = []
        for _ in range(layers):
            stacked_cells.append(
                GRUCell(state_size, dropout=dropout_rate, recurrent_dropout=dropout_rate)
            )

        # return_state=True makes the layer hand back its state(s) in
        # addition to its output.
        self.rnn_layer = RNN(stacked_cells, return_state=True)

    def __call__(self, x):
        """Connect the encoder layers to ``x``.

        Args:
            x: Input that is fed straight into the embedding layer.

        Returns:
            A pair ``(encoder_output, encoder_state)``: the first element of
            the RNN result, and everything after that first element.
        """
        embedded = self.encoder_embedding(x)
        rnn_result = self.rnn_layer(embedded)

        # First entry is the output; the remaining entries are the states.
        encoder_output = rnn_result[0]
        encoder_state = rnn_result[1:]
        return encoder_output, encoder_state
解码器
class Decoder(object):
    """Decoder half of the Seq2Seq model: embedding, stacked GRU cells, softmax output.

    The instance is a plain callable that wires its layers onto an input
    tensor; it is not itself a Keras layer or model.

    Args:
        num_words: ``input_dim`` of the embedding and number of units of the
            final dense layer.
        embedding_size: Passed to the embedding layer as ``output_dim``.
        state_size: Number of units given to every ``GRUCell``.
        layers: How many ``GRUCell`` instances are stacked in the RNN.
        dropout_rate: Used for both ``dropout`` and ``recurrent_dropout``
            of every cell.
    """

    def __init__(self, num_words, embedding_size, state_size, layers=2, dropout_rate=0.1):
        # The decoder owns its own embedding because source and target
        # languages differ.
        self.embedding = Embedding(
            input_dim=num_words,
            output_dim=embedding_size,
            name='decoder_embedding',
        )

        # One GRU cell per requested layer, all sharing the same settings.
        gru_stack = []
        for _ in range(layers):
            gru_stack.append(
                GRUCell(state_size, dropout=dropout_rate, recurrent_dropout=dropout_rate)
            )

        # return_sequences=True: the dense layer below is applied to every
        # time step, not just the last one.
        self.rnn_layer = RNN(gru_stack, return_sequences=True)

        # Softmax over num_words units.
        self.output = Dense(
            num_words,
            activation='softmax',
            name='decoder_output',
        )

    def __call__(self, x, initial_state):
        """Connect the decoder layers to ``x``.

        Args:
            x: Input that is fed straight into the embedding layer.
            initial_state: Forwarded to the RNN layer as its ``initial_state``.

        Returns:
            The result of applying the dense softmax layer, wrapped in
            ``TimeDistributed``, to the RNN output.
        """
        embedded = self.embedding(x)
        sequence = self.rnn_layer(embedded, initial_state=initial_state)

        # A new TimeDistributed wrapper is created on every call; the
        # wrapped dense layer is always the same instance.
        return TimeDistributed(self.output)(sequence)
但是,当我尝试预测样本时:
class Translator(object):
    """Greedy, token-by-token translation using an encoder and a decoder model.

    Args:
        tokenizer_src: Source-language tokenizer; must provide
            ``text_to_tokens(text=..., reverse=..., padding=...)``.
        tokenizer_dest: Target-language tokenizer; must provide
            ``word_index``, ``max_tokens`` and ``token_to_word(token)``.
        encoder: Object whose ``predict(tokens)`` yields the initial decoder
            state.
        decoder: Object whose ``predict_on_batch(inputs)`` yields a sequence
            whose first element is the token scores and whose remaining
            elements are the new state.
        start_word: Marker word that starts every target sequence.
        end_word: Marker word that terminates a target sequence.
    """

    def __init__(self, tokenizer_src, tokenizer_dest, encoder, decoder, start_word="", end_word=""):
        self.tokenizer_src = tokenizer_src
        self.tokenizer_dest = tokenizer_dest
        self.encoder = encoder
        self.decoder = decoder
        self.start_word = start_word
        self.end_word = end_word

    def evaluate(self, input_text, delimiter=" "):
        """Translate ``input_text`` and return the decoded words joined by ``delimiter``.

        Decoding is greedy: at every step the highest-scoring token is chosen
        and fed back as the next decoder input, together with the state the
        decoder returned for the previous step. Decoding stops when the end
        marker is produced or when ``tokenizer_dest.max_tokens`` words have
        been emitted. The end marker itself is never part of the result.

        Args:
            input_text: Source-language text to translate.
            delimiter: String placed between the decoded words.

        Returns:
            The translated text.

        Raises:
            KeyError: If the stripped start or end word is missing from
                ``tokenizer_dest.word_index``.
        """
        token_start = self.tokenizer_dest.word_index[self.start_word.strip()]
        token_end = self.tokenizer_dest.word_index[self.end_word.strip()]

        # Upper bound on the number of words in the output sequence.
        max_tokens = self.tokenizer_dest.max_tokens

        # The source tokens are requested reversed and padded.
        input_tokens = self.tokenizer_src.text_to_tokens(text=input_text,
                                                         reverse=True,
                                                         padding=True)

        # The encoder result seeds the decoder state. It is set ONCE, before
        # the loop, so that each step can hand its state on to the next one.
        state_value = self.encoder.predict(input_tokens)

        token_int = token_start
        decoded_tokens = []

        # The first check of `token_int != token_end` only matters when the
        # start and end markers map to the same token; afterwards the end
        # marker is handled by the `break` below.
        while token_int != token_end and len(decoded_tokens) < max_tokens:
            decoder_input_data = np.zeros((1, 1))
            decoder_input_data[0, 0] = token_int

            # Wrap the input data in a dict so the order of inputs cannot be
            # mixed up.
            # NOTE(review): the keys must match the input-layer names of the
            # decoder model — confirm against where that model is built.
            x_data = {
                'input_1': state_value,
                'decoder_input': decoder_input_data
            }

            output_tokens_and_state = self.decoder.predict_on_batch(x_data)
            output_tokens = output_tokens_and_state[0]

            # Carry the decoder state forward; keep the previous state if the
            # decoder returned none.
            new_state = output_tokens_and_state[1:]
            if len(new_state) > 0:
                state_value = new_state

            # Highest-scoring token at the last time step.
            token_int = np.argmax(output_tokens[0, -1, :])

            # Stop on the end marker without emitting it, so a run that is
            # cut off by max_tokens keeps its final real word.
            if token_int == token_end:
                break

            decoded_tokens.append(self.tokenizer_dest.token_to_word(token_int))

        return delimiter.join(decoded_tokens)
Keras 抛出了错误，提示我必须为 encoder_input 占位符提供输入值：
Traceback (most recent call last):
  File "inference.py", line 31, in <module>
    result = translator.evaluate(input)
  File "C:\A_PERSONAL\Github\Machine Translation\Classes\Translator.py", line 136, in evaluate
    output_tokens_and_state = self.decoder.predict_on_batch([decoder_input_data, state_value])
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\keras\engine\training.py", line 1274, in predict_on_batch
    outputs = self.predict_function(ins)
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\keras\backend\tensorflow_backend.py", line 2715, in __call__
    return self._call(inputs)
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\keras\backend\tensorflow_backend.py", line 2675, in _call
    fetched = self._callable_fn(*array_vals)
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\tensorflow\python\client\session.py", line 1454, in __call__
    self._session._session, self._handle, args, status, None)
  File "C:\Users\quang\Anaconda3\envs\car\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 519, in __exit__
    c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'encoder_input' with dtype float and shape [?,?]
  [[Node: encoder_input = Placeholder[dtype=DT_FLOAT, shape=[?,?], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
  [[Node: time_distributed_2/Reshape_1/_41 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_591_time_distributed_2/Reshape_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]