I am trying to define a tf.keras model. I get
ValueError: Output tensors to a Model must be the output of a TensorFlow Layer (thus holding past layer metadata). Found: Tensor(decoder_activation_softmax/truediv:0, shape=(?, ?, 144), dtype=float32)
when calling self.model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=[decoder_outputs]).
I have seen several threads with the same problem, where plain TensorFlow functions were used in place of tf.keras layers such as Lambda(), Add(), and so on. I have tried adding the appropriate layers in the appropriate places, but it still does not work; roughly what I understand that approach to look like is sketched below.
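For reference, this is a toy sketch (made-up input tensors, not my actual model) of the fix those threads seem to suggest: wrap backend/TF operations in Keras layers such as Lambda() and Add() so that the resulting tensors keep their layer metadata.

from tensorflow.python.keras import backend as K
from tensorflow.python.keras.layers import Input, Lambda, Add

a = Input(shape=(8,))
b = Input(shape=(8,))

# instead of: expanded = K.expand_dims(a, axis=1)
expanded = Lambda(lambda t: K.expand_dims(t, axis=1))(a)

# instead of: summed = a + b
summed = Add()([a, b])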
Here is my modified model:
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Multiply, Add, Dense, LSTM, GRU, CuDNNLSTM, Input, Embedding, TimeDistributed, Flatten, Dropout, Lambda, Concatenate
import numpy as np
LATENT_DIM = 256
DROPOUT_RATE = 0.2
class BahdanauAttention(Model):
    def __init__(self, units, name=None):
        super(BahdanauAttention, self).__init__(name=name)
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def __call__(self, query, values):
        # hidden shape == (batch_size, hidden size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden size)
        # we are doing this to perform addition to calculate the score
        ones_tensor = Lambda(lambda x: K.ones_like(x))(query)
        ones_tensor = ones_tensor[:, 0]
        hidden_with_time_axis = Lambda(lambda x: K.expand_dims(x, axis=1))(ones_tensor)

        # score shape == (batch_size, max_length, hidden_size)
        score = self.V(Dense(1, activation='tanh')(Add()([self.W1(values), self.W2(hidden_with_time_axis)])))

        # attention_weights shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        attention_weights = Dense(units=1, activation='softmax')(score)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = Multiply()([attention_weights, values])
        context_vector = Lambda(lambda x: K.sum(x, axis=1))(context_vector)

        return context_vector, attention_weights
class Chatbot():
    def __init__(self):
        ''' Configure the chatbot. '''
        self.num_encoder_tokens = 500
        self.num_decoder_tokens = 500
        self.__build_model()

    def __build_model(self):
        ''' Construct the model used to train the chatbot. '''
        encoder_inputs = Input(shape=(None, self.num_encoder_tokens), name='encoder_input')
        encoder_dropout = (TimeDistributed(Dropout(rate=DROPOUT_RATE, name='encoder_dropout')))(encoder_inputs)
        encoder = GRU(LATENT_DIM, return_sequences=True, return_state=True, name='encoder_gru')
        encoder_outputs, encoder_state = encoder(encoder_dropout)

        # Attention mechanism
        attention_layer = BahdanauAttention(LATENT_DIM, name='attention_layer')
        attention_result, attention_weights = attention_layer(encoder_state, encoder_outputs)

        decoder_inputs = Input(shape=(None, self.num_decoder_tokens), name='decoder_input')
        decoder_dropout = (TimeDistributed(Dropout(rate=DROPOUT_RATE, name='decoder_dropout')))(decoder_inputs)
        decoder_gru = GRU(LATENT_DIM, return_sequences=True, return_state=True, name='decoder_gru')
        decoder_outputs, _ = decoder_gru(decoder_dropout, initial_state=encoder_state)
        decoder_outputs = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_weights])

        decoder_dense = Dense(self.num_decoder_tokens, activation='softmax', name='decoder_activation_softmax')
        dense_time = TimeDistributed(decoder_dense, name='time_distributed_layer')
        decoder_outputs = dense_time(decoder_outputs)

        self.model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=[decoder_outputs])
        self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
The error comes from the __call__() function of the BahdanauAttention class. It does not return the outputs of Layers, which causes the ValueError to be thrown when the model is defined in the Chatbot class. I have identified that the error appears once I add the line decoder_outputs = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_weights]). Apparently the attention_weights produced by the attention class is not a layer output, but a plain tensor.
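As far as I understand it (and I may be wrong about the mechanism), in graph-mode tf.keras a tensor produced by a raw TF op carries no layer metadata, so Model() cannot trace it back to an Input, while the same op wrapped in a Lambda layer is accepted. A minimal standalone sketch of that behaviour, separate from my model:

import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Input, Dense, Lambda

x = Input(shape=(4,))

ok = Dense(2, activation='softmax')(x)        # layer output -> has layer metadata
bad = tf.nn.softmax(Dense(2)(x), axis=-1)     # raw op output -> no layer metadata

Model(inputs=x, outputs=ok)                   # builds fine
# Model(inputs=x, outputs=bad)                # raises the same ValueError as above

wrapped = Lambda(lambda t: tf.nn.softmax(t, axis=-1))(Dense(2)(x))
Model(inputs=x, outputs=wrapped)              # wrapping the op in a Lambda is accepted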
As I mentioned, I have tried to rewrite the attention class to use Layers instead of TensorFlow functions. This is the original class:
class BahdanauAttention(tf.keras.Model):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # hidden shape == (batch_size, hidden size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden size)
        # we are doing this to perform addition to calculate the score
        hidden_with_time_axis = tf.expand_dims(query, 1)

        # score shape == (batch_size, max_length, hidden_size)
        score = self.V(tf.nn.tanh(
            self.W1(values) + self.W2(hidden_with_time_axis)))

        # attention_weights shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        attention_weights = tf.nn.softmax(score, axis=1)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights
The class can also be found here: https://www.tensorflow.org/alpha/tutorials/text/nmt_with_attention
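For context, the tutorial uses that original class roughly like this (paraphrased from memory, with made-up sample shapes): query is the decoder hidden state and values are the encoder outputs.

import tensorflow as tf

sample_hidden = tf.random.normal((64, 1024))      # (batch_size, hidden_size)
sample_output = tf.random.normal((64, 16, 1024))  # (batch_size, max_length, hidden_size)

attention_layer = BahdanauAttention(10)
context_vector, attention_weights = attention_layer(sample_hidden, sample_output)
# context_vector shape: (64, 1024); attention_weights shape: (64, 16, 1)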
I hope this question is not too broad. Thank you.