I am trying to define a tf.keras model. I get
ValueError: Output tensors to a Model must be the output of a TensorFlow Layer (thus holding past layer metadata). Found: Tensor(decoder_activation_softmax/truediv:0, shape=(?, ?, 144), dtype=float32)
when calling self.model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=[decoder_outputs]).
I have seen several threads with the same problem, where plain TensorFlow functions were used in place of tf.keras layers such as Lambda(), Add(), and so on. I have tried adding the appropriate layers in the appropriate places, but it still does not work; roughly what I understand that approach to look like is sketched below.
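For reference, this is a toy sketch (made-up input tensors, not my actual model) of the fix those threads seem to suggest: wrap backend/TF operations in Keras layers such as Lambda() and Add() so that the resulting tensors keep their layer metadata.

from tensorflow.python.keras import backend as K
from tensorflow.python.keras.layers import Input, Lambda, Add

a = Input(shape=(8,))
b = Input(shape=(8,))

# instead of: expanded = K.expand_dims(a, axis=1)
expanded = Lambda(lambda t: K.expand_dims(t, axis=1))(a)

# instead of: summed = a + b
summed = Add()([a, b])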
Here is my modified model:
import tensorflow as tf
from tensorflow.python.keras import backend as K
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Multiply, Add, Dense, LSTM, GRU, CuDNNLSTM, Input, Embedding, TimeDistributed, Flatten, Dropout, Lambda, Concatenate
import numpy as np
LATENT_DIM = 256
DROPOUT_RATE = 0.2
class BahdanauAttention(Model):
    def __init__(self, units, name=None):
        super(BahdanauAttention, self).__init__(name=name)
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def __call__(self, query, values):
        # hidden shape == (batch_size, hidden size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden size)
        # we are doing this to perform addition to calculate the score
        ones_tensor = Lambda(lambda x: K.ones_like(x))(query)
        ones_tensor = ones_tensor[:, 0]
        hidden_with_time_axis = Lambda(lambda x: K.expand_dims(x, axis=1))(ones_tensor)

        # score shape == (batch_size, max_length, hidden_size)
        score = self.V(Dense(1, activation='tanh')(Add()([self.W1(values), self.W2(hidden_with_time_axis)])))

        # attention_weights shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        attention_weights = Dense(units=1, activation='softmax')(score)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = Multiply()([attention_weights, values])
        context_vector = Lambda(lambda x: K.sum(x, axis=1))(context_vector)

        return context_vector, attention_weights
class Chatbot():
    def __init__(self):
        ''' Configure the chatbot. '''
        self.num_encoder_tokens = 500
        self.num_decoder_tokens = 500
        self.__build_model()

    def __build_model(self):
        ''' Construct the model used to train the chatbot. '''
        encoder_inputs = Input(shape=(None, self.num_encoder_tokens), name='encoder_input')
        encoder_dropout = (TimeDistributed(Dropout(rate=DROPOUT_RATE, name='encoder_dropout')))(encoder_inputs)
        encoder = GRU(LATENT_DIM, return_sequences=True, return_state=True, name='encoder_gru')
        encoder_outputs, encoder_state = encoder(encoder_dropout)

        # Attention mechanism
        attention_layer = BahdanauAttention(LATENT_DIM, name='attention_layer')
        attention_result, attention_weights = attention_layer(encoder_state, encoder_outputs)

        decoder_inputs = Input(shape=(None, self.num_decoder_tokens), name='decoder_input')
        decoder_dropout = (TimeDistributed(Dropout(rate=DROPOUT_RATE, name='decoder_dropout')))(decoder_inputs)
        decoder_gru = GRU(LATENT_DIM, return_sequences=True, return_state=True, name='decoder_gru')
        decoder_outputs, _ = decoder_gru(decoder_dropout, initial_state=encoder_state)
        decoder_outputs = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_weights])

        decoder_dense = Dense(self.num_decoder_tokens, activation='softmax', name='decoder_activation_softmax')
        dense_time = TimeDistributed(decoder_dense, name='time_distributed_layer')
        decoder_outputs = dense_time(decoder_outputs)

        self.model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=[decoder_outputs])
        self.model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
The error comes from the __call__() function of the BahdanauAttention class. It does not return the outputs of Layers, which causes the ValueError to be thrown when the model is defined in the Chatbot class. I have identified that the error appears once I add the line decoder_outputs = Concatenate(axis=-1, name='concat_layer')([decoder_outputs, attention_weights]). Apparently the attention_weights produced by the attention class is not a layer output, but a plain tensor.
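As far as I understand it (and I may be wrong about the mechanism), in graph-mode tf.keras a tensor produced by a raw TF op carries no layer metadata, so Model() cannot trace it back to an Input, while the same op wrapped in a Lambda layer is accepted. A minimal standalone sketch of that behaviour, separate from my model:

import tensorflow as tf
from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Input, Dense, Lambda

x = Input(shape=(4,))

ok = Dense(2, activation='softmax')(x)        # layer output -> has layer metadata
bad = tf.nn.softmax(Dense(2)(x), axis=-1)     # raw op output -> no layer metadata

Model(inputs=x, outputs=ok)                   # builds fine
# Model(inputs=x, outputs=bad)                # raises the same ValueError as above

wrapped = Lambda(lambda t: tf.nn.softmax(t, axis=-1))(Dense(2)(x))
Model(inputs=x, outputs=wrapped)              # wrapping the op in a Lambda is accepted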
As I mentioned, I have tried to rewrite the attention class to use Layers instead of TensorFlow functions. This is the original class:
class BahdanauAttention(tf.keras.Model):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # hidden shape == (batch_size, hidden size)
        # hidden_with_time_axis shape == (batch_size, 1, hidden size)
        # we are doing this to perform addition to calculate the score
        hidden_with_time_axis = tf.expand_dims(query, 1)

        # score shape == (batch_size, max_length, hidden_size)
        score = self.V(tf.nn.tanh(
            self.W1(values) + self.W2(hidden_with_time_axis)))

        # attention_weights shape == (batch_size, max_length, 1)
        # we get 1 at the last axis because we are applying score to self.V
        attention_weights = tf.nn.softmax(score, axis=1)

        # context_vector shape after sum == (batch_size, hidden_size)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights
The class can also be found here: https://www.tensorflow.org/alpha/tutorials/text/nmt_with_attention
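For context, the tutorial uses that original class roughly like this (paraphrased from memory, with made-up sample shapes): query is the decoder hidden state and values are the encoder outputs.

import tensorflow as tf

sample_hidden = tf.random.normal((64, 1024))      # (batch_size, hidden_size)
sample_output = tf.random.normal((64, 16, 1024))  # (batch_size, max_length, hidden_size)

attention_layer = BahdanauAttention(10)
context_vector, attention_weights = attention_layer(sample_hidden, sample_output)
# context_vector shape: (64, 1024); attention_weights shape: (64, 16, 1)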
I hope this question is not too broad. Thank you.