I am trying to perform attention on the returned tensors, but I am getting the following error:
ValueError: Shape mismatch: The shape of labels (received (64, 53)) should equal the shape of logits except for the last dimension (received (64, 1, 500)).
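For reference, SparseCategoricalCrossentropy compares labels of shape (batch, seq_len) against predictions of shape (batch, seq_len, vocab_size). A minimal sketch of that contract, with the sizes taken from the error message and dummy tensors assumed:

# Shape contract for SparseCategoricalCrossentropy (batch=64, target length=53, vocab=500
# taken from the error message; the tensors themselves are random dummies).
import tensorflow as tf

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
labels = tf.zeros((64, 53), dtype=tf.int32)                  # (batch, seq_len) integer class ids
preds = tf.nn.softmax(tf.random.normal((64, 53, 500)), -1)   # (batch, seq_len, vocab) probabilities
print(loss_fn(labels, preds))                                # works: shapes agree except on the last axis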
Please find the code below.
Here is the attention code; please correct me if it is wrong.
class Attention(tf.keras.layers.Layer):
    def __init__(self):
        super().__init__()

    def call(self, enc_op, hidden_state):
        # print(enc_op.shape, hidden_state.shape)
        query_with_time_axis = tf.expand_dims(hidden_state, 1)
        context_vector = tf.matmul(enc_op, tf.transpose(query_with_time_axis, perm=[0, 2, 1]))
        context_vector = tf.nn.softmax(context_vector, axis=1)
        context_vector = context_vector * enc_op
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector
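A quick standalone check of this layer with dummy tensors (enc_units=64 and ENCODER_SEQ_LEN=30 are assumed from the model setup further down) shows it returns one context vector per example:

# Standalone shape check for the Attention layer above (shapes assumed, tensors are random dummies).
import tensorflow as tf

attn = Attention()
enc_op = tf.random.normal((64, 30, 64))       # (batch, ENCODER_SEQ_LEN, enc_units)
hidden_state = tf.random.normal((64, 64))     # (batch, enc_units)
context_vector = attn(enc_op, hidden_state)
print(context_vector.shape)                   # (64, 64): one context vector per example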
This is the decoder part, where I call the attention layer:

class Decoder(tf.keras.layers.Layer):
    def __init__(self, vocab_size, embedding_dim, input_length, dec_units):
        super().__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.dec_units = dec_units
        self.input_length = input_length
        self.attention = Attention()

    def build(self, input_shape):
        self.embedding = Embedding(input_dim=self.vocab_size, output_dim=self.embedding_dim, input_shape=input_shape,
                                   mask_zero=True, name="embedding_layer_decoder")
        self.lstm = LSTM(self.dec_units, return_sequences=True, return_state=True, name="Decoder_LSTM")

    def call(self, target_sentances, enc_op, hidden_state, cell_state):
        target_embed = self.embedding(target_sentances)
        for i in range(target_embed.shape[1]):
            context_vector = self.attention(enc_op, hidden_state)
            y = tf.concat([context_vector, target_embed[:, i, :]], axis=-1)
            y = tf.expand_dims(y, 1)
            lstm_output, hidden_state, _ = self.lstm(y, initial_state=[hidden_state, cell_state])
        return lstm_output
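To illustrate what the inner loop hands back: each iteration feeds the LSTM a single timestep, so the returned sequence axis has length 1. A bare-LSTM sketch (dec_units=64 from the model setup below; the input width 114 = 64 context dims + 50 embedding dims is an assumption based on the concatenation above):

# Sketch of the per-timestep LSTM call inside Decoder.call (sizes assumed as noted above).
import tensorflow as tf

lstm = tf.keras.layers.LSTM(64, return_sequences=True, return_state=True, name="Decoder_LSTM")
y = tf.random.normal((64, 1, 114))            # one timestep per call, as in the loop above
lstm_output, hidden_state, cell_state = lstm(y)
print(lstm_output.shape)                      # (64, 1, 64): time axis of length 1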
class Mymodel(Model):
    def __init__(self, encoder_inputs_length, decoder_inputs_length, output_vocab_size):
        super().__init__()
        self.encoder = Encoder(vocab_size=500, embedding_dim=50, input_length=encoder_inputs_length, enc_units=64)
        self.decoder = Decoder(vocab_size=500, embedding_dim=50, input_length=decoder_inputs_length, dec_units=64)
        self.dense = Dense(output_vocab_size, activation="softmax")

    def call(self, data):
        input, output = data[0], data[1]
        print(input.shape, output.shape)
        encoder_output, encoder_h, encoder_c = self.encoder(input)
        print("="*20, "ENCODER", "="*20)
        print("-"*35)
        print(encoder_output)
        print("ENCODER ==> OUTPUT SHAPE", encoder_output.shape)
        print("ENCODER ==> HIDDEN STATE SHAPE", encoder_h.shape)
        print("ENCODER ==> CELL STATE SHAPE", encoder_c.shape)
        print("="*20, "Decoder", "="*20)
        decoder_output = self.decoder(output, encoder_output, encoder_h, encoder_c)
        output1 = self.dense(decoder_output)
        print("-"*35)
        print("Final output shape", output.shape)
        print("="*50)
        return output1
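The final Dense only maps the last axis to the vocabulary, so the prediction shape follows the decoder output shape; a small sketch with dummy values (the decoder output shape (64, 1, 64) is assumed from dec_units=64 and the single-timestep output above):

# Sketch of the Dense projection on the assumed decoder output shape.
import tensorflow as tf

dense = tf.keras.layers.Dense(500, activation="softmax")
preds = dense(tf.random.normal((64, 1, 64)))
print(preds.shape)                            # (64, 1, 500): matches the logits shape in the ValueError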
model = Mymodel(encoder_inputs_length=30,decoder_inputs_length=20,output_vocab_size=500)
ENCODER_SEQ_LEN = 30
DECODER_SEQ_LEN = 20
optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=optimizer,loss=tf.keras.losses.SparseCategoricalCrossentropy())
for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
    model.fit([inp, targ], targ, steps_per_epoch=1)
The shapes of my input and target are (64, 55) and (64, 53), with a batch size of 64.
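A quick way to confirm those batch shapes straight from the pipeline (using the same dataset variable as in the fit loop above):

# Inspect one batch from the tf.data pipeline.
for inp, targ in dataset.take(1):
    print(inp.shape, targ.shape)              # expected: (64, 55) (64, 53)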