In this code, wq is a Dense layer whose weights are randomly initialized and which projects its input to size d_model:
import tensorflow as tf

class MultiHeadAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_head):
        super().__init__()
        self.num_head = num_head
        # Linear projections for queries, keys, and values
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)
        # Final output projection
        self.dense = tf.keras.layers.Dense(d_model)

    def call(self, v, k, q, mask):
        batch_size = tf.shape(q)[0]
        q = self.wq(q)
What happens in the last line, q = self.wq(q)?
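For context, here is a minimal standalone sketch of that call, assuming hypothetical sizes d_model = 512, batch_size = 2, and seq_len = 10 (none of these values come from the original code). It just applies the same kind of Dense layer to a random query tensor and prints the shapes before and after:

import tensorflow as tf

d_model = 512                                 # assumed value, for illustration only
wq = tf.keras.layers.Dense(d_model)           # same layer type as self.wq

q = tf.random.uniform((2, 10, d_model))       # dummy queries: (batch, seq_len, features)
q_projected = wq(q)                           # corresponds to q = self.wq(q)

print(q.shape)            # (2, 10, 512)
print(q_projected.shape)  # (2, 10, 512) -- last dimension is now d_model

The Dense layer acts on the last axis only, so each query vector is multiplied by the layer's learned weight matrix (plus a bias), leaving the batch and sequence dimensions unchanged.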