I have been using a BiLSTM to classify each word in sentences; my input has shape (n_sentences, max_sequence_length, classes). Recently I have been trying to use the following attention layer: https://www.kaggle.com/takuok/bidirectional-lstm-and-attention-lb-0-043
from keras import backend as K
from keras import initializers, regularizers, constraints
from keras.layers import Layer

class Attention(Layer):
    """Soft attention over the time axis: scores each timestep, then returns the weighted sum."""
    def __init__(self, step_dim,
                 W_regularizer=None, b_regularizer=None,
                 W_constraint=None, b_constraint=None,
                 bias=True, **kwargs):
        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')
        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)
        self.bias = bias
        self.step_dim = step_dim
        self.features_dim = 0
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3
        # One attention weight per feature, shared across timesteps.
        self.W = self.add_weight((input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        self.features_dim = input_shape[-1]
        if self.bias:
            # One bias term per timestep.
            self.b = self.add_weight((input_shape[1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)
        else:
            self.b = None
        self.built = True

    def compute_mask(self, input, input_mask=None):
        # The mask is consumed by this layer and not propagated further.
        return None

    def call(self, x, mask=None):
        features_dim = self.features_dim
        step_dim = self.step_dim
        # Score each timestep: e = tanh(x . W + b), one scalar per (sample, step).
        eij = K.reshape(K.dot(K.reshape(x, (-1, features_dim)),
                              K.reshape(self.W, (features_dim, 1))), (-1, step_dim))
        if self.bias:
            eij += self.b
        eij = K.tanh(eij)
        # Softmax over the time axis, zeroing out masked timesteps.
        a = K.exp(eij)
        if mask is not None:
            a *= K.cast(mask, K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())
        # Weighted sum over timesteps: output has shape (batch, features).
        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        return input_shape[0], self.features_dim
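A quick way to see what the layer returns (a minimal sketch; the 109 steps and 128 features are illustrative, and it assumes the class above builds in your Keras version):
from keras.models import Model
from keras.layers import Input

# The layer collapses (batch, steps, features) to (batch, features), because
# call() ends with K.sum(weighted_input, axis=1).
inp = Input(shape=(109, 128))
out = Attention(step_dim=109)(inp)
print(Model(inp, out).output_shape)  # (None, 128)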
My output has to be (samples, steps, features), otherwise I get
ValueError: Error when checking target: expected dense_2 to have 2 dimensions, but got array with shape (656, 109, 2)
So I changed:
return input_shape[0], self.features_dim
to
return input_shape[0], self.step_dim, self.features_dim
But doing that produces another error:
InvalidArgumentError: Incompatible shapes: [32,109] vs. [32]
[[{{node metrics/acc/Equal}}]]
What do I need to modify to actually use the attention layer on my sentences?
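For reference, a minimal sketch of the kind of wiring that reproduces the first error above (the layer sizes and the 100-dimensional word features are illustrative assumptions, not the original model):
from keras.models import Model
from keras.layers import Input, Bidirectional, LSTM, Dense

# Illustrative sizes only: 109 timesteps (as in the error above), 100-dimensional
# word features, 2 classes.
inp = Input(shape=(109, 100))                             # (batch, 109, 100)
h = Bidirectional(LSTM(64, return_sequences=True))(inp)   # (batch, 109, 128)
h = Attention(step_dim=109)(h)                            # (batch, 128): the time axis is summed away
out = Dense(2, activation='softmax')(h)                   # (batch, 2)
model = Model(inp, out)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
# Fitting with per-word targets of shape (656, 109, 2) triggers the ValueError,
# because the final Dense layer only produces two dimensions (batch, 2).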
Answer 0 (score: 0)
Are you using SeqSelfAttention?
I ran into the same problem. I used SeqWeightedAttention instead of SeqSelfAttention, and it solved my problem.
model.add(SeqWeightedAttention())
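A minimal sketch of how that looks when wired into a Keras Sequential model, assuming the keras-self-attention package (pip install keras-self-attention); the layer sizes are illustrative:
from keras.models import Sequential
from keras.layers import Bidirectional, LSTM, Dense
from keras_self_attention import SeqWeightedAttention

# Illustrative sizes only: 109 timesteps, 100-dimensional word features, 2 classes.
model = Sequential()
model.add(Bidirectional(LSTM(64, return_sequences=True), input_shape=(109, 100)))  # (batch, 109, 128)
model.add(SeqWeightedAttention())                                                  # (batch, 128): the time axis is collapsed
model.add(Dense(2, activation='softmax'))                                          # one prediction per sentence
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
Note that SeqWeightedAttention, like the layer above, collapses the time axis, so this setup yields one prediction per sentence rather than one per word.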