我正在开发Bi-LSTM模型,并希望为其添加关注层。但是我不知道如何添加它。
我当前的模型代码是
model = Sequential()
model.add(Embedding(max_words, 1152, input_length=max_len, weights=[embeddings]))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(Dropout(0.5))
model.add(Bidirectional(LSTM(32)))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.summary()
模型摘要是
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_1 (Embedding) (None, 1152, 1152) 278396928
_________________________________________________________________
batch_normalization_1 (Batch (None, 1152, 1152) 4608
_________________________________________________________________
activation_1 (Activation) (None, 1152, 1152) 0
_________________________________________________________________
dropout_1 (Dropout) (None, 1152, 1152) 0
_________________________________________________________________
bidirectional_1 (Bidirection (None, 64) 303360
_________________________________________________________________
batch_normalization_2 (Batch (None, 64) 256
_________________________________________________________________
activation_2 (Activation) (None, 64) 0
_________________________________________________________________
dropout_2 (Dropout) (None, 64) 0
_________________________________________________________________
dense_1 (Dense) (None, 1) 65
=================================================================
Total params: 278,705,217
Trainable params: 278,702,785
Non-trainable params: 2,432
答案 0 :(得分:16)
这可能是带有自定义图层的可能的自定义解决方案,该图层可以计算位置/时间维度上的注意力
class Attention(Layer):
def __init__(self, return_sequences=True):
self.return_sequences = return_sequences
super(Attention,self).__init__()
def build(self, input_shape):
self.W=self.add_weight(name="att_weight", shape=(input_shape[-1],1),
initializer="normal")
self.b=self.add_weight(name="att_bias", shape=(input_shape[1],1),
initializer="zeros")
super(Attention,self).build(input_shape)
def call(self, x):
e = K.tanh(K.dot(x,self.W)+self.b)
a = K.softmax(e, axis=1)
output = x*a
if self.return_sequences:
return output
return K.sum(output, axis=1)
它可以接收3D张量并输出3D张量(return_sequences=True)
或2D张量(return_sequences=False)
。下面是一个虚拟的例子
# dummy data creation
max_len = 100
max_words = 333
emb_dim = 126
n_sample = 5
X = np.random.randint(0,max_words, (n_sample,max_len))
Y = np.random.randint(0,2, n_sample)
与return_sequences=True
model = Sequential()
model.add(Embedding(max_words, emb_dim, input_length=max_len))
model.add(Bidirectional(LSTM(32, return_sequences=True)))
model.add(Attention(return_sequences=True)) # receive 3D and output 3D
model.add(LSTM(32))
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile('adam', 'binary_crossentropy')
model.fit(X,Y, epochs=3)
与return_sequences=False
model = Sequential()
model.add(Embedding(max_words, emb_dim, input_length=max_len))
model.add(Bidirectional(LSTM(32, return_sequences=True)))
model.add(Attention(return_sequences=False)) # receive 3D and output 2D
model.add(Dense(1, activation='sigmoid'))
model.summary()
model.compile('adam', 'binary_crossentropy')
model.fit(X,Y, epochs=3)
您可以轻松地将其集成到您的网络中
正在运行的笔记本:https://colab.research.google.com/drive/1nyi5FWuAaRS-eypLKwWxQ_sDfv-oKaOs?usp=sharing
答案 1 :(得分:3)
如果有人在外部只使用 Tensorflow 而不是 keras,这就是这样做的方法。
import tensorflow as tf
class Attention(tf.keras.layers.Layer):
def __init__(self, return_sequences=True, name=None, **kwargs):
super(Attention, self).__init__(name=name)
self.return_sequences = return_sequences
super(Attention, self).__init__(**kwargs)
def build(self, input_shape):
self.W=self.add_weight(name="att_weight", shape=(input_shape[-1],1),
initializer="glorot_uniform", trainable=True)
self.b=self.add_weight(name="att_bias", shape=(input_shape[1],1),
initializer="glorot_uniform", trainable=True)
super(Attention, self).build(input_shape)
def call(self, x):
e = tf.keras.activations.tanh(tf.keras.backend.dot(x, self.W) + self.b)
a = tf.keras.activations.softmax(e, axis=1)
output = x * a
if self.return_sequences:
return a, output
return a, tf.keras.backend.sum(output, axis=1)
def get_config(self):
config = super().get_config().copy()
config.update({
'return_sequences': self.return_sequences
})
return config