In Keras: I found that layer.shape differs from the layer output shape shown in model.summary(). What could be the reason?
I am trying to implement the structured self-attentive sentence embedding from [https://arxiv.org/pdf/1703.03130.pdf]. In the paper the attention matrix is A = softmax(W_s2 * tanh(W_s1 * H^T)), with W_s1 of shape (d_a, 2u) and W_s2 of shape (r, d_a), so A should have shape (batch, r, n), and the sentence embedding is M = A * H.
The attention tensor A itself has the shape I expect:
A.shape
Out[44]: TensorShape([Dimension(None), Dimension(10), Dimension(339)])
but model.summary() reports the output shape of seq_self_attention_4 as (None, 339, 256):
Model: "model_3"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
lstm_input (InputLayer) (None, 339) 0
__________________________________________________________________________________________________
lstm/embedding (Embedding) (None, 339, 100) 5000000 lstm_input[0][0]
__________________________________________________________________________________________________
bidirectional_4 (Bidirectional) (None, 339, 256) 234496 lstm/embedding[0][0]
__________________________________________________________________________________________________
seq_self_attention_4 (SeqSelfAt (None, 339, 256) 1330 bidirectional_4[0][0]
__________________________________________________________________________________________________
layer_mat_mul_4 (LayerMatMul) [(None, 339, 256), ( 0 seq_self_attention_4[0][0]
bidirectional_4[0][0]
__________________________________________________________________________________________________
flatten_4 (Flatten) (None, 86784) 0 layer_mat_mul_4[0][0]
__________________________________________________________________________________________________
lstm_predictions (Dense) (None, 2) 173570 flatten_4[0][0]
# Imports assumed by the code below (standalone Keras on a TensorFlow backend):
import tensorflow as tf
import keras
from keras import backend as K
from keras.layers import Input, Lambda, Bidirectional, LSTM, Flatten, Dense
from keras.models import Model

class SeqSelfAttention(keras.layers.Layer):
    def __init__(self, da, r, hidden_state):
        super(SeqSelfAttention, self).__init__()
        self.da = da
        self.r = r
        self.hidden_state = hidden_state
        w_init = tf.random_normal_initializer()
        # W_s1: (da, 2*hidden_state) and W_s2: (r, da), as in the paper
        self.W_s1 = tf.Variable(initial_value=w_init(shape=(self.da, 2 * self.hidden_state),
                                                     dtype='float32'),
                                trainable=True)
        self.W_s2 = tf.Variable(initial_value=w_init(shape=(self.r, self.da),
                                                     dtype='float32'),
                                trainable=True)

    def call(self, inputs):
        # inputs: (batch, n, 2*hidden_state); returns A: (batch, r, n)
        W_s1_H = K.tanh(tf.matmul(self.W_s1, inputs, transpose_b=True))
        W_s2_H = tf.matmul(self.W_s2, W_s1_H)
        return K.softmax(W_s2_H, axis=1)
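The (None, 10, 339) shape printed above is consistent with the shape algebra inside call(). A quick standalone check of just the shapes, using NumPy in place of tf.matmul (a minimal sketch; the batch size 2 and the zero-filled arrays are only placeholders):

import numpy as np

batch, n, da, r, hidden_state = 2, 339, 5, 10, 128

H = np.zeros((batch, n, 2 * hidden_state))      # BiLSTM output, like bidirectional_4
W_s1 = np.zeros((da, 2 * hidden_state))
W_s2 = np.zeros((r, da))

W_s1_H = W_s1 @ H.transpose(0, 2, 1)            # (batch, da, n)
A = W_s2 @ W_s1_H                               # (batch, r, n)
print(A.shape)                                  # (2, 10, 339), matching A.shape above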
# Hyper-parameters and model construction
da = 5
r = 10
vocab_size = 50000
embedding_size = 100
embedding_weights = None
hidden_state = 128
input_shape = 339

message = Input(shape=(input_shape,), dtype='int32', name='lstm_input')
mask = Lambda(lambda inputs: K.not_equal(inputs, 0))(message)
# make_embedding is a custom helper (definition not shown); it builds the
# 'lstm/embedding' Embedding layer from the summary above
embedding = make_embedding('lstm', vocab_size, embedding_size, embedding_weights, mask_zero=False)(message)
return_sequences = True
lstm_1 = Bidirectional(LSTM(units=hidden_state, return_sequences=return_sequences))(embedding, mask=mask)
A = SeqSelfAttention(da, r, hidden_state)(lstm_1)
# LayerMatMul is a custom layer (definition and exact arguments not shown) that combines
# A with the BiLSTM output (M = A*H in the paper); it is layer_mat_mul_4 in the summary
m = LayerMatMul()([A, lstm_1])
final = Flatten()(m)
preds = Dense(units=2, activation='softmax', name='lstm_predictions')(final)
model = Model(
    inputs=[message],
    outputs=[preds],
)
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
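To make the comparison concrete, the per-layer shapes that model.summary() reports can be printed next to the symbolic shape of A (a minimal sketch reusing the model and A built above):

# what model.summary() shows for each layer
for layer in model.layers:
    print(layer.name, layer.output_shape)

# vs. the symbolic tensor returned by the attention layer
print(A.shape)    # TensorShape([Dimension(None), Dimension(10), Dimension(339)])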
Thanks.