# Question's model: an LSTM autoencoder assembled layer by layer on `model`.
# `model`, `max_len`, `x_train` and `chk` are defined outside this excerpt.
# The Masking layer is configured with mask_value=0.0 on inputs of shape (max_len, 8).
model.add(Masking(mask_value=0.0, input_shape=(max_len, 8)))
# return_sequences is not passed here, so this LSTM uses its default for that option.
model.add(LSTM(100, activation='relu'))
# NOTE(review): this layer appears to be fed the output of the previous LSTM directly.
# The answer text further down refers to a RepeatVector layer, which is not present in
# this excerpt -- confirm against the original question whether a line was dropped here.
model.add(LSTM(8, activation='relu', return_sequences=True))
model.compile(optimizer='adam', loss='mse')
# x_train is passed as both input and target: the network is trained to reconstruct its input.
model.fit(x_train, x_train, batch_size=32, callbacks=[chk], epochs=1000, validation_split=0.05, shuffle=True)
答案 0 :(得分:1)
Keras 中的每个层都有 input_mask 和 output_mask 两个属性。在您的示例中,掩码在第一个 LSTM 层之后(return_sequences=False 时)就已经丢失了。下面我用一个示例来说明这一点,并给出两种在 LSTM 自动编码器中实现掩码(masking)的解决方案。
# Toy LSTM autoencoder used to inspect how the mask travels from layer to layer.
time_steps, n_features = 3, 2

input_layer = tfkl.Input(shape=(time_steps, n_features))

# Demo convention: a timestep is masked when every feature equals 1
# (padding with 0 is the more usual choice).
masked = tfk.layers.Masking(mask_value=1)(input_layer)

# Encoder: a sequence-returning LSTM followed by one that keeps only its final output.
encoder_seq = tfkl.LSTM(2, return_sequences=True)(masked)
code = tfkl.LSTM(2, return_sequences=False)(encoder_seq)

# Decoder: repeat the code once per timestep, then two sequence-returning LSTMs.
repeated = tfkl.RepeatVector(time_steps)(code)
decoder_seq = tfkl.LSTM(2, return_sequences=True)(repeated)
decoder_seq = tfkl.LSTM(2, return_sequences=True)(decoder_seq)

# Project every timestep back to the original feature width.
x = tfk.layers.Dense(n_features)(decoder_seq)

lstm_ae = tfk.models.Model(inputs=input_layer, outputs=x)
lstm_ae.compile(optimizer='adam', loss='mse')
Model: "model_2"
Layer (type) Output Shape Param #
input_3 (InputLayer) [(None, 3, 2)] 0
masking_2 (Masking) (None, 3, 2) 0
lstm_8 (LSTM) (None, 3, 2) 40
lstm_9 (LSTM) (None, 2) 40
repeat_vector_2 (RepeatVecto (None, 3, 2) 0
lstm_10 (LSTM) (None, 3, 2) 40
lstm_11 (LSTM) (None, 3, 2) 40
dense_2 (Dense) (None, 3, 2) 6
Total params: 166
Trainable params: 166
Non-trainable params: 0
# Walk the model layer by layer and report the mask each layer receives and emits.
for index, layer in enumerate(lstm_ae.layers):
    print(f'layer {index}: {layer}')
    print(f'has input mask: {layer.input_mask}')
    print(f'has output mask: {layer.output_mask}')
layer 0: <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x645b49cf8>
has input mask: None
has output mask: None
layer 1: <tensorflow.python.keras.layers.core.Masking object at 0x645b49c88>
has input mask: None
has output mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
layer 2: <tensorflow.python.keras.layers.recurrent_v2.LSTM object at 0x645b4d0b8>
has input mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
has output mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
layer 3: <tensorflow.python.keras.layers.recurrent_v2.LSTM object at 0x645b4dba8>
has input mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
has output mask: None
layer 4: <tensorflow.python.keras.layers.core.RepeatVector object at 0x645db0390>
has input mask: None
has output mask: None
layer 5: <tensorflow.python.keras.layers.recurrent_v2.LSTM object at 0x6470b5da0>
has input mask: None
has output mask: None
layer 6: <tensorflow.python.keras.layers.recurrent_v2.LSTM object at 0x6471410f0>
has input mask: None
has output mask: None
layer 7: <tensorflow.python.keras.layers.core.Dense object at 0x647dfdf60>
has input mask: None
has output mask: None
如上所示,第二个 LSTM 层(return_sequences=False)的 output_mask 为 None。这是合理的:时间步维度已经消失(形状改变了),该层不知道应该如何继续传递掩码。您也可以查看源代码:当 return_sequences=True 时它返回 input_mask,否则返回 None。另一个问题是 RepeatVector 层,它根本不支持显式掩码,原因同样是形状发生了变化。除了这个瓶颈部分(第二个 LSTM + RepeatVector)之外,模型的其余部分都能够传递掩码,因此我们只需要处理瓶颈部分。
# Check whether the masked timestep is excluded from the loss of the plain model.
# One sample of 3 timesteps x 2 features; the last timestep is all ones, i.e. it
# matches the mask value and should be ignored.
x = np.array([1, 2, 1, 2, 1, 1], dtype='float32').reshape(1, 3, 2)
# Captured console echo of `x` (kept verbatim from the original write-up):
array([[[1., 2.],
[1., 2.],
[1., 1.]]], dtype=float32)
y = lstm_ae.predict(x)
# Captured console echo of `y` (kept verbatim from the original write-up):
array([[[0.00020542, 0.00011909],
[0.0007361 , 0.00047323],
[0.00158514, 0.00107504]]], dtype=float32)
# Expected loss: sum of squared errors over the first 2 timesteps (2 features each),
# divided by 6. One might expect a division by 4 (the unmasked values only), but per
# the author the Keras implementation divides by 6; this has little practical impact
# because only the gradient of the loss matters, not its absolute value.
expected_loss = np.square(x[:, :2, :] - y[:, :2, :]).sum()/6
actual_loss_with_masking = lstm_ae.evaluate(x=x, y=x)
# Per the author, the actual loss still includes the last timestep, which means the
# mask is not effectively passed on to the output layer for the loss computation.
# Solution 1: give a sample weight of 0 to every timestep that should be masked, so
# that those timesteps are ignored by the loss.
lstm_ae.compile(optimizer='adam', loss='mse', sample_weight_mode='temporal')
sample_weight_array = np.array([1, 1, 0]).reshape(1, 3) # weight 0 on the last timestep: ignore it
actual_loss_with_sample_weight = lstm_ae.evaluate(x=x, y=x, sample_weight=sample_weight_array)
# Per the author, the loss computed this way matches expected_loss.
class lstm_bottleneck(tf.keras.layers.Layer):
    """Autoencoder bottleneck (LSTM + RepeatVector) that keeps the timestep mask.

    A last-output LSTM followed by RepeatVector normally drops the mask. This
    layer wraps both steps and re-emits the incoming mask unchanged from
    ``compute_mask`` so that layers placed after it still receive it.

    Args:
        lstm_units: Number of units of the inner LSTM.
        time_steps: Number of repetitions produced by the inner RepeatVector.
            Assumed to equal the number of timesteps of the input sequence so
            that the forwarded mask lines up with the output -- confirm at the
            call site.
        **kwargs: Forwarded to ``tf.keras.layers.Layer``.
    """

    def __init__(self, lstm_units, time_steps, **kwargs):
        # Initialise the base Layer before assigning sub-layers, as the Keras
        # subclassing guide recommends.
        super(lstm_bottleneck, self).__init__(**kwargs)
        self.lstm_units = lstm_units
        self.time_steps = time_steps
        self.lstm_layer = tfkl.LSTM(lstm_units, return_sequences=False)
        self.repeat_layer = tfkl.RepeatVector(time_steps)

    def call(self, inputs, mask=None):
        """Encode the sequence to one vector, then repeat it ``time_steps`` times."""
        # Hand the mask to the inner LSTM explicitly instead of relying on
        # implicit propagation through the input tensor.
        encoded = self.lstm_layer(inputs, mask=mask)
        return self.repeat_layer(encoded)

    def compute_mask(self, inputs, mask=None):
        """Return the incoming mask unchanged so following layers keep using it."""
        return mask

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super(lstm_bottleneck, self).get_config()
        config.update({
            'lstm_units': self.lstm_units,
            'time_steps': self.time_steps,
        })
        return config
# Solution 2: the same toy autoencoder, with the mask-dropping bottleneck replaced
# by the custom lstm_bottleneck layer.
time_steps = 3
n_features = 2
input_layer = tfkl.Input(shape=(time_steps, n_features))
# Demo convention: mask the timestep where all the feature values are 1
# (padding with 0 is the more usual choice).
x = tfk.layers.Masking(mask_value=1)(input_layer)
x = tfkl.LSTM(2, return_sequences=True)(x)
# lstm_bottleneck stands in for the pair
#     tfkl.LSTM(2, return_sequences=False) + tfkl.RepeatVector(time_steps)
# used in the first version of the model. The repeat count is taken from
# `time_steps` rather than a literal so it cannot drift from the input shape.
x = lstm_bottleneck(lstm_units=2, time_steps=time_steps)(x)
x = tfkl.LSTM(2, return_sequences=True)(x)
x = tfkl.LSTM(2, return_sequences=True)(x)
x = tfk.layers.Dense(n_features)(x)
lstm_ae = tfk.models.Model(inputs=input_layer, outputs=x)
lstm_ae.compile(optimizer='adam', loss='mse')
Model: "model_2"
Layer (type) Output Shape Param #
input_3 (InputLayer) [(None, 3, 2)] 0
masking_2 (Masking) (None, 3, 2) 0
lstm_10 (LSTM) (None, 3, 2) 40
lstm_bottleneck_3 (lstm_bott (None, 3, 2) 40
lstm_12 (LSTM) (None, 3, 2) 40
lstm_13 (LSTM) (None, 3, 2) 40
dense_2 (Dense) (None, 3, 2) 6
Total params: 166
Trainable params: 166
Non-trainable params: 0
# Walk the model layer by layer and report the mask each layer receives and emits.
for index, layer in enumerate(lstm_ae.layers):
    print(f'layer {index}: {layer}')
    print(f'has input mask: {layer.input_mask}')
    print(f'has output mask: {layer.output_mask}')
layer 0: <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x64dbf98d0>
has input mask: None
has output mask: None
layer 1: <tensorflow.python.keras.layers.core.Masking object at 0x64dbf9f60>
has input mask: None
has output mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
layer 2: <tensorflow.python.keras.layers.recurrent_v2.LSTM object at 0x64dbf9550>
has input mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
has output mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
layer 3: <__main__.lstm_bottleneck object at 0x64dbf91d0>
has input mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
has output mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
layer 4: <tensorflow.python.keras.layers.recurrent_v2.LSTM object at 0x64e04ca20>
has input mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
has output mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
layer 5: <tensorflow.python.keras.layers.recurrent_v2.LSTM object at 0x64eeb8b00>
has input mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
has output mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
layer 6: <tensorflow.python.keras.layers.core.Dense object at 0x64ef43208>
has input mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
has output mask: Tensor("masking_2/Identity_1:0", shape=(None, 3), dtype=bool)
# Repeat the loss check on the model that is currently bound to `lstm_ae`.
# One sample of 3 timesteps x 2 features; the last timestep is all ones, i.e. it
# matches the mask value and should be ignored.
x = np.array([1, 2, 1, 2, 1, 1], dtype='float32').reshape(1, 3, 2)
# Captured console echo of `x` (kept verbatim from the original write-up):
array([[[1., 2.],
[1., 2.],
[1., 1.]]], dtype=float32)
y = lstm_ae.predict(x)
# Captured console echo of `y` (kept verbatim from the original write-up). The last
# row repeats the second one -- presumably because the masked timestep is skipped;
# confirm against the Keras masking documentation.
array([[[ 0.00065455, -0.00294413],
[ 0.00166675, -0.00742249],
[ 0.00166675, -0.00742249]]], dtype=float32)
# Expected loss: sum of squared errors over the first 2 timesteps, divided by 6
# (the total number of values in the sample, masked timestep included).
expected_loss = np.square(x[:, :2, :] - y[:, :2, :]).sum()/6
# Per the author, with the custom bottleneck layer the evaluated loss now matches
# expected_loss without any sample weights.
actual_loss_with_masking = lstm_ae.evaluate(x=x, y=x)