这篇文章使用的是此处("here" 链接所指原文)开发的方法。
我有一个1D卷积自动编码器,可以对1500个时间序列的训练样本进行训练,每个样本具有2001个时间分量
# Network hyperparameters, shared by the encoder and decoder builders below.
Conv_1 = 256   # filters in the outermost Conv1D layers
Conv_2 = 32    # filters in the innermost Conv1D layers
Dense_1 = 200  # units of the first compression Dense layer
Dense_2 = 100  # units of the latent (encoded) representation
def reset_seeds():
    """Re-seed numpy, the stdlib RNG and TensorFlow for reproducible builds."""
    np.random.seed(1)
    random.seed(2)
    # TF 2.x and TF 1.x expose different seeding APIs.
    if tf.__version__.startswith('2'):
        tf.random.set_seed(3)
    else:
        tf.set_random_seed(3)
    print("RANDOM SEEDS RESET")
# Number of time steps per sample; with the described data this is 2001.
# NOTE(review): X_train must already be loaded earlier in the file.
number_of_time_components = np.shape(X_train)[1] # 2001
def Encoder():
    """Build the encoder: two Conv1D + MaxPooling1D stages followed by two
    Dense layers compressing each series to a Dense_2-dimensional code.

    Returns:
        A Keras Model mapping (None, number_of_time_components, 1) inputs
        to (None, Dense_2) latent codes.
    """
    encoder_input = Input(batch_shape=(None, number_of_time_components, 1))
    # BUG FIX: this layer was created with name='' (an empty layer name),
    # which produces an unnamed group in the HDF5 weight file written by
    # save_weights; give it an explicit, unique name instead.
    e1 = Conv1D(Conv_1, 3, activation='tanh', padding='valid',
                name='encod_conv1d_0')(encoder_input)
    e2 = MaxPooling1D(2)(e1)
    e3 = Conv1D(Conv_2, 3, activation='tanh', padding='valid')(e2)
    e4 = MaxPooling1D(2)(e3)
    e5 = Flatten()(e4)
    encoded = Dense(Dense_1, activation='tanh')(e5)
    encoded = Dense(Dense_2, activation='tanh')(encoded)
    return Model(encoder_input, encoded)
def DecoderAE(encoder_input, encoded_input):
    """Attach the decoder head to the encoder's output tensor and return
    the complete autoencoder (raw series in -> reconstructed series out).

    Layer names are fixed strings because the stand-alone decoder in the
    companion file restores weights by matching these exact names.
    """
    x = Dense(Dense_1, activation='tanh', name='enc_i')(encoded_input)
    x = Reshape((Dense_1, 1), name='resh_1')(x)
    x = Conv1D(Conv_2, 3, activation='tanh', padding='valid',
               name='decod_conv1d_1')(x)
    x = UpSampling1D(2, name='decod_upsampling1d_1')(x)
    x = Conv1D(Conv_1, 3, activation='tanh', padding='valid',
               name='decod_conv1d_2')(x)
    x = UpSampling1D(2, name='decod_upsampling1d_2')(x)
    x = Flatten(name='decod_flatten')(x)
    x = Dense(number_of_time_components, name='decod_dense1')(x)
    decoded = Reshape((number_of_time_components, 1), name='decod_reshape')(x)
    return Model(encoder_input, decoded)
# ---- File 1 driver: build, train, and persist the autoencoder ----
reset_seeds()
encoder = Encoder()
# Full autoencoder: encoder front-end plus decoder head built on its output.
AE = DecoderAE(encoder.input, encoder.output)
AE.compile(optimizer='adam', loss='mse')
# train the model
# NOTE(review): `validation_split` is not defined anywhere in this snippet —
# it must be assigned earlier in the file; confirm before running.
history = AE.fit(x = X_train, y = X_train,
epochs=20,
batch_size=100,
validation_split=validation_split)
# predictions testing
predictions = AE.predict(X_test)
predictions = np.squeeze(predictions)
np.savetxt('PREDICTIONS/predictions_testing.txt', predictions)
# Encode testing features
# NOTE(review): saved under 'encoded_features_testing.txt' — any consumer
# must load this exact filename.
features = encoder.predict(X_test)
features = np.squeeze(features)
np.savetxt('FEATURES/encoded_features_testing.txt', features)
# save model
AE.save_weights('SAVED_MODELS/autoencoder_weights.h5')
因此,我保存了自动编码器权重。我还保存了预测
我的测试数据集的编码特征。目视检查显示自动编码器在测试数据集上的性能良好:predictions
与测试数据集X_test
中的原始时间序列非常吻合
然后我关闭此文件并打开另一个文件(文件 2):
def reset_seeds():
    """Seed every RNG (numpy, stdlib random, TensorFlow) so that model
    construction here matches the run that produced the saved weights."""
    np.random.seed(1)
    random.seed(2)
    # TF 1.x and 2.x use different functions to set the global seed.
    major_version = tf.__version__[0]
    if major_version == '2':
        tf.random.set_seed(3)
    else:
        tf.set_random_seed(3)
    print("RANDOM SEEDS RESET")
def Decoder():
    """Rebuild the stand-alone decoder with exactly the layer names used in
    the trained autoencoder, so its weights can be restored by name.

    Returns:
        A Keras Model mapping (None, Dense_2) latent codes to
        (None, number_of_time_components, 1) reconstructed series.
    """
    encoded_input = Input(shape=(Dense_2,))
    layer = Dense(Dense_1, activation='tanh', name='enc_i')(encoded_input)
    layer = Reshape((Dense_1, 1), name='resh_1')(layer)
    layer = Conv1D(Conv_2, 3, activation='tanh', padding='valid',
                   name='decod_conv1d_1')(layer)
    layer = UpSampling1D(2, name='decod_upsampling1d_1')(layer)
    layer = Conv1D(Conv_1, 3, activation='tanh', padding='valid',
                   name='decod_conv1d_2')(layer)
    layer = UpSampling1D(2, name='decod_upsampling1d_2')(layer)
    layer = Flatten(name='decod_flatten')(layer)
    layer = Dense(number_of_time_components, name='decod_dense1')(layer)
    decoded = Reshape((number_of_time_components, 1),
                      name='decod_reshape')(layer)
    return Model(encoded_input, decoded)
def load_weights(model, filepath):
    """Restore weights by layer name from a Keras HDF5 weight file.

    Layers present in the file but absent from ``model`` are skipped with a
    message, which is what lets a sub-model (e.g. just the decoder) pull its
    layers out of a full-autoencoder checkpoint.

    Args:
        model: Keras model whose layer names select which weights to load.
        filepath: path to an HDF5 file written by ``Model.save_weights``.
    """
    def _decode(name):
        # h5py < 3 returns bytes for string attributes, h5py >= 3 returns str;
        # the original unconditional .decode('utf8') crashes on str.
        return name.decode('utf8') if isinstance(name, bytes) else name

    with h5py.File(filepath, mode='r') as f:
        file_layer_names = [_decode(n) for n in f.attrs['layer_names']]
        model_layer_names = [layer.name for layer in model.layers]
        weight_values_to_load = []
        for name in file_layer_names:
            if name not in model_layer_names:
                print(name, "is ignored; skipping")
                continue
            g = f[name]
            weight_names = [_decode(n) for n in g.attrs['weight_names']]
            # Materialize the datasets as arrays while the file is still
            # open; the original kept live h5py Dataset handles, which are
            # invalid once the `with` block closes the file.
            weight_values = [np.asarray(g[weight_name])
                             for weight_name in weight_names]
            try:
                layer = model.get_layer(name=name)
            except ValueError:  # was a bare `except:` — catch only "no such layer"
                layer = None
            if layer is not None:
                symbolic_weights = (layer.trainable_weights +
                                    layer.non_trainable_weights)
                if len(symbolic_weights) != len(weight_values):
                    print('Model & file weights shapes mismatch')
                else:
                    weight_values_to_load += zip(symbolic_weights,
                                                 weight_values)
        # Assign everything in one backend call, inside the `with` block.
        K.batch_set_value(weight_values_to_load)
### load DECODER
# ---- File 2 driver: rebuild the decoder, restore its weights, and compare
# its output on the saved features against the saved autoencoder predictions.
reset_seeds()
decoder = Decoder()
# BUG FIX: the path had a spurious `.format(filedescriptor)` appended —
# `filedescriptor` is never defined, so that line raised a NameError (and
# .format on a string with no placeholders is a no-op anyway).
load_weights(decoder, 'SAVED_MODELS/autoencoder_weights.h5')
decoder_loaded_weights = decoder.get_weights()
### load encoded testing FEATURES
# BUG FIX: File 1 saved the features as 'encoded_features_testing.txt';
# loading 'features_testing.txt' reads a different (stale or unrelated)
# file — a direct cause of `decompressed != predictions`.
features = np.loadtxt('FEATURES/encoded_features_testing.txt')
decompressed = decoder.predict(features)
decompressed = np.squeeze(decompressed)
### load predictions of Autoencoder (calculated in File 1)
predictions = np.loadtxt('PREDICTIONS/predictions_testing.txt')
# comparison between decoded features, and previously saved predictions of the Autoencoder
fig, ax = plt.subplots(5, figsize=(10, 20))
for i in range(5):
    ax[i].plot(X_test[100*i], color='blue', label='Original')
    ax[i].plot(decompressed[100*i], color='red', label='Decoded features')
    ax[i].plot(predictions[100*i], color='black',
               label='Predictions of Autoencoder')
    # BUG FIX: set_xlabel was fused onto the end of the previous statement,
    # which is a SyntaxError in Python.
    ax[i].set_xlabel('Time components', fontsize='x-large')
    ax[i].set_ylabel('Amplitude', fontsize='x-large')
    ax[i].set_title('Testing sample n. {:}'.format(100*i+1),
                    fontsize='x-large')
    ax[i].legend(fontsize='x-large')
plt.subplots_adjust(hspace=1)
plt.show()
plt.close()
我读了以前训练过的自动编码器的权重,并隔离了解码器部分。我用它来预测先前保存的相同编码功能的输出。这些应该与我以前保存的自动编码器的预测完全匹配,现在正在加载以进行比较。
但是,当我绘制原始时间序列,先前已编码的解码特征以及自动编码器的输出时,我发现最后两个之间有很大差异,它们应该完全匹配。在文件1中经过训练的自动编码器效果很好,因为预测与输入(蓝线和黑线)很像。
为什么红线和黑线不同?换句话说,为什么 `decompressed != predictions`?