Keras-自动编码器中的解码编码功能与根据自动编码器计算预测不同

时间:2019-11-04 12:05:20

标签: python tensorflow machine-learning keras autoencoder

本文参考了 here(链接)中开发的方法。

我有一个1D卷积自动编码器,可以对1500个时间序列的训练样本进行训练,每个样本具有2001个时间分量

# network parameters
Conv_1 = 256   # filters in the first encoder conv (and last decoder conv)
Conv_2 = 32    # filters in the second encoder conv (and first decoder conv)
Dense_1 = 200  # units of the intermediate dense layer
Dense_2 = 100  # units of the bottleneck (latent) layer

def reset_seeds():
    """Re-seed numpy, stdlib random and TensorFlow for reproducible runs."""
    np.random.seed(1)
    random.seed(2)
    # TF 2.x and TF 1.x expose different seeding entry points.
    seed_tf = tf.random.set_seed if tf.__version__[0] == '2' else tf.set_random_seed
    seed_tf(3)
    print("RANDOM SEEDS RESET")

number_of_time_components = np.shape(X_train)[1] # 2001

def Encoder():
    """Build the 1D-conv encoder mapping (None, T, 1) to a Dense_2-dim latent vector.

    Layer construction order is kept identical to preserve Keras' automatic
    layer naming (weights are later matched by name).
    """
    encoder_input = Input(batch_shape=(None, number_of_time_components, 1))
    x = Conv1D(Conv_1, 3, activation='tanh', padding='valid', name='')(encoder_input)
    x = MaxPooling1D(2)(x)
    x = Conv1D(Conv_2, 3, activation='tanh', padding='valid')(x)
    x = MaxPooling1D(2)(x)
    x = Flatten()(x)
    x = Dense(Dense_1, activation='tanh')(x)
    latent = Dense(Dense_2, activation='tanh')(x)
    return Model(encoder_input, latent)

def DecoderAE(encoder_input, encoded_input):
    """Attach the decoder stack to the encoder output and return the full AE model.

    All decoder layers carry explicit names so they can be re-matched when
    loading the saved weights into a standalone decoder.
    """
    x = Dense(Dense_1, activation='tanh', name='enc_i')(encoded_input)
    x = Reshape((Dense_1, 1), name='resh_1')(x)
    x = Conv1D(Conv_2, 3, activation='tanh', padding='valid',
               name='decod_conv1d_1')(x)
    x = UpSampling1D(2, name='decod_upsampling1d_1')(x)
    x = Conv1D(Conv_1, 3, activation='tanh', padding='valid', name='decod_conv1d_2')(x)
    x = UpSampling1D(2, name='decod_upsampling1d_2')(x)
    x = Flatten(name='decod_flatten')(x)
    x = Dense(number_of_time_components, name='decod_dense1')(x)
    reconstruction = Reshape((number_of_time_components, 1), name='decod_reshape')(x)
    return Model(encoder_input, reconstruction)

reset_seeds()
encoder = Encoder()
# The AE reuses the encoder's input/output tensors, so fitting the AE also
# trains the encoder's layers.
AE = DecoderAE(encoder.input, encoder.output)
AE.compile(optimizer='adam', loss='mse')

# train the model: input equals target (reconstruction objective)
history = AE.fit(x = X_train, y = X_train,
                    epochs=20,
                    batch_size=100,
                    validation_split=validation_split)

# predictions testing
predictions = AE.predict(X_test)
predictions = np.squeeze(predictions)
np.savetxt('PREDICTIONS/predictions_testing.txt', predictions)


# Encode testing features
features = encoder.predict(X_test)
features = np.squeeze(features) 
# NOTE(review): File 2 as shown loads 'FEATURES/features_testing.txt' — the
# two paths must agree for the round-trip comparison; verify.
np.savetxt('FEATURES/encoded_features_testing.txt', features)

# save model
# Weights are stored per layer name in the HDF5 file; only explicitly named
# layers can be reliably re-matched into a different model later.
AE.save_weights('SAVED_MODELS/autoencoder_weights.h5')

因此,我保存了自动编码器的权重,并且保存了测试数据集的预测结果以及编码特征。目视检查显示自动编码器在测试数据集上的性能良好:predictions 与测试数据集 X_test 中的原始时间序列非常吻合。

然后我关闭此文件并打开另一个文件:

文件 2:

def reset_seeds():
    """Re-seed numpy, stdlib random and TensorFlow so File 2 reproduces File 1."""
    np.random.seed(1)
    random.seed(2)
    # Branch on the major version: TF 2.x vs TF 1.x seeding API.
    if tf.__version__[0] == '2':
        tf.random.set_seed(3)
        print("RANDOM SEEDS RESET")
        return
    tf.set_random_seed(3)
    print("RANDOM SEEDS RESET")

def Decoder():
    """Standalone decoder: Dense_2-dim latent vector -> reconstructed (T, 1) series.

    Layer names are byte-identical to those in DecoderAE so that weights saved
    from the full autoencoder can be matched by name.
    """
    encoded_input = Input(shape=(Dense_2,))
    x = Dense(Dense_1, activation='tanh', name='enc_i')(encoded_input)
    x = Reshape((Dense_1, 1), name='resh_1')(x)
    x = Conv1D(Conv_2, 3, activation='tanh', padding='valid', name='decod_conv1d_1')(x)
    x = UpSampling1D(2, name='decod_upsampling1d_1')(x)
    x = Conv1D(Conv_1, 3, activation='tanh', padding='valid', name='decod_conv1d_2')(x)
    x = UpSampling1D(2, name='decod_upsampling1d_2')(x)
    x = Flatten(name='decod_flatten')(x)
    x = Dense(number_of_time_components, name='decod_dense1')(x)
    reconstruction = Reshape((number_of_time_components, 1), name='decod_reshape')(x)
    return Model(encoded_input, reconstruction)


def load_weights(model, filepath):
    """Partially load weights from an HDF5 file written by ``save_weights``.

    Layers are matched by name; layers present in the file but absent from
    ``model`` are skipped with a message. This lets a standalone decoder pick
    up only its own (named) layers from the full-autoencoder weight file.

    Parameters
    ----------
    model : Keras model whose layers should receive weights.
    filepath : str, path to the ``.h5`` weight file.
    """
    with h5py.File(filepath, mode='r') as f:
        file_layer_names = [n.decode('utf8') for n in f.attrs['layer_names']]
        model_layer_names = [layer.name for layer in model.layers]

        weight_values_to_load = []
        for name in file_layer_names:
            if name not in model_layer_names:
                print(name, "is ignored; skipping")
                continue
            g = f[name]
            weight_names = [n.decode('utf8') for n in g.attrs['weight_names']]

            # Materialize the datasets as numpy arrays while the file is
            # still open (an empty list falls out naturally here).
            weight_values = [np.asarray(g[weight_name])
                             for weight_name in weight_names]
            try:
                layer = model.get_layer(name=name)
            except ValueError:  # get_layer raises ValueError for unknown names
                layer = None
            if layer is not None:
                symbolic_weights = (layer.trainable_weights +
                                    layer.non_trainable_weights)
                if len(symbolic_weights) != len(weight_values):
                    print('Model & file weights shapes mismatch')
                else:
                    weight_values_to_load += zip(symbolic_weights, weight_values)

        K.batch_set_value(weight_values_to_load)

### load DECODER
reset_seeds()
decoder = Decoder()
# The original appended ``.format(filedescriptor)`` to this literal: the
# string has no placeholders and ``filedescriptor`` is undefined (NameError),
# so the call is dropped.
load_weights(decoder, 'SAVED_MODELS/autoencoder_weights.h5')
decoder_loaded_weights = decoder.get_weights()

### load encoded testing FEATURES
# File 1 saves the features as 'encoded_features_testing.txt'; load that same
# file (the original read a non-existent 'features_testing.txt').
features = np.loadtxt('FEATURES/encoded_features_testing.txt')
decompressed = decoder.predict(features)
decompressed = np.squeeze(decompressed)

### load predictions of Autoencoder (calculated in File 1)
predictions = np.loadtxt('PREDICTIONS/predictions_testing.txt')

# comparison between decoded features, and previously saved predictions of the Autoencoder
fig, ax = plt.subplots(5, figsize=(10, 20))
for i in range(5):
    ax[i].plot(X_test[100*i], color='blue', label='Original')
    ax[i].plot(decompressed[100*i], color='red', label='Decoded features')
    # The original fused the next statement onto this line (a syntax error);
    # it is split back into two calls here.
    ax[i].plot(predictions[100*i], color='black', label='Predictions of Autoencoder')
    ax[i].set_xlabel('Time components', fontsize='x-large')
    ax[i].set_ylabel('Amplitude', fontsize='x-large')
    ax[i].set_title('Testing sample n. {:}'.format(100*i+1), fontsize='x-large')
    ax[i].legend(fontsize='x-large')
plt.subplots_adjust(hspace=1)
plt.show()
plt.close()
我读了以前训练过的自动编码器的权重,并隔离了解码器部分。我用它来预测先前保存的相同编码功能的输出。这些应该与我以前保存的自动编码器的预测完全匹配,现在正在加载以进行比较。

但是,当我绘制原始时间序列,先前已编码的解码特征以及自动编码器的输出时,我发现最后两个之间有很大差异,它们应该完全匹配。在文件1中经过训练的自动编码器效果很好,因为预测与输入(蓝线和黑线)很像。

为什么红线和黑线不同?换句话说,为什么 decompressed != predictions?

enter image description here

0 个答案:

没有答案