So, I'm currently implementing a stacked autoencoder with 3 hidden layers in Keras. If I've done it right, the validation accuracy looks fine: after about 25 epochs I reach roughly 96.5% accuracy on MNIST (handwritten digit) classification. But when I try the test set, I end up at around 85%. I did expect the test error to be somewhat higher, but a gap of almost 12% seems large, and as far as I know MNIST is supposed to be one of the easier datasets to work with. So I'm wondering: is there something wrong with my code, or is this actually to be expected?
#from __future__ import print_function
from keras.layers import Input, Dense, Activation
from keras import initializers
import numpy as np
from Dataset import Dataset
import matplotlib.pyplot as plt
from keras.models import Sequential, Model
from keras.optimizers import Adam
import tensorflow as tf
from keras import backend as K
import time
from keras.utils import to_categorical
#global variables
d = Dataset()
num_features = d.X_train.shape[1]
#input = [784, 400, 100, 10, 100, 400]
#output = [400, 100, 10, 100, 400, 784]
names = ['hidden1', 'hidden2', 'hidden3', 'hidden4', 'hidden5', 'hidden6']
list_of_nodes = [784, 400, 144, 10]
def generate_hidden_nodes(list_of_nodes):
    # mirror the encoder sizes to build the decoder half, so the bottleneck
    # sits in the middle and the reconstruction layer comes last
    input = []
    for j in range(len(list_of_nodes)):
        input.append(list_of_nodes[j])
    for i in range(len(list_of_nodes) - 2):
        input.append(list_of_nodes[-2 - i])
    output = input[::-1]
    return input, output

input, output = generate_hidden_nodes(list_of_nodes)
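# Sanity check (my annotation): for list_of_nodes = [784, 400, 144, 10] the
# helper returns
#   input  = [784, 400, 144, 10, 144, 400]   # per-layer input sizes
#   output = [400, 144, 10, 144, 400, 784]   # per-layer output sizes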
def autoencoder(epochs):
    w = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=None)
    model = Sequential()
    input, output = generate_hidden_nodes(list_of_nodes)
    for j in range(len(input)):
        if j == (len(input) - 1):
            # last layer: sigmoid, reconstructs the 784-dim input in [0, 1]
            model.add(Dense(output[j], activation='sigmoid', kernel_initializer=w,
                            input_dim=input[j], name=names[j]))
            #model.add(Dropout(0.45))
        else:
            model.add(Dense(output[j], activation='relu', kernel_initializer=w,
                            input_dim=input[j], name=names[j]))
            #model.add(Dropout(0.45))
    model.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['acc'])
    # train the autoencoder to reproduce its own input
    history = model.fit(d.X_train, d.X_train,
                        epochs=epochs,
                        batch_size=50,
                        shuffle=True,
                        validation_split=0.2)
                        #validation_data=(d.X_test, d.X_test))
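    # note (my annotation): validation_split=0.2 holds out the last 20% of
    # d.X_train (Keras takes the tail of the arrays before shuffling), so these
    # validation metrics still come from training-distribution data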
    #print(history.history.keys())
    #plt.plot(history.history['val_acc'])
    #print(history.history['val_acc'])
    #plt.show()
    # grab the activations of the last layer (layers[5] = 'hidden6'), i.e. the
    # 784-dim reconstructions of the training images
    get_1st_layer_output = K.function([model.layers[0].input],
                                      [model.layers[5].output])
    layer_output = get_1st_layer_output([d.X_train])[0]
    # one more dense layer on top for the supervised classification step
    classifier = Sequential()
    classifier.add(Dense(10, activation='sigmoid', input_dim=784, name='hej'))
    classifier.compile(optimizer=Adam(lr=0.001), loss='binary_crossentropy', metrics=['acc'])
    y_train = to_categorical(d.Y_train, num_classes=10)  # one-hot encode the labels
    #print(layer_output.shape)
    #print(d.X_train.shape)
    history2 = classifier.fit(layer_output, y_train,
                              epochs=epochs,
                              batch_size=50,
                              shuffle=True,
                              validation_split=0.2)
    # note (my annotation): the classifier was fitted on the autoencoder's
    # reconstructions of X_train, but here it sees the raw test images
    predictions = classifier.predict(d.X_test)
    pred = np.argmax(predictions, axis=1)
    storage = []
    plt.plot(history2.history['val_acc'])
    print(history2.history['val_acc'])
    # score against the test labels (Y_test, since the predictions are for X_test)
    for j in range(d.Y_test.shape[0]):
        if pred[j] == d.Y_test[j]:
            storage.append(1)
    return np.sum(storage) / d.Y_test.shape[0]
print(autoencoder(20))
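For reference, this is the consistent version of the final evaluation I had in mind: since the classifier is trained on the autoencoder's reconstructions, the test images arguably should be passed through the autoencoder as well before classifying, and the score computed against Y_test. A minimal sketch under that assumption (evaluate_consistently is a hypothetical helper, not part of my code above):

def evaluate_consistently(model, classifier, X_test, Y_test):
    # run the test images through the trained autoencoder so the classifier
    # sees the same kind of input (reconstructions) it was trained on
    reconstruct = K.function([model.layers[0].input], [model.layers[5].output])
    X_test_rec = reconstruct([X_test])[0]
    pred = np.argmax(classifier.predict(X_test_rec), axis=1)
    # fraction of correct predictions against the test labels
    return np.mean(pred == Y_test)

# usage, assuming the Dataset labels are integer class ids:
# print(evaluate_consistently(model, classifier, d.X_test, d.Y_test))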