So I'm trying to build a deep network using stacked autoencoders, trained on the MNIST dataset. I first pre-train the model (layer by layer) and then run regular backprop for fine-tuning. The problem is that when I run regular backprop on randomly initialized weights, I get a very high epoch loss but still ~96% accuracy. But when I do the same thing starting from the stacked-autoencoder weights, I get a comparatively low epoch loss and low accuracy. I'm attaching the complete source code and the output below.
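For reference, the randomly initialized baseline I'm comparing against is essentially the same 1000-750-500-10 network, just with tf.truncated_normal weights instead of the pre-trained ones. A minimal sketch of what I mean (not the exact code I ran; the names are illustrative):

def baseline_model(data):
    # Same architecture as in test2.py below, but with randomly initialized weights/biases
    dims = [784, n_nodes_hl1, n_nodes_hl2, n_nodes_hl3, n_classes]
    layer = data
    for i in range(len(dims) - 1):
        w = tf.Variable(tf.truncated_normal([dims[i], dims[i + 1]], stddev=0.1))
        b = tf.Variable(tf.zeros([dims[i + 1]]))
        layer = tf.matmul(layer, w) + b
        if i < len(dims) - 2:
            layer = tf.nn.relu(layer)  # ReLU on hidden layers, raw logits at the output
    return layer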
test2.py
from deepautoencoder import StackedAutoEncoder
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import tensorflow as tf

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
data, target = mnist.train.images, mnist.train.labels

n_nodes_hl1 = 1000
n_nodes_hl2 = 750
n_nodes_hl3 = 500
n_classes = 10
batch_size = 100


def nueral_network_model(data, X, Y):
    # Each layer is initialised with the pre-trained weights (X) and biases (Y)
    # returned by the stacked autoencoder
    hidden_1_layer = {'weights': tf.Variable(tf.pack(X[0])),
                      'biases': tf.Variable(tf.pack(Y[0]))}
    hidden_2_layer = {'weights': tf.Variable(tf.pack(X[1])),
                      'biases': tf.Variable(tf.pack(Y[1]))}
    hidden_3_layer = {'weights': tf.Variable(tf.pack(X[2])),
                      'biases': tf.Variable(tf.pack(Y[2]))}
    output_layer = {'weights': tf.Variable(tf.pack(X[3])),
                    'biases': tf.Variable(tf.pack(Y[3]))}

    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
    l3 = tf.nn.relu(l3)
    output = tf.add(tf.matmul(l3, output_layer['weights']), output_layer['biases'])
    return output


def train_neural_network(x, X, Y):
    prediction = nueral_network_model(x, X, Y)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(prediction, y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    n_epochs = 10
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        for epoch in range(n_epochs):
            epoch_loss = 0
            for _ in range(int(mnist.train.num_examples / batch_size)):
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch ', (epoch + 1), ' completed of ', n_epochs, '. Loss ', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy ', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))


# train / test split
idx = np.random.rand(data.shape[0]) < 0.8
'''print(idx.format("float32"))
train_X, train_Y = data[idx], target[idx]
test_X, test_Y = data[~idx], target[~idx]'''
train_X, train_Y = mnist.train.images, mnist.train.labels
test_X, test_Y = mnist.test.images, mnist.test.labels

# layer-wise pre-training of the stacked autoencoder
model = StackedAutoEncoder(dims=[n_nodes_hl1, n_nodes_hl2, n_nodes_hl3, n_classes],
                           activations=['relu', 'relu', 'relu', 'relu'],
                           epoch=[3000, 1000, 1000, 1000], loss='rmse',
                           lr=0.007, batch_size=100, print_step=200)
model.fit(train_X)

# weights = open("weights.txt", "w")
# weights.write(str(len(model.getweights())))
# print(str1, file=weights)
# weights.write(str)
# weights.close()

X = model.getweights()
Y = model.getbiases()

x = tf.placeholder("float", [None, 784])
y = tf.placeholder("float")

train_neural_network(x, X, Y)
stacked_autoencoder.py
import numpy as np
import deepautoencoder.utils as utils
import tensorflow as tf

allowed_activations = ['sigmoid', 'tanh', 'softmax', 'relu', 'linear']
allowed_noises = [None, 'gaussian', 'mask']
allowed_losses = ['rmse', 'cross-entropy']


class StackedAutoEncoder:
    """A deep autoencoder with denoising capability"""

    def assertions(self):
        global allowed_activations, allowed_noises, allowed_losses
        assert self.loss in allowed_losses, 'Incorrect loss given'
        assert 'list' in str(
            type(self.dims)), 'dims must be a list even if there is one layer.'
        assert len(self.epoch) == len(
            self.dims), "No. of epochs must equal to no. of hidden layers"
        assert len(self.activations) == len(
            self.dims), "No. of activations must equal to no. of hidden layers"
        assert all(
            True if x > 0 else False
            for x in self.epoch), "No. of epoch must be atleast 1"
        assert set(self.activations + allowed_activations) == set(
            allowed_activations), "Incorrect activation given."
        assert utils.noise_validator(
            self.noise, allowed_noises), "Incorrect noise given"

    def __init__(self, dims, activations, epoch=1000, noise=None, loss='rmse',
                 lr=0.001, batch_size=100, print_step=50):
        self.print_step = print_step
        self.batch_size = batch_size
        self.lr = lr
        self.loss = loss
        self.activations = activations
        self.noise = noise
        self.epoch = epoch
        self.dims = dims
        self.assertions()
        self.depth = len(dims)
        self.weights, self.biases = [], []

    def add_noise(self, x):
        if self.noise == 'gaussian':
            n = np.random.normal(0, 0.1, (len(x), len(x[0])))
            return x + n
        if 'mask' in self.noise:
            frac = float(self.noise.split('-')[1])
            temp = np.copy(x)
            for i in temp:
                n = np.random.choice(len(i), round(
                    frac * len(i)), replace=False)
                i[n] = 0
            return temp
        if self.noise == 'sp':
            pass

    def getshape(self, x):
        return x.shape

    def fit(self, x):
        for i in range(self.depth):
            print('Layer {0}'.format(i + 1))
            if self.noise is None:
                x = self.run(data_x=x, activation=self.activations[i],
                             data_x_=x,
                             hidden_dim=self.dims[i], epoch=self.epoch[i],
                             loss=self.loss,
                             batch_size=self.batch_size, lr=self.lr,
                             print_step=self.print_step)
            else:
                temp = np.copy(x)
                x = self.run(data_x=self.add_noise(temp),
                             activation=self.activations[i], data_x_=x,
                             hidden_dim=self.dims[i],
                             epoch=self.epoch[i], loss=self.loss,
                             batch_size=self.batch_size,
                             lr=self.lr, print_step=self.print_step)

    def getweights(self):
        return self.weights

    def getbiases(self):
        return self.biases

    def transform(self, data):
        tf.reset_default_graph()
        sess = tf.Session()
        x = tf.constant(data, dtype=tf.float32)
        for w, b, a in zip(self.weights, self.biases, self.activations):
            weight = tf.constant(w, dtype=tf.float32)
            bias = tf.constant(b, dtype=tf.float32)
            layer = tf.matmul(x, weight) + bias
            x = self.activate(layer, a)
        return x.eval(session=sess)

    def fit_transform(self, x):
        self.fit(x)
        return self.transform(x)

    def run(self, data_x, data_x_, hidden_dim, activation, loss, lr,
            print_step, epoch, batch_size=100):
        tf.reset_default_graph()
        input_dim = len(data_x[0])
        sess = tf.Session()
        x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')
        x_ = tf.placeholder(dtype=tf.float32, shape=[
            None, input_dim], name='x_')
        encode = {'weights': tf.Variable(tf.truncated_normal(
            [input_dim, hidden_dim], dtype=tf.float32)),
            'biases': tf.Variable(tf.truncated_normal([hidden_dim],
                                                      dtype=tf.float32))}
        decode = {'biases': tf.Variable(tf.truncated_normal([input_dim],
                                                            dtype=tf.float32)),
                  'weights': tf.transpose(encode['weights'])}
        encoded = self.activate(
            tf.matmul(x, encode['weights']) + encode['biases'], activation)
        decoded = tf.matmul(encoded, decode['weights']) + decode['biases']
        # reconstruction loss
        if loss == 'rmse':
            loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(x_, decoded))))
        elif loss == 'cross-entropy':
            loss = -tf.reduce_mean(x_ * tf.log(decoded))
        train_op = tf.train.AdamOptimizer(lr).minimize(loss)
        sess.run(tf.initialize_all_variables())
        for i in range(epoch):
            b_x, b_x_ = utils.get_batch(
                data_x, data_x_, batch_size)
            sess.run(train_op, feed_dict={x: b_x, x_: b_x_})
            if (i + 1) % print_step == 0:
                l = sess.run(loss, feed_dict={x: data_x, x_: data_x_})
                print('epoch {0}: global loss = {1}'.format(i, l))
        # debug
        # print('Decoded', sess.run(decoded, feed_dict={x: self.data_x_})[0])
        self.weights.append(sess.run(encode['weights']))
        self.biases.append(sess.run(encode['biases']))
        return sess.run(encoded, feed_dict={x: data_x_})

    def activate(self, linear, name):
        if name == 'sigmoid':
            return tf.nn.sigmoid(linear, name='encoded')
        elif name == 'softmax':
            return tf.nn.softmax(linear, name='encoded')
        elif name == 'linear':
            return linear
        elif name == 'tanh':
            return tf.nn.tanh(linear, name='encoded')
        elif name == 'relu':
            return tf.nn.relu(linear, name='encoded')
When I run test2.py, I get this:
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Layer 1
epoch 199: global loss = 0.7411692142486572
epoch 399: global loss = 0.37929072976112366
epoch 599: global loss = 0.3112555742263794
epoch 799: global loss = 0.2843865156173706
epoch 999: global loss = 0.27192142605781555
epoch 1199: global loss = 0.2679365277290344
epoch 1399: global loss = 0.263033926486969
epoch 1599: global loss = 0.2602871060371399
epoch 1799: global loss = 0.2599691152572632
epoch 1999: global loss = 0.25943684577941895
epoch 2199: global loss = 0.2595524191856384
epoch 2399: global loss = 0.25938916206359863
epoch 2599: global loss = 0.2593194842338562
epoch 2799: global loss = 0.25892356038093567
epoch 2999: global loss = 0.25896236300468445
Layer 2
epoch 199: global loss = 0.3850303292274475
epoch 399: global loss = 0.37506163120269775
epoch 599: global loss = 0.24444995820522308
epoch 799: global loss = 0.2438928335905075
epoch 999: global loss = 0.24864040315151215
Layer 3
epoch 199: global loss = 3.073141574859619
epoch 399: global loss = 2.9303085803985596
epoch 599: global loss = 2.1766204833984375
epoch 799: global loss = 2.1765928268432617
epoch 999: global loss = 2.147747755050659
Layer 4
epoch 199: global loss = 2.420090913772583
epoch 399: global loss = 2.4199604988098145
epoch 599: global loss = 2.419969320297241
epoch 799: global loss = 2.420668601989746
epoch 999: global loss = 2.42080020904541
Epoch 1 completed of 10 . Loss 1374.4021287
Epoch 2 completed of 10 . Loss 1296.14652419
Epoch 3 completed of 10 . Loss 1272.90055275
Epoch 4 completed of 10 . Loss 1267.7663238
Epoch 5 completed of 10 . Loss 1265.81543016
Epoch 6 completed of 10 . Loss 1266.61085653
Epoch 7 completed of 10 . Loss 1265.73505712
Epoch 8 completed of 10 . Loss 1265.61917806
Epoch 9 completed of 10 . Loss 1265.62904024
Epoch 10 completed of 10 . Loss 1265.62279081
Accuracy 0.1143
I feel like I'm missing something here. Please help!