So I'm trying to build a deep network using stacked autoencoders, trained on the MNIST dataset. I first pre-train the model (layer by layer) and then run regular backprop for fine-tuning. The problem is that when I run regular backprop on randomly initialized weights, I get a very high epoch loss but still ~96% accuracy. But when I do the same thing starting from the stacked-autoencoder weights, I get a comparatively low epoch loss and low accuracy. I'm attaching the complete source code and the output below.
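For reference, the randomly initialized baseline I'm comparing against is essentially the same 1000-750-500-10 network, just with tf.truncated_normal weights instead of the pre-trained ones. A minimal sketch of what I mean (not the exact code I ran; the names are illustrative):

def baseline_model(data):
    # Same architecture as in test2.py below, but with randomly initialized weights/biases
    dims = [784, n_nodes_hl1, n_nodes_hl2, n_nodes_hl3, n_classes]
    layer = data
    for i in range(len(dims) - 1):
        w = tf.Variable(tf.truncated_normal([dims[i], dims[i + 1]], stddev=0.1))
        b = tf.Variable(tf.zeros([dims[i + 1]]))
        layer = tf.matmul(layer, w) + b
        if i < len(dims) - 2:
            layer = tf.nn.relu(layer)  # ReLU on hidden layers, raw logits at the output
    return layer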
test2.py
from deepautoencoder import StackedAutoEncoder
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import tensorflow as tf

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)
data, target = mnist.train.images, mnist.train.labels

n_nodes_hl1 = 1000
n_nodes_hl2 = 750
n_nodes_hl3 = 500
n_classes = 10
batch_size = 100


def nueral_network_model(data, X, Y):
    # Each layer is initialised with the pre-trained weights (X) and biases (Y)
    # returned by the stacked autoencoder
    hidden_1_layer = {'weights': tf.Variable(tf.pack(X[0])),
                      'biases': tf.Variable(tf.pack(Y[0]))}
    hidden_2_layer = {'weights': tf.Variable(tf.pack(X[1])),
                      'biases': tf.Variable(tf.pack(Y[1]))}
    hidden_3_layer = {'weights': tf.Variable(tf.pack(X[2])),
                      'biases': tf.Variable(tf.pack(Y[2]))}
    output_layer = {'weights': tf.Variable(tf.pack(X[3])),
                    'biases': tf.Variable(tf.pack(Y[3]))}

    l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
    l1 = tf.nn.relu(l1)
    l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
    l2 = tf.nn.relu(l2)
    l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
    l3 = tf.nn.relu(l3)
    output = tf.add(tf.matmul(l3, output_layer['weights']), output_layer['biases'])
    return output


def train_neural_network(x, X, Y):
    prediction = nueral_network_model(x, X, Y)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(prediction, y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    n_epochs = 10
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())
        for epoch in range(n_epochs):
            epoch_loss = 0
            for _ in range(int(mnist.train.num_examples / batch_size)):
                epoch_x, epoch_y = mnist.train.next_batch(batch_size)
                _, c = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += c
            print('Epoch ', (epoch + 1), ' completed of ', n_epochs, '. Loss ', epoch_loss)
        correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
        print('Accuracy ', accuracy.eval({x: mnist.test.images, y: mnist.test.labels}))


# train / test split
idx = np.random.rand(data.shape[0]) < 0.8
'''print(idx.format("float32"))
train_X, train_Y = data[idx], target[idx]
test_X, test_Y = data[~idx], target[~idx]'''
train_X, train_Y = mnist.train.images, mnist.train.labels
test_X, test_Y = mnist.test.images, mnist.test.labels

# layer-wise pre-training of the stacked autoencoder
model = StackedAutoEncoder(dims=[n_nodes_hl1, n_nodes_hl2, n_nodes_hl3, n_classes],
                           activations=['relu', 'relu', 'relu', 'relu'],
                           epoch=[3000, 1000, 1000, 1000], loss='rmse',
                           lr=0.007, batch_size=100, print_step=200)
model.fit(train_X)

# weights = open("weights.txt", "w")
# weights.write(str(len(model.getweights())))
# print(str1, file=weights)
# weights.write(str)
# weights.close()

X = model.getweights()
Y = model.getbiases()

x = tf.placeholder("float", [None, 784])
y = tf.placeholder("float")

train_neural_network(x, X, Y)
stacked_autoencoder.py
import numpy as np
import deepautoencoder.utils as utils
import tensorflow as tf

allowed_activations = ['sigmoid', 'tanh', 'softmax', 'relu', 'linear']
allowed_noises = [None, 'gaussian', 'mask']
allowed_losses = ['rmse', 'cross-entropy']


class StackedAutoEncoder:
    """A deep autoencoder with denoising capability"""

    def assertions(self):
        global allowed_activations, allowed_noises, allowed_losses
        assert self.loss in allowed_losses, 'Incorrect loss given'
        assert 'list' in str(
            type(self.dims)), 'dims must be a list even if there is one layer.'
        assert len(self.epoch) == len(
            self.dims), "No. of epochs must equal to no. of hidden layers"
        assert len(self.activations) == len(
            self.dims), "No. of activations must equal to no. of hidden layers"
        assert all(
            True if x > 0 else False
            for x in self.epoch), "No. of epoch must be atleast 1"
        assert set(self.activations + allowed_activations) == set(
            allowed_activations), "Incorrect activation given."
        assert utils.noise_validator(
            self.noise, allowed_noises), "Incorrect noise given"

    def __init__(self, dims, activations, epoch=1000, noise=None, loss='rmse',
                 lr=0.001, batch_size=100, print_step=50):
        self.print_step = print_step
        self.batch_size = batch_size
        self.lr = lr
        self.loss = loss
        self.activations = activations
        self.noise = noise
        self.epoch = epoch
        self.dims = dims
        self.assertions()
        self.depth = len(dims)
        self.weights, self.biases = [], []

    def add_noise(self, x):
        if self.noise == 'gaussian':
            n = np.random.normal(0, 0.1, (len(x), len(x[0])))
            return x + n
        if 'mask' in self.noise:
            frac = float(self.noise.split('-')[1])
            temp = np.copy(x)
            for i in temp:
                n = np.random.choice(len(i), round(
                    frac * len(i)), replace=False)
                i[n] = 0
            return temp
        if self.noise == 'sp':
            pass

    def getshape(self, x):
        return x.shape

    def fit(self, x):
        for i in range(self.depth):
            print('Layer {0}'.format(i + 1))
            if self.noise is None:
                x = self.run(data_x=x, activation=self.activations[i],
                             data_x_=x,
                             hidden_dim=self.dims[i], epoch=self.epoch[i],
                             loss=self.loss,
                             batch_size=self.batch_size, lr=self.lr,
                             print_step=self.print_step)
            else:
                temp = np.copy(x)
                x = self.run(data_x=self.add_noise(temp),
                             activation=self.activations[i], data_x_=x,
                             hidden_dim=self.dims[i],
                             epoch=self.epoch[i], loss=self.loss,
                             batch_size=self.batch_size,
                             lr=self.lr, print_step=self.print_step)

    def getweights(self):
        return self.weights

    def getbiases(self):
        return self.biases

    def transform(self, data):
        tf.reset_default_graph()
        sess = tf.Session()
        x = tf.constant(data, dtype=tf.float32)
        for w, b, a in zip(self.weights, self.biases, self.activations):
            weight = tf.constant(w, dtype=tf.float32)
            bias = tf.constant(b, dtype=tf.float32)
            layer = tf.matmul(x, weight) + bias
            x = self.activate(layer, a)
        return x.eval(session=sess)

    def fit_transform(self, x):
        self.fit(x)
        return self.transform(x)

    def run(self, data_x, data_x_, hidden_dim, activation, loss, lr,
            print_step, epoch, batch_size=100):
        tf.reset_default_graph()
        input_dim = len(data_x[0])
        sess = tf.Session()
        x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')
        x_ = tf.placeholder(dtype=tf.float32, shape=[
            None, input_dim], name='x_')
        encode = {'weights': tf.Variable(tf.truncated_normal(
            [input_dim, hidden_dim], dtype=tf.float32)),
            'biases': tf.Variable(tf.truncated_normal([hidden_dim],
                                                      dtype=tf.float32))}
        decode = {'biases': tf.Variable(tf.truncated_normal([input_dim],
                                                            dtype=tf.float32)),
                  'weights': tf.transpose(encode['weights'])}
        encoded = self.activate(
            tf.matmul(x, encode['weights']) + encode['biases'], activation)
        decoded = tf.matmul(encoded, decode['weights']) + decode['biases']
        # reconstruction loss
        if loss == 'rmse':
            loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(x_, decoded))))
        elif loss == 'cross-entropy':
            loss = -tf.reduce_mean(x_ * tf.log(decoded))
        train_op = tf.train.AdamOptimizer(lr).minimize(loss)
        sess.run(tf.initialize_all_variables())
        for i in range(epoch):
            b_x, b_x_ = utils.get_batch(
                data_x, data_x_, batch_size)
            sess.run(train_op, feed_dict={x: b_x, x_: b_x_})
            if (i + 1) % print_step == 0:
                l = sess.run(loss, feed_dict={x: data_x, x_: data_x_})
                print('epoch {0}: global loss = {1}'.format(i, l))
        # debug
        # print('Decoded', sess.run(decoded, feed_dict={x: self.data_x_})[0])
        self.weights.append(sess.run(encode['weights']))
        self.biases.append(sess.run(encode['biases']))
        return sess.run(encoded, feed_dict={x: data_x_})

    def activate(self, linear, name):
        if name == 'sigmoid':
            return tf.nn.sigmoid(linear, name='encoded')
        elif name == 'softmax':
            return tf.nn.softmax(linear, name='encoded')
        elif name == 'linear':
            return linear
        elif name == 'tanh':
            return tf.nn.tanh(linear, name='encoded')
        elif name == 'relu':
            return tf.nn.relu(linear, name='encoded')
When I run test2.py, I get this:
Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Layer 1
epoch 199: global loss = 0.7411692142486572
epoch 399: global loss = 0.37929072976112366
epoch 599: global loss = 0.3112555742263794
epoch 799: global loss = 0.2843865156173706
epoch 999: global loss = 0.27192142605781555
epoch 1199: global loss = 0.2679365277290344
epoch 1399: global loss = 0.263033926486969
epoch 1599: global loss = 0.2602871060371399
epoch 1799: global loss = 0.2599691152572632
epoch 1999: global loss = 0.25943684577941895
epoch 2199: global loss = 0.2595524191856384
epoch 2399: global loss = 0.25938916206359863
epoch 2599: global loss = 0.2593194842338562
epoch 2799: global loss = 0.25892356038093567
epoch 2999: global loss = 0.25896236300468445
Layer 2
epoch 199: global loss = 0.3850303292274475
epoch 399: global loss = 0.37506163120269775
epoch 599: global loss = 0.24444995820522308
epoch 799: global loss = 0.2438928335905075
epoch 999: global loss = 0.24864040315151215
Layer 3
epoch 199: global loss = 3.073141574859619
epoch 399: global loss = 2.9303085803985596
epoch 599: global loss = 2.1766204833984375
epoch 799: global loss = 2.1765928268432617
epoch 999: global loss = 2.147747755050659
Layer 4
epoch 199: global loss = 2.420090913772583
epoch 399: global loss = 2.4199604988098145
epoch 599: global loss = 2.419969320297241
epoch 799: global loss = 2.420668601989746
epoch 999: global loss = 2.42080020904541
Epoch 1 completed of 10 . Loss 1374.4021287
Epoch 2 completed of 10 . Loss 1296.14652419
Epoch 3 completed of 10 . Loss 1272.90055275
Epoch 4 completed of 10 . Loss 1267.7663238
Epoch 5 completed of 10 . Loss 1265.81543016
Epoch 6 completed of 10 . Loss 1266.61085653
Epoch 7 completed of 10 . Loss 1265.73505712
Epoch 8 completed of 10 . Loss 1265.61917806
Epoch 9 completed of 10 . Loss 1265.62904024
Epoch 10 completed of 10 . Loss 1265.62279081
Accuracy 0.1143
I feel like I'm missing something here. Please help!