堆叠自动编码器,用于在tensorflow中使用自定义数据库进行手语识别

时间:2017-08-23 07:56:25

标签: python tensorflow autoencoder

我正在尝试使用tensorflow中的堆叠自动编码器重建我的数据库的输入图像。如果我使用mnist数据库,那么我可以正确地重建输入图像。但是当我应用我自己的数据库时,我无法正确地重建输入图像。这是我在python中的代码

from __future__ import division, print_function, absolute_import
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
from sklearn.utils import shuffle
from sklearn.cross_validation import  train_test_split
import glob
import scipy.misc as ms
import matplotlib.cm as cm
# ---- Dataset loading & preprocessing -------------------------------------
# Read every *.ppm gesture image, convert to grayscale, resize to 50x50,
# flatten to a 2500-vector, scale to [0, 1], and build one-hot labels.
path1 = 'gestures dataset/dataset'  # forward slashes work on every platform (was a raw backslash path)
img_dimension = 28  # NOTE(review): unused below — images are actually resized to 50x50
listimg = os.listdir(path1)
num_sample = np.size(listimg)
imageread = []
originalimage = []
class_no = 6

# One integer label per sample, assuming the files form `class_no`
# equal-sized contiguous runs, one run per class.
# NOTE(review): glob() returns files in arbitrary order — it is not
# guaranteed to match this grouping; consider sorting the file list and
# deriving labels from the file names. TODO confirm against the dataset.
label = np.ones((num_sample,), dtype=int)
samples_per_class = int(num_sample / class_no)
start = 0
for class_idx in range(class_no):
    label[start:start + samples_per_class] = class_idx
    start += samples_per_class

# One-hot encode the integer labels (vectorized, same result as the
# original element-by-element loop).
labels = np.zeros((num_sample, class_no), dtype=int)
labels[np.arange(num_sample), label] = 1

for image in glob.glob('gestures dataset/dataset/*.ppm'):
    # PIL handles both the grayscale conversion and the nearest-neighbour
    # resize; scipy.misc.imresize was removed in SciPy >= 1.3.
    img = Image.open(image).convert('L').resize((50, 50), Image.NEAREST)
    im_array = np.array(img)
    imageread.append(im_array.flatten())    # m*nn
    originalimage.append(im_array.flatten())

originalimage = np.array(originalimage)
originalimage = originalimage / 255  # scale pixel values into [0, 1]

# Shuffle samples and labels together, then split 80/20 into train/test.
# NOTE(review): `sklearn.cross_validation` (imported above) was removed in
# modern scikit-learn — use `sklearn.model_selection.train_test_split`.
data, labels = shuffle(originalimage, labels, random_state=2)
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.2, random_state=4)  # fixed: source had the broken token "test_si     ze"
# ---- Hyperparameters -----------------------------------------------------
learning_rate = 0.01
training_epochs = 1000
batch_size = 256  # NOTE(review): defined but never used — training below feeds the full set each epoch
display_step = 1  # print the loss every `display_step` epochs
examples_to_show = 5  # number of images to visualise at the end
n_hidden_1 = 50 # 1st layer num features
n_hidden_2 = 30 # 2nd layer num features
n_hidden_3 = 20 # bottleneck (latent code) size
n_input = 2500 # flattened 50*50 grayscale gesture image (NOT MNIST's 28*28)
# Placeholder for a batch of flattened input images, shape (batch, 2500).
X = tf.placeholder("float", [None, n_input])

# Weight matrices for the symmetric 2500-50-30-20-30-50-2500 autoencoder,
# initialised from a standard normal distribution.
weights = {
    'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1,n_hidden_2])),
    'encoder_h3': tf.Variable(tf.random_normal([n_hidden_2,n_hidden_3])),
    'decoder_h1': tf.Variable(tf.random_normal([n_hidden_3,n_hidden_2])),
    'decoder_h2': tf.Variable(tf.random_normal([n_hidden_2,n_hidden_1])),
    'decoder_h3': tf.Variable(tf.random_normal([n_hidden_1, n_input])) 
}

# Bias vectors, one per layer, matching the output width of each weight.
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'encoder_b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'decoder_b1': tf.Variable(tf.random_normal([n_hidden_2])),
    'decoder_b2': tf.Variable(tf.random_normal([n_hidden_1])),
    'decoder_b3': tf.Variable(tf.random_normal([n_input]))
}
# Building the encoder
def encoder(x):
    """Compress a batch of flattened images into the 20-unit latent code.

    Applies three fully connected sigmoid layers, pulling the parameters
    from the module-level `weights` and `biases` dictionaries.
    """
    code = x
    for idx in (1, 2, 3):
        w = weights['encoder_h%d' % idx]
        b = biases['encoder_b%d' % idx]
        code = tf.nn.sigmoid(tf.add(tf.matmul(code, w), b))
    return code


# Building the decoder
def decoder(x):
    """Expand the 20-unit latent code back into a 2500-pixel reconstruction.

    Mirrors `encoder`: three fully connected sigmoid layers using the
    module-level `weights` and `biases` dictionaries.
    """
    reconstruction = x
    for idx in (1, 2, 3):
        w = weights['decoder_h%d' % idx]
        b = biases['decoder_b%d' % idx]
        reconstruction = tf.nn.sigmoid(tf.add(tf.matmul(reconstruction, w), b))
    return reconstruction

# ---- Build the graph, train, and visualise reconstructions ---------------
# Construct model: reconstruction = decoder(encoder(input)).
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)

# Prediction is the reconstructed image; the target is the input itself.
y_pred = decoder_op
y_true = X

# Mean squared reconstruction error, minimised with RMSProp.
cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)

# Initializing the variables
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init)

for epoch in range(training_epochs):
    # NOTE(review): the whole training set is fed as one batch; consider
    # mini-batches of `batch_size` samples for better convergence.
    _, c = sess.run([optimizer, cost], feed_dict={X: x_train})
    # Display logs per epoch step
    if epoch % display_step == 0:
        print("Epoch:", '%04d' % (epoch + 1),
              "cost=", "{:.9f}".format(c))

print("Optimization Finished!")

# Reconstruct held-out TEST images so that row 1 (originals) and row 2
# (reconstructions) show the SAME samples.  Bug fix: the original code
# fed x_train here while displaying x_test originals below, so the two
# rows never corresponded to each other.
encode_decode = sess.run(
    y_pred, feed_dict={X: x_test[:examples_to_show]})

# Compare original images with their reconstructions.
f, a = plt.subplots(2, 5, figsize=(10, 2))
for i in range(examples_to_show):
    a[0][i].imshow(np.reshape(x_test[i], (50, 50)), cmap=cm.Greys_r)
    a[1][i].imshow(np.reshape(encode_decode[i], (50, 50)), cmap=cm.Greys_r)
f.show()
plt.draw()

这是我的输入图像和相应的重建图像。第一行表示输入图像,第二行表示重建图像

reconstructed images

我的问题是如何从重建图像中消除噪音?

0 个答案:

没有答案