I am trying to reconstruct the input images of my own database with a stacked autoencoder in TensorFlow. If I use the MNIST database, the input images are reconstructed correctly, but when I apply the network to my own database the reconstructions come out wrong. Here is my Python code:
from __future__ import division, print_function, absolute_import
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
from PIL import Image
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split  # sklearn.cross_validation is removed in newer scikit-learn
import glob
import scipy.misc as ms
import matplotlib.cm as cm
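# Load the gesture images and build one-hot class labels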
path1 = 'gestures dataset/dataset'   # forward slashes avoid backslash-escape issues
img_dimension = 50                   # images are resized to 50x50 below
listimg = os.listdir(path1)
num_sample = np.size(listimg)
imageread=[]
originalimage=[]
class_no=6
label=np.ones((num_sample),dtype=int)
labels=np.zeros((num_sample,class_no),dtype=int)
i=int(num_sample/class_no)
k=0
lastindex=i
for j in range(class_no):
    label[k:lastindex] = j
    k = lastindex
    lastindex += i
for i in range(len(labels)):
    labels[i][label[i]] = 1
for image in glob.glob('gestures dataset/dataset/*.ppm'):
    img = np.array(Image.open(image).convert('L'))
    img = ms.imresize(img, (50, 50), 'nearest')   # scipy.misc.imresize needs SciPy < 1.3
    im_array = np.array(img)
    imageread.append(im_array.flatten())          # each image flattened to a 2500-vector
    originalimage.append(im_array.flatten())
originalimage=np.array(originalimage)
originalimage=originalimage/255
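# Shuffle images together with their labels and split into train / test sets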
data, labels = shuffle(originalimage, labels, random_state=2)
train = [data, labels]
train_data, train_label = (train[0], train[1])
x_train, x_test, y_train, y_test = train_test_split(train_data, train_label,
                                                    test_size=0.2, random_state=4)
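# Training parameters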
learning_rate = 0.01
training_epochs = 1000
batch_size = 256
display_step = 1
examples_to_show = 5
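# Network parameters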
n_hidden_1 = 50 # 1st layer num features
n_hidden_2 = 30 # 2nd layer num features
n_hidden_3 = 20
n_input = 2500 # input size (img shape: 50*50)
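# tf Graph input (one flattened 50x50 grey-scale image per row)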
X = tf.placeholder("float", [None, n_input])
weights = {
    'encoder_h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'encoder_h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'encoder_h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3])),
    'decoder_h1': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_2])),
    'decoder_h2': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_1])),
    'decoder_h3': tf.Variable(tf.random_normal([n_hidden_1, n_input]))
}
biases = {
    'encoder_b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'encoder_b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'encoder_b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'decoder_b1': tf.Variable(tf.random_normal([n_hidden_2])),
    'decoder_b2': tf.Variable(tf.random_normal([n_hidden_1])),
    'decoder_b3': tf.Variable(tf.random_normal([n_input]))
}
# Building the encoder
def encoder(x):
    # Encoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['encoder_h1']),
                                   biases['encoder_b1']))
    # Encoder Hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['encoder_h2']),
                                   biases['encoder_b2']))
    layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['encoder_h3']),
                                   biases['encoder_b3']))
    return layer_3
# Building the decoder
def decoder(x):
    # Decoder Hidden layer with sigmoid activation #1
    layer_1 = tf.nn.sigmoid(tf.add(tf.matmul(x, weights['decoder_h1']),
                                   biases['decoder_b1']))
    # Decoder Hidden layer with sigmoid activation #2
    layer_2 = tf.nn.sigmoid(tf.add(tf.matmul(layer_1, weights['decoder_h2']),
                                   biases['decoder_b2']))
    layer_3 = tf.nn.sigmoid(tf.add(tf.matmul(layer_2, weights['decoder_h3']),
                                   biases['decoder_b3']))
    return layer_3
# Construct model
encoder_op = encoder(X)
decoder_op = decoder(encoder_op)
# Prediction
y_pred = decoder_op
# Targets (Labels) are the input data.
y_true = X
# Define loss and optimizer, minimize the squared error
cost = tf.reduce_mean(tf.pow(y_true - y_pred, 2))
optimizer = tf.train.RMSPropOptimizer(learning_rate).minimize(cost)
# Initializing the variables
init = tf.global_variables_initializer()
sess = tf.InteractiveSession()
sess.run(init)
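# Training cycle (the whole training set is fed as a single batch each epoch)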
for epoch in range(training_epochs):
    _, c = sess.run([optimizer, cost], feed_dict={X: x_train})
    # Display logs per epoch step
    if epoch % display_step == 0:
        print("Epoch:", '%04d' % (epoch + 1),
              "cost=", "{:.9f}".format(c))
print("Optimization Finished!")
# Applying encode and decode over the test set (the same images that are displayed below)
encode_decode = sess.run(
    y_pred, feed_dict={X: x_test[:examples_to_show]})
# Compare original images with their reconstructions
f, a = plt.subplots(2, 5, figsize=(10, 2))
for i in range(examples_to_show):
    a[0][i].imshow(np.reshape(x_test[i], (50, 50)), cmap=cm.Greys_r)
    a[1][i].imshow(np.reshape(encode_decode[i], (50, 50)), cmap=cm.Greys_r)
f.show()
plt.draw()
Here are my input images and the corresponding reconstructed images: the first row shows the input images and the second row shows the reconstructions.
My question is: how can I remove the noise from the reconstructed images?
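For reference, the batch_size parameter defined above is currently unused, because every epoch feeds the whole training set at once. A mini-batch version of the training loop would look roughly like the sketch below; the next_batch generator is only an illustrative helper, not something that exists in my code or in TensorFlow:

# Rough sketch only: mini-batch version of the training loop above.
# next_batch is a hypothetical helper that yields shuffled batches of rows.
def next_batch(data, batch_size):
    idx = np.random.permutation(len(data))
    for start in range(0, len(data), batch_size):
        yield data[idx[start:start + batch_size]]

for epoch in range(training_epochs):
    for batch_x in next_batch(x_train, batch_size):
        _, c = sess.run([optimizer, cost], feed_dict={X: batch_x})
    if epoch % display_step == 0:
        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(c))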