我一直在尝试用 TensorFlow 实现卷积自动编码器,类似于 this tutorial 中用 Keras 完成的实现。
到目前为止,我的代码如下:
# Convolution kernels used by the hand-written encoder()/decoder() below.
# Each shape is [filter_height, filter_width, in_channels, out_channels],
# the layout tf.nn.conv2d expects for its filter argument. Values come from
# tf.random_normal called with only a shape, i.e. its default distribution.

# Encoder kernels: 1 -> 16 -> 8 -> 8 channels.
filter1, filter2, filter3 = (
    tf.Variable(tf.random_normal(shape))
    for shape in ([3, 3, 1, 16], [3, 3, 16, 8], [3, 3, 8, 8])
)

# Decoder kernels: 8 -> 8 -> 8 -> 16 -> 1 channels.
d_filter1, d_filter2, d_filter3, d_filter4 = (
    tf.Variable(tf.random_normal(shape))
    for shape in ([3, 3, 8, 8], [3, 3, 8, 8], [3, 3, 8, 16], [3, 3, 16, 1])
)
def encoder(input_img):
    """Encode an image batch with three conv + ReLU + max-pool stages.

    Each stage applies a stride-1 'SAME' convolution with one of the
    module-level kernels (filter1, filter2, filter3), a ReLU, and then a
    2x2 max-pool with stride 2 and 'SAME' padding.

    For a [-1, 28, 28, 1] input the stage outputs are
    [-1, 14, 14, 16], [-1, 7, 7, 8] and [-1, 4, 4, 8].

    Args:
        input_img: 4-D image tensor fed to the first convolution.

    Returns:
        The pooled output of the third stage.
    """
    unit_strides = [1, 1, 1, 1]
    features = input_img
    for kernel in (filter1, filter2, filter3):
        activated = tf.nn.relu(
            tf.nn.conv2d(features, kernel, strides=unit_strides, padding='SAME')
        )
        # 'SAME' pooling rounds odd sizes up, which is why 7x7 becomes 4x4.
        features = tf.layers.max_pooling2d(
            inputs=activated, pool_size=2, strides=2, padding='SAME'
        )
    return features
def decoder(encoded):
    """Decode the encoder output back to a single-channel image batch.

    Three conv + ReLU + 2x upsampling stages use the module-level kernels
    d_filter1, d_filter2 and d_filter3, followed by a final convolution
    with d_filter4 and a sigmoid.

    For a [-1, 4, 4, 8] input the upsampled stage outputs are
    [-1, 8, 8, 8], [-1, 16, 16, 8] and [-1, 28, 28, 16]; the result is
    [-1, 28, 28, 1].

    Args:
        encoded: 4-D tensor produced by the encoder.

    Returns:
        The sigmoid-activated reconstruction.
    """
    unit_strides = [1, 1, 1, 1]
    # The third stage uses 'VALID' padding so its 3x3 convolution trims
    # 16x16 down to 14x14, which then upsamples to exactly 28x28.
    stages = (
        (d_filter1, 'SAME'),
        (d_filter2, 'SAME'),
        (d_filter3, 'VALID'),
    )
    upsampled = encoded
    for kernel, pad_mode in stages:
        activated = tf.nn.relu(
            tf.nn.conv2d(upsampled, kernel, strides=unit_strides, padding=pad_mode)
        )
        upsampled = tf.keras.layers.UpSampling2D((2, 2))(activated)
    pre_activation = tf.nn.conv2d(
        upsampled, d_filter4, strides=unit_strides, padding='SAME'
    )
    return tf.nn.sigmoid(pre_activation)
# Build the graph: batches of 28x28 single-channel images in, reconstruction out.
x = tf.placeholder(tf.float32, [None, 28, 28, 1])
encoded = encoder(x)
# The posted code called decoder(mid), but `mid` is never defined. The decoder
# is fed the encoder output and built once; both names refer to that tensor.
decoded = decoder(encoded)
autoencoder = decoded

# Mean binary cross-entropy between the input and its reconstruction.
loss = tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true=x, y_pred=autoencoder))
# NOTE(review): learning rate kept as posted; the Keras tutorial this mirrors
# is reported to use Adadelta's default learning rate of 1.0.
optimizer = tf.train.AdadeltaOptimizer(learning_rate=0.1).minimize(loss)

batch_size = 128
epochs = 50
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Samples past the last full batch are not used.
    num_batches = x_train.shape[0] // batch_size
    for epoch in range(epochs):
        avg_epoch_loss = 0.0
        for k in range(num_batches):
            batch_x = x_train[k * batch_size:(k + 1) * batch_size]
            feed_dict = {x: batch_x.reshape([-1, 28, 28, 1])}
            _, l = sess.run([optimizer, loss], feed_dict=feed_dict)
            avg_epoch_loss += l
            if k % 100 == 0:
                # Single-argument print(...) runs on both Python 2 and 3.
                print('Step {}/{} of epoch {}/{} completed with loss {}'.format(k, num_batches, epoch, epochs, l))
        avg_epoch_loss /= num_batches
        print('Epoch {}/{} completed with average loss {}'.format(epoch, epochs, avg_epoch_loss))
        # NOTE(review): the original indentation was lost; saving a checkpoint
        # and previewing a reconstruction once per epoch is assumed here.
        # Dedent these lines to run them only once after training.
        saver.save(sess=sess, save_path='./model.ckpt')
        img = sess.run(autoencoder, feed_dict={x: x_test[0].reshape([1, 28, 28, 1])}).reshape(28, 28)
        plt.imshow(img, cmap='gray')
        plt.show()
当我训练这个模型时,损失值起初会下降,但随后就停留在一个相同的(较高的)值附近。但是,当我用上面链接中的 Keras 写法替换 encoder 和 decoder 函数时,损失会以合理的速率下降并收敛到较低的值。
def encoder(input_img):
    """Build the Keras encoder: three Conv2D + MaxPooling2D stages.

    Args:
        input_img: Input tensor fed to the first convolution.

    Returns:
        The output of the third max-pooling layer.
    """
    # The first layer's output must be bound to `x`. The posted code dropped
    # this assignment, so the pooling layer below never received the output
    # of the 16-filter convolution.
    x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
    x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
    encoded = MaxPooling2D((2, 2), padding='same')(x)
    return encoded
def decoder(encoded):
    """Build the Keras decoder: three Conv2D + UpSampling2D stages, then a
    single-filter sigmoid Conv2D.

    Args:
        encoded: Tensor produced by the encoder.

    Returns:
        The output of the final sigmoid convolution.
    """
    layer_stack = [
        Conv2D(8, (3, 3), activation='relu', padding='same'),
        UpSampling2D((2, 2)),
        Conv2D(8, (3, 3), activation='relu', padding='same'),
        UpSampling2D((2, 2)),
        # No padding argument here, unlike the other convolutions.
        Conv2D(16, (3, 3), activation='relu'),
        UpSampling2D((2, 2)),
        Conv2D(1, (3, 3), activation='sigmoid', padding='same'),
    ]
    decoded = encoded
    for layer in layer_stack:
        decoded = layer(decoded)
    return decoded
我试图找出这两种实现之间的区别,并且已经反复检查了多次,看起来我的实现应该与 Keras 版本完全相同。如果有人能帮我弄清楚问题出在哪里,我将不胜感激!
答案 0 :(得分:0)
代码中一个简单而明显的问题是您没有正确初始化过滤器(卷积核)。请尝试下面的写法,它可能会有效。您还可以尝试其他更复杂的初始化方案,例如 Xavier Initializer。
# Same kernel shapes as in the question, but drawn from a narrow normal
# distribution (mean 0, standard deviation 0.01).
# tf.random_normal names this parameter `stddev`; passing `std` raises a
# TypeError for an unexpected keyword argument.
filter1 = tf.Variable(tf.random_normal([3, 3, 1, 16], mean=0.0, stddev=0.01))
filter2 = tf.Variable(tf.random_normal([3, 3, 16, 8], mean=0.0, stddev=0.01))
filter3 = tf.Variable(tf.random_normal([3, 3, 8, 8], mean=0.0, stddev=0.01))
d_filter1 = tf.Variable(tf.random_normal([3, 3, 8, 8], mean=0.0, stddev=0.01))
d_filter2 = tf.Variable(tf.random_normal([3, 3, 8, 8], mean=0.0, stddev=0.01))
d_filter3 = tf.Variable(tf.random_normal([3, 3, 8, 16], mean=0.0, stddev=0.01))
d_filter4 = tf.Variable(tf.random_normal([3, 3, 16, 1], mean=0.0, stddev=0.01))
答案 1 :(得分:0)
您的版本与您所引用的 Keras 代码之间的差异在于优化器的学习率。不同的学习率可能导致完全不同的结果(收敛、发散或不稳定)。
本教程使用值为1.0的default learning rate。您的版本使用相同的优化程序(AdaDelta),但您将学习速率设置为0.1。
如果把学习率设置为相同的值,结果会怎样?(另外,检查 rho、epsilon 和 decay(衰减)是否也取相同的值可能会有帮助。)