我使用图层API编写了编码器和解码器功能。两者都是3层深。
def Enocder(real_img):
with tf.variable_scope("encoder"):
conv1 = tf.layers.conv2d(inputs=X, filters=32, kernel_size=[
5, 5], use_bias=True, padding="same", activation=tf.nn.leaky_relu)
pool1 = tf.layers.max_pooling2d(
inputs=conv1, pool_size=[2, 2], padding="same", strides=[2, 2])
conv2 = tf.layers.conv2d(inputs=pool1, filters=64, kernel_size=[
5, 5], use_bias=True, padding="same", activation=tf.nn.leaky_relu)
pool2 = tf.layers.max_pooling2d(
inputs=conv2, pool_size=[2, 2], padding="same",strides=[2, 2])
conv3 = tf.layers.conv2d(inputs=pool2, filters=128, kernel_size=[
5, 5], use_bias=True, padding="same", activation=tf.nn.leaky_relu)
pool3 = tf.layers.max_pooling2d(
inputs=conv3, pool_size=[2, 2], padding="same", strides=[2, 2])
return pool3
def Decoder(Z):
with tf.variable_scope("decoder"):
deconv1 = tf.layers.conv2d_transpose(inputs=Z, filters=128, kernel_size=[
5, 5], padding="same", strides=[2, 2])
deconv2 = tf.layers.conv2d_transpose(inputs=deconv1, filters=64, kernel_size=[
5, 5], padding="same", strides=[2, 2])
deconv3 = tf.layers.conv2d_transpose(inputs=deconv2, filters=32, kernel_size=[
5, 5], padding="same", strides=[2, 2])
deconv4 = tf.layers.conv2d_transpose(inputs=deconv3, filters=1, kernel_size=[5,5], padding="same", strides=[2,2])
return deconv4
real_img = tf.placeholder(dtype=tf.float32, shape=[None, 784])
X = tf.reshape(real_img, [-1, 28, 28, 1])
enc = Enocder(X)
dec = Decoder(enc)
cost = tf.reduce_sum(tf.square(X - dec))
错误:
ValueError: Dimensions must be equal, but are 28 and 24 for 'sub' (op: 'Sub') with input shapes: [?,28,28,1], [?,64,64,1].
如何获得28x28形状的解码图像?
答案 0 :(得分:1)
tf.layers.max_pooling2d
也有一个padding
参数。对于卷积,将其设置为same
应解决此问题。否则,您的池将略微缩小输入(超出大步),如此处所示。
请注意,你还需要你的最后一个卷积转置图层才能使用1个滤镜 - 现在你的重建将是[?,28,28,32],但你需要[?,28,28,1]像输入