I implemented a convolutional autoencoder in TensorFlow, using a pretrained VGG model as the encoder, and I compute a reconstruction loss. The TF session fails to run because of incompatible shapes in the loss computation: [32, 150528] vs. [32, 301056]. Note that 224 * 224 * 3 = 150528, and 301056 is exactly twice that, so the reconstructed side seems to hold twice as many elements per sample. Is this a dimension problem somewhere in my network? A few checks I ran are at the end of the post.
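For reference, here is how the element counts line up, as a standalone numpy sketch (independent of the model) that also shows how a reshape with -1 folds surplus elements into the batch axis. The (32, 512) shape below mirrors my bottleneck, and [-1, 4, 4, 16] mirrors the reshape in my decoder:

import numpy as np

print(224 * 224 * 3)    # 150528: one flattened 224x224x3 image, i.e. the input side
print(301056 // 150528) # 2: the reconstructed side has twice as many elements per row

# a reshape with -1 does not fail as long as the element count divides evenly;
# surplus elements are folded into the leading (batch) axis instead
x = np.zeros((32, 512), dtype=np.float32)
print(x.reshape(-1, 4, 4, 16).shape)  # (64, 4, 4, 16): 32 * 512 / (4 * 4 * 16) = 64
print(64 * 224 * 224 * 3 // 32)       # 301056: 64 images flattened into 32 rows

My full code: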
import glob
import random

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as tfs
from tensorflow.contrib.slim import nets


class VGG_Autoencoder(object):
    def __init__(self, model_path="vgg_saved_models/", learning_rate=1e-3):
        self.model_path = model_path
        self.learning_rate = learning_rate
        # 300x300 RGB placeholder, resized to the 224x224 that VGG-16 expects
        self.inputs = tf.placeholder(tf.float32, [None, 300, 300, 3], name='image')
        self.resized_image = tf.image.resize_images(self.inputs, [224, 224])
    def encoder(self, inputs):
        # frozen VGG-16 backbone with two trainable dense layers on top
        with tfs.arg_scope(nets.vgg.vgg_arg_scope()):
            _, layers = nets.vgg.vgg_16(inputs=inputs, is_training=False)
        fc7 = layers["vgg_16/fc7"]  # shape (?, 1, 1, 4096)
        fc8 = tf.layers.dense(fc7, units=1024, activation=tf.nn.relu, name="fc8")
        fc8_flat = tf.layers.flatten(fc8)
        # 512-dimensional bottleneck
        fc8_flat_dense = tf.layers.dense(fc8_flat, units=512, name="fc8_flatten_dense")
        return fc8_flat_dense
    def decoder(self, bottleneck):
        fc_bottleneck = tf.layers.dense(bottleneck, units=256, activation=tf.nn.relu, name="fc_bottleneck")
        # reshape to a 4x4x16 feature map; 4 * 4 * 16 = 256, while `bottleneck`
        # itself is 512-dimensional and `fc_bottleneck` is not used below
        bn_resh = tf.reshape(bottleneck, [-1, 4, 4, 16])
        # transposed-conv stack, spatially: 4 -> 8 -> 56 -> 112 -> 224
        # (see the stride trace at the end of the post)
        conv_9 = tf.layers.conv2d_transpose(bn_resh, filters=256, kernel_size=3, strides=1, padding='same',
                                            activation=tf.nn.relu)
        conv_10 = tf.layers.conv2d_transpose(conv_9, filters=256, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.relu)
        conv_11 = tf.layers.conv2d_transpose(conv_10, filters=128, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.relu)
        conv_12 = tf.layers.conv2d_transpose(conv_11, filters=128, kernel_size=3, strides=2, padding='same',
                                             activation=tf.nn.relu)
        conv_13 = tf.layers.conv2d_transpose(conv_12, filters=64, kernel_size=3, strides=7, padding='same',
                                             activation=tf.nn.relu)
        conv_14 = tf.layers.conv2d_transpose(conv_13, filters=64, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.relu)
        conv_15 = tf.layers.conv2d_transpose(conv_14, filters=32, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.relu)
        conv_16 = tf.layers.conv2d_transpose(conv_15, filters=32, kernel_size=3, strides=2, padding='same',
                                             activation=tf.nn.relu)
        conv_17 = tf.layers.conv2d_transpose(conv_16, filters=16, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.relu)
        conv_18 = tf.layers.conv2d_transpose(conv_17, filters=16, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.relu)
        # final layer: 3 output channels (RGB), linear activation
        conv_19 = tf.layers.conv2d_transpose(conv_18, filters=3, kernel_size=3, strides=2, padding='same',
                                             activation=None)
        # output = tf.image.resize_images(conv_18, [224, 224])
        return conv_19
    def loss(self, input_image, reconstructed_image):
        # tf.Print is a no-op unless its return value is used downstream
        input_image = tf.Print(input_image, [tf.shape(input_image)])
        reconstructed_image = tf.Print(reconstructed_image, [tf.shape(reconstructed_image)])
        # flatten to one row per sample (batch size hard-coded to 32); the
        # [32, 150528] vs. [32, 301056] error is reported at the subtraction
        # below -- an isolated repro is at the end of the post
        input_image_ = tf.reshape(input_image, [32, -1])
        reconstructed_image_ = tf.reshape(reconstructed_image, [32, -1])
        # per-sample sum of squared errors, averaged over the batch
        reconstruction_loss = tf.reduce_sum(tf.square(input_image_ - reconstructed_image_), axis=1)
        loss = tf.reduce_mean(reconstruction_loss)
        return loss
    def optimizer(self, loss, learning_rate, global_step):
        # collect all variables except the original VGG fc8 classifier head
        restore_fn = tf.contrib.framework.get_variables_to_restore
        fc7_variables = restore_fn(exclude=["vgg_16/fc8"])
        # train only the variables added on top of the pretrained VGG (selected by index)
        sub_variables = fc7_variables[29:]
        init_var = tf.variables_initializer(sub_variables)  # unused; global init in train_nn() covers these
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss, var_list=sub_variables, global_step=global_step, name="fcn_train_op")
        return train_op
    def summary(self, input_image, reconstructed_image, loss):
        tf.summary.image('resized_image', input_image, 20)
        tf.summary.image('constructed_image', reconstructed_image, 20)
        tf.summary.scalar("loss", loss)
        merged = tf.summary.merge_all()
        return merged
    def data_iterator(self, batch_size):
        data_files = glob.glob('/home/WIN-UNI-DUE/sjmonagi/Desktop/dataset/Train/**/VAE_FloorPlan*', recursive=True)
        while True:
            # load a random data file, shuffle it, and yield a random contiguous batch
            data = np.load(random.sample(data_files, 1)[0])
            np.random.shuffle(data)
            N = data.shape[0]
            start = np.random.randint(0, N - batch_size)
            yield data[start:start + batch_size]
def train_nn():
    sess = tf.InteractiveSession()
    global_step = tf.Variable(0, name='global_step', trainable=False)
    writer = tf.summary.FileWriter('logdir_vgg')
    VGG = VGG_Autoencoder()
    model_name = VGG.model_path + 'model'
    batches = VGG.data_iterator(32)
    bottle_neck = VGG.encoder(VGG.resized_image)
    output_images = VGG.decoder(bottle_neck)
    loss = VGG.loss(VGG.resized_image, output_images)
    train_op = VGG.optimizer(loss, VGG.learning_rate, global_step=global_step)
    merged = VGG.summary(VGG.resized_image, output_images, loss)
    tf.global_variables_initializer().run()
    saver = tf.train.Saver(max_to_keep=1)
    try:
        saver.restore(sess, tf.train.latest_checkpoint(VGG.model_path))
        print("Model restored from: {}".format(VGG.model_path))
    except Exception:
        print("Could not restore saved model")
    step = global_step.eval()  # read after the restore so a resumed run keeps its counter
    while True:
        images = next(batches)  # keep the generator and the current batch in separate names
        _, loss_value, summary = sess.run([train_op, loss, merged],
                                          feed_dict={VGG.inputs: images})
        writer.add_summary(summary, step)
        if np.isnan(loss_value):
            raise ValueError('Loss value is NaN')
        if step % 10 == 0 and step > 0:
            print('step {}: training loss {:.6f}'.format(step, loss_value))
            saver.save(sess, model_name, global_step=global_step)
        if loss_value <= 35:
            print('step {}: training loss {:.6f}'.format(step, loss_value))
            saver.save(sess, model_name, global_step=global_step)
            break
        step += 1
if __name__ == "__main__":
    train_nn()
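What I have checked so far:

While debugging I print static shapes right after building the graph; a minimal sketch, run standalone against the class above (get_shape() returns the shape TensorFlow inferred at graph-construction time):

vgg_ae = VGG_Autoencoder()
code = vgg_ae.encoder(vgg_ae.resized_image)
recon = vgg_ae.decoder(code)
print(code.get_shape())   # (?, 512)
print(recon.get_shape())  # (?, 224, 224, 3)

Both static shapes look plausible because the batch axis is still unknown (?) at this point, which is why I added the tf.Print calls inside loss() to see the runtime shapes.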
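The decoder's spatial sizes look correct on paper: with padding='same', tf.layers.conv2d_transpose multiplies the spatial size by its stride, so the trace can be computed offline (plain-Python sketch):

size = 4  # spatial size after the 4x4 reshape
for stride in [1, 1, 1, 2, 7, 1, 1, 2, 1, 1, 2]:  # strides of conv_9 .. conv_19
    size *= stride
print(size)  # 224: matches the 224x224 input fed to VGG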
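Finally, the incompatibility itself reproduces in isolation. Because both shapes are fully static in this sketch, the error surfaces at graph construction (in my model it only appears when the session runs):

import tensorflow as tf

a = tf.zeros([32, 150528])
b = tf.zeros([32, 301056])
diff = a - b  # raises a ValueError about incompatible dimensions, 150528 vs. 301056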