I implemented a convolutional autoencoder in TensorFlow, using a pretrained VGG model as the encoder, and I compute a reconstruction loss. The TF session fails to run because of incompatible shapes in the loss computation: [32, 150528] vs. [32, 301056]. Note that 224 * 224 * 3 = 150528, and 301056 is exactly twice that, so the reconstructed side seems to hold twice as many elements per sample. Is this a dimension problem somewhere in my network? A few checks I ran are at the end of the post.
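For reference, here is how the element counts line up, as a standalone numpy sketch (independent of the model) that also shows how a reshape with -1 folds surplus elements into the batch axis. The (32, 512) shape below mirrors my bottleneck, and [-1, 4, 4, 16] mirrors the reshape in my decoder:

import numpy as np

print(224 * 224 * 3)    # 150528: one flattened 224x224x3 image, i.e. the input side
print(301056 // 150528) # 2: the reconstructed side has twice as many elements per row

# a reshape with -1 does not fail as long as the element count divides evenly;
# surplus elements are folded into the leading (batch) axis instead
x = np.zeros((32, 512), dtype=np.float32)
print(x.reshape(-1, 4, 4, 16).shape)  # (64, 4, 4, 16): 32 * 512 / (4 * 4 * 16) = 64
print(64 * 224 * 224 * 3 // 32)       # 301056: 64 images flattened into 32 rows

My full code: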
import glob
import random

import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as tfs
from tensorflow.contrib.slim import nets


class VGG_Autoencoder(object):
    def __init__(self, model_path="vgg_saved_models/", learning_rate=1e-3):
        self.model_path = model_path
        self.learning_rate = learning_rate
        # 300x300 RGB placeholder, resized to the 224x224 that VGG-16 expects
        self.inputs = tf.placeholder(tf.float32, [None, 300, 300, 3], name='image')
        self.resized_image = tf.image.resize_images(self.inputs, [224, 224])
    def encoder(self, inputs):
        # frozen VGG-16 backbone with two trainable dense layers on top
        with tfs.arg_scope(nets.vgg.vgg_arg_scope()):
            _, layers = nets.vgg.vgg_16(inputs=inputs, is_training=False)
        fc7 = layers["vgg_16/fc7"]  # shape (?, 1, 1, 4096)
        fc8 = tf.layers.dense(fc7, units=1024, activation=tf.nn.relu, name="fc8")
        fc8_flat = tf.layers.flatten(fc8)
        # 512-dimensional bottleneck
        fc8_flat_dense = tf.layers.dense(fc8_flat, units=512, name="fc8_flatten_dense")
        return fc8_flat_dense
    def decoder(self, bottleneck):
        fc_bottleneck = tf.layers.dense(bottleneck, units=256, activation=tf.nn.relu, name="fc_bottleneck")
        # reshape to a 4x4x16 feature map; 4 * 4 * 16 = 256, while `bottleneck`
        # itself is 512-dimensional and `fc_bottleneck` is not used below
        bn_resh = tf.reshape(bottleneck, [-1, 4, 4, 16])
        # transposed-conv stack, spatially: 4 -> 8 -> 56 -> 112 -> 224
        # (see the stride trace at the end of the post)
        conv_9 = tf.layers.conv2d_transpose(bn_resh, filters=256, kernel_size=3, strides=1, padding='same',
                                            activation=tf.nn.relu)
        conv_10 = tf.layers.conv2d_transpose(conv_9, filters=256, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.relu)
        conv_11 = tf.layers.conv2d_transpose(conv_10, filters=128, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.relu)
        conv_12 = tf.layers.conv2d_transpose(conv_11, filters=128, kernel_size=3, strides=2, padding='same',
                                             activation=tf.nn.relu)
        conv_13 = tf.layers.conv2d_transpose(conv_12, filters=64, kernel_size=3, strides=7, padding='same',
                                             activation=tf.nn.relu)
        conv_14 = tf.layers.conv2d_transpose(conv_13, filters=64, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.relu)
        conv_15 = tf.layers.conv2d_transpose(conv_14, filters=32, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.relu)
        conv_16 = tf.layers.conv2d_transpose(conv_15, filters=32, kernel_size=3, strides=2, padding='same',
                                             activation=tf.nn.relu)
        conv_17 = tf.layers.conv2d_transpose(conv_16, filters=16, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.relu)
        conv_18 = tf.layers.conv2d_transpose(conv_17, filters=16, kernel_size=3, strides=1, padding='same',
                                             activation=tf.nn.relu)
        # final layer: 3 output channels (RGB), linear activation
        conv_19 = tf.layers.conv2d_transpose(conv_18, filters=3, kernel_size=3, strides=2, padding='same',
                                             activation=None)
        # output = tf.image.resize_images(conv_18, [224, 224])
        return conv_19
    def loss(self, input_image, reconstructed_image):
        # tf.Print is a no-op unless its return value is used downstream
        input_image = tf.Print(input_image, [tf.shape(input_image)])
        reconstructed_image = tf.Print(reconstructed_image, [tf.shape(reconstructed_image)])
        # flatten to one row per sample (batch size hard-coded to 32); the
        # [32, 150528] vs. [32, 301056] error is reported at the subtraction
        # below -- an isolated repro is at the end of the post
        input_image_ = tf.reshape(input_image, [32, -1])
        reconstructed_image_ = tf.reshape(reconstructed_image, [32, -1])
        # per-sample sum of squared errors, averaged over the batch
        reconstruction_loss = tf.reduce_sum(tf.square(input_image_ - reconstructed_image_), axis=1)
        loss = tf.reduce_mean(reconstruction_loss)
        return loss
    def optimizer(self, loss, learning_rate, global_step):
        # collect all variables except the original VGG fc8 classifier head
        restore_fn = tf.contrib.framework.get_variables_to_restore
        fc7_variables = restore_fn(exclude=["vgg_16/fc8"])
        # train only the variables added on top of the pretrained VGG (selected by index)
        sub_variables = fc7_variables[29:]
        init_var = tf.variables_initializer(sub_variables)  # unused; global init in train_nn() covers these
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        train_op = optimizer.minimize(loss, var_list=sub_variables, global_step=global_step, name="fcn_train_op")
        return train_op
    def summary(self, input_image, reconstructed_image, loss):
        tf.summary.image('resized_image', input_image, 20)
        tf.summary.image('constructed_image', reconstructed_image, 20)
        tf.summary.scalar("loss", loss)
        merged = tf.summary.merge_all()
        return merged
    def data_iterator(self, batch_size):
        data_files = glob.glob('/home/WIN-UNI-DUE/sjmonagi/Desktop/dataset/Train/**/VAE_FloorPlan*', recursive=True)
        while True:
            # load a random data file, shuffle it, and yield a random contiguous batch
            data = np.load(random.sample(data_files, 1)[0])
            np.random.shuffle(data)
            N = data.shape[0]
            start = np.random.randint(0, N - batch_size)
            yield data[start:start + batch_size]
def train_nn():
    sess = tf.InteractiveSession()
    global_step = tf.Variable(0, name='global_step', trainable=False)
    writer = tf.summary.FileWriter('logdir_vgg')
    VGG = VGG_Autoencoder()
    model_name = VGG.model_path + 'model'
    batches = VGG.data_iterator(32)
    bottle_neck = VGG.encoder(VGG.resized_image)
    output_images = VGG.decoder(bottle_neck)
    loss = VGG.loss(VGG.resized_image, output_images)
    train_op = VGG.optimizer(loss, VGG.learning_rate, global_step=global_step)
    merged = VGG.summary(VGG.resized_image, output_images, loss)
    tf.global_variables_initializer().run()
    saver = tf.train.Saver(max_to_keep=1)
    try:
        saver.restore(sess, tf.train.latest_checkpoint(VGG.model_path))
        print("Model restored from: {}".format(VGG.model_path))
    except Exception:
        print("Could not restore saved model")
    step = global_step.eval()  # read after the restore so a resumed run keeps its counter
    while True:
        images = next(batches)  # keep the generator and the current batch in separate names
        _, loss_value, summary = sess.run([train_op, loss, merged],
                                          feed_dict={VGG.inputs: images})
        writer.add_summary(summary, step)
        if np.isnan(loss_value):
            raise ValueError('Loss value is NaN')
        if step % 10 == 0 and step > 0:
            print('step {}: training loss {:.6f}'.format(step, loss_value))
            saver.save(sess, model_name, global_step=global_step)
        if loss_value <= 35:
            print('step {}: training loss {:.6f}'.format(step, loss_value))
            saver.save(sess, model_name, global_step=global_step)
            break
        step += 1
if __name__ == "__main__":
    train_nn()
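What I have checked so far:

While debugging I print static shapes right after building the graph; a minimal sketch, run standalone against the class above (get_shape() returns the shape TensorFlow inferred at graph-construction time):

vgg_ae = VGG_Autoencoder()
code = vgg_ae.encoder(vgg_ae.resized_image)
recon = vgg_ae.decoder(code)
print(code.get_shape())   # (?, 512)
print(recon.get_shape())  # (?, 224, 224, 3)

Both static shapes look plausible because the batch axis is still unknown (?) at this point, which is why I added the tf.Print calls inside loss() to see the runtime shapes.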
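The decoder's spatial sizes look correct on paper: with padding='same', tf.layers.conv2d_transpose multiplies the spatial size by its stride, so the trace can be computed offline (plain-Python sketch):

size = 4  # spatial size after the 4x4 reshape
for stride in [1, 1, 1, 2, 7, 1, 1, 2, 1, 1, 2]:  # strides of conv_9 .. conv_19
    size *= stride
print(size)  # 224: matches the 224x224 input fed to VGG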
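Finally, the incompatibility itself reproduces in isolation. Because both shapes are fully static in this sketch, the error surfaces at graph construction (in my model it only appears when the session runs):

import tensorflow as tf

a = tf.zeros([32, 150528])
b = tf.zeros([32, 301056])
diff = a - b  # raises a ValueError about incompatible dimensions, 150528 vs. 301056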