This is my code. I print the validation loss and it stays constant, and I genuinely don't know why. I have looked through plenty of code examples, and I do not pass the training op to the TensorFlow session when validating, so the weights should not be updated during validation. What am I missing here? (I also sketch a small sanity check for this at the end of the post.)

import tensorflow as tf
import numpy as np
import glob, random, os
# tf.logging.set_verbosity(tf.logging.ERROR)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
model_path = "saved_models/"
model_name = model_path + 'model'
training_dataset_path = "Dataset/training"
validation_dataset_path = "Dataset/validation"
training_iter = 100000
validation_iter = 1000
total_t = 0
validation_period = 5000
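# Convolutional autoencoder: resizes and standardizes the input image, encodes it to a 512-d latent vector, and reconstructs it; loss and summaries are set up in __init__.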
class Network(object):
# Create model
def __init__(self):
self.image = tf.placeholder(tf.float32, [None, 300, 300, 3], name='image')
self.resized_image = tf.image.resize_images(self.image, [256, 256])
self.normalized_image = tf.image.per_image_standardization(self.resized_image)
tf.summary.image('resized', self.normalized_image, 20)
self.save_path = 'AE_model/ae.ckpt'
self.z_mu = self.encoder(self.normalized_image)
self.reconstructions = self.decoder(self.z_mu)
tf.summary.image('reconstructed_normalized_image', self.reconstructions, 20)
tf.summary.histogram("reconstructed", self.reconstructions)
self.loss = self.compute_loss()
tf.summary.scalar('loss', self.loss)
self.merged = tf.summary.merge_all()
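# Encoder: strided 3x3 convolutions downsample the 256x256 input to 8x8 feature maps, then a dense layer maps the flattened features to the 512-d latent z.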
def encoder(self, x):
conv_1 = tf.layers.conv2d(x, filters=4, kernel_size=3, strides=2, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_1_1 = tf.layers.conv2d(conv_1, filters=4, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_2 = tf.layers.conv2d(conv_1_1, filters=8, kernel_size=3, strides=2, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_2_1 = tf.layers.conv2d(conv_2, filters=8, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_2_2 = tf.layers.conv2d(conv_2_1, filters=8, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_3 = tf.layers.conv2d(conv_2_2, filters=16, kernel_size=3, strides=2, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_3_1 = tf.layers.conv2d(conv_3, filters=16, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_4 = tf.layers.conv2d(conv_3_1, filters=32, kernel_size=3, strides=2, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_4_1 = tf.layers.conv2d(conv_4, filters=32, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_4_2 = tf.layers.conv2d(conv_4_1, filters=32, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_5 = tf.layers.conv2d(conv_4_2, filters=64, kernel_size=3, strides=2, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_5_1 = tf.layers.conv2d(conv_5, filters=64, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_6 = tf.layers.conv2d(conv_5_1, filters=128, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_6_1 = tf.layers.conv2d(conv_6, filters=128, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
x = tf.layers.flatten(conv_6_1)
z = tf.layers.dense(x, units=512, name='z_mu')
return z
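# Decoder: dense layer reshaped to 8x8x16, then transposed convolutions upsample back to a 256x256x3 reconstruction.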
def decoder(self, z):
x = tf.layers.dense(z, 1024, activation=tf.nn.leaky_relu)  # 1024 = 8 * 8 * 16, matches the reshape below
x = tf.reshape(x, [-1, 8, 8, 16])
conv_8 = tf.layers.conv2d_transpose(x, filters=256, kernel_size=3, strides=2, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_8_1 = tf.layers.conv2d_transpose(conv_8, filters=256, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_9 = tf.layers.conv2d_transpose(conv_8_1, filters=128, kernel_size=3, strides=2, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_9_1 = tf.layers.conv2d_transpose(conv_9, filters=128, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_9_2 = tf.layers.conv2d_transpose(conv_9_1, filters=128, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_10 = tf.layers.conv2d_transpose(conv_9_2, filters=64, kernel_size=3, strides=2, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_10_1 = tf.layers.conv2d_transpose(conv_10, filters=64, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_11 = tf.layers.conv2d_transpose(conv_10_1, filters=32, kernel_size=3, strides=2, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_11_1 = tf.layers.conv2d_transpose(conv_11, filters=32, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_11_2 = tf.layers.conv2d_transpose(conv_11_1, filters=32, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_12 = tf.layers.conv2d_transpose(conv_11_2, filters=16, kernel_size=3, strides=2, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_12_1 = tf.layers.conv2d_transpose(conv_12, filters=16, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_13 = tf.layers.conv2d_transpose(conv_12_1, filters=3, kernel_size=3, strides=1, padding='same',
activation=tf.nn.leaky_relu,
kernel_initializer=tf.initializers.he_normal())
conv_13_1 = tf.layers.conv2d_transpose(conv_13, filters=3, kernel_size=3, strides=1, padding='same',
activation=None,
kernel_initializer=tf.initializers.he_normal())
return conv_13_1
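# Loss: per-image sum of squared errors between the normalized input and the reconstruction, averaged over the batch (no KL term, so this is a plain autoencoder loss).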
def compute_loss(self):
batch_shape = tf.shape(self.normalized_image)[0]
logits_flat = tf.reshape(self.reconstructions, [batch_shape, -1])
labels_flat = tf.reshape(self.normalized_image, [batch_shape, -1])
reconstruction_loss = tf.reduce_sum(tf.square(logits_flat - labels_flat), axis=1)
vae_loss = tf.reduce_mean(reconstruction_loss)
return vae_loss
def load(self, sess):
self.saver = tf.train.Saver(tf.global_variables())
load_was_success = True
try:
save_dir = '/'.join(self.save_path.split('/')[:-1])
ckpt = tf.train.get_checkpoint_state(save_dir)
load_path = ckpt.model_checkpoint_path
self.saver.restore(sess, load_path)
except:
print("no saved model to load. starting new session")
load_was_success = False
else:
print("loaded model: {}".format(load_path))
saver = tf.train.Saver(tf.global_variables())
episode_number = int(load_path.split('-')[-1])
def save(self, sess, n):
self.saver.save(sess, self.save_path, global_step=n)
print("SAVED MODEL #{}".format(n))
def data_iterator(batch_size, path):
data_files = glob.glob(path + '/**/VAE_FloorPlan*', recursive=True)
while True:
data = np.load(random.sample(data_files, 1)[0])
np.random.shuffle(data)
N = data.shape[0]
start = np.random.randint(0, N - batch_size)
yield data[start:start + batch_size]
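# Training loop: Adam (lr 1e-3) on batches of 8, TensorBoard summaries for train/validation, a checkpoint every 1000 steps, and a validation pass every 1000 steps.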
def train_vae():
global_step = tf.Variable(0, name='global_step', trainable=False)
with tf.Session() as sess:
loss_writer = tf.summary.FileWriter('logdir_AE/train', sess.graph)
validation_writer = tf.summary.FileWriter('logdir_AE/validation')
network = Network()
train_op = tf.train.AdamOptimizer(1e-3).minimize(network.loss, global_step=global_step)
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(max_to_keep=1)
step = global_step.eval()
print(step)
training_data = data_iterator(batch_size=8, path=training_dataset_path)
validation_data = data_iterator(batch_size=4, path=validation_dataset_path)
try:
network.load(sess=sess)
except:
print("Could not restore saved model")
for step in range(training_iter):
training_images = next(training_data)
_, training_loss, loss_summary = sess.run([train_op, network.loss, network.merged],
feed_dict={network.image: training_images})
# print("reconstructed",network.reconstructions.eval(feed_dict={network.image: training_images}))
# print("input resized",network.normalized_image.eval(feed_dict={network.image: training_images}))
# print("input",network.image.eval(feed_dict={network.image: training_images}))
loss_writer.add_summary(loss_summary, step)
if np.isnan(training_loss):
raise ValueError('Loss value is NaN')
if step % 10 == 0 and step > 0:
print('step {}: training loss {:.6f}'.format(step, training_loss))
if step % 1000 == 0 and step > 0:
network.save(sess=sess, n=step)
if step % 1000 == 0: # validation
print("validation")
for i in range(validation_iter):
validation_images = next(validation_data)
validation_loss, validation_summary = sess.run([network.loss, network.merged],
feed_dict={network.image: validation_images})
validation_writer.add_summary(validation_summary, step)
print('step {}: validation loss {:.6f}'.format(step, validation_loss))
if training_loss <= 35:
print('step {}: training loss {:.6f}'.format(step, training_loss))
network.save(sess=sess, n=step)
break
step += 1
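# Rebuilds the network in a fresh graph/session and restores the latest checkpoint from model_path.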
def load_vae():
graph = tf.Graph()
with graph.as_default():
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config, graph=graph)
network = Network()
init = tf.global_variables_initializer()
sess.run(init)
saver = tf.train.Saver(max_to_keep=1)
training_data = data_iterator(batch_size=32, path=training_dataset_path)  # data_iterator requires a path; using the training set here
try:
saver.restore(sess, tf.train.latest_checkpoint(model_path))
except:
raise ImportError("Could not restore saved model")
return sess, network
if __name__ == "__main__":
train_vae()
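To convince myself that the validation run leaves the weights alone, this is the kind of check I had in mind (just a rough sketch; weights_unchanged_after is a helper I made up for this post, not part of the script above):

def weights_unchanged_after(sess, network, images):
    # Snapshot every trainable variable, run a loss-only step (no train_op in the
    # fetches), then snapshot again and compare element-wise.
    before = sess.run(tf.trainable_variables())
    sess.run(network.loss, feed_dict={network.image: images})
    after = sess.run(tf.trainable_variables())
    return all(np.array_equal(b, a) for b, a in zip(before, after))

If I call it inside train_vae, e.g. print(weights_unchanged_after(sess, network, next(validation_data))), I would expect it to print True as long as no training op is in the fetches.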