In the following code, I want to train a model in TensorFlow. The model is a ResNet, i.e. a deep model, so the batch size has to be small for the data and all the activations to fit in memory. For this reason, I implemented a custom optimizer that accumulates the gradients over several micro-batches fed in one after another, and only then applies a single gradient-descent step. In addition, I use the tf.data API to read the data from the tfrecords I created. Note that my input data are video frames; the variable detected indicates whether a face was detected in a given frame, so detected is only used inside the MSE (for illustration purposes).
import tensorflow as tf
import numpy as np
import csv
import os
num_epoch = 100
latent_dim = 100
cell_size = 100
# for each input frame, I have 3 outputs.
num_classes = 3
common = "C:/Users/user/Documents/SEWA_db/tfrecords_db/"
filenames_train = []
filenames_dev = []
for i in range(1, 35):
    filenames_train.append(common + "Train_DE_{num:02d}.tfrecords".format(num=i))
for i in range(1, 15):
    filenames_dev.append(common + "Devel_DE_{num:02d}.tfrecords".format(num=i))
phase_train = tf.placeholder_with_default(True, shape=(), name='phase')
train_batch_size = 5
test_batch_size = 5
tf.set_random_seed(123)
mseed = 123
# this function is used within model()...
def create_variables(name, shape, initializer=tf.contrib.layers.xavier_initializer(), weight_decay=0.0001):
    '''
    :param name: A string. The name of the new variable.
    :param shape: A list of dimensions.
    :param initializer: Uses Xavier as default.
    :param is_fc_layer: Want to create an fc-layer variable? May use a different weight_decay for fc layers.
    :return: The created variable.
    '''
    ## TODO: allow a different weight decay for fully connected and conv layers
    regularizer = tf.contrib.layers.l2_regularizer(scale=weight_decay)
    new_variables = tf.get_variable(name, shape=shape, initializer=initializer,
                                    regularizer=regularizer)
    return new_variables
def model(inputs, n):
    ...
    # predictions shape: (batch_size, 3)
    return predictions
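Since the full ResNet definition is omitted above, here is a hypothetical minimal stand-in for model() that keeps the two ingredients relevant to the question: variables created through create_variables() and batch normalization, which registers ops in the UPDATE_OPS collection. It is only a sketch for reproduction, not my actual network:
# Hypothetical minimal stand-in for model(): one conv + batch norm + fc head.
# It creates variables via create_variables() and registers UPDATE_OPS,
# which is all that matters for the error discussed below.
def toy_model(inputs, n):
    w = create_variables(name='conv', shape=[3, 3, 3, 8])
    x = tf.nn.conv2d(inputs, w, strides=[1, 1, 1, 1], padding='SAME')
    x = tf.layers.batch_normalization(x, training=phase_train)  # adds UPDATE_OPS
    x = tf.nn.relu(x)
    x = tf.reduce_mean(x, axis=[1, 2])  # global average pooling -> (batch_size, 8)
    w_fc = create_variables(name='fc', shape=[8, num_classes])
    return tf.matmul(x, w_fc)  # predictions shape: (batch_size, 3)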
# loss function:
summaries_while_testing = []
summaries_while_training = []
def loss(predictions, labels, detected, name_scope, train_test):
    # MSE
    with tf.name_scope(name_scope):
        MSE = tf.square(tf.subtract(predictions, labels))
        MSE = tf.boolean_mask(MSE, detected)
        MSE = tf.reduce_mean(MSE)
        if train_test == 'Train':
            reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            MSE += tf.reduce_sum(reg_losses)
            loss_s = tf.summary.scalar('MSE', MSE)
            summaries_while_training.append(loss_s)
        else:
            loss_s = tf.summary.scalar('MSE', MSE)
            summaries_while_testing.append(loss_s)
    return MSE
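For illustration, tf.boolean_mask is what drops the frames without a detected face before averaging. A toy example with made-up values (assuming the mask has already been cast to bool):
# Toy illustration of the masking step; values are made up.
squared_errors = tf.constant([[0.5], [0.2], [0.9], [0.1]])  # shape (4, 1)
mask = tf.constant([[True], [True], [False], [True]])       # frame 2 has no face
masked = tf.boolean_mask(squared_errors, mask)              # -> [0.5, 0.2, 0.1]
mse = tf.reduce_mean(masked)                                # -> 0.2666...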
# optimizer:
def optimize(mse):
    with tf.name_scope('Optimizer'):
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
            trainable_variables = tf.trainable_variables()
            # one (non-trainable) accumulator per trainable variable
            accum_vars = [tf.Variable(tf.zeros_like(single_tr_variable.value()), trainable=False)
                          for single_tr_variable in trainable_variables]
            # this is used as a reset between different training iterations...
            zero_ops = [tv.assign(tf.zeros_like(tv)) for tv in accum_vars]
            grads_vars = optimizer.compute_gradients(mse, trainable_variables)
            accum_ops = [accum_vars[i].assign_add(gv[0]) for i, gv in enumerate(grads_vars) if gv[0] is not None]
            train_step = optimizer.apply_gradients([(accum_vars[i], gv[1]) for i, gv in enumerate(grads_vars)])
    return train_step, accum_ops, zero_ops
# retrieve data section
def _parse_function(example_proto):
    # The annotation contains the following features: timestamp; arousal; valence; liking
    features = {
        'height': tf.FixedLenFeature([], tf.int64),
        'width': tf.FixedLenFeature([], tf.int64),
        'image_raw': tf.FixedLenFeature([], tf.string),
        'frame_number': tf.FixedLenFeature([1], tf.int64),
        'detected': tf.FixedLenFeature([1], tf.int64),
        'arousal': tf.FixedLenFeature([1], tf.float32),
        'valence': tf.FixedLenFeature([1], tf.float32),
        'liking': tf.FixedLenFeature([1], tf.float32)
    }
    parsed_features = tf.parse_single_example(example_proto, features)
    # This is how we extract one example from the database.
    image = tf.decode_raw(parsed_features['image_raw'], tf.uint8)
    # The image was flattened when stored in binary format, so the height and
    # the width are needed to restore the original image.
    height = tf.cast(parsed_features['height'], tf.int32)
    width = tf.cast(parsed_features['width'], tf.int32)
    # Tensor("Reshape:0", shape=(112, 112, 3), dtype=uint8)
    image = tf.reshape(image, [112, 112, 3])
    detected = parsed_features['detected']
    arousal = parsed_features['arousal']
    valence = parsed_features['valence']
    liking = parsed_features['liking']
    return detected, arousal, valence, liking, image
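For reference, the tfrecords were written with a matching feature layout, roughly as sketched below (the helper functions and the variables frame, frame_number, face_detected, the label values, and writer are illustrative names, not my real writer code):
# Sketch of the matching writer side; all names here are illustrative.
def _int64(v): return tf.train.Feature(int64_list=tf.train.Int64List(value=[v]))
def _float(v): return tf.train.Feature(float_list=tf.train.FloatList(value=[v]))
def _bytes(v): return tf.train.Feature(bytes_list=tf.train.BytesList(value=[v]))

example = tf.train.Example(features=tf.train.Features(feature={
    'height': _int64(112),
    'width': _int64(112),
    'image_raw': _bytes(frame.tostring()),  # frame: uint8 array, shape (112, 112, 3)
    'frame_number': _int64(frame_number),
    'detected': _int64(int(face_detected)),
    'arousal': _float(arousal_label),
    'valence': _float(valence_label),
    'liking': _float(liking_label),
}))
writer.write(example.SerializeToString())   # writer: a tf.python_io.TFRecordWriter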
############################### TRAINING ###################################
datasets_train_iterators = []
for file_name in filenames_train:
    dataset_train = tf.data.TFRecordDataset(file_name).map(_parse_function).batch(train_batch_size)
    datasets_train_iterators.append(dataset_train)
dataset_train_all = tf.data.Dataset.zip(tuple(datasets_train_iterators))
iterator_train_all = dataset_train_all.make_initializable_iterator()
def retrieve_inputs_train():
    next_batch = iterator_train_all.get_next()
    detected = []
    arousal = []
    valence = []
    liking = []
    images = []
    for n in next_batch:
        detected.append(n[0])
        arousal.append(n[1])
        valence.append(n[2])
        liking.append(n[3])
        images.append(n[4])
    detected = tf.concat(detected, axis=0)
    arousal = tf.concat(arousal, axis=0)
    valence = tf.concat(valence, axis=0)
    liking = tf.concat(liking, axis=0)
    images = tf.concat(images, axis=0)
    return detected, arousal, valence, liking, images
############################### TESTING ###################################
datasets_dev_iterators = []
for file_name in filenames_dev:
    dataset_dev = tf.data.TFRecordDataset(file_name).map(_parse_function).batch(test_batch_size)
    datasets_dev_iterators.append(dataset_dev)
dataset_dev_all = tf.data.Dataset.zip(tuple(datasets_dev_iterators))
iterator_dev_all = dataset_dev_all.make_initializable_iterator()
def retrieve_inputs_dev():
    next_batch = iterator_dev_all.get_next()
    detected = []
    arousal = []
    valence = []
    liking = []
    images = []
    for n in next_batch:
        detected.append(n[0])
        arousal.append(n[1])
        valence.append(n[2])
        liking.append(n[3])
        images.append(n[4])
    detected = tf.concat(detected, axis=0)
    arousal = tf.concat(arousal, axis=0)
    valence = tf.concat(valence, axis=0)
    liking = tf.concat(liking, axis=0)
    images = tf.concat(images, axis=0)
    return detected, arousal, valence, liking, images
# preparing model before training
detected, arousal, valence, liking, images = tf.cond(phase_train,
                                                     lambda: retrieve_inputs_train(),
                                                     lambda: retrieve_inputs_dev())
images_casted = tf.cast(images, tf.float32)
with tf.name_scope('image_normal'):
    images_casted_normalized = tf.map_fn(lambda img: tf.image.per_image_standardization(img), images_casted)
# shape of predictions: (680, 3) -> 3 since we are outputting arousal, valence and liking
# the n parameter is the ResNet configuration... Not important for now
predictions = model(images_casted_normalized, n=[3, 4, 6, 3])
predicted_arousal = tf.slice(predictions, begin=[0, 0], size=[-1, 1], name='predicted_arousal')
predicted_valence = tf.slice(predictions, begin=[0, 1], size=[-1, 1], name='predicted_valence')
predicted_liking = tf.slice(predictions, begin=[0, 2], size=[-1, 1], name='predicted_liking')
MSE_a = tf.cond(phase_train,
                lambda: loss(predicted_arousal, arousal, detected, 'MSE_arousal_Train', 'Train'),
                lambda: loss(predicted_arousal, arousal, detected, 'MSE_arousal_Devel', 'Devel'))
MSE_v = tf.cond(phase_train,
                lambda: loss(predicted_valence, valence, detected, 'MSE_valence_Train', 'Train'),
                lambda: loss(predicted_valence, valence, detected, 'MSE_valence_Devel', 'Devel'))
MSE_l = tf.cond(phase_train,
                lambda: loss(predicted_liking, liking, detected, 'MSE_liking_Train', 'Train'),
                lambda: loss(predicted_liking, liking, detected, 'MSE_liking_Devel', 'Devel'))
MSE = MSE_a + MSE_v + MSE_l
train_step, accum_ops, zero_ops = optimize(MSE)
init_op = tf.global_variables_initializer()
model_path = "C:/Users/user/Documents/f24/model"
events_path = "C:/Users/user/Documents/f24/event_files/34_layers"
with tf.Session() as sess:
    # the gradient accumulators are initialized explicitly first
    # (this is the cell shown in the traceback below)
    for v in accum_vars:
        sess.run(v.initializer)
    sess.run(init_op)
    train_writer = tf.summary.FileWriter(events_path, sess.graph)
    merged_train = tf.summary.merge(summaries_while_training)
    merged_val = tf.summary.merge(summaries_while_testing)
    sess.run(iterator_train_all.initializer)
    sess.run(iterator_dev_all.initializer)
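The training loop itself is omitted above; inside the session block it is intended to run along the following lines (a sketch; accum_steps is a placeholder name for the number of micro-batches per weight update):
# Sketch of the intended accumulation loop; accum_steps is a placeholder name.
accum_steps = 4
for epoch in range(num_epoch):
    sess.run(iterator_train_all.initializer)
    try:
        while True:
            sess.run(zero_ops)               # reset the accumulators
            for _ in range(accum_steps):
                sess.run(accum_ops)          # add the gradients of one micro-batch
            sess.run(train_step)             # apply the accumulated gradients once
    except tf.errors.OutOfRangeError:
        pass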
Finally, the following error occurs:
FailedPreconditionError: Attempting to use uninitialized value conv3_1/conv2_in_block/conv
[[Node: conv3_1/conv2_in_block/conv/read = Identity[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](conv3_1/conv2_in_block/conv)]]
During handling of the above exception, another exception occurred:
FailedPreconditionError Traceback (most recent call last)
<ipython-input-11-dbe6d12c67ce> in <module>()
7
8 for v in accum_vars:
----> 9 sess.run(v.initializer)
10
11 sess.run(init_op)
...
File "<ipython-input-10-8d7d7b4aa814>", line 10, in <module>
predictions = model(images_casted_normalized, n=[3, 4, 6, 3])
File "<ipython-input-5-fae307f9536f>", line 25, in model
conv3 = residual_block(layers[-1], 256, is_training=phase_train)
File "<ipython-input-4-d8a2d1403f18>", line 97, in residual_block
conv2 = bn_relu_conv_layer(conv1, [3, 3, output_channel, output_channel], 1, is_training=is_training)
File "<ipython-input-4-d8a2d1403f18>", line 61, in bn_relu_conv_layer
filter = create_variables(name='conv', shape=filter_shape)
File "<ipython-input-4-d8a2d1403f18>", line 15, in create_variables
regularizer=regularizer)
File "C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1317, in get_variable
constraint=constraint)
Now, if I delete the following two lines in optimize(), my code runs fine, but I know that removing them is wrong (the UPDATE_OPS, e.g. the batch-norm moving-average updates, would then no longer be forced to run):
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
Alternatively, my code also runs fine if I use the following code for the optimizer:
def optimize(mse):
    with tf.name_scope('Optimizer'):
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = tf.train.AdamOptimizer(learning_rate=0.001)  # same learning rate as above
            train_step = optimizer.minimize(mse)
    return train_step
This behaviour seems very strange to me, and I would really like to understand why the error occurs.
Any help is much appreciated!