I am currently implementing Inception v3 for my system. The input is a batch of images and the output is a set of tags for each image. Basically, the model consists of an Inception module followed by 2 fully connected layers, and the loss is computed with a multi-label sigmoid cross-entropy function. Finally, because I only want to train on a small, pre-selected subset of the tags, I multiply the per-tag losses by a "filter weight", so that roughly 90% of the 750 tags get a loss of 0 and the remaining tags get their normal loss.
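To make the "filter weight" idea concrete, here is a toy sketch of the masked multi-label loss I have in mind (made-up shapes and values: 1 image and 3 tags instead of 750; this is not my actual pipeline):

import tensorflow as tf

# Toy sketch of the masked multi-label loss described above.
logits = tf.constant([[2.0, -1.0, 0.5]])    # raw outputs of the last FC layer
targets = tf.constant([[1.0, 0.0, 1.0]])    # multi-hot ground-truth tags
filters = tf.constant([[1.0, 0.0, 1.0]])    # "filter weight": 1 = train on this tag

losses = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=targets)
masked = losses * filters                   # filtered-out tags contribute 0 loss
batch_loss = tf.reduce_sum(masked) / tf.reduce_sum(filters)

with tf.Session() as sess:
  print(sess.run([losses, batch_loss]))

The real code below does the same thing, except that the filter vectors and targets come from the input pipeline.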
def build_inputs(self):
  """Input prefetching, preprocessing and batching.

  Outputs:
    self.images
    self.tag_filters
    self.targets (training and eval only)
  """
  if self.mode == "inference":
    # In inference mode, images and inputs are fed via placeholders.
    image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed")
    tag_feed = tf.placeholder(dtype=tf.int64,
                              shape=[None],  # batch_size
                              name="tag_feed")

    # Process image and insert batch dimensions.
    images = tf.expand_dims(self.process_image(image_feed), 0)
    tag_filters = input_ops.convert_one_vector(
        tag_feed,
        self.num_outputs,
        name="pred_tag_filter_convert_one_hot_vector")

    # No target vectors in inference mode.
    highscore_caption_one_vectors = None
  else:
    # Prefetch serialized SequenceExample protos.
    input_queue = input_ops.prefetch_input_data(
        self.reader,
        self.config.input_file_pattern,
        is_training=self.is_training(),
        batch_size=self.config.batch_size,
        values_per_shard=self.config.values_per_input_shard,
        input_queue_capacity_factor=self.config.input_queue_capacity_factor,
        num_reader_threads=self.config.num_input_reader_threads)

    # Image processing and random distortion. Split across multiple threads
    # with each thread applying a slightly different distortion.
    assert self.config.num_preprocess_threads % 2 == 0
    images_and_captions = []
    for thread_id in range(self.config.num_preprocess_threads):
      serialized_sequence_example = input_queue.dequeue()
      encoded_image, tag_caption, highscore_caption = input_ops.parse_sequence_example(
          serialized_sequence_example,
          image_feature=self.config.image_feature_name,
          tag_caption_feature=self.config.tag_feature_name,
          highscore_caption_feature=self.config.highscore_feature_name)
      if len(tag_caption) > 0 and len(highscore_caption) > 0:
        image = self.process_image(encoded_image, thread_id=thread_id)
        images_and_captions.append([image, tag_caption, highscore_caption])

    # Batch inputs.
    queue_capacity = (2 * self.config.num_preprocess_threads *
                      self.config.batch_size)
    images, tag_filters, highscore_caption_one_vectors = (
        input_ops.batch_with_dynamic_pad(images_and_captions,
                                         num_outputs=self.num_outputs,
                                         batch_size=self.config.batch_size,
                                         queue_capacity=queue_capacity))

  self.images = images
  self.tag_filters = tag_filters
  self.targets = highscore_caption_one_vectors
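For context, input_ops.convert_one_vector and input_ops.batch_with_dynamic_pad are my own helpers. The first one just turns the fed tag indices into a multi-hot "filter" vector; a rough sketch of what it does (not the exact code) is:

def convert_one_vector(indices, num_outputs, name=None):
  # Rough sketch of my input_ops.convert_one_vector helper: turn a 1-D tensor
  # of tag indices into a single multi-hot float vector of length num_outputs.
  one_hots = tf.one_hot(indices, depth=num_outputs, dtype=tf.float32)
  return tf.reduce_max(one_hots, axis=0, name=name)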
def build_model(self):
  """Builds the model.

  Inputs:
    self.images
    self.tag_filters
    self.targets (training and eval only)

  Outputs:
    self.total_loss (training and eval only)
    self.target_cross_entropy_losses (training and eval only)
    self.target_cross_entropy_loss_weights (training and eval only)
  """
  # Image embedding via the Inception v3 module.
  inception_output = image_embedding.inception_v3(
      self.images,
      trainable=True,
      is_training=self.is_training(),
      dropout_keep_prob=self.config.dropout_keep_prob)
  self.inception_variables = tf.get_collection(
      tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")

  # First fully connected layer on top of the Inception output.
  with tf.variable_scope("inception_fc_1") as fc_1_scope:
    fc_1_outputs = tf.contrib.layers.fully_connected(
        inputs=inception_output,
        num_outputs=self.config.fc_units,
        activation_fn=tf.nn.relu,
        weights_initializer=self.initializer,
        biases_initializer=None,
        scope=fc_1_scope)

  # Second fully connected layer producing one logit per tag.
  with tf.variable_scope("logits") as logits_scope:
    logits = tf.contrib.layers.fully_connected(
        inputs=fc_1_outputs,
        num_outputs=self.num_outputs,
        activation_fn=None,
        weights_initializer=self.initializer,
        scope=logits_scope)

  if self.mode == "inference":
    filtered_logits = tf.multiply(logits, self.tag_filters, name="filter_mul")
    tf.nn.sigmoid(filtered_logits, name="sigmoid")
  else:
    logits = tf.reshape(logits, [-1])
    weights = tf.reshape(self.tag_filters, [-1])
    targets = tf.reshape(self.targets, [-1])
    targets = tf.cast(targets, tf.float32)

    # Compute losses.
    # NaN appears here if x is 0.
    losses = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=targets)
    # NaN appears here as well.
    batch_loss = tf.div(tf.reduce_sum(tf.multiply(losses, weights)),
                        tf.reduce_sum(weights),
                        name="batch_loss")
    tf.contrib.losses.add_loss(batch_loss)
    total_loss = tf.contrib.losses.get_total_loss()

    # Add summaries.
    tf.summary.scalar("batch_loss", batch_loss)
    tf.summary.scalar("total_loss", total_loss)
    for var in tf.trainable_variables():
      tf.summary.histogram(var.op.name, var)

    self.total_loss = total_loss
    self.target_cross_entropy_losses = losses  # Used in evaluation.
    self.target_cross_entropy_loss_weights = weights  # Used in evaluation.
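The two "NaN appears here" comments mark where the bad values first show up. To spell out the reduction: batch_loss is sum(losses * weights) / sum(weights), so it evaluates to NaN whenever the filter weights of a batch sum to 0 (I am not sure yet whether that can actually happen with my data). A toy illustration with made-up numbers:

import tensorflow as tf

# Toy numbers only: if every filter weight in the batch is 0, the
# batch_loss reduction above becomes 0 / 0, which is NaN.
losses = tf.constant([0.3, 1.2, 0.7])
weights = tf.constant([0.0, 0.0, 0.0])
batch_loss = tf.div(tf.reduce_sum(losses * weights), tf.reduce_sum(weights))

with tf.Session() as sess:
  print(sess.run(batch_loss))  # nan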
However, the model runs for a few thousand steps and then starts printing NaN for the loss. After roughly another 100 steps, the program stops. If I restart training from the last valid checkpoint, it runs past the step where the last NaN appeared, but stops again later.
At the end it also reports NaN errors for the weights or biases (both inside and outside the Inception module).
I would like to know why the loss becomes NaN in this situation. When I look at TensorBoard, I do not see the weights or biases changing much right before the NaN errors appear.
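For debugging, one thing I can add is tf.add_check_numerics_ops(), which attaches tf.check_numerics to every floating-point tensor so the first offending op gets named instead of the NaN silently propagating into the loss. A minimal sketch of that kind of instrumentation (the helper below and its arguments are hypothetical, not my actual training script):

def train_with_nan_checks(train_op, total_loss, num_steps):
  # Hypothetical helper: run the check op alongside the train op so the
  # session raises an InvalidArgumentError naming the first op whose output
  # contains NaN or Inf.
  check_op = tf.add_check_numerics_ops()
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(num_steps):
      _, _, loss_value = sess.run([train_op, check_op, total_loss])
      print(step, loss_value)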