I am currently implementing Inception v3 for my system. The input is a batch of images and the output is a set of tags for each image. Basically, the model consists of an Inception module followed by 2 fully connected layers, and the loss is computed with a multi-label sigmoid cross-entropy function. Finally, because I only want to train on a small, pre-selected subset of the tags, I multiply the per-tag losses by a "filter weight", so that roughly 90% of the 750 tags get a loss of 0 and the remaining tags get their normal loss.
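To make the "filter weight" idea concrete, here is a toy sketch of the masked multi-label loss I have in mind (made-up shapes and values: 1 image and 3 tags instead of 750; this is not my actual pipeline):

import tensorflow as tf

# Toy sketch of the masked multi-label loss described above.
logits = tf.constant([[2.0, -1.0, 0.5]])    # raw outputs of the last FC layer
targets = tf.constant([[1.0, 0.0, 1.0]])    # multi-hot ground-truth tags
filters = tf.constant([[1.0, 0.0, 1.0]])    # "filter weight": 1 = train on this tag

losses = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=targets)
masked = losses * filters                   # filtered-out tags contribute 0 loss
batch_loss = tf.reduce_sum(masked) / tf.reduce_sum(filters)

with tf.Session() as sess:
  print(sess.run([losses, batch_loss]))

The real code below does the same thing, except that the filter vectors and targets come from the input pipeline.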
def build_inputs(self):
  """Input prefetching, preprocessing and batching.

  Outputs:
    self.images
    self.tag_filters
    self.targets (training and eval only)
  """
  if self.mode == "inference":
    # In inference mode, images and inputs are fed via placeholders.
    image_feed = tf.placeholder(dtype=tf.string, shape=[], name="image_feed")
    tag_feed = tf.placeholder(dtype=tf.int64,
                              shape=[None],  # batch_size
                              name="tag_feed")

    # Process image and insert batch dimensions.
    images = tf.expand_dims(self.process_image(image_feed), 0)
    tag_filters = input_ops.convert_one_vector(
        tag_feed,
        self.num_outputs,
        name="pred_tag_filter_convert_one_hot_vector")

    # No target vectors in inference mode.
    highscore_caption_one_vectors = None
  else:
    # Prefetch serialized SequenceExample protos.
    input_queue = input_ops.prefetch_input_data(
        self.reader,
        self.config.input_file_pattern,
        is_training=self.is_training(),
        batch_size=self.config.batch_size,
        values_per_shard=self.config.values_per_input_shard,
        input_queue_capacity_factor=self.config.input_queue_capacity_factor,
        num_reader_threads=self.config.num_input_reader_threads)

    # Image processing and random distortion. Split across multiple threads
    # with each thread applying a slightly different distortion.
    assert self.config.num_preprocess_threads % 2 == 0
    images_and_captions = []
    for thread_id in range(self.config.num_preprocess_threads):
      serialized_sequence_example = input_queue.dequeue()
      encoded_image, tag_caption, highscore_caption = input_ops.parse_sequence_example(
          serialized_sequence_example,
          image_feature=self.config.image_feature_name,
          tag_caption_feature=self.config.tag_feature_name,
          highscore_caption_feature=self.config.highscore_feature_name)
      if len(tag_caption) > 0 and len(highscore_caption) > 0:
        image = self.process_image(encoded_image, thread_id=thread_id)
        images_and_captions.append([image, tag_caption, highscore_caption])

    # Batch inputs.
    queue_capacity = (2 * self.config.num_preprocess_threads *
                      self.config.batch_size)
    images, tag_filters, highscore_caption_one_vectors = (
        input_ops.batch_with_dynamic_pad(images_and_captions,
                                         num_outputs=self.num_outputs,
                                         batch_size=self.config.batch_size,
                                         queue_capacity=queue_capacity))

  self.images = images
  self.tag_filters = tag_filters
  self.targets = highscore_caption_one_vectors
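For context, input_ops.convert_one_vector and input_ops.batch_with_dynamic_pad are my own helpers. The first one just turns the fed tag indices into a multi-hot "filter" vector; a rough sketch of what it does (not the exact code) is:

def convert_one_vector(indices, num_outputs, name=None):
  # Rough sketch of my input_ops.convert_one_vector helper: turn a 1-D tensor
  # of tag indices into a single multi-hot float vector of length num_outputs.
  one_hots = tf.one_hot(indices, depth=num_outputs, dtype=tf.float32)
  return tf.reduce_max(one_hots, axis=0, name=name)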
def build_model(self):
  """Builds the model.

  Inputs:
    self.images
    self.tag_filters
    self.targets (training and eval only)

  Outputs:
    self.total_loss (training and eval only)
    self.target_cross_entropy_losses (training and eval only)
    self.target_cross_entropy_loss_weights (training and eval only)
  """
  # Image embedding via the Inception v3 module.
  inception_output = image_embedding.inception_v3(
      self.images,
      trainable=True,
      is_training=self.is_training(),
      dropout_keep_prob=self.config.dropout_keep_prob)
  self.inception_variables = tf.get_collection(
      tf.GraphKeys.GLOBAL_VARIABLES, scope="InceptionV3")

  # First fully connected layer on top of the Inception output.
  with tf.variable_scope("inception_fc_1") as fc_1_scope:
    fc_1_outputs = tf.contrib.layers.fully_connected(
        inputs=inception_output,
        num_outputs=self.config.fc_units,
        activation_fn=tf.nn.relu,
        weights_initializer=self.initializer,
        biases_initializer=None,
        scope=fc_1_scope)

  # Second fully connected layer producing one logit per tag.
  with tf.variable_scope("logits") as logits_scope:
    logits = tf.contrib.layers.fully_connected(
        inputs=fc_1_outputs,
        num_outputs=self.num_outputs,
        activation_fn=None,
        weights_initializer=self.initializer,
        scope=logits_scope)

  if self.mode == "inference":
    filtered_logits = tf.multiply(logits, self.tag_filters, name="filter_mul")
    tf.nn.sigmoid(filtered_logits, name="sigmoid")
  else:
    logits = tf.reshape(logits, [-1])
    weights = tf.reshape(self.tag_filters, [-1])
    targets = tf.reshape(self.targets, [-1])
    targets = tf.cast(targets, tf.float32)

    # Compute losses.
    # NaN appears here if x is 0.
    losses = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=targets)
    # NaN appears here as well.
    batch_loss = tf.div(tf.reduce_sum(tf.multiply(losses, weights)),
                        tf.reduce_sum(weights),
                        name="batch_loss")
    tf.contrib.losses.add_loss(batch_loss)
    total_loss = tf.contrib.losses.get_total_loss()

    # Add summaries.
    tf.summary.scalar("batch_loss", batch_loss)
    tf.summary.scalar("total_loss", total_loss)
    for var in tf.trainable_variables():
      tf.summary.histogram(var.op.name, var)

    self.total_loss = total_loss
    self.target_cross_entropy_losses = losses  # Used in evaluation.
    self.target_cross_entropy_loss_weights = weights  # Used in evaluation.
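The two "NaN appears here" comments mark where the bad values first show up. To spell out the reduction: batch_loss is sum(losses * weights) / sum(weights), so it evaluates to NaN whenever the filter weights of a batch sum to 0 (I am not sure yet whether that can actually happen with my data). A toy illustration with made-up numbers:

import tensorflow as tf

# Toy numbers only: if every filter weight in the batch is 0, the
# batch_loss reduction above becomes 0 / 0, which is NaN.
losses = tf.constant([0.3, 1.2, 0.7])
weights = tf.constant([0.0, 0.0, 0.0])
batch_loss = tf.div(tf.reduce_sum(losses * weights), tf.reduce_sum(weights))

with tf.Session() as sess:
  print(sess.run(batch_loss))  # nan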
However, the model runs for a few thousand steps and then starts printing NaN for the loss. After roughly another 100 steps, the program stops. If I restart training from the last valid checkpoint, it runs past the step where the last NaN appeared, but stops again later.
At the end it also reports NaN errors for the weights or biases (both inside and outside the Inception module).
I would like to know why the loss becomes NaN in this situation. When I look at TensorBoard, I do not see the weights or biases changing much right before the NaN errors appear.
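For debugging, one thing I can add is tf.add_check_numerics_ops(), which attaches tf.check_numerics to every floating-point tensor so the first offending op gets named instead of the NaN silently propagating into the loss. A minimal sketch of that kind of instrumentation (the helper below and its arguments are hypothetical, not my actual training script):

def train_with_nan_checks(train_op, total_loss, num_steps):
  # Hypothetical helper: run the check op alongside the train op so the
  # session raises an InvalidArgumentError naming the first op whose output
  # contains NaN or Inf.
  check_op = tf.add_check_numerics_ops()
  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(num_steps):
      _, _, loss_value = sess.run([train_op, check_op, total_loss])
      print(step, loss_value)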