Question

我只在 training() 中为损失添加了名为 'xentropy_mean' 的 summary，但在 TensorBoard 中找不到 'xentropy_mean' 图表，反而出现了许多我没有添加的其他图表。我不知道自己哪里写错了，问题究竟出在哪里。是因为我在代码中使用了线程吗？如果不使用线程，我该怎么写呢？

The tensorboard screenshot：“queue”分组下有 6 个图表，我不知道它们是什么含义。

我在下面的文件中创建模型

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import tensorflow.python.platform
import tensorflow as tf

# Number of output classes the model predicts. The stock MNIST digit task
# has 10 classes (digits 0 through 9); this model is configured for 16.
NUM_CLASSES = 16

# Images are square, IMAGE_SIZE x IMAGE_SIZE pixels, and are fed to the model
# flattened into IMAGE_PIXELS values.
IMAGE_SIZE = 28
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE


def inference(images, hidden1_units, hidden2_units):
  """Construct the forward pass: two ReLU hidden layers and a linear output.

  Args:
    images: Tensor of flattened images; its second dimension has to be
      IMAGE_PIXELS for the first matmul to be valid.
    hidden1_units: Width of the first hidden layer.
    hidden2_units: Width of the second hidden layer.

  Returns:
    The logits tensor produced by the final linear layer, with NUM_CLASSES
    columns.
  """

  def _affine(inputs, fan_in, fan_out):
    # Weights come from a truncated normal scaled by 1/sqrt(fan_in); biases
    # start at zero. Both are created inside the caller's name scope.
    weights = tf.Variable(
        tf.truncated_normal([fan_in, fan_out],
                            stddev=1.0 / math.sqrt(float(fan_in))),
        name='weights')
    biases = tf.Variable(tf.zeros([fan_out]), name='biases')
    return tf.matmul(inputs, weights) + biases

  # First hidden layer.
  with tf.name_scope('hidden1'):
    hidden1 = tf.nn.relu(_affine(images, IMAGE_PIXELS, hidden1_units))
  # Second hidden layer.
  with tf.name_scope('hidden2'):
    hidden2 = tf.nn.relu(_affine(hidden1, hidden1_units, hidden2_units))
  # Output layer: purely linear, no activation.
  with tf.name_scope('softmax_linear'):
    logits = _affine(hidden2, hidden2_units, NUM_CLASSES)
  return logits


def loss(logits, labels):
  """Compute the batch-mean softmax cross-entropy and register its summary.

  Args:
    logits: Logits tensor from inference(); presumably shaped
      [batch_size, NUM_CLASSES] -- confirm against the caller.
    labels: 1-D integer tensor holding one class id per example.

  Returns:
    Scalar tensor created with name 'xentropy_mean' holding the mean loss.
    A scalar summary tagged with that op's name is added as a side effect.
  """
  batch_size = tf.size(labels)
  # Pair every row index with its class id, then scatter 1.0 at those
  # (row, class) positions to obtain dense one-hot labels.
  labels = tf.expand_dims(labels, 1)
  indices = tf.expand_dims(tf.range(0, batch_size), 1)
  concated = tf.concat(1, [indices, labels])
  # Use the module constant rather than a literal so the one-hot width always
  # matches the number of logits produced by inference().
  onehot_labels = tf.sparse_to_dense(
      concated, tf.pack([batch_size, NUM_CLASSES]), 1.0, 0.0)
  # Keyword arguments make the logits/labels roles explicit and independent
  # of the positional order of the API.
  cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
      logits=logits, labels=onehot_labels, name='xentropy')
  mean_loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
  tf.summary.scalar(mean_loss.op.name, mean_loss)

  return mean_loss


def training(loss, learning_rate):
  """Create the op that performs one gradient-descent update.

  Args:
    loss: Loss tensor to minimize.
    learning_rate: Learning rate passed to the gradient descent optimizer.

  Returns:
    The op returned by the optimizer's minimize(); it is also given a
    non-trainable 'global_step' variable as its step counter.
  """
  sgd = tf.train.GradientDescentOptimizer(learning_rate)
  # Step counter; excluded from the trainable variables.
  step_counter = tf.Variable(0, name='global_step', trainable=False)
  return sgd.minimize(loss, global_step=step_counter)


def evaluation(logits, labels):
  """Count the examples whose label is the top-1 prediction.

  Args:
    logits: Logits tensor, one row per example.
    labels: Integer class ids, one per example.

  Returns:
    Scalar int32 tensor with the number of correct predictions.
  """
  # Boolean per example: is the true label within the top k=1 logits?
  hits = tf.nn.in_top_k(logits, labels, 1)
  hits_as_int = tf.cast(hits, tf.int32)
  # Summing the 0/1 values gives the number of correct predictions.
  return tf.reduce_sum(hits_as_int)

并在此文件中训练模型：

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os.path
import sys
import time
import numpy as np

import tensorflow as tf

import mnist


# Constants used for dealing with the files, matches convert_to_records.
TRAIN_FILE = 'train.tfrecords'
VALIDATION_FILE = 'validation.tfrecords'
TEST_FILE = 'test.tfrecords'


# Basic model parameters as external flags.
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_string('train_dir', '/home/queenie/image2tfrecord/tfrecords-28-gray/', 'Directory to put the training data.')
flags.DEFINE_string('filename', 'train.tfrecords', 'Name of the training TFRecord file.')
flags.DEFINE_integer('batch_size', 100, 'Batch size.  '
                     'Must divide evenly into the dataset sizes.')
flags.DEFINE_integer('num_epochs', None, 'Number of epochs to run trainer.')
flags.DEFINE_integer('hidden1', 128, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 32, 'Number of units in hidden layer 2.')
# The learning rate is fractional, so it has to be a float flag; an integer
# flag cannot represent a value such as 0.01 given on the command line.
flags.DEFINE_float('learning_rate', 0.01, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 50000, 'Number of steps to run trainer.')


def placeholder_inputs(batch_size):
  """Create placeholders for one batch of images and labels.

  Args:
    batch_size: Number of examples per batch; fixes the first dimension of
      both placeholders.

  Returns:
    (images_placeholder, labels_placeholder): a float32 placeholder of shape
    (batch_size, mnist.IMAGE_PIXELS) and an int32 placeholder of shape
    (batch_size,).
  """
  images_placeholder = tf.placeholder(
      tf.float32, shape=(batch_size, mnist.IMAGE_PIXELS))
  # `(batch_size)` is just an int in Python; spell the 1-D shape as a real
  # one-element tuple.
  labels_placeholder = tf.placeholder(tf.int32, shape=(batch_size,))
  return images_placeholder, labels_placeholder

def fill_feed_dict(images_feed,labels_feed,images_pl,labels_pl):
  """Map the two placeholders to the values that should be fed into them.

  Args:
    images_feed: Value to feed for the images placeholder.
    labels_feed: Value to feed for the labels placeholder.
    images_pl: Key for the images value (the images placeholder).
    labels_pl: Key for the labels value (the labels placeholder).

  Returns:
    A dict {images_pl: images_feed, labels_pl: labels_feed}.
  """
  return {images_pl: images_feed, labels_pl: labels_feed}

def read_and_decode(filename_queue):
  """Read one serialized example from the queue and decode image and label.

  Args:
    filename_queue: Queue of TFRecord file names handed to the reader.

  Returns:
    (image, label): `image` is a float32 vector of mnist.IMAGE_PIXELS values
    rescaled from [0, 255] to [-0.5, 0.5]; `label` is an int32 scalar.
  """
  record_reader = tf.TFRecordReader()
  _, serialized_example = record_reader.read(filename_queue)

  # Both keys are required, so no default values are specified.
  feature_spec = {
      'image_raw': tf.FixedLenFeature([], tf.string),
      'label': tf.FixedLenFeature([], tf.int64),
  }
  parsed = tf.parse_single_example(serialized_example, features=feature_spec)

  # The raw string is decoded as uint8 values; the static shape is pinned to
  # [mnist.IMAGE_PIXELS]. The image stays flattened because no distortions
  # are applied here.
  raw_pixels = tf.decode_raw(parsed['image_raw'], tf.uint8)
  raw_pixels.set_shape([mnist.IMAGE_PIXELS])

  # Rescale [0, 255] -> [-0.5, 0.5] as floats.
  image = tf.cast(raw_pixels, tf.float32) * (1. / 255) - 0.5

  # The label is parsed as int64 and narrowed to int32.
  label = tf.cast(parsed['label'], tf.int32)

  return image, label


def do_eval(sess,eval_correct):
    """Run `eval_correct` repeatedly and print/return precision @ 1.

    The op is run FLAGS.batch_size times. Assumes every run scores one batch
    of FLAGS.batch_size examples (that is how the callers in this file build
    their input pipelines), so FLAGS.batch_size ** 2 examples are evaluated.

    Args:
      sess: Session in which the op is run.
      eval_correct: Tensor returning the number of correct predictions in
        one batch.

    Returns:
      The fraction of evaluated examples that were predicted correctly.
    """
    num_steps = FLAGS.batch_size
    num_examples = num_steps * FLAGS.batch_size
    true_count = 0
    # `range` instead of `xrange` so this also runs on Python 3.
    for _ in range(num_steps):
        true_count += sess.run(eval_correct)

    precision = float(true_count) / num_examples
    # Report the number of examples actually evaluated, i.e. the same
    # denominator that the precision is computed from.
    print('  Num examples: %d  Num correct: %d  Precision @ 1: %0.04f' %
          (num_examples, true_count, precision))
    return precision


def inputs(train, batch_size, num_epochs):
  """Build a shuffled-batch input pipeline over one of the TFRecord files.

  Args:
    train: 'train' selects TRAIN_FILE, 'validation' selects VALIDATION_FILE;
      any other value selects TEST_FILE. Files are looked up in
      FLAGS.train_dir.
    batch_size: Number of examples per returned batch.
    num_epochs: Number of passes over the file. None or 0 means the file name
      is produced indefinitely.

  Returns:
    (images, sparse_labels): batched outputs of read_and_decode(), i.e.
    float32 images of shape [batch_size, mnist.IMAGE_PIXELS] and int32 labels
    of shape [batch_size].
  """
  if not num_epochs:
    num_epochs = None

  if train == 'train':
    filename = os.path.join(FLAGS.train_dir, TRAIN_FILE)
  elif train == 'validation':
    filename = os.path.join(FLAGS.train_dir, VALIDATION_FILE)
  else:
    filename = os.path.join(FLAGS.train_dir, TEST_FILE)

  with tf.name_scope('input'):
    # Honour the caller's epoch limit instead of always passing None; with a
    # limit the queue raises OutOfRangeError once the epochs are exhausted.
    filename_queue = tf.train.string_input_producer(
        [filename], num_epochs=num_epochs)

    # Even when reading in multiple threads, share the filename
    # queue.
    image, label = read_and_decode(filename_queue)

    # Shuffle the examples and collect them into batch_size batches.
    # Two threads are used for the batching queue.
    images, sparse_labels = tf.train.shuffle_batch(
        [image, label], batch_size=batch_size, num_threads=2,
        capacity=1000 + 3 * batch_size,
        # Ensures a minimum amount of shuffling of examples.
        min_after_dequeue=1000)

  return images, sparse_labels


def run_training():
  """Train the model from the TFRecord input queues.

  Builds the input pipelines, model, loss, train op and evaluation ops in a
  fresh graph, then runs training steps until FLAGS.max_steps is reached, the
  coordinator asks to stop, or the input queues run out of data. Every 100
  steps the loss is printed, training precision is evaluated and the merged
  summaries are written to FLAGS.train_dir; every 1000 steps (and at the last
  step) a checkpoint is saved and train/validation/test precision is printed.
  """
  with tf.Graph().as_default():
    # One input pipeline per data split.
    images, labels = inputs(train='train', batch_size=FLAGS.batch_size,
                            num_epochs=FLAGS.num_epochs)
    images_valid, labels_valid = inputs(train='validation',
                                        batch_size=FLAGS.batch_size,
                                        num_epochs=FLAGS.num_epochs)
    images_test, labels_test = inputs(train='test',
                                      batch_size=FLAGS.batch_size,
                                      num_epochs=FLAGS.num_epochs)

    # Build a Graph that computes predictions from the inference model.
    logits = mnist.inference(images, FLAGS.hidden1, FLAGS.hidden2)

    # NOTE(review): mnist.inference() creates new tf.Variable objects on every
    # call, so the validation and test networks below do not share the
    # weights that train_op updates; their precision is measured on
    # untrained parameters. Sharing weights needs a change in
    # mnist.inference().
    valid_prediction = mnist.inference(images_valid, FLAGS.hidden1,
                                       FLAGS.hidden2)
    test_prediction = mnist.inference(images_test, FLAGS.hidden1,
                                      FLAGS.hidden2)

    # Add to the Graph the loss calculation.
    loss = mnist.loss(logits, labels)

    # Add to the Graph operations that train the model.
    train_op = mnist.training(loss, FLAGS.learning_rate)

    eval_correct = mnist.evaluation(logits, labels)
    eval_correct_valid = mnist.evaluation(valid_prediction, labels_valid)
    eval_correct_test = mnist.evaluation(test_prediction, labels_test)

    # Use the tf.summary.* API throughout, matching tf.summary.scalar in
    # mnist.loss().
    summary_op = tf.summary.merge_all()

    # The op for initializing the variables (the trained variables and the
    # local epoch counter of the input queues).
    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())

    saver = tf.train.Saver()

    # Create a session for running operations in the Graph.
    sess = tf.Session()
    sess.run(init_op)

    summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

    # Start input enqueue threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
      step = 0
      # Stop after max_steps; without this bound the loop never ends when
      # the input queues repeat forever.
      while step < FLAGS.max_steps and not coord.should_stop():
        start_time = time.time()

        _, loss_value = sess.run([train_op, loss])

        duration = time.time() - start_time

        # Print an overview fairly often.
        if step % 100 == 0:
          print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value,
                                                     duration))
          do_eval(sess, eval_correct)
          summary_str = sess.run(summary_op)
          summary_writer.add_summary(summary_str, step)
          # Push the events to disk right away so TensorBoard can show them
          # while training is still running.
          summary_writer.flush()

        if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
          checkpoint_file = os.path.join(FLAGS.train_dir, 'model.ckpt')
          saver.save(sess, checkpoint_file, global_step=step)
          print('Train:')
          do_eval(sess, eval_correct)
          print('Validation:')
          do_eval(sess, eval_correct_valid)
          print('Test:')
          do_eval(sess, eval_correct_test)

        step += 1

    except tf.errors.OutOfRangeError:
      print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
    finally:
      # When done, ask the threads to stop.
      coord.request_stop()

    # Wait for threads to finish.
    coord.join(threads)
    summary_writer.close()
    sess.close()


# Start training as soon as this file is executed.
# NOTE(review): there is no `if __name__ == '__main__':` guard, so merely
# importing this file also starts training.
run_training()

然后我在 TensorBoard 中看到的是这样的结果：只有 6 个与队列有关的图表。 The tensorboard screenshot

Answer 1

您看到的队列图表是 shuffle_batch 及其同类函数默认创建的，可用于监控输入管道的性能（理想情况下，您希望所有队列都保持满容量，因为这意味着 GPU 不会因等待读取输入而阻塞）。

我不清楚为什么您的 summary 没有显示在 TensorBoard 中。能提供更多信息吗？

我的 TensorBoard 事件文件中出现了许多我没有添加 summary 的图表

1 个答案: