Tensorboard--高级节点的计算时间与其子节点的计算时间总和不同

时间:2018-02-20 22:06:23

标签: python tensorflow profiling

按照 TensorFlow 官方教程(tutorial)操作之后,我试图理解 TensorBoard 中显示的运行时统计信息。

我发现表示名称范围的高级节点的计算时间不等于其子节点的计算时间总和。为什么不一样?

例如,在附加的快照中:

  • ConvLayer2的计算时间= 75.5毫秒,而
  • 子节点的计算时间总和 = 55.2(conv,卷积)+ 1.73(add,加法)+ 1(其他节点)= 57.9 毫秒

ConvLayer2的快照

import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Build everything in a dedicated graph rather than the process-wide default one.
g = tf.Graph()
with g.as_default():
    def conv2d(x, W):
        """Convolve ``x`` with filter ``W`` using stride 1 on every axis and SAME padding."""
        unit_strides = [1, 1, 1, 1]
        return tf.nn.conv2d(x, W, strides=unit_strides, padding="SAME")

    def max_pool_2x2(x):
        """Max-pool ``x`` over 2x2 windows moved with stride 2, using SAME padding."""
        window = [1, 2, 2, 1]
        step = [1, 2, 2, 1]
        return tf.nn.max_pool(x, ksize=window, strides=step, padding="SAME")

    # Load the MNIST data sets from the 'MNIST_data' directory with one-hot labels.
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

    # Placeholders have a fixed batch dimension of 100, so every feed (training
    # batches and validation slices alike) must contain exactly 100 rows.
    x = tf.placeholder(tf.float32, shape=[100, 784], name = "X_input") # Input layer
    y_= tf.placeholder(tf.float32, shape=[100, 10], name = "Y_labels")

    # Reshape each 784-element input row into a 28x28x1 image (4-D tensor);
    # -1 lets TensorFlow infer the batch dimension.
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # Layer 1: 5x5 convolution, 1 input channel -> 32 output channels, ReLU, 2x2 max-pool.
    # Ops created inside a name_scope are grouped under that name, which is what
    # TensorBoard displays as a single high-level node.
    with tf.name_scope('ConvLayer1'):
        W_conv1 = tf.Variable(tf.truncated_normal([5, 5, 1, 32], stddev=0.1), name = "Weights_L1")
        b_conv1 = tf.Variable(tf.constant(0.1, shape = [32]), name = "Bias_L1")
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)

    # Layer 2: 5x5 convolution, 32 input channels -> 64 output channels, ReLU, 2x2 max-pool.
    with tf.name_scope('ConvLayer2'):
        W_conv2 = tf.Variable(tf.truncated_normal([5, 5, 32, 64], stddev=0.1), name = "Weights_L2")
        b_conv2 = tf.Variable(tf.constant(0.1, shape = [64]), name = "Bias_L2")
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)

    # Layer 3 : Fully Connected Layer (7*7*64 inputs -> 1024 units)
    with tf.name_scope('FullyConnectLayer1'):
        w_fc1 = tf.Variable(tf.truncated_normal([7*7*64, 1024], stddev=0.1), name = "Weights_fc1")
        b_fc1 = tf.Variable(tf.constant(0.1, shape = [1024]), name = "Bias_fc1")
        # Flatten the pooled feature maps to one row per example
        # (two stride-2 poolings take 28 -> 14 -> 7, with 64 channels).
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)
        # Dropout to reduce overfitting; the keep probability is a placeholder
        # so it can be fed per run (0.5 for training, 1.0 for validation below).
        with tf.name_scope('performDropout'):
            keep_probability = tf.placeholder(tf.float32)
            h_fc1_drop = tf.nn.dropout(h_fc1, keep_probability)

    # Layer 4: Readout layer (1024 units -> 10 class logits)
    with tf.name_scope('FullyConnectLayer2'):
        w_fc2 = tf.Variable(tf.truncated_normal([1024, 10], stddev=0.1), name = "Weights_fc2")
        b_fc2 = tf.Variable(tf.constant(0.1, shape = [10]), name = "Bias_fc2")
        y_out = tf.matmul(h_fc1_drop, w_fc2) + b_fc2

    # Loss function: mean softmax cross-entropy between the labels and the logits,
    # also recorded as a scalar summary.
    with tf.name_scope('xEntropy'):
        loss_crossEntropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels = y_, logits = y_out), name="xent")
        tf.summary.scalar("xEntropy", loss_crossEntropy)

    # Training op: Adam with learning rate 1e-4 minimizing the cross-entropy loss.
    with tf.name_scope('Train_AdamOptim'):
        optimizer = tf.train.AdamOptimizer(learning_rate = 1e-4)
        train_step = optimizer.minimize(loss_crossEntropy)

    # Accuracy: fraction of examples whose arg-max logit matches the arg-max label,
    # also recorded as a scalar summary.
    with tf.name_scope('accuracy'):
        correct_predict = tf.equal(tf.argmax(y_out ,1), tf.argmax(y_ ,1))
        accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    # Merge all summary ops into a single op
    summary = tf.summary.merge_all()

    # Operation: Initialize variables
    var_init = tf.global_variables_initializer()

    # Trace options and the metadata container: passing these to sess.run with
    # FULL_TRACE makes that run record the runtime statistics into run_metadata.
    run_options = tf.RunOptions(trace_level = tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()

    # Run the computational graph
    with tf.Session() as sess:
        # Initialize the variables that were created while building the computational graph
        sess.run(var_init)

        # Write the session graph and the collected events to an event file so they
        # can be visualized in TensorBoard.
        writer = tf.summary.FileWriter("/tmp/mnist_demo/1")
        try:
            writer.add_graph(sess.graph)

            # Run the training step 101 times for this demo (a full training run
            # would use 20000 steps).
            for i in range(101): #20000
                batch = mnist.train.next_batch(100)
                # Training: dropout keeps each unit with probability 0.5
                sess.run(train_step, feed_dict={x: batch[0], y_: batch[1], keep_probability: 0.5})

                # Validation every 50 steps on the first 100 validation examples,
                # with dropout disabled. Only these runs are traced, so the runtime
                # statistics shown in TensorBoard describe the accuracy/summary
                # evaluation, not the training step.
                if i % 50 == 0:
                    [val_accuracy, s] = sess.run(
                        [accuracy, summary],
                        feed_dict={x: mnist.validation.images[0:100, :],
                                   y_: mnist.validation.labels[0:100, :],
                                   keep_probability: 1.0},
                        options=run_options, run_metadata=run_metadata)
                    writer.add_run_metadata(run_metadata, 'step %d' % i)
                    writer.add_summary(s, i)
                    writer.flush()
                    print("step %d, validation accuracy %g" % (i, val_accuracy))
        finally:
            # Always release the event file, even if training raises part-way through.
            writer.close()

TensorFlow 教程没有说明高级节点的计算时间是如何由其子节点汇总得到的。如能提供任何帮助,将不胜感激。

0 个答案:

没有答案