按照 TensorFlow 官方教程(tutorial)操作之后,我尝试借助 TensorBoard 来理解运行时统计信息。
我发现,代表名称作用域(name scope)的高层节点的计算时间,并不等于其各子节点计算时间的总和。为什么两者不一致?
例如,在附加的快照中:
ConvLayer2的快照
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# Builds a small MNIST convolutional network in its own graph, trains it for a
# few steps, and records scalar summaries plus full-trace run metadata so the
# per-node runtime statistics can be inspected in TensorBoard.
#
# NOTE(review): the indentation of this script was lost in the paste. The
# nesting below (which statements sit inside each name scope and inside the
# graph context) is a reconstruction -- confirm against the author's script,
# since scope membership changes how nodes are grouped in TensorBoard.
g = tf.Graph()

with g.as_default():

    def conv2d(x, W):
        """Return the stride-1, SAME-padded 2-D convolution of `x` with `W`."""
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="SAME")

    def max_pool_2x2(x):
        """Return `x` max-pooled with a 2x2 window, stride 2, SAME padding."""
        return tf.nn.max_pool(
            x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME"
        )

    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

    # Input layer: both placeholders are declared with a fixed batch of 100.
    x = tf.placeholder(tf.float32, shape=[100, 784], name="X_input")
    y_ = tf.placeholder(tf.float32, shape=[100, 10], name="Y_labels")

    # Reshape input vector into a 4d tensor
    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # Layer 1: 5x5 convolution producing 32 channels, ReLU, then 2x2 max-pool.
    with tf.name_scope('ConvLayer1'):
        W_conv1 = tf.Variable(
            tf.truncated_normal([5, 5, 1, 32], stddev=0.1), name="Weights_L1"
        )
        b_conv1 = tf.Variable(tf.constant(0.1, shape=[32]), name="Bias_L1")
        h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
        h_pool1 = max_pool_2x2(h_conv1)

    # Layer 2: 5x5 convolution from 32 to 64 channels, ReLU, then 2x2 max-pool.
    with tf.name_scope('ConvLayer2'):
        W_conv2 = tf.Variable(
            tf.truncated_normal([5, 5, 32, 64], stddev=0.1), name="Weights_L2"
        )
        b_conv2 = tf.Variable(tf.constant(0.1, shape=[64]), name="Bias_L2")
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)

    # Layer 3 : Fully Connected Layer
    with tf.name_scope('FullyConnectLayer1'):
        w_fc1 = tf.Variable(
            tf.truncated_normal([7 * 7 * 64, 1024], stddev=0.1),
            name="Weights_fc1",
        )
        b_fc1 = tf.Variable(tf.constant(0.1, shape=[1024]), name="Bias_fc1")
        # Flatten
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, w_fc1) + b_fc1)

    # Dropout to reduce overfitting
    with tf.name_scope('performDropout'):
        keep_probability = tf.placeholder(tf.float32)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_probability)

    # Layer 4: Readout layer
    with tf.name_scope('FullyConnectLayer2'):
        w_fc2 = tf.Variable(
            tf.truncated_normal([1024, 10], stddev=0.1), name="Weights_fc2"
        )
        b_fc2 = tf.Variable(tf.constant(0.1, shape=[10]), name="Bias_fc2")
        y_out = tf.matmul(h_fc1_drop, w_fc2) + b_fc2

    # loss function
    with tf.name_scope('xEntropy'):
        loss_crossEntropy = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_out),
            name="xent",
        )
        tf.summary.scalar("xEntropy", loss_crossEntropy)

    with tf.name_scope('Train_AdamOptim'):
        optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
        train_step = optimizer.minimize(loss_crossEntropy)

    with tf.name_scope('accuracy'):
        correct_predict = tf.equal(tf.argmax(y_out, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
        tf.summary.scalar("accuracy", accuracy)

    # Merge all summary ops into a single op
    summary = tf.summary.merge_all()

    # Operation: Initialize variables
    var_init = tf.global_variables_initializer()

    #### Add trace and metadata calls.
    run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
    run_metadata = tf.RunMetadata()

    # Run the computational graph. The session is created while `g` is the
    # default graph, so it executes the ops defined above.
    with tf.Session() as sess:
        # Initialize the variables that were created while building the
        # computational graph
        sess.run(var_init)

        # Write the current session graph events onto a file using summary
        # FileWriter. To be visualized in Tensorboard.
        writer = tf.summary.FileWriter("/tmp/mnist_demo/1")
        try:
            writer.add_graph(sess.graph)

            # Run the training step "required" number of times -- here, 101
            # steps (the author's note indicates 20000 for a full run).
            for i in range(101):  # 20000
                batch = mnist.train.next_batch(100)

                # Training
                sess.run(
                    train_step,
                    feed_dict={
                        x: batch[0],
                        y_: batch[1],
                        keep_probability: 0.5,
                    },
                )

                # Validation: every 50th step, evaluate on the first 100
                # validation examples with full tracing enabled so that
                # `run_metadata` is filled in for this run.
                if i % 50 == 0:
                    [val_accuracy, s] = sess.run(
                        [accuracy, summary],
                        feed_dict={
                            x: mnist.validation.images[0:100, :],
                            y_: mnist.validation.labels[0:100, :],
                            keep_probability: 1.0,
                        },
                        options=run_options,
                        run_metadata=run_metadata,
                    )
                    writer.add_run_metadata(run_metadata, 'step %d' % i)
                    writer.add_summary(s, i)
                    writer.flush()
                    print("step %d, validation accuracy %g" % (i, val_accuracy))
        finally:
            # Close the event file even if training raises, so pending events
            # are written out and the file handle is released.
            writer.close()
TensorFlow 教程没有说明高层节点的计算时间是如何由子节点聚合得到的。如能提供任何帮助,我将不胜感激。