我正在尝试使用以下代码在 TensorFlow 中做矩阵乘法:
# Minimal reproduction: profile a single square float32 matmul on the CPU with
# the TF 1.x profiler and write the per-op-type summary to 'test.txt'.
#
# NOTE(review): the warning reported for this script is raised while
# evaluating `k * output_count * 2`. For a dimsize x dimsize matmul that is
# presumably 2 * dimsize**3: 1.6e10 at dimsize=2000, which exceeds the signed
# 32-bit maximum (2**31 - 1, about 2.147e9), versus 2.0e9 at dimsize=1000,
# which still fits. That matches "empty at 2000, fine at 1000" -- TODO confirm
# the integer width used on this platform and how the profiler treats the
# overflowed value.
dimsize = 2000

# Random float32 operands fed into the placeholders below.
A = np.random.rand(dimsize, dimsize).astype('float32')
B = np.random.rand(dimsize, dimsize).astype('float32')
# Not referenced anywhere in the visible snippet; kept in case later code uses it.
C = np.random.rand(dimsize, dimsize).astype('float32')

# Pin both the inputs and the matmul to the CPU (one scope instead of two
# consecutive identical ones).
with tf.device('/cpu:0'):
    a = tf.placeholder(tf.float32, [dimsize, dimsize])
    b = tf.placeholder(tf.float32, [dimsize, dimsize])
    mul = tf.matmul(a, b)

# FULL_TRACE asks the runtime to fill `metadata` on every run so that the
# profiler has step statistics to consume.
metadata = tf.RunMetadata()
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    profiler = tf.profiler.Profiler(sess.graph)

    # Run the op and register each run's metadata as its own profiler step.
    for step in range(10):
        sess.run(
            [mul],
            feed_dict={a: A, b: B},
            options=run_options,
            run_metadata=metadata,
        )
        profiler.add_step(step, metadata)

    option_builder = tf.profiler.ProfileOptionBuilder
    opts = (
        option_builder(option_builder.time_and_memory())
        # with -1, should compute the average of all registered steps.
        .with_step(-1)
        .with_file_output('test.txt')
        .select(['micros', 'bytes', 'peak_bytes', 'occurrence'])
        .order_by('micros')
        .build()
    )
    # Profile grouped by operation type; output goes to 'test.txt'.
    profiler.profile_operations(options=opts)
我收到此运行时警告:
tensorflow\python\ops\math_ops.py:2040: RuntimeWarning: overflow encountered in long_scalars
return ops.OpStats("flops", (k * output_count * 2))
并且,分析器返回的结果为空。也就是说,test.txt 中只有表头,没有任何节点记录:
Profile:
node name | requested bytes | peak bytes | total execution time | accelerator execution time | cpu execution time | op occurrence (run|defined)
当我将dimsize更改为1000时,我会看到分析器结果:
Profile:
node name | requested bytes | peak bytes | total execution time | accelerator execution time | cpu execution time | op occurrence (run|defined)
MatMul 4.00MB (100.00%, 100.00%), 4.00MB (100.00%, 100.00%), 11.31ms (100.00%, 100.00%), 0us (0.00%, 0.00%), 11.31ms (100.00%, 100.00%), 1|1