我正在尝试用混合精度训练一个基本的 CNN,以利用 RTX 2080 的 Tensor Cores。代码在 Ubuntu 上的官方 TensorFlow 1.13.1 中可以正常运行,但在 NVIDIA 的 TensorFlow 发行版(NGC 容器)中失败,报错“No variables provided.”(未提供任何变量)。我不知道原因,有人可以帮忙吗?详细信息如下。
代码改自 NVIDIA 文档,即 https://devblogs.nvidia.com/video-mixed-precision-techniques-tensor-cores-deep-learning/ 的第 3 部分:
import tensorflow as tf
import numpy as np
def float32_variable_storage_getter(getter, name, shape=None, dtype=None,
                                    initializer=None, regularizer=None,
                                    trainable=True,
                                    *args, **kwargs):
    """Custom variable getter that keeps trainable variables in float32.

    Trainable variables are requested from ``getter`` with a float32 storage
    dtype. If the caller asked for a different ``dtype``, a ``tf.cast`` of the
    stored variable to that ``dtype`` is returned instead of the variable
    itself. Non-trainable variables are requested with the caller's ``dtype``
    and returned unchanged.

    Args:
        getter: Callable that actually creates or fetches the variable.
        name: Variable name, forwarded to ``getter``.
        shape: Variable shape, forwarded to ``getter``.
        dtype: Dtype the caller wants to compute with.
        initializer: Forwarded to ``getter``.
        regularizer: Forwarded to ``getter``.
        trainable: Whether the variable is trainable; selects the storage
            dtype as described above.
        *args: Extra positional arguments forwarded to ``getter``.
        **kwargs: Extra keyword arguments forwarded to ``getter``.

    Returns:
        The object returned by ``getter``, or its cast to ``dtype``.
    """
    if not trainable:
        # Non-trainable state is stored exactly as requested; no cast needed.
        return getter(name, shape, dtype=dtype,
                      initializer=initializer, regularizer=regularizer,
                      trainable=trainable,
                      *args, **kwargs)

    # Trainable weights: float32 storage regardless of the compute dtype.
    stored = getter(name, shape, dtype=tf.float32,
                    initializer=initializer, regularizer=regularizer,
                    trainable=trainable,
                    *args, **kwargs)
    if dtype != tf.float32:
        return tf.cast(stored, dtype)
    return stored
def bm(inputs):
    """Build the base network: conv -> batch norm -> max pool -> dense.

    Args:
        inputs: 4-D tensor; the last two static dimensions are read as
            height and width, and the layers are configured for
            channels-first data.

    Returns:
        Output tensor of the 128-unit ReLU dense layer.
    """
    static_shape = inputs.get_shape().as_list()
    height, width = static_shape[2], static_shape[3]

    net = tf.layers.conv2d(
        inputs,
        filters=64,
        kernel_size=7,
        use_bias=False,
        data_format='channels_first',
        padding='SAME',
    )
    net = tf.contrib.layers.batch_norm(net, data_format="NCHW", fused=True)
    net = tf.layers.max_pooling2d(
        net, pool_size=2, strides=2, data_format='channels_first')

    # Flatten to (batch, features): 64 channels over the pooled spatial grid.
    flat_features = 64 * (height // 2) * (width // 2)
    net = tf.reshape(net, (-1, flat_features))
    return tf.layers.dense(net, units=128, activation=tf.nn.relu)
def btm(inputs, labels, nlabel):
    """Build the mixed-precision training graph around ``bm``.

    The input is cast to float16, run through ``bm`` and a final dense layer
    under a variable scope that uses ``float32_variable_storage_getter``, and
    the logits are cast back to float32 before the loss. Gradients are
    computed on a statically scaled loss, clipped, unscaled and applied with
    a momentum optimizer.

    Args:
        inputs: Input tensor passed to ``bm`` after a cast to float16.
        labels: Integer class labels for the sparse softmax cross-entropy.
        nlabel: Number of output classes (units of the final dense layer).

    Returns:
        Tuple ``(inputs, labels, loss, train_op)``. Note that ``inputs`` is
        the float16 cast of the argument, not the tensor that was passed in.
    """
    inputs = tf.cast(inputs, tf.float16)

    # NOTE(review): the original indentation was lost; the scope is assumed
    # to cover only the variable-creating layers -- confirm against the
    # original script.
    with tf.device('/gpu:0'), tf.variable_scope(
            'fp32_vars', custom_getter=float32_variable_storage_getter):
        top_layer = bm(inputs)
        logits = tf.layers.dense(top_layer, nlabel, activation=None)

    # Compute the loss in float32.
    logits = tf.cast(logits, tf.float32)
    loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)

    optimizer = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)

    # Static loss scaling: scale the loss before differentiation, then
    # divide the gradients by the same factor before applying them.
    loss_scale = 128.0
    grads, varis = zip(*optimizer.compute_gradients(loss * loss_scale))

    # NOTE(review): clipping runs on the still-scaled gradients, so the
    # effective threshold on the true gradients is 5.0 / loss_scale. Order is
    # kept as-is to preserve existing numerical behavior; consider unscaling
    # before clipping.
    grads, _ = tf.clip_by_global_norm(grads, 5.0)
    grads = [None if grad is None else grad / loss_scale for grad in grads]

    # Pass a real list, not a bare zip(): in Python 3 zip() is a one-shot
    # iterator, and apply_gradients documents a list of pairs. An optimizer
    # that traverses the pairs more than once (presumably what the NVIDIA
    # build does) would find the iterator exhausted and raise
    # "No variables provided.".
    train_op = optimizer.apply_gradients(list(zip(grads, varis)))
    return inputs, labels, loss, train_op
# Problem size: channels-first images of nchan x h x w and nlabel classes.
nchan, h, w, nlabel = 3, 224, 224, 100

# Graph inputs. btm() returns its own (inputs, labels) tensors, which replace
# the placeholders below and are the ones used as feed keys.
inputs = tf.placeholder(tf.float32, (None, nchan, h, w))
labels = tf.placeholder(tf.int32, (None,))
inputs, labels, loss, train_op = btm(inputs, labels, nlabel)

from tensorflow import ConfigProto
config = ConfigProto()
# Allocate GPU memory on demand.
config.gpu_options.allow_growth = True

# One fixed random batch, reused for every step.
bs = 128
inputs_np = np.random.random(size=(bs, nchan, h, w)).astype(np.float32)
labels_np = np.random.randint(nlabel, size=(bs,)).astype(np.int32)

# Use the session as a context manager so it is closed (and its resources
# released) even if a step raises.
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(20):
        loss_np, _ = sess.run(
            [loss, train_op], {inputs: inputs_np, labels: labels_np})
        print("Loss", loss_np)
使用官方 TensorFlow 1.13.1 时运行正常,输出如下:
...
Loss 5.3065777
Loss 5.251279
Loss 5.1554813
Loss 5.036022
Loss 4.9095006
Loss 4.788646
Loss 4.680414
Loss 4.584101
Loss 4.5076714
Loss 4.4439754
Loss 4.3807573
Loss 4.3181067
Loss 4.253341
Loss 4.1884956
Loss 4.1250153
Loss 4.0654
Loss 4.0059204
Loss 3.9472775
Loss 3.8848066
Loss 3.8182044
Process finished with exit code 0
使用 NVIDIA 的 TensorFlow 发行版(https://ngc.nvidia.com/catalog/containers/nvidia:tensorflow)时得到:
WARNING:tensorflow:From test.py:20: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.conv2d instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
* https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
* https://github.com/tensorflow/addons
If you depend on functionality not listed there, please file an issue.
WARNING:tensorflow:From test.py:22: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.max_pooling2d instead.
WARNING:tensorflow:From test.py:24: dense (from tensorflow.python.layers.core) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.dense instead.
WARNING:tensorflow:From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/losses/losses_impl.py:209: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.cast instead.
Traceback (most recent call last):
File "test.py", line 47, in <module>
inputs,labels,loss,train_op=btm(inputs,labels,nlabel)
File "test.py", line 41, in btm
train_op = optimizer.apply_gradients(zip(grads,varis))#gradvars)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/optimizer.py", line 604, in apply_gradients
name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 2097, in cond
orig_res_t, res_t = context_t.BuildCondBranch(true_fn)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 1941, in BuildCondBranch
original_result = fn()
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/optimizer.py", line 597, in do_update
name+'-apply')
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/optimizer.py", line 629, in _apply_gradients_helper
raise ValueError("No variables provided.")
ValueError: No variables provided.