我想在特定的gpu设备下使用dynamic_rnn,但是会引发如下错误:
# Reproduction script (TF 1.x): pinning dynamic_rnn to '/gpu:0' fails because
# dynamic_rnn internally builds an 'Assert' op that only has a CPU kernel
# (see the error trace below).
import tensorflow as tf
# Two variable-length integer sequences, yielded one at a time.
sequence = [[1, 2],[2, 3, 5]]
def generator():
    for el in sequence:
        yield el
# NOTE(review): from_generator is normally called on the class
# (tf.data.Dataset.from_generator); calling it via tf.data.Dataset() happens
# to work on this TF version but is unidiomatic.
dataset = tf.data.Dataset().from_generator(generator, tf.int32, tf.TensorShape([None]))
# Keep each sequence's true length alongside the (later padded) sequence.
dataset = dataset.map(lambda seq: (seq, tf.size(seq)))
# Batch size 1; pad sequences to the longest in the batch, lengths are scalars.
dataset = dataset.padded_batch(1, padded_shapes=(tf.TensorShape([None]), tf.TensorShape([])), padding_values=(0, 0))
iter = dataset.make_initializable_iterator()
seq, seq_len = iter.get_next()
# Embedding table: 10 ids, 100-dim vectors.
embedding = tf.get_variable('embeddings', [10, 100])
cells = tf.contrib.rnn.GRUCell(5)
emb = tf.nn.embedding_lookup(embedding, seq)
with tf.device('/gpu:0'): # If I assign the gpu device, the codes below will raise errors
    outputs, state = tf.nn.dynamic_rnn(cell=cells, inputs=emb, sequence_length=seq_len, dtype=tf.float32)
with tf.Session() as sess:
    sess.run(iter.initializer)
    sess.run(tf.global_variables_initializer())
    out = sess.run(outputs)
错误:
Caused by op 'rnn/Assert/Assert', defined at:
File "test.py", line 22, in <module>
outputs, state = tf.nn.dynamic_rnn(cell=cells, inputs=emb, sequence_length=seq_len, dtype=tf.float32)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/rnn.py", line 622, in dynamic_rnn
[_assert_has_shape(sequence_length, [batch_size])]):
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/rnn.py", line 617, in _assert_has_shape
packed_shape, " but saw shape: ", x_shape])
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/tf_should_use.py", line 118, in wrapped
return _add_should_use_warning(fn(*args, **kwargs))
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/control_flow_ops.py", line 172, in Assert
return gen_logging_ops._assert(condition, data, summarize, name="Assert")
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_logging_ops.py", line 51, in _assert
name=name)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 3290, in create_op
op_def=op_def)
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1654, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): Cannot assign a device for operation 'rnn/Assert/Assert': Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.
Registered kernels:
device='CPU'
[[Node: rnn/Assert/Assert = Assert[T=[DT_STRING, DT_INT32, DT_STRING, DT_INT32], summarize=3, _device="/device:GPU:0"](rnn/All, rnn/Assert/Assert/data_0, rnn/stack, rnn/Assert/Assert/data_2, rnn/Shape_1)]]
如果我不使用tf.device('/gpu:0')，则代码可以正常工作。但是出于某种原因（例如多GPU支持），我需要使用它。我应该如何解决？谢谢
答案 0（得分：1）
一个简单的解决方案是在会话中使用“软放置”选项,即要求TensorFlow使用所指示的设备,除非没有内核,否则它将退回到CPU:
# ... (build the graph as before)
# allow_soft_placement lets TF fall back to the CPU for any op that has no
# kernel on the requested device (such as the internal 'Assert' op here).
with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
# ... (initialize and run as before)
另一种可能性是把一个函数传递给tf.device
：该函数为每个操作选择设备——除非操作满足某个条件（通常是具有某种特定类型，在本例中为Assert
，尽管您可能还需要排除模型中的其他操作），否则选择GPU设备。
def assign_device(op):
    """Device-placement function for tf.device.

    Routes ops whose type is 'Assert' (which only has a CPU kernel) to the
    CPU, and every other op to the first GPU.
    """
    return '/cpu:0' if op.type == 'Assert' else '/gpu:0'
# ... (build inputs as before)
with tf.device(assign_device):  # pass the function itself; TF calls it once per op
# ... (ops created here are placed per assign_device's return value)