Question

我想使用 GPU 构建 LSTM 模型，但运行时抛出了如下异常：

InvalidArgumentError (see above for traceback): Cannot assign a device to node 'model_1/tower_0/model/drnn/Assert/Assert': Could not satisfy explicit device specification '/device:GPU:0' because no supported kernel for GPU devices is available.
     [[Node: model_1/tower_0/model/drnn/Assert/Assert = Assert[T=[DT_STRING, DT_INT32, DT_STRING, DT_INT32], summarize=3, _device="/device:GPU:0"](model_1/tower_0/model/drnn/All, model_1/tower_0/model/drnn/Assert/Assert/data_0, model_1/tower_0/model/drnn/stack, model_1/tower_0/model/drnn/Assert/Assert/data_2, model_1/tower_0/model/drnn/Shape_1)]]

看起来这是由某些 GPU 不支持的操作（op）引起的。

更详细的回溯（traceback）信息如下：

Caused by op 'model_1/tower_0/model/drnn/Assert/Assert', defined at:
  File "train.py", line 351, in <module>
    runner.run()
  File "train.py", line 61, in run
    is_train=False)
  File "/home/liuziqi/keyword_spotting/models/dynamic_rnn.py", line 38, in __init__
    self.build_graph(config, is_train)
  File "/home/liuziqi/keyword_spotting/utils/common.py", line 40, in wrapper
    result = func(*args, **kwargs)
  File "/home/liuziqi/keyword_spotting/models/dynamic_rnn.py", line 57, in build_graph
    seqLengths)
  File "/home/liuziqi/keyword_spotting/models/dynamic_rnn.py", line 182, in build_multi_dynamic_brnn
    scope="drnn")
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 540, in dynamic_rnn
    [_assert_has_shape(sequence_length, [batch_size])]):
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 535, in _assert_has_shape
    packed_shape, " but saw shape: ", x_shape])
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 121, in Assert
    condition, data, summarize, name="Assert")
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 39, in _assert
    summarize=summarize, name=name)
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/liuziqi/py3env/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

构建 LSTM 的代码如下：

def build_graph(self, config, is_train):
    """Build the classification head and, for training, losses and optimizer.

    The output of ``build_multi_dynamic_brnn`` is flattened, passed through
    a single affine layer and exposed as ``self.softmax``.  When ``is_train``
    is truthy the cross-entropy loss, the max-pooling loss and the Adam
    update op (``self.optimizer``) are added to the graph as well.

    Args:
        config: settings object; this method reads ``use_project``,
            ``num_proj``, ``hidden_size``, ``num_classes``, ``batch_size``,
            ``max_pooling_loss``, ``grad_clip`` and ``learning_rate``.
        is_train: when falsy, only the forward pass up to ``self.softmax``
            is built.
    """
    rnn_out = self.build_multi_dynamic_brnn(config, self.inputX,
                                            self.seqLengths)

    with tf.name_scope('fc-layer'):
        # Last dimension used both for the weight matrix and for flattening
        # the RNN output.
        if config.use_project:
            feat_dim = config.num_proj
        else:
            feat_dim = config.hidden_size
        # NOTE(review): name='weightsClasses' is attached to the initializer
        # tensor, not to tf.Variable (unlike 'biasesClasses' below) --
        # confirm whether that is intended.
        init_w = tf.truncated_normal([feat_dim, config.num_classes],
                                     name='weightsClasses')
        fc_w = tf.Variable(init_w)
        flat_feats = tf.reshape(rnn_out, (-1, feat_dim))
        fc_b = tf.Variable(tf.zeros([config.num_classes]),
                           name='biasesClasses')

    logits_2d = tf.matmul(flat_feats, fc_w) + fc_b
    probs_2d = tf.nn.softmax(logits_2d)
    self.softmax = tf.reshape(
        probs_2d, (config.batch_size, -1, config.num_classes))

    if not is_train:
        return

    labels_2d = tf.reshape(self.labels, (-1, config.num_classes))
    per_row_xent = tf.nn.softmax_cross_entropy_with_logits(
        labels=labels_2d, logits=logits_2d)
    self.xent_loss = tf.reduce_mean(per_row_xent)

    # --- max-pooling loss ---
    # NOTE(review): -log(softmax) becomes +inf if a probability underflows
    # to 0, and inf * 0 in the masking below would be NaN -- verify.
    self.log_softmax = -tf.log(self.softmax)

    # Classes 1.. (everything except class index 0), masked by the labels.
    self.crop_log_softmax = tf.slice(
        self.log_softmax, [0, 0, 1], [-1, -1, -1])
    self.crop_labels = tf.slice(self.labels, [0, 0, 1], [-1, -1, -1])
    self.masked_log_softmax = self.crop_log_softmax * self.crop_labels

    # Both reductions run over axis 1, leaving one value per
    # (batch item, non-zero class index).
    self.segment_len = tf.count_nonzero(
        self.masked_log_softmax, 1, dtype=tf.float32)
    self.max_frame = tf.reduce_max(self.masked_log_softmax, 1)
    self.xent_max_frame = tf.reduce_sum(
        self.max_frame * self.segment_len)

    # Class index 0 only, summed per batch item and divided by seqLengths.
    self.background_log_softmax = tf.slice(
        self.log_softmax, [0, 0, 0], [-1, -1, 1])
    self.background_label = tf.slice(
        self.labels, [0, 0, 0], [-1, -1, 1])
    bg_per_item = tf.reduce_sum(
        self.background_log_softmax * self.background_label, (1, 2))
    seq_len_f = tf.cast(self.seqLengths, tf.float32)
    self.xent_background = tf.reduce_sum(bg_per_item / seq_len_f)

    self.flatten_masked_softmax = tf.reshape(
        self.masked_log_softmax, (config.batch_size, -1))
    self.max_index = tf.arg_max(self.flatten_masked_softmax, 1)

    self.max_pooling_loss = self.xent_background + self.xent_max_frame

    self.var_trainable_op = tf.trainable_variables()

    self.loss = (self.max_pooling_loss if config.max_pooling_loss
                 else self.xent_loss)

    if config.grad_clip != -1:
        # Clip by global norm before handing the gradients to Adam.
        clipped, _ = tf.clip_by_global_norm(
            tf.gradients(self.loss, self.var_trainable_op),
            config.grad_clip)
        adam = tf.train.AdamOptimizer(config.learning_rate)
        self.optimizer = adam.apply_gradients(
            zip(clipped, self.var_trainable_op))
    else:
        # grad_clip == -1 disables gradient clipping.
        self.optimizer = tf.train.AdamOptimizer(
            config.learning_rate).minimize(self.loss)


def build_multi_dynamic_brnn(self,
                             config,
                             inputX,
                             seqLengths):
    """Stack ``config.num_layers`` recurrent layers on top of ``inputX``.

    Every layer gets its own cell instance and its own variable scope, so
    each layer owns separate weights.  The first layer keeps the scope name
    "drnn", which leaves the variable names of single-layer models
    unchanged; layer ``i`` (for ``i >= 1``) uses "drnn_<i>".

    Args:
        config: settings object; this method reads ``num_layers``,
            ``hidden_size``, ``cell_clip``, ``use_project``, ``num_proj``,
            ``mode`` and ``keep_prob``.
        inputX: input tensor fed to the first layer.
        seqLengths: forwarded to ``dynamic_rnn`` as ``sequence_length``.

    Returns:
        The output of the last layer, after ``self.dropout`` when
        ``config.mode == 'train'``.

    Raises:
        ValueError: if ``config.num_layers`` is smaller than 1 (there would
            be no layer output to return).
    """
    if config.num_layers < 1:
        raise ValueError('config.num_layers must be at least 1, got %r'
                         % (config.num_layers,))

    hidden = inputX
    for i in range(config.num_layers):
        # A fresh cell per layer: sharing one cell object across layers
        # would tie all layers to the same set of weights.
        cell = cell_fn(num_units=config.hidden_size,
                       use_peepholes=True,
                       cell_clip=config.cell_clip,
                       initializer=tf.contrib.layers.xavier_initializer(),
                       num_proj=(config.num_proj
                                 if config.use_project else None),
                       proj_clip=None,
                       forget_bias=1.0,
                       state_is_tuple=True,
                       activation=tf.tanh,
                       reuse=tf.get_variable_scope().reuse
                       )
        # Distinct scope per layer; layer 0 stays "drnn" for compatibility
        # with existing single-layer variable names.
        scope_name = 'drnn' if i == 0 else 'drnn_%d' % i
        outputs, _ = dynamic_rnn(cell,
                                 inputs=hidden,
                                 sequence_length=seqLengths,
                                 initial_state=None,
                                 dtype=tf.float32,
                                 scope=scope_name)

        # tensor of shape: [batch_size, max_time, input_size]
        hidden = outputs
        if config.mode == 'train':
            hidden = self.dropout(hidden, config.keep_prob)

    return hidden

有人能帮我弄清楚是哪个步骤或操作导致了这个错误吗？（之前在 CPU 上运行这段代码时一切正常，切换到 GPU 版本后才出现此错误，因此我认为其中存在某些 GPU 不支持的操作。）

Answer 1

dynamic_rnn 在内部使用 tf.Assert 做检查，该操作的输入中包含字符串数据，因此没有 GPU 实现。
您可以启用软放置（soft placement），例如在创建会话时传入 tf.ConfigProto(allow_soft_placement=True)，让 TensorFlow 自动把这类操作放到 CPU 上。由于它不是真正的计算操作，因此应该不会降低您的性能。

使用 GPU 的 TensorFlow LSTM

1 个答案: