I am currently using an RNN in TensorFlow (with Keras) to generate moving-object data. My RNN model is defined as follows:
import tensorflow as tf

if tf.test.is_gpu_available():
    my_gru = tf.keras.layers.CuDNNGRU
else:
    import functools
    my_gru = functools.partial(
        tf.keras.layers.GRU, recurrent_activation='sigmoid')

def build_model(internal_units, batch_size):
    model = tf.keras.Sequential([
        my_gru(internal_units,
               return_sequences=True,
               recurrent_initializer='glorot_uniform',
               stateful=True,
               batch_input_shape=[batch_size, None, 3]),
        tf.keras.layers.Dense(3)
    ])
    return model

INTERNAL_UNITS = 1024
BATCH_SIZE = 64  # matches the (64, ...) batch dimension in the model summary below
model = build_model(internal_units=INTERNAL_UNITS, batch_size=BATCH_SIZE)
The model summary I get looks fine:
_________________________________________________________________
Layer (type)                 Output Shape              Param #
=================================================================
cu_dnngru_2 (CuDNNGRU)       (64, None, 1024)          3161088
_________________________________________________________________
dense_4 (Dense)              (64, None, 3)             3075
=================================================================
Total params: 3,164,163
Trainable params: 3,164,163
Non-trainable params: 0
_________________________________________________________________
When I try to run a single prediction with the model before training, just to check that it works and that the output has the right shape, I get an error.
This is how I run the model...
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, n_features)")
...and this is the error I get...
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-52-dd1ea240c0d1> in <module>()
1 for input_example_batch, target_example_batch in dataset.take(1):
----> 2 example_batch_predictions = model(input_example_batch)
3 print(example_batch_predictions.shape, "# (batch_size, sequence_length, n_features)")
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
755 if not in_deferred_mode:
756 self._in_call = True
--> 757 outputs = self.call(inputs, *args, **kwargs)
758 self._in_call = False
759 if outputs is None:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/sequential.py in call(self, inputs, training, mask)
227 def call(self, inputs, training=None, mask=None):
228 if self._is_graph_network:
--> 229 return super(Sequential, self).call(inputs, training=training, mask=mask)
230
231 outputs, _ = self._call_and_compute_mask(
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py in call(self, inputs, training, mask)
843 outputs, _ = self._run_internal_graph(inputs,
844 training=training,
--> 845 mask=masks)
846 return outputs
847
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/engine/network.py in _run_internal_graph(self, inputs, training, mask)
1029 computed_tensor, **kwargs)
1030 else:
-> 1031 output_tensors = layer.call(computed_tensor, **kwargs)
1032 if hasattr(layer, 'compute_mask'):
1033 output_masks = layer.compute_mask(computed_tensor,
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/cudnn_recurrent.py in call(self, inputs, mask, training, initial_state)
107 # Reverse time axis.
108 inputs = K.reverse(inputs, 1)
--> 109 output, states = self._process_batch(inputs, initial_state)
110
111 if self.stateful:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/layers/cudnn_recurrent.py in _process_batch(self, inputs, initial_state)
297 params=params,
298 is_training=True,
--> 299 rnn_mode='gru')
300
301 if self.stateful or self.return_state:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py in cudnn_rnn(input, input_h, input_c, params, rnn_mode, input_mode, direction, dropout, seed, seed2, is_training, name)
142 input_mode=input_mode, direction=direction, dropout=dropout,
143 seed=seed, seed2=seed2, is_training=is_training, name=name,
--> 144 ctx=_ctx)
145 except _core._NotOkStatusException as e:
146 if name is not None:
/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_cudnn_rnn_ops.py in cudnn_rnn_eager_fallback(input, input_h, input_c, params, rnn_mode, input_mode, direction, dropout, seed, seed2, is_training, name, ctx)
184 "is_training", is_training)
185 _result = _execute.execute(b"CudnnRNN", 4, inputs=_inputs_flat,
--> 186 attrs=_attrs, ctx=_ctx, name=name)
187 _execute.record_gradient(
188 "CudnnRNN", _inputs_flat, _attrs, _result, name)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
64 else:
65 message = e.message
---> 66 six.raise_from(core._status_to_exception(e.code, message), None)
67 # pylint: enable=protected-access
68 return tensors
/usr/local/lib/python3.6/dist-packages/six.py in raise_from(value, from_value)
InvalidArgumentError: cannot compute CudnnRNN as input #1(zero-based) was expected to be a double tensor but is a float tensor [Op:CudnnRNN]
The input (input_example_batch) I am using looks like this:
tf.Tensor(
[[[ 1.04234461 4.66466794 -4.32528214]
[ 1.04244826 4.49530966 -4.42294239]
[ 1.04256889 4.47099585 -4.51911731]
...
[ 1.05549699 -0.64915764 0.67451403]
[ 1.05559893 -0.7788313 0.66355975]
[ 1.05570257 -0.88502956 0.65223413]]
[[-0.83849063 -0.47476892 -0.22299478]
[-0.8383632 -0.41943403 -0.22782209]
[-0.83830883 -0.41635987 -0.24527468]
...
[-0.82015615 0.05482504 0.681198 ]
[-0.8200627 -0.04187127 0.69233796]
[-0.81995906 -0.13493448 0.66615907]]
[[-1.27264128 0.12441285 -0.48032767]
[-1.27256143 0.29544794 -0.714081 ]
[-1.27247477 0.46871879 -0.97197089]
...
[-1.25435437 3.47832158 0.72724314]
[-1.25427451 3.90674772 0.63255355]
[-1.25418956 4.28934093 0.49219015]]
...
[[ 1.01251773 1.16012535 -2.9933152 ]
[ 1.01263666 1.16906836 -2.99554319]
[ 1.0127505 0.99943062 -2.95135471]
...
[ 1.02650943 -0.42362607 0.46805359]
[ 1.02661308 -0.49377282 0.42702143]
[ 1.02666575 -0.49544963 0.42739276]]
[[-1.34049978 -0.01029126 0.5430626 ]
[-1.34038934 -0.04690171 0.52078269]
[-1.34029419 0.07857991 0.52895199]
...
[-1.3289751 1.94152097 1.03489148]
[-1.32888165 1.51113855 0.94372954]
[-1.32878311 1.16208164 0.76511898]]
[[ 0.57716585 -1.38779448 0.0469634 ]
[ 0.57728308 -1.44536512 -0.01542033]
[ 0.57734255 -1.49846425 -0.02581762]
...
[ 0.58594146 -0.63183055 -0.03621491]
[ 0.58603491 -0.60136842 -0.04419854]
[ 0.58613855 -0.63015374 -0.07204842]]], shape=(64, 100, 3), dtype=float64)
All of this runs in a Google Colab environment.
I have already tried forcing it to use the plain GRU layer, which leads to almost the same error. The one notable difference is that the plain GRU layer claims it expects a float tensor but got a double tensor (exactly the opposite of the error I copied into this post).
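For reference, forcing the fallback amounts to nothing more than skipping the GPU check in the snippet above; a minimal sketch:

import functools
import tensorflow as tf

# Always use the plain (non-CuDNN) GRU, regardless of GPU availability
my_gru = functools.partial(
    tf.keras.layers.GRU, recurrent_activation='sigmoid')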
My entire code is essentially the same as the example given on the TensorFlow website. I modified the model slightly and changed the input data to have 3 features instead of as many features as there are characters in the vocabulary. The original TensorFlow example works fine for me.
Please excuse the lengthy post, and feel free to ask for more information. I have tried to be thorough, but you never know. Thanks for any help.
Edit:
Here you can find a short example notebook on Google Colab that you can use to reproduce the error yourself.
Answer 0: (score: 0)
So, I found a solution that seems to work for me. When I call model.input, I get the following output, which shows that the GRU layer requires its input to be a float32 tensor:
<DeferredTensor 'gru_input' shape=(64, ?, 3) dtype=float32>
Using the line

x = tf.cast(x, dtype=tf.float32)

(TensorFlow documentation), I can convert the input data to a float32 tensor. With this float32 tensor, the model works fine.
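As a minimal sketch, the same cast can be applied to the whole pipeline with dataset.map (assuming, as in the loop above, that dataset yields (input, target) pairs and that the targets are float64 as well):

import tensorflow as tf

def to_float32(x, y):
    # Cast inputs and targets from float64 (NumPy's default) to float32
    return tf.cast(x, dtype=tf.float32), tf.cast(y, dtype=tf.float32)

dataset = dataset.map(to_float32)

After this, model(input_example_batch) runs without the dtype error.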
I hope this helps anyone who runs into the same problem. According to {{3}}, float64 compatibility is a known issue in TensorFlow.