无法获取卷积算法。这可能是因为cuDNN初始化失败,请查看上面是否打印了警告日志消息。 [Op:Conv2D]

时间:2019-11-24 06:55:42

标签: python tensorflow

我在Anaconda中安装了TensorFlow-GPU 2.0。导入该软件包并运行我的CNN模型代码时一切正常,但是当我尝试训练模型时,出现了错误。

这是我的错误报告:

Epoch 1/50
---------------------------------------------------------------------------
UnknownError                              Traceback (most recent call last)
<ipython-input-5-c4639d74909a> in <module>
      6                                         epochs=50,
      7                                         validation_data=testing_set,
----> 8                                         validation_steps=50)

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit_generator(self, generator, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch)
   1295         shuffle=shuffle,
   1296         initial_epoch=initial_epoch,
-> 1297         steps_name='steps_per_epoch')
   1298 
   1299   def evaluate_generator(self,

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_generator.py in model_iteration(model, data, steps_per_epoch, epochs, verbose, callbacks, validation_data, validation_steps, validation_freq, class_weight, max_queue_size, workers, use_multiprocessing, shuffle, initial_epoch, mode, batch_size, steps_name, **kwargs)
    263 
    264       is_deferred = not model._is_compiled
--> 265       batch_outs = batch_function(*batch_data)
    266       if not isinstance(batch_outs, list):
    267         batch_outs = [batch_outs]

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\training.py in train_on_batch(self, x, y, sample_weight, class_weight, reset_metrics)
    971       outputs = training_v2_utils.train_on_batch(
    972           self, x, y=y, sample_weight=sample_weight,
--> 973           class_weight=class_weight, reset_metrics=reset_metrics)
    974       outputs = (outputs['total_loss'] + outputs['output_losses'] +
    975                  outputs['metrics'])

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_v2_utils.py in train_on_batch(model, x, y, sample_weight, class_weight, reset_metrics)
    262       y,
    263       sample_weights=sample_weights,
--> 264       output_loss_metrics=model._output_loss_metrics)
    265 
    266   if reset_metrics:

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in train_on_batch(model, inputs, targets, sample_weights, output_loss_metrics)
    309           sample_weights=sample_weights,
    310           training=True,
--> 311           output_loss_metrics=output_loss_metrics))
    312   if not isinstance(outs, list):
    313     outs = [outs]

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _process_single_batch(model, inputs, targets, output_loss_metrics, sample_weights, training)
    250               output_loss_metrics=output_loss_metrics,
    251               sample_weights=sample_weights,
--> 252               training=training))
    253       if total_loss is None:
    254         raise ValueError('The model cannot be run '

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\training_eager.py in _model_loss(model, inputs, targets, output_loss_metrics, sample_weights, training)
    125     inputs = nest.map_structure(ops.convert_to_tensor, inputs)
    126 
--> 127   outs = model(inputs, **kwargs)
    128   outs = nest.flatten(outs)
    129 

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
    889           with base_layer_utils.autocast_context_manager(
    890               self._compute_dtype):
--> 891             outputs = self.call(cast_inputs, *args, **kwargs)
    892           self._handle_activity_regularization(inputs, outputs)
    893           self._set_mask_metadata(inputs, outputs, input_masks)

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\sequential.py in call(self, inputs, training, mask)
    254       if not self.built:
    255         self._init_graph_network(self.inputs, self.outputs, name=self.name)
--> 256       return super(Sequential, self).call(inputs, training=training, mask=mask)
    257 
    258     outputs = inputs  # handle the corner case where self.layers is empty

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\network.py in call(self, inputs, training, mask)
    706     return self._run_internal_graph(
    707         inputs, training=training, mask=mask,
--> 708         convert_kwargs_to_constants=base_layer_utils.call_context().saving)
    709 
    710   def compute_output_shape(self, input_shape):

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\network.py in _run_internal_graph(self, inputs, training, mask, convert_kwargs_to_constants)
    858 
    859           # Compute outputs.
--> 860           output_tensors = layer(computed_tensors, **kwargs)
    861 
    862           # Update tensor_dict.

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\engine\base_layer.py in __call__(self, inputs, *args, **kwargs)
    889           with base_layer_utils.autocast_context_manager(
    890               self._compute_dtype):
--> 891             outputs = self.call(cast_inputs, *args, **kwargs)
    892           self._handle_activity_regularization(inputs, outputs)
    893           self._set_mask_metadata(inputs, outputs, input_masks)

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\keras\layers\convolutional.py in call(self, inputs)
    195 
    196   def call(self, inputs):
--> 197     outputs = self._convolution_op(inputs, self.kernel)
    198 
    199     if self.use_bias:

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
   1132           call_from_convolution=False)
   1133     else:
-> 1134       return self.conv_op(inp, filter)
   1135     # copybara:strip_end
   1136     # copybara:insert return self.conv_op(inp, filter)

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
    637 
    638   def __call__(self, inp, filter):  # pylint: disable=redefined-builtin
--> 639     return self.call(inp, filter)
    640 
    641 

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in __call__(self, inp, filter)
    236         padding=self.padding,
    237         data_format=self.data_format,
--> 238         name=self.name)
    239 
    240 

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\ops\nn_ops.py in conv2d(input, filter, strides, padding, use_cudnn_on_gpu, data_format, dilations, name, filters)
   2008                            data_format=data_format,
   2009                            dilations=dilations,
-> 2010                            name=name)
   2011 
   2012 

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\ops\gen_nn_ops.py in conv2d(input, filter, strides, padding, use_cudnn_on_gpu, explicit_paddings, data_format, dilations, name)
   1029             input, filter, strides=strides, use_cudnn_on_gpu=use_cudnn_on_gpu,
   1030             padding=padding, explicit_paddings=explicit_paddings,
-> 1031             data_format=data_format, dilations=dilations, name=name, ctx=_ctx)
   1032       except _core._SymbolicException:
   1033         pass  # Add nodes to the TensorFlow graph.

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\ops\gen_nn_ops.py in conv2d_eager_fallback(input, filter, strides, padding, use_cudnn_on_gpu, explicit_paddings, data_format, dilations, name, ctx)
   1128   explicit_paddings, "data_format", data_format, "dilations", dilations)
   1129   _result = _execute.execute(b"Conv2D", 1, inputs=_inputs_flat, attrs=_attrs,
-> 1130                              ctx=_ctx, name=name)
   1131   _execute.record_gradient(
   1132       "Conv2D", _inputs_flat, _attrs, _result, name)

~\Anaconda3\envs\tf-gpu\lib\site-packages\tensorflow_core\python\eager\execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     65     else:
     66       message = e.message
---> 67     six.raise_from(core._status_to_exception(e.code, message), None)
     68   except TypeError as e:
     69     keras_symbolic_tensors = [

~\Anaconda3\envs\tf-gpu\lib\site-packages\six.py in raise_from(value, from_value)

UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above. [Op:Conv2D]

1
# Save a model
2
model.save('Datasets/300_train/CNN_300.tflearn')

这是我的CNN代码:

补充信息:我安装的是NVIDIA GPU驱动程序版本441.20(带有NVCUDA.DLL 10.2.95)。

![enter image description here] 1

1 个答案:

答案 0 :(得分:0)

我遇到了类似的问题。
我机器上安装的环境:Python 3.7、tensorflow-gpu 2.0、CUDA V10.0、cuDNN 7.4、Nvidia驱动程序411版本、Windows 10(依赖关系如TF2文档中所述)。
经过三天的尝试重新配置和重新安装所有内容,唯一起作用的是:

  1. 卸载Cuda,cuDnn和tensorflow2
  2. 将Nvidia驱动程序更新为441
  3. 安装Cuda V10.0
  4. 安装cuDnn 7.6(不是文档中所述的7.4!)
  5. 安装tensorflow-gpu2

请注意,TensorFlow会自行重新编译,这一点很重要——这发生在首次安装Nvidia驱动程序和tensorflow-gpu之后、首次从Python代码调用任何TensorFlow函数时(这会使代码挂起至少2分钟,在我的情况下大约是10分钟)。重新安装tensorflow或CUDA不会触发TensorFlow的再次重新编译,只有重新安装Nvidia驱动程序才可以。