我已经在配备 NVIDIA GPU 的机器上通过 Anaconda 成功安装了 TensorFlow 和 Keras,它在工作时非常完美!但有时会失败,并显示 GPU 同步错误(InternalError: GPU sync failed)。
更具体地说,在激活 Anaconda 环境后,我可以在 GPU 上成功编译并运行我的模型。但是如果我想从另一个 Python 程序编译并运行第二个模型,我总是会收到这个 GPU 同步错误。发生这种情况时,我必须关闭环境,再次重新激活环境,然后从编译和运行第二个模型开始,一切才能顺利进行。
除了从一种模型切换到另一种模型之外,如果我一次又一次地重新运行同一模型,有时会遇到相同的错误。
OS: Windows 10 home
GPU: 1050ti
Anaconda 1.9.7
Dev Environment: Jupyter Lab 0.35.4
tensorflow-gpu: 1.13.1
keras-gpu: 2.2.4
任何帮助将不胜感激!
---------------------------------------------------------------------------
InternalError Traceback (most recent call last)
~\Anaconda3\envs\G\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1333 try:
-> 1334 return fn(*args)
1335 except errors.OpError as e:
~\Anaconda3\envs\G\lib\site-packages\tensorflow\python\client\session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
1318 return self._call_tf_sessionrun(
-> 1319 options, feed_dict, fetch_list, target_list, run_metadata)
1320
~\Anaconda3\envs\G\lib\site-packages\tensorflow\python\client\session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
1406 self._session, options, feed_dict, fetch_list, target_list,
-> 1407 run_metadata)
1408
InternalError: GPU sync failed
During handling of the above exception, another exception occurred:
InternalError Traceback (most recent call last)
<ipython-input-308-f1e49fa98487> in <module>
15 Dimension,
16 weights=[Embedding_Matrix],
---> 17 trainable=True))
18 my_model_cuda.add(Bidirectional(CuDNNGRU(Dimension, kernel_initializer='glorot_uniform', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, recurrent_initializer='orthogonal', bias_initializer='zeros',return_sequences=True)))
19 my_model_cuda.add(Bidirectional(CuDNNGRU(Dimension, kernel_initializer='glorot_uniform', kernel_regularizer=None, recurrent_regularizer=None, bias_regularizer=None, activity_regularizer=None, recurrent_initializer='orthogonal', bias_initializer='zeros',return_sequences=False)))
~\Anaconda3\envs\G\lib\site-packages\keras\engine\sequential.py in add(self, layer)
163 # and create the node connecting the current layer
164 # to the input layer we just created.
--> 165 layer(x)
166 set_inputs = True
167 else:
~\Anaconda3\envs\G\lib\site-packages\keras\engine\base_layer.py in __call__(self, inputs, **kwargs)
434 # Load weights that were specified at layer instantiation.
435 if self._initial_weights is not None:
--> 436 self.set_weights(self._initial_weights)
437
438 # Raise exceptions in case the input is not compatible
~\Anaconda3\envs\G\lib\site-packages\keras\engine\base_layer.py in set_weights(self, weights)
1049 return
1050 weight_value_tuples = []
-> 1051 param_values = K.batch_get_value(params)
1052 for pv, p, w in zip(param_values, params, weights):
1053 if pv.shape != w.shape:
~\Anaconda3\envs\G\lib\site-packages\keras\backend\tensorflow_backend.py in batch_get_value(ops)
2418 """
2419 if ops:
-> 2420 return get_session().run(ops)
2421 else:
2422 return []
~\Anaconda3\envs\G\lib\site-packages\keras\backend\tensorflow_backend.py in get_session()
197 # not already marked as initialized.
198 is_initialized = session.run(
--> 199 [tf.is_variable_initialized(v) for v in candidate_vars])
200 uninitialized_vars = []
201 for flag, v in zip(is_initialized, candidate_vars):
~\Anaconda3\envs\G\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
927 try:
928 result = self._run(None, fetches, feed_dict, options_ptr,
--> 929 run_metadata_ptr)
930 if run_metadata:
931 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~\Anaconda3\envs\G\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1150 if final_fetches or final_targets or (handle and feed_dict_tensor):
1151 results = self._do_run(handle, final_targets, final_fetches,
-> 1152 feed_dict_tensor, options, run_metadata)
1153 else:
1154 results = []
~\Anaconda3\envs\G\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1326 if handle is None:
1327 return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1328 run_metadata)
1329 else:
1330 return self._do_call(_prun_fn, handle, feeds, fetches)
~\Anaconda3\envs\G\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
1346 pass
1347 message = error_interpolation.interpolate(message, self._graph)
-> 1348 raise type(e)(node_def, op, message)
1349
1350 def _extend_graph(self):
InternalError: GPU sync failed