I did find several similar questions, but I tried all the suggested fixes and none of them solved my problem, so I am asking here. My GPU is a GeForce GT 740M and it is the only GPU in my machine. Note that my batch_size is already 1, which is very small. Here is the error I get:
Traceback (most recent call last):
File "<ipython-input-1-7a037df84c8b>", line 1, in <module>
runfile('E:/tensorflow_learning/domain_generalization_try002/train/main_train.py', wdir='E:/tensorflow_learning/domain_generalization_try002/train')
File "D:\Users\ZYY\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 880, in runfile
execfile(filename, namespace)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "E:/tensorflow_learning/domain_generalization_try002/train/main_train.py", line 236, in <module>
keep_prob: dropout_rate})
File "D:\Users\ZYY\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 895, in run
run_metadata_ptr)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1128, in _run
feed_dict_tensor, options, run_metadata)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1344, in _do_run
options, run_metadata)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1363, in _do_call
raise type(e)(node_def, op, message)
ResourceExhaustedError: OOM when allocating tensor with shape[37748736,3] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[Node: concatenation/weights/transpose_82 = Transpose[T=DT_FLOAT, Tperm=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](concatenation/weights/Reshape_81, train/gradients/concatenation/weights/transpose_82_grad/InvertPermutation)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
Caused by op 'concatenation/weights/transpose_82', defined at:
File "D:\Users\ZYY\Anaconda3\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 231, in <module>
main()
File "D:\Users\ZYY\Anaconda3\lib\site-packages\spyder\utils\ipython\start_kernel.py", line 227, in main
kernel.start()
File "D:\Users\ZYY\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
ioloop.IOLoop.instance().start()
File "D:\Users\ZYY\Anaconda3\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
super(ZMQIOLoop, self).start()
File "D:\Users\ZYY\Anaconda3\lib\site-packages\tornado\ioloop.py", line 888, in start
handler_func(fd_obj, events)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
self._handle_recv()
File "D:\Users\ZYY\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
self._run_callback(callback, msg)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
callback(*args, **kwargs)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
return fn(*args, **kwargs)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
handler(stream, idents, msg)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2698, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2808, in run_ast_nodes
if self.run_code(code, result):
File "D:\Users\ZYY\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2862, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-1-7a037df84c8b>", line 1, in <module>
runfile('E:/tensorflow_learning/domain_generalization_try002/train/main_train.py', wdir='E:/tensorflow_learning/domain_generalization_try002/train')
File "D:\Users\ZYY\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 880, in runfile
execfile(filename, namespace)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "E:/tensorflow_learning/domain_generalization_try002/train/main_train.py", line 91, in <module>
model = alexnet_mdl(domains_ids, variable_dicts, output_num, factorization_method)
File "E:\tensorflow_learning\domain_generalization_try002\train\sample_code_zyy_train.py", line 38, in __init__
self.concatenate_models(variable_dicts=variable_dicts, eps_or_k_user_prefered=eps_or_k_user_prefered)
File "E:\tensorflow_learning\domain_generalization_try002\train\sample_code_zyy_train.py", line 202, in concatenate_models
eps_or_k=eps_or_k)
File "E:\tensorflow_learning\domain_generalization_try002\train\tensor_toolbox_yyang_train_svds.py", line 137, in TensorProducer_2
W = TuckerTensorProducer(U, S)
File "E:\tensorflow_learning\domain_generalization_try002\train\tensor_toolbox_yyang_train_svds.py", line 127, in TuckerTensorProducer
S = TensorProduct(S, U[i], (0, 1))
File "E:\tensorflow_learning\domain_generalization_try002\train\tensor_toolbox_yyang_train_svds.py", line 112, in TensorProduct
result = tf.matmul(tf.transpose(TensorUnfold(A, axes[0])), TensorUnfold(B, axes[1]))
File "D:\Users\ZYY\Anaconda3\lib\site-packages\tensorflow\python\ops\array_ops.py", line 1384, in transpose
ret = transpose_fn(a, perm, name=name)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_array_ops.py", line 7687, in transpose
"Transpose", x=x, perm=perm, name=name)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3160, in create_op
op_def=op_def)
File "D:\Users\ZYY\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1625, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[37748736,3] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
[[Node: concatenation/weights/transpose_82 = Transpose[T=DT_FLOAT, Tperm=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](concatenation/weights/Reshape_81, train/gradients/concatenation/weights/transpose_82_grad/InvertPermutation)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
When I got this error I checked my GPU; its status is shown here: GPU state. What can I do to overcome this error, or is it simply that my GPU is not powerful enough?
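Following the hint in the error message, this is the diagnostic I could add. It is only a minimal sketch assuming the TF 1.x Session API shown in the traceback: it passes report_tensor_allocations_upon_oom to Session.run and also enables on-demand GPU memory growth. The small matmul graph here is a made-up stand-in; in my real script the same options would go into the sess.run(..., feed_dict={..., keep_prob: dropout_rate}) call at main_train.py line 236.

import tensorflow as tf

# From the hint in the error: report which tensors are allocated when an OOM occurs.
run_options = tf.RunOptions(report_tensor_allocations_upon_oom=True)

# Let the BFC allocator grow GPU memory on demand instead of reserving it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

# Dummy graph just to make the sketch self-contained; the real training op would go here.
a = tf.random_normal([1024, 1024])
b = tf.matmul(a, tf.transpose(a))

with tf.Session(config=config) as sess:
    result = sess.run(b, options=run_options)
    print(result.shape)

One thing I notice from the log: the failing tensor is float32 with shape [37748736, 3], which is roughly 37748736 * 3 * 4 bytes ≈ 450 MB for a single transpose, so even with batch_size 1 the Tucker factorization built in TensorProducer_2 may simply not fit on this card together with the rest of the graph.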