当我使用以下方法恢复保存的模型时:
checkpoint = tf.train.get_checkpoint_state(config.pre_model_dir)
if checkpoint and checkpoint.model_checkpoint_path:
saver.restore(session, checkpoint.model_checkpoint_path)
我收到此错误:
INFO:tensorflow:Restoring parameters from ./saved_model/10_zones/10/network--1685000
---------------------------------------------------------------------------
NotFoundError Traceback (most recent call last)
/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1321 try:
-> 1322 return fn(*args)
1323 except errors.OpError as e:
/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
1306 return self._call_tf_sessionrun(
-> 1307 options, feed_dict, fetch_list, target_list, run_metadata)
1308
/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
1408 self._session, options, feed_dict, fetch_list, target_list,
-> 1409 run_metadata)
1410 else:
NotFoundError: Key Variable not found in checkpoint
[[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_INT32, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
[[Node: save/RestoreV2/_21 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_18_save/RestoreV2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
During handling of the above exception, another exception occurred:
NotFoundError Traceback (most recent call last)
<ipython-input-97-0cbd09927b40> in <module>()
42 checkpoint = tf.train.get_checkpoint_state(config.pre_model_dir)
43 if checkpoint and checkpoint.model_checkpoint_path:
---> 44 saver.restore(session, checkpoint.model_checkpoint_path)
45 print("loaded the model")
46 else:
/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, sess, save_path)
1800 else:
1801 sess.run(self.saver_def.restore_op_name,
-> 1802 {self.saver_def.filename_tensor_name: save_path})
1803
1804 @staticmethod
/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
898 try:
899 result = self._run(None, fetches, feed_dict, options_ptr,
--> 900 run_metadata_ptr)
901 if run_metadata:
902 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
1133 if final_fetches or final_targets or (handle and feed_dict_tensor):
1134 results = self._do_run(handle, final_targets, final_fetches,
-> 1135 feed_dict_tensor, options, run_metadata)
1136 else:
1137 results = []
/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1314 if handle is None:
1315 return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1316 run_metadata)
1317 else:
1318 return self._do_call(_prun_fn, handle, feeds, fetches)
/usr/lib/python3.6/site-packages/tensorflow/python/client/session.py in _do_call(self, fn, *args)
1333 except KeyError:
1334 pass
-> 1335 raise type(e)(node_def, op, message)
1336
1337 def _extend_graph(self):
NotFoundError: Key Variable not found in checkpoint
[[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_INT32, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
[[Node: save/RestoreV2/_21 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_18_save/RestoreV2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
Caused by op 'save/RestoreV2', defined at:
File "/usr/lib64/python3.6/runpy.py", line 193, in _run_module_as_main
"__main__", mod_spec)
File "/usr/lib64/python3.6/runpy.py", line 85, in _run_code
exec(code, run_globals)
File "/usr/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
app.launch_new_instance()
File "/usr/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
app.start()
File "/usr/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 486, in start
self.io_loop.start()
File "/usr/lib64/python3.6/site-packages/tornado/platform/asyncio.py", line 127, in start
self.asyncio_loop.run_forever()
File "/usr/lib64/python3.6/asyncio/base_events.py", line 422, in run_forever
self._run_once()
File "/usr/lib64/python3.6/asyncio/base_events.py", line 1432, in _run_once
handle._run()
File "/usr/lib64/python3.6/asyncio/events.py", line 145, in _run
self._callback(*self._args)
File "/usr/lib64/python3.6/site-packages/tornado/platform/asyncio.py", line 117, in _handle_events
handler_func(fileobj, events)
File "/usr/lib64/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
return fn(*args, **kwargs)
File "/usr/lib64/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
self._handle_recv()
File "/usr/lib64/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
self._run_callback(callback, msg)
File "/usr/lib64/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
callback(*args, **kwargs)
File "/usr/lib64/python3.6/site-packages/tornado/stack_context.py", line 276, in null_wrapper
return fn(*args, **kwargs)
File "/usr/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
return self.dispatch_shell(stream, msg)
File "/usr/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
handler(stream, idents, msg)
File "/usr/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
user_expressions, allow_stdin)
File "/usr/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
res = shell.run_cell(code, store_history=store_history, silent=silent)
File "/usr/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
File "/usr/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
raw_cell, store_history, silent, shell_futures)
File "/usr/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2903, in run_ast_nodes
if self.run_code(code, result):
File "/usr/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2963, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-97-0cbd09927b40>", line 26, in <module>
saver = tf.train.Saver()
File "/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1338, in __init__
self.build()
File "/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1347, in build
self._build(self._filename, build_save=True, build_restore=True)
File "/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 1384, in _build
build_save=build_save, build_restore=build_restore)
File "/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 835, in _build_internal
restore_sequentially, reshape)
File "/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 472, in _AddRestoreOps
restore_sequentially)
File "/usr/lib/python3.6/site-packages/tensorflow/python/training/saver.py", line 886, in bulk_restore
return io_ops.restore_v2(filename_tensor, names, slices, dtypes)
File "/usr/lib/python3.6/site-packages/tensorflow/python/ops/gen_io_ops.py", line 1463, in restore_v2
shape_and_slices=shape_and_slices, dtypes=dtypes, name=name)
File "/usr/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3392, in create_op
op_def=op_def)
File "/usr/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1718, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
NotFoundError (see above for traceback): Key Variable not found in checkpoint
[[Node: save/RestoreV2 = RestoreV2[dtypes=[DT_INT32, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, ..., DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](_arg_save/Const_0_0, save/RestoreV2/tensor_names, save/RestoreV2/shape_and_slices)]]
[[Node: save/RestoreV2/_21 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device_incarnation=1, tensor_name="edge_18_save/RestoreV2", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
我搜索了此错误,发现了一个TensorFlow的bug报告,其中建议使用完整的相对路径来加载模型。我照此做法,为config.pre_model_dir尝试了以下两个值:
'./saved_model/10_zones/10'
和
os.path.abspath(config.pre_model_dir+'./../saved_model/10_zones/10')
。两者均导致相同的错误。
我还使用以下代码检查了已保存变量的名称:
from tensorflow.contrib.framework.python.framework import checkpoint_utils
var_list = checkpoint_utils.list_variables(config.pre_model_dir)
for v in var_list:
print(v)
这是:
('actor/main_net/layer1/biases/Variable', [90])
('actor/main_net/layer1/biases/Variable/Adam', [90])
('actor/main_net/layer1/biases/Variable/Adam_1', [90])
('actor/main_net/layer1/weights/Variable', [30, 90])
('actor/main_net/layer1/weights/Variable/Adam', [30, 90])
('actor/main_net/layer1/weights/Variable/Adam_1', [30, 90])
('actor/main_net/layer2/biases/Variable', [60])
('actor/main_net/layer2/biases/Variable/Adam', [60])
('actor/main_net/layer2/biases/Variable/Adam_1', [60])
('actor/main_net/layer2/weights/Variable', [90, 60])
('actor/main_net/layer2/weights/Variable/Adam', [90, 60])
('actor/main_net/layer2/weights/Variable/Adam_1', [90, 60])
('actor/main_net/layer3/biases/Variable', [30])
('actor/main_net/layer3/biases/Variable/Adam', [30])
('actor/main_net/layer3/biases/Variable/Adam_1', [30])
('actor/main_net/layer3/weights/Variable', [60, 30])
('actor/main_net/layer3/weights/Variable/Adam', [60, 30])
('actor/main_net/layer3/weights/Variable/Adam_1', [60, 30])
('actor/main_net/layer4/biases/Variable', [10])
('actor/main_net/layer4/biases/Variable/Adam', [10])
('actor/main_net/layer4/biases/Variable/Adam_1', [10])
('actor/main_net/layer4/weights/Variable', [30, 10])
('actor/main_net/layer4/weights/Variable/Adam', [30, 10])
('actor/main_net/layer4/weights/Variable/Adam_1', [30, 10])
('actor/target_net/layer1/biases/Variable', [90])
('actor/target_net/layer1/weights/Variable', [30, 90])
('actor/target_net/layer2/biases/Variable', [60])
('actor/target_net/layer2/weights/Variable', [90, 60])
('actor/target_net/layer3/biases/Variable', [30])
('actor/target_net/layer3/weights/Variable', [60, 30])
('actor/target_net/layer4/biases/Variable', [10])
('actor/target_net/layer4/weights/Variable', [30, 10])
('beta1_power', [])
('beta1_power_1', [])
('beta2_power', [])
('beta2_power_1', [])
('critic/main_net/l1/biases', [90])
('critic/main_net/l1/biases/Adam', [90])
('critic/main_net/l1/biases/Adam_1', [90])
('critic/main_net/l1/weights', [40, 90])
('critic/main_net/l1/weights/Adam', [40, 90])
('critic/main_net/l1/weights/Adam_1', [40, 90])
('critic/main_net/l2/biases', [60])
('critic/main_net/l2/biases/Adam', [60])
('critic/main_net/l2/biases/Adam_1', [60])
('critic/main_net/l2/weights', [90, 60])
('critic/main_net/l2/weights/Adam', [90, 60])
('critic/main_net/l2/weights/Adam_1', [90, 60])
('critic/main_net/l3/biases', [30])
('critic/main_net/l3/biases/Adam', [30])
('critic/main_net/l3/biases/Adam_1', [30])
('critic/main_net/l3/weights', [60, 30])
('critic/main_net/l3/weights/Adam', [60, 30])
('critic/main_net/l3/weights/Adam_1', [60, 30])
('critic/main_net/l4/bias', [1])
('critic/main_net/l4/bias/Adam', [1])
('critic/main_net/l4/bias/Adam_1', [1])
('critic/main_net/l4/kernel', [30, 1])
('critic/main_net/l4/kernel/Adam', [30, 1])
('critic/main_net/l4/kernel/Adam_1', [30, 1])
('critic/target_net/l1/biases', [90])
('critic/target_net/l1/weights', [40, 90])
('critic/target_net/l2/biases', [60])
('critic/target_net/l2/weights', [90, 60])
('critic/target_net/l3/biases', [30])
('critic/target_net/l3/weights', [60, 30])
('critic/target_net/l4/bias', [1])
('critic/target_net/l4/kernel', [30, 1])
这与我当前模型中tf.global_variables()的结果基本一致:
<tf.Variable 'actor/main_net/layer1/weights/Variable:0' shape=(30, 90) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer1/biases/Variable:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer2/weights/Variable:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer2/biases/Variable:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer3/weights/Variable:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer3/biases/Variable:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer4/weights/Variable:0' shape=(30, 10) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer4/biases/Variable:0' shape=(10,) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer1/weights/Variable:0' shape=(30, 90) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer1/biases/Variable:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer2/weights/Variable:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer2/biases/Variable:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer3/weights/Variable:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer3/biases/Variable:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer4/weights/Variable:0' shape=(30, 10) dtype=float32_ref>,
<tf.Variable 'actor/target_net/layer4/biases/Variable:0' shape=(10,) dtype=float32_ref>,
<tf.Variable 'Variable:0' shape=() dtype=int32_ref>,
<tf.Variable 'beta1_power:0' shape=() dtype=float32_ref>,
<tf.Variable 'beta2_power:0' shape=() dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer1/weights/Variable/Adam:0' shape=(30, 90) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer1/weights/Variable/Adam_1:0' shape=(30, 90) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer1/biases/Variable/Adam:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer1/biases/Variable/Adam_1:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer2/weights/Variable/Adam:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer2/weights/Variable/Adam_1:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer2/biases/Variable/Adam:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer2/biases/Variable/Adam_1:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer3/weights/Variable/Adam:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer3/weights/Variable/Adam_1:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer3/biases/Variable/Adam:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer3/biases/Variable/Adam_1:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer4/weights/Variable/Adam:0' shape=(30, 10) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer4/weights/Variable/Adam_1:0' shape=(30, 10) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer4/biases/Variable/Adam:0' shape=(10,) dtype=float32_ref>,
<tf.Variable 'actor/main_net/layer4/biases/Variable/Adam_1:0' shape=(10,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l1/weights:0' shape=(40, 90) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l1/biases:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l2/weights:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l2/biases:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l3/weights:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l3/biases:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l4/kernel:0' shape=(30, 1) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l4/bias:0' shape=(1,) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l1/weights:0' shape=(40, 90) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l1/biases:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l2/weights:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l2/biases:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l3/weights:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l3/biases:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l4/kernel:0' shape=(30, 1) dtype=float32_ref>,
<tf.Variable 'critic/target_net/l4/bias:0' shape=(1,) dtype=float32_ref>,
<tf.Variable 'beta1_power_1:0' shape=() dtype=float32_ref>,
<tf.Variable 'beta2_power_1:0' shape=() dtype=float32_ref>,
<tf.Variable 'critic/main_net/l1/weights/Adam:0' shape=(40, 90) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l1/weights/Adam_1:0' shape=(40, 90) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l1/biases/Adam:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l1/biases/Adam_1:0' shape=(90,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l2/weights/Adam:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l2/weights/Adam_1:0' shape=(90, 60) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l2/biases/Adam:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l2/biases/Adam_1:0' shape=(60,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l3/weights/Adam:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l3/weights/Adam_1:0' shape=(60, 30) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l3/biases/Adam:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l3/biases/Adam_1:0' shape=(30,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l4/kernel/Adam:0' shape=(30, 1) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l4/kernel/Adam_1:0' shape=(30, 1) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l4/bias/Adam:0' shape=(1,) dtype=float32_ref>,
<tf.Variable 'critic/main_net/l4/bias/Adam_1:0' shape=(1,) dtype=float32_ref>
这两个列表中唯一的区别是<tf.Variable 'Variable:0' shape=() dtype=int32_ref>,我不知道它是做什么用的,也不知道它是如何生成的。但是,我认为问题不在这里,因为我能够成功恢复的其他模型中也都存在这个变量。
我非常感谢您提供帮助和意见,以解决此错误。
答案 0 :(得分:1)
我通过以下方法在恢复时排除了这一个变量,从而解决了这个问题:
variables = slim.get_variables_to_restore()
variables_to_restore = [v for v in variables if 'global_step_counter' not in v.name.split('/')[0]]
saver = tf.train.Saver(variables_to_restore)