运行笔记本时出现以下错误:
InvalidArgumentErrorTraceback (most recent call last)
<ipython-input-77-e16e0fd6d275> in <module>()
----> 1 tpu_ops = tf.contrib.tpu.batch_parallel(run_find_closest_latent_vector, [], num_shards=8)
2
3 def run_once():
4 session_tpu.run(tpu_ops)
5
/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu.pyc in batch_parallel(computation, inputs, num_shards, infeed_queue, device_assignment, name)
981 infeed_queue=infeed_queue,
982 device_assignment=device_assignment,
--> 983 name=name)
984
985
/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu.pyc in shard(computation, inputs, num_shards, input_shard_axes, outputs_from_all_shards, output_shard_axes, infeed_queue, device_assignment, name)
879 infeed_queue=infeed_queue,
880 device_assignment=device_assignment,
--> 881 name=name)
882
883 # There must be at least one shard since num_shards > 0.
/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu.pyc in replicate(computation, inputs, infeed_queue, device_assignment, name)
505 """
506 return split_compile_and_replicate(computation, inputs, infeed_queue,
--> 507 device_assignment, name)[1]
508
509
/usr/local/lib/python2.7/dist-packages/tensorflow/contrib/tpu/python/tpu/tpu.pyc in split_compile_and_replicate(***failed resolving arguments***)
682 vscope.set_custom_getter(custom_getter)
683
--> 684 outputs = computation(*computation_inputs)
685
686 vscope.set_use_resource(saved_use_resource)
<ipython-input-76-66eb3bb2ffa2> in run_find_closest_latent_vector()
34
35 def run_find_closest_latent_vector():
---> 36 result = find_closest_latent_vector(num_optimization_steps=40)
37 display_images(result[0], [("Loss: %.2f" % loss) for loss in result[1]])
38
<ipython-input-76-66eb3bb2ffa2> in find_closest_latent_vector(num_optimization_steps)
22
23 with tf.Session(tpu_address) as session_tpu:
---> 24 session_tpu.run(tf.global_variables_initializer())
25 optimizer = tf.train.AdamOptimizer(learning_rate=0.3)
26 train = optimizer.minimize(loss)
/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in run(self, fetches, feed_dict, options, run_metadata)
927 try:
928 result = self._run(None, fetches, feed_dict, options_ptr,
--> 929 run_metadata_ptr)
930 if run_metadata:
931 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _run(self, handle, fetches, feed_dict, options, run_metadata)
1150 if final_fetches or final_targets or (handle and feed_dict_tensor):
1151 results = self._do_run(handle, final_targets, final_fetches,
-> 1152 feed_dict_tensor, options, run_metadata)
1153 else:
1154 results = []
/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
1326 if handle is None:
1327 return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1328 run_metadata)
1329 else:
1330 return self._do_call(_prun_fn, handle, feeds, fetches)
/usr/local/lib/python2.7/dist-packages/tensorflow/python/client/session.pyc in _do_call(self, fn, *args)
1346 pass
1347 message = error_interpolation.interpolate(message, self._graph)
-> 1348 raise type(e)(node_def, op, message)
1349
1350 def _extend_graph(self):
InvalidArgumentError: Unsuccessful TensorSliceReader constructor: Failed to get matching files on /tmp/tfhub_modules/2f9e2f0be115550c7ae9b90bb71b29e76fa404d8/variables/variables: Unimplemented: File system scheme '[local]' not implemented (file: '/tmp/tfhub_modules/2f9e2f0be115550c7ae9b90bb71b29e76fa404d8/variables/variables')
[[node checkpoint_initializer_9 (defined at /usr/local/lib/python2.7/dist-packages/tensorflow_hub/native_module.py:395) = RestoreV2[dtypes=[DT_FLOAT], _device="/job:tpu_worker/replica:0/task:0/device:CPU:0"](checkpoint_initializer/prefix, checkpoint_initializer_9/tensor_names, checkpoint_initializer/shape_and_slices)]]
不知为何,它提示 Unimplemented: File system scheme '[local]' not implemented(未实现文件系统方案 '[local]')。
可能与this issue有关。
我无法直接访问该文件路径,因此没办法在需要的地方添加 \ 。
这是笔记本中的代码块:
def _get_beta_accumulators(self):
    """Return the pair ``(self._beta1_power, self._beta2_power)``."""
    beta1_power = self._beta1_power
    beta2_power = self._beta2_power
    return beta1_power, beta2_power
def find_closest_latent_vector(num_optimization_steps):
    """Optimize a latent vector so the generated image approaches ``target_image``.

    A fresh graph is built around the ``progan-128`` TF-Hub module, a trainable
    ``vector`` variable is fed through it, and Adam minimizes the absolute
    difference to ``target_image`` plus a norm regularizer.

    Args:
        num_optimization_steps: Number of training steps to run.

    Returns:
        A tuple ``(images, losses)`` holding, for every step in order, the
        fetched ``image[0]`` and the fetched loss value.
    """
    images = []
    losses = []
    with tf.Graph().as_default():
        # NOTE(review): the reported failure shows this module's checkpoint
        # being restored from /tmp/tfhub_modules on the TPU worker, which
        # cannot read the local file system. The module cache presumably has
        # to live in a location the worker can reach (e.g. a gs:// path) --
        # confirm against the TPU / TF-Hub documentation.
        module = hub.Module("https://tfhub.dev/google/progan-128/1")

        initial_vector = tf.random_normal([1, latent_dim], seed=5)
        vector = tf.get_variable("vector", initializer=initial_vector)
        image = module(vector)

        # Only the first three entries of the target's last axis are compared
        # (presumably dropping an alpha channel -- confirm against the data).
        target_image_difference = tf.reduce_sum(
            tf.losses.absolute_difference(image[0], target_image[:, :, :3]))

        # The latent vectors were sampled from a normal distribution. We can
        # get more realistic images if we regularize the length of the latent
        # vector to the average length of vector from this distribution.
        regularizer = tf.abs(tf.norm(vector) - np.sqrt(latent_dim))
        loss = target_image_difference + regularizer

        # The optimizer must be built BEFORE the initializer op is created:
        # minimize() adds Adam's own variables to the graph, and an initializer
        # created earlier would not cover them.
        optimizer = tf.train.AdamOptimizer(learning_rate=0.3)
        train = optimizer.minimize(loss)
        init_op = tf.global_variables_initializer()

        with tf.Session(tpu_address) as session_tpu:
            session_tpu.run(init_op)
            for _ in range(num_optimization_steps):
                _, loss_out, im_out = session_tpu.run([train, loss, image])
                # Collect the frame; previously it was only printed, so the
                # returned image list was always empty.
                images.append(im_out[0])
                losses.append(loss_out)
                # Call form prints the same on Python 2 and Python 3.
                print(loss_out)
    return images, losses
def run_find_closest_latent_vector():
    """Run a 40-step latent-vector search and display the images with their losses."""
    images, losses = find_closest_latent_vector(num_optimization_steps=40)
    # One caption per recorded loss, formatted to two decimals.
    captions = ["Loss: %.2f" % value for value in losses]
    display_images(images, captions)
希望这将有助于调试问题。
有什么想法吗?
答案 0 :(得分:7)
我认为根本问题在于:您的输入、模型或检查点存放在本地计算机的文件系统上。但是,根据此文档(doc):
所有输入文件和模型目录都必须使用 Cloud Storage 存储桶路径(gs://bucket-name/...),并且该存储桶必须能够从 TPU 服务器访问。请注意,所有数据处理和模型检查点操作都是在 TPU 服务器上执行的,而不是在本地计算机上。
答案 1 :(得分:1)
您能否编辑问题,补充笔记本(notebook)中的相关代码?我了解您的笔记本是私有的,但也许您可以只分享出问题的那一个单元格,以便我们帮助调试。