资源耗尽:分配形状为[3,3,512,512]的张量时发生OOM

时间:2020-05-15 02:05:42

标签: tensorflow

我用stylegan训练了自己的模型,并得到了一个pkl文件。现在我想测试一下,查看训练出的权重的效果,于是尝试运行pretrained_example.py文件进行验证,但显示了错误。

这是我的pretrained_example.py脚本

import os
import pickle
import numpy as np
import PIL.Image
import dnnlib
import dnnlib.tflib as tflib
import config
import tensorflow as tf


def main():
    """Generate 100 sample images from a trained StyleGAN snapshot.

    Loads the pickled ``(_G, _D, Gs)`` networks from the hard-coded snapshot
    path, prints the layers of ``Gs``, then renders one image per seed in
    ``range(100)`` and saves it as
    ``<config.result_dir>/finished/example<seed>.png``.
    """
    # Initialize TensorFlow.
    tflib.init_tf()

    # Load pre-trained network.
    # NOTE: pickle.load can execute arbitrary code; only open snapshots that
    # you produced yourself or otherwise trust.
    # NOTE(review): the ResourceExhaustedError reported for this script is
    # raised inside pickle.load, while the network variables are initialized
    # on GPU:0. Presumably the GPU memory is already held by another process
    # or notebook kernel (e.g. a training run that is still alive) -- confirm
    # with nvidia-smi and free it before running this script.
    snapshot_path = os.path.abspath("results/00008-sgan-custom_dataset-1gpu/network-snapshot-009748.pkl")
    with open(snapshot_path, 'rb') as f:
        _G, _D, Gs = pickle.load(f)
        # _G = Instantaneous snapshot of the generator. Mainly useful for resuming a previous training run.
        # _D = Instantaneous snapshot of the discriminator. Mainly useful for resuming a previous training run.
        # Gs = Long-term average of the generator. Yields higher-quality results than the instantaneous snapshot.

    # Print network details.
    Gs.print_layers()

    # Loop-invariant setup: the output transform and the output directory do
    # not depend on the seed, so build them once. The images are written to
    # the 'finished' sub-directory, so that is the directory that must exist
    # (creating only config.result_dir left it missing).
    fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    output_dir = os.path.join(config.result_dir, 'finished')
    os.makedirs(output_dir, exist_ok=True)

    for seed in range(100):
        # Pick latent vector; seeding with the loop index makes every image
        # reproducible.
        rnd = np.random.RandomState(seed)
        latents = rnd.randn(1, Gs.input_shape[1])

        # Generate image.
        images = Gs.run(latents, None, truncation_psi=0.7, randomize_noise=True, output_transform=fmt)

        # Save image. The latent batch has exactly one row, so the only valid
        # index is 0 (index 1 is out of range for a one-image batch).
        png_filename = os.path.join(output_dir, 'example' + str(seed) + '.png')
        PIL.Image.fromarray(images[0], 'RGB').save(png_filename)


if __name__ == "__main__":
    main()

这是错误:

---------------------------------------------------------------------------
ResourceExhaustedError                    Traceback (most recent call last)
C:\ProgramData\Anaconda3\envs\old_tensorflow\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
   1355     try:
-> 1356       return fn(*args)
   1357     except errors.OpError as e:

C:\ProgramData\Anaconda3\envs\old_tensorflow\lib\site-packages\tensorflow\python\client\session.py in _run_fn(feed_dict, fetch_list, target_list, options, run_metadata)
   1340       return self._call_tf_sessionrun(
-> 1341           options, feed_dict, fetch_list, target_list, run_metadata)
   1342 

C:\ProgramData\Anaconda3\envs\old_tensorflow\lib\site-packages\tensorflow\python\client\session.py in _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, run_metadata)
   1428         self._session, options, feed_dict, fetch_list, target_list,
-> 1429         run_metadata)
   1430 

ResourceExhaustedError: OOM when allocating tensor with shape[3,3,512,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
     [[{{node G_synthesis_16/32x32/Conv0_up/weight/Initializer/random_normal/RandomStandardNormal}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


During handling of the above exception, another exception occurred:

ResourceExhaustedError                    Traceback (most recent call last)
<ipython-input-18-cdc9c653bba8> in <module>
     28         PIL.Image.fromarray(images[1], 'RGB').save(png_filename)
     29 if __name__ == "__main__":
---> 30     main()

<ipython-input-18-cdc9c653bba8> in main()
      7     url = os.path.abspath("results/00008-sgan-custom_dataset-1gpu/network-snapshot-009748.pkl")
      8     with open(url, 'rb') as f:
----> 9         _G, _D, Gs = pickle.load(f)
     10         # _G = Instantaneous snapshot of the generator. Mainly useful for resuming a previous training run.
     11         # _D = Instantaneous snapshot of the discriminator. Mainly useful for resuming a previous training run.

~\Documents\python\stylegan\dnnlib\tflib\network.py in __setstate__(self, state)
    297         # Init TensorFlow graph.
    298         self._init_graph()
--> 299         self.reset_own_vars()
    300         tfutil.set_vars({self.find_var(name): value for name, value in state["variables"]})
    301 

~\Documents\python\stylegan\dnnlib\tflib\network.py in reset_own_vars(self)
    188     def reset_own_vars(self) -> None:
    189         """Re-initialize all variables of this network, excluding sub-networks."""
--> 190         tfutil.run([var.initializer for var in self.own_vars.values()])
    191 
    192     def reset_vars(self) -> None:

~\Documents\python\stylegan\dnnlib\tflib\tfutil.py in run(*args, **kwargs)
     24     """Run the specified ops in the default session."""
     25     assert_tf_initialized()
---> 26     return tf.get_default_session().run(*args, **kwargs)
     27 
     28 

C:\ProgramData\Anaconda3\envs\old_tensorflow\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
    948     try:
    949       result = self._run(None, fetches, feed_dict, options_ptr,
--> 950                          run_metadata_ptr)
    951       if run_metadata:
    952         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

C:\ProgramData\Anaconda3\envs\old_tensorflow\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1171     if final_fetches or final_targets or (handle and feed_dict_tensor):
   1172       results = self._do_run(handle, final_targets, final_fetches,
-> 1173                              feed_dict_tensor, options, run_metadata)
   1174     else:
   1175       results = []

C:\ProgramData\Anaconda3\envs\old_tensorflow\lib\site-packages\tensorflow\python\client\session.py in _do_run(self, handle, target_list, fetch_list, feed_dict, options, run_metadata)
   1348     if handle is None:
   1349       return self._do_call(_run_fn, feeds, fetches, targets, options,
-> 1350                            run_metadata)
   1351     else:
   1352       return self._do_call(_prun_fn, handle, feeds, fetches)

C:\ProgramData\Anaconda3\envs\old_tensorflow\lib\site-packages\tensorflow\python\client\session.py in _do_call(self, fn, *args)
   1368           pass
   1369       message = error_interpolation.interpolate(message, self._graph)
-> 1370       raise type(e)(node_def, op, message)
   1371 
   1372   def _extend_graph(self):

ResourceExhaustedError: OOM when allocating tensor with shape[3,3,512,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
     [[node G_synthesis_16/32x32/Conv0_up/weight/Initializer/random_normal/RandomStandardNormal (defined at <string>:149) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

您完成了!只需运行“python pretrained_example.py”,然后查看结果!结果目录中应该会有一张生成的图像。如果您想生成多张图像,可以对pretrained_example.py进行这些更改。

提前谢谢

0 个答案:

没有答案