示例:以下代码在Ubuntu 14.04上正常运行
# some imports
import numpy as np
import glob
import sys
import multiprocessing
import os
# Build a small on-disk fixture: ten compressed .npz archives under
# tmp/nptest, each holding two random arrays stored under the keys 'x' and 'y'.
tmp_dir = os.path.join('tmp', 'nptest')
if not os.path.exists(tmp_dir):
    os.makedirs(tmp_dir)

for i in range(10):
    # Zero-padded, five-digit file names: 00000.npz ... 00009.npz.
    file_path = os.path.join(tmp_dir, '%05d.npz' % i)
    # Draw x before y so the random stream is consumed in a fixed order.
    x = np.random.rand(100, 50)
    y = np.random.rand(200, 20)
    np.savez_compressed(file_path, x=x, y=y)
def read_npz(path):
    """Load the 'x' and 'y' arrays stored in a single .npz archive.

    Args:
        path: Path of an archive that contains entries named 'x' and 'y'.

    Returns:
        A tuple ``(x, y)`` holding the two arrays.

    Raises:
        KeyError: If the archive has no 'x' or no 'y' entry.
    """
    # np.load returns a lazy NpzFile that keeps the underlying file open.
    # The with-block closes it as soon as both arrays are read into memory,
    # so no file handle is left behind in the (long-lived) worker process.
    with np.load(path) as archive:
        return (archive['x'], archive['y'])
def parallel_read(files):
    """Read several .npz archives concurrently using four worker processes.

    Args:
        files: Iterable of archive paths; each one is passed to read_npz.

    Returns:
        A list with one read_npz result per path, in the same order as
        ``files``.
    """
    pool = multiprocessing.Pool(processes=4)
    try:
        return pool.map(read_npz, files)
    finally:
        # Shut the workers down even when map() raises; otherwise every call
        # would leave four idle processes behind. try/finally is used instead
        # of a with-statement so this also works where Pool is not a context
        # manager.
        pool.close()
        pool.join()
# Collect every archive in the fixture directory and load them all through
# the worker pool.
# NOTE: these statements run at module level, without an
# `if __name__ == '__main__':` guard.
npz_pattern = os.path.join(tmp_dir, '*.npz')
files = glob.glob(npz_pattern)
x = parallel_read(files)
print('done')
但在Windows 7上失败,并出现以下错误消息:
cmd = get_command_line() + [rhandle]
pool = multiprocessing.Pool(processes=4)
File "C:\Anaconda\lib\multiprocessing\forking.py", line 358, in get_command_line
File "C:\Anaconda\lib\multiprocessing\__init__.py", line 232, in Pool
return Pool(processes, initializer, initargs, maxtasksperchild)
File "C:\Anaconda\lib\multiprocessing\pool.py", line 159, in __init__
is not going to be frozen to produce a Windows executable.''')
RuntimeError:
Attempt to start a new process before the current process
has finished its bootstrapping phase.
This probably means that you are on Windows and you have
forgotten to use the proper idiom in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce a Windows executable.
self._repopulate_pool()
File "C:\Anaconda\lib\multiprocessing\pool.py", line 223, in _repopulate_pool
w.start()
File "C:\Anaconda\lib\multiprocessing\process.py", line 130, in start
self._popen = Popen(self)
File "C:\Anaconda\lib\multiprocessing\forking.py", line 258, in __init__
cmd = get_command_line() + [rhandle]
File "C:\Anaconda\lib\multiprocessing\forking.py", line 358, in get_command_line
is not going to be frozen to produce a Windows executable.''')
RuntimeError:
Attempt to start a new process before the current process
has finished its bootstrapping phase.
This probably means that you are on Windows and you have
forgotten to use the proper idiom in the main module:
if __name__ == '__main__':
freeze_support()
...
The "freeze_support()" line can be omitted if the program
is not going to be frozen to produce a Windows executable.
根据我的理解,原因在于:在Windows上,子进程启动时会导入主模块,而在Linux上不会。把 x = parallel_read(files) 放到 if __name__ == '__main__': 保护块中,就可以避免Windows上的这个问题。例如:
if __name__ == '__main__':
    # Only the process that runs this file as a script starts the pool; a
    # process that merely imports the module skips this block.
    x = parallel_read(files)
    print('done')
为什么子进程在Windows上启动时会导入主模块,而在Linux上不会?
答案 0 :(得分:2)
Windows没有fork功能。大多数其他操作系统都有,在这些平台上,multiprocessing使用它来启动与父进程状态相同的新进程。Windows则必须通过其他方式建立子进程的状态,其中就包括导入__main__模块。
请注意,Python 3.4(及更高版本)允许您在所有操作系统上按需选用非fork的实现(例如调用 multiprocessing.set_start_method('spawn'))。有关此功能的讨论,请参阅错误跟踪器上的issue 8713。