我尝试参考 this solution 和 this solution 的做法，但到目前为止都没有成功：
当我运行以下代码块时:
# NOTE(review): `manager`/`lock` are created at import time. Each Pool worker
# gets its own pickled copy of `self`, so mutating self.running_total_list in
# a child never propagates back to the parent — presumably the root cause of
# the "lost" totals; verify against the multiprocessing docs.
global manager
global lock
manager = Manager()
lock = manager.Lock()


class MyClass(object):
    def get_next_chunk(self, numberlist, chunks):
        """Yield successive `chunks`-sized slices of `numberlist`."""
        for i in range(0, len(numberlist), chunks):
            yield numberlist[i:i + chunks]

    def multi_process(self, numberlist):
        """Fan chunks of `numberlist` out to a pool of worker processes."""
        procs = 5
        chunksize = 100
        with Pool(procs) as pool:
            pool.map(self.process_numberlist,
                     self.get_next_chunk(numberlist, chunksize))
        return self.running_total_list

    def process_numberlist(self, numberlist):
        """Fetch new numbers and append them to the running total under lock."""
        temp_num_list = self.getnewNumbers()
        logger.debug("temp_num_list length: " + str(len(temp_num_list)))
        try:
            lock.acquire()
        except Exception as e:
            logger.error("Couldn't acquire lock")
            logger.error(e)
            traceback.format_exc()
            logger.error(sys.exc_info()[0])
            return
        try:
            # was: `temp` (undefined name -> NameError); use temp_num_list
            self.running_total_list = self.running_total_list + temp_num_list
            logger.debug("New running_total_list length: "
                         + str(len(self.running_total_list)))
        finally:
            # release only when the acquire succeeded; original released
            # unconditionally and had a stray `break` outside any loop
            lock.release()
我的日志中的输出如下:
[process_numberlist() ] temp_num_list length: 5
[process_numberlist() ] New running_total_list result set length: 5
[process_numberlist() ] temp_num_list length: 6
[process_numberlist() ] New running_total_list result set length: 6
[process_numberlist() ] temp_num_list length: 4
[process_numberlist() ] New running_total_list result set length: 9
当我预期的输出结果为:
[process_numberlist() ] temp_num_list length: 5
[process_numberlist() ] New running_total_list result set length: 5
[process_numberlist() ] temp_num_list length: 6
[process_numberlist() ] New running_total_list result set length: 11
[process_numberlist() ] temp_num_list length: 4
[process_numberlist() ] New running_total_list result set length: 15
编辑-尝试2
请参阅根据亚伦的建议进行的更新。现在收到 "can only join an iterable"（只能连接可迭代对象）错误
global manager
global lock


class MyClass(object):
    def get_next_chunk(self, numberlist, chunks):
        """Yield successive `chunks`-sized slices of `numberlist`."""
        for i in range(0, len(numberlist), chunks):
            yield numberlist[i:i + chunks]

    def multi_process(self, numberlist):
        """Fan chunks of `numberlist` out to a pool, sharing one Manager lock."""
        procs = 5
        chunksize = 100
        manager = Manager()
        lock = manager.Lock()
        with Pool(procs) as pool:
            # Bind the lock as a KEYWORD argument: the original positional
            # partial(self.process_numberlist, lock) shifted the lock into the
            # `numberlist` slot, producing the "can only join an iterable" error.
            func = partial(self.process_numberlist, lock=lock)
            # was: pool.map(function, ...) -> NameError; the local is `func`
            pool.map(func,
                     self.get_next_chunk(numberlist, chunksize))
        return self.running_total_list

    def process_numberlist(self, numberlist, lock):
        """Fetch new numbers and append them to the running total under lock."""
        temp_num_list = self.getnewNumbers()
        logger.debug("temp_num_list length: " + str(len(temp_num_list)))
        try:
            lock.acquire()
            try:
                self.running_total_list = self.running_total_list + temp_num_list
                logger.debug("New running_total_list length: "
                             + str(len(self.running_total_list)))
            finally:
                # original had no finally and a stray `break` outside any loop
                lock.release()
        except Exception as e:
            logger.error("Couldn't acquire lock")
            logger.error(e)
            traceback.format_exc()
            logger.error(sys.exc_info()[0])
编辑#3 -此玩具示例中未包含的getNewNumbers()只是返回一个整数数组。希望有帮助
答案 0 :(得分:0)
在我看来,您的主要目标是访问共享资源(running_total_list
),这就是为什么我特别关注这一点。
在您的示例中,您使用了Pool
,而我使用了Process
。您可以查看this article两者之间的核心区别,并确定哪种更适合您的用例。
我想到了这个简单的示例,说明如何在多个进程之间共享资源。这应该使您对如何从那里开始有了一个好主意:
from multiprocessing import Process, Lock, Manager
def gen_numbers():
    """Return [0, 1, ..., k-1] for a k drawn uniformly from 4..11."""
    import random
    size = random.randint(4, 11)
    return list(range(size))
def process_numberlist(lock, shared_list, num):
    """Worker body: generate a batch and append it to `shared_list` under `lock`."""
    generated = gen_numbers()
    print("Proc %s: temp_num_list length: %s" %(num, len(generated)))
    try:
        # serialize access so concurrent appends don't interleave
        lock.acquire()
        shared_list += generated
        print("Proc %s: New shared_list length: %s" %(num, len(shared_list)))
    finally:
        lock.release()
if __name__ == '__main__':
    # Guard is required by multiprocessing on spawn-based platforms
    # (Windows/macOS default): children re-import this module and would
    # otherwise re-run the process-creation code recursively.
    lock = Lock()
    manager = Manager()
    shared_list = manager.list()

    proc = 5
    proc_list = []
    for num in range(proc):
        p = Process(target=process_numberlist,
                    args=(lock, shared_list, num + 1,))
        p.start()
        proc_list.append(p)

    for p in proc_list:
        p.join()
要注意的重要一件事是此处shared_list
的定义。与线程不同,每个进程都有自己的内存空间(Pool
也不例外),这就是为什么它们之间无法共享数据的原因。这意味着,您需要实现某种进程间通信(IPC),幸运的是python已经为您提供了一些工具。其中之一是multiprocessing.Manager
。它公开了一些数据结构(例如dict
或list
),可用于在进程之间共享。
在这种情况下,Lock
也一样。这很重要,因为您不想同时访问多个进程的共享内存。这只会使您的程序变幻莫测。
需要注意的另一件事是,process_numberlist
的执行不一定按顺序进行,因为每个进程都彼此独立运行,但是它们都可以访问相同的资源。
希望对您有帮助!
答案 1 :(得分:0)
您似乎将 OOP（面向对象）概念和 IPC（进程间通信）混淆了。
请参见此处,我在母进程中将类A
创建为a
的实例。我从同一母进程中调用方法a.go
。当方法a.go
调用multiprocessing.Pool(2)
时,将创建两个子进程。现在我们有三个过程。一位母亲和两个孩子。
每个都有自己的a
版本。现在有一个母亲和两个孩子,但是实例a
的三个版本。我刚刚在母亲中将A
的一个实例创建为a
。谁创造了另外两个?这是操作系统和酸洗操作。由操作系统创建时,孩子们会得到母亲的所有对象。如果孩子修改了a
的版本,则a
的其他版本不会受到影响。
import multiprocessing
import os
class A:
    """Appends numbers to a per-instance list; workers print who did what."""

    def __init__(self):
        self.numbers = []

    def add(self, n):
        # mutate first, then report parent pid / own pid / current data
        self.numbers.append(n)
        worker = multiprocessing.current_process()
        print('mom: {}, my-pid: {}, data: {}'.format(os.getppid(), worker.ident,
                                                     self.numbers))

    def go(self):
        """Map add() over 1..3 using two pool workers."""
        with multiprocessing.Pool(2) as workers:
            workers.map(self.add, range(1, 4))
if __name__ == '__main__':
    # run the demo in the mother process and show that her list stays empty
    instance = A()
    instance.go()
    myself = multiprocessing.current_process()
    print('pid: {}, data: {}'.format(myself.ident,
                                     instance.numbers))
输出;
mom: 10029, my-pid: 10030, data: [1]
mom: 10029, my-pid: 10031, data: [2]
mom: 10029, my-pid: 10030, data: [3]
pid: 10029, data: []
这里只有两个孩子pid 10030和pid10031。他们在a.numbers
后面附加了3个项目。因此,其中一个应该附加了两个项目,但在打印时仅显示了附加项目。 pid为10030的孩子应该显示[1, 3]
。这是怎么回事?
让我们先将a.numbers
列表初始化为母亲中的[0]
,然后将其初始化为print
a.numbers
,然后再追加为孩子。
import multiprocessing
import os
class A:
    """Like example 1, but prints the list both before and after mutation."""

    def __init__(self):
        self.numbers = []

    def add(self, n):
        proc = multiprocessing.current_process()
        # snapshot before the append...
        print('mom: {}, my-pid: {}, previous-data: {}'.format(
            os.getppid(), proc.ident, self.numbers))
        self.numbers.append(n)
        # ...and after, to show each child starts from the mother's copy
        print('mom: {}, my-pid: {}, current-data: {}'.format(
            os.getppid(), proc.ident, self.numbers))

    def go(self):
        """Map add() over 1..3 using two pool workers."""
        with multiprocessing.Pool(2) as workers:
            workers.map(self.add, range(1, 4))
if __name__ == '__main__':
    # seed the mother's copy with 0 before fanning out to the pool
    demo = A()
    demo.numbers.append(0)
    demo.go()
    current = multiprocessing.current_process()
    print('pid: {}, data: {}'.format(current.ident,
                                     demo.numbers))
输出;
mom: 10407, my-pid: 10408, previous-data: [0]
mom: 10407, my-pid: 10408, current-data: [0, 1]
mom: 10407, my-pid: 10409, previous-data: [0]
mom: 10407, my-pid: 10409, current-data: [0, 2]
mom: 10407, my-pid: 10408, previous-data: [0]
mom: 10407, my-pid: 10408, current-data: [0, 3]
pid: 10407, data: [0]
a.numbers
中母亲的一切出现在孩子们身上。但是,带有pid 10408并附加了2个项目的孩子仍然没有保留以前添加的项目。
现在让我们检查实例a
,我们要为其变异的数据是相同的实例,或者每个a
都不同,即使pid相同。
import multiprocessing
import os
class A:
    """Example 3: adds __str__/__del__ to show when copies get destroyed."""

    def __init__(self):
        self.numbers = []

    def __str__(self):
        joined = ', '.join(str(x) for x in self.numbers)
        return '<{}>'.format(joined)

    def __del__(self):
        # fires per copy of `a`, revealing that children hold distinct copies
        proc = multiprocessing.current_process()
        print("I'm being destroyed, my pid: {}, data: {}".format(proc.ident, self))

    def add(self, n):
        proc = multiprocessing.current_process()
        self.numbers.append(n)
        print('mom: {}, my-pid: {}, current-data: {}'.format(
            os.getppid(), proc.ident, self.numbers))

    def go(self):
        """Map add() over 1..3 using two pool workers."""
        with multiprocessing.Pool(2) as workers:
            workers.map(self.add, range(1, 4))
if __name__ == '__main__':
    # seed the mother's copy with 0 before fanning out to the pool
    demo = A()
    demo.numbers.append(0)
    demo.go()
    current = multiprocessing.current_process()
    print('pid: {}, data: {}'.format(current.ident,
                                     demo.numbers))
输出;
mom: 11881, my-pid: 11883, current-data: [0, 2]
mom: 11881, my-pid: 11882, current-data: [0, 1]
I'm being destroyed, my pid: 11882, data: <0, 1>
I'm being destroyed, my pid: 11883, data: <0, 2>
mom: 11881, my-pid: 11883, current-data: [0, 3]
I'm being destroyed, my pid: 11883, data: <0, 3>
pid: 11881, data: [0]
I'm being destroyed, my pid: 11881, data: <0>
从上面的输出中可以明显看出,子进程没有终止,因为我们可以看到pid相同,但是对象a
被销毁了。因此,过程保持不变,但是实例a
是从母亲那里复制的。
如何在进程之间共享对象？这时 multiprocessing.Manager
就可以派上用场了。
import multiprocessing
import os
class A:
    """Example 4: the list lives in a Manager server, so all processes share it."""

    def __init__(self):
        # manager.list() is a proxy to a list held by a separate server
        # process; every worker mutates the same underlying list
        manager = multiprocessing.Manager()
        self.numbers = manager.list()

    def __str__(self):
        return '<{}>'.format(self.numbers)

    def __del__(self):
        proc = multiprocessing.current_process()
        print("I'm being destroyed, my pid: {}, data: {}".format(
            proc.ident, self))

    def add(self, n):
        self.numbers.append(n)
        proc = multiprocessing.current_process()
        print('mom: {}, my-pid: {}, current-data: {}'.format(
            os.getppid(), proc.ident, self.numbers))

    def go(self):
        """Map add() over 1..3 using two pool workers."""
        with multiprocessing.Pool(2) as workers:
            workers.map(self.add, range(1, 4))
if __name__ == '__main__':
    # with the Manager-backed list, the mother finally sees all appends
    demo = A()
    demo.numbers.append(0)
    demo.go()
    current = multiprocessing.current_process()
    print('pid: {}, data: {}'.format(current.ident,
                                     demo.numbers))
输出;
mom: 12296, my-pid: 12303, current-data: [0, 1]
I'm being destroyed, my pid: 12303, data: <[0, 1, 2]>
mom: 12296, my-pid: 12304, current-data: [0, 1, 2]
I'm being destroyed, my pid: 12304, data: <[0, 1, 2]>
mom: 12296, my-pid: 12303, current-data: [0, 1, 2, 3]
I'm being destroyed, my pid: 12303, data: <[0, 1, 2, 3]>
pid: 12296, data: [0, 1, 2, 3]
I'm being destroyed, my pid: 12296, data: <<ListProxy object, typeid 'list' at 0x7f69aa037048; '__str__()' failed>>
数据现在在进程之间共享,但有一些开销。
class A:
    """Demonstrates that creating a Manager spawns one extra helper process."""

    def __init__(self):
        print('children: {}'.format(multiprocessing.active_children()))
        mgr = multiprocessing.Manager()
        # the SyncManager fork now shows up as an active child
        print('children: {}'.format(multiprocessing.active_children()))
        self.numbers = mgr.list()
if __name__ == '__main__':
    # constructing A is enough to show the extra Manager process
    demo = A()
输出;
children: []
children: [<ForkProcess(SyncManager-1, started)>]
还有一个共享对象的额外过程。
如何解决这个问题而没有开销?让孩子处理并返回数据,并在母亲中进行list
构建。
import multiprocessing
class A:
    """Collects worker results in the mother process — no Manager overhead."""

    def __init__(self):
        self.numbers = []

    def add(self, n):
        # workers only RETURN data; they never mutate shared state
        return [n]

    def go(self):
        """Map add() over two workers, then merge results in the mother."""
        with multiprocessing.Pool(2) as workers:
            chunks = workers.map(self.add, range(1, 4))
            for chunk in chunks:
                self.numbers.extend(chunk)
            print('active children: {}'.format(
                [p.ident for p in multiprocessing.active_children()]))
if __name__ == '__main__':
    # the mother owns the list; workers just hand back their pieces
    demo = A()
    demo.numbers.append(0)
    demo.go()
    current = multiprocessing.current_process()
    print('pid: {}, data: {}'.format(current.ident,
                                     demo.numbers))
输出;
active children: [13436, 13435]
pid: 13434, data: [0, 1, 2, 3]