在Windows 7上使用Python3处理一些大型csv文件时,我遇到了程序运行速度不够快的问题。代码的原始工作版本与下面类似,但进程调用都是线程。添加multiprocessing
库并将tdg.Thread
转移到mp.Process
后,如下所示,我收到了这个酸洗错误:
line 70, in <module>
proc1.start()
File "C:\Python34\lib\multiprocessing\process.py", line 105, in start
self._popen = self._Popen(self)
File "C:\Python34\lib\multiprocessing\context.py", line 212, in _Popen
return _default_context.get_context().Process._Popen(process_obj)
File "C:\Python34\lib\multiprocessing\context.py", line 313, in _Popen
return Popen(process_obj)
File "C:\Python34\lib\multiprocessing\popen_spawn_win32.py", line 66, in __init__
reduction.dump(process_obj, to_child)
File "C:\Python34\lib\multiprocessing\reduction.py", line 59, in dump
ForkingPickler(file, protocol).dump(obj)
_pickle.PicklingError: Can't pickle <class '_thread.lock'>: attribute lookup lock on _thread failed
代码:
import multiprocessing as mp
import threading as tdg
import queue as q
def my_p1func1(data, Q):
#performs LDAP for data set 1
print("p1f1:",data)
Q.put(data)
def my_p1func2(data, Q):
#performs LDAP for data set2
print("p1f2:",data)
Q.put(data)
def my_proc1(data, Q):
f1_Q = q.Queue()
f2_Q = q.Queue()
f1 = tdg.Thread(target=myP1Func1, args = (data['1'], f1_Q))
f2 = tdg.Thread(target=myP1Func2, args = (data['2'], f2_Q))
f1.start()
f2.start()
f1.join()
f2.join()
f1_out=f1_Q.get()
f2_out=f2_Q.get()
Q.put({'f1':f1_out,'f2':f2_out})
def my_p2func1(data, Q):
#perform gethostbyaddr() for data set 1
print("p2f1:",data)
Q.put(data)
def my_p2func2(data, Q):
#perform gethostbyaddr() for data set 2
print("p2f2:",data)
Q.put(data)
def my_proc2(data, Q):
f1_Q = q.Queue()
f2_Q = q.Queue()
f1 = tdg.Thread(target=myP2Func1, args = (data['1'], f1_Q))
f2 = tdg.Thread(target=myP2Func2, args = (data['2'], f2_Q))
f1.start()
f2.start()
f1.join()
f2.join()
f1_out=f1_Q.get()
f2_out=f2_Q.get()
Q.put({'f1':f1_out,'f2':f2_out})
dataIn = {'1': [1,2,3], '2': ['a','b','c']}
pq1 = q.Queue()
pq2 = q.Queue()
proc1 = mp.Process(target=my_proc1, args=(dataIn, pq1))
proc2 = mp.Process(target=my_proc2, args=(dataIn,pq2))
proc1.start()
proc2.start()
proc1.join()
proc2.join()
p1 = pq1.get()
p2 = pq2.get()
print(p1)
print(p2)
我虽然问题是由我在我的打印声明周围的锁定引起的,但即使在删除它们之后它仍然会抛出相同的酸洗错误。
我对此表示满意,并希望了解为什么它试图腌制未使用的东西,以及如何让它运行以便提高效率?
答案 0 :(得分:0)
您不能将常规Queue.Queue
对象与multiprocessing
一起使用。您必须使用multiprocessing.Queue
。标准Queue.Queue
将不会在进程之间共享,即使您要使其可选。不过,这很容易解决:
if __name__ == "__main__":
dataIn = {'1': [1,2,3], '2': ['a','b','c']}
pq1 = mp.Queue()
pq2 = mp.Queue()
proc1 = mp.Process(target=my_proc1, args=(dataIn, pq1))
proc2 = mp.Process(target=my_proc2, args=(dataIn, pq2))
proc1.start()
proc2.start()
proc1.join()
proc2.join()
p1 = pq1.get()
p2 = pq2.get()