我正在使用 multiprocessing 模块处理一个巨大的文件。我对每一行做一些处理,并把结果通过 stdout 输出到另一个文件。问题是,有时两条不同的输出记录会被打印到同一行上。我已经加了锁,但即便如此仍然会出现同样的问题。我该如何解决这个问题?
例如:
output received:
0,0.784207850012,0.215792149988,1,B5E326C7,0.257956186525,0.742043813475,1,UP87859
expected output:
0,0.784207850012,0.215792149988,1,B5E326C7
0.257956186525,0.742043813475,1,UP87859
代码:
# Module-level lock; pred() holds it around each sys.stdout.write() call so
# that only one holder emits a record at a time.
# NOTE(review): a lock created here is only shared with pool workers that
# inherit this module state (e.g. fork start method) -- confirm how workers
# receive it on platforms that spawn fresh interpreters.
lock = multiprocessing.Lock()
def pred(line):
    """Process one input line and write one CSV record to stdout.

    The line is stripped and split on whitespace into exactly five fields
    (``u, seg, c, g, lab``); the fifth field keeps any remaining whitespace.
    A line with fewer than five fields raises ``ValueError`` from the
    unpacking.

    The record written is ``p,pp0,pp1,1,u`` followed by a newline.  The
    original code had separate branches for ``'null' in lab`` and
    ``'null' not in lab`` that wrote the exact same record, so they are
    merged here.

    Returns None; the only effect is the write to ``sys.stdout``.
    """
    u, seg, c, g, lab = line.strip().split(None, 4)
    # < #### DO SOMETHING ##### and generate p,pp0,pp1>

    # Build the full record before taking the lock to keep the critical
    # section as short as possible.
    record = ','.join(map(str, [p, pp0, pp1, 1, u])) + '\n'

    # Every worker process has its own stdout buffer.  Without an explicit
    # flush the buffered bytes reach the shared output file later, at an
    # arbitrary byte boundary and outside the lock, which lets records from
    # different processes end up on the same line.  Flushing while the lock
    # is still held makes each record reach the file as one unit.
    # `with` also guarantees the lock is released if the write raises.
    with lock:
        sys.stdout.write(record)
        sys.stdout.flush()
if __name__ == "__main__":
    # NOTE(review): `init` and `l` are not defined in the visible code;
    # presumably `init` installs the shared lock in each worker and `l` is
    # that lock -- confirm against the rest of the file.
    pool = multiprocessing.Pool(25, initializer=init, initargs=(l,))
    try:
        with open('file.csv') as source_file:
            # Third positional argument is the chunksize: lines are handed
            # to workers in batches of 1000.
            results = pool.map(pred, source_file, 1000)
    except BaseException:
        # Something failed (or the run was interrupted): stop the workers
        # right away instead of letting them drain the remaining tasks.
        pool.terminate()
        raise
    else:
        # Normal completion: let workers shut down cleanly so they exit
        # through the regular path rather than being killed at interpreter
        # teardown.
        pool.close()
    finally:
        # join() requires close() or terminate() to have been called first;
        # both paths above guarantee that.
        pool.join()