我的想法是,我有一些非常大的字典会经历一些循环,我想使用Python's
multiprocessing
来更快地完成这项工作。
我已定义了2个功能。
import sys, collections
import multiprocessing
from timeit import itertools
def getReoccurences(g, dups, genes, synonyms):
#dups = collections.OrderedDict()
# check if our gene exists in the synonym dict
if g in synonyms:
dups[g] = []
for synonym in synonyms[g]:
if synonym in genes:
# got one reoccurence.
dups[g].append(synonym)
del genes[synonym]
#return dups
def cleanFromUnofficialName(g, replaced, dups, genes, synonyms):
for official_gene in synonyms:
list_of_synonyms = synonyms[official_gene]
if g in list_of_synonyms:
if g not in synonyms:
#We got a gene that is listed with the unofficial name.
replaced[official_gene] = g
#sys.stdout.write("\t%s was listed with the unofficial name %s. Replaced\n" % (official_gene, g))
if g not in genes:
sys.stdout.write("\tDouble occurence: %s ignoring 2nd occurence.\n" % g)
else:
del genes[g] # remove that gene from the list
genes[official_gene] = [] # put the official in it's place
#main func
def clean(genes, synomyms)
# lists to join the processes later on...
#############################
getReoccuring_processes = []
cleanFromUnofficialName_processes = []
# http://stackoverflow.com/questions/6832554/python-multiprocessing-how-do-i-share-a-dict-among-multiple-processes
manager1 = multiprocessing.Manager()
dups = manager1.dict()
manager2 = multiprocessing.Manager()
replaced = manager2.dict()
for g in genes:
p1 = multiprocessing.Process(target=getReoccurences, args=(g, dups, genes, synomyms))
p1.start()
getReoccuring_processes.append(p1)
for g in genes:
p2 = multiprocessing.Process(target=cleanFromUnofficialName_processes, args=(g, replaced, dups, genes, synomyms))
p2.start()
cleanFromUnofficialName_processes.append(p2)
#join the processes
for p1, p2 in itertools.izip(getReoccuring_processes, cleanFromUnofficialName_processes):
p1.join()
p2.join()
但是当我做的时候
for dup in dups:
print dup
我得到TypeError: 'list' object is not callable
问题是:如何使用多处理编辑此词典并仍然能够打印其值?