我的代码:
import pymongo
import multiprocessing
import sys
import time
import re
# Per-document worker. The body was elided in the original post ("xxxx" is a
# placeholder, not real code). process_cursor adds this function's return
# value to a running total, so it presumably returns a number -- TODO confirm.
def process_document(document):
xxxx
def process_cursor(processid, start, count_per_process):
    """Scan one slice of the ``tydata.users`` collection and report its total.

    Opens a dedicated MongoClient inside the worker process, iterates
    ``count_per_process`` documents starting at offset ``start``, sums the
    value returned by ``process_document`` for each of them and puts that
    sum on the module-level ``queue`` exactly once, as the last step.

    Args:
        processid: Worker number, used only in progress messages.
        start: Number of documents to skip before this worker's slice.
        count_per_process: Maximum number of documents in the slice.

    NOTE(review): ``skip(start)`` is evaluated on the server by walking past
    the first ``start`` documents, so workers with a large offset can spend a
    very long time before they receive their first document. That is the
    likely reason most workers look idle on a 2T collection; partitioning by
    a range on an indexed field (e.g. ``_id``) would avoid it -- confirm
    against the MongoDB ``cursor.skip()`` documentation.
    NOTE(review): the query has no sort, so the slices of different workers
    are presumably not guaranteed to be disjoint -- verify.
    """
    print("process id is %d starting" % processid)
    # The client is created here, in the child, rather than inherited from
    # the parent process.
    cli = pymongo.MongoClient(host=["xxx:27017", "xxx:27017"], replicaSet='xxxx', readPreference="secondary")
    try:
        table = cli['tydata']['users']
        docs = table.find().skip(start).limit(count_per_process)
        print("process id is %d processing" % processid)
        final_chip = 0
        total = 0
        # find() always hands back a cursor object; an empty result simply
        # yields no iterations, so no separate emptiness check is needed.
        for doc in docs:
            final_chip += process_document(doc)
            total += 1
            # Progress report once per million processed documents.
            if total % 1000000 == 0:
                print('counting is %d,time %d,processid %d' % (1000000, time.time(), processid))
                print('now is %d,processid %d' % (total, processid))
        print('total is %d' % total)
    finally:
        # Release the connections even when processing fails part-way.
        cli.close()
    print("process id is %d ending" % processid)
    # `queue` is the multiprocessing.Queue created by the parent script.
    queue.put(final_chip)
if __name__ == "__main__":
    # Split the collection into `process_num` contiguous slices, scan each
    # slice in its own process, then add up the per-slice results and write
    # the grand total to a file.
    total = 282039860
    process_num = 10
    count_per_process = total // process_num
    # Must remain a module-level name: process_cursor reads the global
    # `queue` to hand its result back.
    queue = multiprocessing.Queue()
    process_list = []
    start_time = time.time()
    print(start_time)
    for worker_id in range(process_num):
        start = worker_id * count_per_process
        # The last worker also takes the remainder, so no document is
        # dropped when `total` is not a multiple of `process_num`.
        if worker_id == process_num - 1:
            worker_count = total - start
        else:
            worker_count = count_per_process
        p = multiprocessing.Process(target=process_cursor, args=(worker_id, start, worker_count))
        process_list.append(p)
    for p in process_list:
        p.start()
    # Joining before reading the queue is safe here only because each worker
    # puts a single small number; large payloads would have to be drained
    # before join() to avoid blocking the workers on exit.
    for p in process_list:
        p.join()
    end_time = time.time()
    print(end_time)
    chip_sum = 0
    failed_workers = []
    for worker_id, p in enumerate(process_list):
        if p.exitcode == 0:
            # A worker that exited cleanly has put exactly one result, so
            # this get() cannot block indefinitely.
            chip_sum += queue.get()
        else:
            failed_workers.append(worker_id)
    if failed_workers:
        sys.stderr.write('workers %s failed, sum is partial\n' % failed_workers)
    print('sum is %d' % chip_sum)
    file_name = '/home/xxx/test_mongodb/script_mongodb/sum.txt'
    # The context manager flushes and closes the file even on error.
    with open(file_name, 'w') as out_file:
        out_file.write(str(chip_sum))
    print('the process total time is %d' % (end_time - start_time))
我使用 pymongo 以多进程方式扫描 MongoDB 中一个 2T 的集合,但同一时间只有一个进程在工作,其他进程都在等待。
我不知道为什么只有一个进程在工作,而其他进程一直等待,直到它结束后才有另一个进程开始工作。
我的代码有什么问题吗?