使用 pymongo 以多进程方式扫描 MongoDB 中一个 2T 的集合时,只有一个进程在工作

时间:2017-12-11 03:40:36

标签: mongodb multiprocessing pymongo

我的代码:

import pymongo
import multiprocessing
import sys
import time
import re
def process_document(document):
    """Compute the per-document contribution to the running total.

    The real body was elided by the author (``xxxx`` is a placeholder and
    raises NameError if executed as-is). ``process_cursor`` adds this
    function's return value to a numeric accumulator, so an implementation
    is expected to return a number.
    """
    xxxx


def process_cursor(processid,start,count_per_process):
    """Scan one slice of ``tydata.users`` and publish its partial sum.

    Opens a dedicated MongoClient (clients must not be shared across
    forked processes), iterates ``count_per_process`` documents starting at
    offset ``start``, accumulates ``process_document(doc)`` over them and
    finally puts the accumulated value on the module-level ``queue``.

    Args:
        processid: Integer label used only in progress messages.
        start: Number of documents to skip before this slice begins.
        count_per_process: Maximum number of documents in this slice.

    Note:
        ``queue`` is the module-level multiprocessing.Queue created in the
        ``__main__`` section; the worker sees it only because the child is
        forked from that process.
    """
    print("process id is %d starting" % (processid))
    cli = pymongo.MongoClient(host=["xxx:27017", "xxx:27017"],   replicaSet='xxxx', readPreference="secondary")
    # A plain 0 is enough: Python 2 promotes int to long automatically and
    # Python 3 has a single unbounded int type.
    final_chip = 0
    total = 0
    count = 0
    try:
        table = cli['tydata']['users']
        # NOTE(review): per the MongoDB documentation, skip() makes the
        # server walk past every skipped document before returning the
        # first result, so workers with a large ``start`` sit idle for a
        # long time. This is the likely reason only one process appears to
        # work. Partitioning by an indexed range (e.g. on ``_id``) avoids
        # it, but needs a different signature than (start, count).
        docs = table.find().skip(start).limit(count_per_process)
        print("process id is %d processing" % (processid))
        # Iterating an empty cursor is simply zero iterations, so no
        # truthiness guard on the cursor is needed.
        for doc in docs:
            final_chip += process_document(doc)
            count += 1
            total += 1
            if count == 1000000:
                # Progress report every million documents.
                print('counting is %d,time %d,processid %d' % (count,time.time(),processid))
                print('now is %d,processid %d' % (total, processid))
                count = 0
    finally:
        # Release sockets and monitor threads even if processing fails.
        cli.close()
    print('total is %d'%total)
    print("process id is %d ending" %(processid))
    queue.put(final_chip)
if __name__ == "__main__":
    # Driver: split the collection into ``process_num`` contiguous slices,
    # scan each slice in its own process, then add up the partial results
    # and write the grand total to a file.

    total=282039860
    process_num = 10
    # Floor division keeps the slice size an integer on Python 2 and 3.
    count_per_process = total // process_num
    # Must be named ``queue`` at module level: process_cursor looks it up as
    # a global inherited through fork.
    queue=multiprocessing.Queue()
    process_list = []

    start_time=time.time()
    print(start_time)

    for worker_id in range(process_num):
        start=worker_id*count_per_process
        # The last worker also takes the remainder so no documents are
        # dropped when ``total`` is not a multiple of ``process_num``.
        if worker_id == process_num - 1:
            slice_size = total - start
        else:
            slice_size = count_per_process
        p = multiprocessing.Process(target=process_cursor,args=(worker_id,start,slice_size,))
        process_list.append(p)

    for p in process_list:
        p.start()

    # NOTE(review): joining before draining the queue is only safe while
    # each worker puts a small item (presumably a single number here); large
    # items can block the child in its feeder thread and deadlock join().
    for p in process_list:
        p.join()

    end_time=time.time()
    print(end_time)
    # Accumulate into a real number; the original applied ``+=`` to the
    # builtin ``sum`` function, which raises TypeError.
    chip_sum = 0
    while not queue.empty():
        chip_sum += queue.get()
    print('sum is %d'%chip_sum)
    file_name= '/home/xxx/test_mongodb/script_mongodb/sum.txt'
    # ``with`` flushes and closes the file even if the write fails.
    with open(file_name,'w') as out_file:
        out_file.write(str(chip_sum))
    print('the process total time is %d'%(end_time-start_time))

我用 pymongo 以多进程方式扫描 MongoDB 中一个 2T 的集合,但同一时刻只有一个进程在工作,其他进程都在等待。我不明白为什么只有一个进程在运行:其他进程一直等到它结束,然后才有另一个进程开始工作。我的代码有什么问题吗?

0 个答案:

没有答案