使用python2.7队列时为什么内存使用总是在增加

时间:2018-01-13 04:14:30

标签: python queue multiprocessing

我写了一个多进程程序,一个进程有两个线程调度任务和收集结果,多进程从task_queue获取任务并将结果放在result_queue中。它看起来如下: task_master.py

import threading
import random, time, Queue
from meliae import scanner
from multiprocessing.managers import BaseManager

class QueueManager(BaseManager):
    pass

def dispatch_task(task):
    print threading.currentThread()
    for i in xrange(100000):
        task.put(i)
        print "task %d has been put into queue" % i 

def get_result(result):
    print threading.currentThread()
    i = 0 
    while True:
        r = result.get()
        print r
        i = i + 1 
        if (i % 100 == 0): 
            scanner.dump_all_objects('dump%s.txt' % time.time())




if __name__ == "__main__":
    task_queue = Queue.Queue(maxsize=100)
    result_queue = Queue.Queue(maxsize=50)

    QueueManager.register('get_task_queue', callable=lambda: task_queue)
    QueueManager.register('get_result_queue', callable=lambda: result_queue)

    manager = QueueManager(address=('', 5000), authkey=b'abc')
    manager.start()

    task = manager.get_task_queue()
    result = manager.get_result_queue()

    task_thread = threading.Thread(target=dispatch_task, args=(task,))
    result_thread = threading.Thread(target=get_result, args=(result,))

    task_thread.start()
    result_thread.start()

    print "task thread has started"
    print "result thread has started"

    while True:
        if not task_thread.isAlive() and not result_thread.isAlive():
            break


    manager.shutdown()
    print('master exit.')

task_worker.py

import os
import uuid
import numpy as np
import time, sys, Queue
import random
from multiprocessing.managers import BaseManager
from multiprocessing import Process

Max_Process = 10

class QueueManager(BaseManager):
    pass

def process_task(task, result):
    r = list()

    while True:
        t = task.get()

        for i in xrange(10):
            a = list()
            a.append(uuid.uuid1())
            for j in xrange(10):
                a.append(random.random())

            r.append(a)

        print "pid: %d, result %d is waiting to put into queue" % (os.getpid(), t)
        result.put(r)
        print "pid: %d, result %d has been put into queue" % (os.getpid(), t)

if __name__ == "__main__":
    QueueManager.register('get_task_queue')
    QueueManager.register('get_result_queue')


    server_addr = '127.0.0.1'
    print('Connect to server %s...' % server_addr)

    m = QueueManager(address=(server_addr, 5000), authkey=b'abc')

    m.connect()

    task = m.get_task_queue()
    result = m.get_result_queue()

    plist = list()
    for i in xrange(Max_Process):
        p = Process(target=process_task, args=(task, result,))
        plist.append(p)
        p.start()

    for p in plist:
        p.join()

    print('worker exit.')

当我设置了result_queue的maxsize时,task_master的内存使用量总是在增加。

Pid : 7092, rss: 18816 ;Pid : 7093, rss: 83808 
Pid : 7092, rss: 18824 ;Pid : 7093, rss: 84292 
Pid : 7092, rss: 19620 ;Pid : 7093, rss: 93280 
Pid : 7092, rss: 19868 ;Pid : 7093, rss: 95424 
Pid : 7092, rss: 19964 ;Pid : 7093, rss: 95648  
Pid : 7092, rss: 20752 ;Pid : 7093, rss: 98948 
Pid : 7092, rss: 20672 ;Pid : 7093, rss: 102624

我使用meliae.loader来分析task_master的内存使用情况,它看起来如下:

from meliae import loader
om = loader.load('dump1515814558.29.txt')
om.summarize()


p = om.get_all('list')
print len(p)
print "++++++++++++++++++++++++++++++++++++++++++++++"
print p[0]
print "++++++++++++++++++++++++++++++++++++++++++++++"
print p[0].c

print "++++++++++++++++++++++++++++++++++++++++++++++"
print p[0].p

结果是:

loaded line 43206, 43207 objs,   4.3 /   4.3 MiB read in 0.5s        
checked    43206 /    43207 collapsed     1813    
set parents    41393 /    41394            
collapsed in 0.2s
1832
++++++++++++++++++++++++++++++++++++++++++++++
list(4432453288 14520B 1600refs 1par)
++++++++++++++++++++++++++++++++++++++++++++++
[list(4433209392 216B 11refs 1par), list(4433209320 216B 11refs 1par), list(4433209248 216B 11refs 1par), list(4433209176 216B 11refs 1par), ...]
++++++++++++++++++++++++++++++++++++++++++++++
[frame(4426104912 456B 7refs 1par 'get_result"')]

我很困惑,为什么在设置队列的maxsize时内存使用率会增加,任何答案都会受到赞赏,谢谢!

0 个答案:

没有答案