我写了一个多进程程序,一个进程有两个线程调度任务和收集结果,多进程从task_queue获取任务并将结果放在result_queue中。它看起来如下: task_master.py
import threading
import random, time, Queue
from meliae import scanner
from multiprocessing.managers import BaseManager
class QueueManager(BaseManager):
pass
def dispatch_task(task):
print threading.currentThread()
for i in xrange(100000):
task.put(i)
print "task %d has been put into queue" % i
def get_result(result):
print threading.currentThread()
i = 0
while True:
r = result.get()
print r
i = i + 1
if (i % 100 == 0):
scanner.dump_all_objects('dump%s.txt' % time.time())
if __name__ == "__main__":
task_queue = Queue.Queue(maxsize=100)
result_queue = Queue.Queue(maxsize=50)
QueueManager.register('get_task_queue', callable=lambda: task_queue)
QueueManager.register('get_result_queue', callable=lambda: result_queue)
manager = QueueManager(address=('', 5000), authkey=b'abc')
manager.start()
task = manager.get_task_queue()
result = manager.get_result_queue()
task_thread = threading.Thread(target=dispatch_task, args=(task,))
result_thread = threading.Thread(target=get_result, args=(result,))
task_thread.start()
result_thread.start()
print "task thread has started"
print "result thread has started"
while True:
if not task_thread.isAlive() and not result_thread.isAlive():
break
manager.shutdown()
print('master exit.')
task_worker.py
import os
import uuid
import numpy as np
import time, sys, Queue
import random
from multiprocessing.managers import BaseManager
from multiprocessing import Process
Max_Process = 10
class QueueManager(BaseManager):
pass
def process_task(task, result):
r = list()
while True:
t = task.get()
for i in xrange(10):
a = list()
a.append(uuid.uuid1())
for j in xrange(10):
a.append(random.random())
r.append(a)
print "pid: %d, result %d is waiting to put into queue" % (os.getpid(), t)
result.put(r)
print "pid: %d, result %d has been put into queue" % (os.getpid(), t)
if __name__ == "__main__":
QueueManager.register('get_task_queue')
QueueManager.register('get_result_queue')
server_addr = '127.0.0.1'
print('Connect to server %s...' % server_addr)
m = QueueManager(address=(server_addr, 5000), authkey=b'abc')
m.connect()
task = m.get_task_queue()
result = m.get_result_queue()
plist = list()
for i in xrange(Max_Process):
p = Process(target=process_task, args=(task, result,))
plist.append(p)
p.start()
for p in plist:
p.join()
print('worker exit.')
当我设置了result_queue的maxsize时,task_master的内存使用量总是在增加。
Pid : 7092, rss: 18816 ;Pid : 7093, rss: 83808
Pid : 7092, rss: 18824 ;Pid : 7093, rss: 84292
Pid : 7092, rss: 19620 ;Pid : 7093, rss: 93280
Pid : 7092, rss: 19868 ;Pid : 7093, rss: 95424
Pid : 7092, rss: 19964 ;Pid : 7093, rss: 95648
Pid : 7092, rss: 20752 ;Pid : 7093, rss: 98948
Pid : 7092, rss: 20672 ;Pid : 7093, rss: 102624
我使用meliae.loader来分析task_master的内存使用情况,它看起来如下:
from meliae import loader
om = loader.load('dump1515814558.29.txt')
om.summarize()
p = om.get_all('list')
print len(p)
print "++++++++++++++++++++++++++++++++++++++++++++++"
print p[0]
print "++++++++++++++++++++++++++++++++++++++++++++++"
print p[0].c
print "++++++++++++++++++++++++++++++++++++++++++++++"
print p[0].p
结果是:
loaded line 43206, 43207 objs, 4.3 / 4.3 MiB read in 0.5s
checked 43206 / 43207 collapsed 1813
set parents 41393 / 41394
collapsed in 0.2s
1832
++++++++++++++++++++++++++++++++++++++++++++++
list(4432453288 14520B 1600refs 1par)
++++++++++++++++++++++++++++++++++++++++++++++
[list(4433209392 216B 11refs 1par), list(4433209320 216B 11refs 1par), list(4433209248 216B 11refs 1par), list(4433209176 216B 11refs 1par), ...]
++++++++++++++++++++++++++++++++++++++++++++++
[frame(4426104912 456B 7refs 1par 'get_result"')]
我很困惑,为什么在设置队列的maxsize时内存使用率会增加,任何答案都会受到赞赏,谢谢!