在python中使用多处理分工差?

时间:2011-08-18 19:46:17

标签: python parallel-processing multiprocessing multicore

考虑以下计划:

import Queue
from multiprocessing import Queue as Q
from multiprocessing import Process, Value, Array
from time import time
from numpy import sum, zeros
from numpy.random import normal

def enche(queue, cnts, tims, t0, proc):
    while True:
        try:
            queue.get(False)
            x = sum(normal(size = 1000))
            cnts[proc] = cnts[proc] + 1
        except Queue.Empty:
            tims[proc] = time() - t0.value
            break

def main(m, nth):
    queue = Q()
    tmp = [queue.put(0) for i in xrange(m)]
    cnts = Array('i', zeros(nth, dtype = "int"))
    tims = Array('d', zeros(nth))
    t0  = Value('d', time())
    procs = [ Process(target = enche , args = (queue,cnts, tims, t0,i)) for i in xrange(nth) ]
    for p in procs:
        p.start()
    for p in procs:
        p.join()
    for k in xrange(nth):
        print "%2.0f/100, %4.1f ms,"%(cnts[k]/float(m) * 100, tims[k]*1000), 
    print "" 


if __name__ == "__main__":
    for k in xrange(100):
        main(100, 4)

本质上,函数main(m,nth)应该执行一项任务 - 将m个正常随机数加m次 - 并在第n个过程中划分此任务。

然后,我计算并打印每个流程完成的任务百分比以及每个流程持续多长时间。这是我得到的典型结果:

 4/100,  1.6 ms,  3/100,  2.2 ms, 88/100, 10.9 ms,  5/100,  2.7 ms, 
23/100,  3.6 ms,  1/100,  1.5 ms,  3/100,  2.0 ms, 73/100, 14.9 ms, 
 8/100,  3.9 ms,  1/100,  3.4 ms,  8/100,  5.3 ms, 83/100, 21.0 ms, 
 6/100,  4.1 ms,  0/100,  3.2 ms, 76/100, 19.0 ms, 18/100,  8.8 ms, 
 5/100,  1.7 ms, 90/100,  9.2 ms,  5/100,  2.4 ms,  0/100,  2.6 ms, 
 2/100,  1.5 ms,  8/100,  2.4 ms, 89/100,  8.9 ms,  1/100,  2.6 ms, 
 8/100,  2.3 ms, 11/100,  2.7 ms,  3/100,  2.4 ms, 78/100,  8.5 ms, 
52/100,  5.8 ms, 11/100,  3.2 ms,  4/100,  2.6 ms, 33/100,  5.9 ms, 
 6/100,  2.3 ms,  8/100,  2.9 ms, 24/100,  4.3 ms, 62/100,  7.7 ms, 
13/100,  3.0 ms, 11/100,  2.6 ms,  1/100,  2.5 ms, 75/100,  8.5 ms, 
 6/100,  2.1 ms,  3/100,  2.1 ms,  6/100,  2.7 ms, 85/100,  9.0 ms, 
 8/100,  2.3 ms,  3/100,  2.0 ms, 27/100,  4.4 ms, 62/100,  7.6 ms, 
18/100,  3.2 ms,  0/100,  1.9 ms,  6/100,  3.1 ms, 76/100,  9.2 ms, 
 3/100,  1.9 ms, 19/100,  3.4 ms, 30/100,  6.0 ms, 48/100,  7.9 ms, 
10/100,  2.4 ms,  0/100,  1.5 ms,  8/100,  2.8 ms, 82/100,  8.8 ms, 
 6/100,  1.8 ms, 36/100,  4.7 ms, 32/100,  4.7 ms, 26/100,  4.7 ms, 
 8/100,  2.0 ms, 46/100,  5.9 ms, 46/100,  5.9 ms,  0/100,  2.6 ms, 
 3/100,  1.6 ms, 54/100,  6.2 ms,  1/100,  2.4 ms, 42/100,  6.1 ms, 
12/100,  2.6 ms,  8/100,  2.4 ms, 34/100,  5.3 ms, 46/100,  6.3 ms, 
 1/100,  1.4 ms,  9/100,  2.3 ms,  1/100,  2.3 ms, 89/100,  9.2 ms, 
13/100,  2.6 ms,  5/100,  2.2 ms,  3/100,  2.4 ms, 79/100,  8.7 ms, 
11/100,  2.3 ms,  7/100,  2.6 ms,  8/100,  2.9 ms, 74/100,  8.8 ms, 
 8/100,  1.8 ms,  9/100,  2.7 ms,  0/100,  1.8 ms, 83/100,  9.3 ms, 
13/100,  2.4 ms,  4/100,  2.0 ms,  3/100,  2.5 ms, 80/100,  9.9 ms, 
10/100,  2.3 ms, 72/100,  8.0 ms,  6/100,  2.6 ms, 12/100,  4.1 ms, 
27/100,  4.3 ms,  0/100,  1.5 ms,  5/100,  2.5 ms, 68/100,  8.4 ms, 
 3/100,  1.5 ms,  8/100,  2.3 ms, 86/100,  8.3 ms,  3/100,  2.7 ms, 
 7/100,  1.9 ms, 10/100,  2.5 ms,  5/100,  2.5 ms, 78/100,  8.2 ms, 
14/100,  2.5 ms,  3/100,  2.0 ms,  5/100,  2.5 ms, 78/100,  8.2 ms, 
53/100,  7.4 ms,  3/100,  1.9 ms,  1/100,  2.4 ms, 43/100,  7.5 ms, 
 3/100,  1.7 ms,  9/100,  2.5 ms, 47/100,  6.0 ms, 41/100,  6.0 ms, 
 3/100,  1.5 ms, 37/100,  4.7 ms, 33/100,  4.7 ms, 27/100,  4.7 ms, 
 7/100,  1.8 ms, 36/100,  4.5 ms, 32/100,  4.5 ms, 25/100,  4.5 ms, 
 3/100,  1.4 ms, 51/100,  5.6 ms, 46/100,  5.6 ms,  0/100,  2.3 ms, 
 5/100,  1.8 ms,  9/100,  2.5 ms, 46/100,  5.8 ms, 40/100,  5.9 ms, 
 9/100,  2.0 ms,  3/100,  1.8 ms,  5/100,  2.3 ms, 83/100,  8.8 ms, 
 7/100,  2.1 ms,  3/100,  2.1 ms,  8/100,  2.9 ms, 82/100,  8.6 ms, 
11/100,  2.3 ms,  4/100,  2.0 ms,  3/100,  2.5 ms, 82/100,  9.3 ms, 
19/100,  2.9 ms,  9/100,  2.6 ms, 10/100,  3.0 ms, 62/100,  7.3 ms, 
18/100,  2.8 ms, 16/100,  2.9 ms,  2/100,  2.5 ms, 64/100,  7.9 ms, 
 2/100,  1.5 ms, 51/100,  5.8 ms, 47/100,  5.9 ms,  0/100,  2.4 ms, 
26/100,  4.0 ms,  8/100,  2.3 ms, 11/100,  2.9 ms, 55/100,  7.6 ms, 
 8/100,  2.1 ms,  3/100,  1.9 ms, 79/100,  9.2 ms, 10/100,  3.8 ms, 
14/100,  2.5 ms,  4/100,  2.1 ms, 45/100,  5.6 ms, 37/100,  5.6 ms, 
11/100,  2.2 ms,  9/100,  2.8 ms,  4/100,  2.4 ms, 76/100,  8.9 ms, 
 3/100,  1.4 ms,  4/100,  2.1 ms,  5/100,  2.4 ms, 88/100,  9.5 ms, 
 3/100,  1.4 ms,  7/100,  2.4 ms, 49/100,  5.9 ms, 41/100,  5.9 ms, 
 3/100,  1.4 ms,  3/100,  1.8 ms, 92/100,  9.3 ms,  2/100,  2.6 ms, 
 9/100,  2.3 ms, 16/100,  3.1 ms, 41/100,  5.5 ms, 34/100,  5.5 ms, 
 5/100,  1.7 ms,  8/100,  2.3 ms, 47/100,  5.8 ms, 40/100,  5.7 ms, 
 6/100,  1.7 ms,  5/100,  2.2 ms,  5/100,  2.4 ms, 84/100,  8.6 ms, 
 7/100,  2.0 ms,  6/100,  2.2 ms, 47/100,  5.9 ms, 40/100,  5.8 ms, 
 5/100,  1.9 ms, 32/100,  5.1 ms,  6/100,  2.7 ms, 57/100,  7.7 ms, 
11/100,  2.4 ms, 14/100,  2.7 ms, 42/100,  5.3 ms, 33/100,  5.3 ms, 
50/100,  6.6 ms, 12/100,  2.8 ms,  0/100,  1.9 ms, 38/100,  6.6 ms, 
55/100,  7.0 ms,  1/100,  1.7 ms,  2/100,  2.2 ms, 42/100,  6.9 ms, 
 8/100,  1.9 ms, 48/100,  6.7 ms,  5/100,  2.4 ms, 39/100,  6.7 ms, 
12/100,  2.3 ms, 18/100,  3.0 ms, 30/100,  5.6 ms, 40/100,  6.8 ms, 
 5/100,  1.7 ms,  3/100,  1.9 ms,  5/100,  2.5 ms, 87/100,  8.8 ms, 
50/100,  6.7 ms, 13/100,  2.7 ms,  0/100,  2.0 ms, 37/100,  6.7 ms, 

为什么我的分工如此糟糕?为什么有时一个过程会持续很短的时间而什么都不做,而其他人几乎完成了90%的工作?

我该怎么做才能改善这个?

编辑:额外信息 - 我在i7 CPU中运行它。

2 个答案:

答案 0 :(得分:5)

与进程启动开销相比,它并行执行的任务相当小,因此一个进程可以在其他进程有机会开始运行之前进行多次迭代。

如果该行:

x = sum(normal(size = 1000))

更改为:

x = sum(normal(size = 100000))

这就是我得到的:

25/100, 366.2 ms, 25/100, 364.4 ms, 25/100, 364.1 ms, 25/100, 361.7 ms, 
34/100, 362.1 ms,  0/100,  2.6 ms, 25/100, 357.6 ms, 41/100, 357.8 ms, 
25/100, 365.2 ms, 26/100, 362.7 ms, 25/100, 366.0 ms, 24/100, 355.7 ms, 
21/100, 360.3 ms, 21/100, 355.9 ms, 22/100, 362.3 ms, 36/100, 358.0 ms, 

答案 1 :(得分:1)

正如Maxim所提到的,您的任务太小,操作系统无法依次交换每个进程,并为每个进程调整分配的CPU时间。任务越长,每个进程就越有权访问处理器。

您还可以使用os.times()

检查已分配给当前进程的实际CPU时间

它允许您知道为每个进程分配了多少实际CPU时间。当然,如果没有分配任何CPU时间,该过程将无法完成任何工作:)