
时间:2014-11-17 16:27:23

标签: python multithreading cpython gil



这与臭名昭著的 CPython GIL 有关吗?


from threading import Thread 
import numpy as np 
import time

# breaks a list into n list of lists
def split(a, n):
    """Partition sequence ``a`` into ``n`` contiguous slices of near-equal size.

    The first ``len(a) % n`` slices receive one extra element, so slice
    lengths differ by at most one. Slices are produced lazily, in order.

    Args:
        a: A sliceable sequence with a length (e.g. a list).
        n: Number of slices to produce; must be non-zero.

    Returns:
        A generator yielding ``n`` slices of ``a``.

    Raises:
        ZeroDivisionError: If ``n`` is 0 (raised at call time, not lazily).
    """
    # divmod floors for ints on both Python 2 and 3; a plain `/` would give a
    # float on Python 3 and break the slice arithmetic below.
    k, m = divmod(len(a), n)
    # `range` rather than `xrange`, which does not exist on Python 3.
    return (a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n))


def parallel_compute(fn):
    """Wrap ``fn`` so its fourth positional argument is processed in chunks on threads.

    The returned wrapper splits ``args[3]`` into ``THREAD_NUM`` chunks with
    ``split``, calls ``fn(args[0], args[1], args[2], chunk)`` on one thread
    per chunk, joins all threads, and concatenates the per-chunk results in
    chunk order.

    NOTE(review): ``THREAD_NUM`` is read as a module-level name but is not
    defined in the code shown here; it must exist before the wrapper is called.

    Args:
        fn: Callable taking four positional arguments and returning an
            iterable of results for the chunk it was given.

    Returns:
        The wrapping function. Keyword arguments and positional arguments
        beyond the fourth are accepted but ignored.
    """
    class Worker(Thread):
        """Thread that runs ``fn`` on one chunk and stores the return value."""

        def __init__(self, *args):
            # The base constructor must run before start(); without it
            # Thread.start() raises RuntimeError("thread.__init__() not called").
            Thread.__init__(self)
            self.result = None
            self.args = args

        def run(self):
            self.result = fn(*self.args)

    def new_compute(*args, **kwargs):
        threads = [Worker(args[0], args[1], args[2], x) for x in split(args[3], THREAD_NUM)]
        for x in threads:
            x.start()
        for x in threads:
            x.join()
        # Merge per-thread results, preserving chunk order.
        final_res = []
        for x in threads:
            final_res.extend(x.result)
        return final_res

    return new_compute

# some function that does a lot of computation
def f(x):
    """Return ``|tan(cos(sqrt(x**2)))|`` computed with NumPy functions."""
    magnitude = np.sqrt(x ** 2)
    return np.abs(np.tan(np.cos(magnitude)))

class Foo:
    """Example class whose ``compute`` method applies ``f`` element-wise."""

    def compute(self, bla, blah, input_list):
        """Apply ``f`` to every element of ``input_list``.

        Args:
            bla: Unused.
            blah: Unused.
            input_list: Iterable of values passed one at a time to ``f``.

        Returns:
            A list of results in input order.
        """
        # A comprehension yields a real list on both Python 2 and 3; `map`
        # is lazy on Python 3, so a caller timing this call would measure
        # no actual work.
        return [f(x) for x in input_list]

# Benchmark input: the integers 0 .. 40*1000*100 - 1.
# list(range(...)) builds the same list without leaving a stray loop
# variable `i` in the module namespace on Python 2.
inp = list(range(40 * 1000 * 100))
# For a quick check, swap in a tiny input instead:
#inp = [1,2,3,4,5,6,7]

if __name__ == "__main__":
    # Time a single call to Foo.compute over the full input.
    # NOTE(review): the label printed below says 'parallel', but no
    # parallel_compute wrapper is visibly applied to Foo.compute here — confirm.
    o = Foo()
    start = time.time()
    res = o.compute(None, None, inp)
    end = time.time()
    # A single pre-formatted string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('parallel %s' % (end - start))


import time, fast_one, numpy as np

class SlowFoo:
    """Example class whose ``compute`` applies ``fast_one.f`` element-wise."""

    def compute(self, bla, blah, input_list):
        """Apply ``fast_one.f`` to every element of ``input_list``.

        Args:
            bla: Unused.
            blah: Unused.
            input_list: Iterable of values passed one at a time to
                ``fast_one.f``.

        Returns:
            A list of results in input order.
        """
        # Return a real list on both Python 2 and 3: `map` is lazy on
        # Python 3, and the caller in this script hands the result to
        # np.array, which needs an actual sequence.
        return [fast_one.f(x) for x in input_list]

if __name__ == "__main__":
    # Time a single call to SlowFoo.compute, including the conversion of its
    # result to a NumPy array.
    o = SlowFoo()
    start = time.time()
    res = np.array(o.compute(None, None, fast_one.inp))
    end = time.time()
    # A single pre-formatted string prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('single %s' % (end - start))

这段代码会报错:"PicklingError: Can't pickle <type 'function'>: attribute lookup __builtin__.function failed"。

import pathos.multiprocessing as mp
import numpy as np, dill
import time

def split(a, n):
    """Break sequence ``a`` into ``n`` consecutive slices of near-equal size.

    Slice lengths differ by at most one; the first ``len(a) % n`` slices are
    the longer ones. Slices are generated lazily, in order.

    Args:
        a: A sliceable sequence with a length (e.g. a list).
        n: Number of slices to produce; must be non-zero.

    Returns:
        A generator yielding ``n`` slices of ``a``.

    Raises:
        ZeroDivisionError: If ``n`` is 0 (raised at call time, not lazily).
    """
    # divmod keeps the quotient an int on Python 3 as well as Python 2;
    # `len(a) / n` would be a float on Python 3 and break the slicing.
    k, m = divmod(len(a), n)
    # `range` is available on both Python 2 and 3, unlike `xrange`.
    return (a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n))

def f(x):
    """Return the absolute value of ``tan(cos(sqrt(x**2)))`` using NumPy."""
    root = np.sqrt(x ** 2)
    tangent = np.tan(np.cos(root))
    return np.abs(tangent)

def compute(input_list):
    """Apply ``f`` to every element of ``input_list``.

    Args:
        input_list: Iterable of values passed one at a time to ``f``.

    Returns:
        A list of results in input order.
    """
    # Return a real list on both Python 2 and 3. This function's result is
    # sent back from pool workers, and a lazy Python 3 `map` object cannot
    # be pickled.
    return [f(x) for x in input_list]

# Pool size; also used by parallel_compute as the number of input chunks.
D = 2
# Shared pool, created as soon as this module is executed.
pool = mp.Pool(D)
def parallel_compute(fn):
    """Wrap ``fn`` so its first positional argument is processed chunk-wise via ``pool``.

    The wrapper splits ``args[0]`` into ``D`` chunks with ``split``, submits
    them to the module-level ``pool`` with ``map_async``, waits for the
    results, and flattens the per-chunk outputs into one list in chunk order.

    Args:
        fn: Callable taking one chunk and returning an iterable of results.

    Returns:
        The wrapping function. Keyword arguments and positional arguments
        beyond the first are accepted but ignored.
    """
    def new_compute(*args, **kwargs):
        chunks = list(split(args[0], D))
        per_chunk = pool.map_async(fn, chunks).get()
        # Flatten one level: the results of chunk 0, then chunk 1, ...
        return [item for chunk_out in per_chunk for item in chunk_out]

    return new_compute

# Rebind `compute` to the pool-backed wrapper around the original function.
# NOTE(review): after this line the module-level name `compute` refers to the
# wrapper, not to the function handed to the pool. Pickling functions by name
# relies on that lookup, so this may be related to the PicklingError — confirm.
compute = parallel_compute(compute)

# Benchmark input: the integers 0 .. 40*1000 - 1.
# list(range(...)) builds the same list without leaving a stray loop
# variable `i` in the module namespace on Python 2.
inp = list(range(40 * 1000))

if __name__ == "__main__":
    # Time one call to the pool-backed `compute` and report the result size.
    start = time.time()
    res = compute(inp)
    end = time.time()
    # Pre-formatted string / single parenthesised argument: prints
    # identically under the Python 2 print statement and the Python 3
    # print function.
    print('parallel %s' % (end - start))
    print(len(res))

2 个答案:

答案 0 :(得分:2)

是的。当你的线程执行的是用 Python 实现的 CPU 密集型工作时(而不是通过 C 扩展来完成——C 扩展可以在与 Python 数据结构之间编组/解组数据的前后释放 GIL),GIL 在这里就是一个问题。


答案 1 :(得分:0)


您需要工作线程来启动子进程进行计算。 那些子进程可以真正并行运行。