Python 中的多进程(multiprocessing)比单线程还慢

时间:2017-11-25 20:42:38

标签: python multithreading

我最初的问题是关于 Python 下的并行性。然而,由于那个问题一直没有得到回答,我已将其删除,并在这里尝试总结我的结论。希望能对某些人有所帮助……

通常,有两种主要方法可以使代码并行运行——使用多线程(threading)库或多进程(multiprocessing)库。

根据 stackoverflow.com 上的许多帖子,多线程库能够在线程之间高效地共享内存,但所有线程都运行在单个核心上。因此,只有当瓶颈是 I/O 操作时,它才能加速您的代码。我不确定这个库在现实中是否有很多应用场景……

如果您的代码是 CPU 密集型的(有时称为 CPU 受限,CPU-bound),多进程库可以解决您的问题。该库将线程分布在各个核心上。然而,许多人(包括我)观察到,这样的多核代码可能明显慢于对应的单核版本。这个问题据说是由于各个线程无法高效地共享内存造成的——数据被大量复制,从而产生相当大的开销。正如下面的代码所示,开销在很大程度上取决于输入数据的类型。这个问题在 Windows 上比在 Linux 上更严重。我不得不说,并行性是 Python 最让我失望的地方——显然 Python 在设计时并没有考虑并行性……

第一段代码以 pandas DataFrame 作为输入,并使用 Process 将工作分配到各个核心。

import numpy as np
import math as mth
import pandas as pd
import time as tm
import multiprocessing as mp

def bnd_calc_npv_dummy(bnds_info, core_idx, npv):
    """Worker-side dummy valuation.

    Sleeps 0.1 ms per row of ``bnds_info`` to imitate work, then stores the
    ``'npv'`` column as a numpy array in ``npv`` under the key ``core_idx``.
    Nothing is returned; the result is delivered through ``npv``.
    """
    # simulated cost grows linearly with the number of bonds in this piece
    simulated_delay = 0.0001 * len(bnds_info)
    tm.sleep(simulated_delay)

    # publish this worker's result under its own slot
    npv_column = bnds_info['npv']
    npv[core_idx] = np.array(npv_column)

def split_bnds_info(bnds_info, cores_no):
    """Slice the bond dataframe into ``cores_no`` consecutive pieces.

    Every piece holds at most ceil(len(bnds_info) / cores_no) rows; trailing
    pieces may be shorter or empty. Each piece is an independent copy whose
    index has been reset (the previous index is kept as a column).

    Returns a list with exactly ``cores_no`` dataframes.
    """
    total = len(bnds_info)
    per_core = mth.ceil(np.float64(total) / cores_no)  # rows handed to one core

    def piece(core):
        # row window owned by this core, clipped at the end of the data
        first = int(core * per_core)
        last = int(min((core + 1) * per_core, total))
        return bnds_info[first:last].reset_index().copy()

    return [piece(core) for core in range(cores_no)]

def bnd_calc_npv(bnds_info, cores_no):
    """Run the dummy valuation in ``cores_no`` separate processes.

    The input is cut into one piece per worker by ``split_bnds_info``; each
    worker (``bnd_calc_npv_dummy``) stores its result in a manager-backed
    dict under its own worker index.

    Args:
        bnds_info: Bond definitions, in whatever form ``split_bnds_info``
            accepts.
        cores_no: Number of worker processes to start.

    Returns:
        The workers' results joined with ``np.hstack`` in ascending
        worker-index order, so the output lines up with the input rows.
    """
    manager = mp.Manager()
    try:
        npv = manager.dict()
        bnds_info_mp = split_bnds_info(bnds_info, cores_no)

        # range (not xrange) keeps this runnable on both Python 2 and Python 3
        processes = [
            mp.Process(target=bnd_calc_npv_dummy,
                       args=(bnds_info_mp[core_idx], core_idx, npv))
            for core_idx in range(cores_no)
        ]
        for process in processes:
            process.start()
        for process in processes:
            process.join()

        # copy the results out of the proxy before the manager goes away
        results = dict(npv.items())
    finally:
        # stop the manager's server process instead of leaking one per call
        manager.shutdown()

    # the shared dict's iteration order is not guaranteed to follow worker
    # index, so order the pieces explicitly before stacking them
    return np.hstack([results[core_idx] for core_idx in sorted(results)])

if __name__ == '__main__':

    # build the sample: one template bond row repeated bnds_no times
    bnds_no = 1200 # number of dummy bonds in the sample
    template_row = pd.DataFrame({'currency_name' : 'EUR', 'npv' : 100}, index = range(1))
    bnds_info = pd.concat([template_row] * bnds_no, ignore_index = True)

    # baseline: the same dummy workload executed directly in this process
    print("ONE CORE")
    start_time = tm.time()
    bnds_no = len(bnds_info)
    tm.sleep(0.0001 * bnds_no)
    npv = np.array(bnds_info['npv'])
    elapsed_time = tm.time() - start_time
    print('   elapsed time: ' + str(elapsed_time) + 's')

    # multi-process runs: (banner, number of workers, prefix of the timing line)
    runs = (
        ("TWO CORES", 2, '   elapsed time: '),
        ("THREE CORES", 3, '  elapsed time: '),
        ("FOUR CORES", 4, '  elapsed time: '),
    )
    for banner, cores_no, prefix in runs:
        print(banner)
        start_time = tm.time()
        npv = bnd_calc_npv(bnds_info, cores_no)
        elapsed_time = tm.time() - start_time
        print(prefix + str(elapsed_time) + 's')

第二段代码与前一段相同——唯一的区别是这次我们使用 numpy 数组代替 pandas DataFrame,而性能差异很大(请比较单核运行时间的变化与多核运行时间的变化)。

import numpy as np
import math as mth
import time as tm
import multiprocessing as mp

def bnd_calc_npv_dummy(bnds_info, core_idx, npv):
    """Worker-side dummy valuation for array input.

    Sleeps 0.1 ms per element of ``bnds_info`` to imitate work, then stores
    ``bnds_info`` itself in ``npv`` under the key ``core_idx``. Nothing is
    returned; the result is delivered through ``npv``.
    """
    # simulated cost grows linearly with the number of bonds in this piece
    workload = len(bnds_info)
    tm.sleep(0.0001 * workload)

    # the "valuation" simply echoes the input into this worker's slot
    npv[core_idx] = bnds_info

def split_bnds_info(bnds_info, cores_no):
    """Slice a sequence of bonds into ``cores_no`` consecutive pieces.

    Every piece holds at most ceil(len(bnds_info) / cores_no) elements;
    trailing pieces may be shorter or empty. Pieces are plain slices of the
    input.

    Returns a list with exactly ``cores_no`` slices.
    """
    total = len(bnds_info)
    chunk = mth.ceil(np.float64(total) / cores_no)  # elements handed to one core

    # (start, stop) window per core; stop is clipped at the end of the data
    windows = (
        (int(core * chunk), int(min((core + 1) * chunk, total)))
        for core in range(cores_no)
    )
    return [bnds_info[start:stop] for start, stop in windows]

def bnd_calc_npv(bnds_info, cores_no):
    """Run the dummy valuation in ``cores_no`` separate processes.

    The input is cut into one piece per worker by ``split_bnds_info``; each
    worker (``bnd_calc_npv_dummy``) stores its result in a manager-backed
    dict under its own worker index.

    Args:
        bnds_info: Bond values, in whatever form ``split_bnds_info`` accepts.
        cores_no: Number of worker processes to start.

    Returns:
        The workers' results joined with ``np.hstack`` in ascending
        worker-index order, so the output lines up with the input elements.
    """
    manager = mp.Manager()
    try:
        npv = manager.dict()
        bnds_info_mp = split_bnds_info(bnds_info, cores_no)

        # range (not xrange) keeps this runnable on both Python 2 and Python 3
        processes = [
            mp.Process(target=bnd_calc_npv_dummy,
                       args=(bnds_info_mp[core_idx], core_idx, npv))
            for core_idx in range(cores_no)
        ]
        for process in processes:
            process.start()
        for process in processes:
            process.join()

        # copy the results out of the proxy before the manager goes away
        results = dict(npv.items())
    finally:
        # stop the manager's server process instead of leaking one per call
        manager.shutdown()

    # the shared dict's iteration order is not guaranteed to follow worker
    # index, so order the pieces explicitly before stacking them
    return np.hstack([results[core_idx] for core_idx in sorted(results)])

if __name__ == '__main__':

    # build the sample: a numpy array holding bnds_no identical values
    bnds_no = 1200 # number of dummy bonds in the sample
    bnds_info = np.array([100] * bnds_no)

    # baseline: only the simulated workload, executed directly in this process
    print("ONE CORE")
    start_time = tm.time()
    bnds_no = len(bnds_info)
    tm.sleep(0.0001 * bnds_no)
    elapsed_time = tm.time() - start_time
    print('   elapsed time: ' + str(elapsed_time) + 's')

    # multi-process runs: (banner, number of workers, prefix of the timing line)
    runs = (
        ("TWO CORES", 2, '   elapsed time: '),
        ("THREE CORES", 3, '  elapsed time: '),
        ("FOUR CORES", 4, '  elapsed time: '),
    )
    for banner, cores_no, prefix in runs:
        print(banner)
        start_time = tm.time()
        npv = bnd_calc_npv(bnds_info, cores_no)
        elapsed_time = tm.time() - start_time
        print(prefix + str(elapsed_time) + 's')

最后一段代码使用 Pool 代替 Process,运行时间略好一些。

import numpy as np
import time as tm
import multiprocessing as mp

#import pdb
#pdb.set_trace()

def bnd_calc_npv_dummy(bnds_info):
    """Dummy per-task valuation: sleep in proportion to input size, then echo the input.

    Args:
        bnds_info: Either a sized container of bonds or a single scalar
            bond value.

    Returns:
        ``bnds_info`` unchanged.
    """
    try:
        # get number of bonds
        bnds_no = len(bnds_info)
    except TypeError:
        # scalars (e.g. a single float) have no len(); count them as one bond
        bnds_no = 1

    # simulate 0.1 ms of work per bond, for sized and scalar inputs alike
    # (must sit outside the except branch, otherwise sized inputs never sleep)
    tm.sleep(0.0001 * bnds_no)

    return bnds_info

def bnd_calc_npv(bnds_info, cores_no):
    """Map the dummy valuation over every element using a process pool.

    Args:
        bnds_info: Object exposing ``tolist()`` (the script passes a numpy
            array); each element of the resulting list becomes one task.
        cores_no: Number of worker processes in the pool.

    Returns:
        A list with one result per element, in input order.
    """
    pool = mp.Pool(processes = cores_no)
    try:
        npv = pool.map(bnd_calc_npv_dummy, bnds_info.tolist())
    finally:
        # release the workers; without this every call leaves its
        # worker processes behind
        pool.close()
        pool.join()

    # return NPV of individual bonds
    return npv

if __name__ == '__main__':

    # build the sample: a numpy array holding bnds_no identical float values
    bnds_no = 1200 # number of dummy bonds in the sample
    bnds_info = np.array([100.0] * bnds_no)

    # baseline: only the simulated workload, executed directly in this process
    print("ONE CORE")
    start_time = tm.time()
    bnds_no = len(bnds_info)
    tm.sleep(0.0001 * bnds_no)
    elapsed_time = tm.time() - start_time
    print('   elapsed time: ' + str(elapsed_time) + 's')

    # pool-based runs: (banner, number of workers, prefix of the timing line)
    runs = (
        ("TWO CORES", 2, '   elapsed time: '),
        ("THREE CORES", 3, '  elapsed time: '),
        ("FOUR CORES", 4, '  elapsed time: '),
    )
    for banner, cores_no, prefix in runs:
        print(banner)
        start_time = tm.time()
        npv = bnd_calc_npv(bnds_info, cores_no)
        elapsed_time = tm.time() - start_time
        print(prefix + str(elapsed_time) + 's')

所以,我的结论是:Python 的并行实现不适用于实际应用(我使用的是 Python 2.7.13 和 Windows 7)。 最好的问候,

麦基

PS:如果有人能改进这些代码,我会很乐意改变我的看法……

1 个答案:

答案 0 :(得分:1)

当问题的各个部分可以独立计算时,多进程的效果最好,例如使用 multiprocessing.Pool。 池中的每个工作进程处理输入的一部分,并将结果返回给主进程。

如果所有进程都需要修改整个输入数组中的数据,那么 manager 带来的同步开销可能会抵消多进程带来的任何收益。