多处理python全局变量multiprocessing.Value池方法返回错误结果

时间:2018-07-24 11:19:33

标签: python multiprocessing global-variables

在下面的python代码中,我尝试了两种求和值的方法:

  1. 通过遍历列表进行指挥
  2. 使用多处理池

在某些数字之上,我收到不同的结果,即S_1 int和S_2多重处理。值返回不同的结果

import random
import multiprocessing
import time
from multiprocessing import Process, Value, Array
from multiprocessing import Pool
import pandas as pd
from data_path import *
nm = 10**4
cores=2
tickers=list(pd.read_excel("/Users/itaybd/Finzor_2_26/dev_code/Engine/DATA/SecurityClassifiers/SPDR/SPY_All_Holdings.xls",skiprows=3)["Identifier"])[:100]

tickers = [t for t in tickers if isinstance(t,str) or isinstance(t,unicode)]

file_names = [os.path.join(data_path,t+".csv") for t in tickers if os.path.isfile(os.path.join(data_path,t+".csv"))] 
global S_1,S_2
S_1 = 0
S_2 =  Value('d', 0.0)

def g1(f_name):
    global S_1
    df =  (pd.read_csv(f_name))#.ix[-1,"Close"]
    S_1+=df.ix[0,"Close"]
def g2(f_name):
    global S_2
    df= (pd.read_csv(f_name))#.ix[-1,"Close"]
    S_2.value +=df.ix[0,"Close"]
    return df.ix[0,"Close"]


def tm(f):
    st = time.time()
    f()
    t = time.time()- st
    print("total run time",t)
    return t


def g_tmp1():
    for f_name in file_names:
        g1(f_name)
def g_tmp2():
    p = Pool(cores)
    MM = p.map(g2,file_names)
    print("MM",sum(MM))
    p.close()
    p.join()
def g_tmp2m():
    p = Pool(cores-1)
    p.map(g2,file_names)
    p.close()
    p.join()


print(tm(g_tmp1))
print(tm(g_tmp2))

print("S_1",S_1)
print("S_2",S_2.value)

结果:

('total run time', 4.318149089813232)
4.31814908981
('MM', 5011.8801329999997)
('total run time', 2.42002010345459)
2.42002010345
('S_1', 5011.8801329999997)
('S_2', 4767.500133)

0 个答案:

没有答案