在下面的python代码中,我尝试了两种求和值的方法:
在某些数字之上,我收到不同的结果,即S_1 int和S_2多重处理。值返回不同的结果
import random
import multiprocessing
import time
from multiprocessing import Process, Value, Array
from multiprocessing import Pool
import pandas as pd
from data_path import *
nm = 10**4
cores=2
tickers=list(pd.read_excel("/Users/itaybd/Finzor_2_26/dev_code/Engine/DATA/SecurityClassifiers/SPDR/SPY_All_Holdings.xls",skiprows=3)["Identifier"])[:100]
tickers = [t for t in tickers if isinstance(t,str) or isinstance(t,unicode)]
file_names = [os.path.join(data_path,t+".csv") for t in tickers if os.path.isfile(os.path.join(data_path,t+".csv"))]
global S_1,S_2
S_1 = 0
S_2 = Value('d', 0.0)
def g1(f_name):
global S_1
df = (pd.read_csv(f_name))#.ix[-1,"Close"]
S_1+=df.ix[0,"Close"]
def g2(f_name):
global S_2
df= (pd.read_csv(f_name))#.ix[-1,"Close"]
S_2.value +=df.ix[0,"Close"]
return df.ix[0,"Close"]
def tm(f):
st = time.time()
f()
t = time.time()- st
print("total run time",t)
return t
def g_tmp1():
for f_name in file_names:
g1(f_name)
def g_tmp2():
p = Pool(cores)
MM = p.map(g2,file_names)
print("MM",sum(MM))
p.close()
p.join()
def g_tmp2m():
p = Pool(cores-1)
p.map(g2,file_names)
p.close()
p.join()
print(tm(g_tmp1))
print(tm(g_tmp2))
print("S_1",S_1)
print("S_2",S_2.value)
结果:
('total run time', 4.318149089813232)
4.31814908981
('MM', 5011.8801329999997)
('total run time', 2.42002010345459)
2.42002010345
('S_1', 5011.8801329999997)
('S_2', 4767.500133)