我要更新熊猫
您好,我想比较熊猫数据帧计算中单核和多核的速度。 给出以下情况:第i行的'c'列是从'i-9'行到'i'行的'a'值的平均值。
from multiprocessing import Process, Value, Array, Manager
import pandas as pd
import numpy as np
import time
total_num = 1000
df = pd.DataFrame(np.arange(1,total_num*2+1).reshape(total_num,2),
columns=['a','b'])
df['c']=0
df2 = pd.DataFrame(np.arange(1,total_num*2+1).reshape(total_num,2),
columns=['a','b'])
df2['c']=0
def Cal(start, end):
for i in range(end-start-1):
if i+start < 10:
df.loc[i+start,'c']=df.loc[:i+start,'c'].mean()
else :
df.loc[i+start,'c']=df.loc[i-9:i+start,'c'].mean()
def Cal2(my_df,start, end):
for i in range(end-start-1):
if i+start < 10:
my_df.df.loc[i+start,'c']=my_df.df.loc[:i+start,'c'].mean()
else :
my_df.df.loc[i+start,'c']=my_df.df.loc[i-9:i+start,'c'].mean()
print(my_df)
print('Single core : --->')
start_t = time.time()
Cal(0,total_num+1)
end_t = time.time()
print(end_t-start_t)
print('Multiprocess ---->')
if __name__=='__main__':
num=len(df2)
num_core=4
between=num//num_core
mgr=Manager()
ns = mgr.Namespace()
ns.df=df2
procs=[]
start_t =time.time()
for index in range(num_core):
proc=Process(target=Cal2,args=(ns,index*between,(index+1)*between))
procs.append(proc)
proc.start()
for proc in procs:
proc.join()
end_t = time.time()
print(end_t-start_t)
起初,我意识到Multiprocessing不使用全局变量。所以我用了经理。但是,df2的“ c”列没有更改。
我该怎么做? :p