我正在运行以下代码:
def get_previous_next_returns(portfolio,total_returns):
assets=[]
i=0
for asset in portfolio:
i+=1
try:
for offset in [1,5,15,30,45,60,75,90,120,150,
200,250,500,750,1000,1250,1500]:
print(i,asset.name,offset)
asset['return_stock'] = (asset.Close - asset.Close.shift(1)) / (asset.Close.shift(1))
merged_data = pd.merge(asset, sp_500, on='Date')
total_positive_days=0
total_beating_sp_days=0
total_days = offset
for index in range(0,len(merged_data)):
if index-offset>0:
#for index, row in merged_data.iterrows():
#print(offset, index)
sliced = merged_data.iloc[index - offset : index]
total_positive_days = (sliced.Close_x > sliced.Close_x.shift(1)).sum()
total_beating_sp_days = (sliced.return_stock > sliced.return_sp).sum()
percentage_of_positive_days = float(total_positive_days/total_days)
percentage_of_beating_days = float(total_beating_sp_days/total_days)
asset.loc[index,'Pct_positive_'+str(offset)] = percentage_of_positive_days
asset.loc[index,'Pct_beating_'+str(offset)] = percentage_of_beating_days
# previous period returns
asset['Pct_change_'+str(offset)] = asset['Close'].pct_change(periods = offset)
# next period returns
asset['Pct_change_plus_'+str(offset)] = asset['Close'].pct_change(periods = -offset)
assets.append(asset)
total_returns=total_returns.append(asset)
except IndexError:
print("Index error")
return assets, total_returns
问题是我在(merged_data)上运行它的数据帧非常大(超过一百万行),因此代码需要很多小时才能完成...是否有某种方法可以加快它的速度(例如,替换为循环使用更有效的代码块?
答案 0 :(得分:0)
将数据框划分为多个部分,然后使用python的 multiprocessing 模块同时执行它们。 为此,您可以使用multiprocessing.Pool或multiprocessing.Process