下面是一段使用迭代实现、可以正常运行的代码,但它的运行时间太长。我经常听说改用其他方法可以获得更好的性能,有人可以提供示例吗?
def payback_period(cohorts, sales):
    """Compute, per cohort, the number of months until revenue exceeds spend.

    For every cohort (one row of ``cohorts``, identified by its index value)
    the matching rows of ``sales`` are selected via ``sales['cohort']``.
    Each sale's ``proceeds`` is spread evenly over ``int(duration)``
    consecutive months starting at ``purchase_time``.  The payback period is
    the rounded number of months between the cohort's index value and the
    first month in which the cumulative recognised revenue is strictly
    greater than the cohort's ``spend``.

    Args:
        cohorts: DataFrame with a ``spend`` column.  Its index values are
            compared against ``sales['cohort']`` and subtracted from
            ``purchase_time``-derived months, so they are expected to be
            timestamps.
        sales: DataFrame with ``cohort``, ``purchase_time``, ``duration``
            and ``proceeds`` columns.

    Returns:
        The same ``cohorts`` object, mutated in place, with a
        ``payback_period`` column.  Cohorts that never pay back get ``999``.
    """
    never_paid_back = 999
    # Mean Gregorian month (365.2425 days / 12 = 2,629,746 s).  This is the
    # length older pandas versions used when converting
    # ``np.timedelta64(1, 'M')``; spelling it out keeps the same rounding
    # while also working on pandas versions that reject the 'M' unit.
    mean_month = pd.Timedelta(seconds=2629746)

    for cohort_start in cohorts.index:
        cohort_sales = sales[sales['cohort'] == cohort_start]
        proceeds_by_purchase = (
            cohort_sales.groupby(['purchase_time', 'duration'])['proceeds'].sum()
        )

        # Revenue recognised in each calendar month for this cohort.
        monthly_revenue = {}
        for (purchase_time, duration), proceeds in proceeds_by_purchase.items():
            n_months = int(duration)
            if n_months <= 0:
                # Nothing to spread; also avoids dividing by a zero duration.
                continue
            monthly_share = proceeds / duration
            for offset in range(n_months):
                recognised_month = purchase_time + pd.DateOffset(months=offset)
                monthly_revenue[recognised_month] = (
                    monthly_revenue.get(recognised_month, 0) + monthly_share
                )

        spend = cohorts.loc[cohort_start, 'spend']
        months_to_payback = never_paid_back
        running_revenue = 0
        # Months are visited in ascending order, so the first month whose
        # running total exceeds spend yields the smallest month delta and
        # the scan can stop there.
        for month in sorted(monthly_revenue):
            running_revenue += monthly_revenue[month]
            if running_revenue > spend:
                month_delta = round((month - cohort_start) / mean_month)
                months_to_payback = min(never_paid_back, month_delta)
                break

        cohorts.loc[cohort_start, 'payback_period'] = months_to_payback
    return cohorts
# print(index, payback_period, sum(cumsum.values()), cohorts.loc[index, 'spend'])