问题:
基本上,我先通过 SQL 查询取回一个数据集,然后想按 autonumber 把该数据集拆分成若干组,再对每一组应用函数 get_currentdate 中的逻辑,返回每个 autonumber 对应的最后交易日期。为了实现这一点,我使用了 pandas 的 groupby 和 apply 函数,但运行一次要花一分多钟。我想知道能否通过多进程(multiprocessing)来加快速度,应该怎么做呢?
代码:
class findLastTransactionDate:
    """Work out the last transaction date of every loan, keyed by ``autonumber``.

    General-ledger rows of type 7 (handled as debits below) and type 9
    (handled as returns below) are loaded from the database, grouped per
    ``autonumber`` and each group is evaluated by :meth:`get_currentdate`
    in a pool of worker processes.
    """

    # NOTE(security): the database credentials are hard-coded in this URL.
    # They should be loaded from configuration / the environment and the
    # exposed password rotated; the literal is left untouched here so the
    # class keeps working exactly as before.
    conn = create_engine(
        "postgresql://progressa_admin:0Ce25%J4Jsx%@data-analytics.cvfvngfjabzl.us-west-1.rds.amazonaws.com:5432/dataanalytics",
        execution_options=dict(stream_results=True))

    def _fetch_frame(self):
        """Load the general-ledger rows into a pandas DataFrame.

        The result is read in chunks of 10000 rows, sorted by
        ``transactiondate`` and restricted to the window from 2013-09-13
        up to three days after today.

        Returns:
            pandas.DataFrame with a fresh integer index and
            ``transactiondate`` as a regular column.
        """
        sql = (
            "SELECT ge.LoanAgreementID,\n"
            "ge.TransactionDate,\n"
            "ge.GeneralLedgerType,\n"
            "ge.Amount,\n"
            "la.AutoNumber\n"
            "FROM lms_data.general_ledger ge\n"
            "LEFT JOIN lms_data.loan_agreements la\n"
            "ON ge.LoanAgreementID = la.LoanAgreementID\n"
            "WHERE la.loanstatus NOT IN (10,90) AND ge.GeneralLedgerType IN (7,9)"
        )
        chunks = list(pd.read_sql(sql, self.conn, chunksize=10000))
        frame = pd.concat(chunks)
        # Sorting first makes the index monotonic, which the label slice
        # on the date window below relies on.
        frame = frame.sort_values('transactiondate', ascending=True)
        frame = frame.set_index('transactiondate')
        window_start = datetime.date(year=2013, month=9, day=13)
        window_end = datetime.date.today() + datetime.timedelta(days=3)
        return frame.loc[window_start:window_end].reset_index()

    def query(self):
        """Returns General Ledger transactions data from RDS.

        Returns:
            A dask DataFrame (3 partitions) wrapping the pandas frame
            produced by :meth:`_fetch_frame`.
        """
        return dd.from_pandas(self._fetch_frame(), npartitions=3)

    @staticmethod
    def get_currentdate(row):
        """Logic tree for returning the last transaction date of the loan.

        Args:
            row: pandas DataFrame holding the ledger rows of ONE loan, with
                the columns ``generalledgertype``, ``autonumber``,
                ``amount`` and ``transactiondate``.

        Returns:
            The ``transactiondate`` of the selected type-7 row, or
            ``np.nan`` when there is no type-7 row or every type-7 row is
            cancelled out by a type-9 row.
        """
        columns = ['autonumber', 'amount', 'transactiondate']
        ledger_type = row['generalledgertype']
        # Newest first, so position k in one frame is compared with
        # position k in the other.
        debit = row.loc[ledger_type == 7, columns].sort_values(
            'transactiondate', ascending=False).reset_index(drop=True)
        returned = row.loc[ledger_type == 9, columns].sort_values(
            'transactiondate', ascending=False).reset_index(drop=True)

        if debit.empty:
            return np.nan

        # Plain lists avoid a pandas label lookup on every loop iteration.
        debit_dates = debit['transactiondate'].tolist()
        if returned.empty:
            return debit_dates[0]

        # NOTE(review): when `row` is a single-autonumber group this test can
        # never succeed; it is kept for callers passing an ungrouped frame.
        if np.abs(debit['autonumber'].tolist()[0]) not in returned['autonumber'].tolist():
            return debit_dates[0]

        debit_amounts = debit['amount'].tolist()
        return_amounts = returned['amount'].tolist()
        return_dates = returned['transactiondate'].tolist()
        n_returns = len(return_amounts)
        same_length = n_returns == len(debit_amounts)
        grace_period = datetime.timedelta(days=30)

        for k, (amount, debit_date) in enumerate(zip(debit_amounts, debit_dates)):
            if k == n_returns:
                # Ran out of returns: this debit has no counterpart.
                return debit_date
            if abs(amount) != return_amounts[k]:
                # Amounts differ, so the return at k does not cancel this debit.
                return debit_date
            return_date = return_dates[k]
            if debit_date > return_date:
                return debit_date
            if return_date - debit_date < grace_period:
                # Debit k is cancelled by return k; look at the next older one,
                # unless this was the last pair and nothing is left over.
                if k + 1 == n_returns and same_length:
                    return np.nan
                continue
            return debit_date
        # Every debit was matched by a return.
        return np.nan

    def get_results(self, n_cores):
        """Evaluate :meth:`get_currentdate` for every loan in parallel.

        Args:
            n_cores: number of worker processes in the pool.

        Returns:
            list with one entry per ``autonumber`` group, in the order
            produced by ``DataFrame.groupby('autonumber')``.
        """
        # Group the pandas frame, not the dask one: a dask groupby cannot be
        # iterated (doing so raised "KeyError: 'Column not found: 0'").
        # Only the sub-frames are sent to the workers, because
        # get_currentdate expects a DataFrame rather than a (key, frame) tuple.
        frame = self._fetch_frame()
        groups = [group for _, group in frame.groupby('autonumber')]
        with Pool(n_cores) as pool:
            return pool.map(self.get_currentdate, groups)
if __name__ == '__main__':
    # Run the whole pipeline once with five worker processes, then report
    # the result followed by the wall-clock time (printing included).
    started_at = time.time()
    last_dates = findLastTransactionDate().get_results(n_cores=5)
    print(last_dates)
    elapsed = time.time() - started_at
    print(elapsed)
错误:
KeyError: 'Column not found: 0'
示例数据:
transactiondate loanagreementid generalledgertype \
0 2013-09-13 92A268A7-514B-E411-829F-BC9A78563412 7
1 2013-09-27 92A268A7-514B-E411-829F-BC9A78563412 7
2 2013-10-11 92A268A7-514B-E411-829F-BC9A78563412 7
3 2013-10-25 92A268A7-514B-E411-829F-BC9A78563412 7
4 2013-11-08 92A268A7-514B-E411-829F-BC9A78563412 7
5 2013-11-22 92A268A7-514B-E411-829F-BC9A78563412 7
6 2013-12-06 92A268A7-514B-E411-829F-BC9A78563412 7
7 2013-12-20 92A268A7-514B-E411-829F-BC9A78563412 7
8 2013-12-31 92A268A7-514B-E411-829F-BC9A78563412 7
9 2013-09-13 93A268A7-514B-E411-829F-BC9A78563412 7
10 2013-09-27 93A268A7-514B-E411-829F-BC9A78563412 7
11 2013-10-11 93A268A7-514B-E411-829F-BC9A78563412 7
12 2013-10-25 93A268A7-514B-E411-829F-BC9A78563412 7
13 2013-11-08 93A268A7-514B-E411-829F-BC9A78563412 7
14 2013-11-22 93A268A7-514B-E411-829F-BC9A78563412 7
15 2013-12-06 93A268A7-514B-E411-829F-BC9A78563412 7
16 2013-12-20 93A268A7-514B-E411-829F-BC9A78563412 7
17 2014-01-03 93A268A7-514B-E411-829F-BC9A78563412 7
18 2014-01-16 93A268A7-514B-E411-829F-BC9A78563412 7
19 2014-01-17 93A268A7-514B-E411-829F-BC9A78563412 7
20 2013-09-13 94A268A7-514B-E411-829F-BC9A78563412 7
21 2013-09-20 94A268A7-514B-E411-829F-BC9A78563412 7
22 2013-09-27 94A268A7-514B-E411-829F-BC9A78563412 7
23 2013-10-04 94A268A7-514B-E411-829F-BC9A78563412 7
24 2013-10-11 94A268A7-514B-E411-829F-BC9A78563412 7
25 2013-10-18 94A268A7-514B-E411-829F-BC9A78563412 7
26 2013-10-25 94A268A7-514B-E411-829F-BC9A78563412 7
27 2013-11-01 94A268A7-514B-E411-829F-BC9A78563412 7
28 2013-11-08 94A268A7-514B-E411-829F-BC9A78563412 7
29 2013-11-15 94A268A7-514B-E411-829F-BC9A78563412 7
.. ... ... ...
70 2013-12-31 95A268A7-514B-E411-829F-BC9A78563412 7
71 2014-01-15 95A268A7-514B-E411-829F-BC9A78563412 7
72 2014-01-31 95A268A7-514B-E411-829F-BC9A78563412 7
73 2014-02-14 95A268A7-514B-E411-829F-BC9A78563412 7
74 2014-02-28 95A268A7-514B-E411-829F-BC9A78563412 7
75 2014-03-14 95A268A7-514B-E411-829F-BC9A78563412 7
76 2014-03-31 95A268A7-514B-E411-829F-BC9A78563412 7
77 2014-04-15 95A268A7-514B-E411-829F-BC9A78563412 7
78 2014-04-30 95A268A7-514B-E411-829F-BC9A78563412 7
79 2014-05-15 95A268A7-514B-E411-829F-BC9A78563412 7
80 2014-05-30 95A268A7-514B-E411-829F-BC9A78563412 7
81 2014-06-13 95A268A7-514B-E411-829F-BC9A78563412 7
82 2014-06-30 95A268A7-514B-E411-829F-BC9A78563412 7
83 2014-07-15 95A268A7-514B-E411-829F-BC9A78563412 7
84 2013-09-13 96A268A7-514B-E411-829F-BC9A78563412 9
85 2013-09-13 96A268A7-514B-E411-829F-BC9A78563412 7
86 2013-09-20 96A268A7-514B-E411-829F-BC9A78563412 7
87 2013-09-27 96A268A7-514B-E411-829F-BC9A78563412 9
88 2013-09-27 96A268A7-514B-E411-829F-BC9A78563412 7
89 2013-10-04 96A268A7-514B-E411-829F-BC9A78563412 7
90 2013-10-11 96A268A7-514B-E411-829F-BC9A78563412 7
91 2013-10-18 96A268A7-514B-E411-829F-BC9A78563412 7
92 2013-10-25 96A268A7-514B-E411-829F-BC9A78563412 7
93 2013-11-01 96A268A7-514B-E411-829F-BC9A78563412 7
94 2013-11-08 96A268A7-514B-E411-829F-BC9A78563412 9
95 2013-11-08 96A268A7-514B-E411-829F-BC9A78563412 7
96 2013-11-15 96A268A7-514B-E411-829F-BC9A78563412 7
97 2013-11-15 96A268A7-514B-E411-829F-BC9A78563412 9
98 2013-11-22 96A268A7-514B-E411-829F-BC9A78563412 7
99 2013-11-22 96A268A7-514B-E411-829F-BC9A78563412 9
amount autonumber
0 -378.07 48110
1 -378.07 48110
2 -378.07 48110
3 -378.07 48110
4 -378.07 48110
5 -378.07 48110
6 -378.07 48110
7 -378.07 48110
8 -3888.43 48110
9 -119.25 48111
10 -119.25 48111
11 -119.25 48111
12 -119.25 48111
13 -119.25 48111
14 -119.25 48111
15 -119.25 48111
16 -119.25 48111
17 -119.25 48111
18 -3321.64 48111
19 -119.25 48111
20 -231.80 48113
21 -231.80 48113
22 -231.80 48113
23 -231.80 48113
24 -231.80 48113
25 -231.80 48113
26 -231.80 48113
27 -231.80 48113
28 -231.80 48113
29 -231.80 48113
.. ... ...
70 -84.34 48114
71 -84.34 48114
72 -84.34 48114
73 -84.34 48114
74 -84.34 48114
75 -84.34 48114
76 -84.34 48114
77 -84.34 48114
78 -84.34 48114
79 -84.34 48114
80 -84.34 48114
81 -84.34 48114
82 -84.34 48114
83 -83.79 48114
84 74.88 48115
85 -74.88 48115
86 -74.88 48115
87 149.76 48115
88 -149.76 48115
89 -74.88 48115
90 -74.88 48115
91 -74.88 48115
92 -74.88 48115
93 -74.88 48115
94 74.88 48115
95 -74.88 48115
96 -149.76 48115
97 149.76 48115
98 -74.88 48115
99 74.88 48115