Python:如何对 Pandas groupby 的 apply 使用多进程(multiprocessing)?

时间:2018-02-09 17:29:39

标签: python pandas multiprocessing apply pandas-groupby

问题: 我通过 SQL 查询得到一个数据集,想按 autonumber 将该数据集分组,然后对每个分组应用函数 get_currentdate 中包含的逻辑,返回每个 autonumber 的最后交易日期。为了实现这一点,我使用了 pandas 的 groupby 和 apply,但运行需要一分多钟。我想知道是否可以通过多进程(multiprocessing)加快速度,应该怎么做?

代码:

class findLastTransactionDate:
    """Compute the last transaction date for every ``autonumber``.

    The general-ledger rows are loaded from the database by ``query``,
    split into one frame per ``autonumber`` and evaluated by
    ``get_currentdate`` in a pool of worker processes.
    """

    # SECURITY(review): the database credentials are hard-coded in this
    # connection URL. They should be rotated and loaded from the
    # environment or a secret store instead of living in source control.
    conn = create_engine(
        "postgresql://progressa_admin:0Ce25%J4Jsx%@data-analytics.cvfvngfjabzl.us-west-1.rds.amazonaws.com:5432/dataanalytics",
        execution_options=dict(stream_results=True))

    def query(self):
        """Load the general-ledger transactions and return them as a dask frame.

        The result is sorted by ``transactiondate``, restricted to the window
        from 2013-09-13 up to three days after today, and split into three
        dask partitions.

        Returns:
            A dask DataFrame built from the filtered pandas frame.
        """

        sql = """SELECT ge.LoanAgreementID,
        ge.TransactionDate,
        ge.GeneralLedgerType,
        ge.Amount,
        la.AutoNumber
        FROM lms_data.general_ledger ge
        LEFT JOIN lms_data.loan_agreements la
        ON ge.LoanAgreementID = la.LoanAgreementID
        WHERE la.loanstatus NOT IN (10,90) AND ge.GeneralLedgerType IN (7,9)"""

        # Read in chunks of 10000 rows and stitch the pieces back together.
        chunks = list(pd.read_sql(sql, self.conn, chunksize=10000))
        df = pd.concat(chunks)

        # Sorting first lets the date window be taken as a label slice on
        # the (temporarily) date-indexed frame.
        df = df.sort_values('transactiondate', ascending=True)
        df = df.set_index('transactiondate')
        window_start = datetime.date(year=2013, month=9, day=13)
        window_end = datetime.date.today() + datetime.timedelta(days=3)
        df = df.loc[window_start:window_end].reset_index()

        return dd.from_pandas(df, npartitions=3)

    @staticmethod
    def get_currentdate(row):
        """Return the last transaction date for one group of ledger rows.

        Args:
            row: pandas DataFrame with the columns ``generalledgertype``,
                ``autonumber``, ``amount`` and ``transactiondate``.
                Presumably all rows share one ``autonumber`` (that is how
                ``get_results`` calls it).

        Returns:
            ``np.nan`` when there is no type-7 row, or when every type-7 row
            is paired with a matching type-9 row; otherwise the
            ``transactiondate`` of the first type-7 row (newest first) that
            is not cancelled out by the type-9 row at the same position.
        """
        wanted = ['autonumber', 'amount', 'transactiondate']

        def newest_first(ledger_type):
            # Rows of one ledger type, most recent first, re-indexed 0..n-1
            # so that label ``k`` is also position ``k``.
            subset = row.loc[row['generalledgertype'] == ledger_type, wanted]
            return subset.sort_values(
                'transactiondate', ascending=False).reset_index(drop=True)

        debits = newest_first(7)   # called "Debit" in the original naming
        returns = newest_first(9)  # called "Return" in the original naming

        if debits.empty:
            return np.nan
        if returns.empty:
            return debits.transactiondate[0]

        # NOTE(review): for a group holding a single autonumber this test
        # looks like it can never succeed; confirm whether ``amount`` was
        # meant. The loop below returns the same date in that case anyway.
        if np.abs(debits.autonumber[0]) not in returns.autonumber.tolist():
            return debits.transactiondate[0]

        n_debits = len(debits)
        n_returns = len(returns)
        for k in range(n_debits):
            if k == n_returns:
                # More debits than returns: this debit has no counterpart.
                return debits.transactiondate[k]

            debit_date = debits.transactiondate[k]
            return_date = returns.transactiondate[k]
            same_amount = np.abs(debits.amount[k]) == returns.amount[k]

            if same_amount and debit_date > return_date:
                return debit_date
            if same_amount and (
                    return_date - debit_date < datetime.timedelta(days=30)):
                if k + 1 == n_returns and n_returns == n_debits:
                    # Every debit was paired off with a return.
                    return np.nan
                continue
            return debit_date

        # Loop ran out of debits while the last one was still paired off.
        return np.nan

    def get_results(self, n_cores):
        """Evaluate ``get_currentdate`` for every autonumber in parallel.

        Args:
            n_cores: number of worker processes in the pool.

        Returns:
            A list with one ``get_currentdate`` result per autonumber group,
            in ascending autonumber order (pandas sorts group keys by
            default).
        """
        data = self.query()

        # ``query`` returns a dask DataFrame, and a dask groupby cannot be
        # iterated (doing so raises "KeyError: 'Column not found: 0'").
        # Materialise a pandas frame before splitting it into groups.
        frame = data.compute() if hasattr(data, 'compute') else data

        # Iterating a pandas groupby yields (key, frame) pairs; the workers
        # only need the frame.
        groups = [group for _, group in frame.groupby('autonumber')]

        with Pool(n_cores) as p:
            return p.map(self.get_currentdate, groups)

if __name__ == '__main__':
    # Time the whole run, including printing the results.
    started_at = time.time()
    last_dates = findLastTransactionDate().get_results(n_cores=5)
    print(last_dates)
    print(time.time() - started_at)

错误:

KeyError: 'Column not found: 0'

示例数据:

   transactiondate                       loanagreementid  generalledgertype  \
0       2013-09-13  92A268A7-514B-E411-829F-BC9A78563412                  7   
1       2013-09-27  92A268A7-514B-E411-829F-BC9A78563412                  7   
2       2013-10-11  92A268A7-514B-E411-829F-BC9A78563412                  7   
3       2013-10-25  92A268A7-514B-E411-829F-BC9A78563412                  7   
4       2013-11-08  92A268A7-514B-E411-829F-BC9A78563412                  7   
5       2013-11-22  92A268A7-514B-E411-829F-BC9A78563412                  7   
6       2013-12-06  92A268A7-514B-E411-829F-BC9A78563412                  7   
7       2013-12-20  92A268A7-514B-E411-829F-BC9A78563412                  7   
8       2013-12-31  92A268A7-514B-E411-829F-BC9A78563412                  7   
9       2013-09-13  93A268A7-514B-E411-829F-BC9A78563412                  7   
10      2013-09-27  93A268A7-514B-E411-829F-BC9A78563412                  7   
11      2013-10-11  93A268A7-514B-E411-829F-BC9A78563412                  7   
12      2013-10-25  93A268A7-514B-E411-829F-BC9A78563412                  7   
13      2013-11-08  93A268A7-514B-E411-829F-BC9A78563412                  7   
14      2013-11-22  93A268A7-514B-E411-829F-BC9A78563412                  7   
15      2013-12-06  93A268A7-514B-E411-829F-BC9A78563412                  7   
16      2013-12-20  93A268A7-514B-E411-829F-BC9A78563412                  7   
17      2014-01-03  93A268A7-514B-E411-829F-BC9A78563412                  7   
18      2014-01-16  93A268A7-514B-E411-829F-BC9A78563412                  7   
19      2014-01-17  93A268A7-514B-E411-829F-BC9A78563412                  7   
20      2013-09-13  94A268A7-514B-E411-829F-BC9A78563412                  7   
21      2013-09-20  94A268A7-514B-E411-829F-BC9A78563412                  7   
22      2013-09-27  94A268A7-514B-E411-829F-BC9A78563412                  7   
23      2013-10-04  94A268A7-514B-E411-829F-BC9A78563412                  7   
24      2013-10-11  94A268A7-514B-E411-829F-BC9A78563412                  7   
25      2013-10-18  94A268A7-514B-E411-829F-BC9A78563412                  7   
26      2013-10-25  94A268A7-514B-E411-829F-BC9A78563412                  7   
27      2013-11-01  94A268A7-514B-E411-829F-BC9A78563412                  7   
28      2013-11-08  94A268A7-514B-E411-829F-BC9A78563412                  7   
29      2013-11-15  94A268A7-514B-E411-829F-BC9A78563412                  7   
..             ...                                   ...                ...   
70      2013-12-31  95A268A7-514B-E411-829F-BC9A78563412                  7   
71      2014-01-15  95A268A7-514B-E411-829F-BC9A78563412                  7   
72      2014-01-31  95A268A7-514B-E411-829F-BC9A78563412                  7   
73      2014-02-14  95A268A7-514B-E411-829F-BC9A78563412                  7   
74      2014-02-28  95A268A7-514B-E411-829F-BC9A78563412                  7   
75      2014-03-14  95A268A7-514B-E411-829F-BC9A78563412                  7   
76      2014-03-31  95A268A7-514B-E411-829F-BC9A78563412                  7   
77      2014-04-15  95A268A7-514B-E411-829F-BC9A78563412                  7   
78      2014-04-30  95A268A7-514B-E411-829F-BC9A78563412                  7   
79      2014-05-15  95A268A7-514B-E411-829F-BC9A78563412                  7   
80      2014-05-30  95A268A7-514B-E411-829F-BC9A78563412                  7   
81      2014-06-13  95A268A7-514B-E411-829F-BC9A78563412                  7   
82      2014-06-30  95A268A7-514B-E411-829F-BC9A78563412                  7   
83      2014-07-15  95A268A7-514B-E411-829F-BC9A78563412                  7   
84      2013-09-13  96A268A7-514B-E411-829F-BC9A78563412                  9   
85      2013-09-13  96A268A7-514B-E411-829F-BC9A78563412                  7   
86      2013-09-20  96A268A7-514B-E411-829F-BC9A78563412                  7   
87      2013-09-27  96A268A7-514B-E411-829F-BC9A78563412                  9   
88      2013-09-27  96A268A7-514B-E411-829F-BC9A78563412                  7   
89      2013-10-04  96A268A7-514B-E411-829F-BC9A78563412                  7   
90      2013-10-11  96A268A7-514B-E411-829F-BC9A78563412                  7   
91      2013-10-18  96A268A7-514B-E411-829F-BC9A78563412                  7   
92      2013-10-25  96A268A7-514B-E411-829F-BC9A78563412                  7   
93      2013-11-01  96A268A7-514B-E411-829F-BC9A78563412                  7   
94      2013-11-08  96A268A7-514B-E411-829F-BC9A78563412                  9   
95      2013-11-08  96A268A7-514B-E411-829F-BC9A78563412                  7   
96      2013-11-15  96A268A7-514B-E411-829F-BC9A78563412                  7   
97      2013-11-15  96A268A7-514B-E411-829F-BC9A78563412                  9   
98      2013-11-22  96A268A7-514B-E411-829F-BC9A78563412                  7   
99      2013-11-22  96A268A7-514B-E411-829F-BC9A78563412                  9   

     amount  autonumber  
0   -378.07       48110  
1   -378.07       48110  
2   -378.07       48110  
3   -378.07       48110  
4   -378.07       48110  
5   -378.07       48110  
6   -378.07       48110  
7   -378.07       48110  
8  -3888.43       48110  
9   -119.25       48111  
10  -119.25       48111  
11  -119.25       48111  
12  -119.25       48111  
13  -119.25       48111  
14  -119.25       48111  
15  -119.25       48111  
16  -119.25       48111  
17  -119.25       48111  
18 -3321.64       48111  
19  -119.25       48111  
20  -231.80       48113  
21  -231.80       48113  
22  -231.80       48113  
23  -231.80       48113  
24  -231.80       48113  
25  -231.80       48113  
26  -231.80       48113  
27  -231.80       48113  
28  -231.80       48113  
29  -231.80       48113  
..      ...         ...  
70   -84.34       48114  
71   -84.34       48114  
72   -84.34       48114  
73   -84.34       48114  
74   -84.34       48114  
75   -84.34       48114  
76   -84.34       48114  
77   -84.34       48114  
78   -84.34       48114  
79   -84.34       48114  
80   -84.34       48114  
81   -84.34       48114  
82   -84.34       48114  
83   -83.79       48114  
84    74.88       48115  
85   -74.88       48115  
86   -74.88       48115  
87   149.76       48115  
88  -149.76       48115  
89   -74.88       48115  
90   -74.88       48115  
91   -74.88       48115  
92   -74.88       48115  
93   -74.88       48115  
94    74.88       48115  
95   -74.88       48115  
96  -149.76       48115  
97   149.76       48115  
98   -74.88       48115  
99    74.88       48115 

0 个答案:

没有答案