数据帧中python的“ for循环”,我可以加快速度吗?

时间:2019-01-02 09:02:01

标签: python performance optimization

我可以加快以下代码的运行时间吗?

我的理财计划代码

我必须继续使用我的金钱日志

因为我需要跟踪使用钱的情况

def seperate_accumulate_m(m_df):

    """
    make accumulate money paper according to option number
    """

    m_df = m_df.reset_index(drop=True)

    m_df.loc[0, "etc_m"] = 0
    m_df.loc[0, "sell_m"] = 0
    m_df.loc[0, "buy_m"] = 0
    m_df.loc[0, "use_m"] = 0
    m_df.loc[0, "gone_m"] = 0


    if m_df.loc[0, "option"] == 3:
        m_df.loc[0, "etc_m"] = m_df.loc[0, "m"]
    elif m_df.loc[0, "option"] == 2:
       m_df.loc[0, "sell_m"] = m_df.loc[0, "m"]
    elif m_df.loc[0, "option"] == 1:
        m_df.loc[0, "buy_m"] = m_df.loc[0, "m"]
    else:
        pass


    for idx in range(1, len(m_df)):
        if m_df.loc[idx, "option"] == 3:
            m_df.loc[idx, "etc_m"] = m_df.loc[idx-1, "etc_m"] + m_df.loc[idx, "m"]
            m_df.loc[idx, "sell_m"] = m_df.loc[idx-1, "sell_m"] + 0
            m_df.loc[idx, "buy_m"] = m_df.loc[idx-1, "buy_m"] + 0
            m_df.loc[idx, "use_m"] = m_df.loc[idx-1, "use_m"] + 0
            m_df.loc[idx, "gone_m"] = m_df.loc[idx-1, "gone_m"] + 0
        elif m_df.loc[idx, "option"] == 2:
            m_df.loc[idx, "etc_m"] = m_df.loc[idx-1, "etc_m"] + 0
            m_df.loc[idx, "sell_m"] = m_df.loc[idx-1, "sell_m"] + m_df.loc[idx, "m"]
            m_df.loc[idx, "buy_m"] = m_df.loc[idx-1, "buy_m"] + 0
            m_df.loc[idx, "use_m"] = m_df.loc[idx-1, "use_m"] + 0
            m_df.loc[idx, "gone_m"] = m_df.loc[idx-1, "gone_m"] + 0
        elif m_df.loc[idx, "option"] == 1:
            m_df.loc[idx, "etc_m"] = m_df.loc[idx-1, "etc_m"] + 0
            m_df.loc[idx, "sell_m"] = m_df.loc[idx-1, "sell_m"] + 0
            m_df.loc[idx, "buy_m"] = m_df.loc[idx-1, "buy_m"] + m_df.loc[idx, "m"]
            m_df.loc[idx, "use_m"] = m_df.loc[idx-1, "use_m"] + 0
            m_df.loc[idx, "gone_m"] = m_df.loc[idx-1, "gone_m"] + 0
        else:
            continue

    return m_df

数据大小为3,500,000行

且m_no的大小为90000行

def get_total_accumulate_m_df(data):

    """
    make total dataframe base on seperate_accumulate_m
    """

    tmp_df = pd.DataFrame()

    for mno in tqdm(data["m_no"].unique()):

        mno_df = data[data["m_no"] == mno]
        mno_df = mno_df.sort_values(by='regdt')
        mno_df = mno_df.reset_index(drop=True)


        df = seperate_accumulate_m(mno_df)
        tmp_df = pd.concat([tmp_df, df])

    return tmp_df

这条线花费大约2个小时的时间 我想减少时间! 请帮助我。

df = get_total_accumulate_emoney_df(data)

谢谢。

0 个答案:

没有答案