让 pandas 循环运行得更快

时间:2019-06-10 12:06:52

标签: python python-3.x pandas

我有一个 for 循环,它遍历一个有 30000 行的数据帧 A,在循环中更新另一个数据帧 B,并在后续迭代中继续使用更新后的数据帧 B。它耗时太长,我想让它运行得更快!有什么想法吗?

# Walk dataframeA row by row. For every row whose ScriptsFlag is "M", rows
# produced by insertA / insertB / insert1 / insert2 are appended to dataframeB,
# and the freshly updated dataframeB is re-filtered between those appends.
#
# NOTE(review): EndDate_Temp, Units_Temp and EndDate_Temp_DO are used below but
# are never assigned in this snippet (Auth_EndDate is read and never used) --
# confirm where they are set.
# NOTE(review): DataFrame.append was deprecated in pandas 1.4 and removed in
# pandas 2.0; migrate to pd.concat once the return types of the insert helpers
# are confirmed.

# Loop-invariant preparation: the date columns and the "N"/"M" flag filter
# depend only on Scripts2, so they are computed once here instead of once per
# dataframeA row.
# Assumes nothing called inside the loop modifies Scripts2 -- TODO confirm for
# the insert helpers.
Scripts2["BeginDate1_SC"] = pd.to_datetime(Scripts2["BeginDate_SC"]).dt.date
Scripts2["EndDate1_SC"] = pd.to_datetime(Scripts2["EndDate_SC"]).dt.date
_scripts_new_or_modified = Scripts2.loc[Scripts2["ScriptsFlag_SC"].isin(["N", "M"]), :]

for x in range(0, dataframeA.shape[0]):

    # Scalar fields of the current dataframeA row.
    AuthorizationID_Temp = dataframeA["AuthorizationID"].iloc[x]
    Auth_BeginDate = dataframeA["BeginDate"].iloc[x]
    Auth_EndDate = dataframeA["EndDate"].iloc[x]
    BeginDate_Temp = pd.to_datetime(Auth_BeginDate).date()
    ScriptsFlag = dataframeA["ScriptsFlag"].iloc[x]
    Legacy_PlacementID = dataframeA["Legacy_PlacementID"].iloc[x]
    Legacy_AncillaryServicesID = dataframeA["Legacy_AncillaryServicesID"].iloc[x]
    ProviderID_Temp = dataframeA["ProviderID"].iloc[x]
    SRSProcode_Temp = dataframeA["SRSProcode"].iloc[x]
    Rate_Temp = dataframeA["Rate"].iloc[x]

    # Scripts2 rows flagged "N"/"M" that match this row's authorization,
    # provider and procedure code.
    Scripts_New_Modified1 = _scripts_new_or_modified.loc[
        (_scripts_new_or_modified["AuthorizationID_SC"] == AuthorizationID_Temp)
        & (_scripts_new_or_modified["ProviderID_SC"] == ProviderID_Temp)
        & (_scripts_new_or_modified["SRSProcode_SC"] == SRSProcode_Temp),
        :,
    ]

    # ...narrowed further to the same begin date, end date and rate; only the
    # AuthorizationID_SC column is kept.
    Scripts_New_Modified = Scripts_New_Modified1.loc[
        (Scripts_New_Modified1["BeginDate1_SC"] == BeginDate_Temp)
        & ((Scripts_New_Modified1["EndDate1_SC"] == EndDate_Temp) & (Scripts_New_Modified1["Rate_SC"] == Rate_Temp)),
        "AuthorizationID_SC",
    ]

    if ScriptsFlag == "M":

        if Legacy_PlacementID is not None:

            InsertA = insertA(AuthorizationID_Temp, BeginDate_Temp, EndDate_Temp, Units_Temp, EndDate_Temp_DO)

            dataframeB = dataframeB.append(InsertA)

            print("ScriptsTemp6 shape is {}".format(dataframeB.shape))

        if Legacy_AncillaryServicesID is not None:

            InsertB = insertB(AuthorizationID_Temp, BeginDate_Temp, EndDate_Temp, Units_Temp, EndDate_Temp_DO)

            dataframeB = dataframeB.append(InsertB)

            print("ScriptsTemp7 shape is {}".format(dataframeB.shape))

        # Rows of the (possibly just extended) dataframeB flagged "N" for the
        # same authorization / provider / procedure code. This cannot be
        # hoisted because dataframeB changes inside the loop.
        dataframe_New = dataframeB.loc[
            ((dataframeB["ScriptsFlag"] == "N") & (dataframeB["AuthorizationID"] == AuthorizationID_Temp))
            & ((dataframeB["ProviderID"] == ProviderID_Temp) & (dataframeB["SRSProcode"] == SRSProcode_Temp)),
            :,
        ]

        dataframe_New1 = dataframe_New.loc[
            (pd.to_datetime(dataframe_New["BeginDate"]).dt.date == BeginDate_Temp)
            & ((pd.to_datetime(dataframe_New["EndDate"]).dt.date == EndDate_Temp_DO) & (dataframe_New["Rate"] == Rate_Temp)),
            "AuthorizationID",
        ]

        Insert1 = insert1(dataframe_New1, BeginDate_Temp, AuthorizationID_Temp, EndDate_Temp, Units_Temp, EndDate_Temp_DO)

        # Only the first row returned by insert1 is appended.
        if Insert1.shape[0] > 0:

            dataframeB = dataframeB.append(Insert1.iloc[0])

        print("ScriptsTemp8 shape is {}".format(dataframeB.shape))

        # Same lookup against the updated dataframeB, this time for rows
        # flagged "M".
        dataframe_modified1 = dataframeB.loc[
            ((dataframeB["ScriptsFlag"] == "M") & (dataframeB["AuthorizationID"] == AuthorizationID_Temp))
            & ((dataframeB["ProviderID"] == ProviderID_Temp) & (dataframeB["SRSProcode"] == SRSProcode_Temp)),
            :,
        ]

        dataframe_modified = dataframe_modified1.loc[
            (dataframe_modified1["BeginDate"] == BeginDate_Temp)
            & ((dataframe_modified1["EndDate"] == EndDate_Temp_DO) & (dataframe_modified1["Rate"] == Rate_Temp)),
            "AuthorizationID",
        ]

        Insert2 = insert2(
            dataframe_modified,
            Scripts_New_Modified,
            AuthorizationID_Temp,
            BeginDate_Temp,
            EndDate_Temp,
            Units_Temp,
            EndDate_Temp_DO,
        )

        # Only the first row returned by insert2 is appended.
        if Insert2.shape[0] > 0:
            dataframeB = dataframeB.append(Insert2.iloc[0])

具有30000行的dataframeA

在遍历 dataframeA 的每次迭代(共 30000 次)中,都应向 dataframeB 插入新行

在每次迭代的中间都应使用更新的dataframeB来过滤条件

insertA 和 insertB 是两个带有额外过滤逻辑的函数

处理这 30000 行耗时太长。

请就如何缩短循环的执行时间提供建议。

0 个答案:

没有答案