我有一个while循环,它遍历30000行的数据帧A,并更新另一个数据帧B,并使用数据帧B进行进一步的迭代。它花费太多时间。想要更快!任何想法
for x in range(0, dataframeA.shape[0]):
AuthorizationID_Temp = dataframeA["AuthorizationID"].iloc[x]
Auth_BeginDate = dataframeA["BeginDate"].iloc[x]
Auth_EndDate = dataframeA["EndDate"].iloc[x]
BeginDate_Temp = pd.to_datetime(Auth_BeginDate).date()
ScriptsFlag = dataframeA["ScriptsFlag"].iloc[x]
Legacy_PlacementID = dataframeA["Legacy_PlacementID"].iloc[x]
Legacy_AncillaryServicesID = dataframeA["Legacy_AncillaryServicesID"].iloc[x]
ProviderID_Temp = dataframeA["ProviderID"].iloc[x]
SRSProcode_Temp = dataframeA["SRSProcode"].iloc[x]
Rate_Temp = dataframeA["Rate"].iloc[x]
Scripts2["BeginDate1_SC"] = pd.to_datetime(Scripts2["BeginDate_SC"]).dt.date
Scripts2["EndDate1_SC"] = pd.to_datetime(Scripts2["EndDate_SC"]).dt.date
# BeginDate_Temp = BeginDate_Temp.date()
# EndDate_Temp = EndDate_Temp.date()
Scripts_New_Modified1 = Scripts2.loc[
((Scripts2["ScriptsFlag_SC"].isin(["N", "M"])) & (Scripts2["AuthorizationID_SC"] == AuthorizationID_Temp))
& ((Scripts2["ProviderID_SC"] == ProviderID_Temp) & (Scripts2["SRSProcode_SC"] == SRSProcode_Temp)),
:,
]
Scripts_New_Modified = Scripts_New_Modified1.loc[
(Scripts_New_Modified1["BeginDate1_SC"] == BeginDate_Temp)
& ((Scripts_New_Modified1["EndDate1_SC"] == EndDate_Temp) & (Scripts_New_Modified1["Rate_SC"] == Rate_Temp)),
"AuthorizationID_SC",
]
if ScriptsFlag == "M":
if Legacy_PlacementID is not None:
InsertA = insertA(AuthorizationID_Temp, BeginDate_Temp, EndDate_Temp, Units_Temp, EndDate_Temp_DO)
dataframeB = dataframeB.append(InsertA)
print("ScriptsTemp6 shape is {}".format(dataframeB.shape))
# else:
# ScriptsTemp6 = ScriptsTemp5.copy()
# print('ScriptsTemp6 shape is {}'.format(ScriptsTemp6.shape))
if Legacy_AncillaryServicesID is not None:
InsertB = insertB(AuthorizationID_Temp, BeginDate_Temp, EndDate_Temp, Units_Temp, EndDate_Temp_DO)
dataframeB = dataframeB.append(InsertB)
print("ScriptsTemp7 shape is {}".format(dataframeB.shape))
dataframe_New = dataframeB.loc[
((dataframeB["ScriptsFlag"] == "N") & (dataframeB["AuthorizationID"] == AuthorizationID_Temp))
& ((dataframeB["ProviderID"] == ProviderID_Temp) & (dataframeB["SRSProcode"] == SRSProcode_Temp)),
:,
]
dataframe_New1 = dataframe_New.loc[
(pd.to_datetime(dataframe_New["BeginDate"]).dt.date == BeginDate_Temp)
& ((pd.to_datetime(dataframe_New["EndDate"]).dt.date == EndDate_Temp_DO) & (dataframe_New["Rate"] == Rate_Temp)),
"AuthorizationID",
]
# PLAATN = dataframeA.copy()
Insert1 = insert1(dataframe_New1, BeginDate_Temp, AuthorizationID_Temp, EndDate_Temp, Units_Temp, EndDate_Temp_DO)
if Insert1.shape[0] > 0:
dataframeB = dataframeB.append(Insert1.iloc[0])
# else:
# ScriptsTemp8 = ScriptsTemp7
print("ScriptsTemp8 shape is {}".format(dataframeB.shape))
dataframe_modified1 = dataframeB.loc[
((dataframeB["ScriptsFlag"] == "M") & (dataframeB["AuthorizationID"] == AuthorizationID_Temp))
& ((dataframeB["ProviderID"] == ProviderID_Temp) & (dataframeB["SRSProcode"] == SRSProcode_Temp)),
:,
]
dataframe_modified = dataframe_modified1.loc[
(dataframe_modified1["BeginDate"] == BeginDate_Temp)
& ((dataframe_modified1["EndDate"] == EndDate_Temp_DO) & (dataframe_modified1["Rate"] == Rate_Temp)),
"AuthorizationID",
]
Insert2 = insert2(
dataframe_modified,
Scripts_New_Modified,
AuthorizationID_Temp,
BeginDate_Temp,
EndDate_Temp,
Units_Temp,
EndDate_Temp_DO,
)
if Insert2.shape[0] > 0:
dataframeB = dataframeB.append(Insert2.iloc[0])
具有30000行的dataframeA
dataframeB应该在DataframeA的每次迭代(30000次迭代)中插入新行
在每次迭代的中间都应使用更新的dataframeB来过滤条件
insertA和InsertB是两个具有附加过滤功能的函数
运行30000行需要太多时间,所以
因此需要更多时间来运行。 提供有关在执行时间方面加快循环速度的建议