我基于若干列的哈希值编写了一个函数,用于消除数据中的不一致。但是,当我把一个 .iloc 的结果赋值给另一个 .iloc 时,运行速度很慢。有什么方法可以提升性能吗?
def check_update(df_lineup_full, df_lineup_actual):
    """Overwrite rows of the full lineup with same-hash rows of the actual lineup.

    Both inputs are passed through ``create_hash`` (defined elsewhere; it is
    expected to return a DataFrame exposing a ``hash`` column -- confirm
    against its definition).  Each row of the hashed actual frame then
    replaces the first row of the hashed full frame that carries the same
    hash.  Afterwards, every row of the full frame whose hash occurs more
    than once is collected as an inconsistency, and the full frame is
    de-duplicated on ``hash``, keeping the first occurrence.

    The elapsed time of the update and de-duplication steps is printed.

    Args:
        df_lineup_full: Data to be updated.
        df_lineup_actual: Data supplying the replacement rows.

    Returns:
        A two-element list: the de-duplicated full frame restricted to its
        first 22 columns, and the frame of rows whose hash was duplicated.

    Raises:
        IndexError: If a hash of the actual frame does not occur in the
            full frame.
    """
    df_lineup_full_hashed = create_hash(df_lineup_full)
    df_lineup_actual_hashed = create_hash(df_lineup_actual)

    started = datetime.datetime.now()

    # Build the hash -> first row position lookup once, instead of scanning
    # the whole frame with a boolean mask on every loop iteration.
    # NOTE(review): this relies on each replacement row carrying the same
    # hash as the row it overwrites (i.e. both frames share one column
    # layout), so the lookup stays valid while rows are replaced -- confirm.
    first_position = {}
    for position, hash_value in enumerate(df_lineup_full_hashed['hash'].tolist()):
        first_position.setdefault(hash_value, position)

    # Positions (not index labels) are used on both sides because .iloc is
    # purely positional; labels only coincide with positions for a default
    # 0..n-1 index.
    actual_hashes = df_lineup_actual_hashed['hash'].tolist()
    for source_position, hash_value in enumerate(actual_hashes):
        target_position = first_position.get(hash_value)
        if target_position is None:
            # Same exception type the original lookup produced, with a
            # message that names the offending hash.
            raise IndexError(
                f"hash {hash_value!r} not found in the full lineup"
            )
        df_lineup_full_hashed.iloc[target_position] = (
            df_lineup_actual_hashed.iloc[source_position]
        )

    # keep=False flags every member of a duplicated group, not only the
    # repeats, so all conflicting rows are reported.
    duplicated_mask = df_lineup_full_hashed.duplicated(subset=['hash'], keep=False)
    df_inconsistencies = df_lineup_full_hashed.loc[duplicated_mask]
    df_lineup_full_hashed = df_lineup_full_hashed.drop_duplicates(subset=['hash'])

    print(datetime.datetime.now() - started)

    # Only the first 22 columns are returned; presumably these are the
    # original data columns without helper columns such as the hash --
    # TODO confirm.
    return [
        df_lineup_full_hashed[df_lineup_full_hashed.columns[0:22]],
        df_inconsistencies,
    ]