我想对3个Excel进行复杂的操作。首先,我过滤了用户状态为“已启用”的数据,然后将循环浏览其他2个Excel,以查找与“启用用户”相对应的数据。 我开始一个一个地循环,并保存在元组列表中。 我曾尝试同时使用Normal Looping和itertuples(),但速度仍然太慢。有没有更快的方法来执行此操作-
from pandas import ExcelWriter
from pandas import ExcelFile
df = pd.read_excel('Enable-DisableUsersList.xlsx')
df2 = df[df.Status == '*ENABLED']
df1 = pd.read_excel('Excel1.xlsx')
df3 = pd.read_excel('Excel.xls2')
final_list = []
for ind1 in df1.index:
for ind2 in df2.index:
if(df1['USER PROFILE'][ind1] == df2['User ID'][ind2]):
for ind3 in df3.index:
if(df3['user profile'][ind3] == df2['User ID'][ind2]):
userprofile = df1['USER PROFILE'][ind1]
nameprofile = df1['NAME PROFILE'][ind1]
mainmenuno = df3['sysmenu option'][ind3]
mainmenu = df1['SUBMENU'][ind1]
mainmenudesc = df1['NAME SUBMENU'][ind1]
submenu = df1['OPTION'][ind1]
submenudesc = df1['NAME OF OPTION'][ind1]
status = df2['Status'][ind2]
list = (userprofile, nameprofile, mainmenuno, mainmenu,
mainmenudesc, submenu, submenudesc, status)
final_list.append(list)
# print(final_list)
print(final_list)````
I have used df.iterrows() instead of df.index also but still thats slow.
答案 0 :(得分:0)
使用字典
import pandas as pd
import random
import time
df1 = pd.DataFrame()
df2 = pd.DataFrame()
df3 = pd.DataFrame()
id_list = list(range(500))
random.shuffle(id_list)
df1["id"] = id_list
random.shuffle(id_list)
df1["value"] = id_list
random.shuffle(id_list)
df2["id"] = id_list
random.shuffle(id_list)
df2["value"] = id_list
random.shuffle(id_list)
df3["id"] = id_list
random.shuffle(id_list)
df3["value"] = id_list
#till here we created 3 random dataframes with same values in different order
out = []
start = time.time()
for ind1 in df1.index:
for ind2 in df2.index:
if df2["value"][ind2] == df1["value"][ind1]:
for ind3 in df3.index:
if df3["value"][ind3]==df2["value"][ind2]:
out.append([df3["id"][ind3], df2["id"][ind2]])
print(time.time()-start)
start1 = time.time()
df1_dict = df1.set_index("value").to_dict() #use the value you want to compare as the index, it will make it the dict key
df2_dict = df2.set_index("value").to_dict()
df3_dict = df3.set_index("value").to_dict()
out1 = []
for k in df1_dict["id"]:
id2 = df2_dict["id"][k]
id3 = df3_dict["id"][k]
out1.append([id2,id3])
print(time.time()-start1)
,迭代时间为8.5秒 对于字典,其为0.0035