Question

您好，我需要将两个字典写到csv中，我想以有序的方式编写第一个字典，而第二个字典的顺序无关紧要，第二个字典的键是Selenium驱动程序中的cookie，因为我不知道每个站点将返回多少键（每个站点会有所不同）

user1= {'ID':1,'Name':'John','Age':13} # these keys must come first in the CSV, in exactly this order
cookies_data = driver.get_cookies() # list of cookie dicts, one per cookie; keys may differ between cookies
with open('file.csv','w') as csvFile:
    # goal: write the columns in this order: id, name, age, then the cookie fields


# A sample of cookies:
#cookies_data = [{'domain': 'google.com', 'expiry': 1624301720.007404, 'httpOnly': True, 'name': 'ANID', 'path': '/',}] 
#cookies_data can have more than one dictionary and some of the keys may be the same

Answer 1

您可以使用csv.DictWriter来写入csv：先把两个字典的键合并为列名（user1的键在前），然后遍历cookies_data列表，把user1与每个cookie合并成一个字典，作为一行写入csv。

import csv

# Build the header: user1's keys first, in their original order, followed by
# every cookie key in first-seen order. All cookies are scanned (not only the
# first one) because cookies may carry different keys, and DictWriter raises
# ValueError for a row containing a key that is not in fieldnames.
fieldnames = list(user1)
for cookie in cookies_data:
    for key in cookie:
        if key not in fieldnames:
            fieldnames.append(key)

# newline='' lets the csv module control line endings (avoids blank lines
# between rows on Windows).
with open('file.csv', 'w', newline='') as csvFile:

    # Write the header once, then one row per cookie.
    writer = csv.DictWriter(csvFile, fieldnames=fieldnames)
    writer.writeheader()

    # Each row is user1 merged with one cookie; keys a cookie lacks are left
    # empty (DictWriter's default restval).
    for data in cookies_data:
        new_dict = {**user1, **data}
        writer.writerow(new_dict)

输出将为

ID,Name,Age,domain,expiry,httpOnly,name,path,secure,value
1,John,13,.....
1,John,13,.....
1,John,13,.....
1,John,13,.....

Answer 2

您可以使用pandas读取字典，创建DataFrame，然后导出到csv。

import pandas as pd


def flatten_cookies(cookies_data):
    """Merge a list of cookie dicts into one flat dict.

    The first occurrence of a key keeps its plain name. A key that is already
    present gets the position of its cookie appended (e.g. ``domain1``), so
    cookies that share key names do not overwrite each other.
    """
    cookies = {}
    # enumerate() yields each cookie's real position. list.index() would
    # return the position of the first *equal* dict, so identical cookies
    # would all receive the same suffix and overwrite one another.
    for position, cookie in enumerate(cookies_data):
        for key, value in cookie.items():
            new_key = f"{key}{position}" if key in cookies else key
            cookies[new_key] = value
    return cookies


user1= {'ID':1,'Name':'John','Age':13}
cookies_data = [{'domain': 'google.com', 'expiry': 1624301720.007404, 'httpOnly': True, 'name': 'ANID', 'path': '/'}] 

cookies = flatten_cookies(cookies_data)

# user1's keys stay first; the cookie fields are appended after them.
user1.update(cookies)
# orient='index' turns every key into a row label with a single value column.
df = pd.DataFrame.from_dict(user1, orient='index')
print(df)

df.to_csv('something.csv')

输出：

ID                 1
Name            John
Age               13
domain    google.com
expiry    1.6243e+09
httpOnly        True
name            ANID
path               /

您也可以使用pd.concat添加到数据框：

# axis=1 places the frames side by side, so each frame becomes its own column
df = pd.concat([df, df], axis=1)

输出：

ID                 1           1
Name            John        John
Age               13          13
domain    google.com  google.com
expiry    1.6243e+09  1.6243e+09
httpOnly        True        True
name            ANID        ANID
path               /           /

完整代码示例：

import pandas as pd



def get_df(user1, cookies_data):
    """Return a single-column DataFrame holding a user's fields and cookies.

    Args:
        user1: dict of user fields; its keys come first in the result.
        cookies_data: list of cookie dicts. A key that was already seen in an
            earlier cookie gets the cookie's position appended (``domain1``)
            so it does not overwrite the earlier value.

    Returns:
        A DataFrame indexed by field name (``orient='index'``) with one
        value column. ``user1`` itself is left unmodified.
    """
    cookies = {}
    # enumerate() gives each cookie's real position. list.index() returns the
    # position of the first *equal* dict, so identical cookies would share a
    # suffix and overwrite each other.
    for position, cookie in enumerate(cookies_data):
        for key, value in cookie.items():
            new_key = f"{key}{position}" if key in cookies else key
            cookies[new_key] = value
    # Merge into a new dict so the caller's user1 is not mutated.
    combined = {**user1, **cookies}
    return pd.DataFrame.from_dict(combined, orient='index')



def main():
    """Build one column per user and write the combined table to 'something.csv'."""
    user1 = {'ID':1,'Name':'Rob','Age':17}
    cookies_data = [{'domain': 'google.com', 'expiry': 1624301720.007404, 'httpOnly': True, 'name': 'ANID', 'path': '/'}]
    df1 = get_df(user1, cookies_data)
    user2 = {'ID':1,'Name':'John','Age':13}
    # This cookie has no 'path' key, so that cell is NaN in the second column.
    cookies_data2 = [{'domain': 'google.com', 'expiry': 1624301720.007404, 'httpOnly': True, 'name': 'ANID'}]
    df2 = get_df(user2, cookies_data2)
    # axis=1 aligns the frames on their row labels and puts them side by side.
    new_df = pd.concat([df1, df2], axis=1)
    print(new_df)
    new_df.to_csv('something.csv')


# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

输出：

Age               17          13
ID                 1           1
Name             Rob        John
domain    google.com  google.com
expiry    1.6243e+09  1.6243e+09
httpOnly        True        True
name            ANID        ANID
path               /         NaN

还可以在主函数中使用for循环来遍历用户：

def main():
    """Build one column per user from parallel lists and write 'something.csv'."""
    all_users = [{'ID':1,'Name':'Rob','Age':17}, {'ID':1,'Name':'John','Age':13}]
    all_cookies_data = [[{'domain': 'google.com', 'expiry': 1624301720.007404, 'httpOnly': True, 'name': 'ANID', 'path': '/'}], [{'domain': 'google.com', 'expiry': 1624301720.007404, 'httpOnly': True, 'name': 'ANID'}]]

    # Pair each user with its own cookie list; zip stops at the shorter list.
    dfs_to_concat = [
        get_df(user, cookies)
        for user, cookies in zip(all_users, all_cookies_data)
    ]

    # axis=1 aligns the frames on their row labels and puts them side by side.
    new_df = pd.concat(dfs_to_concat, axis=1)
    print(new_df)
    new_df.to_csv('something.csv')

使用csv编写两个字典

2 个答案: