def merge_dicts(list_of_dicts: list, missval=None):
    '''Merge a list of dicts into a single dict of equally long lists.

    Every key that occurs in any input dict becomes a key of the result.
    Its value is a list with one entry per input dict, in input order:
    the dict's value for that key, or ``missval`` when the dict does not
    contain the key. Keys appear in the result in first-seen order.

    Args:
        list_of_dicts: the dicts to merge.
        missval: placeholder stored for dicts that lack a given key.

    Returns:
        A new dict mapping each key to a list of ``len(list_of_dicts)``
        values. An empty input gives an empty dict.

    >>> d1 = {'a': 1, 'b': 2, 'c': 3}
    >>> d2 = {'a': 4, 'b': 5}
    >>> d3 = {'d': 5}
    >>> merge_dicts([d1, d2, d3], 'NA')  # doctest: +NORMALIZE_WHITESPACE
    {'a': [1, 4, 'NA'], 'b': [2, 5, 'NA'],
     'c': [3, 'NA', 'NA'], 'd': ['NA', 'NA', 5]}
    '''
    # First pass: collect the distinct keys. A dict keeps first-seen order
    # and gives O(1) membership, unlike scanning a list for every key.
    all_keys = {}
    for d in list_of_dicts:
        for k in d:
            all_keys[k] = None

    # Second pass: one column per key. Every position, including the first,
    # falls back to missval when the key is absent from that dict.
    merged = {}
    for k in all_keys:
        merged[k] = [d.get(k, missval) for d in list_of_dicts]
    return merged
函数的文档字符串已经说明了它的功能。有没有更高效的写法,不必编写两个 for 循环(一个用来收集所有字典中的全部键,另一个用来构建合并后的字典)?
答案 0 :(得分:4)
如果您不关心键的顺序,可以使用 set 来收集所有键,并用集合推导式来创建它。
第二部分可以使用字典推导式,并用列表推导式来构建每个列表:
def merge_dicts(list_of_dicts: list, missval=None):
    '''Combine several dicts into one dict whose values are lists.

    Each key found in any of the input dicts maps to a list holding, for
    every input dict in order, that dict's value for the key or
    ``missval`` when the key is absent. The keys are gathered in a set,
    so the key order of the result is not guaranteed.

    >>> d1 = {'a': 1, 'b': 2, 'c': 3}
    >>> d2 = {'a': 4, 'b': 5}
    >>> d3 = {'d': 5}
    >>> merge_dicts([d1, d2, d3], 'NA') == {
    ...     'a': [1, 4, 'NA'], 'b': [2, 5, 'NA'],
    ...     'c': [3, 'NA', 'NA'], 'd': ['NA', 'NA', 5]}
    True
    '''
    # Distinct keys across all inputs.
    unique_keys = {name for mapping in list_of_dicts for name in mapping}
    # One list per key, with one slot per input dict.
    return {
        name: [mapping.get(name, missval) for mapping in list_of_dicts]
        for name in unique_keys
    }
# Sample input: three dicts whose keys only partly overlap.
d1 = {'a': 1, 'b': 2, 'c': 3}
d2 = {'a': 4, 'b': 5}
d3 = {'d': 5}
merge_dicts([d1, d2, d3], 'NA')
# Result:
# {'a': [1, 4, 'NA'],
#  'b': [2, 5, 'NA'],
#  'c': [3, 'NA', 'NA'],
#  'd': ['NA', 'NA', 5]}
答案 1 :(得分:2)
这是使用 defaultdict 的一种解决方案:
from collections import defaultdict
def merge_dicts(list_of_dicts: list, missval=None):
    """Merge dicts into one dict of lists, padding gaps with ``missval``.

    Each key seen in any input dict maps to a list with one slot per
    input dict. A slot holds that dict's value for the key, or
    ``missval`` if the dict does not have the key.
    """
    def _blank_column():
        # Called only when a key is seen for the first time: a column
        # pre-filled with the placeholder, one slot per input dict.
        return [missval] * len(list_of_dicts)

    columns = defaultdict(_blank_column)
    for position, mapping in enumerate(list_of_dicts):
        for key, value in mapping.items():
            # Overwrite the placeholder at this dict's position.
            columns[key][position] = value
    return dict(columns)
# Sample input: three dicts whose keys only partly overlap.
d1 = {'a': 1, 'b': 2, 'c': 3}
d2 = {'a': 4, 'b': 5}
d3 = {'d': 5}
print(merge_dicts([d1, d2, d3], 'NA'))
# Prints:
# {'a': [1, 4, 'NA'], 'b': [2, 5, 'NA'], 'c': [3, 'NA', 'NA'], 'd': ['NA', 'NA', 5]}
答案 2 :(得分:0)
如果您使用的是 pandas,可以用这些字典构建一个 DataFrame,然后再将其转换回字典:
# Assumes pandas is imported as `pd` and d1, d2, d3 are the sample dicts
# from the earlier examples (neither is defined in this snippet; confirm).
# As the pasted output below shows, each key maps to a {row_index: value}
# dict rather than a list, the integer inputs come back as floats, and
# missing entries appear as nan.
pd.DataFrame([d1, d2, d3]).to_dict()
{'a': {0: 1.0, 1: 4.0, 2: nan},
'b': {0: 2.0, 1: 5.0, 2: nan},
'c': {0: 3.0, 1: nan, 2: nan},
'd': {0: nan, 1: nan, 2: 5.0}}