我有一个由字典组成的列表,例如:
# Sample flat records used throughout the examples; every record carries the
# keys 'serving', 'meal' and 'name'. Note that ('cold', 'lunch') occurs twice.
ds = [{'serving': 'hot', 'meal': 'breakfast', 'name': 'toasts'},
{'serving': 'cold', 'meal': 'dinner', 'name': 'salad'},
{'serving': 'cold', 'meal': 'lunch', 'name': 'salad'},
{'serving': 'cold', 'meal': 'lunch', 'name': 'ice-cream'},
{'serving': 'hot', 'meal': 'dinner', 'name': 'fondue'},]
我希望像在 Pandas 中使用 set_index 那样,按若干个键把它组织成层次结构。最终我想得到:
>>> hierarchize(ds, ('serving', 'meal'), array=True, default=dict)
{'cold': {
'dinner': [{'name': 'salad'}],
'lunch': [{'name': 'salad'},
{'name': 'ice-cream'}]},
'hot': {
'breakfast': [{'name': 'toasts'}],
'dinner': [{'name': 'fondue'}]}
}
所以我写了下面这个函数,我想知道是否有更优雅的方法,例如借助内置模块来实现。
def hierarchize(data, primary_keys, array=False, default=dict):
    """Nest a flat sequence of dicts by the values of ``primary_keys``.

    Each entry is shallow-copied, stripped of its primary keys, and stored
    under the path formed by its primary-key values, one nesting level per
    key. The input entries themselves are not modified.

    Args:
        data: Iterable of dicts; every entry must contain all primary keys.
        primary_keys: Sequence of key names, outermost nesting level first.
        array: If true, each leaf is a list collecting every entry that
            shares the same path. If false, each leaf is the single entry
            for that path and a repeated path is an error.
        default: Mapping factory used for the root and the intermediate
            levels. It is also called with the finished root to build the
            return value.

    Returns:
        ``default(root)``, the nested mapping.

    Raises:
        ValueError: If ``primary_keys`` is empty while there is data to
            place, or if ``array`` is false and two entries share the same
            primary-key values.
        KeyError: If an entry lacks one of the primary keys.
    """
    import copy

    branch_keys = primary_keys[:-1]
    root = default()
    for entry in data:
        # Checked per entry (not up front) so that empty data still yields an
        # empty mapping regardless of primary_keys.
        if not primary_keys:
            raise ValueError('primary_keys must not be empty')
        leaf_key = primary_keys[-1]

        new_entry = copy.copy(entry)
        node = root
        # Walk (and create on demand) one mapping level per non-final key.
        for key in branch_keys:
            value = entry[key]
            if value not in node:
                node[value] = default()
            node = node[value]
            new_entry.pop(key)

        leaf_value = entry[leaf_key]
        new_entry.pop(leaf_key)
        if array:
            if leaf_value not in node:
                node[leaf_value] = []
            node[leaf_value].append(new_entry)
        elif leaf_value in node:
            # The duplicate test must look at the key *value* in the parent
            # mapping; testing the key name against the stored entry can never
            # match because that name was popped from every stored entry.
            path = tuple(entry[key] for key in primary_keys)
            raise ValueError('Duplicate key %s found in dataset' % (path,))
        else:
            node[leaf_value] = new_entry
    return default(root)
答案 0 :(得分:2)
我不知道有哪个内置模块能让这段代码更优雅,不过试试递归的方法怎么样?
def hierarchize(data, primary_keys):
    """Recursively nest ``data`` by the values of ``primary_keys``.

    Records are grouped by the value of the first primary key; that key is
    removed from a copy of each record and the groups are then nested by the
    remaining keys. Input records are not modified.

    Args:
        data: Iterable of dicts that each contain every primary key.
        primary_keys: Sequence of key names, outermost nesting level first.

    Returns:
        ``data`` itself when ``primary_keys`` is empty; otherwise a dict
        mapping each value of the first key to the nested result for the
        records sharing that value (leaves are lists of trimmed records).

    Raises:
        KeyError: If a record lacks one of the primary keys.
    """
    if not primary_keys:
        return data
    head, rest = primary_keys[0], primary_keys[1:]

    groups = {}
    for record in data:
        category = record[head]
        trimmed = {k: v for k, v in record.items() if k != head}
        # Append in place; rebuilding the list with `+` on every record made
        # grouping quadratic in the group size.
        groups.setdefault(category, []).append(trimmed)
    return {
        category: hierarchize(members, rest)
        for category, members in groups.items()
    }
修改:
可以利用 itertools.groupby() 在一定程度上实现你想要的效果,所以你可以试试下面这个版本:
from itertools import groupby
def hierarchize(data, primary_keys):
    """Nest ``data`` by the values of ``primary_keys`` using ``groupby``.

    Records are sorted and grouped by the value of the first primary key;
    that key is removed from a copy of each record and every group is then
    nested by the remaining keys. Input records are not modified.

    Args:
        data: Iterable of dicts that each contain every primary key. The
            values of each primary key must be mutually orderable because
            the records are sorted by them.
        primary_keys: Sequence of key names, outermost nesting level first.

    Returns:
        ``data`` itself when ``primary_keys`` is empty; otherwise a dict
        mapping each value of the first key to the nested result for the
        records sharing that value (leaves are lists of trimmed records).

    Raises:
        KeyError: If a record lacks one of the primary keys.
    """
    if not primary_keys:
        return data
    head, rest = primary_keys[0], primary_keys[1:]

    def key_of(record):
        return record[head]

    # groupby only merges adjacent records, so sort by the same key first.
    # sorted() is stable, so records keep their input order within a group.
    grouped = groupby(sorted(data, key=key_of), key_of)
    # Recurse into this function itself; the previous call to the undefined
    # name `hierarchize2` raised NameError for any non-empty primary_keys.
    return {
        value: hierarchize(
            [{k: v for k, v in record.items() if k != head} for record in members],
            rest,
        )
        for value, members in grouped
    }