在 Python 的字典列表上实现类似 Pandas 的 set_index

时间:2017-04-30 21:14:00

标签: python dictionary

我有一个由字典组成的列表,例如:

# Sample records; every dict has the keys 'serving', 'meal' and 'name'.
ds = [{'serving': 'hot', 'meal': 'breakfast', 'name': 'toasts'},
      {'serving': 'cold', 'meal': 'dinner', 'name': 'salad'},
      {'serving': 'cold', 'meal': 'lunch', 'name': 'salad'},
      {'serving': 'cold', 'meal': 'lunch', 'name': 'ice-cream'},
      {'serving': 'hot', 'meal': 'dinner', 'name': 'fondue'},] 

我希望能像在 Pandas 中使用 set_index 那样,按指定的键对它建立层级索引。最终我想得到:

>>> hierarchize(ds, ('serving', 'meal'), array=True, default=dict)
{'cold': {
    'dinner': [{'name': 'salad'}],
    'lunch':  [{'name': 'salad'},
               {'name': 'ice-cream'}]},
 'hot': {
    'breakfast': [{'name': 'toasts'}],
    'dinner': [{'name': 'fondue'}]}
}   

所以我写了下面这段代码,我想知道是否有更优雅的方法,可以借助内置模块来实现。

def hierarchize(data, primary_keys, array=False, default=dict):
    """Nest flat records into a hierarchy keyed by the given fields.

    Each record in *data* is shallow-copied, the fields named in
    *primary_keys* are removed from the copy, and what remains is stored
    under the path formed by the record's values for those fields.

    Args:
        data: Iterable of dict-like records; the records are not modified.
        primary_keys: Indexable sequence of field names, outermost first.
        array: When true, every leaf is a list collecting all records that
            share the same key path.  When false, every leaf is a single
            record and a repeated key path is an error.
        default: Mapping factory used for the root and the intermediate
            levels; the result is returned as ``default(root)``.

    Returns:
        The nested mapping described above.

    Raises:
        ValueError: If *primary_keys* is empty while *data* has records, or
            if *array* is false and two records share the same key path.
        KeyError: If a record lacks one of the primary keys.
    """
    import copy

    root = default()
    for entry in data:
        # Checked per record so that empty data still yields an empty
        # mapping regardless of primary_keys.
        if not primary_keys:
            raise ValueError('primary_keys must not be empty')
        remainder = copy.copy(entry)
        node = root
        # Walk (and create on demand) every level except the last one.
        for key in primary_keys[:-1]:
            value = remainder.pop(key)
            if value not in node:
                node[value] = default()
            node = node[value]
        leaf = remainder.pop(primary_keys[-1])
        if array:
            if leaf not in node:
                node[leaf] = []
            node[leaf].append(remainder)
        elif leaf in node:
            # A record already occupies this exact key path.
            path = tuple(entry[key] for key in primary_keys)
            raise ValueError('Duplicate key %s found in dataset' % (path,))
        else:
            node[leaf] = remainder
    return default(root)

1 个答案:

答案 0 :(得分:2)

我不知道有哪个内置模块能让这段代码更优雅,不过用递归的方法怎么样?

def hierarchize(data, primary_keys):
    """Recursively group records by each primary key in turn.

    Args:
        data: Iterable of dicts.
        primary_keys: Sliceable sequence of field names, outermost first.

    Returns:
        *data* itself when *primary_keys* is empty; otherwise a dict that
        maps each distinct value of the first key to the hierarchy built
        from the matching records (with that key removed) and the
        remaining keys.  The innermost level holds lists of records.

    Raises:
        KeyError: If a record lacks the key being grouped on.
    """
    if not primary_keys:
        return data
    head, rest = primary_keys[0], primary_keys[1:]
    groups = {}
    for record in data:
        category = record[head]
        trimmed = {k: v for k, v in record.items() if k != head}
        # Append in place rather than rebuilding the list for every record.
        groups.setdefault(category, []).append(trimmed)
    return {
        category: hierarchize(members, rest)
        for category, members in groups.items()
    }

修改

可以利用 itertools.groupby() 在某种程度上实现你想要的效果,所以你也可以试试下面这个版本:

from itertools import groupby

def hierarchize(data, primary_keys):
    """Group records by each primary key in turn using itertools.groupby.

    Args:
        data: Iterable of dicts.
        primary_keys: Sliceable sequence of field names, outermost first.

    Returns:
        *data* itself when *primary_keys* is empty; otherwise a dict that
        maps each distinct value of the first key to the hierarchy built
        from the matching records (with that key removed) and the
        remaining keys.  The innermost level holds lists of records.

    Raises:
        KeyError: If a record lacks the key being grouped on.
    """
    if not primary_keys:
        return data
    head, rest = primary_keys[0], primary_keys[1:]

    def group_value(record):
        return record[head]

    def without_head(record):
        return {k: v for k, v in record.items() if k != head}

    # groupby only merges adjacent items, so sort by the same key first.
    ordered = sorted(data, key=group_value)
    return {
        value: hierarchize([without_head(r) for r in members], rest)
        for value, members in groupby(ordered, group_value)
    }