Question

我正在尝试将对象列表转换为可以通过索引访问的嵌套字典。

以下代码适用于两级嵌套字典。我想对其进行扩展，使其能够灵活地支持任意层级的嵌套。

from collections import namedtuple
import pprint 

# Record type for a single position: the portfolio it belongs to, its ticker
# symbol and the number of shares held.
Holding = namedtuple('holding', ['portfolio', 'ticker', 'shares'])

# Sample data used by the examples below.
lst = [
    Holding(portfolio='Large Cap', ticker='TSLA', shares=100),
    Holding(portfolio='Large Cap', ticker='MSFT', shares=200),
    Holding(portfolio='Small Cap', ticker='UTSI', shares=500),
]

def indexer(lst, indexes):
    """Group items into a two-level dict keyed by two of their attributes.

    Only ``indexes[0]`` and ``indexes[1]`` are used. The outer key is the
    value of the first attribute, the inner key is the value of the second,
    and each inner value is the list of matching items in input order.
    """
    grouped = {}
    for entry in lst:
        outer_key = getattr(entry, indexes[0])
        inner_key = getattr(entry, indexes[1])
        # Create the intermediate dict and the leaf list on first sight.
        by_inner = grouped.setdefault(outer_key, {})
        by_inner.setdefault(inner_key, []).append(entry)
    return grouped


# Index the sample holdings by portfolio, then by ticker, and pretty-print
# the resulting nested dictionary.
pp = pprint.PrettyPrinter()
d = indexer(lst, ['portfolio', 'ticker'])
pp.pprint(d)

输出：

{'Large Cap': {'MSFT': [holding(portfolio='Large Cap', ticker='MSFT', shares=200)],
               'TSLA': [holding(portfolio='Large Cap', ticker='TSLA', shares=100)]},
 'Small Cap': {'UTSI': [holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}}

Answer 1

你可以尝试类似下面的做法。只需遍历由 indexes 指定的属性列表，并始终跟踪由此创建的当前嵌套 dict：

def indexer(lst, indexes):
    """Group items into a nested dict with one level per name in ``indexes``.

    Every attribute but the last selects (or creates) a sub-dictionary; the
    last attribute's value keys a list holding the matching items in input
    order.
    """
    tree = {}
    for record in lst:
        keys = [getattr(record, name) for name in indexes]
        leaf_key = keys[-1]
        node = tree  # the dict at the nesting level reached so far
        for key in keys[:-1]:
            # Descend one level, creating the sub-dict if it is missing.
            node = node.setdefault(key, {})
        bucket = node.setdefault(leaf_key, [])
        bucket.append(record)
    return tree

这会产生以下输出：

>>> d = indexer(lst, ['portfolio', 'ticker'])
{'Large Cap': {'MSFT': [holding(portfolio='Large Cap', ticker='MSFT', shares=200)],
               'TSLA': [holding(portfolio='Large Cap', ticker='TSLA', shares=100)]},
 'Small Cap': {'UTSI': [holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}}

>>> d = indexer(lst, ['portfolio', 'ticker', 'shares'])
{'Large Cap': {'MSFT': {200: [holding(portfolio='Large Cap', ticker='MSFT', shares=200)]},
               'TSLA': {100: [holding(portfolio='Large Cap', ticker='TSLA', shares=100)]}},
 'Small Cap': {'UTSI': {500: [holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}}}

Answer 2

我见过的实现嵌套字典的最佳方法之一，是 Aaron Hall 对问题“What is the best way to implement nested dictionaries?”的回答。这是一个示例，它实现了一种具备 Perl 编程语言中所谓“Autovivification”（自动创建）行为的类型。

无论如何，在这里使用这样一个类型是有用的，因为这意味着你只需要为树状数据结构的“叶子”调用setdefault()（叶子是list，而不是子字典）。

所以这里是你的问题的答案，使用它：

from collections import namedtuple
from functools import reduce
from operator import attrgetter
from pprint import pprint


# Record type for a single position in a portfolio.
Holding = namedtuple('Holding', ['portfolio', 'ticker', 'shares'])

# Sample holdings, built from plain (portfolio, ticker, shares) rows.
lst = [
    Holding._make(row)
    for row in (
        ('Large Cap', 'TSLA', 100),
        ('Large Cap', 'MSFT', 200),
        ('Small Cap', 'UTSI', 500),
    )
]

def indexer(lst, indexes):
    """Create a nested dictionary of items keyed by successive attributes.

    Args:
        lst: Iterable of objects to index.
        indexes: Non-empty iterable of attribute names; one nesting level is
            created per name. Names are resolved with ``operator.attrgetter``.

    Returns:
        A ``Vividict`` (a ``dict`` subclass) whose innermost values are lists
        of the items sharing the same attribute values, in input order.

    Raises:
        ValueError: If ``indexes`` is empty.
    """

    class Vividict(dict):
        """ dict subclass which dynamically creates sub-dictionaries when
            they're first referenced (and don't exist).
            See https://stackoverflow.com/a/19829714/355230
        """
        def __missing__(self, key):
            value = self[key] = type(self)()
            return value

    # Build one getter per name. A single attrgetter(*indexes) returns a bare
    # value instead of a tuple when only one name is given, and that value
    # (e.g. a string) would then be star-unpacked element by element.
    index_getters = [attrgetter(name) for name in indexes]
    if not index_getters:
        raise ValueError("indexes must contain at least one attribute name")

    result = Vividict()
    for item in lst:
        # Leaves are lists, not dicts, so the last key is handled separately.
        *indices, leaf = (getter(item) for getter in index_getters)
        # Walking the Vividict auto-creates any missing intermediate levels.
        target = reduce(lambda x, y: x[y], indices, result)
        target.setdefault(leaf, []).append(item)

    return result

# Print the two-level index, a blank separator line, then the three-level one.
for position, indexes in enumerate((['portfolio', 'ticker'],
                                    ['portfolio', 'ticker', 'shares'])):
    if position:
        print()
    d = indexer(lst, indexes)
    pprint(d)

输出：

{'Large Cap': {'MSFT': [Holding(portfolio='Large Cap', ticker='MSFT', shares=200)],
               'TSLA': [Holding(portfolio='Large Cap', ticker='TSLA', shares=100)]},
 'Small Cap': {'UTSI': [Holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}}

{'Large Cap': {'MSFT': {200: [Holding(portfolio='Large Cap', ticker='MSFT', shares=200)]},
               'TSLA': {100: [Holding(portfolio='Large Cap', ticker='TSLA', shares=100)]}},
 'Small Cap': {'UTSI': {500: [Holding(portfolio='Small Cap', ticker='UTSI', shares=500)]}}}

Answer 3

你的代码实际上是一个很好的尝试。我所做的一点小补充是：维护由上一个索引引入的当前字典，让下一个索引在其中创建新的字典。因此，每处理一个索引（即内层循环的每次迭代），实际上就向下深入一层。在最后一层创建的是列表而不是字典；循环结束后，只需把该项追加到当前层级即可。

def indexer(lst, indexes):
    """Group items into a nested dict with one level per name in ``indexes``.

    Intermediate levels are dicts keyed by the attribute value; the last
    level maps the final attribute value to a list of the matching items.
    """
    tree = {}
    for record in lst:
        node = tree
        for depth, attr_name in enumerate(indexes):
            key = getattr(record, attr_name)
            # The deepest level stores a list of items; all others a dict.
            fresh = {} if depth != len(indexes) - 1 else []
            node = node.setdefault(key, fresh)
        node.append(record)
    return tree

Answer 4

此问题可能更适合CodeReview。

由于您的代码正常运行，以下是一些提示：

namedtuple返回一个类。holding应写为Holding。
lst太通用了。它是Holding实例的列表，可以称为holdings。
index0不是列表索引，而是dict键。
您可以使用嵌套的defaultdict，而不是反复调用setdefault。

以下是一个例子：

from collections import namedtuple, defaultdict
import pprint

# namedtuple() returns a class, so its type name is spelled in CapWords to
# match the name it is bound to; reprs then read ``Holding(...)``.
Holding = namedtuple('Holding', ['portfolio', 'ticker', 'shares'])

# Sample holdings used by the example below.
holdings = [
    Holding('Large Cap', 'TSLA', 100),
    Holding('Large Cap', 'MSFT', 200),
    Holding('Small Cap', 'UTSI', 500),
]


def default_tree(depth, leaf):
    """Build an auto-creating nested mapping ``depth`` levels deep.

    The last level is ``defaultdict(leaf)``; each level above it is a
    ``defaultdict`` whose missing keys produce the next level down.
    """
    if depth != 1:
        def make_subtree():
            # Called lazily, the first time a missing key is looked up.
            return default_tree(depth - 1, leaf)
        return defaultdict(make_subtree)
    return defaultdict(leaf)

def indexer(lst, attributes):
    """Group items into a nested mapping with one level per attribute name.

    The tree comes from ``default_tree`` with ``list`` leaves, so following
    an item's attribute values level by level ends at the list it is
    appended to.
    """
    tree = default_tree(len(attributes), list)
    for record in lst:
        bucket = tree
        for name in attributes:
            # Missing keys are created on lookup by the default tree.
            bucket = bucket[getattr(record, name)]
        bucket.append(record)
    return tree


# Index the holdings three levels deep (portfolio, ticker, shares) and
# pretty-print the resulting tree.
pp = pprint.PrettyPrinter()
d = indexer(holdings, ['portfolio', 'ticker', 'shares'])
pp.pprint(d)

Answer 5

我开始使用QueryList而不是嵌套的dict，它让我的生活变得如此简单。

例如：

ql.filter(ticker='MSFT') 将返回所有MSFT记录的列表。

class QueryList(list):
    """A list whose items can be grouped and filtered by attribute values."""

    def group_by(self, attrs) -> dict:
        """Group the items by one or more attributes, like a database GROUP BY.

        Args:
            attrs: A single attribute name (str) or an iterable of attribute
                names.

        Returns:
            A ``defaultdict`` mapping each distinct key to a ``QueryList`` of
            the items having that key, in their original order. Keys are the
            bare attribute value when ``attrs`` is a str, and a tuple of
            attribute values otherwise::

                {(attr_val0, attr_val1, ...): QueryList(), ...}
                -- or --
                {attr_val: QueryList(), ...}

            Because the result is a ``defaultdict``, indexing it with a
            missing key inserts and returns an empty ``QueryList``.
        """
        result = defaultdict(QueryList)
        if isinstance(attrs, str):
            for item in self:
                result[getattr(item, attrs)].append(item)
        else:
            # Materialize once so a one-shot iterable (e.g. a generator) is
            # not exhausted after the first item.
            names = tuple(attrs)
            for item in self:
                result[tuple(getattr(item, x) for x in names)].append(item)

        return result

    def filter(self, **kwargs):
        """Return the subset of items whose attributes equal the given values.

        Args:
            kwargs: Attribute name/value pairs; an item is kept only if every
                named attribute compares equal (``==``) to its value. With no
                pairs, every item is kept.

        Returns:
            A new ``QueryList`` with the matching items in original order.

        For example:
            foo.filter(portfolio='123', account='ABC') will return all
            matching items.
        """
        # A single pass with == handles any number of criteria uniformly and
        # does not require the attribute values to be hashable.
        wanted = kwargs.items()
        return QueryList(
            item for item in self
            if all(getattr(item, attr) == val for attr, val in wanted)
        )

    def scalar(self, default=None, attr=None):
        """Return the first item in this QueryList, or one of its attributes.

        Args:
            default: The value to return if the list is empty, or if ``attr``
                is given but not found on the first item.
            attr: If not None, return ``getattr(item, attr)`` of the first
                item instead of the item itself.
        """
        if not self:
            # Return the default itself; never look ``attr`` up on it.
            return default

        item = self[0]
        if attr is None:
            return item
        return getattr(item, attr, default)

从对象列表创建动态级别嵌套dict？

5 个答案: