Question

我有一个字典列表，如下所示：

data = [{'stat3': '5', 'stat2': '4', 'player': '1'}, 
        {'stat3': '8', 'stat2': '1', 'player': '1'}, 
        {'stat3': '6', 'stat2': '1', 'player': '3'}, 
        {'stat3': '3', 'stat2': '7', 'player': '3'}]

我希望得到一个嵌套字典，其键是 'player' 键对应的值，其值是汇总后统计数据的字典。

输出应该是：

{'3': {'stat3': 9, 'stat2': 8, 'player': '3'}, 
 '1': {'stat3': 13, 'stat2': 5, 'player': '1'}}

以下是我的代码：

# Asker's original attempt, kept as posted: it yields identical values for
# 'stat3' and 'stat2' for the reasons noted below.
from collections import defaultdict
result = {}
# One integer per player -- there is no separate slot per stat.
total_stat = defaultdict(int)

# NOTE: the loop variable `dict` shadows the builtin `dict` type.
for dict in data:
    # Both stats are added into the same per-player integer, so the
    # individual stat3 / stat2 sums are lost here.
    total_stat[dict['player']] += int(dict['stat3'])  
    total_stat[dict['player']] += int(dict['stat2']) 
# 'stat3' and 'stat2' both read that single combined total, which is why the
# two fields always come out equal in the printed result.
total_stat = ([{'player': info, 'stat3': total_stat[info],
                'stat2': total_stat[info]} for info in 
                 sorted(total_stat, reverse=True)])
# Re-key the list of per-player dicts by player id.
for item in total_stat:       
    result.update({item['player']: item})
print(result)

然而，我得到了这个：

{'3': {'player': '3', 'stat3': 17, 'stat2': 17}, 
 '1': {'player': '1', 'stat3': 18, 'stat2': 18}}

我该如何修正这段代码？或者有没有其他方法？

Answer 1

您的数据可以看作一个DataFrame，用pandas的自然解法如下（注意 astype(int) 也会把 player 转成整数，所以结果的键是整数 1 和 3）：

In [34]: pd.DataFrame.from_records(data).astype(int).groupby('player').sum().T.to_dict()

Out[34]: {1: {'stat2': 5, 'stat3': 13}, 3: {'stat2': 8, 'stat3': 9}}

Answer 2

只需使用更嵌套的默认工厂：

>>> total_stat = defaultdict(lambda : defaultdict(int))
>>> value_fields = 'stat2', 'stat3'
>>> for datum in data:
...     player_data = total_stat[datum['player']]
...     for k in value_fields:
...         player_data[k] += int(datum[k])
...
>>> from pprint import pprint
>>> pprint(total_stat)
defaultdict(<function <lambda> at 0x1023490d0>,
            {'1': defaultdict(<class 'int'>, {'stat2': 5, 'stat3': 13}),
             '3': defaultdict(<class 'int'>, {'stat2': 8, 'stat3': 9})})

Answer 3

此解决方案使用嵌套字典。 out是{player: Counter}字典，其中Counter本身是另一个字典{stat: score}

import collections

def split_player_stat(dict_object, key='player'):
    """
    Split one row of data into its grouping value and its numeric stats.

    Args:
        dict_object: Mapping that holds the grouping field plus stat fields
            whose values are accepted by ``int()``.
        key: Name of the grouping field; defaults to ``'player'``.

    Returns:
        A ``(group_value, stats)`` tuple, where ``stats`` maps every field
        other than ``key`` to its value converted with ``int()``.

    Raises:
        KeyError: If ``key`` is missing from ``dict_object``.
        ValueError, TypeError: Propagated from ``int()`` when a stat value
            cannot be converted.

    >>> split_player_stat({'stat3': '5', 'stat2': '4', 'player': '1'})
    ('1', {'stat3': 5, 'stat2': 4})
    """
    # Look the grouping value up first so a missing key is reported before
    # any stat conversion is attempted.
    group_value = dict_object[key]
    stats = {name: int(raw) for name, raw in dict_object.items() if name != key}
    return group_value, stats

data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
        {'stat3': '8', 'stat2': '1', 'player': '1'},
        {'stat3': '6', 'stat2': '1', 'player': '3'},
        {'stat3': '3', 'stat2': '7', 'player': '3'}]

# Map each player id to a Counter that accumulates that player's stats;
# defaultdict creates an empty Counter the first time a player is seen.
out = collections.defaultdict(collections.Counter)
for player_stat in data:
    player, stat = split_player_stat(player_stat)
    # Counter.update adds the incoming values to the existing counts
    # instead of replacing them.
    out[player].update(stat)
print(out)

此解决方案的神奇之处在于collections.defaultdict和collections.Counter类，两者都像字典一样。

Answer 4

这里的大多数解决方案都把问题弄得过于复杂。让我们保持简单易懂。给你：

In [26]: result = {}

In [27]: req_key = 'player'

In [29]: for dct in data:
    ...:     player_val = dct.pop(req_key)
    ...:     result.setdefault(player_val, {req_key: player_val})
    ...:     for k, v in dct.items():
    ...:         result[player_val][k] = result[player_val].get(k, 0) + int(v)

In [30]: result
Out[30]:
{'1': {'player': '1', 'stat2': 5, 'stat3': 13},
 '3': {'player': '3', 'stat2': 8, 'stat3': 9}}

这样既简单又干净。对于这个简单的问题，不需要任何导入（import）。注意：dct.pop 会直接修改 data 中的原始字典。下面来解释这段代码：

result.setdefault(player_val, {'player': player_val})

如果result中还没有这个键，则把它的默认值设置为{'player': '3'}或{'player': '1'}。

result[player_val][k] = result[player_val].get(k, 0) + int(v)

这会把同一个键对应的值累加起来。

Answer 5

这不是最好的代码，也不够pythonic，但我认为你可以借助它逐步找出自己的代码错在哪里。

def sum_stats_by_player(data):
    """Total ``stat3`` and ``stat2`` per player, tracing each step on stdout.

    Args:
        data: Iterable of mappings with ``"player"``, ``"stat3"`` and
            ``"stat2"`` entries; the stat values must be accepted by ``int()``.

    Returns:
        A dict keyed by player. Each value is a dict holding ``"player"``
        plus the summed ``"stat3"`` and ``"stat2"`` integers.
    """
    stat_names = ("stat3", "stat2")
    result = {}

    for row in data:
        print(f"evaluating dictionary {row}")

        player = row["player"]
        # Convert every stat before touching the totals, so a bad value
        # aborts the row before anything is printed or updated for it.
        values = [(name, int(row[name])) for name in stat_names]

        if player not in result:
            print(f"\tfirst time player {player}")
            result[player] = {"player": player}
        totals = result[player]

        for name, value in values:
            if name in totals:
                print(f"\tupdating {name} {totals[name] + value}")
                totals[name] += value
            else:
                print(f"\tfirst time {name} {value}")
                totals[name] = value

    return result


data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
        {'stat3': '8', 'stat2': '1', 'player': '1'},
        {'stat3': '6', 'stat2': '1', 'player': '3'},
        {'stat3': '3', 'stat2': '7', 'player': '3'}]

print(sum_stats_by_player(data))

Answer 6

使用Counter的另一个版本：

import itertools
from collections import Counter

def count_group(group):
    """Sum the stats of every record in *group*.

    Each record is a mapping; its ``'player'`` entry is skipped and every
    other value is converted with ``int()`` before being added up.

    Returns:
        A plain dict mapping each stat name to its total across the group.
    """
    totals = Counter()
    for record in group:
        # Counter.update adds these values to the running totals.
        totals.update(
            {field: int(raw) for field, raw in record.items() if field != 'player'}
        )
    return dict(totals)

# itertools.groupby only groups consecutive items, so the records are sorted
# by the same 'player' key before grouping.
sorted_data = sorted(data, key=lambda x:x['player'])
# Build one (player, summed stats) pair per run of equal 'player' values.
results = [(k, count_group(g)) for k, g in itertools.groupby(sorted_data, lambda x: x['player'])]

print(results)

输出：

[('1', {'stat3': 13, 'stat2': 5}), ('3', {'stat3': 9, 'stat2': 8})]

Answer 7

两个循环可以让你：

按主键对数据进行分组
汇总所有辅助信息

这两项任务是在下面显示的aggregate_statistics函数中完成的。

from collections import Counter
from pprint import pprint


def main():
    """Aggregate a small sample table by ``'player'`` and pretty-print it."""
    sample_rows = [
        dict(player=1, stat2=4, stat3=5),
        dict(player=1, stat2=1, stat3=8),
        dict(player=3, stat2=1, stat3=6),
        dict(player=3, stat2=7, stat3=3),
    ]
    pprint(aggregate_statistics(sample_rows, 'player'))


def aggregate_statistics(table, key):
    """Sum the non-key fields of the rows in *table* that share a *key* value.

    Args:
        table: Iterable of mappings. Each must contain ``key``; every other
            field is treated as a statistic to be added up.
        key: Field to group by. Any hashable field name works, not only
            strings.

    Returns:
        A list with one plain ``dict`` per distinct key value, in the order
        those values are first seen. Each dict holds the summed statistics
        followed by ``key`` mapped to the group's value.

    Raises:
        KeyError: If a row does not contain ``key``.
    """
    totals_by_key = {}
    for record in table:
        # Work on a copy so popping the key never mutates the caller's rows.
        data = dict(record)
        group_value = data.pop(key)
        # Counter.update adds to the existing counts, so one pass is enough
        # and no per-group list of Counters has to be kept.
        totals_by_key.setdefault(group_value, Counter()).update(data)
    # Use a dict display instead of dict(..., **{key: value}): the keyword
    # form raises TypeError whenever `key` is not a string.
    return [
        {**totals, key: group_value}
        for group_value, totals in totals_by_key.items()
    ]


if __name__ == '__main__':
    main()

Python - 基于相同的键对字典列表中的值求和

7 个答案: