我有一个字典列表,如下所示:
data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
{'stat3': '8', 'stat2': '1', 'player': '1'},
{'stat3': '6', 'stat2': '1', 'player': '3'},
{'stat3': '3', 'stat2': '7', 'player': '3'}]
我希望得到一个嵌套字典:它以 'player' 键对应的值作为键,以聚合后的统计数据字典作为值。
输出应该是:
{'3': {'stat3': 9, 'stat2': 8, 'player': '3'},
'1': {'stat3': 13, 'stat2': 5, 'player': '1'}}
以下是我的代码:
from collections import defaultdict
result = {}
# NOTE(review): this is the question's code, kept as asked. There is only ONE
# int counter per player, so stat3 and stat2 are both added into the same
# number; that is why the output quoted below shows identical totals for both.
total_stat = defaultdict(int)
for dict in data:  # NOTE(review): the loop variable shadows the builtin `dict`
    total_stat[dict['player']] += int(dict['stat3'])
    total_stat[dict['player']] += int(dict['stat2'])
# Rebinds total_stat from the counter mapping to a list of per-player dicts,
# ordered by player key in descending order.
total_stat = ([{'player': info, 'stat3': total_stat[info],
                'stat2': total_stat[info]} for info in
               sorted(total_stat, reverse=True)])
for item in total_stat:
    result.update({item['player']: item})
print(result)
然而,我得到了这个:
{'3': {'player': '3', 'stat3': 17, 'stat2': 17},
'1': {'player': '1', 'stat3': 18, 'stat2': 18}}
我该怎样才能得到正确的结果?或者有没有其他方法?
答案 0 :(得分:11)
您的数据本质上就是一个 DataFrame,因此很自然的 pandas 解决方案是:
In [34]: pd.DataFrame.from_records(data).astype(int).groupby('player').sum().T.to_dict()
Out[34]: {1: {'stat2': 5, 'stat3': 13}, 3: {'stat2': 8, 'stat3': 9}}
答案 1 :(得分:5)
只需使用再多嵌套一层的默认工厂(default factory):
>>> total_stat = defaultdict(lambda : defaultdict(int))
>>> value_fields = 'stat2', 'stat3'
>>> for datum in data:
...     player_data = total_stat[datum['player']]
...     for k in value_fields:
...         player_data[k] += int(datum[k])
...
>>> from pprint import pprint
>>> pprint(total_stat)
defaultdict(<function <lambda> at 0x1023490d0>,
{'1': defaultdict(<class 'int'>, {'stat2': 5, 'stat3': 13}),
'3': defaultdict(<class 'int'>, {'stat2': 8, 'stat3': 9})})
答案 2 :(得分:3)
此解决方案使用嵌套字典:out 是一个 {player: Counter} 字典,其中每个 Counter 本身又是一个 {stat: score} 字典。
import collections
def split_player_stat(dict_object):
    """Split one row of data into its player and its integer stats.

    Args:
        dict_object: mapping with a ``'player'`` entry; every other value
            must be accepted by ``int()``.

    Returns:
        A ``(player, stats)`` tuple, where ``stats`` maps each non-player
        key to its value converted to ``int``.

    Raises:
        KeyError: if ``dict_object`` has no ``'player'`` entry.

    >>> split_player_stat({'stat3': '5', 'stat2': '4', 'player': '1'})
    ('1', {'stat3': 5, 'stat2': 4})
    """
    key = dict_object['player']
    value = {k: int(v) for k, v in dict_object.items() if k != 'player'}
    return key, value
# Sample rows; every value, including the stats, is a string.
data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
        {'stat3': '8', 'stat2': '1', 'player': '1'},
        {'stat3': '6', 'stat2': '1', 'player': '3'},
        {'stat3': '3', 'stat2': '7', 'player': '3'}]

# A missing player gets a fresh Counter on first access, and Counter.update()
# adds the incoming counts to the stored ones instead of replacing them.
out = collections.defaultdict(collections.Counter)
for player_stat in data:
    player, stat = split_player_stat(player_stat)
    out[player].update(stat)
print(out)
此解决方案的巧妙之处在于 collections.defaultdict 和 collections.Counter 这两个类,它们的行为都类似字典。
答案 3 :(得分:1)
这里的大多数解决方案都把问题弄得过于复杂了。让我们写得简单易懂一些。代码如下:
In [26]: result = {}
In [27]: req_key = 'player'
In [29]: for dct in data:
    ...:     player_val = dct.pop(req_key)
    ...:     result.setdefault(player_val, {req_key: player_val})
    ...:     for k, v in dct.items():
    ...:         result[player_val][k] = result[player_val].get(k, 0) + int(v)
In [30]: result
Out[30]:
{'1': {'player': '1', 'stat2': 5, 'stat3': 13},
'3': {'player': '3', 'stat2': 8, 'stat3': 9}}
这样就简单干净了。对于这个简单的问题,不需要任何导入。下面来解释一下这段代码:
result.setdefault(player_val, {'player': player_val})
如果 result 中还没有这个键,就会把它的默认值设为 {'player': '3'} 或 {'player': '1'}。
result[player_val][k] = result[player_val].get(k, 0) + int(v)
这一行把各条记录中相同键的值累加到该键下。
答案 4 :(得分:1)
这不是最好的代码,也不够 Pythonic,但我认为你应该能够借助它逐步排查出你的代码哪里出了错。
def sum_stats_by_player(data):
    """Sum ``stat3`` and ``stat2`` per player, printing a trace of each step.

    Args:
        data: iterable of dicts, each with ``"player"``, ``"stat3"`` and
            ``"stat2"`` keys; the two stats are converted with ``int()``.

    Returns:
        A dict mapping each player value to
        ``{"player": player, "stat3": total, "stat2": total}``.

    Raises:
        KeyError: if a row lacks one of the three keys.
        ValueError: if a stat string is not a valid integer.
    """
    result = {}
    for dictionary in data:
        print(f"evaluating dictionary {dictionary}")
        player = dictionary["player"]
        stat3 = int(dictionary["stat3"])
        stat2 = int(dictionary["stat2"])

        # First row for this player: start an entry that records the player.
        if player not in result:
            print(f"\tfirst time player {player}")
            result[player] = {"player": player}
        totals = result[player]

        if "stat3" not in totals:
            print(f"\tfirst time stat3 {stat3}")
            totals["stat3"] = stat3
        else:
            print(f"\tupdating stat3 {totals['stat3'] + stat3}")
            totals["stat3"] += stat3

        if "stat2" not in totals:
            print(f"\tfirst time stat2 {stat2}")
            totals["stat2"] = stat2
        else:
            print(f"\tupdating stat2 {totals['stat2'] + stat2}")
            totals["stat2"] += stat2
    return result
# Sample rows; every value, including the stats, is a string.
data = [{'stat3': '5', 'stat2': '4', 'player': '1'},
{'stat3': '8', 'stat2': '1', 'player': '1'},
{'stat3': '6', 'stat2': '1', 'player': '3'},
{'stat3': '3', 'stat2': '7', 'player': '3'}]
# The call prints its own step-by-step trace before the final dict is printed.
print(sum_stats_by_player(data))
答案 5 :(得分:0)
使用 Counter 的另一个版本:
import itertools
from collections import Counter
def count_group(group):
    """Sum the integer stats of all rows in *group*.

    Args:
        group: iterable of row dicts; the ``'player'`` entry is ignored and
            every other value is converted with ``int()``.

    Returns:
        A plain dict mapping each stat name to its total.
    """
    totals = Counter()
    for row in group:
        # Counter.update() adds to existing counts rather than replacing them.
        totals.update({k: int(v) for k, v in row.items() if k != 'player'})
    return dict(totals)
# groupby() only merges adjacent rows, so the rows are ordered by player first.
sorted_data = sorted(data, key=lambda row: row['player'])
grouped = itertools.groupby(sorted_data, key=lambda row: row['player'])
results = [(player, count_group(rows)) for player, rows in grouped]
print(results)
输出为:
[('1', {'stat3': 13, 'stat2': 5}), ('3', {'stat3': 9, 'stat2': 8})]
答案 6 :(得分:0)
用两个循环可以做到:
1. 按主键(这里是 player)对数据分组;
2. 聚合每一组中其余的统计信息。
这两项任务都是在下面的 aggregate_statistics 函数中完成的。
from collections import Counter
from pprint import pprint
def main():
    """Aggregate a small sample table by ``'player'`` and pretty-print it."""
    data = [{'player': 1, 'stat2': 4, 'stat3': 5},
            {'player': 1, 'stat2': 1, 'stat3': 8},
            {'player': 3, 'stat2': 1, 'stat3': 6},
            {'player': 3, 'stat2': 7, 'stat3': 3}]
    new_data = aggregate_statistics(data, 'player')
    pprint(new_data)
def aggregate_statistics(table, key):
    """Group the records of *table* by *key* and sum their other fields.

    Args:
        table: iterable of mappings that each contain *key*; the remaining
            values are added together per group.
        key: the field to group by. It may be any hashable value, not only
            a string.

    Returns:
        A list with one new dict per distinct *key* value, in first-seen
        order, holding the summed fields followed by ``key: value``. The
        input records are not modified.

    Raises:
        KeyError: if a record has no *key* entry.
    """
    totals_by_key = {}
    for record in table:
        fields = dict(record)  # copy, so pop() leaves the caller's record intact
        group = fields.pop(key)
        # Counter.update() adds to the stored counts instead of replacing them.
        totals_by_key.setdefault(group, Counter()).update(fields)
    # A dict display, unlike dict(..., **kwargs), also accepts non-string keys.
    return [{**totals, key: group} for group, totals in totals_by_key.items()]
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()