d1 = {'weight':1, 'data': { 'apples': 8, 'oranges': 7 } }
d2 = {'weight':3, 'data': { 'apples': 4, 'bananas': 3 } }
all_dictionaries = [d1, d2, ... ]
def mergeDictionariesWithWeight(all_dictionaries)
如何将这些字典合并在一起(如果键重叠,则将各个值乘以对应的权重后取加权平均)?
该函数将返回:
{ 'apples': 5, 'oranges': 7, 'bananas': 3 }
苹果是 5,因为 8 * .25 + 4 * .75 = 5
编辑:我刚写了一个取平均值的东西,就像这样。但当然它与我想做的事情完全不同,因为我把所有内容都放在一个列表中,然后除以长度。
result = {}
keymap = {}
for the_dict in dlist:
for (k, v) in the_dict.items():
if not keymap.has_key(k):
keymap[k] = []
keymap[k].append(v)
for (k, v) in keymap.items():
average = sum(int(x) for x in keymap[k]) / float(len(keymap[k]))
result[k] = float(average)
return result
答案 0 :(得分:7)
>>> from collections import defaultdict
>>> d=defaultdict(lambda:(0,0))
>>> for D in all_dictionaries:
... weight = D['weight']
... for k,v in D['data'].items():
... d[k]=d[k][0]+weight*v,d[k][1]+weight
...
>>> dict((k,v[0]/v[1]) for k,v in d.items())
{'apples': 5, 'oranges': 7, 'bananas': 3}
如果您需要浮点结果
>>> dict((k,1.*v[0]/v[1]) for k,v in d.items())
{'apples': 5.0, 'oranges': 7.0, 'bananas': 3.0}
关于 defaultdict 的说明
您经常会看到 defaultdict(int)、defaultdict(list) 甚至 defaultdict(set)。defaultdict 的参数必须是一个可以不带参数调用的可调用对象。每当发现键缺失时,就会调用该参数,并把它的返回值作为该键的默认值。
例如
>>> d=defaultdict(int)
>>> d[1]
0
>>> d['foo']
0
这通常用于计数,因为 int() 返回 0。如果你想要的默认值是 1 而不是 0,就稍微麻烦一些,因为无法给 int 传递参数;不过你真正需要的只是一个返回 1 的可调用对象,使用 lambda 函数可以毫不费力地做到。
>>> d=defaultdict(lambda:1)
>>> d[1]
1
>>> d['foo']
1
在这个答案中,我想跟踪加权总数和权重总和。我可以通过使用2元组作为默认值来完成此操作。
>>> d=defaultdict(lambda:(0,0))
>>> d[1]
(0, 0)
>>> d['foo']
(0, 0)
答案 1 :(得分:1)
这是一个先用临时字典把各项收集到列表中、然后再计算最终加权字典的解决方案。不使用临时字典也可以做到,但这种写法更容易理解。
from collections import defaultdict
def mergeDictionariesWithWeight(dlist):
    """Merge weighted dictionaries into a single dict of weighted averages.

    Each element of ``dlist`` is a mapping with a ``'weight'`` entry and a
    ``'data'`` entry; ``'data'`` maps keys to numeric values.

    Returns a plain ``dict``.  A key that occurs in exactly one input keeps
    its value unchanged (the weight is ignored, so an ``int`` stays an
    ``int``).  A key that occurs in several inputs gets the weighted average
    of its values as a ``float``.

    Raises ``ZeroDivisionError`` when a key occurs in several inputs whose
    weights sum to zero.
    """
    # First pass: collect every (weight, value) pair seen for each key.
    tmp = defaultdict(list)
    for d in dlist:
        weight = d['weight']
        for k, v in d['data'].items():
            tmp[k].append((weight, v))

    # Second pass: reduce each list of pairs to a single number.
    r = {}
    for k, pairs in tmp.items():
        # If there's just one item, ignore the weight.
        if len(pairs) == 1:
            r[k] = pairs[0][1]
        else:
            # Start the sum at 0.0 so the division below is a float division
            # even when all weights are integers.
            total_weight = sum((w for w, _ in pairs), 0.0)
            r[k] = sum(value * w / total_weight for w, value in pairs)
    return r
返回:{'apples':5.0,'oranges':7,'bananas':3}(因为8 * .25 + 4 * .75 = 5.0)
答案 2 :(得分:1)
试试这个:
def mergeDictionariesWithWeight(all_dictionaries):
    """Merge weighted dictionaries into a single dict of weighted averages.

    Each element of ``all_dictionaries`` is a mapping with a ``'weight'``
    entry and a ``'data'`` entry; ``'data'`` maps keys to numeric values.

    Returns a dict mapping every key found in any ``'data'`` mapping to the
    weighted average of its values, as a ``float``.  Only the weights of the
    inputs that actually contain a key take part in that key's average.

    Raises ``ZeroDivisionError`` when the weights contributing to a key sum
    to zero.
    """
    weighted_sums = {}   # key -> sum of weight * value
    weight_totals = {}   # key -> sum of the weights that contributed to key
    for dictionary in all_dictionaries:
        weight = dictionary['weight']
        for key, value in dictionary['data'].items():
            # .get(..., 0) makes the first occurrence of a key start from
            # zero instead of being skipped.
            weighted_sums[key] = weighted_sums.get(key, 0) + weight * value
            weight_totals[key] = weight_totals.get(key, 0) + weight

    # Normalize each key by its own total weight; float() forces true
    # division even with integer inputs.
    return dict(
        (key, total / float(weight_totals[key]))
        for key, total in weighted_sums.items()
    )


d1 = {'weight': 1, 'data': {'apples': 8, 'oranges': 7}}
d2 = {'weight': 3, 'data': {'apples': 4, 'bananas': 3}}
all_dictionaries = [d1, d2]
mergeDictionariesWithWeight(all_dictionaries)
答案 3 :(得分:1)
from collections import defaultdict
def merge_dictionaries_with_weight(all_dictionaries):
    """Merge weighted dictionaries into a mapping of weighted averages.

    Each element of ``all_dictionaries`` is a mapping with a ``'weight'``
    entry (anything ``float()`` accepts) and a ``'data'`` entry; ``'data'``
    maps keys to numeric values.

    Returns a ``defaultdict(int)`` mapping every key found in any ``'data'``
    mapping to the weighted average of its values, as a ``float``.  Because
    the result is a ``defaultdict``, looking up a key that is not present
    yields ``0`` and inserts it.

    Raises ``ZeroDivisionError`` when the weights contributing to a key sum
    to zero.
    """
    totals = defaultdict(int)   # key -> sum of contributing weights
    result = defaultdict(int)   # key -> sum of weight * value, then average
    for each in all_dictionaries:
        # Converting once to float makes every later operation a float one.
        weight = float(each['weight'])
        for key, value in each['data'].items():
            totals[key] += weight
            result[key] += weight * value
    # Turn each weighted sum into a weighted average in place.
    for key, total in totals.items():
        result[key] /= total
    return result
答案 4 :(得分:0)
在算法上与 gnibbler 的答案没有区别,但不知为何,生成器表达式让我很开心。
>>> from collections import defaultdict
>>> weights, values = defaultdict(int), defaultdict(int)
>>> key_weight_value = ((key, d['weight'], value)
for d in all_dictionaries
for key, value in d['data'].iteritems())
>>> for k, w, v in key_weight_value:
... weights[k], values[k] = weights[k] + w, values[k] + w * v
...
>>> dict((k, values[k] * 1.0 / weights[k]) for k in weights)
{'apples': 5.0, 'oranges': 7.0, 'bananas': 3.0}