我有一个像这样的字典:
mydict={
(a,1):0,
(a,2):0,
(a,3):0,
(a,4):1,
(a,5):2,
(a,6):2,
(a,7):0,
(a,8):0,
}
我想将其概括为
mysummarydict={
(a,1,3):0,
(a,4,4):1,
(a,5,6):2,
(a,7,8):0
}
这些值来自一个由互不重叠、但可能存在间隙的区间组成的数据集。第一个字典目前为每个单点保存一个条目,我想得到第二个字典,把取值相同的相邻点合并成一条摘要。你能指导我在 Python 2.6 中找到最好的解决方案吗? 谢谢
答案 0 :(得分:2)
from itertools import groupby
from operator import itemgetter
# Sample input: each key is a (label, position) pair, each value is the
# value observed at that position.
mydict = {
    ('a', 1): 0,
    ('a', 2): 0,
    ('a', 3): 0,
    ('a', 4): 1,
    ('a', 5): 2,
    ('a', 6): 2,
    ('a', 7): 0,
    ('a', 8): 0,
}


def groupkey(item):
    """Return the (label, value) pair of one ((label, position), value) item."""
    return item[0][0], item[1]


def summarize(items):
    """Collapse runs of adjacent positions sharing a label and a value.

    ``items`` is a sequence of ``((label, position), value)`` pairs that is
    already ordered by key, e.g. ``sorted(some_dict.items())``.  Positions
    are expected to be integers.

    Returns a dict mapping ``(label, first, last)`` to the common value,
    where ``first..last`` is a run of consecutive positions.  A missing
    position (a gap) always ends the current run, even when the value on
    both sides of the gap is the same.
    """
    summary = {}
    # The input is ordered by (label, position) but grouped by
    # (label, value): groupby only merges neighbouring items, so each group
    # is a stretch of the ordered data with one label and one value.
    for (char, value), group in groupby(items, key=groupkey):
        nums = [item[0][1] for item in group]
        start = prev = nums[0]
        for num in nums[1:]:
            if num != prev + 1:
                # Gap in the positions: close the current run here.
                summary[char, start, prev] = value
                start = num
            prev = num
        summary[char, start, prev] = value
    return summary


# sorted() works on both the list (Python 2) and the view (Python 3)
# returned by dict.items().
data = sorted(mydict.items())
result = summarize(data)
print(result)
结果:
{
('a', 1, 3): 0,
('a', 4, 4): 1,
('a', 5, 6): 2,
('a', 7, 8): 0,
}
答案 1 :(得分:0)
如果您将此数据存储在列表中,则会变得更加容易:
from itertools import groupby
from operator import itemgetter
# Values listed by position: the element at 0-based index i belongs to
# point i + 1.
mylist = [0, 0, 0, 1, 2, 2, 0, 0]


def interval(v):
    """Return the 1-based (first, last) positions spanned by one run.

    ``v`` is an iterator over items whose element 0 is a 0-based index,
    such as the groups produced by ``groupby(enumerate(...))``.  The
    iterator is consumed completely.
    """
    first = next(v)
    last = first
    # Drain the iterator; afterwards ``last`` holds its final item, or is
    # still the first item when the run has a single element.
    for last in v:
        continue
    return first[0] + 1, last[0] + 1


# Build the summary one run at a time: groupby yields each stretch of equal
# neighbouring values together with the value itself.
summary = {}
for run_value, run in groupby(enumerate(mylist), key=itemgetter(1)):
    summary[interval(run)] = run_value
print(summary)
给
{(5, 6): 2, (1, 3): 0, (7, 8): 0, (4, 4): 1}
答案 2 :(得分:0)
我找到了一种更短更快的方法:
from itertools import groupby
from operator import itemgetter
from time import clock
# Benchmark of three equivalent ways to build the summary dict.
#
# timeit.default_timer is the clock the timeit module itself uses for
# benchmarks; time.clock was removed in Python 3.8.  Imported here so this
# script section is self-contained.
from timeit import default_timer

mydict = {
    ('a', 1): 0,
    ('a', 2): 0,
    ('a', 3): 0,
    ('a', 4): 1,
    ('a', 5): 2,
    ('a', 6): 2,
    ('a', 7): 0,
    ('a', 8): 0,
}


def groupkey(item):
    """Return the (label, value) pair of one ((label, position), value) item."""
    return item[0][0], item[1]


# NOTE: all three variants group by (label, value) only, so positions on
# both sides of a gap that carry the same value end up in one interval.

# Per-iteration timings of variants 1, 2 and 3.
A, B, C = [], [], []
for i in range(1000):
    # Variant 1: sort the dict items and group with a Python key function.
    # groupkey is defined once above the loop so that re-creating the
    # function is not charged to this variant.
    t0 = default_timer()
    data = sorted(mydict.items())
    result1 = {}
    for v, group in groupby(data, key=groupkey):
        char, value = v
        nums = [item[0][1] for item in group]
        result1[char, min(nums), max(nums)] = value
    A.append(default_timer() - t0)
    # ----------------------------------------------------------------
    # Variant 2: flatten each item to [label, position, value] so that
    # itemgetter can build the key; the data is sorted, so the first and
    # last positions of a group are its minimum and maximum.
    t0 = default_timer()
    data = [[a, b, c] for ((a, b), c) in mydict.items()]
    data.sort()
    result2 = {}
    for (char, value), group in groupby(data, key=itemgetter(0, 2)):
        nums = [item[1] for item in group]
        result2[char, nums[0], nums[-1]] = value
    B.append(default_timer() - t0)
    # ----------------------------------------------------------------
    # Variant 3: same as variant 2, but the position lists are collected
    # in a list comprehension before the result dict is filled.
    t0 = default_timer()
    data = [[a, b, c] for ((a, b), c) in mydict.items()]
    data.sort()
    result3 = {}
    for ((char, value), nums) in [(cle, [item[1] for item in group])
                                  for cle, group in groupby(data, key=itemgetter(0, 2))]:
        result3[char, nums[0], nums[-1]] = value
    C.append(default_timer() - t0)

# Single-argument print(...) with %-formatting produces the same text on
# Python 2 and Python 3.
print('result1== %s' % (result1,))
print('result2== %s' % (result2,))
print('result3== %s' % (result3,))
print('result1==result2==result3== %s' % (result1 == result2 == result3))
# The three results are equal but must be distinct objects.
print('%s %s %s' % (result1 is result2, result2 is result3, result3 is result1))

# Best-of-N timings relative to variant 1.  Explicit field index {0} keeps
# str.format working on Python 2.6, which lacks auto-numbered fields.
baseline = min(A)
if baseline > 0:
    print('{0:.1%}.'.format(min(B) / baseline))
    print('{0:.1%}.'.format(min(C) / baseline))
else:
    # The timer did not advance during the fastest run of variant 1.
    print('timer resolution too coarse to compare timings')
结果:
result1 == {('a',5,6):2,('a',4,4):1,('a',7,8):0,('a',1, 3):0}
result2 == {('a',5,6):2,('a',4,4):1,('a',7,8):0,('a',1, 3):0}
result3 == {('a',5,6):2,('a',4,4):1,('a',7,8):0,('a',1, 3):0}
result1 == result2 == result3 == True
False False False
87.0%.
93.2%.