我有一个列表
# Sample input rows. Field 0 is the row id and field 1 is the key that
# must end up unique in the result.
old_list = [
    (1, 'AAA', None, 1),
    (2, 'AAA', 'x', 0),
    (5, 'AAB', 'z', 1),
    (6, 'ABB', 'x', 1),
    (9, 'ABB', 'x', 1),
]
我想得到一个新列表:其中 i[1] 的值唯一,并且对每个 i[1] 只保留 id(即 i[0])最大的那一项,就像下面这个结果:
# Expected result: one row per distinct field-1 value, keeping the row
# whose id (field 0) is the largest for that value.
new_list = [
    (2, 'AAA', 'x', 0),
    (5, 'AAB', 'z', 1),
    (9, 'ABB', 'x', 1),
]
有人可以帮助我吗?
答案 0 :(得分:3)
您可以使用itertools.groupby
from itertools import groupby
from operator import itemgetter

# Sample rows: field 0 is the id, field 1 is the grouping key.
old_list = [
    (1, 'AAA', None, 1),
    (2, 'AAA', 'x', 0),
    (5, 'AAB', 'z', 1),
    (6, 'ABB', 'x', 1),
    (9, 'ABB', 'x', 1),
]

# groupby() only merges *adjacent* rows that share a key, so old_list must
# already be ordered by field 1 (this sample is).
# From each run keep the row with the largest id. max() finds it in a single
# pass, so there is no need to sort the whole group first. If several rows
# of a run tie on id, the first of them is kept.
new_list = [
    max(group, key=itemgetter(0))
    for _, group in groupby(old_list, key=itemgetter(1))
]

# Function-call form prints the same text on Python 2 and Python 3.
print(new_list)
**输出**
[(2, 'AAA', 'x', 0), (5, 'AAB', 'z', 1), (9, 'ABB', 'x', 1)]
如果 old_list 尚未按 i[1] 排序,可以像这样先对其进行排序:
# Build the rows, then order them in place by field 1. list.sort() is
# stable, so rows that share a key keep their relative order.
old_list = [
    (1, 'AAA', None, 1),
    (2, 'AAA', 'x', 0),
    (5, 'AAB', 'z', 1),
    (6, 'ABB', 'x', 1),
    (9, 'ABB', 'x', 1),
]
old_list.sort(key=itemgetter(1))
答案 1 :(得分:0)
你可以这样做:
# Bucket the rows of old_list by their key (field 1).
d = {}
for row in old_list:
    d.setdefault(row[1], []).append(row)

# Visit the keys in sorted order ('AAA', 'AAB', etc.) and keep, from each
# bucket, the row whose id (field 0) is the largest.
new_list = [max(d[key], key=lambda row: row[0]) for key in sorted(d)]
# For the sample data:
# [(2, 'AAA', 'x', 0), (5, 'AAB', 'z', 1), (9, 'ABB', 'x', 1)]
修改
如果不可能出现重复的 ID(并且 old_list 中每个键对应的行已按 ID 升序排列),您可以这样做:
# Bucket the rows of old_list by their key (field 1), preserving the order
# in which they appear in old_list.
d = {}
for row in old_list:
    d.setdefault(row[1], []).append(row)

# For every key, in sorted key order, keep the row that was appended last,
# i.e. the one that appears latest in old_list for that key.
new_list = [d[key][-1] for key in sorted(d)]
样本数据的答案相同。
from collections import defaultdict
# (key, value) pairs that the benchmark functions group by key.
s = [
    ('yellow', 1),
    ('blue', 2),
    ('yellow', 3),
    ('blue', 4),
    ('red', 1),
]
def f1():
    """Group the values of ``s`` by key using ``defaultdict(list)``.

    Returns the ``items()`` of the resulting mapping from each key to the
    list of values seen for it, in order of appearance.
    """
    grouped = defaultdict(list)
    for key, value in s:
        # Missing keys are created as empty lists on first access.
        bucket = grouped[key]
        bucket.append(value)
    return grouped.items()
def f2():
    """Group the values of ``s`` by key using ``dict.setdefault``.

    Returns the ``items()`` of the resulting mapping from each key to the
    list of values seen for it, in order of appearance.
    """
    grouped = {}
    for key, value in s:
        # setdefault() inserts an empty list the first time a key is seen.
        bucket = grouped.setdefault(key, [])
        bucket.append(value)
    return grouped.items()
if __name__ == '__main__':
    # Benchmark driver: report the interpreter version, then time each
    # grouping function with timeit's default number of repetitions.
    import sys
    import timeit

    print(sys.version)

    defaultdict_secs = timeit.timeit("f1()", setup="from __main__ import f1, s")
    print('defaultdict:', defaultdict_secs)

    setdefault_secs = timeit.timeit("f2()", setup="from __main__ import f2, s")
    print('setdefault:', setdefault_secs)
打印:
3.3.2 (default, Jul 6 2013, 10:40:18)
[GCC 4.2.1 Compatible Apple LLVM 4.2 (clang-425.0.28)]
defaultdict: 2.384568103996571
setdefault: 1.6183147379779257
对各个回答的计时表明,这是一种更快的方法:
from __future__ import print_function
from collections import OrderedDict
import itertools
from operator import itemgetter
# Benchmark input rows. The functions below group on field 1 and use
# field 0 as the row id.
old_list = [
    (1, 'AAA', None, 1),
    (2, 'AAA', 'x', 0),
    (5, 'AAB', 'z', 1),
    (6, 'ABB', 'x', 1),
    (9, 'ABB', 'x', 1),
]
def f1():
    """Return, for each key in sorted order, the last row seen for that key.

    Rows of the module-level ``old_list`` are bucketed by field 1 with
    ``dict.setdefault``; the final element of every bucket is the row that
    appears latest in ``old_list`` for that key.
    """
    buckets = {}
    for row in old_list:
        buckets.setdefault(row[1], []).append(row)
    return [buckets[name][-1] for name in sorted(buckets)]
def f2():
    """Return the row with the largest id for each key, ordered by key.

    Rows of the module-level ``old_list`` are sorted by (field 1, field 0),
    i.e. by key and then by id, and inserted into an ``OrderedDict`` keyed
    on field 1. A later insert for an existing key replaces the stored row
    but keeps the key's position, so every key ends up mapped to its
    highest-id row and the keys stay in sorted order.

    Returns:
        A list of rows, one per distinct key.
    """
    # Sort on (key, id), not on field 2: field 2 mixes None with strings
    # (unorderable on Python 3) and does not determine which row of a key
    # carries the largest id.
    nl = sorted(old_list, key=itemgetter(1, 0))
    # list() gives the same concrete type on Python 2 and 3, matching the
    # lists returned by the other benchmarked functions.
    return list(OrderedDict((elem[1], elem) for elem in nl).values())
def f3():
    """Return the row with the largest id for each key, ordered by key.

    The module-level ``old_list`` is sorted by field 1 so that
    ``itertools.groupby`` sees each key as one contiguous run; every run is
    then sorted by field 0 in descending order and its first row is kept.
    """
    by_key = itemgetter(1)
    by_id = itemgetter(0)
    ordered = sorted(old_list, key=by_key)
    winners = []
    for _, members in itertools.groupby(ordered, key=by_key):
        # Descending sort puts the largest id at the front of the run.
        ranked = sorted(members, key=by_id, reverse=True)
        winners.append(ranked[0])
    return winners
if __name__ == '__main__':
    # Benchmark driver: report the interpreter version, then time each
    # candidate implementation with timeit's default number of repetitions.
    import sys
    import timeit

    print(sys.version)

    drewk_secs = timeit.timeit("f1()", setup="from __main__ import f1, old_list")
    print('drewk:', drewk_secs)

    abhijit_secs = timeit.timeit(
        "f2()",
        setup="from __main__ import f2, old_list, OrderedDict, itemgetter",
    )
    print('Abhijit:', abhijit_secs)

    thefourtheye_secs = timeit.timeit(
        "f3()",
        setup="from __main__ import f3, old_list, itertools",
    )
    print('thefourtheye:', thefourtheye_secs)
打印:
2.7.5 (default, Aug 25 2013, 00:04:04)
[GCC 4.2.1 Compatible Apple LLVM 5.0 (clang-500.0.68)]
drewk: 3.30526208878
Abhijit: 20.5611379147
thefourtheye: 13.2195081711