我有一个非常基本的(但不是很有用的)函数,可以通过将键值添加到列表中来保持跟踪,从键值中重复删除字典列表。
def dedupe(rs):
delist = []
for r in rs:
if r['key'] not in delist:
delist.append(r['key'])
else:
rs.remove(r)
return rs
在下面的脚本中使用了两个字典列表:
from pprint import pprint
records = [
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:00:00', '00:05:54'],
['00:05:55', '00:07:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:05:55', '00:07:54'],
['00:00:00', '00:05:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:16:47', '00:20:04'],
['00:00:00', '00:05:54'],
['00:05:55', '00:07:54']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:07:55', '00:11:23'], ['00:11:24', '00:16:46']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:11:24', '00:16:46'], ['00:07:55', '00:11:23']]},
{'key': 'Item 3', 'name': 'Item 3', 'positions': [['00:20:05', '00:25:56']]}
]
records2 = [
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:00:00', '00:05:54'],
['00:05:55', '00:07:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:05:55', '00:07:54'],
['00:00:00', '00:05:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:07:55', '00:11:23'], ['00:11:24', '00:16:46']]},
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:16:47', '00:20:04'],
['00:00:00', '00:05:54'],
['00:05:55', '00:07:54']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:11:24', '00:16:46'], ['00:07:55', '00:11:23']]},
{'key': 'Item 3', 'name': 'Item 3', 'positions': [['00:20:05', '00:25:56']]}
]
def dedupe(rs):
delist = []
for r in rs:
if r['key'] not in delist:
delist.append(r['key'])
else:
rs.remove(r)
return rs
if __name__ == '__main__':
res = dedupe(records)
res2 = dedupe(records2)
pprint(res)
pprint(res2)
对于records
或records2
,我希望得到:
[
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:00:00', '00:05:54'],
['00:05:55', '00:07:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:07:55', '00:11:23'], ['00:11:24', '00:16:46']]},
{'key': 'Item 3',
'name': 'Item 3',
'positions': [['00:20:05', '00:25:56']]}
]
但我得到了(对于两个输入中的每一个):
[
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:00:00', '00:05:54'],
['00:05:55', '00:07:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:16:47', '00:20:04'],
['00:00:00', '00:05:54'],
['00:05:55', '00:07:54']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:07:55', '00:11:23'], ['00:11:24', '00:16:46']]},
{'key': 'Item 3', 'name': 'Item 3', 'positions': [['00:20:05', '00:25:56']]}
]
[
{'key': 'Item 1',
'name': 'Item 1',
'positions': [['00:00:00', '00:05:54'],
['00:05:55', '00:07:54'],
['00:16:47', '00:20:04']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:07:55', '00:11:23'], ['00:11:24', '00:16:46']]},
{'key': 'Item 2',
'name': 'Item 2',
'positions': [['00:11:24', '00:16:46'], ['00:07:55', '00:11:23']]},
{'key': 'Item 3', 'name': 'Item 3', 'positions': [['00:20:05', '00:25:56']]}
]
我一直盯着并调整这一点,但我不清楚为什么它不按顺序删除第三个实例(records
),或者为三个实例工作,但是如果有三个实例的那个被分解(records2
),那么就会失败。{/ p>
答案 0 :(得分:2)
迭代时我不会从迭代器中删除元素。
而是这样做:
def dedupe(rs):
delist = []
new_rs = []
for r in rs:
if r['key'] not in delist:
print r['key']
delist.append(r['key'])
new_rs.append(r)
return new_rs