Python - 循环遍历字典列表以删除冗余数据

时间:2016-07-15 13:34:19

标签: python dictionary lambda

我有一个看起来像这样的字典:

>>> testd
{'0CD7D6FE4A6411E61693005056AA00F2': 
[
{'USERNAME': 'abc', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:13:45.783199', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'RQ', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8E019BB90637BE0533BFA020A8A47', 'VALUE': 3361231, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': '0CD7D6FE4A6411E61693005056AA00F2'}, 
{'USERNAME': 'abc', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:13:45.799144', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'RQ GEN', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8E019BB90637BE0533BFA020A8A47', 'VALUE': 3361231, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': '0CD7D6FE4A6411E61693005056AA00F2'}, 
{'USERNAME': 'abc', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:13:45.819618', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'SN', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8E019BB90637BE0533BFA020A8A47', 'VALUE': 3361231, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': '0CD7D6FE4A6411E61693005056AA00F2'}, 
{'USERNAME': 'abc', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:13:46.973626', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'RQ', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8E19D8013635EE0533BFA020AFB7B', 'VALUE': 3361231, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': '0CD7D6FE4A6411E61693005056AA00F2'}, 
{'USERNAME': 'abc', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:13:47.447108', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'GEN', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8E19D8013635EE0533BFA020AFB7B', 'VALUE': 3361231, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': '0CD7D6FE4A6411E61693005056AA00F2'}, 
{'USERNAME': 'abc', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:13:47.462248', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'SN', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8E19D8013635EE0533BFA020AFB7B', 'VALUE': 3361231, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': '0CD7D6FE4A6411E61693005056AA00F2'}], 
'C4F0895E4A6211E637A6005056AA0102': 
[
{'USERNAME': 'newuser', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:04:35.432990', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'RQ', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8NEW5B78416375E0533BFA020AA0D3', 'VALUE': 3360119, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': 'C4F0895E4A6211E637A6005056AA0102'}, 
{'USERNAME': 'newuser', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:04:35.444645', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'RQ GEN', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8NEW5B78416375E0533BFA020AA0D3', 'VALUE': 3360119, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': 'C4F0895E4A6211E637A6005056AA0102'}, 
{'USERNAME': 'newuser', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:04:35.456260', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'SN', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8NEW5B78416375E0533BFA020AA0D3', 'VALUE': 3360119, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': 'C4F0895E4A6211E637A6005056AA0102'}, 
{'USERNAME': 'newuser', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:04:36.036600', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'RQ', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8E2D1BB356397E0533BFA020AACC2', 'VALUE': 3360119, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': 'C4F0895E4A6211E637A6005056AA0102'}, 
{'USERNAME': 'newuser', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:04:36.374838', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'GEN', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8E2D1BB356397E0533BFA020AACC2', 'VALUE': 3360119, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': 'C4F0895E4A6211E637A6005056AA0102'}, 
{'USERNAME': 'newuser', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:04:36.398159', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'SN', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8E2D1BB356397E0533BFA020AACC2', 'VALUE': 3360119, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': 'C4F0895E4A6211E637A6005056AA0102'}]
}

还有另一个字典,用于根据键以及子字典中的 'ID' 键在上面的字典中进行查找。这个查找字典(lookup_dict)看起来像这样:

>>> testld
{'0CD7D6FE4A6411E61693005056AA00F2': '37A8E019BB90637BE0533BFA020A8A47', 'C4F0895E4A6211E637A6005056AA0102': '37A8NEW5B78416375E0533BFA020AA0D3'}

我想要的结果是:

{'0CD7D6FE4A6411E61693005056AA00F2': 
[{'USERNAME': 'abc', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:13:45.783199', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'RQ', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8E019BB90637BE0533BFA020A8A47', 'VALUE': 3361231, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': '0CD7D6FE4A6411E61693005056AA00F2'}, 
{'USERNAME': 'abc', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:13:45.799144', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'RQ GEN', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8E019BB90637BE0533BFA020A8A47', 'VALUE': 3361231, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': '0CD7D6FE4A6411E61693005056AA00F2'}, 
{'USERNAME': 'abc', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:13:45.819618', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'SN', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8E019BB90637BE0533BFA020A8A47', 'VALUE': 3361231, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': '0CD7D6FE4A6411E61693005056AA00F2'}],
'C4F0895E4A6211E637A6005056AA0102': 
[{'USERNAME': 'newuser', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:04:35.432990', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'RQ', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8NEW5B78416375E0533BFA020AA0D3', 'VALUE': 3360119, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': 'C4F0895E4A6211E637A6005056AA0102'}, 
{'USERNAME': 'newuser', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:04:35.444645', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'RQ GEN', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8NEW5B78416375E0533BFA020AA0D3', 'VALUE': 3360119, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': 'C4F0895E4A6211E637A6005056AA0102'}, 
{'USERNAME': 'newuser', 'REPORT': 'NEW', 'CLASS': 'IR', 'CREATED': '15/07/2016 08:04:35.456260', 'TRANSACTION_TYPE': 'Elec', 'STATE': 'SN', 'REPORT_TYPE': 'RT', 'MESSAGE_TYPE': None, 'ID': '37A8NEW5B78416375E0533BFA020AA0D3', 'VALUE': 3360119, 'TRANSACTION_MODE': 'New', 'MAJOR_VERSION_NUMBER': 1, 'VID': 'C4F0895E4A6211E637A6005056AA0102'}]
}

期望的结果仍然是 testd 字典,只是在原始数据的基础上做了一些处理。说明如下:

  1. testd和testld字典键相同(也是testd列表下每个子字典的VID值)
  2. testld 的值是与对应 testd 条目相关联的 ID 键(位于 testd 的子字典中)的值。现在每个 testd 列表包含 6 个键数量相同的字典,其中 3 个具有相同的 ID 值,另外 3 个具有另一个 ID 值,这两个 ID 中有一个与 testld 的值匹配。我需要删除那 3 个 ID 值与 testld 的值不匹配的字典(仅针对该 VID)
  3. 我为实现这一目标而创建的代码如下:

    >>> for key, value in testd.iteritems():  # for each VID, try to drop the records whose ID differs from testld[VID]
    ...     for d in value:  # `value` is the same list object as testd[key], which is mutated below
    ...         if testld[key] != d['ID']:
    ...             testd[key].remove(dict)  # removing from the list being iterated makes the iterator skip the next element. NOTE(review): `dict` names the builtin type here (unless rebound elsewhere); presumably the loop variable `d` was meant — confirm.
    

    我的代码没有按预期工作:它并没有删除 3 个字典,实际上只删除了 1 个,我不知道为什么!

3 个答案:

答案 0 :(得分:2)

您现在可以按 VID 快速查找。您想要的是先按 VID、再按 ID 查找,这意味着需要双重嵌套。

您可以使用现有映射的另一个嵌套来完成此操作:

>>> from collections import defaultdict
>>> by_vid_by_id = dict(testd)
>>> for key, value in by_vid_by_id.items():
...     new_value = defaultdict(list)
...     by_vid_by_id[key] = new_value
...     for d in value:
...         new_value[d['ID']].append(d)
... 

用法示例:

>>> by_vid_by_id['0CD7D6FE4A6411E61693005056AA00F2']['37A8E019BB90637BE0533BFA020A8A47']
[
    {'USERNAME': 'abc', 'CREATED': '15/07/2016 08:13:45.783199', 'VID': '0CD7D6FE4A6411E61693005056AA00F2', 'VALUE': 3361231, 'TRANSACTION_TYPE': 'Elec', 'STATE': 'RQ', 'REPORT_TYPE': 'RT', 'REPORT': 'NEW', 'MESSAGE_TYPE': None, 'ID': '37A8E019BB90637BE0533BFA020A8A47', 'MAJOR_VERSION_NUMBER': 1, 'TRANSACTION_MODE': 'New', 'CLASS': 'IR'},
    {'USERNAME': 'abc', 'CREATED': '15/07/2016 08:13:45.799144', 'VID': '0CD7D6FE4A6411E61693005056AA00F2', 'VALUE': 3361231, 'TRANSACTION_TYPE': 'Elec', 'STATE': 'RQ GEN', 'REPORT_TYPE': 'RT', 'REPORT': 'NEW', 'MESSAGE_TYPE': None, 'ID': '37A8E019BB90637BE0533BFA020A8A47', 'MAJOR_VERSION_NUMBER': 1, 'TRANSACTION_MODE': 'New', 'CLASS': 'IR'},
    {'USERNAME': 'abc', 'CREATED': '15/07/2016 08:13:45.819618', 'VID': '0CD7D6FE4A6411E61693005056AA00F2', 'VALUE': 3361231, 'TRANSACTION_TYPE': 'Elec', 'STATE': 'SN', 'REPORT_TYPE': 'RT', 'REPORT': 'NEW', 'MESSAGE_TYPE': None, 'ID': '37A8E019BB90637BE0533BFA020A8A47', 'MAJOR_VERSION_NUMBER': 1, 'TRANSACTION_MODE': 'New', 'CLASS': 'IR'}]
>>> 

为方便起见,我使用了 defaultdict,但重点是把现有字典中的列表值替换为映射(字典)值,以便快速查找。

答案 1 :(得分:0)

如果你想要一个新的字典,可以试试这个:

# Build a new dict that keeps, for every VID, only the records whose 'ID'
# equals the ID registered for that VID in testld. testd is left untouched.
newd = {}
for key, records in testd.items():  # .items() works on both Python 2 and 3
    # Looked up once per VID; raises KeyError if testd has a key that
    # testld does not.
    wanted_id = testld[key]
    newd[key] = [record for record in records if record['ID'] == wanted_id]

如果 testd 中有一个不在 testld 中的键,它会抛出 KeyError,但这种情况似乎不应该发生。

答案 2 :(得分:0)

通过这样做解决了它

        # Filter every VID's record list in place, keeping only the records
        # whose 'ID' equals the ID registered for that VID in testld.
        for key, value in testd.items():
            # Raises KeyError if testd has a key that testld does not.
            wanted_id = testld[key]
            # Slice assignment replaces the contents of the existing list
            # object, so the list is never mutated while it is being iterated
            # (which would make the iterator skip elements), and only the
            # 'ID' field is compared rather than every field of the record.
            value[:] = [items for items in value if items['ID'] == wanted_id]