按位置对列表中的值分组求和 - python

时间:2017-05-15 21:37:22

标签: python dictionary

我有这个数据

# Each traffic record is a list whose fields are identified by position:
#   0 = src_ip, 1 = dst_ip, 2 = bytes, 3 = packets
# Example record: [u'192.168.2.9', u'23.67.224.83', u'10', u'2', u'*', u'*']

# Addresses whose traffic should be totalled.
list_ip = [
    "192.168.2.9",
    "192.168.2.8",
    "192.168.2.7",
    "192.168.2.6",
]

# Sample traffic records; every field, including the byte count, is a string.
list_traffic = [
    [u"192.168.2.9", u"23.67.224.83", u"10", u"2", u"*", u"*"],
    [u"23.67.208.186", u"192.168.2.9", u"10", u"1", u"*", u"*"],
    [u"192.168.2.7", u"74.125.139.124", u"10", u"1", u"*", u"*"],
    [u"23.67.208.186", u"192.168.2.7", u"10", u"1", u"*", u"*"],
    [u"23.67.208.186", u"192.168.2.7", u"10", u"1", u"*", u"*"],
    [u"192.168.2.9", u"74.125.139.125", u"10", u"1", u"*", u"*"],
    [u"23.67.208.186", u"192.168.2.7", u"10", u"1", u"*", u"*"],
]

我的目标是:对所有 src_ip = 192.168.2.9 的记录,将位置 2(字节数)的值求和;对 dst_ip = 192.168.2.9 的记录也做同样的处理。

src_ip 和 dst_ip 必须在 list_ip 中。

预期结果

# Result
# new_list_traffic = { 
#     "192.168.2.9": [20, 10],
#     "192.168.2.7": [10, 30],
# }
#192.168.2.9 = 20/10  download/upload bytes
#192.168.2.7 = 10/30 download/upload bytes

我尝试了以下方法,但这需要大约2秒钟,这是分组数据的最佳方式吗?

def trafico_clientes2(request):
    """Render ``trafico.html`` with per-address byte totals for the sample traffic.

    Each traffic record is a list laid out by position:
    0=src_ip, 1=dst_ip, 2=bytes, 3=packets, for example
    ``[u'192.168.2.9', u'23.67.224.83', u'10', u'2', u'*', u'*']``.

    For every address in ``list_ip`` that appears in at least one record, the
    result maps the address to a two-item list:
    index 0 sums the bytes of records where the address is the source
    (labelled "download" in the original comments) and index 1 sums the bytes
    of records where it is the destination ("upload").  Source and destination
    are checked independently, so a record between two listed addresses
    contributes to both of them.

    Args:
        request: The request object, passed straight through to ``render``.

    Returns:
        Whatever ``render`` returns for ``trafico.html`` with the context keys
        ``datos`` (raw records), ``lista_trafico`` (the totals) and
        ``total_tiempo`` (an ``(elapsed, "seconds")`` tuple).
    """
    # time.clock() was removed in Python 3.8; use perf_counter when it exists
    # and fall back to clock() only on interpreters that predate it.
    timer = getattr(time, "perf_counter", None) or time.clock
    start_time = timer()

    list_ip = ["192.168.2.9", "192.168.2.8", "192.168.2.7", "192.168.2.6"]
    list_traffic = [
        [u'192.168.2.9', u'23.67.224.83', u'10', u'2', u'*', u'*'],
        [u'23.67.208.186', u'192.168.2.9', u'10', u'1', u'*', u'*'],
        [u'192.168.2.7', u'74.125.139.124', u'10', u'1', u'*', u'*'],
        [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
        [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
        [u'192.168.2.9', u'74.125.139.125', u'10', u'1', u'*', u'*'],
        [u'23.67.208.186', u'192.168.2.7', u'10', u'1', u'*', u'*'],
    ]

    # A set gives constant-time membership tests; the list would be scanned
    # twice for every record.
    known_ips = set(list_ip)
    new_list_traffic = {}

    for traffic_ip in list_traffic:
        src_ip = traffic_ip[0]
        dst_ip = traffic_ip[1]
        byte_count = int(traffic_ip[2])  # fields arrive as strings

        if src_ip in known_ips:
            # Slot 0: bytes where the address is the source ("download").
            new_list_traffic.setdefault(src_ip, [0, 0])[0] += byte_count
        # Deliberately a separate ``if`` (not ``elif``): when both ends are
        # listed addresses the destination must be credited as well.
        if dst_ip in known_ips:
            # Slot 1: bytes where the address is the destination ("upload").
            new_list_traffic.setdefault(dst_ip, [0, 0])[1] += byte_count

    # Result for the sample data above:
    # new_list_traffic = {
    #     "192.168.2.9": [20, 10],
    #     "192.168.2.7": [10, 30],
    # }
    # 192.168.2.9 = 20/10  download/upload bytes
    # 192.168.2.7 = 10/30  download/upload bytes

    total_tiempo = timer() - start_time, "seconds"
    return render(
        request,
        'trafico.html',
        {
            "datos": list_traffic,
            "lista_trafico": new_list_traffic,
            "total_tiempo": total_tiempo,
        },
    )

非常感谢你的帮助。

1 个答案:

答案 0 :(得分:0)

>>> from collections import defaultdict
>>> results = defaultdict(int)
>>> resultd = defaultdict(int)

>>> for data in list_traffic:
...     results[data[0]] += int(data[2])
...     resultd[data[1]] += int(data[2])

>>> new_list_traffic = dict()
>>> for ip in list_ip:
...     new_list_traffic[ip] = [results[ip], resultd[ip]]
>>> new_list_traffic
{
    '192.168.2.8': [0, 0], 
    '192.168.2.9': [20, 10], 
    '192.168.2.6': [0, 0], 
    '192.168.2.7': [10, 30]
}