Python操纵json,列表和字典

时间:2016-02-01 21:25:41

标签: python arrays json list dictionary

抱歉打扰,我正在努力把这件事做完。

我试图获取以下数据 - (只从较大的json文件中采样,结构相同)

{
    "count": 394,
    "status": "ok",
    "data": [
        {
            "md5": "cd042ba78d0810d86755136609793d6d",
            "threatscore": 90,
            "threatlevel": 0,
            "avdetect": 0,
            "vxfamily": "",
            "domains": [
                "dynamicflakesdemo.com",
                "www.bountifulbreast.co.uk"
            ],
            "hosts": [
                "66.33.214.180",
                "64.130.23.5"
            ],
            "environmentId": "1"
        },
        {
            "md5": "4f3a560c8deba19c5efd48e9b6826adb",
            "threatscore": 65,
            "threatlevel": 0,
            "avdetect": 0,
            "vxfamily": "",
             "domains": [
                "px.adhigh.net"
            ],
            "hosts": [
                "130.211.155.133",
                "65.52.108.163",
                "172.225.246.16"
            ],
            "environmentId": "1"
        }
    ]
}

如果 "threatscore" 超过 70,我想把该条目添加到下面这个 json 结构中 - 例如:         "data": [             {                 "md5": "cd042ba78d0810d86755136609793d6d",                 "threatscore": 90,

{
"Event":
    {"date":"2015-11-25",
    "threat_level_id":"1",
    "info":"HybridAnalysis",
    "analysis":"0",
    "distribution":"0",
    "orgc":"SOC", 
    "Attribute": [
        {"type":"ip-dst",
        "category":"Network activity",
        "to_ids":True,
        "distribution":"3",
        "value":"66.33.214.180"},
        {"type":"ip-dst",
        "category":"Network activity",
        "to_ids":True,
        "distribution":"3",
        "value":"64.130.23.5"},
        {"type":"domain",
        "category":"Network activity",
        "to_ids":True,
        "distribution":"3",
        "value":"dynamicflakesdemo.com"},
        {"type":"domain",
        "category":"Network activity",
        "to_ids":True,
        "distribution":"3",
        "value":"www.bountifulbreast.co.uk"},
        {"type":"md5",
        "category":"Payload delivery",
        "to_ids":True,
        "distribution":"3",
        "value":"cd042ba78d0810d86755136609793d6d"}]
}
}

这是我的代码 -

from datetime import datetime
import os
import json
from pprint import pprint

# Script from the question: for every feed sample that passes the score filter
# it builds a MISP-style {"Event": {...}} dict, expands list-valued attributes
# into one attribute per value, and pretty-prints the result.

# Captured once; formatted below as the "date" of every event.
now = datetime.now()

# NOTE: this handle is never closed explicitly.
testFile = open("feed.json")
feed = json.load(testFile)


for x in feed['data']:
    # NOTE(review): the question text asks for scores over 70, but this test
    # keeps only scores strictly above 90.
    if x['threatscore'] > 90:
        data = {}
        data['Event']={}
        data['Event']["date"] = now.strftime("%Y-%m-%d")
        data['Event']["threat_level_id"] = "1"
        data['Event']["info"] = "HybridAnalysis"
        data['Event']["analysis"] = 0
        data['Event']["distribution"] = 3
        data['Event']["orgc"] = "Malware"
        data['Event']["Attribute"] = []
        if 'hosts' in x:
            # The hard-coded index 0 is only correct because this is the first
            # append into the freshly created list.
            data['Event']["Attribute"].append({'type': "ip-dst"})
            data['Event']["Attribute"][0]["category"] = "Network activity"
            data['Event']["Attribute"][0]["to-ids"] = True
            data['Event']["Attribute"][0]["distribution"] = "3"
            # Stores the whole 'hosts' value here; it is split per value below.
            data["Event"]["Attribute"][0]["value"] =x['hosts'] 
        if 'md5' in x:
            # The hard-coded index 1 assumes the 'hosts' attribute was appended
            # first; when 'hosts' is absent the list has a single item and the
            # [1] lookups raise IndexError.
            data['Event']["Attribute"].append({'type': "md5"})
            data['Event']["Attribute"][1]["category"] = "Payload delivery"
            data['Event']["Attribute"][1]["to-ids"] = True
            data['Event']["Attribute"][1]["distribution"]  = "3"
            data['Event']["Attribute"][1]['value'] = x['md5']
        if 'domains' in x:
            # Same positional assumption: index 2 exists only when both
            # previous attributes were appended.
            data['Event']["Attribute"].append({'type': "domain"})
            data['Event']["Attribute"][2]["category"] = "Network activity"
            data['Event']["Attribute"][2]["to-ids"] = True
            data['Event']["Attribute"][2]["distribution"] = "3"
            data['Event']["Attribute"][2]["value"] = x['domains']
        # Rebuild the attribute list with one entry per individual value.
        attributes = data["Event"]["Attribute"]
        data["Event"]["Attribute"] = []
        for attribute in attributes:
            # When "value" is a string (as for the md5 attribute) this loop
            # walks it character by character, producing one attribute per
            # character.
            for value in attribute["value"]:
                    # Values that are exactly one space are dropped.
                    if value == " ":
                        pass
                    else:
                        new_attr = attribute.copy()
                        new_attr["value"] = value
                        data["Event"]["Attribute"].append(new_attr)
        pprint(data)

# `data` is rebound on every matching sample, so only the last event built is
# written; if no sample matched, `data` is undefined at this point.
with open('output.txt', 'w') as outfile:
    json.dump(data, outfile)

现在输出看起来干净了一点,但 md5 的值(x['md5'])被逐个字母拆开了。我想这就像 L3viathan 之前说的那样,我一直在覆盖字典中的第一个元素……但我不知道怎样才能让它持续追加?

{'Event': {'Attribute': [{'category': 'Network activity',
                          'distribution': '3',
                          'to-ids': True,
                          'type': 'ip-dst',
                          'value': u'216.115.96.174'},
                         {'category': 'Network activity',
                          'distribution': '3',
                          'to-ids': True,
                          'type': 'ip-dst',
                          'value': u'64.4.54.167'},
                         {'category': 'Network activity',
                          'distribution': '3',
                          'to-ids': True,
                          'type': 'ip-dst',
                          'value': u'63.250.200.37'},
                         {'category': 'Payload delivery',
                          'distribution': '3',
                          'to-ids': True,
                          'type': 'md5',
                          'value': u'7'},
                         {'category': 'Payload delivery',
                          'distribution': '3',
                          'to-ids': True,
                          'type': 'md5',
                          'value': u'1'},

最后仍然会收到以下错误: Traceback (most recent call last):   File "hybridanalysis.py", line 34, in     data['Event']["Attribute"][1]["category"] = "Payload delivery" IndexError: list index out of range

最终目标是设置它以便我可以将事件发布到MISP中,但他们必须一次一个。

3 个答案:

答案 0 :(得分:1)

在该 json 中,"Attribute" 对应的值是一个只包含 1 个 dict 的列表,如下所示。

{'Event': {'Attribute': [{'category': 'Network activity',
                      'distribution': '3',
                      'to-ids': True,
                      'type': 'ip-dst',
                      'value': [u'54.94.221.70']}]
...

当您访问 data['Event']["Attribute"][1]["category"] 时,您取的是 Attribute 列表中的第二个元素(索引 1),而该列表只有一个元素,这就是您收到错误的原因。

答案 1 :(得分:1)

I think this should fix your problems. I added the attribute dictionary all in one go, and moved the data in a list (which is more appropriate), but you might want to remove the superfluous list which wraps the Events.

from datetime import datetime
import os
import json
from pprint import pprint

# Samples must score strictly above this value to become an event. The question
# asks for a threat score "over 70"; a cut-off of 90 would drop the 90-point
# sample that the expected output is built from.
THREAT_SCORE_THRESHOLD = 70

# (feed key, attribute type, attribute category), in the order the attributes
# are emitted for each event.
_ATTRIBUTE_SOURCES = (
    ('hosts', 'ip-dst', 'Network activity'),
    ('md5', 'md5', 'Payload delivery'),
    ('domains', 'domain', 'Network activity'),
)


def _as_values(raw):
    """Return *raw* as a list of individual values.

    ``hosts`` and ``domains`` are lists in the feed while ``md5`` is a single
    string. Wrapping scalars in a list keeps the caller from iterating a
    string character by character (which produced one md5 attribute per hex
    digit).
    """
    if raw is None:
        return []
    if isinstance(raw, (list, tuple)):
        return list(raw)
    return [raw]


def _is_blank(value):
    """Return True for None and for empty or whitespace-only strings."""
    return value is None or (hasattr(value, "strip") and not value.strip())


def build_event(sample, date):
    """Build one event dict from a single feed sample.

    Args:
        sample: one item of ``feed['data']``; may contain ``hosts`` (list),
            ``md5`` (string) and ``domains`` (list). Missing keys are skipped.
        date: the event date, already formatted as ``YYYY-MM-DD``.

    Returns:
        ``{"Event": {...}}`` whose ``"Attribute"`` list holds one entry per
        individual host, md5 and domain value, in that order.
    """
    attributes = []
    for key, attr_type, category in _ATTRIBUTE_SOURCES:
        for value in _as_values(sample.get(key)):
            if _is_blank(value):
                # A blank string is not a usable indicator.
                continue
            attributes.append({
                'type': attr_type,
                'category': category,
                # NOTE(review): the target structure shown in the question
                # spells this key "to_ids"; confirm which spelling the
                # receiving API expects before posting.
                'to-ids': True,
                'distribution': '3',
                'value': value,
            })

    return {
        'Event': {
            "date": date,
            "threat_level_id": "1",
            "info": "HybridAnalysis",
            "analysis": 0,
            "distribution": 3,
            "orgc": "Malware",
            "Attribute": attributes,
        }
    }


def build_events(feed, threshold=THREAT_SCORE_THRESHOLD, date=None):
    """Return one event per feed sample scoring strictly above *threshold*.

    Args:
        feed: the parsed feed; samples are read from ``feed['data']``.
        threshold: minimum score (exclusive) a sample needs to be reported.
        date: ``YYYY-MM-DD`` string stamped on every event; defaults to today.

    Returns:
        A list of event dicts, in feed order. Samples without a
        ``threatscore`` are treated as scoring 0.
    """
    if date is None:
        date = datetime.now().strftime("%Y-%m-%d")
    return [
        build_event(sample, date)
        for sample in feed.get('data', [])
        if (sample.get('threatscore') or 0) > threshold
    ]


if __name__ == "__main__":
    # `with` closes the feed file even if parsing fails.
    with open("feed.json") as feed_file:
        feed = json.load(feed_file)

    data_list = build_events(feed)

    with open('output.txt', 'w') as outfile:
        json.dump(data_list, outfile)

答案 2 :(得分:0)

谢谢L3viathan!下面是我如何调整它以不迭代MD5的。

    # Fragment of a per-sample loop body: `data`, `x` and `data_list` come from
    # the surrounding code, which is not shown here.
    # Rebuild the attribute list with one entry per individual value.
    attributes = data["Event"]["Attribute"]
    data["Event"]["Attribute"] = []
    for attribute in attributes:
        if attribute['type'] == 'md5':
            # The md5 is a single string, so copy it over whole instead of
            # iterating it (iteration would yield one character at a time).
            new_attr = attribute.copy()                        
            new_attr["value"] = str(x['md5'])
            data["Event"]["Attribute"].append(new_attr)
        else:
            # Other attributes are iterated so that each value gets its own
            # copy of the attribute dict.
            for value in attribute["value"]:
                new_attr = attribute.copy()                        
                new_attr["value"] = value
                data["Event"]["Attribute"].append(new_attr)
    data_list.append(data)

操作 json 似乎是学习列表和字典的好方法。