我有一个Python脚本,用于解析如下所示的JSON文件:
[
{
"_index": "bulletins",
"_type": "bulletin",
"_id": "OPENWRT-SA-000001",
"_score": null,
"_source": {
"lastseen": "2016-09-26T15:45:23",
"references": [
"http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-3193",
],
"affectedPackage": [
{
"OS": "OpenWrt",
"OSVersion": "15.05",
"packageVersion": "9.9.8-P3-1",
"packageFilename": "UNKNOWN",
"arch": "all",
"packageName": "bind",
"operator": "lt"
}
],
"edition": 1,
"description": "value in here,
"reporter": "OpenWrt Project",
"published": "2016-01-24T13:33:41",
"title": "bind: Security update (4 CVEs)",
"type": "openwrt",
"bulletinFamily": "unix",
"cvelist": [
"CVE-2015-8704",
],
"modified": "2016-01-24T13:33:41",
"id": "OPENWRT-SA-000001",
"href": "https://lists.openwrt.org/pipermail/openwrt-security-announce/2016-January/000001.html",
"cvss": {
"score": 7.1,
"vector": "AV:NETWORK/AC:MEDIUM/Au:NONE/C:NONE/I:NONE/A:COMPLETE/"
}
},
"sort": [
34872
]
},
我删除了一些值以保持帖子更短,但留下一些以试图保持结构。
我想从 _source 键中取出所有子键,将它们移到与 _source 相同的级别,然后删除 _source 键。
我解析JSON的代码是:
import json
import logging
import logging.handlers
import os
import pymongo
from pymongo import MongoClient
def import_json(mongo_server, mongo_port, vuln_folder):
    """Load every ``.json`` file of a folder into MongoDB, then delete the file.

    The folder is located by appending ``vuln_folder`` to the parent of the
    directory that contains this module. Each file is expected to hold a JSON
    array of records; every record has its ``_type`` key dropped and is then
    inserted into the ``vulnerabilities`` collection of the ``vuln_sets``
    database. Records rejected as duplicates are counted, not treated as
    errors.

    Args:
        mongo_server: Host passed to ``MongoClient``.
        mongo_port: Port passed to ``MongoClient``.
        vuln_folder: Path fragment concatenated onto the parent directory of
            this module's directory (so it normally starts with a separator).

    Returns:
        None. Any exception is logged with its traceback and swallowed, which
        stops the import at the point of failure.
    """
    try:
        logging.info('Connecting to MongoDB')
        client = MongoClient(mongo_server, mongo_port)
        db = client['vuln_sets']
        coll = db['vulnerabilities']
        logging.info('Connected to MongoDB')

        basepath = os.path.dirname(__file__)
        filepath = os.path.abspath(os.path.join(basepath, ".."))
        # Plain concatenation is kept on purpose: os.path.join() would discard
        # `filepath` entirely if `vuln_folder` begins with a path separator.
        archive_filepath = filepath + vuln_folder

        # List the folder explicitly. The previous code listed the result of
        # os.chdir(), which is always None, and changed the process-wide
        # working directory as a side effect.
        file_count = 0
        for item in os.listdir(archive_filepath):
            if not item.endswith('.json'):
                continue

            file_name = os.path.join(archive_filepath, item)
            vuln_counter = 0
            duplicate_count = 0
            logging.info('Currently processing %s', item)
            file_count += 1

            with open(file_name, 'r') as currentfile:
                vuln_content = json.load(currentfile)

            for vuln in vuln_content:
                try:
                    # pop() tolerates records that have no '_type' key; a bare
                    # `del` raised KeyError and aborted the whole import.
                    vuln.pop('_type', None)
                    # NOTE(review): Collection.insert() is deprecated in
                    # PyMongo 3 and removed in PyMongo 4 - switch to
                    # insert_one() once the target PyMongo version is confirmed.
                    coll.insert(vuln, continue_on_error=True)
                    vuln_counter += 1
                except pymongo.errors.DuplicateKeyError:
                    duplicate_count += 1

            logging.info('Added %s vulnerabilities for %s', vuln_counter, item)
            logging.info('Found %s duplicate records!', duplicate_count)
            # The file handle is already closed here, so removal is safe on
            # every platform.
            os.remove(file_name)

        logging.info('Processed %s files', file_count)
    except Exception as e:
        logging.exception(e)
您可以看到我已经删除了一个不需要的键,但该键并不包含所需的数据,因为我需要的是来自 _source 的子键。我不确定实现此目的的最佳方法是什么,也不确定以编程方式用新信息重新创建JSON文件是否正确;但除了将子键上移一级之外,我需要保持键的顺序和结构不变。
答案 0 :(得分:1)
您可以使用字典的 update() 方法来实现您想做的事情,但需要注意的是,字典并不保证键的顺序——参见:Key Order in Python Dictionaries。
以下是一种方法的示例,从字典定义开始。
# Sample record: the fields of interest are nested one level down, under
# the "_source" key.
d = {
    "_index": "bulletins",
    "_type": "bulletin",
    "_id": "OPENWRT-SA-000001",
    "_score": None,
    "_source": {
        "lastseen": "2016-09-26T15:45:23",
        "references": [
            "http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-3193",
        ],
        "affectedPackage": [
            {
                "OS": "OpenWrt",
                "OSVersion": "15.05",
                "packageVersion": "9.9.8-P3-1",
                "packageFilename": "UNKNOWN",
                "arch": "all",
                "packageName": "bind",
                "operator": "lt"
            }
        ],
        "edition": 1,
        "description": "value in here",
        "reporter": "OpenWrt Project",
        "published": "2016-01-24T13:33:41",
        "title": "bind: Security update (4 CVEs)",
        "type": "openwrt",
        "bulletinFamily": "unix",
        "cvelist": [
            "CVE-2015-8704",
        ],
        "modified": "2016-01-24T13:33:41",
        "id": "OPENWRT-SA-000001",
        "href": "https://lists.openwrt.org/pipermail/openwrt-security-announce/2016-January/000001.html",
        "cvss": {
            "score": 7.1,
            "vector": "AV:NETWORK/AC:MEDIUM/Au:NONE/C:NONE/I:NONE/A:COMPLETE/"
        }
    }
}

# Work on a shallow copy so that `d` itself is left untouched.
new_d = dict(d)
# Remove "_source" from the copy and merge its keys/values into the top
# level; a nested key overwrites any top-level key of the same name.
new_d.update(new_d.pop('_source'))
new_d的输出:
{'_id': 'OPENWRT-SA-000001',
'_index': 'bulletins',
'_score': None,
'_type': 'bulletin',
'affectedPackage': [{'OS': 'OpenWrt',
'OSVersion': '15.05',
'arch': 'all',
'operator': 'lt',
'packageFilename': 'UNKNOWN',
'packageName': 'bind',
'packageVersion': '9.9.8-P3-1'}],
'bulletinFamily': 'unix',
'cvelist': ['CVE-2015-8704'],
'cvss': {
'score': 7.1,
'vector': 'AV:NETWORK/AC:MEDIUM/Au:NONE/C:NONE/I:NONE/A:COMPLETE/'},
'description': 'value in here',
'edition': 1,
'href': 'https://lists.openwrt.org/pipermail/openwrt-security-announce/2016-January/000001.html',
'id': 'OPENWRT-SA-000001',
'lastseen': '2016-09-26T15:45:23',
'modified': '2016-01-24T13:33:41',
'published': '2016-01-24T13:33:41',
'references': ['http://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2015-3193'],
'reporter': 'OpenWrt Project',
'title': 'bind: Security update (4 CVEs)',
'type': 'openwrt'}
答案 1 :(得分:0)
我设法通过使用以下代码使其工作:
# Flatten each record before inserting it: keep every top-level key except
# '_type' and '_source', then lift the keys of '_source' up to the top level.
for vuln in vuln_content:
    try:
        # Filtering '_type' out here (instead of `del vuln['_type']`) leaves
        # the parsed record unmodified and does not raise KeyError when a
        # record has no '_type' key.
        new_vuln = {
            key: value
            for key, value in vuln.items()
            if key not in ('_type', '_source')
        }
        # Keys from '_source' overwrite top-level keys of the same name.
        new_vuln.update(vuln['_source'])
        coll.insert(new_vuln, continue_on_error=True)
        vuln_counter += 1
    except pymongo.errors.DuplicateKeyError:
        duplicate_count += 1