我正在尝试解析一个JSON文件,将其读入数据框后,其中有多个列包含嵌套数据。源JSON文件位于此处:https://nvd.nist.gov/vuln/data-feeds#JSON_FEED
我在Jupyter Notebook中使用Python,已经能够展平第一层嵌套,但无法展平其余的嵌套列。
这是我的代码:
import pandas as pd
from pandas.io.json import json_normalize
df_cve2019 = pd.read_json('https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2019.json.zip', compression='zip')
df_cve2019.head(2)
CVE_data_type CVE_data_format CVE_data_version CVE_data_numberOfCVEs CVE_data_timestamp CVE_Items
CVE MITRE 4 9826 2019-10-17T07:00Z {'cve': {'data_type': 'CVE', 'data_format': 'MITRE', 'data_version': '4.0', 'CVE_data_meta': {'ID': 'CVE-2019-0001', 'ASSIGNER': 'cve@mitre.org'}, 'problemtype': {'problemtype_data': [{'description': [{'lang': 'en', 'value': 'CWE-400'}]}]}, 'references': {'reference_data': [{'url': 'http://www.securityfocus.com/bid/106541', 'name': '106541', 'refsource': 'BID', 'tags': ['Third Party Advisory', 'VDB Entry']}, {'url': 'https://kb.juniper.net/JSA10900', 'name': 'https://kb.juniper.net/JSA10900', 'refsource': 'CONFIRM', 'tags': ['Vendor Advisory']}]}, 'description': {'description_data': [{'lang': 'en', 'value': 'Receipt of a malformed packet on MX Series devices with dynamic vlan configuration can trigger an uncontrolled recursion loop in the Broadband Edge subscriber management daemon (bbe-smgd), and lead to high CPU usage and a crash of the bbe-smgd service. Repeated receipt of the same packet can result in an extended denial of service condition for the device. 
Affected releases are Juniper Networks Junos OS: 16.1 versions prior to 16.1R7-S1; 16.2 versions prior to 16.2R2-S7; 17.1 versions prior to 17.1R2-S10, 17.1R3; 17.2 versions prior to 17.2R3; 17.3 versions prior to 17.3R3-S1; 17.4 versions prior to 17.4R2; 18.1 versions prior to 18.1R3; 18.2 versions prior to 18.2R2.'}]}}, 'configurations': {'CVE_data_version': '4.0', 'nodes': [{'operator': 'OR', 'cpe_match': [{'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.1:*:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.1:r1:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.1:r2:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.1:r3:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.1:r3-s10:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.1:r4:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.1:r5:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.1:r6:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.1:r6-s6:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.1:r7:*:*:*:*:*:*'}]}, {'operator': 'OR', 'cpe_match': [{'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.2:*:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.2:r1:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:16.2:r2:*:*:*:*:*:*'}]}, {'operator': 'OR', 'cpe_match': [{'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:17.1:*:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:17.1:r1:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:17.1:r2:*:*:*:*:*:*'}]}, {'operator': 'OR', 'cpe_match': [{'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:17.2:*:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:17.2:r1:*:*:*:*:*:*'}, {'vulnerable': True, 
'cpe23Uri': 'cpe:2.3:o:juniper:junos:17.2:r1-s7:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:17.2:r2:*:*:*:*:*:*'}]}, {'operator': 'OR', 'cpe_match': [{'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:17.3:*:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:17.3:r1:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:17.3:r2:*:*:*:*:*:*'}]}, {'operator': 'OR', 'cpe_match': [{'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:17.4:*:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:17.4:r1:*:*:*:*:*:*'}]}, {'operator': 'OR', 'cpe_match': [{'vulnerable': True, 'cpe23Uri': 'cpe:2.3:a:juniper:junos:18.1:*:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:a:juniper:junos:18.1:r1:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:a:juniper:junos:18.1:r2:*:*:*:*:*:*'}]}, {'operator': 'OR', 'cpe_match': [{'vulnerable': True, 'cpe23Uri': 'cpe:2.3:a:juniper:junos:18.2:*:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:a:juniper:junos:18.2:r1-s3:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:a:juniper:junos:18.2:r1-s4:*:*:*:*:*:*'}]}]}, 'impact': {'baseMetricV3': {'cvssV3': {'version': '3.0', 'vectorString': 'CVSS:3.0/AV:N/AC:H/PR:N/UI:N/S:U/C:N/I:N/A:H', 'attackVector': 'NETWORK', 'attackComplexity': 'HIGH', 'privilegesRequired': 'NONE', 'userInteraction': 'NONE', 'scope': 'UNCHANGED', 'confidentialityImpact': 'NONE', 'integrityImpact': 'NONE', 'availabilityImpact': 'HIGH', 'baseScore': 5.9, 'baseSeverity': 'MEDIUM'}, 'exploitabilityScore': 2.2, 'impactScore': 3.6}, 'baseMetricV2': {'cvssV2': {'version': '2.0', 'vectorString': 'AV:N/AC:M/Au:N/C:N/I:N/A:C', 'accessVector': 'NETWORK', 'accessComplexity': 'MEDIUM', 'authentication': 'NONE', 'confidentialityImpact': 'NONE', 'integrityImpact': 'NONE', 'availabilityImpact': 'COMPLETE', 'baseScore': 7.1}, 'severity': 'HIGH', 'exploitabilityScore': 8.6, 'impactScore': 6.9, 'acInsufInfo': False, 
'obtainAllPrivilege': False, 'obtainUserPrivilege': False, 'obtainOtherPrivilege': False, 'userInteractionRequired': False}}, 'publishedDate': '2019-01-15T21:29Z', 'lastModifiedDate': '2019-10-09T23:43Z'}
CVE MITRE 4 9826 2019-10-17T07:00Z {'cve': {'data_type': 'CVE', 'data_format': 'MITRE', 'data_version': '4.0', 'CVE_data_meta': {'ID': 'CVE-2019-0002', 'ASSIGNER': 'cve@mitre.org'}, 'problemtype': {'problemtype_data': [{'description': [{'lang': 'en', 'value': 'CWE-20'}]}]}, 'references': {'reference_data': [{'url': 'http://www.securityfocus.com/bid/106669', 'name': '106669', 'refsource': 'BID', 'tags': ['Third Party Advisory']}, {'url': 'https://kb.juniper.net/JSA10901', 'name': 'https://kb.juniper.net/JSA10901', 'refsource': 'CONFIRM', 'tags': ['Vendor Advisory']}, {'url': 'https://www.juniper.net/documentation/en_US/junos/topics/reference/command-summary/show-pfe-filter.html', 'name': 'https://www.juniper.net/documentation/en_US/junos/topics/reference/command-summary/show-pfe-filter.html', 'refsource': 'MISC', 'tags': ['Vendor Advisory']}]}, 'description': {'description_data': [{'lang': 'en', 'value': "On EX2300 and EX3400 series, stateless firewall filter configuration that uses the action 'policer' in combination with other actions might not take effect. When this issue occurs, the output of the command: show pfe filter hw summary will not show the entry for: RACL group Affected releases are Junos OS on EX2300 and EX3400 series: 15.1X53 versions prior to 15.1X53-D590; 18.1 versions prior to 18.1R3; 18.2 versions prior to 18.2R2. 
This issue affect both IPv4 and IPv6 firewall filter."}]}}, 'configurations': {'CVE_data_version': '4.0', 'nodes': [{'operator': 'OR', 'cpe_match': [{'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:15.1x53:d50:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:15.1x53:d51:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:15.1x53:d52:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:15.1x53:d55:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:15.1x53:d57:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:15.1x53:d58:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:o:juniper:junos:15.1x53:d59:*:*:*:*:*:*'}]}, {'operator': 'OR', 'cpe_match': [{'vulnerable': True, 'cpe23Uri': 'cpe:2.3:a:juniper:junos:18.1:*:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:a:juniper:junos:18.1:r1:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:a:juniper:junos:18.1:r2:*:*:*:*:*:*'}]}, {'operator': 'OR', 'cpe_match': [{'vulnerable': True, 'cpe23Uri': 'cpe:2.3:a:juniper:junos:18.2:*:*:*:*:*:*:*'}, {'vulnerable': True, 'cpe23Uri': 'cpe:2.3:a:juniper:junos:18.2:r1:*:*:*:*:*:*'}]}]}, 'impact': {'baseMetricV3': {'cvssV3': {'version': '3.0', 'vectorString': 'CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H', 'attackVector': 'NETWORK', 'attackComplexity': 'LOW', 'privilegesRequired': 'NONE', 'userInteraction': 'NONE', 'scope': 'UNCHANGED', 'confidentialityImpact': 'HIGH', 'integrityImpact': 'HIGH', 'availabilityImpact': 'HIGH', 'baseScore': 9.8, 'baseSeverity': 'CRITICAL'}, 'exploitabilityScore': 3.9, 'impactScore': 5.9}, 'baseMetricV2': {'cvssV2': {'version': '2.0', 'vectorString': 'AV:N/AC:L/Au:N/C:P/I:P/A:P', 'accessVector': 'NETWORK', 'accessComplexity': 'LOW', 'authentication': 'NONE', 'confidentialityImpact': 'PARTIAL', 'integrityImpact': 'PARTIAL', 'availabilityImpact': 'PARTIAL', 'baseScore': 7.5}, 'severity': 'HIGH', 'exploitabilityScore': 10.0, 
'impactScore': 6.4, 'acInsufInfo': False, 'obtainAllPrivilege': False, 'obtainUserPrivilege': False, 'obtainOtherPrivilege': False, 'userInteractionRequired': False}}, 'publishedDate': '2019-01-15T21:29Z', 'lastModifiedDate': '2019-10-09T23:43Z'}
然后我通过以下代码对CVE_Items列进行归一化:
df_CVE_Items_2019 = json_normalize(df_cve2019['CVE_Items'])
df_CVE_Items_2019.head(2)
并收到以下输出:
现在,我想展平诸如“configurations.nodes”和“cve.references.reference_data”之类的列,并以cve.CVE_data_meta.ID作为键(索引),为其中每一列分别创建一个单独的数据框。例如,对于configurations,我想要一个大致如下所示的df:
cve.CVE_data_meta.ID configurations.nodes.cpe_match.cpe23Uri ....
CVE-2019-0001 "cpe: 1..." ....
CVE-2019-0001 "cpe: 2..." ....
...
CVE-2019-0002 "cpe: 1..." ....
... ... ....
有人可以帮忙吗?