如何使用pandas将嵌套的JSON文件转换为CSV

时间:2018-04-04 07:47:21

标签: python json pandas csv

我正在使用的代码如下,截图是我目前得到的输出。我还希望把CVE_Items列也展开。

import pandas as pd

# Read the NVD feed directly into a DataFrame. As described in the question,
# the CVE_Items column still holds nested JSON objects after this step.
df = pd.read_json('nvdcve-1.0-modified.json')
# Write the frame out as CSV; nested values are not expanded into columns.
df.to_csv("test.csv")

JSON的格式如下。这是来自NVD网站的NVD数据。

{
  "CVE_data_type" : "CVE",
  "CVE_data_format" : "MITRE",
  "CVE_data_version" : "4.0",
  "CVE_data_numberOfCVEs" : "1000",
  "CVE_data_timestamp" : "2018-04-04T00:00Z",
  "CVE_Items" : [ {
    "cve" : {
      "data_type" : "CVE",
      "data_format" : "MITRE",
      "data_version" : "4.0",
      "CVE_data_meta" : {
        "ID" : "CVE-2001-1594",
        "ASSIGNER" : "cve@mitre.org"
      },
      "affects" : {
        "vendor" : {
          "vendor_data" : [ {
            "vendor_name" : "gehealthcare",
            "product" : {
              "product_data" : [ {
                "product_name" : "entegra_p&r",
                "version" : {
                  "version_data" : [ {
                    "version_value" : "*"
                  } ]
                }
              } ]
            }
          } ]
        }
      },
      "problemtype" : {
        "problemtype_data" : [ {
          "description" : [ {
            "lang" : "en",
            "value" : "CWE-255"
          } ]
        } ]
      },
      "references" : {
        "reference_data" : [ {
          "url" : "http://apps.gehealthcare.com/servlet/ClientServlet/2263784.pdf?DOCCLASS=A&REQ=RAC&DIRECTION=2263784-100&FILENAME=2263784.pdf&FILEREV=5&DOCREV_ORG=5&SUBMIT=+ACCEPT+"
        }, {
          "url" : "http://www.forbes.com/sites/thomasbrewster/2015/07/10/vulnerable-breasts/"
        }, {
          "url" : "https://ics-cert.us-cert.gov/advisories/ICSMA-18-037-02"
        }, {
          "url" : "https://twitter.com/digitalbond/status/619250429751222277"
        } ]
      },
      "description" : {
        "description_data" : [ {
          "lang" : "en",
          "value" : "GE Healthcare eNTEGRA P&R has a password of (1) entegra for the entegra user, (2) passme for the super user of the Polestar/Polestar-i Starlink 4 upgrade, (3) 0 for the entegra user of the Codonics printer FTP service, (4) eNTEGRA for the eNTEGRA P&R user account, (5) insite for the WinVNC Login, and possibly other accounts, which has unspecified impact and attack vectors.  NOTE: it is not clear whether this password is default, hardcoded, or dependent on another system or product that requires a fixed value."
        } ]
      }
    },
    "configurations" : {
      "CVE_data_version" : "4.0",
      "nodes" : [ {
        "operator" : "OR",
        "cpe" : [ {
          "vulnerable" : true,
          "cpe22Uri" : "cpe:/a:gehealthcare:entegra_p%26r",
          "cpe23Uri" : "cpe:2.3:a:gehealthcare:entegra_p\\&r:*:*:*:*:*:*:*:*"
        } ]
      } ]
    },
    "impact" : {
      "baseMetricV2" : {
        "cvssV2" : {
          "version" : "2.0",
          "vectorString" : "(AV:N/AC:L/Au:N/C:C/I:C/A:C)",
          "accessVector" : "NETWORK",
          "accessComplexity" : "LOW",
          "authentication" : "NONE",
          "confidentialityImpact" : "COMPLETE",
          "integrityImpact" : "COMPLETE",
          "availabilityImpact" : "COMPLETE",
          "baseScore" : 10.0
        },
        "severity" : "HIGH",
        "exploitabilityScore" : 10.0,
        "impactScore" : 10.0,
        "obtainAllPrivilege" : false,
        "obtainUserPrivilege" : false,
        "obtainOtherPrivilege" : false,
        "userInteractionRequired" : false
      }
    },
    "publishedDate" : "2015-08-04T14:59Z",
    "lastModifiedDate" : "2018-03-28T01:29Z"
  }, {
    "cve" : {
      "data_type" : "CVE",
      "data_format" : "MITRE",
      "data_version" : "4.0",
      "CVE_data_meta" : {
        "ID" : "CVE-2002-2446",
        "ASSIGNER" : "cve@mitre.org"
      },
      "affects" : {
        "vendor" : {
          "vendor_data" : [ {
            "vendor_name" : "gehealthcare",
            "product" : {
              "product_data" : [ {
                "product_name" : "millennium_mg_firmware",
                "version" : {
                  "version_data" : [ {
                    "version_value" : "-"
                  } ]
                }
              }

所以我想将嵌套的JSON转换为CSV,使嵌套的字段也成为CSV的列,而不是仍以JSON形式保留在单元格中。

脚本输出:

(此处原为脚本输出的截图)

1 个答案:

答案 0 :(得分:0)

您应该使用json_normalize

import json

import pandas as pd

# Load the whole feed as plain Python objects. JSON is UTF-8 by specification,
# so state the encoding instead of relying on the platform default.
with open('nvdcve-1.0-modified.json', encoding='utf-8') as file:
    data = json.load(file)

# json_normalize is exposed as pd.json_normalize since pandas 1.0; the older
# `from pandas.io.json import json_normalize` import path is deprecated.
#   record_path: produce one row per entry of the "CVE_Items" list.
#   meta: top-level feed fields repeated on every row.
df = pd.json_normalize(
    data,
    record_path='CVE_Items',
    meta=[
        'CVE_data_type',
        'CVE_data_format',
        'CVE_data_version',
        'CVE_data_numberOfCVEs',
        'CVE_data_timestamp',
    ],
)

这会将CVE_Items的第一层嵌套展平。您可能需要重复此过程,以展平部分或全部结果列,具体取决于您希望的最终输出形式。

有关详细信息,请参阅文档:http://pandas.pydata.org/pandas-docs/version/0.17.0/generated/pandas.io.json.json_normalize.html

此博客文章也可能有所帮助: https://mindtrove.info/flatten-nested-json-with-pandas