将深度嵌套的 JSON 响应转换为 Pandas 数据帧

时间:2021-03-20 01:57:48

标签: python json pandas dataframe json-rpc

我是数据科学的新手。我正在做一个项目,需要通过 API 调用收集数据,该调用返回如下 JSON 响应:

{
  "jsonrpc": "2.0",
  "result": {
    "class": "dataset",
    "dimension": {
      "STATISTIC": {
        "category": {
          "index": ["DHA10C1"],
          "label": { "DHA10C1": "Births" },
          "unit": {
            "DHA10C1": { "decimals": 0, "label": "Number", "position": "end" }
          }
        },
        "label": "Statistic"
      },
      "TLIST(A1)": {
        "category": {
          "index": ["2015", "2016", "2017", "2018"],
          "label": {
            "2015": "2015",
            "2016": "2016",
            "2017": "2017",
            "2018": "2018"
          }
        },
        "label": "Year"
      },
      "C02842V03416": {
        "category": {
          "index": ["-"],
          "label": { "-": "All counties and regions" }
        },
        "label": "Area of Residence of Mother"
      },
      "C02025V02453": {
        "category": {
          "index": ["1", "2", "-"],
          "label": {
            "1": "Single - never married",
            "2": "Married",
            "-": "All marital status"
          }
        },
        "label": "Martial Status of Mother"
      },
      "C02199V02655": {
        "category": { "index": ["-"], "label": { "-": "Both sexes" } },
        "label": "Sex of Infant"
      },
      "C02076V02508": {
        "category": {
          "index": [
            "222",
            "365",
            "410",
            "440",
            "460",
            "475",
            "489",
            "999",
            "-",
            "X001"
          ],
          "label": {
            "222": "0 - 19 years",
            "365": "20 - 24 years",
            "410": "25 - 29 years",
            "440": "30 - 34 years",
            "460": "35 - 39 years",
            "475": "40 - 44 years",
            "489": "44 years and over",
            "999": "Age not stated",
            "-": "All ages",
            "X001": "Missing"
          }
        },
        "label": "Age Group of Mother"
      }
    },
    "extension": {
      "matrix": "DHA10",
      "reasons": [],
      "language": { "code": "en", "name": "English" },
      "contact": { "name": "", "email": "info@health.gov.ie", "phone": "" },
      "subject": { "code": 50, "value": "Department of Health" },
      "product": { "code": "DH", "value": "Health Statistics" },
      "official": true,
      "copyright": {
        "name": "Department of Health",
        "code": "DOH",
        "href": "https://www.gov.ie/en/organisation/department-of-health/"
      },
      "exceptional": false,
      "reservation": false,
      "archive": false,
      "experimental": false,
      "analytical": false
    },
    "href": "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/DHA10/JSON-stat/2.0/en",
    "id": [
      "STATISTIC",
      "TLIST(A1)",
      "C02842V03416",
      "C02025V02453",
      "C02199V02655",
      "C02076V02508"
    ],
    "label": "Births",
    "link": {
      "alternate": [
        {
          "type": "text/csv",
          "href": "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/DHA10/CSV/1.0/en"
        },
        {
          "type": "application/json",
          "href": "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/DHA10/JSON-stat/1.0/en"
        },
        {
          "type": "application/octet-stream",
          "href": "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/DHA10/PX/2013/en"
        },
        {
          "type": "application/base64",
          "href": "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/DHA10/XLSX/2007/en"
        }
      ]
    },
    "note": [
      "Department of Health statistics hosted by the CSO. Any comments or queries can be sent to [url=mailto:info@health.gov.ie]info@health.gov.ie[/url]"
    ],
    "role": { "metric": ["STATISTIC"], "time": ["TLIST(A1)"] },
    "size": [1, 4, 1, 3, 1, 10],
    "updated": "2021-02-25T11:00:00.000Z",
    "value": [
      1199.0,
      5705.0,
      12322.0,
      23684.0,
      18451.0,
      3955.0,
      220.0,
      null,
      65536.0,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      1199.0,
      5705.0,
      12322.0,
      23684.0,
      18451.0,
      3955.0,
      220.0,
      null,
      65536.0,
      null,
      1101.0,
      5217.0,
      11357.0,
      23012.0,
      18775.0,
      4079.0,
      300.0,
      null,
      63841.0,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      1101.0,
      5217.0,
      11357.0,
      23012.0,
      18775.0,
      4079.0,
      300.0,
      null,
      63841.0,
      null,
      1037.0,
      5115.0,
      10779.0,
      21652.0,
      18943.0,
      3970.0,
      322.0,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      null,
      1037.0,
      5115.0,
      10779.0,
      21652.0,
      18943.0,
      3970.0,
      322.0,
      null,
      null,
      null,
      837.0,
      4088.0,
      6247.0,
      6351.0,
      4250.0,
      1190.0,
      86.0,
      null,
      null,
      null,
      140.0,
      966.0,
      4175.0,
      14574.0,
      14680.0,
      3148.0,
      241.0,
      null,
      null,
      null,
      977.0,
      5054.0,
      10422.0,
      20925.0,
      18930.0,
      4338.0,
      327.0,
      null,
      null,
      null
    ],
    "version": "2.0"
  },
  "id": null
}

当我试图用上述响应构建 Pandas 数据帧时,问题就出现了,因为该 JSON 响应是深度嵌套的。我尝试了以下代码片段:

df = pd.json_normalize(data['result'])

它返回以下结果

我只希望数据帧看起来像这样

感谢任何帮助。

1 个答案:

答案 0 :(得分:0)

按照您的做法对 JSON 进行规范化之后,您或许可以对每一列使用 `explode`(将类列表值中的每个元素转换为单独的一行):

df.explode('size').reset_index(drop=True)