将大型格式错误的.json文件转换为csv

时间:2014-12-30 19:40:39

标签: python json csv flatten

我对python和.json文件的经验很少。我想将我从其他人那里收到的大型.json文件转换为.csv文件,以便在excel中使用。

文件格式如下:

{
"bedrock": {
    "basinAge": "",
    "basinName": "",
    "basinSetting": "",
    "basinSource": "",
    "basinType": "",
    "division": "ATLANTICPLAIN",
    "primary": "ATLANTICPLAIN",
    "province": "COASTALPLAIN",
    "section": "MISSISSIPPIALLUVIALPLAIN"
},
"country": "US",
"county": "ButlerCounty",
"crow": {
    "PERIOD_RAN": "",
    "SITE_PERIO": "",
    "SURFACE_EL": "",
    "VS30_RANGE": "",
    "ZDRIFT": "",
    "ZONE": "",
    "ZPLEIS": "",
    "Zhol": "",
    "condition": "",
    "firmThickness": "",
    "geobed": "",
    "geodes": "",
    "geophone": "",
    "meas_type": "",
    "resonance": "",
    "sitelocation": "",
    "sitenumber": "",
    "sitevs30": "",
    "slope": "",
    "slopevel": "",
    "soilThickness": "",
    "veltofirm": "",
    "vs30": ""
},
"embaymentDepth": 27.176477284750096,
"file": "../../data\\anderson\\anderson-et-al-2003-MoDOT.json",
"geologicClass": "YNa",
"geology": "al",
"geologySource": "fullerton",
"lat": 36.790518,
"latlon": [
    [
        "36.7905",
        "-90.2025"
    ],
    "232.0000",
    0.00172447,
    "stable"
],
"location": "BridgeA-3709",
"lon": -90.202518,
"profile": {
    "entry": {
        "0": [
            0,
            146.185,
            "Empty"
        ],
        "1": [
            2.91874,
            194.378,
            "Empty"
        ],
        "2": [
            4.11277,
            228.112,
            "Empty"
        ],
        "3": [
            6.10282,
            221.687,
            "Empty"
        ],
        "4": [
            7.9602,
            221.687,
            "Empty"
        ],
        "5": [
            8.09287,
            220.08,
            "Empty"
        ],
        "6": [
            8.09287,
            216.867,
            "Empty"
        ],
        "7": [
            14.063,
            260.241,
            "Empty"
        ],
        "8": [
            18.0431,
            279.518,
            "Empty"
        ],
        "9": [
            22.1559,
            282.731,
            "Empty"
        ],
        "10": [
            26.0033,
            281.124,
            "Empty"
        ],
        "11": [
            29.9834,
            276.305,
            "Empty"
        ],
        "12": [
            36.0862,
            293.976,
            "Empty"
        ],
        "13": [
            41.9237,
            435.341,
            "Empty"
        ],
        "14": [
            48.0265,
            557.43,
            "Empty"
        ],
        "15": [
            54.1294,
            640.964,
            "Empty"
        ],
        "16": [
            59.8342,
            726.104,
            "Empty"
        ],
        "17": [
            68.1924,
            "Empty",
            "Empty"
        ]
    },
    "units": [
        "m",
        "m/s",
        "m/s"
    ]
},
"sedEnd": "",
"sedStack": "",
"sedStart": "",
"sedSubsurface": "",
"sedSurficial": "",
"sedVaneer": "",
"site": "SASW",
"state": "MO",
"terrain": "16",
"terrainvel": "246",
"vs30": {
    "profileListed": {
        "units": "",
        "value": "None"
    },
    "siteListed": {},
    "stationlisted": {
        "method": "",
        "units": "",
        "value": ""
    },
    "units": "m/s",
    "value": 232.2477304197259,
    "wald": "",
    "yong": ""
},
"vsz": [
    146.185,
    146.185,
    147.1733748014587,
    156.68616932663514,
    166.6758658515508,
    174.5091277144315,
    180.04135355419726,
    184.37079145300547,
    187.52878874899267,
    190.10050728694824,
    192.25770054815115,
    194.09311720243602,
    195.6737567374426,
    197.04922523230243,
    200.16214171750644,
    203.0924940564577,
    205.75028418598887,
    208.17185007705515,
    210.97976183739868,
    213.5984956154859,
    216.02447901610586,
    218.27823757348278,
    220.44997122220872,
    222.4921122273311,
    224.40458487503645,
    226.19935932262254,
    227.84822291575128,
    229.40085613024158,
    230.86555428020594,
    232.2477304197259,
    265.0897348970574,
    "",
    "",
    "",
    "",
    "",
    ""
]
}

文件中有1000个与上面类似的条目,每个条目都有相同的键。在网上做了一些研究之后,我很确定我需要把这些条目展平(flatten),但我不确定如何以编程方式实现。其中一些分类字段下面还嵌套着一系列键(例如'bedrock'、'crow'等),必要时这些字段可以丢弃。

1 个答案:

答案 0 :(得分:0)

第一步无疑是使用JSON解析器解析该文件,然后编写代码遍历解析得到的字典并提取所需的数据。

我不确定你所说的"格式错误"是什么意思;它看起来是有效的JSON。如果您在使用Python的json模块解析它时遇到问题,可以尝试使用Python的yaml模块进行处理。YAML是JSON的超集,但对于多余的逗号之类的细小格式问题更加宽容。

http://pymotw.com/2/json/

https://pypi.python.org/pypi/PyYAML