Question

我是 Python 新手，想用 Python 把嵌套的 JSON 文件转换为 CSV 表格。但我遇到了一个问题：将数据导出到 CSV 文件后，所有的值都挤在同一行的许多列中，而我希望数据能分成多行显示。

以下是我的示例数据：

[
    {
        "by_app": [
            {
                "app": 5,
                "cat": 3,
                "clients": [
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 372,
                        "rx_packets": 3,
                        "tx_bytes": 1361,
                        "tx_packets": 4
                    },
                    {
                        "mac": "f0:9f:c2:6c:5b:d0",
                        "rx_bytes": 56896,
                        "rx_packets": 191,
                        "tx_bytes": 210622,
                        "tx_packets": 460
                    }
                ],
                "known_clients": 2,
                "rx_bytes": 60837,
                "rx_packets": 203,
                "tx_bytes": 213435,
                "tx_packets": 475
            },
            {
                "app": 94,
                "cat": 19,
                "clients": [
                    {
                        "mac": "30:07:4d:38:ae:e2",
                        "rx_bytes": 64654,
                        "rx_packets": 147,
                        "tx_bytes": 19533,
                        "tx_packets": 138
                    },
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 42416,
                        "rx_packets": 68,
                        "tx_bytes": 12419,
                        "tx_packets": 74
                    }
                ],
                "known_clients": 2,
                "rx_bytes": 5421117,
                "rx_packets": 4779,
                "tx_bytes": 243979,
                "tx_packets": 2377
            },
            {
                "app": 162,
                "cat": 20,
                "rx_bytes": 3295298,
                "rx_packets": 2935,
                "tx_bytes": 171266,
                "tx_packets": 2032
            },
            {
                "app": 209,
                "cat": 13,
                "rx_bytes": 21763,
                "rx_packets": 38,
                "tx_bytes": 4433,
                "tx_packets": 30
            },
            {
                "app": 222,
                "cat": 13,
                "clients": [
                    {
                        "mac": "30:07:4d:38:ae:e2",
                        "rx_bytes": 300,
                        "rx_packets": 3,
                        "tx_bytes": 503,
                        "tx_packets": 4
                    },
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 3452,
                        "rx_packets": 24,
                        "tx_bytes": 4176,
                        "tx_packets": 26
                    },
                    {
                        "mac": "f0:9f:c2:6c:5b:d0",
                        "rx_bytes": 0,
                        "rx_packets": 0,
                        "tx_bytes": 396,
                        "tx_packets": 6
                    }
                ],
                "known_clients": 3,
                "rx_bytes": 4742,
                "rx_packets": 32,
                "tx_bytes": 5787,
                "tx_packets": 42
            },
            {
                "app": 167,
                "cat": 20,
                "clients": [
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 5761,
                        "rx_packets": 14,
                        "tx_bytes": 1574,
                        "tx_packets": 13
                    }
                ],
                "known_clients": 1,
                "rx_bytes": 138686,
                "rx_packets": 237,
                "tx_bytes": 31821,
                "tx_packets": 155
            },
            {
                "app": 112,
                "cat": 4,
                "clients": [
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 135381,
                        "rx_packets": 161,
                        "tx_bytes": 42596,
                        "tx_packets": 140
                    }
                ],
                "known_clients": 1,
                "rx_bytes": 135381,
                "rx_packets": 161,
                "tx_bytes": 42596,
                "tx_packets": 140
            },
            {
                "app": 62,
                "cat": 8,
                "rx_bytes": 7219,
                "rx_packets": 10,
                "tx_bytes": 1153,
                "tx_packets": 9
            },
            {
                "app": 185,
                "cat": 20,
                "rx_bytes": 4733026,
                "rx_packets": 4666,
                "tx_bytes": 728026,
                "tx_packets": 2688
            },
            {
                "app": 130,
                "cat": 4,
                "clients": [
                    {
                        "mac": "30:07:4d:38:ae:e2",
                        "rx_bytes": 113871,
                        "rx_packets": 121,
                        "tx_bytes": 16442,
                        "tx_packets": 116
                    }
                ],
                "known_clients": 1,
                "rx_bytes": 113871,
                "rx_packets": 121,
                "tx_bytes": 16442,
                "tx_packets": 116
            },
            {
                "app": 65535,
                "cat": 255,
                "clients": [
                    {
                        "mac": "30:07:4d:38:ae:e2",
                        "rx_bytes": 8195,
                        "rx_packets": 27,
                        "tx_bytes": 3834,
                        "tx_packets": 25
                    },
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 27338,
                        "rx_packets": 93,
                        "tx_bytes": 11330,
                        "tx_packets": 86
                    },
                    {
                        "mac": "f0:9f:c2:6c:5b:d0",
                        "rx_bytes": 19974,
                        "rx_packets": 181,
                        "tx_bytes": 2447,
                        "tx_packets": 34
                    },
                    {
                        "mac": "f0:9f:c2:c6:63:5a",
                        "rx_bytes": 90,
                        "rx_packets": 1,
                        "tx_bytes": 0,
                        "tx_packets": 0
                    }
                ],
                "known_clients": 4,
                "rx_bytes": 6417768,
                "rx_packets": 10254,
                "tx_bytes": 1193280,
                "tx_packets": 7326
            },
            {
                "app": 190,
                "cat": 13,
                "clients": [
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 25041,
                        "rx_packets": 34,
                        "tx_bytes": 32420,
                        "tx_packets": 49
                    }
                ],
                "known_clients": 1,
                "rx_bytes": 25041,
                "rx_packets": 34,
                "tx_bytes": 32420,
                "tx_packets": 49
            },
            {
                "app": 106,
                "cat": 18,
                "clients": [
                    {
                        "mac": "f0:9f:c2:6c:5b:d0",
                        "rx_bytes": 360,
                        "rx_packets": 4,
                        "tx_bytes": 360,
                        "tx_packets": 4
                    },
                    {
                        "mac": "f0:9f:c2:c6:63:5a",
                        "rx_bytes": 90,
                        "rx_packets": 1,
                        "tx_bytes": 180,
                        "tx_packets": 2
                    }
                ],
                "known_clients": 2,
                "rx_bytes": 450,
                "rx_packets": 5,
                "tx_bytes": 540,
                "tx_packets": 6
            },
            {
                "app": 1,
                "cat": 6,
                "rx_bytes": 23370,
                "rx_packets": 35,
                "tx_bytes": 4388,
                "tx_packets": 26
            },
            {
                "app": 61,
                "cat": 9,
                "rx_bytes": 4825,
                "rx_packets": 24,
                "tx_bytes": 2040,
                "tx_packets": 24
            },
            {
                "app": 32,
                "cat": 17,
                "rx_bytes": 27068,
                "rx_packets": 42,
                "tx_bytes": 6002,
                "tx_packets": 27
            },
            {
                "app": 3,
                "cat": 24,
                "clients": [
                    {
                        "mac": "30:07:4d:38:ae:e2",
                        "rx_bytes": 3791,
                        "rx_packets": 8,
                        "tx_bytes": 1258,
                        "tx_packets": 9
                    },
                    {
                        "mac": "ec:1f:72:fa:75:77",
                        "rx_bytes": 25745,
                        "rx_packets": 109,
                        "tx_bytes": 21603,
                        "tx_packets": 104
                    }
                ],
                "known_clients": 2,
                "rx_bytes": 29536,
                "rx_packets": 117,
                "tx_bytes": 22861,
                "tx_packets": 113
            },
            {
                "app": 63,
                "cat": 18,
                "rx_bytes": 0,
                "rx_packets": 0,
                "tx_bytes": 114,
                "tx_packets": 2
            },
            {
                "app": 21,
                "cat": 3,
                "rx_bytes": 41992,
                "rx_packets": 53,
                "tx_bytes": 9788,
                "tx_packets": 34
            },
            {
                "app": 21,
                "cat": 14,
                "rx_bytes": 31920,
                "rx_packets": 114,
                "tx_bytes": 19203,
                "tx_packets": 82
            }
        ],
        "by_cat": [
            {
                "apps": [
                    5,
                    21
                ],
                "cat": 3,
                "rx_bytes": 102829,
                "rx_packets": 256,
                "tx_bytes": 223223,
                "tx_packets": 509
            },
            {
                "apps": [
                    112,
                    130
                ],
                "cat": 4,
                "rx_bytes": 249252,
                "rx_packets": 282,
                "tx_bytes": 59038,
                "tx_packets": 256
            },
            {
                "apps": [
                    1
                ],
                "cat": 6,
                "rx_bytes": 23370,
                "rx_packets": 35,
                "tx_bytes": 4388,
                "tx_packets": 26
            },
            {
                "apps": [
                    62
                ],
                "cat": 8,
                "rx_bytes": 7219,
                "rx_packets": 10,
                "tx_bytes": 1153,
                "tx_packets": 9
            },
            {
                "apps": [
                    61
                ],
                "cat": 9,
                "rx_bytes": 4825,
                "rx_packets": 24,
                "tx_bytes": 2040,
                "tx_packets": 24
            },
            {
                "apps": [
                    209,
                    222,
                    190
                ],
                "cat": 13,
                "rx_bytes": 51546,
                "rx_packets": 104,
                "tx_bytes": 42640,
                "tx_packets": 121
            },
            {
                "apps": [
                    21
                ],
                "cat": 14,
                "rx_bytes": 31920,
                "rx_packets": 114,
                "tx_bytes": 19203,
                "tx_packets": 82
            },
            {
                "apps": [
                    32
                ],
                "cat": 17,
                "rx_bytes": 27068,
                "rx_packets": 42,
                "tx_bytes": 6002,
                "tx_packets": 27
            },
            {
                "apps": [
                    106,
                    63
                ],
                "cat": 18,
                "rx_bytes": 450,
                "rx_packets": 5,
                "tx_bytes": 654,
                "tx_packets": 8
            },
            {
                "apps": [
                    94
                ],
                "cat": 19,
                "rx_bytes": 5421117,
                "rx_packets": 4779,
                "tx_bytes": 243979,
                "tx_packets": 2377
            },
            {
                "apps": [
                    162,
                    167,
                    185
                ],
                "cat": 20,
                "rx_bytes": 8167010,
                "rx_packets": 7838,
                "tx_bytes": 931113,
                "tx_packets": 4875
            },
            {
                "apps": [
                    3
                ],
                "cat": 24,
                "rx_bytes": 29536,
                "rx_packets": 117,
                "tx_bytes": 22861,
                "tx_packets": 113
            },
            {
                "apps": [
                    65535
                ],
                "cat": 255,
                "rx_bytes": 6417768,
                "rx_packets": 10254,
                "tx_bytes": 1193280,
                "tx_packets": 7326
            }
        ],
        "last_updated": 1557123138
    }
]

下面是我的python代码：


import json
import sys
import csv

def to_string(s):
    """Return ``s`` converted with ``str()``.

    If ``str()`` raises ``UnicodeEncodeError`` the value is returned
    UTF-8 encoded instead. Only that error is caught, so unrelated
    failures (and ``KeyboardInterrupt``) are no longer silently swallowed.
    """
    try:
        return str(s)
    except UnicodeEncodeError:
        # Presumably a Python 2 leftover: str() on non-ASCII unicode text
        # raises there, so fall back to the encoded form.
        return s.encode('utf-8')

def reduce_item(key, value):
    """Flatten ``value`` into the module-level ``reduced_item`` dict.

    Lists contribute ``<key>_<index>`` entries and dicts contribute
    ``<key>_<sub_key>`` entries, recursively. Any other value is stored
    as ``reduced_item[to_string(key)] = to_string(value)``.

    The caller must bind the global ``reduced_item`` to a dict before
    calling; nothing is returned.
    """
    global reduced_item

    # isinstance (rather than an exact type check) also accepts list/dict
    # subclasses such as collections.OrderedDict.
    if isinstance(value, list):
        for index, sub_item in enumerate(value):
            reduce_item(key + '_' + to_string(index), sub_item)

    elif isinstance(value, dict):
        for sub_key, sub_value in value.items():
            reduce_item(key + '_' + to_string(sub_key), sub_value)

    else:
        reduced_item[to_string(key)] = to_string(value)


# Key whose entries should become the CSV rows.
node = "by_app"

# Read and parse the input; the context manager closes the file even when
# reading fails.
with open("sample.json", 'r') as fp:
    json_value = fp.read()
raw_data = json.loads(json_value)

# The document may be a single object or a list of objects; treat both alike.
records = raw_data if isinstance(raw_data, list) else [raw_data]

# Collect one item per entry found under `node`, so that every entry becomes
# its own CSV row. (Iterating over the characters of `node`, as the earlier
# version did, appended the whole document once per character.)
data_to_be_processed = []
for record in records:
    if isinstance(record, dict) and node in record:
        entries = record[node]
        if isinstance(entries, list):
            data_to_be_processed.extend(entries)
        else:
            data_to_be_processed.append(entries)
    else:
        # No such key in this record: fall back to flattening the record itself.
        data_to_be_processed.append(record)

processed_data = []
header = set()

for item in data_to_be_processed:
    # reduce_item() writes into the module-level `reduced_item`, so a fresh
    # dict is bound for every row.
    reduced_item = {}
    reduce_item(node, item)

    header.update(reduced_item)
    processed_data.append(reduced_item)

# Sorted union of all column names; rows lacking a column get an empty cell.
header = sorted(header)

with open("test.csv", 'w+', newline='') as f:
    f_csv = csv.DictWriter(f, header, quoting=csv.QUOTE_ALL)
    f_csv.writeheader()
    f_csv.writerows(processed_data)

我希望 CSV 文件中的输出如下所示：https://json-csv.com/c/2Who

Answer 1

您的方法有一个小错误；尝试执行以下算法

使用以下依赖项

set identity_insert [tablename] on
insert into [tablename] (columnlist) values(valuelist)
set identity_insert [tablename] off

import csv
import json
import logging

from itertools import chain
from six import string_types, PY2

首先创建模块级的日志记录器，并定义在 Python 2 和 Python 3 下都能捕获的 JSON 解析异常类型：

logger = logging.getLogger("json2csv")
JSONError = ValueError if PY2 else json.decoder.JSONDecodeError

此函数通过内部调用 json_to_dicts 和 dicts_to_csv，将 JSON 文件转换为 CSV 文件：

def json_to_csv(input_file_path, output_file_path):
    """Convert the JSON file at ``input_file_path`` to CSV at ``output_file_path``.

    The input text is passed to ``json_to_dicts`` and the resulting dicts
    are handed, together with the opened output file, to ``dicts_to_csv``.
    """
    with open(input_file_path) as source:
        json_text = source.read()

    rows = json_to_dicts(json_text)

    with open(output_file_path, "w") as target:
        dicts_to_csv(rows, target)

这是将 JSON 文本转换为字典列表（json-to-dict）的实际逻辑：

def json_to_dicts(json_str):
    """Parse ``json_str`` and return a list of flattened dicts.

    The text is first parsed as one JSON document. If that raises
    ``JSONError`` it is parsed line by line instead, skipping blank lines
    (one JSON value per line). Each parsed object is flattened with
    ``to_keyvalue_pairs`` into a single-level dict.

    A document whose top level is a single object yields a one-element
    list rather than being iterated key by key.
    """
    try:
        objects = json.loads(json_str)
    except JSONError:
        objects = [json.loads(line) for line in json_str.split('\n') if line.strip()]

    # Iterating a dict would walk its keys and flatten each key string on its
    # own; wrap a lone object so it produces exactly one row.
    if isinstance(objects, dict):
        objects = [objects]

    return [dict(to_keyvalue_pairs(obj)) for obj in objects]

此函数将嵌套结构展平为字典所需的键/值对（负责将字典写入 csv 的 dicts_to_csv 函数未在此处给出）：

def to_keyvalue_pairs(source, ancestors=None, key_delimeter='_'):
    """Flatten a nested structure into a list of ``(key, value)`` pairs.

    Args:
        source: A dict, a non-string sequence/iterable, or a leaf value.
        ancestors: List of key parts leading to ``source``; defaults to an
            empty path.
        key_delimeter: String used to join the key parts of each leaf.

    Returns:
        A list of ``(joined_key, leaf_value)`` tuples. Dict keys and
        sequence indexes (as strings) become the key parts. Empty dicts
        and empty sequences contribute no pairs.
    """
    # A None default avoids sharing one mutable list object between calls.
    if ancestors is None:
        ancestors = []

    def is_sequence(arg):
        return (not isinstance(arg, string_types)) and (hasattr(arg, "__getitem__") or hasattr(arg, "__iter__"))

    def is_dict(arg):
        return isinstance(arg, dict)

    if is_dict(source):
        # key_delimeter is forwarded so a custom delimiter also applies to
        # nested levels, not only to a top-level leaf.
        result = [to_keyvalue_pairs(source[key], ancestors + [key], key_delimeter) for key in source]
        return list(chain.from_iterable(result))
    elif is_sequence(source):
        result = [to_keyvalue_pairs(item, ancestors + [str(index)], key_delimeter) for (index, item) in enumerate(source)]
        return list(chain.from_iterable(result))
    else:
        return [(key_delimeter.join(ancestors), source)]

如何使用python将嵌套的JSON转换为CSV表

1 个答案: