我对python非常陌生,想使用python将嵌套的JSON文件转换为CSV表。但是我遇到了一个问题,在将数据导出到CSV文件后,我所有的值都在一行中有很多列,但是我希望它具有多行。
以下是我的示例数据:
[
{
"by_app": [
{
"app": 5,
"cat": 3,
"clients": [
{
"mac": "ec:1f:72:fa:75:77",
"rx_bytes": 372,
"rx_packets": 3,
"tx_bytes": 1361,
"tx_packets": 4
},
{
"mac": "f0:9f:c2:6c:5b:d0",
"rx_bytes": 56896,
"rx_packets": 191,
"tx_bytes": 210622,
"tx_packets": 460
}
],
"known_clients": 2,
"rx_bytes": 60837,
"rx_packets": 203,
"tx_bytes": 213435,
"tx_packets": 475
},
{
"app": 94,
"cat": 19,
"clients": [
{
"mac": "30:07:4d:38:ae:e2",
"rx_bytes": 64654,
"rx_packets": 147,
"tx_bytes": 19533,
"tx_packets": 138
},
{
"mac": "ec:1f:72:fa:75:77",
"rx_bytes": 42416,
"rx_packets": 68,
"tx_bytes": 12419,
"tx_packets": 74
}
],
"known_clients": 2,
"rx_bytes": 5421117,
"rx_packets": 4779,
"tx_bytes": 243979,
"tx_packets": 2377
},
{
"app": 162,
"cat": 20,
"rx_bytes": 3295298,
"rx_packets": 2935,
"tx_bytes": 171266,
"tx_packets": 2032
},
{
"app": 209,
"cat": 13,
"rx_bytes": 21763,
"rx_packets": 38,
"tx_bytes": 4433,
"tx_packets": 30
},
{
"app": 222,
"cat": 13,
"clients": [
{
"mac": "30:07:4d:38:ae:e2",
"rx_bytes": 300,
"rx_packets": 3,
"tx_bytes": 503,
"tx_packets": 4
},
{
"mac": "ec:1f:72:fa:75:77",
"rx_bytes": 3452,
"rx_packets": 24,
"tx_bytes": 4176,
"tx_packets": 26
},
{
"mac": "f0:9f:c2:6c:5b:d0",
"rx_bytes": 0,
"rx_packets": 0,
"tx_bytes": 396,
"tx_packets": 6
}
],
"known_clients": 3,
"rx_bytes": 4742,
"rx_packets": 32,
"tx_bytes": 5787,
"tx_packets": 42
},
{
"app": 167,
"cat": 20,
"clients": [
{
"mac": "ec:1f:72:fa:75:77",
"rx_bytes": 5761,
"rx_packets": 14,
"tx_bytes": 1574,
"tx_packets": 13
}
],
"known_clients": 1,
"rx_bytes": 138686,
"rx_packets": 237,
"tx_bytes": 31821,
"tx_packets": 155
},
{
"app": 112,
"cat": 4,
"clients": [
{
"mac": "ec:1f:72:fa:75:77",
"rx_bytes": 135381,
"rx_packets": 161,
"tx_bytes": 42596,
"tx_packets": 140
}
],
"known_clients": 1,
"rx_bytes": 135381,
"rx_packets": 161,
"tx_bytes": 42596,
"tx_packets": 140
},
{
"app": 62,
"cat": 8,
"rx_bytes": 7219,
"rx_packets": 10,
"tx_bytes": 1153,
"tx_packets": 9
},
{
"app": 185,
"cat": 20,
"rx_bytes": 4733026,
"rx_packets": 4666,
"tx_bytes": 728026,
"tx_packets": 2688
},
{
"app": 130,
"cat": 4,
"clients": [
{
"mac": "30:07:4d:38:ae:e2",
"rx_bytes": 113871,
"rx_packets": 121,
"tx_bytes": 16442,
"tx_packets": 116
}
],
"known_clients": 1,
"rx_bytes": 113871,
"rx_packets": 121,
"tx_bytes": 16442,
"tx_packets": 116
},
{
"app": 65535,
"cat": 255,
"clients": [
{
"mac": "30:07:4d:38:ae:e2",
"rx_bytes": 8195,
"rx_packets": 27,
"tx_bytes": 3834,
"tx_packets": 25
},
{
"mac": "ec:1f:72:fa:75:77",
"rx_bytes": 27338,
"rx_packets": 93,
"tx_bytes": 11330,
"tx_packets": 86
},
{
"mac": "f0:9f:c2:6c:5b:d0",
"rx_bytes": 19974,
"rx_packets": 181,
"tx_bytes": 2447,
"tx_packets": 34
},
{
"mac": "f0:9f:c2:c6:63:5a",
"rx_bytes": 90,
"rx_packets": 1,
"tx_bytes": 0,
"tx_packets": 0
}
],
"known_clients": 4,
"rx_bytes": 6417768,
"rx_packets": 10254,
"tx_bytes": 1193280,
"tx_packets": 7326
},
{
"app": 190,
"cat": 13,
"clients": [
{
"mac": "ec:1f:72:fa:75:77",
"rx_bytes": 25041,
"rx_packets": 34,
"tx_bytes": 32420,
"tx_packets": 49
}
],
"known_clients": 1,
"rx_bytes": 25041,
"rx_packets": 34,
"tx_bytes": 32420,
"tx_packets": 49
},
{
"app": 106,
"cat": 18,
"clients": [
{
"mac": "f0:9f:c2:6c:5b:d0",
"rx_bytes": 360,
"rx_packets": 4,
"tx_bytes": 360,
"tx_packets": 4
},
{
"mac": "f0:9f:c2:c6:63:5a",
"rx_bytes": 90,
"rx_packets": 1,
"tx_bytes": 180,
"tx_packets": 2
}
],
"known_clients": 2,
"rx_bytes": 450,
"rx_packets": 5,
"tx_bytes": 540,
"tx_packets": 6
},
{
"app": 1,
"cat": 6,
"rx_bytes": 23370,
"rx_packets": 35,
"tx_bytes": 4388,
"tx_packets": 26
},
{
"app": 61,
"cat": 9,
"rx_bytes": 4825,
"rx_packets": 24,
"tx_bytes": 2040,
"tx_packets": 24
},
{
"app": 32,
"cat": 17,
"rx_bytes": 27068,
"rx_packets": 42,
"tx_bytes": 6002,
"tx_packets": 27
},
{
"app": 3,
"cat": 24,
"clients": [
{
"mac": "30:07:4d:38:ae:e2",
"rx_bytes": 3791,
"rx_packets": 8,
"tx_bytes": 1258,
"tx_packets": 9
},
{
"mac": "ec:1f:72:fa:75:77",
"rx_bytes": 25745,
"rx_packets": 109,
"tx_bytes": 21603,
"tx_packets": 104
}
],
"known_clients": 2,
"rx_bytes": 29536,
"rx_packets": 117,
"tx_bytes": 22861,
"tx_packets": 113
},
{
"app": 63,
"cat": 18,
"rx_bytes": 0,
"rx_packets": 0,
"tx_bytes": 114,
"tx_packets": 2
},
{
"app": 21,
"cat": 3,
"rx_bytes": 41992,
"rx_packets": 53,
"tx_bytes": 9788,
"tx_packets": 34
},
{
"app": 21,
"cat": 14,
"rx_bytes": 31920,
"rx_packets": 114,
"tx_bytes": 19203,
"tx_packets": 82
}
],
"by_cat": [
{
"apps": [
5,
21
],
"cat": 3,
"rx_bytes": 102829,
"rx_packets": 256,
"tx_bytes": 223223,
"tx_packets": 509
},
{
"apps": [
112,
130
],
"cat": 4,
"rx_bytes": 249252,
"rx_packets": 282,
"tx_bytes": 59038,
"tx_packets": 256
},
{
"apps": [
1
],
"cat": 6,
"rx_bytes": 23370,
"rx_packets": 35,
"tx_bytes": 4388,
"tx_packets": 26
},
{
"apps": [
62
],
"cat": 8,
"rx_bytes": 7219,
"rx_packets": 10,
"tx_bytes": 1153,
"tx_packets": 9
},
{
"apps": [
61
],
"cat": 9,
"rx_bytes": 4825,
"rx_packets": 24,
"tx_bytes": 2040,
"tx_packets": 24
},
{
"apps": [
209,
222,
190
],
"cat": 13,
"rx_bytes": 51546,
"rx_packets": 104,
"tx_bytes": 42640,
"tx_packets": 121
},
{
"apps": [
21
],
"cat": 14,
"rx_bytes": 31920,
"rx_packets": 114,
"tx_bytes": 19203,
"tx_packets": 82
},
{
"apps": [
32
],
"cat": 17,
"rx_bytes": 27068,
"rx_packets": 42,
"tx_bytes": 6002,
"tx_packets": 27
},
{
"apps": [
106,
63
],
"cat": 18,
"rx_bytes": 450,
"rx_packets": 5,
"tx_bytes": 654,
"tx_packets": 8
},
{
"apps": [
94
],
"cat": 19,
"rx_bytes": 5421117,
"rx_packets": 4779,
"tx_bytes": 243979,
"tx_packets": 2377
},
{
"apps": [
162,
167,
185
],
"cat": 20,
"rx_bytes": 8167010,
"rx_packets": 7838,
"tx_bytes": 931113,
"tx_packets": 4875
},
{
"apps": [
3
],
"cat": 24,
"rx_bytes": 29536,
"rx_packets": 117,
"tx_bytes": 22861,
"tx_packets": 113
},
{
"apps": [
65535
],
"cat": 255,
"rx_bytes": 6417768,
"rx_packets": 10254,
"tx_bytes": 1193280,
"tx_packets": 7326
}
],
"last_updated": 1557123138
}
]
下面是我的python代码:
import json
import sys
import csv
def to_string(s):
try:
return str(s)
except:
return s.encode('utf-8')
def reduce_item(key, value):
global reduced_item
if type(value) is list:
i=0
for sub_item in value:
reduce_item(key+'_'+to_string(i), sub_item)
i=i+1
elif type(value) is dict:
sub_keys = value.keys()
for sub_key in sub_keys:
reduce_item(key+'_'+to_string(sub_key), value[sub_key])
else:
reduced_item[to_string(key)] = to_string(value)
node="by_app"
fp = open("sample.json", 'r')
json_value = fp.read()
raw_data = json.loads(json_value)
fp.close()
data_to_be_processed=[]
for i in node:
try:
data_to_be_processed.append(raw_data[i])
except:
data_to_be_processed.append(raw_data)
processed_data = []
header = []
for item in data_to_be_processed:
reduced_item = {}
reduce_item(i, item)
header += reduced_item.keys()
processed_data.append(reduced_item)
header = list(set(header))
header.sort()
with open("test.csv", 'w+', newline='') as f:
f_csv = csv.DictWriter(f, header, quoting=csv.QUOTE_ALL)
f_csv.writeheader()
for row in processed_data:
f_csv.writerow(row)
我希望CSV文件中的输出为: !(https://json-csv.com/c/2Who)
答案 0 :(得分:0)
您的方法有一个小错误;尝试执行以下算法
使用以下依赖项
set identity_insert [tablename] on
insert into [tablename] (columnlist) values(valuelist)
set identity_insert [tablename] off
import csv
import json
import logging
from itertools import chain
from six import string_types, PY2
此函数将通过内部调用json_to_dicts帮助您将json转换为字典对象:
logger = logging.getLogger("json2csv")
JSONError = ValueError if PY2 else json.decoder.JSONDecodeError
这是json-to-dict的实际逻辑
def json_to_csv(input_file_path, output_file_path):
with open(input_file_path) as input_file:
json = input_file.read()
dicts = json_to_dicts(json)
with open(output_file_path, "w") as output_file:
dicts_to_csv(dicts, output_file)
格式化字典中的键/值对
def json_to_dicts(json_str):
try:
objects = json.loads(json_str)
except JSONError:
objects = [json.loads(l) for l in json_str.split('\n') if l.strip()]
return [dict(to_keyvalue_pairs(obj)) for obj in objects]
这会将字典写入csv
def to_keyvalue_pairs(source, ancestors=[], key_delimeter='_'):
def is_sequence(arg):
return (not isinstance(arg, string_types)) and (hasattr(arg, "__getitem__") or hasattr(arg, "__iter__"))
def is_dict(arg):
return isinstance(arg, dict)
if is_dict(source):
result = [to_keyvalue_pairs(source[key], ancestors + [key]) for key in source.keys()]
return list(chain.from_iterable(result))
elif is_sequence(source):
result = [to_keyvalue_pairs(item, ancestors + [str(index)]) for (index, item) in enumerate(source)]
return list(chain.from_iterable(result))
else:
return [(key_delimeter.join(ancestors), source)]