这个问题可能在这里被问过几次了。我一直在尝试使嵌套的JSON文件变平并将其转换为CSV,但是我能得到的最接近的是列出字段名称:MyCount, from, Mysize, Allhits, aggs
,但没有值:
Output.csv:
""
Mycount
from
Mysize
Allhits
aggs
我一直在尝试这段代码将JSON转换为CSV:
import json
import csv
def get_leaves(item, key=None):
if isinstance(item, dict):
leaves = {}
for i in item.keys():
leaves.update(get_leaves(item[i], i))
return leaves
elif isinstance(item, list):
leaves = {}
for i in item:
leaves.update(get_leaves(i, key))
return leaves
else:
return {key : item}
with open('path/to/my/file.json') as f_input:
json_data = json.load(f_input)
# Paresing all entries to get the complete fieldname list
fieldnames = set()
for entry in json_data:
fieldnames.update(get_leaves(entry).keys())
with open('/path/to/myoutput.csv', 'w', newline='') as f_output:
csv_output = csv.DictWriter(f_output, fieldnames=sorted(fieldnames))
csv_output.writeheader()
csv_output.writerows(get_leaves(entry) for entry in json_data)
JSON结构如下:
{"Mycount":538,
"from":0,
"Mysize":1000,
"Allhits":[{
"isVerified":true,
"backgroundColor":"FF720B",
"name":"yourShop",
"Id":"12345678",
"ActionItems":[{
"subtitle":"Click here to start",
"body":null,
"language":"de",
"title":"Start here",
"isDefault":true}],
"tintColor":"FFFFFF",
"shoppingHours":[{"hours":[{"day":["SUNDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["MONDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["SATURDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["FRIDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["THURSDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["WEDNESDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["TUESDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]}]}],
"LogoUrl":"https://url/to/my/logo.png",
"coverage":[{
"country":"*",
"language":"*",
"ratio":1}],
"shoppingHours2":[{"hours":[{"day":["SUNDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["MONDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["SATURDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["FRIDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["THURSDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["WEDNESDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]},{"day":["TUESDAY"],"timeRange":[{"allDay":false,"from":25200,"to":68400}]}]}],
"group":"shop_open",
"timeZone":"CET",
"phone":"+1234567890",
"modTime":1234567890,
"intId":"+123456789",
"Logo2Url":"https://link/to/my/logo.png"}],
"aggs":{}}
使用pandas模块是否容易实现?我仍在学习python,因此,我对此表示感谢。从这个json文件中,我至少需要将id, intId, name, ratio
和该字段名称的值提取到CSV中。
所需的输出应该是(或者它可以具有所有字段名称和值,然后我将能够直接从CSV中提取我需要的字段):
id intId name ratio
12345678 123456789 yourShop 1
这只是一条记录的版本,但是我的输出文件必须包含JSON文件中所有ID的行。
谢谢您的帮助!
编辑 我也尝试了以下方法:
import json
import csv
x = '/path/to/myrecords.json'
x = json.loads(x)
f.writerow(["name", "id", "intId", "ratio"])
f = csv.writer(open("/path/to/my/output.csv", "w", newline=''))
for x in x:
f.writerow([x["Allhits"]["name"],
x["Allhits"]["id"],
x["Allhits"]["ActionItems"]["intId"],
x["Allhits"]["ActionItems"]["ratio"]])
但在x = json.loads(x)
步骤中收到此错误:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/myusername/anaconda3/lib/python3.6/json/__init__.py", line 354, in loads
return _default_decoder.decode(s)
File "/Users/myusername/anaconda3/lib/python3.6/json/decoder.py", line 339, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/Users/myusername/anaconda3/lib/python3.6/json/decoder.py", line 357, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
答案 0 :(得分:0)
如果您需要展平整个json(包括数组),则可以通过递归执行以下操作:
import json
import csv
def flatten(item, prefix=None):
result = {}
if isinstance(item, list):
item = { i : item[i] for i in range(0, len(item) )}
for key, val in item.items():
prefixed_key = f"{prefix}{key}" if prefix else str(key)
if isinstance(val, list) or isinstance(val, dict):
result = {**result, **flatten(val, f"{prefixed_key}_")}
else:
result[prefixed_key] = val
return result
with open('test.json') as f_input, open('result.csv', 'w', newline='') as f_output:
writer = csv.writer(f_output)
hits = json.load(f_input)["Allhits"]
header_written = False
for hit in hits:
flat = flatten(hit)
if not header_written:
writer.writerow(flat.keys())
header_written = True
writer.writerow(flat.values())
使用此功能,您会得到以下csv怪胎:
isVerified,backgroundColor,name,Id,ActionItems_0_subtitle,ActionItems_0_body,ActionItems_0_language,ActionItems_0_title,ActionItems_0_isDefault,tintColor,shoppingHours_0_hours_0_day_0,shoppingHours_0_hours_0_timeRange_0_allDay,shoppingHours_0_hours_0_timeRange_0_from,shoppingHours_0_hours_0_timeRange_0_to,shoppingHours_0_hours_1_day_0,shoppingHours_0_hours_1_timeRange_0_allDay,shoppingHours_0_hours_1_timeRange_0_from,shoppingHours_0_hours_1_timeRange_0_to,shoppingHours_0_hours_2_day_0,shoppingHours_0_hours_2_timeRange_0_allDay,shoppingHours_0_hours_2_timeRange_0_from,shoppingHours_0_hours_2_timeRange_0_to,shoppingHours_0_hours_3_day_0,shoppingHours_0_hours_3_timeRange_0_allDay,shoppingHours_0_hours_3_timeRange_0_from,shoppingHours_0_hours_3_timeRange_0_to,shoppingHours_0_hours_4_day_0,shoppingHours_0_hours_4_timeRange_0_allDay,shoppingHours_0_hours_4_timeRange_0_from,shoppingHours_0_hours_4_timeRange_0_to,shoppingHours_0_hours_5_day_0,shoppingHours_0_hours_5_timeRange_0_allDay,shoppingHours_0_hours_5_timeRange_0_from,shoppingHours_0_hours_5_timeRange_0_to,shoppingHours_0_hours_6_day_0,shoppingHours_0_hours_6_timeRange_0_allDay,shoppingHours_0_hours_6_timeRange_0_from,shoppingHours_0_hours_6_timeRange_0_to,LogoUrl,coverage_0_country,coverage_0_language,coverage_0_ratio,shoppingHours2_0_hours_0_day_0,shoppingHours2_0_hours_0_timeRange_0_allDay,shoppingHours2_0_hours_0_timeRange_0_from,shoppingHours2_0_hours_0_timeRange_0_to,shoppingHours2_0_hours_1_day_0,shoppingHours2_0_hours_1_timeRange_0_allDay,shoppingHours2_0_hours_1_timeRange_0_from,shoppingHours2_0_hours_1_timeRange_0_to,shoppingHours2_0_hours_2_day_0,shoppingHours2_0_hours_2_timeRange_0_allDay,shoppingHours2_0_hours_2_timeRange_0_from,shoppingHours2_0_hours_2_timeRange_0_to,shoppingHours2_0_hours_3_day_0,shoppingHours2_0_hours_3_timeRange_0_allDay,shoppingHours2_0_hours_3_timeRange_0_from,shoppingHours2_0_hours_3_timeRange_0_to,shoppingHours2_0_hours_4_day_0,shoppingHours2_0_hours_4_timeRange_0_allDay,shoppingHours2_0_hours_4_timeRange_0_from,shoppingHours2_0_hours_4_timeRange_0_to,shoppingHours2_0_hours_5_day_0,shoppingHours2_0_hours_5_timeRange_0_allDay,shoppingHours2_0_hours_5_timeRange_0_from,shoppingHours2_0_hours_5_timeRange_0_to,shoppingHours2_0_hours_6_day_0,shoppingHours2_0_hours_6_timeRange_0_allDay,shoppingHours2_0_hours_6_timeRange_0_from,shoppingHours2_0_hours_6_timeRange_0_to,group,timeZone,phone,modTime,intId,Logo2Url
True,FF720B,yourShop,12345678,Click here to start,,de,Start here,True,FFFFFF,SUNDAY,False,25200,68400,MONDAY,False,25200,68400,SATURDAY,False,25200,68400,FRIDAY,False,25200,68400,THURSDAY,False,25200,68400,WEDNESDAY,False,25200,68400,TUESDAY,False,25200,68400,https://url/to/my/logo.png,*,*,1,SUNDAY,False,25200,68400,MONDAY,False,25200,68400,SATURDAY,False,25200,68400,FRIDAY,False,25200,68400,THURSDAY,False,25200,68400,WEDNESDAY,False,25200,68400,TUESDAY,False,25200,68400,shop_open,CET,+1234567890,1234567890,+123456789,https://link/to/my/logo.png
但是,如果您只需要特定的键,则可以遍历Allhits
并检索所需的内容,如下所示:
with open('test.json') as f_input, open('result.csv', 'w', newline='') as f_output:
writer = csv.writer(f_output)
hits = json.load(f_input)["Allhits"]
writer.writerow(["Id", "intId", "name", "ratio"])
for hit in hits:
writer.writerow([hit["Id"], hit["intId"], hit["name"], hit["coverage"][0]["ratio"]])
答案 1 :(得分:0)
尝试一下。这将在Allhits
列表中循环并获得所需的最少数据集:
import json
import csv
with open('/path/to/myrecords.json') as f_input:
json_data = json.load(f_input)
with open('/path/to/my/output.csv', 'w', newline='') as f_output:
csv_output = csv.writer(f_output)
csv_output.writerow(["id", "intId", "name", "ratio"])
for x in json_data['Allhits']:
csv_output.writerow([x["Id"], x["intId"], x["name"], x["coverage"][0]["ratio"]])
答案 2 :(得分:0)
x = json.loads(x)
json.decoder.JSONDecodeError:预期值:第1行第1列(字符 0)
这是因为json.loads
在文件名作为x传递时期望包含json数据的字符串。
"Allhits":[{...}]
是包含字典的单个元素列表
将x["Allhits"]["name"]
替换为x["Allhits"][0]["name"]
。访问诸如"Id"
之类的其他元素时,也是如此。