我有几个JSON文件需要解析为CSV文件。
下面是我的代码:
import json
from collections import defaultdict
def flatten_json1(y):
    """Collect every scalar leaf of a parsed JSON value into per-key columns.

    The structure is walked depth-first. Each scalar is appended to the list
    stored under the name of the nearest enclosing dict key; list elements
    inherit the name of the list they belong to. Only the innermost key is
    used, so equal key names at different nesting levels share one column.

    Args:
        y: A parsed JSON value (dict, list, or scalar).

    Returns:
        A ``defaultdict(list)`` mapping key name -> list of scalar values in
        traversal order. A scalar passed at the top level is stored under the
        empty-string key. Columns are NOT padded to a common length.
    """
    out = defaultdict(list)

    def flatten(x, name=''):
        # isinstance (rather than an exact type check) so that dict/list
        # subclasses such as OrderedDict are descended into, not stored whole.
        if isinstance(x, dict):
            for key in x:
                flatten(x[key], key)
        elif isinstance(x, list):
            for item in x:
                flatten(item, name)
        else:
            out[name].append(x)

    flatten(y)
    return out
import json
from pprint import pprint
# Load the sample document, flatten it into per-key columns, and show them.
with open('testjson.json') as json_file:
    parsed = json.load(json_file)
flattened = flatten_json1(parsed)
pprint(flattened)
这是我正在使用的示例JSON:
{
"id": "0001",
"type": "donut",
"name": "Cake",
"ppu": 0.55,
"batters":
{
"batter":
[
{ "id": "1001", "type": "Regular" },
{ "id": "1002", "type": "Chocolate" },
{ "id": "1003", "type": "Blueberry" },
{ "id": "1004", "type": "Devil's Food" }
]
},
"topping":
[
{ "id": "5001", "type": "None" },
{ "id": "5002", "type": "Glazed" },
{ "id": "5005", "type": "Sugar" },
{ "id": "5007", "type": "Powdered Sugar" },
{ "id": "5006", "type": "Chocolate with Sprinkles" },
{ "id": "5003", "type": "Chocolate" },
{ "id": "5004", "type": "Maple" }
]
}
运行时,它为我提供以下输出:
defaultdict(<type 'list'>, {u'type': [u'None', u'Glazed', u'Sugar',
u'Powdered Sugar', u'Chocolate with Sprinkles', u'Chocolate', u'Maple',
u'Regular', u'Chocolate', u'Blueberry', u"Devil's Food", u'donut'],
u'id': [u'5001', u'5002', u'5005', u'5007', u'5006', u'5003', u'5004',
u'1001', u'1002', u'1003', u'1004', u'0001'], u'ppu': [0.55], u'name':
[u'Cake']})
但是您可以看到,“ppu”和“name”这两个键对应的列表长度与其他两个键的列表长度不相等。
所以,我怎样才能得到如下所示的输出:
defaultdict(<type 'list'>, {u'type': [u'None', u'Glazed', u'Sugar',
u'Powdered Sugar', u'Chocolate with Sprinkles', u'Chocolate', u'Maple',
u'Regular', u'Chocolate', u'Blueberry', u"Devil's Food", u'donut'],
u'id': [u'5001', u'5002', u'5005', u'5007', u'5006', u'5003', u'5004',
u'1001', u'1002', u'1003', u'1004', u'0001'], u'ppu':
[0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55], u'name': [u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake', u'Cake']})
每个长度为12。请帮忙。
谢谢。
答案 0 :(得分:0)
由于您不知道最长的数组在构建平面dict结果时的最大长度,因此可以在构建结果之后循环遍历结果键,并使用最后一个元素填充每个数组。
在第二个示例中,子节点包含根节点中不存在的新键,导致各行无法对齐。为此我添加了一个判断,用于确定根节点是否缺少某个键;如果缺少,则在该列的开头插入一个空白值以保持对齐。请注意,如果所有条目的键都是完全动态的,则可能需要一个更健壮的解决方案,但就目前而言这似乎为时过早。
def flatten_json(data):
    """Flatten a parsed JSON value into equal-length, per-key columns.

    Scalars are first collected depth-first into one list per innermost key
    name (list elements inherit the name of their list). Every column is then
    padded to the length of the longest one so the result can be written out
    as rows:

    * a column that is exactly one entry short AND whose key does not hold a
      scalar directly on the top-level object is assumed to be missing only
      the root row, so a blank ``""`` is inserted at the front;
    * any other short column is extended by repeating its last value.

    NOTE: the "one entry short" rule is a heuristic for the root-plus-children
    shape; documents whose items have fully dynamic keys need a more robust
    alignment strategy.

    Args:
        data: A parsed JSON value (dict, list, or scalar).

    Returns:
        A ``defaultdict(list)`` mapping key name -> list of values, all lists
        having the same length. Empty if ``data`` contains no scalar leaves.
    """
    result = defaultdict(list)

    def flatten(node, name=""):
        if isinstance(node, dict):
            for key, value in node.items():
                flatten(value, key)
        elif isinstance(node, list):
            for item in node:
                flatten(item, name)
        else:
            result[name].append(node)

    flatten(data)

    # Nothing was collected (e.g. {} or []): max() below would raise.
    if not result:
        return result

    # Keys that carry a scalar directly on the root object. These columns
    # already have a root-row value and must never be shifted by a blank.
    root_keys = set()
    if isinstance(data, dict):
        root_keys = {
            key for key, value in data.items()
            if not isinstance(value, (dict, list))
        }

    max_length = max(len(values) for values in result.values())
    for key, values in result.items():
        if key not in root_keys and max_length - len(values) == 1:
            # Child-only key: leave the root row blank to keep rows aligned.
            values.insert(0, "")
        # Repeat the last value to fill whatever is still missing.
        values.extend([values[-1]] * (max_length - len(values)))
    return result
输出:
defaultdict(<class 'list'>,
{'id': ['0001', '1001', '1002', '1003', '1004', '5001', '5002',
'5005', '5007', '5006', '5003', '5004'],
'name': ['Cake', 'Cake', 'Cake', 'Cake', 'Cake', 'Cake', 'Cake',
'Cake', 'Cake', 'Cake', 'Cake', 'Cake'],
'ppu': [0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55, 0.55,
0.55, 0.55],
'type': ['donut', 'Regular', 'Chocolate', 'Blueberry',
"Devil's Food", 'None', 'Glazed', 'Sugar',
'Powdered Sugar', 'Chocolate with Sprinkles', 'Chocolate',
'Maple']})
和
defaultdict(<class 'list'>,
{'active': [False, True, True, True, False, True],
'ages': ['123', '123', '123', '123', '123', '123'],
'availableDate': ['2018-24-11', '2018-24-12', '2018-24-13',
'2018-24-14', '2018-24-15', '2018-24-16'],
'build': ['Jack12', 'Jack12', 'Jack12', 'Jack12', 'Jack12',
'Jack12'],
'country': ['', 'IND1', 'IND2', 'IND3', 'IND4', 'IND5'],
'hierID': ['jack', 'jack', 'jack', 'jack', 'jack', 'jack'],
'locID': ['Jack123', 'Jack123', 'Jack123', 'Jack123', 'Jack123',
'Jack123'],
'org': ['', 'jack1', 'jack2', 'jac3', 'jack4', 'jack5']})