我有这样的字典:
# Example input: a nested tree of sections. Inside a section, every key
# other than 'range', 'template' and 'nomenclature' names a subsection
# that has the same structure as its parent.
source = {
    'Section 1': {
        'range': [0, 200],
        'template': 'ID-LOA-XXX',
        'nomenclature': True
    },
    'Section 2': {
        'range': [201, 800],
        'template': 'ID-EPI-XXX',
        'nomenclature': False,
        'Subsection 1': {
            'range': [0, 400],
            'template': 'ID-EPI-S1-XXX',
            'nomenclature': False,
            'Subsubsection 1': {
                'range': [0, 400],
                'template': 'ID-EPI-S12-XXX',
                'nomenclature': False
            }
        },
        'Subsection 2': {
            'range': [0, 400],
            'template': 'ID-EPI-S2-XXX',
            'nomenclature': False
        }
    },
    # etc.
}
它是从JSON文件加载的。我想把它“扁平化”成下面的字典:
# Desired output: every section, at any depth, becomes a top-level entry
# and gains a 'location' key holding its path in the original tree.
# Every location ends with a trailing '/', so all entries are consistent.
target = {
    'Section 1': {
        'range': [0, 200],
        'template': 'ID-LOA-XXX',
        'nomenclature': True,
        'location': './Section 1/'
    },
    'Section 2': {
        'range': [201, 800],
        'template': 'ID-EPI-XXX',
        'nomenclature': False,
        'location': './Section 2/'
    },
    'Subsection 1': {
        'range': [0, 400],
        'template': 'ID-EPI-S1-XXX',
        'nomenclature': False,
        'location': './Section 2/Subsection 1/'
    },
    'Subsubsection 1': {
        'range': [0, 400],
        'template': 'ID-EPI-S12-XXX',
        'nomenclature': False,
        'location': './Section 2/Subsection 1/Subsubsection 1/'
    },
    'Subsection 2': {
        'range': [0, 400],
        'template': 'ID-EPI-S2-XXX',
        'nomenclature': False,
        'location': './Section 2/Subsection 2/'
    },
    # etc.
}
我可以更改原始JSON文件的生成方式,但我不想这么做。
用文字描述这个JSON文件:每个部分至少包含三个键,还可能包含其他键。这些其他键被解释为当前部分所包含的子部分,每个子部分都是具有相同属性的dict。
这种模式原则上可以无限深度地递归下去。
我还想执行一些断言(例如检查必需的键 'range'、'template' 和 'nomenclature' 是否都存在,以及它们的值是否合法)。
到目前为止,我只设法进行了这些检查:
import json
# Validators for the three keys every section must carry. Each maps a key
# name to a predicate that returns True when the value is acceptable.
key_requirements = {
    "nomenclature": lambda x: isinstance(x, bool),
    "template": lambda x: isinstance(x, str) and "X" in x,
    # bool is a subclass of int, so it is excluded explicitly; otherwise
    # [False, True] would be accepted as a range.
    "range": lambda x: (
        isinstance(x, list)
        and len(x) == 2
        and all(isinstance(y, int) and not isinstance(y, bool) for y in x)
        and x[1] > x[0]
    ),
}


def checkSection(section):
    """Recursively validate a section and all of its subsections.

    A section is a dict that must contain every key of ``key_requirements``
    with a value accepted by the matching predicate. Any other key is
    treated as a subsection and must itself map to a valid section dict.

    Args:
        section: the section dict to validate.

    Returns:
        None when the section and all nested subsections are valid.

    Raises:
        ValueError: if ``section`` is not a dict, a required key is
            missing, a required value fails its predicate, or an
            unknown key does not hold a subsection dict.
    """
    if not isinstance(section, dict):
        raise ValueError("section must be a dict, got %r" % (section,))

    # Presence has to be checked up front: iterating over the keys that
    # exist can never reveal a key that is absent.
    missing = sorted(key for key in key_requirements if key not in section)
    if missing:
        raise ValueError("missing required key(s): %s" % ", ".join(missing))

    for key, value in section.items():
        if key in key_requirements:
            if not key_requirements[key](value):
                raise ValueError(
                    "invalid value for key %r: %r" % (key, value))
        elif isinstance(value, dict):
            checkSection(value)
        else:
            # Only subsection dicts are allowed besides the required keys.
            raise ValueError(
                "key %r is neither a required field nor a subsection" % (key,))
# Validate every top-level section. `source` may equally be the result of
# json.load(open(myJsonFile)).
for key in source:
    checkSection(source[key])
但是目前,没有多少咖啡能够让我想出一种高效,优雅,抒情的方式来将所需的转换编织到这个方案中......
有任何建议或想法吗?
答案 0 :(得分:2)
这个问题需要递归遍历。除非你想使用第三方库(是的,确实有现成的解决方案),否则你需要自己编写一个简单的递归遍历。
注意:路径的表示方式可能与您的不同,因为我使用的是Windows。
**实现**
def flatten(source):
    """Flatten a nested section tree into a single-level dict.

    Every dict value found in ``source``, at any depth, becomes one entry
    of the result, keyed by its own name. Its non-dict values are copied
    into that entry, and a 'location' key records the path from the root
    ('.') down to the section, built with ``os.path.join`` (so the
    separator is platform-specific). If two sections share a name, the
    one visited later overwrites the earlier one.

    Args:
        source: dict mapping section names to section dicts.

    Returns:
        A new dict mapping every section name to its flattened entry.

    Raises:
        KeyError: if ``source`` holds a non-dict value at the top level;
            such a value belongs to no section.
    """
    import os  # local import: this snippet has no import of its own

    target = {}

    def helper(src, path='.', last_key=None):
        # `is not None` rather than truthiness, so that a section whose
        # name is the empty string still gets an entry.
        if last_key is not None:
            target[last_key] = {'location': path}
        for key, value in src.items():
            if isinstance(value, dict):
                helper(value, os.path.join(path, key), key)
            elif last_key is None:
                raise KeyError(
                    "top-level key %r is not a section (expected a dict)"
                    % (key,))
            else:
                target[last_key][key] = value

    helper(source)
    return target
**输出**
>>> pprint.pprint(source)
{'Section 1': {'nomenclature': True,
'range': [0, 200],
'template': 'ID-LOA-XXX'},
'Section 2': {'Subsection 1': {'Subsubsection 1': {'nomenclature': False,
'range': [0, 400],
'template': 'ID-EPI-S12-XXX'},
'nomenclature': False,
'range': [0, 400],
'template': 'ID-EPI-S1-XXX'},
'Subsection 2': {'nomenclature': False,
'range': [0, 400],
'template': 'ID-EPI-S2-XXX'},
'nomenclature': False,
'range': [201, 800],
'template': 'ID-EPI-XXX'}}
>>> pprint.pprint(flatten(source))
{'Section 1': {'location': '\\Section 1',
'nomenclature': True,
'range': [0, 200],
'template': 'ID-LOA-XXX'},
'Section 2': {'location': '\\Section 2',
'nomenclature': False,
'range': [201, 800],
'template': 'ID-EPI-XXX'},
'Subsection 1': {'location': '\\Section 2\\Subsection 1',
'nomenclature': False,
'range': [0, 400],
'template': 'ID-EPI-S1-XXX'},
'Subsection 2': {'location': '\\Section 2\\Subsection 2',
'nomenclature': False,
'range': [0, 400],
'template': 'ID-EPI-S2-XXX'},
'Subsubsection 1': {'location': '\\Section 2\\Subsection 1\\Subsubsection 1',
'nomenclature': False,
'range': [0, 400],
'template': 'ID-EPI-S12-XXX'}}
答案 1 :(得分:1)
这适用于您的情况:
# Flatten `source` into `output`: every section dict, at any depth,
# becomes a top-level entry holding only its non-dict values.
# Assumes `source` is the nested dict from the question and that all of
# its top-level values are dicts.
# An explicit work list replaces the fixed two-level loops, so deeper
# levels such as 'Subsubsection 1' are flattened as well. `.items()` and
# `isinstance` behave the same on Python 2 and Python 3.
output = {}
pending = list(source.items())
while pending:
    name, section = pending.pop()
    item = {}
    for nested_key, nested_value in section.items():
        if isinstance(nested_value, dict):
            # A dict value is a subsection: queue it for its own entry.
            pending.append((nested_key, nested_value))
        else:
            item[nested_key] = nested_value
    output[name] = item
答案 2 :(得分:0)
我最终得到了这个解决方案:
import os
# type(u"") is `unicode` on Python 2 (the string type json.load returns
# there) and plain `str` on Python 3, so both kinds of text are accepted.
_STRING_TYPES = (str, type(u""))

# Validators for the three keys every section must carry. Each maps a key
# name to a predicate that returns True when the value is acceptable.
key_requirements = {
    "nomenclature": lambda x: isinstance(x, bool),
    "template": lambda x: isinstance(x, _STRING_TYPES) and "X" in x,
    # bool is a subclass of int, so it is excluded explicitly.
    "range": lambda x: (
        isinstance(x, list)
        and len(x) == 2
        and all(isinstance(y, int) and not isinstance(y, bool) for y in x)
        and x[1] > x[0]
    ),
}


def checkAndFlattenData(data):
    """Validate a nested section tree and flatten it into one dict.

    Each section (at any depth) becomes one entry of the result, keyed by
    its name. The entry holds the section's 'range', 'template' and
    'nomenclature' values plus a 'location' path that starts at '.' and
    is built with ``os.path.join``. If two sections share a name, the one
    visited later overwrites the earlier one.

    A value that fails its ``key_requirements`` predicate is reported by
    printing "ASSERTION FAILED!" and is still copied. Unknown keys whose
    value is not a dict are ignored.

    Args:
        data: dict mapping top-level section names to section dicts.

    Returns:
        A new dict mapping every section name to its flattened entry.

    Raises:
        KeyError: if a section lacks one of the required keys.
    """
    flat = {}

    def check_section(name, content, parent_path):
        # The full path is passed down the recursion, so each location is
        # built exactly once instead of being re-joined at every level.
        location = os.path.join(parent_path, name)
        section_out = {'location': location}
        for key, is_valid in key_requirements.items():
            if key not in content:
                raise KeyError(
                    "section %r is missing required key %r" % (location, key))
            if not is_valid(content[key]):
                print("ASSERTION FAILED!")
            section_out[key] = content[key]
        flat[name] = section_out

        for key, value in content.items():
            if key not in key_requirements and isinstance(value, dict):
                check_section(key, value, location)

    for name, content in data.items():
        check_section(name, content, '.')
    return flat
# Validate and flatten the nested `source` dict; the result is bound to `target`.
target = checkAndFlattenData(source)
但是,我总觉得这可以写得更加pythonic(和/或更高效)……如果有人有任何建议,请不要犹豫,直接复制粘贴这段代码并在单独的答案中加以改进,这样我就可以采纳它。