这个问题是What is the most efficient way to extract info from complex JSON files?
的后续问题。我有大量的dict文件,其结构可以是任意的。我希望捕获所有键为"text"的字符串,以及键为"htext"的所有字符串(当其中没有额外的嵌套时)。
["today", "yesterday", "a", "b", "c", "tomorrow"]
# 在上面的示例中,我希望我的结果为
# (The question prose above was fused onto the `def` line in the source; it is
# kept here as a comment so that the snippet parses.)
def extract_text(obj, acc):
    """Append every non-container value stored under a "text" key to *acc*.

    This is the baseline solution quoted in the question: it walks nested
    dicts and lists recursively and looks only at the "text" key; "htext"
    gets no special treatment.

    Args:
        obj: Arbitrarily nested structure of dicts and lists. Any other
            object is ignored.
        acc: List that receives the matches in traversal order; it is
            mutated in place.

    Returns:
        None. Results are delivered through *acc*.
    """
    if isinstance(obj, dict):
        for k, v in obj.items():
            # Containers are always descended into, even under a "text" key.
            if isinstance(v, (dict, list)):
                extract_text(v, acc)
            elif k == "text":
                acc.append(v)
    elif isinstance(obj, list):
        for item in obj:
            extract_text(item, acc)
。
上一个问题中提供的解决方案是:
k == 'htext'
我尝试通过将 k == 'htext' 条件添加到 elif 语句来修改此函数,但未成功。我是Python的新手。非常感谢任何帮助!
答案 0 :(得分:1)
试试这个:
# Sample nested document: "text" values appear at several depths, and "htext"
# holds either a list of dicts (nested) or a flat list of strings.
d = {
    "section": {
        "heading": {"lvl": "A1", "text": "today"},
        "htext": [
            {
                "color": "green",
                "text": "yesterday",
                "htext": ["a", "b", "c"],
            },
            {"color": "purple", "text": "tomorrow"},
        ],
    },
}

# Accumulator that the extraction appends its results to.
acc = []
def extract_text(obj, acc):
    """Collect "text" values and flat "htext" string lists into *acc*.

    Walks nested dicts and lists recursively. For every dict entry:

    * a dict value is descended into;
    * a list under the "htext" key whose items are all strings is added to
      *acc* item by item;
    * any other list is descended into;
    * a non-container value under the "text" key is appended to *acc*.

    Args:
        obj: Arbitrarily nested structure of dicts and lists. Any other
            object is ignored.
        acc: List that receives the matches in traversal order; it is
            mutated in place.

    Returns:
        None. Results are delivered through *acc*.
    """
    if isinstance(obj, dict):
        for k, v in obj.items():
            if isinstance(v, dict):
                extract_text(v, acc)
            elif isinstance(v, list):
                # Lists are tested before the "text" key so that a list stored
                # under "text" is walked rather than appended as a whole.
                if k == "htext" and all(isinstance(item, str) for item in v):
                    acc.extend(v)
                else:
                    extract_text(v, acc)
            elif k == "text":
                acc.append(v)
    elif isinstance(obj, list):
        for item in obj:
            extract_text(item, acc)
# Run the extraction over the sample dict `d`, collecting matches into `acc`.
extract_text(d, acc)
# Show the collected values.
print(acc)
答案 1 :(得分:1)
您可以检查键是否为"htext"并且值是否为非嵌套列表:
def extract_text(obj, acc):
    """Collect "text" values and non-nested "htext" lists into *acc*.

    Walks nested dicts and lists recursively. For every dict entry:

    * a list under the "htext" key that contains no dict or list items is
      added to *acc* item by item (an empty list adds nothing);
    * any other dict or list value is descended into;
    * a non-container value under the "text" key is appended to *acc*.

    Args:
        obj: Arbitrarily nested structure of dicts and lists. Any other
            object is ignored.
        acc: List that receives the matches in traversal order; it is
            mutated in place.

    Returns:
        None. Results are delivered through *acc*.
    """
    if isinstance(obj, dict):
        for k, v in obj.items():
            # Every item is inspected, not just v[0]: this is safe for an
            # empty list and keeps nested containers out of the results when
            # only the first item happens to be a plain value.
            if (
                k == "htext"
                and isinstance(v, list)
                and not any(isinstance(x, (dict, list)) for x in v)
            ):
                acc.extend(v)
            elif isinstance(v, (dict, list)):
                extract_text(v, acc)
            elif k == "text":
                acc.append(v)
    elif isinstance(obj, list):
        for item in obj:
            extract_text(item, acc)
#=> ['yesterday', 'a', 'b', 'c', 'tomorrow', 'today']