我有一个这样的字典:
# Sample nested dictionary used throughout the question.
a = {
    'compatibility': {
        'schema': [
            'attribute_variables/evar44',
            'event42',
            'container_visitors',
        ],
        'status': 'valid',
        'supported_features': [
            'function_and',
            'function_attr',
            'function_container',
            'function_event',
            'function_event-exists',
            'function_streq',
        ],
        'supported_products': ['o', 'data_warehouse', 'discover'],
        'supported_schema': ['warehouse', 'n'],
        'validator_version': '1.1.11',
    },
    'definition': {
        'container': {
            'context': 'visitors',
            'func': 'container',
            'pred': {
                'func': 'and',
                'preds': [
                    {
                        'description': 'e42',
                        'evt': {'func': 'event', 'name': 'metrics/event42'},
                        'func': 'event-exists',
                    },
                    {
                        'description': 'v44',
                        'func': 'streq',
                        'str': '544',
                        'val': {'func': 'attr', 'name': 'variables/evar44'},
                    },
                ],
            },
        },
        'func': 'segment',
        'version': [1, 0, 0],
    },
    'description': '',
    'id': 's2165c30c946ebceb',
    'modified': '12',
    'name': 'Apop',
    'owner': {'id': 84699, 'login': 'max', 'name': 'Max'},
    'reportSuiteName': 'App',
    'rsid': 'test',
    'siteTitle': 'App',
    'tags': [],
}
我想提取每个嵌套字典中键“description”、“func”和“str”/“num”的值,并把这些值放在一个 DataFrame 中返回。
我使用这段代码进行了尝试,但是我无法获得所有值,也很难将这些值放入一个DataFrame中。
def findkeys(node, kv):
    """Yield every value stored under key ``kv`` anywhere inside ``node``.

    ``node`` may be an arbitrarily nested combination of lists and dicts.
    Lists are searched element by element. For a dict, the value under
    ``kv`` (if present) is yielded first, and then every value of the dict
    is searched recursively -- including the value just yielded, so matches
    nested inside a matching value are reported as well. Any other type
    yields nothing.

    Args:
        node: The structure to search (list, dict, or a leaf value).
        kv: The dictionary key to look for.

    Yields:
        Each value found under ``kv``, in depth-first traversal order.
    """
    if isinstance(node, list):
        for item in node:
            yield from findkeys(item, kv)
    elif isinstance(node, dict):
        if kv in node:
            yield node[kv]
        for child in node.values():
            yield from findkeys(child, kv)
对于我的示例,我想要的输出是
# Desired result for the sample dictionary, written as a DataFrame literal.
# NOTE(review): assumes pandas and numpy are already imported as `pd` and
# `np`; neither import is shown in this snippet.
# NOTE(review): the column label 'funk' differs from the dictionary key
# 'func' -- possibly a typo in the question; confirm before relying on it.
pd.DataFrame(np.array([['e42', 'event', 'NaN'], ['v44', 'streq', '544']]),
columns=['description', 'funk', 'str/num'])
答案 0 :(得分:0)
下面的代码将“感兴趣”的键所对应的值收集到一个字典中。
from collections import defaultdict
# Sample nested dictionary that the walker below is run against.
a = {
    'compatibility': {
        'schema': [
            'attribute_variables/evar44',
            'event42',
            'container_visitors',
        ],
        'status': 'valid',
        'supported_features': [
            'function_and',
            'function_attr',
            'function_container',
            'function_event',
            'function_event-exists',
            'function_streq',
        ],
        'supported_products': ['o', 'data_warehouse', 'discover'],
        'supported_schema': ['warehouse', 'n'],
        'validator_version': '1.1.11',
    },
    'definition': {
        'container': {
            'context': 'visitors',
            'func': 'container',
            'pred': {
                'func': 'and',
                'preds': [
                    {
                        'description': 'e42',
                        'evt': {'func': 'event', 'name': 'metrics/event42'},
                        'func': 'event-exists',
                    },
                    {
                        'description': 'v44',
                        'func': 'streq',
                        'str': '544',
                        'val': {'func': 'attr', 'name': 'variables/evar44'},
                    },
                ],
            },
        },
        'func': 'segment',
        'version': [1, 0, 0],
    },
    'description': '',
    'id': 's2165c30c946ebceb',
    'modified': '12',
    'name': 'Apop',
    'owner': {'id': 84699, 'login': 'max', 'name': 'Max'},
    'reportSuiteName': 'App',
    'rsid': 'test',
    'siteTitle': 'App',
    'tags': [],
}
def walk_dict(d, interesting_keys, result, depth=0):
    """Collect scalar values stored under the interesting keys of a nested dict.

    The keys of ``d`` are visited in sorted order. Dict values are walked
    recursively; for list values, every element that is itself a dict is
    walked and all other elements are ignored. Any remaining (non-dict,
    non-list) value is appended to ``result[key]`` when its key is listed in
    ``interesting_keys``.

    Args:
        d: The dictionary to walk.
        interesting_keys: Container of keys whose values should be collected.
        result: Mapping from key to list of collected values; mutated in
            place. Both ``defaultdict(list)`` and a plain ``dict`` work.
        depth: Current nesting level. It is passed along on recursion but
            not otherwise used by this function.

    Returns:
        None. The collected values are available through ``result``.
    """
    for k in sorted(d):
        v = d[k]
        if isinstance(v, dict):
            walk_dict(v, interesting_keys, result, depth + 1)
        elif isinstance(v, list):
            for entry in v:
                if isinstance(entry, dict):
                    walk_dict(entry, interesting_keys, result, depth + 1)
        elif k in interesting_keys:
            # setdefault keeps this working for plain dicts as well as
            # defaultdict(list) accumulators.
            result.setdefault(k, []).append(v)
# Gather the values of the keys of interest from the sample dict, grouped by
# key name, then display the collected mapping.
wanted_keys = ["description", "func", "str", "num"]
result = defaultdict(list)
walk_dict(a, wanted_keys, result)
print(result)
输出
defaultdict(<class 'list'>, {'func': ['container', 'and', 'event', 'event-exists', 'streq', 'attr', 'segment'], 'description': ['e42', 'v44', ''], 'str': ['544']})