查找嵌套python dict中所有出现的键

时间:2019-07-01 12:21:08

标签: python json dataframe dictionary

我有一个这样的字典:

# Sample nested structure from the question: dicts that contain lists that
# contain further dicts, with 'description' / 'func' / 'str' keys appearing
# at several depths.
a = {
    'compatibility': {
        'schema': [
            'attribute_variables/evar44',
            'event42',
            'container_visitors',
        ],
        'status': 'valid',
        'supported_features': [
            'function_and',
            'function_attr',
            'function_container',
            'function_event',
            'function_event-exists',
            'function_streq',
        ],
        'supported_products': ['o', 'data_warehouse', 'discover'],
        'supported_schema': ['warehouse', 'n'],
        'validator_version': '1.1.11',
    },
    'definition': {
        'container': {
            'context': 'visitors',
            'func': 'container',
            'pred': {
                'func': 'and',
                'preds': [
                    {
                        'description': 'e42',
                        'evt': {'func': 'event', 'name': 'metrics/event42'},
                        'func': 'event-exists',
                    },
                    {
                        'description': 'v44',
                        'func': 'streq',
                        'str': '544',
                        'val': {'func': 'attr', 'name': 'variables/evar44'},
                    },
                ],
            },
        },
        'func': 'segment',
        'version': [1, 0, 0],
    },
    'description': '',
    'id': 's2165c30c946ebceb',
    'modified': '12',
    'name': 'Apop',
    'owner': {'id': 84699, 'login': 'max', 'name': 'Max'},
    'reportSuiteName': 'App',
    'rsid': 'test',
    'siteTitle': 'App',
    'tags': [],
}

我想提取每个“description”、“func”和“str”/“num”键的值,并将这些值放在一个 DataFrame 中返回。

我使用这段代码进行了尝试,但是我无法获得所有值,也很难将这些值放入一个DataFrame中。

def findkeys(node, kv):
    """Lazily yield every value stored under key ``kv`` inside ``node``.

    ``node`` may be a dict, a list, or any other object. Dicts and lists are
    searched recursively; anything else yields nothing. For a dict, its own
    ``kv`` value (if present) is yielded before any match found deeper in
    its values.
    """
    if isinstance(node, dict):
        if kv in node:
            yield node[kv]
        children = node.values()
    elif isinstance(node, list):
        children = node
    else:
        # Scalars and other leaf objects contain nothing to search.
        return

    for child in children:
        yield from findkeys(child, kv)

对于我的示例,我想要的输出是

# Desired result for the sample input: one row per predicate, with the
# literal string 'NaN' where no 'str'/'num' value exists.
pd.DataFrame(
    np.array([
        ['e42', 'event', 'NaN'],
        ['v44', 'streq', '544'],
    ]),
    columns=['description', 'funk', 'str/num'],
)

1 个答案:

答案 0 :(得分:0)

下面的代码将“感兴趣”的键的值收集到一个字典中。

from collections import defaultdict

# Sample input for the answer: a nested mix of dicts and lists in which the
# keys of interest ('description', 'func', 'str') occur at several depths.
a = {
    'compatibility': {
        'schema': [
            'attribute_variables/evar44',
            'event42',
            'container_visitors',
        ],
        'status': 'valid',
        'supported_features': [
            'function_and',
            'function_attr',
            'function_container',
            'function_event',
            'function_event-exists',
            'function_streq',
        ],
        'supported_products': ['o', 'data_warehouse', 'discover'],
        'supported_schema': ['warehouse', 'n'],
        'validator_version': '1.1.11',
    },
    'definition': {
        'container': {
            'context': 'visitors',
            'func': 'container',
            'pred': {
                'func': 'and',
                'preds': [
                    {
                        'description': 'e42',
                        'evt': {'func': 'event', 'name': 'metrics/event42'},
                        'func': 'event-exists',
                    },
                    {
                        'description': 'v44',
                        'func': 'streq',
                        'str': '544',
                        'val': {'func': 'attr', 'name': 'variables/evar44'},
                    },
                ],
            },
        },
        'func': 'segment',
        'version': [1, 0, 0],
    },
    'description': '',
    'id': 's2165c30c946ebceb',
    'modified': '12',
    'name': 'Apop',
    'owner': {'id': 84699, 'login': 'max', 'name': 'Max'},
    'reportSuiteName': 'App',
    'rsid': 'test',
    'siteTitle': 'App',
    'tags': [],
}


def walk_dict(d, interesting_keys, result, depth=0):
    """Collect the leaf values of selected keys from a nested dict.

    The keys of ``d`` are visited in sorted order. Dict values are walked
    recursively. List values are scanned for dicts at any nesting depth,
    including lists nested directly inside other lists. Any other value is
    appended to ``result[key]`` when its key is in ``interesting_keys``.

    Args:
        d: The dict to walk.
        interesting_keys: Container of key names whose values are collected.
        result: Mapping from key to list, mutated in place. It must supply a
            list for each collected key, e.g. ``defaultdict(list)``.
        depth: Current recursion depth. It is passed down on each recursive
            call but never read by the traversal.

    Returns:
        None. Matches are accumulated in ``result``.

    Note:
        Only leaf values are collected. An interesting key whose value is a
        dict or a list is descended into, not recorded itself.
    """
    for key in sorted(d):
        value = d[key]
        if isinstance(value, dict):
            walk_dict(value, interesting_keys, result, depth + 1)
        elif isinstance(value, list):
            _walk_list(value, interesting_keys, result, depth + 1)
        elif key in interesting_keys:
            result[key].append(value)


def _walk_list(entries, interesting_keys, result, depth):
    """Walk every dict reachable through ``entries``, including nested lists."""
    for entry in entries:
        if isinstance(entry, dict):
            walk_dict(entry, interesting_keys, result, depth)
        elif isinstance(entry, list):
            # A list directly inside a list may still hold dicts further down.
            _walk_list(entry, interesting_keys, result, depth)


# Gather the values of the requested keys from the sample dict ``a`` and
# show them grouped by key name.
result = defaultdict(list)
walk_dict(
    a,
    interesting_keys=["description", "func", "str", "num"],
    result=result,
)
print(result)

输出

defaultdict(<class 'list'>, {'func': ['container', 'and', 'event', 'event-exists', 'streq', 'attr', 'segment'], 'description': ['e42', 'v44', ''], 'str': ['544']})