我有以下json对象:
[{
'firstname': 'Jimmie',
'lastname': 'Barninger',
'zip_code': 12345,
'colors': ['2014-01-01', '2015-01-01'],
'ids': {
'44': 'OK',
'51': 'OK'
},
'address': {
'state': 'MI',
'town': 'Dearborn'
},
'other': {
'ids': {
'1': 'OK',
'103': 'OK'
},
}
}, {
'firstname': 'John',
'lastname': 'Doe',
'zip_code': 90027,
'colors': None,
'ids': {
'91': 'OK',
'103': 'OK'
},
'address': {
'state': 'CA',
'town': 'Los Angeles'
},
'other': {
'ids': {
'91': 'OK',
'103': 'OK'
},
}
}]
我希望能够统计每个嵌套字典(按其键路径)在所有记录中出现的唯一键的数量。对于上面的数据,结果应为:
address: 2 # ['state', 'town']
ids: 4 # ['44', '51', '91', '103']
other.ids: 3 # ['1', '103', '91']
我一直很难遍历该对象来解决这个问题,尤其是当某一项嵌套在列表中时。到目前为止,我一直在尝试以下代码,尽管它目前无法正常工作,但我还是把它贴出来以供参考:
def count_per_key(obj, _c=None, _path=()):
    """Collect the unique keys of every nested dict, grouped by dotted path.

    Walks ``obj`` (a dict, a list, or any nesting of the two). For every key
    whose value is a dict, the keys of that dict are recorded under the dotted
    path leading to it. Lists are transparent: their items share the path of
    the list itself, so the records of a top-level list are merged together.

    Args:
        obj: The structure to inspect. Anything that is neither a dict nor a
            list is ignored.
        _c: Accumulator shared across the recursion. Callers normally leave
            it as ``None`` to start from an empty result.
        _path: Tuple of keys leading to ``obj``; internal to the recursion.

    Returns:
        A dict mapping each dotted path (for example ``"other.ids"``) to the
        set of distinct keys seen in the dicts found at that path. Apply
        ``len()`` to a value to obtain the count.
    """
    # A fresh dict per top-level call keeps separate calls independent.
    unique_values_per_key = {} if _c is None else _c

    if isinstance(obj, list):
        for item in obj:
            count_per_key(item, unique_values_per_key, _path)
    elif isinstance(obj, dict):
        for key, value in obj.items():
            child_path = _path + (str(key),)
            if isinstance(value, dict):
                dotted = ".".join(child_path)
                # update() mutates the stored set in place; union() would
                # build a new set and leave the stored one untouched.
                unique_values_per_key.setdefault(dotted, set()).update(value)
            # Keep walking: nested dicts and lists may hold further dicts.
            count_per_key(value, unique_values_per_key, child_path)

    return unique_values_per_key
答案 0 :(得分:1)
您可以将递归与生成器一起使用:
from collections import defaultdict
d = [{'firstname': 'Jimmie', 'lastname': 'Barninger', 'zip_code': 12345, 'colors': ['2014-01-01', '2015-01-01'], 'ids': {'44': 'OK', '51': 'OK'}, 'address': {'state': 'MI', 'town': 'Dearborn'}, 'other': {'ids': {'1': 'OK', '103': 'OK'}}}, {'firstname': 'John', 'lastname': 'Doe', 'zip_code': 90027, 'colors': None, 'ids': {'91': 'OK', '103': 'OK'}, 'address': {'state': 'CA', 'town': 'Los Angeles'}, 'other': {'ids': {'91': 'OK', '103': 'OK'}}}]
def get_vals(d, _path=None):
    """Yield ``[dotted_path, keys]`` for every ``ids``/``address`` dict in ``d``.

    Recursively walks ``d``. When a key named ``ids`` or ``address`` holds a
    mapping, a two-item list is yielded: the dotted path to that key and the
    list of the mapping's keys. Every other value is descended into.

    Args:
        d: The object to walk. Objects without an ``items`` method produce
            nothing.
        _path: Keys leading to ``d``; internal to the recursion. ``None``
            means the top level.

    Yields:
        ``[path, keys]`` lists, in iteration order of the walked mappings.
    """
    # Avoid a mutable default argument; copy so the caller's list is untouched.
    path = [] if _path is None else list(_path)
    items = getattr(d, 'items', None)
    if items is None:
        return

    targets = {'ids', 'address'}
    for a, b in items():
        here = path + [a]
        # A target key holding a non-mapping (e.g. None) has no keys to
        # report; it falls through to the recursion, which yields nothing.
        if a in targets and hasattr(b, 'keys'):
            yield ['.'.join(here), list(b.keys())]
        else:
            yield from get_vals(b, here)
# Flatten the [path, keys] pairs that get_vals yields for every record in d.
results = [pair for record in d for pair in get_vals(record)]

# Merge the key lists of all records that share the same dotted path.
c = defaultdict(list)
for path, keys in results:
    c[path] += keys

# De-duplicate each merged list, then attach the number of distinct keys.
_r = [[path, set(keys)] for path, keys in c.items()]
new_r = [[path, unique, len(unique)] for path, unique in _r]
输出:
[
['ids', {'91', '44', '51', '103'}, 4],
['address', {'state', 'town'}, 2],
['other.ids', {'1', '91', '103'}, 3]
]
答案 1 :(得分:1)
l= [{'firstname': 'Jimmie', 'lastname': 'Barninger', 'zip_code': 12345, 'colors': ['2014-01-01', '2015-01-01'], 'ids': {'44': 'OK', '51': 'OK'}, 'address': {'state': 'MI', 'town': 'Dearborn'}, 'other': {'ids': {'1': 'OK', '103': 'OK'}}}, {'firstname': 'John', 'lastname': 'Doe', 'zip_code': 90027, 'colors': None, 'ids': {'91': 'OK', '103': 'OK'}, 'address': {'state': 'CA', 'town': 'Los Angeles'}, 'other': {'ids': {'91': 'OK', '103': 'OK'}}}]
def find_dicts(d, parent=''):
    """Yield ``{dotted_path: keys}`` for every dict nested inside ``d``.

    For each key of ``d`` whose value is a dict, a single-entry dict is
    yielded that maps the full dotted path of that key to the list of the
    nested dict's keys. The nested dict is then searched in turn.

    Args:
        d: The dict to search.
        parent: Dotted path of ``d`` itself; the empty string means ``d`` is
            the top level.

    Yields:
        One single-entry dict per nested dict, parents before their children.
    """
    for k, v in d.items():
        if not isinstance(v, dict):
            continue
        # Compare by value: identity checks against a string literal are
        # implementation-dependent.
        if parent != '':
            identifier = str(parent) + '.' + str(k)
        else:
            identifier = str(k)
        yield {identifier: list(v)}
        # Pass the full path down so levels deeper than two keep their prefix.
        yield from find_dicts(v, identifier)
# One list of {path: keys} entries per record in l.
s = [list(find_dicts(d)) for d in l]

# Merge the per-record entries into path -> ordered list of unique keys.
# Paths are registered as they are met, so an empty input or a path that
# first appears in a later record is handled instead of failing or being
# dropped.
final_dict = {}
for li in s:
    for di in li:
        for di_key, di_values in di.items():
            seen = final_dict.setdefault(di_key, [])
            for value in di_values:
                if value not in seen:
                    seen.append(value)

# Every dotted path encountered, in first-seen order.
dict_names = list(final_dict)

for k, v in final_dict.items():
    print(k, ":", len(v), v)
输出
ids : 4 ['44', '51', '91', '103']
address : 2 ['town', 'state']
other.ids : 3 ['103', '1', '91']
other : 1 ['ids']