我正在尝试提取包含字符串的字典
字典在下面
[ { "id": "1", "name": "A", "businessArea": [ "Accounting" ], "Designation": [ "L2" ], "Location":"NY" },
{ "id": "2", "name": "B", "businessArea": [ "Engineering" ], "Role": [ "Tester","Developer" ], "Designation": [ "L1" ],"Location":"CA" },
{ "id": "3", "name": "C", "businessArea": [ "Engineering" ], "Role": [ "Developer" ], "Designation": [ "L1" ],"Location":"NY" }]
我想提取同时满足以下条件的字典：businessArea 为 Engineering，Role 为 Tester 或 Developer，Designation 为 L1，并且 Location 为 NY。
代码在下面
def get_set(d, field):
    """Return the value stored under ``field`` in ``d`` as a set.

    A plain string is wrapped in a one-element set so that it is treated as
    a single value rather than being split into characters; any other value
    is handed to ``set()``.

    Raises:
        KeyError: if ``field`` is not a key of ``d``.
        TypeError: if the value is not iterable or holds unhashable items
            (for example a list of dicts).
    """
    value = d[field]
    return {value} if isinstance(value, str) else set(value)
# we use this to filter
def validate(d):
    """Return a truthy value when record ``d`` satisfies every filter.

    The filters are: Role is Developer or Tester, businessArea is
    Engineering, Designation is L1 and Location is NY. The result is the
    last evaluated intersection (a set), or ``None`` when the guard fails.
    """
    # NOTE(review): this guard only requires ONE of the keys to be present,
    # so a record that lacks e.g. 'Role' still reaches get_set() below and
    # raises KeyError. Requiring all four keys (``and``) avoids that.
    if 'Role' in d or 'businessArea' in d or 'Designation' in d or 'Location' in d:
        return (
            get_set(d, 'Role').intersection({'Developer', 'Tester'})
            and get_set(d, 'businessArea').intersection({'Engineering'})
            and get_set(d, 'Designation').intersection({'L1'})
            and get_set(d, 'Location').intersection({'NY'})
        )
result = [d for d in test if validate(d)]
预期为[{ "id": "3", "name": "C", "businessArea": [ "Engineerring" ], "Role": [ "Developer" ], "Designation": [ "L1" ],"Location":"NY" }]
再添加一个字典用于测试
[{ 'id': '1', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Accounting' }, { 'id': '3', 'name': 'Accounting' } ],'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }, ] }, { 'id': '2', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Research' }, { 'id': '3', 'name': 'Accounting' } ], 'Role': [ { 'id': '5032', 'name': 'Tester' }, { 'id': '5033', 'name': 'Developer' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }, ] }, { 'id': '1', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Research' }, { 'id': '3', 'name': 'Accounting' } ], 'Role': [ { 'id': '5032', 'name': 'Developer' }, { 'id': '5033', 'name': 'Developer' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }] }]
第二个要测试的字典
[{'_index': '1',
'_type': '_doc',
'_id': '1',
'_score': 1.0,
'_source': {'id': '1',
'name': 'A',
'businessArea': [{'id': '25', 'name': 'Accounting'}],
'Role': ['Developer'],
'Designation': ['L2'],
'Location': 'NY'}},
{'_index': '1',
'_type': '_doc',
'_id': '2',
'_score': 1.0,
'_source': {'id': '2',
'name': 'B',
'businessArea': [{'id': '25', 'name': 'Engineering'}],
'Role': ['Tester', 'Developer'],
'Designation': ['L1'],
'Location': 'NY'}},
{'_index': '1',
'_type': '_doc',
'_id': '3',
'_score': 1.0,
'_source': {'id': '3',
'name': 'C',
'businessArea': [{'id': '25', 'name': 'Engineering'}],
'Role': ['Tester', 'Developer'],
'Designation': ['L1'],
'Location': 'NY'}}]
答案 0 :(得分:1)
尝试这样:
# Sample records. 'Role' is optional, and businessArea / Role / Designation
# are lists while Location is a plain string.
myList = [
    {"id": "1", "name": "A", "businessArea": ["Accounting"], "Designation": ["L2"], "Location": "NY"},
    {"id": "2", "name": "B", "businessArea": ["Engineering"], "Role": ["Tester", "Developer"], "Designation": ["L1"], "Location": "CA"},
    {"id": "3", "name": "C", "businessArea": ["Engineering"], "Role": ["Developer"], "Designation": ["L1"], "Location": "NY"},
]

wanted_roles = {"Tester", "Developer"}

# Keep the records that satisfy all four conditions. List-valued fields are
# tested by membership (comparing a list to a string is always False), and
# .get() with a default lets records without a key simply fail the filter
# instead of raising KeyError.
matches = [
    x for x in myList
    if wanted_roles.intersection(x.get("Role", []))
    and "Engineering" in x.get("businessArea", [])
    and "L1" in x.get("Designation", [])
    and x.get("Location") == "NY"
]

for x in matches:
    print(x)
答案 1 :(得分:1)
修正原始代码
将函数 validate 中 if 语句里的 or 替换为 and，即可让代码正常运行
def get_set(d, field):
    """Return the value stored under ``field`` in ``d`` as a set.

    A plain string becomes a one-element set (it is not split into
    characters); any other value is converted with ``set()``.

    Raises:
        KeyError: if ``field`` is not a key of ``d``.
        TypeError: if the value is not iterable or holds unhashable items
            (for example a list of dicts).
    """
    value = d[field]
    if isinstance(value, str):
        return {value}
    return set(value)
# we use this to filter
def validate(d):
    """Return True when record ``d`` satisfies every filter.

    The filters are: Role is Developer or Tester, businessArea is
    Engineering, Designation is L1 and Location is NY.
    """
    required = ('Role', 'businessArea', 'Designation', 'Location')
    # All four keys must be present (``and``, not ``or``): a record missing
    # any of them cannot match, and get_set() would raise KeyError on it.
    if not all(key in d for key in required):
        return False
    return bool(
        get_set(d, 'Role').intersection({'Developer', 'Tester'})
        and get_set(d, 'businessArea').intersection({'Engineering'})
        and get_set(d, 'Designation').intersection({'L1'})
        and get_set(d, 'Location').intersection({'NY'})
    )
test = [ { "id": "1", "name": "A", "businessArea": [ "Accounting" ], "Designation": [ "L2" ], "Location":"NY" },
{ "id": "2", "name": "B", "businessArea": [ "Engineering" ], "Role": [ "Tester","Developer" ], "Designation": [ "L1" ],"Location":"CA" },
{ "id": "3", "name": "C", "businessArea": [ "Engineering" ], "Role": [ "Developer" ], "Designation": [ "L1" ],"Location":"NY" }]
result = [d for d in test if validate(d)]
print(result)
输出
[{'id': '3',
'name': 'C',
'businessArea': ['Engineering'],
'Role': ['Developer'],
'Designation': ['L1'],
'Location': 'NY'}]
处理嵌套字典的新算法
def contains(item, field, values):
    """Report whether any leaf value found under ``field`` is in ``values``.

    Args:
        item: a dict when ``field`` is given; otherwise any value (list,
            dict or scalar) reached during recursion.
        field: key to look up in ``item`` first, or a falsy value to
            examine ``item`` itself.
        values: collection of accepted leaf values.

    Returns:
        True if a scalar reachable through nested lists and dict values is
        in ``values``; False otherwise, including when ``field`` is absent
        or maps to ``None``.
    """
    if field:
        item = item.get(field)  # value stored under the requested key
        if item is None:
            return False  # field not found, so nothing can match
    if isinstance(item, list):
        # A list matches when any of its elements matches.
        return any(contains(element, None, values) for element in item)
    if isinstance(item, dict):
        # A dict matches when any of its values matches (keys are ignored).
        return any(contains(element, None, values) for element in item.values())
    return item in values  # scalar leaf: direct membership test
def validate(d):
    """Return True when record ``d`` satisfies every filter.

    The filters are: Role is Developer or Tester, businessArea is
    Engineering, Designation is L1 and Location is NY. Each field is
    searched with contains(), so list values and nested dicts are handled,
    and a missing field makes the record fail.
    """
    return bool(
        contains(d, 'Role', {'Developer', 'Tester'})
        and contains(d, 'businessArea', {'Engineering'})
        and contains(d, 'Designation', {'L1'})
        and contains(d, 'Location', {'NY'})
    )
测试
测试1
test = [ { "id": "1", "name": "A", "businessArea": [ "Accounting" ], "Designation": [ "L2" ], "Location":"NY" },
{ "id": "2", "name": "B", "businessArea": [ "Engineering" ], "Role": [ "Tester","Developer" ], "Designation": [ "L1" ],"Location":"CA" },
{ "id": "3", "name": "C", "businessArea": [ "Engineering" ], "Role": [ "Developer" ], "Designation": [ "L1" ],"Location":"NY" }]
输出1
[{'id': '3', 'name': 'C', 'businessArea': ['Engineering'], 'Role': ['Developer'], 'Designation': ['L1'], 'Location': 'NY'}]
测试2
test = [{'id': '1', 'name': 'A', 'businessArea':[{'id': '25', 'name': 'Accounting'}], 'Role': ['Developer'], 'Designation': ['L2'], 'Location': 'NY'}, {'id': '2', 'name': 'B', 'businessArea':[{'id': '25', 'name': 'Engineering'}], 'Role': ['Tester', 'Developer'], 'Designation': ['L1'], 'Location': 'NY'}, {'id': '3', 'name': 'C', 'businessArea':[{'id': '25', 'name': 'Engineering'}], 'Role': ['Tester', 'Developer'], 'Designation': ['L1'], 'Location': 'NY'}]
# Filter the records bound to ``test`` (the original iterated an undefined
# name ``t``, which raises NameError).
result = [d for d in test if validate(d)]
print(result)
输出2
[{'id': '2', 'name': 'B', 'businessArea': [{'id': '25', 'name': 'Engineering'}], 'Role': ['Tester', 'Developer'], 'Designation': ['L1'], 'Location': 'NY'}, {'id': '3', 'name': 'C', 'businessArea': [{'id': '25', 'name': 'Engineering'}], 'Role': ['Tester', 'Developer'], 'Designation': ['L1'], 'Location': 'NY'}]
测试3
test = [{ 'id': '1', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Accounting' }, { 'id': '3', 'name': 'Accounting' } ],'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }, ] }, { 'id': '2', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Research' }, { 'id': '3', 'name': 'Accounting' } ], 'Role': [ { 'id': '5032', 'name': 'Tester' }, { 'id': '5033', 'name': 'Developer' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }, ] }, { 'id': '1', 'name': 'Group1', 'BusinessArea': [ { 'id': '14', 'name': 'Research' }, { 'id': '3', 'name': 'Accounting' } ], 'Role': [ { 'id': '5032', 'name': 'Developer' }, { 'id': '5033', 'name': 'Developer' } ], 'Designation': [ { 'id': '16', 'name': 'L1' }, { 'id': '20', 'name': 'L2' }, { 'id': '25', 'name': 'L2' }] }]
# Filter the records bound to ``test`` (the original iterated an undefined
# name ``t``, which raises NameError).
result = [d for d in test if validate(d)]
print(result)
输出3
[]
测试4（问题中的第二个字典）
test = [{'_index': '1',
'_type': '_doc',
'_id': '1',
'_score': 1.0,
'_source': {'id': '1',
'name': 'A',
'businessArea': [{'id': '25', 'name': 'Accounting'}],
'Role': ['Developer'],
'Designation': ['L2'],
'Location': 'NY'}},
{'_index': '1',
'_type': '_doc',
'_id': '2',
'_score': 1.0,
'_source': {'id': '2',
'name': 'B',
'businessArea': [{'id': '25', 'name': 'Engineering'}],
'Role': ['Tester', 'Developer'],
'Designation': ['L1'],
'Location': 'NY'}},
{'_index': '1',
'_type': '_doc',
'_id': '3',
'_score': 1.0,
'_source': {'id': '3',
'name': 'C',
'businessArea': [{'id': '25', 'name': 'Engineering'}],
'Role': ['Tester', 'Developer'],
'Designation': ['L1'],
'Location': 'NY'}}]
# Each element of ``test`` is a wrapper whose actual record sits under
# '_source', so validate and collect that inner dict. Validating the wrapper
# itself would never match, because it has no 'Role', 'businessArea',
# 'Designation' or 'Location' key.
sources = (hit.get('_source', {}) for hit in test)
result = [source for source in sources if validate(source)]
print(result)
输出
[{'id': '2', 'name': 'B', 'businessArea': [{'id': '25', 'name': 'Engineering'}], 'Role': ['Tester', 'Developer'], 'Designation': ['L1'], 'Location': 'NY'}, {'id': '3', 'name': 'C', 'businessArea': [{'id': '25', 'name': 'Engineering'}], 'Role': ['Tester', 'Developer'], 'Designation': ['L1'], 'Location': 'NY'}]
答案 2 :(得分:0)
您的示例中有拼写错误（Engineerring 应为 Engineering）。更正后的数据如下：
[ { "id": "1", "name": "A", "businessArea": [ "Accounting" ], "Designation": [ "L2" ], "Location":"NY" },
{ "id": "2", "name": "B", "businessArea": [ "Engineering" ], "Role": [ "Tester","Developer" ], "Designation": [ "L1" ],"Location":"CA" },
{ "id": "3", "name": "C", "businessArea": [ "Engineering" ], "Role": [ "Developer" ], "Designation": [ "L1" ],"Location":"NY" }]