假设我有如下字符串:
"((attr1=25 and attr2=8) or attr3=15)"
或
"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"
或
"(attrXYZ=10)"
甚至
"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"
以及一个由字典组成的列表,其中每个字典可能包含、也可能不包含字符串中指定的属性。Python 中是否有一种简单的方法,可以筛选出与这类查询字符串匹配的字典?
答案 0 :(得分:1)
我们的策略是将输入编辑为类似于Python自然理解的语法,而不是创建我们自己的解析器。这样做,我们将使用dis模块(用于Python字节码的反汇编程序)来获取字符串中的所有名称。
import dis
class Number:
    """Comparable wrapper around a value that may be flagged as missing.

    When ``exists`` is true, every rich comparison is forwarded to the
    wrapped value ``n``. When ``exists`` is false, every comparison
    (including ``!=``) evaluates to ``False``.
    """

    def __init__(self, n, exists=True):
        self.n = n
        self.exists = exists

    def __lt__(self, other):
        if not self.exists:
            return False
        return self.n < other

    def __le__(self, other):
        if not self.exists:
            return False
        return self.n <= other

    def __eq__(self, other):
        if not self.exists:
            return False
        return self.n == other

    def __ne__(self, other):
        # A missing value is deliberately "not unequal" as well, so that any
        # comparison involving it fails.
        if not self.exists:
            return False
        return self.n != other

    def __gt__(self, other):
        if not self.exists:
            return False
        return self.n > other

    def __ge__(self, other):
        if not self.exists:
            return False
        return self.n >= other
def clear_entries(entry):
    """Convert a query string to Python comparison syntax.

    A single ``=`` (the query language's equality test) becomes ``==``.
    The operators ``==``, ``!=``, ``<=`` and ``>=`` are left untouched, and
    the legacy ``<>`` spelling of "not equal" is rewritten to ``!=``.

    Args:
        entry: The query string, e.g. ``"(attr1=25 and attr2>=8)"``.

    Returns:
        The query with every equality test written as ``==``.
    """
    import re  # local import so this function does not rely on file-level imports

    # Only touch a lone '=': one that is neither preceded by '=', '!', '<'
    # or '>' nor followed by another '='. A blanket replace('=', '==') would
    # corrupt '<=' / '>=' / '==' into '<==' / '>==' / '===='.
    entry_output = re.sub(r'(?<![=!<>])=(?!=)', '==', entry)
    return entry_output.replace('<>', '!=')
def check_condition(dict_, str_):
    """Return True when the dictionary satisfies the query string.

    The query is normalised with ``clear_entries`` and then evaluated as a
    Python expression in which every key of ``dict_`` is available as a
    variable. Names used by the query that are not keys of ``dict_`` are
    bound to ``Number(0, exists=False)``, so every comparison against them
    is False.

    Args:
        dict_: Mapping of attribute name to value.
        str_: Query such as ``"((attr1=25 and attr2=8) or attr3=15)"``.

    Returns:
        bool: Truthiness of the evaluated query.
    """
    str_ = clear_entries(str_)

    # Hand the values to eval() through an explicit namespace instead of
    # exec()-ing "name = value" assignments into this function's locals:
    # that approach re-parses each value as source code (so string values
    # break or get executed) and relies on exec() being able to write to
    # function locals, which newer Python versions no longer allow (PEP 667).
    namespace = dict(dict_)

    # co_names lists every name the compiled query refers to.
    all_names = dis.Bytecode(str_).codeobj.co_names
    for name in all_names:
        # The wrapped number does not matter here because of the 'exists' flag.
        namespace.setdefault(name, Number(0, exists=False))

    # SECURITY NOTE: eval() executes arbitrary Python expressions. Emptying
    # __builtins__ is not a sandbox; do not pass untrusted query strings.
    return bool(eval(str_, {'__builtins__': {}}, namespace))
if __name__ == '__main__':
    # Demo: evaluate every sample query against every sample record and
    # print the match result, the quoted query and the record.
    entries = [
        "((attr1=25 and attr2=8) or attr3=15)",
        "((attr1>25 and attr2<50) or (attr3=10 and attr4=20))",
        "(2<attrXYZ<10)",
        "(attr1=20 and attr2=20 and attr3=20 and attr4=20)",
        "(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))",
    ]

    dicts = [
        {'attr1': 25, 'attr2': 8, 'attr3': 123},
        {'attr1': 1, 'attr2': 8, 'attr3': 123},
        {'attr1': 26, 'attr2': 8, 'attr3': 123, 'attr4': 1},
        {'attr1': 1, 'attr2': 50, 'attr3': 1, 'attr4': 20},
        {'attr1': -1, 'attr2': 50, 'attr3': 1, 'attr4': 20},
        {'attrXYZ': 3},
        {'attrXYZ': 10},
        {'attr1': 20},
    ]

    for entry in entries:
        for d in dicts:
            matched = check_condition(d, entry)
            label = '"{0}"'.format(entry)
            print(matched, label, d)
(True, '"((attr1=25 and attr2=8) or attr3=15)"', {'attr1': 25, 'attr2': 8, 'attr3': 123})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attr1': 1, 'attr2': 8, 'attr3': 123})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attr1': 26, 'attr2': 8, 'attr3': 123, 'attr4': 1})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attr1': 1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attr1': -1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attrXYZ': 3})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attrXYZ': 10})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attr1': 20})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attr1': 25, 'attr2': 8, 'attr3': 123})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attr1': 1, 'attr2': 8, 'attr3': 123})
(True, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attr1': 26, 'attr2': 8, 'attr3': 123, 'attr4': 1})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attr1': 1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attr1': -1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attrXYZ': 3})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attrXYZ': 10})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attr1': 20})
(False, '"(2<attrXYZ<10)"', {'attr1': 25, 'attr2': 8, 'attr3': 123})
(False, '"(2<attrXYZ<10)"', {'attr1': 1, 'attr2': 8, 'attr3': 123})
(False, '"(2<attrXYZ<10)"', {'attr1': 26, 'attr2': 8, 'attr3': 123, 'attr4': 1})
(False, '"(2<attrXYZ<10)"', {'attr1': 1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"(2<attrXYZ<10)"', {'attr1': -1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(True, '"(2<attrXYZ<10)"', {'attrXYZ': 3})
(False, '"(2<attrXYZ<10)"', {'attrXYZ': 10})
(False, '"(2<attrXYZ<10)"', {'attr1': 20})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attr1': 25, 'attr2': 8, 'attr3': 123})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attr1': 1, 'attr2': 8, 'attr3': 123})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attr1': 26, 'attr2': 8, 'attr3': 123, 'attr4': 1})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attr1': 1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attr1': -1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attrXYZ': 3})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attrXYZ': 10})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attr1': 20})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attr1': 25, 'attr2': 8, 'attr3': 123})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attr1': 1, 'attr2': 8, 'attr3': 123})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attr1': 26, 'attr2': 8, 'attr3': 123, 'attr4': 1})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attr1': 1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attr1': -1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attrXYZ': 3})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attrXYZ': 10})
(True, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attr1': 20})
答案 1 :(得分:1)
(编辑:你确实应该使用像 pyparsing 这样的解析库,而不是写这种简单粗糙(quick and dirty)的代码。)
如果查询字符串来自不受信任的输入,请不要对其使用 exec。
import re
# Matches an attribute name immediately followed by '='; group 1 captures the
# name. Written as a raw string so that '\w' reaches the regex engine without
# being an invalid string escape sequence.
QUERY_EXEC_RE = re.compile(r'(\w+)=')
def _matches(query_exec, d):
a = []
exec('a.append({0})'.format(query_exec), globals(), locals())
return a[0]
def query_dicts(query, dicts):
    """Return the dictionaries from ``dicts`` that satisfy ``query``.

    Args:
        query: Query text made of ``name=value`` tests, e.g.
            ``"((attr1=25 and attr2=8) or attr3=15)"``.
        dicts: Iterable of dictionaries to filter.

    Returns:
        list: The matching dictionaries, in their original order.
    """
    # Turn each ``name=`` into a lookup on the candidate dictionary ``d``.
    expression = QUERY_EXEC_RE.sub(r'd.get("\1") == ', query)

    selected = []
    for candidate in dicts:
        if _matches(expression, candidate):
            selected.append(candidate)
    return selected
示例:
# Usage example: filter a list of records with a single query string.
query = "((attr1=25 and attr2=8) or attr3=15)"

dicts = [
    {'attr1': 1, 'attr2': 2, 'attr3': 3},
    {'attr1': 25, 'attr2': 7, 'attr3': 12},
    {'attr1': 24, 'attr2': 8, 'attr3': 13},
    {'attr1': 25, 'attr2': 8, 'attr3': 14},
    {'attr1': 5, 'attr2': 1, 'attr3': 15},
    {'attr3': 15},
    {'attr1': 25, 'attr2': 8},
]

answer = query_dicts(query, dicts)
print(answer)
[{'attr1': 25, 'attr2': 8, 'attr3': 14},
{'attr1': 5, 'attr2': 1, 'attr3': 15},
{'attr3': 15},
{'attr1': 25, 'attr2': 8}]