Python 3 - 基于字符串查询过滤字典列表

时间:2016-12-07 05:06:27

标签: python string list dictionary filter

假设我有如下字符串:

"((attr1=25 and attr2=8) or attr3=15)"

"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"

"(attrXYZ=10)"

甚至

"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"

以及一个由字典组成的列表,其中每个字典可能包含也可能不包含字符串中指定的属性。Python中是否有一种简单的方法,可以筛选出与此类字符串查询匹配的字典?

2 个答案:

答案 0 :(得分:1)

免责声明:这是一个非常懒惰且不安全的解决方案,它使用了Python中最声名狼藉的两个函数 eval 和 exec,但只要输入的格式与你提供的完全一致,它就可以正常工作。

我们的策略是把输入改写成Python本身就能理解的语法,而不是编写我们自己的解析器。为此,我们将使用dis模块(Python字节码的反汇编器)来获取字符串中出现的所有名称。

import dis 

class Number:
    """Wrap a value together with a flag saying whether it actually exists.

    While ``exists`` is truthy, each rich comparison is delegated to the
    wrapped value ``n``. While it is falsy, every comparison (including
    ``!=``) returns ``False``, so a placeholder built with ``exists=False``
    can never satisfy a condition.
    """

    def __init__(self, n, exists=True):
        self.n, self.exists = n, exists

    def __lt__(self, other):
        if not self.exists:
            return False
        return self.n < other

    def __le__(self, other):
        if not self.exists:
            return False
        return self.n <= other

    def __eq__(self, other):
        if not self.exists:
            return False
        return self.n == other

    def __ne__(self, other):
        # Deliberately also False for a missing value: "absent" matches nothing.
        if not self.exists:
            return False
        return self.n != other

    def __gt__(self, other):
        if not self.exists:
            return False
        return self.n > other

    def __ge__(self, other):
        if not self.exists:
            return False
        return self.n >= other


def clear_entries(entry):
    """Rewrite a query string's comparison operators into Python syntax.

    A lone ``=`` (the query language's equality test) becomes ``==``.
    The operators ``!=``, ``<=``, ``>=`` and an already-doubled ``==`` are
    left untouched, and the legacy ``<>`` spelling is turned into ``!=``.

    Args:
        entry: The query string, e.g. ``"(attr1=25 and attr2>=8)"``.

    Returns:
        The query with Python-compatible comparison operators.
    """
    import re  # local import so the function does not rely on file-level imports

    # Only double an '=' that is neither the tail of '!=', '<=', '>=', '=='
    # nor the head of '=='. A blanket replace('=', '==') would corrupt
    # '>=' into '>==' and '<=' into '<=='.
    entry_output = re.sub(r'(?<![!<>=])=(?!=)', '==', entry)
    return entry_output.replace('<>', '!=')

def check_condition(dict_, str_):
    """Return True when ``dict_`` satisfies the query string ``str_``.

    The query is first normalised with ``clear_entries`` and then evaluated
    as a Python expression in which every key of ``dict_`` is available as a
    variable. Names used by the query but absent from ``dict_`` are bound to
    ``Number(0, exists=False)``.

    Args:
        dict_: Mapping of attribute name to value.
        str_: Query such as ``"((attr1=25 and attr2=8) or attr3=15)"``.

    Returns:
        ``True`` if the evaluated query is truthy, ``False`` otherwise.

    Raises:
        SyntaxError: If the normalised query is not a valid Python expression.
    """
    str_ = clear_entries(str_)

    # Compile once: the code object lists every name the query refers to and
    # is reused for the evaluation below.
    code = compile(str_, '<query>', 'eval')

    # Use an explicit namespace rather than exec()-ing assignments into this
    # function's locals. That trick depends on locals() returning a shared
    # mutable dict (no longer the case since PEP 667 / Python 3.13), clashes
    # with this function's own local names, and breaks on non-numeric values
    # because they were interpolated with str().
    namespace = dict(dict_)
    for name in code.co_names:
        # The placeholder's numeric value is irrelevant; only exists=False matters.
        namespace.setdefault(name, Number(0, exists=False))

    # NOTE(security): eval() still executes arbitrary expressions. Emptying
    # __builtins__ is not a sandbox; never pass untrusted query strings here.
    return bool(eval(code, {'__builtins__': {}}, namespace))

测试

if __name__ == '__main__':
    # Sample queries, including a chained comparison and an attribute
    # (attr1231231) that none of the dictionaries below defines.
    entries = [
        "((attr1=25 and attr2=8) or attr3=15)",
        "((attr1>25 and attr2<50) or (attr3=10 and attr4=20))",
        "(2<attrXYZ<10)",
        "(attr1=20 and attr2=20 and attr3=20 and attr4=20)",
        "(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))",
    ]

    # Sample records to test each query against.
    dicts = [
        {'attr1': 25, 'attr2': 8, 'attr3': 123},
        {'attr1': 1, 'attr2': 8, 'attr3': 123},
        {'attr1': 26, 'attr2': 8, 'attr3': 123, 'attr4': 1},
        {'attr1': 1, 'attr2': 50, 'attr3': 1, 'attr4': 20},
        {'attr1': -1, 'attr2': 50, 'attr3': 1, 'attr4': 20},
        {'attrXYZ': 3},
        {'attrXYZ': 10},
        {'attr1': 20},
    ]

    # Print one line per (query, record) pair: result, quoted query, record.
    for query in entries:
        quoted_query = '"{0}"'.format(query)
        for record in dicts:
            outcome = check_condition(record, query)
            print(outcome, quoted_query, record)

结果

(True, '"((attr1=25 and attr2=8) or attr3=15)"', {'attr1': 25, 'attr2': 8, 'attr3': 123})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attr1': 1, 'attr2': 8, 'attr3': 123})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attr1': 26, 'attr2': 8, 'attr3': 123, 'attr4': 1})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attr1': 1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attr1': -1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attrXYZ': 3})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attrXYZ': 10})
(False, '"((attr1=25 and attr2=8) or attr3=15)"', {'attr1': 20})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attr1': 25, 'attr2': 8, 'attr3': 123})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attr1': 1, 'attr2': 8, 'attr3': 123})
(True, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attr1': 26, 'attr2': 8, 'attr3': 123, 'attr4': 1})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attr1': 1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attr1': -1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attrXYZ': 3})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attrXYZ': 10})
(False, '"((attr1>25 and attr2<50) or (attr3=10 and attr4=20))"', {'attr1': 20})
(False, '"(2<attrXYZ<10)"', {'attr1': 25, 'attr2': 8, 'attr3': 123})
(False, '"(2<attrXYZ<10)"', {'attr1': 1, 'attr2': 8, 'attr3': 123})
(False, '"(2<attrXYZ<10)"', {'attr1': 26, 'attr2': 8, 'attr3': 123, 'attr4': 1})
(False, '"(2<attrXYZ<10)"', {'attr1': 1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"(2<attrXYZ<10)"', {'attr1': -1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(True, '"(2<attrXYZ<10)"', {'attrXYZ': 3})
(False, '"(2<attrXYZ<10)"', {'attrXYZ': 10})
(False, '"(2<attrXYZ<10)"', {'attr1': 20})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attr1': 25, 'attr2': 8, 'attr3': 123})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attr1': 1, 'attr2': 8, 'attr3': 123})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attr1': 26, 'attr2': 8, 'attr3': 123, 'attr4': 1})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attr1': 1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attr1': -1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attrXYZ': 3})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attrXYZ': 10})
(False, '"(attr1=20 and attr2=20 and attr3=20 and attr4=20)"', {'attr1': 20})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attr1': 25, 'attr2': 8, 'attr3': 123})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attr1': 1, 'attr2': 8, 'attr3': 123})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attr1': 26, 'attr2': 8, 'attr3': 123, 'attr4': 1})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attr1': 1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attr1': -1, 'attr2': 50, 'attr3': 1, 'attr4': 20})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attrXYZ': 3})
(False, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attrXYZ': 10})
(True, '"(attr1=20 or (attr2=20 and attr3=20 and attr4=20 and attr1231231=1))"', {'attr1': 20})

答案 1 :(得分:1)

只有在你确定查询字符串是安全的情况下,才可以这样做。

(编辑:你真的应该使用像pyparsing这样的工具,而不是采用这种快速但粗糙的做法。)

只要查询字符串的来源不是不受信任的输入,就可以对它使用exec:

import re

# Matches an attribute name immediately followed by '='; group 1 captures the
# name. Written as a raw string so that '\w' reaches the regex engine instead
# of being parsed as an invalid string escape, which newer Python versions
# warn about.
QUERY_EXEC_RE = re.compile(r'(\w+)=')

def _matches(query_exec, d):
    a = []
    exec('a.append({0})'.format(query_exec), globals(), locals())
    return a[0]

def query_dicts(query, dicts):
    """Return the dictionaries from ``dicts`` that satisfy ``query``.

    Each ``name=`` in the query is rewritten to ``d.get("name") == `` via
    ``QUERY_EXEC_RE``; the resulting expression is then checked against every
    dictionary with ``_matches``.

    Args:
        query: Query string such as ``"((attr1=25 and attr2=8) or attr3=15)"``.
        dicts: Iterable of dictionaries to filter.

    Returns:
        A list of the matching dictionaries, in their original order.
    """
    translated = QUERY_EXEC_RE.sub(r'd.get("\1") == ', query)
    selected = []
    for candidate in dicts:
        if _matches(translated, candidate):
            selected.append(candidate)
    return selected

示例:

# Keep records where attr1 is 25 and attr2 is 8, or where attr3 is 15.
query = "((attr1=25 and attr2=8) or attr3=15)"
# Sample records; the last two each lack one of the queried attributes.
dicts = [
    {'attr1': 1, 'attr2': 2, 'attr3': 3},
    {'attr1': 25, 'attr2': 7, 'attr3': 12},
    {'attr1': 24, 'attr2': 8, 'attr3': 13},
    {'attr1': 25, 'attr2': 8, 'attr3': 14},
    {'attr1': 5, 'attr2': 1, 'attr3': 15},
    {'attr3': 15},
    {'attr1': 25, 'attr2': 8},
]
answer = query_dicts(query, dicts)
print(answer)

[{'attr1': 25, 'attr2': 8, 'attr3': 14},
 {'attr1': 5, 'attr2': 1, 'attr3': 15},
 {'attr3': 15},
 {'attr1': 25, 'attr2': 8}]