在以下代码中，哪怕我只向列表 `rules` 中添加一个元素，处理时间也会呈指数级增长：
def rules_generator():
    """Expand the ``<NAME>`` placeholders of every rule into a regex group.

    Each placeholder such as ``<PLACE>`` is replaced by an alternation of
    the words listed for it in the local dictionary, e.g.
    ``'go to <PLACE>'`` becomes ``'go to (hospital|market)'``. Rules
    without placeholders are returned unchanged.

    Returns:
        dict: maps each original rule string to its expanded regex
        pattern string.

    Raises:
        KeyError: if a rule uses a placeholder that has no dictionary entry.
    """
    dictionary = {'<PLACE>': ['hospital', 'market'], '<DRINK>': ['Fanta', 'beer']}
    rules = ['go to <PLACE>', '<DRINK> for drinking in <PLACE>', 'love it']

    # Raw string: '<' and '>' need no escaping, and a non-raw '\<' is an
    # invalid string escape that newer Python versions warn about.
    placeholder = re.compile(r'<[A-Z]+?>')

    def expand(match):
        # A callback keeps the replacement text literal; a plain replacement
        # string would have its backslashes interpreted by re.sub.
        return '(' + '|'.join(dictionary[match.group(0)]) + ')'

    # One substitution pass per rule handles every placeholder (repeated or
    # not) and leaves placeholder-free rules untouched.
    return {rule: placeholder.sub(expand, rule) for rule in rules}
# Compiled rule patterns, built lazily on the first analyzer() call.
_COMPILED_RULES = None


def _compiled_rules():
    """Return the compiled rule patterns, building them only once."""
    global _COMPILED_RULES
    if _COMPILED_RULES is None:
        # rules_generator() works from fixed data, so its result can be
        # reused for every call instead of being rebuilt per input line.
        _COMPILED_RULES = [
            re.compile(expanded_rule)
            for expanded_rule in rules_generator().values()
        ]
    return _COMPILED_RULES


def analyzer(text):
    """Print ``True`` once for every rule whose pattern is found in *text*.

    The rules are expanded and compiled a single time and then reused.
    Searching with pattern strings on every call depends on the ``re``
    module's internal cache of compiled patterns, which is limited in
    size, so a large rule list can end up being recompiled repeatedly.

    Args:
        text: the string (e.g. one corpus line) to search.

    Returns:
        None. Matches are reported only by printing.
    """
    for pattern in _compiled_rules():
        if pattern.search(text):
            print(True)
# Run every line of the corpus file through the analyzer. The with-block
# guarantees the file is closed even if analyzer() raises.
with open('corpus.txt', 'r') as corpus:
    for line in corpus:
        # NOTE(review): analyzer() prints its own matches and has no return
        # statement, so this prints "None" for each line. Kept so the
        # script's output is unchanged; confirm whether it is wanted.
        print(analyzer(line))