因此,我们有两种类型的关键字:
- 以 `!` 开头的关键字:此关键字必须出现在文本中。
- 以 `@!` 开头的关键字:此关键字不得出现在文本中。
我们要检查对于给定的关键字列表,文本中是否存在至少一个有效的匹配模式。关键字可能在文本中出现很多次,只要找到任何一个有效的模式即可。
示例1: 关键字:['!A','!C'] 文字:
Multiline text ...
A
Some other text
C
预期结果:正确
示例2: 关键字:['!A','@!B','!C'] 文字:
Multiline text ...
A
Some other text
B
C
预期结果:错误,因为B放在A和C之间
示例3: 关键字:['!A','@!B','!C'] 文字:
Multiline text ...
A
Some other text
B
A
C
预期结果:正确,因为第二个A与其后的C之间没有B。
到目前为止,我已经尝试过以下方法(均未成功):
方法1的示例函数
def contain_keywords(content, keywords):
    """Return True if ``content`` contains the keywords in a valid order.

    Keywords are processed left to right:

    * ``"!word"``  -- ``word`` is required and must occur after the
      previously matched required keyword.
    * ``"@!word"`` -- ``word`` is forbidden: it must not start anywhere in
      the gap between the surrounding required keywords.  Forbidden
      keywords listed before the first required keyword constrain the text
      from its beginning; forbidden keywords listed after the last required
      keyword constrain the text up to its end.

    Keywords with any other prefix are ignored.  Any occurrence of a
    required keyword may be used, so the result is True as soon as one
    valid combination of occurrences exists.

    Args:
        content: Text to search; converted with ``str()``.
        keywords: Iterable of prefixed keyword strings.

    Returns:
        bool: True if at least one valid match exists, otherwise False.
    """
    content = str(content)
    any_char = r'[\s\S]'  # unlike '.', this also matches newlines

    def gap(forbidden_words):
        # One character of "gap" text.  The negative lookahead refuses to
        # step onto a position where a forbidden keyword starts; a negated
        # character class ([^...]) cannot express this because it only
        # excludes single characters, not whole words.
        if not forbidden_words:
            return any_char
        return '(?:(?!%s)%s)' % ('|'.join(forbidden_words), any_char)

    pieces = []
    forbidden = []          # forbidden words collected since the last required one
    seen_required = False
    for keyword in keywords:
        if keyword.startswith("@!"):
            word = keyword[2:]
            if word:        # an empty forbidden word carries no constraint
                forbidden.append(re.escape(word))
        elif keyword.startswith("!"):
            if seen_required:
                # Lazy gap between two required keywords; the regex engine
                # backtracks over all occurrences for us.
                pieces.append(gap(forbidden) + '*?')
            elif forbidden:
                # Leading forbidden words only make sense when the gap is
                # anchored at the start of the text.
                pieces.append(r'\A' + gap(forbidden) + '*?')
            pieces.append(re.escape(keyword[1:]))
            forbidden = []
            seen_required = True

    if forbidden:
        # Trailing forbidden words: nothing forbidden up to the end of text.
        if not seen_required:
            pieces.append(r'\A')
        pieces.append(gap(forbidden) + r'*\Z')

    return re.search(''.join(pieces), content) is not None
方法2的示例函数
def contain_keywords2(content, keywords, offset=0, keyword_index=0):
    """Return True if ``content`` matches the keyword sequence from ``offset``.

    Recursive backtracking search over plain substring positions.

    * ``"!word"``  -- ``word`` is required and must occur at or after the
      current offset.
    * ``"@!word"`` -- ``word`` is forbidden: it must not start between the
      current offset and the occurrence chosen for the next required
      keyword (or the end of the text when no required keyword follows).

    Keywords are stripped of surrounding whitespace; keywords with any
    other prefix are ignored.  Every occurrence of a required keyword is
    tried, because the first occurrence may be followed by a forbidden
    keyword while a later one is not.

    Args:
        content: Text to search; converted with ``str()``.
        keywords: Sequence of prefixed keyword strings.
        offset: Position in ``content`` at which the search starts.
        keyword_index: Index of the first keyword in ``keywords`` still to
            be processed.

    Returns:
        bool: True if at least one valid assignment of occurrences exists.
    """
    content = str(content)

    # Gather the forbidden keywords that precede the next required keyword.
    # A separate index name is used so the ``keyword_index`` parameter is
    # never overwritten by a slice-relative position.
    forbidden = []
    required = None
    next_index = keyword_index
    for next_index in range(keyword_index, len(keywords)):
        keyword = keywords[next_index].strip()
        if keyword.startswith("@!"):
            if keyword[2:]:  # an empty forbidden word carries no constraint
                forbidden.append(keyword[2:])
        elif keyword.startswith("!"):
            required = keyword[1:]
            break

    # Earliest position at/after ``offset`` where any forbidden keyword starts.
    hits = [content.find(word, offset) for word in forbidden]
    hits = [hit for hit in hits if hit != -1]

    if required is None:
        # No required keyword left: only the forbidden ones must be absent
        # from the remaining text.
        return not hits

    # The required keyword must start no later than the first forbidden one.
    barrier = min(hits) if hits else len(content)

    position = content.find(required, offset)
    while position != -1 and position <= barrier:
        if contain_keywords2(content, keywords,
                             offset=position + len(required),
                             keyword_index=next_index + 1):
            return True
        # Backtrack: try the next occurrence of the required keyword.
        position = content.find(required, position + 1)
    return False
答案 0 :(得分:0)
由于没有人回答,我将发布解决方案:
def contain_keywords2(content_text, keywords, offset=0, keyword_index=0):
    """Return True if ``content_text`` matches the keyword sequence.

    Keyword prefixes handled here:

    * ``"!word"``  -- required; every occurrence reported by ``find_all``
      at or after ``offset`` is tried until the remaining keywords also
      match (backtracking).
    * ``"@!word"`` -- forbidden; must not be found in the window that
      starts at ``offset`` (limited to roughly the next 10 lines, see the
      comment in the body).
    * ``"|!word"`` -- "or" keyword; once reached, the result is True if
      any ``|!`` keyword of the whole list occurs anywhere in the text.

    Keywords with any other prefix are skipped.

    Relies on the helpers ``findnth`` and ``find_all`` defined elsewhere.
    NOTE(review): ``findnth`` is presumed to return the index of the n-th
    occurrence (or -1) and ``find_all`` to yield start positions relative
    to the string it is given -- confirm against their definitions.

    Args:
        content_text: Text to search.
        keywords: Sequence of prefixed keyword strings.
        offset: Position in ``content_text`` at which the search starts.
        keyword_index: Index of the first keyword still to be processed.

    Returns:
        bool: True if a valid match exists, otherwise False.
    """
    # Every keyword has been consumed: this path satisfied all constraints.
    if keyword_index >= len(keywords):
        return True

    # Reaching the end of the text is NOT a success by itself: required
    # or "|!" keywords may still be pending, so it is handled per branch.
    text_exhausted = offset >= len(content_text)

    or_keyword = False
    for loop_keyword_index, keyword in enumerate(keywords[keyword_index:]):
        keyword = keyword.lstrip()
        # Absolute index of the keyword following the current one.
        next_index = keyword_index + loop_keyword_index + 1
        if keyword.startswith("@!"):
            reg = keyword[2:]
            if not text_exhausted:
                # Do not scan the whole remainder; only look at the next
                # 10 lines.  To search the whole remainder instead, drop
                # the window and call content_text.find(reg, offset).
                pos_of_tens_line = findnth(content_text[offset:], '\n', 10)
                if pos_of_tens_line == -1:
                    # -1 is treated as "no such newline": use a window
                    # large enough to reach the end of the text.
                    pos_of_tens_line = len(content_text)
                location = content_text.find(reg, offset,
                                             offset + pos_of_tens_line)
                if location != -1:
                    return False
            return contain_keywords2(content_text, keywords, offset=offset,
                                     keyword_index=next_index)
        elif keyword.startswith("!"):
            reg = keyword[1:].strip()
            if text_exhausted:
                # Nothing left to match a required keyword against.
                return False
            for keyword_position in find_all(content_text[offset:], reg):
                valid_pattern = contain_keywords2(
                    content_text, keywords,
                    offset=offset + keyword_position + len(reg),
                    keyword_index=next_index)
                if valid_pattern:
                    return True
            return False
        elif keyword.startswith('|!'):
            or_keyword = True
            break

    if or_keyword:
        # Any "|!" keyword found anywhere in the text is sufficient.
        return any(keyword.startswith('|!') and keyword[2:] in content_text
                   for keyword in keywords)
    return False