def concordance(string, search_term, width=80):
    """Return every occurrence of *search_term* in *string* with context.

    Matching is case-insensitive and purely substring based, so a term
    such as ``"and"`` is also found inside ``"demand"``. Occurrences are
    scanned left to right and do not overlap.

    Args:
        string: Text to search.
        search_term: Substring to look for.
        width: Number of context characters kept on each side of a hit.

    Returns:
        A tuple of slices of *string*, one per occurrence, each holding up
        to *width* characters before the hit, the hit itself, and up to
        *width* characters after it. Empty when nothing matches or when
        *search_term* is empty.
    """
    # An empty term matches at every position without advancing the scan.
    if not search_term:
        return ()

    # Lower-case once instead of re-slicing and re-lowering per iteration.
    haystack = string.lower()
    needle = search_term.lower()

    indexes = []
    position = haystack.find(needle)
    while position != -1:
        # Record every hit, including one at index 0.
        indexes.append(position)
        position = haystack.find(needle, position + len(needle))

    # Clamp the left bound: a negative start would wrap around to the end
    # of the string and yield a wrong (usually empty) slice.
    return tuple(
        string[max(index - width, 0):index + width + len(search_term)]
        for index in indexes
    )
# Sample text for the concordance examples. Triple quotes are required
# because the literal spans several lines; the embedded newlines are part
# of the value.
string = """The relationship between beef and beef broth, is not like the
relationship between beef and beef-broth, a simpler extraction and
condensation; rather, as Einstein goes on, it is like the relationship
between our overcoat and the ticket given us when we check our overcoat. In
other words, human perception involves coding even more than crude
sensing.just as a map-maker colors a nation purple not because it is purple
but because his code demands it."""
当我以 search_term 为 " and" 调用 concordance 函数时,结果不仅返回单独的单词 "and",还会返回其中包含 and 的单词,例如 "demand"。我该如何修改代码,使其只匹配独立的单词 "and"?
答案 0 :(得分:0)
您可以尝试使用 re 模块。在这里,只有当 search_term 的两侧都是 [a-zA-Z0-9_] 以外的字符时,才会被视为匹配(不确定这是否最符合您的意图……):
import re
def concordance(strg, search_term, width=5):
    """Return each whole-word occurrence of *search_term* with context.

    A hit counts only when it is not directly preceded or followed by a
    word character (``[a-zA-Z0-9_]`` and other Unicode word characters),
    so ``"and"`` is found in ``"beef and beef"`` but not in ``"demand"``.
    Matching is case-sensitive and *search_term* is taken literally.

    Args:
        strg: Text to search.
        search_term: Literal text to look for.
        width: Number of context characters kept on each side of a hit.

    Returns:
        A list of slices of *strg*, one per occurrence, each holding up to
        *width* characters before the hit, the hit itself, and up to
        *width* characters after it. Empty when nothing matches or when
        *search_term* is empty.
    """
    # An empty term would match at every non-word boundary position.
    if not search_term:
        return []

    # Zero-width lookarounds instead of consuming \W on both sides: they
    # also accept a hit at the very start or end of the text, and they do
    # not swallow the separator shared by two adjacent hits.
    # re.escape keeps characters like "." or "(" in the term literal.
    rgx = re.compile(r'(?<!\w){}(?!\w)'.format(re.escape(search_term)))

    # Clamp the left bound: a negative start would wrap around to the end
    # of the string and yield a wrong (usually empty) slice.
    return [
        strg[max(match.start() - width, 0):match.end() + width]
        for match in rgx.finditer(strg)
    ]
# Demo call using the default width of 5.
# NOTE(review): `strg` is not defined in this snippet; presumably it holds
# the sample text from the question — confirm before running.
print(concordance(strg, 'and'))
# Output reported for the sample text:
# ['beef and beef', 'beef and beef', 'tion and\ncond', 'coat and the ']