我定义了下面这个函数,用来查找文本文件中的关键字。但是现在我想得到一个元组,其中包含文件中每个关键字之前和之后的单词,我不知道该怎么做。
def findProperWords(paragraphs, excludedWords):
    """Collect the capitalised key words found in a sequence of paragraphs.

    Each paragraph is split on single spaces. A word is treated as a key
    word when its first character is an uppercase letter and its
    lowercased form is not contained in ``excludedWords``.

    Args:
        paragraphs: Iterable of paragraph strings.
        excludedWords: Container of lowercase words that must never be
            reported as key words.

    Returns:
        A list of the key words in the order they were encountered
        (duplicates are kept).
    """
    key = []
    for paragraph in paragraphs:
        for word in paragraph.split(' '):
            # Consecutive spaces produce empty strings; skip them.
            if not word:
                continue
            # isupper() is True only for real uppercase letters, whereas
            # comparing with .upper() would also accept digits/punctuation.
            if word[0].isupper() and word.lower() not in excludedWords:
                key.append(word)
    return key
答案 0 :(得分:0)
试试这个,但请注意,它只能在同一个段落内查找关键字的上一个和下一个单词。如果您希望它也能跨越到上一个/下一个段落去查找,可以考虑把所有段落合并成一个大的单词列表(如果内存限制允许);或者在逐段迭代时,保存每个段落的最后一个单词,以便在处理下一个段落时把它作为该段落第一个单词的"上一个单词"。
def findProperWords(paragraphs, excludedWords):
    """Find capitalised key words together with their neighbouring words.

    Each paragraph is split on single spaces. A word is treated as a key
    word when its first character is an uppercase letter and its
    lowercased form is not contained in ``excludedWords``. Neighbours are
    looked up within the same paragraph only.

    Args:
        paragraphs: Iterable of paragraph strings.
        excludedWords: Container of lowercase words that must never be
            reported as key words.

    Returns:
        A list of ``(word_before, key_word, word_after)`` tuples in the
        order the key words were encountered. ``word_before`` is ``None``
        for the first word of a paragraph and ``word_after`` is ``None``
        for the last one. Because paragraphs are split on single spaces,
        a neighbour may be an empty string when the text contains
        consecutive spaces.
    """
    results = []
    for paragraph in paragraphs:
        words = paragraph.split(' ')
        last_idx = len(words) - 1
        for idx, word in enumerate(words):
            # Consecutive spaces produce empty strings; skip them.
            if not word:
                continue
            # isupper() is True only for real uppercase letters, whereas
            # comparing with .upper() would also accept digits/punctuation.
            if not word[0].isupper() or word.lower() in excludedWords:
                continue
            # Reset the neighbours for every key word so that values from a
            # previous iteration can never leak into this tuple.
            word_before = words[idx - 1] if idx > 0 else None
            # Every index except the last one has a following word.
            word_after = words[idx + 1] if idx < last_idx else None
            results.append((word_before, word, word_after))
    return results