import time
def find_context(words, target, window=1):
    """Return each occurrence of *target* together with its neighbours.

    Args:
        words: Sequence of words to search, in reading order.
        target: The exact word to look for.
        window: How many words to include on each side of a match.

    Returns:
        A list with one space-joined phrase per occurrence of *target*;
        the list is empty when the word does not occur.
    """
    return [
        # max() clamps the start so a match near the beginning does not
        # produce a negative index that would wrap around to the end.
        " ".join(words[max(pos - window, 0):pos + window + 1])
        for pos, candidate in enumerate(words)
        if candidate == target
    ]


def main():
    """Prompt for a word and print it with its neighbours from txt.txt."""
    with open('txt.txt', 'r') as myfile:
        # split() without arguments splits on any whitespace, so words on
        # adjacent lines stay separate instead of being fused together.
        pdf_content = myfile.read().split()

    vocab = input('Vocab word to search for: ')
    phrases = find_context(pdf_content, vocab)
    if not phrases:
        print('word not found....')
    for phrase in phrases:
        print(phrase)

    # NOTE(review): the original script paused for 200 seconds at this point,
    # presumably to keep a console window open -- confirm it is still wanted.
    time.sleep(200)


if __name__ == "__main__":
    main()
我想在一段文本中搜索某个单词,然后返回该单词及其前后相邻的单词。
例如,如果段落是 "the quick brown fox jumped over the lazy dog",而我们要搜索 "brown",它应返回 "quick brown fox",因为这些是它周围的单词。我不确定该如何实现,非常感谢您的帮助。
答案 0 :(得分:3)
您可以使用正则表达式:
import re


def surrounding_words(text, word):
    """Return every "<previous word> <word> <next word>" span in *text*.

    Args:
        text: The text to search.
        word: The word to look for; it is matched literally.

    Returns:
        A list of matched substrings, empty when nothing matches. A match
        needs a word on both sides of *word*.
    """
    # re.escape keeps characters such as "." or "+" in the search word from
    # being interpreted as regex syntax.
    pattern = r"\w+\W+{}\W+\w+".format(re.escape(word))
    return [match.group() for match in re.finditer(pattern, text)]


text = 'the quick brown fox jumped over the lazy dog'
word = "brown"
for phrase in surrounding_words(text, word):
    print(phrase)
输出
quick brown fox
正则表达式
\w+ 匹配一个单词,
\W+ 匹配其后一个或多个非单词字符,
'brown' 匹配要搜索的单词本身;其后再跟 \W+ 和 \w+,即分隔符和下一个单词。
答案 1 :(得分:0)
或者写成一行:
def neighbors(s, s2):
    """Return the word *s2* from *s* joined with its adjacent words.

    Args:
        s: The text to search; it is split on whitespace.
        s2: The word to look for.

    Returns:
        The previous word, *s2* and the next word joined by single spaces;
        a neighbour is omitted when *s2* is the first or last word.

    Raises:
        ValueError: If *s2* is not one of the words in *s*.
    """
    words = s.split()  # split once instead of once per use
    index = words.index(s2)
    # max() stops index - 1 from becoming -1 when s2 is the first word,
    # which would make the slice empty.
    return ' '.join(words[max(index - 1, 0):index + 2])


s = 'the quick brown fox jumped over the lazy dog'
s2 = "brown"
print(neighbors(s, s2))
说明:
将拆分后的列表中从 index-1 到 index+1 的元素用空格连接起来(切片的结束位置不包含在内,所以这里写成 +2)。
index 表示 s2 在 s 拆分得到的列表中的索引。
答案 2 :(得分:0)
尝试拆分单词并使用索引:
def find_with_neighbors(text, word):
    """Return *word* with the words directly before and after it in *text*.

    Args:
        text: The text to search; it is split on whitespace.
        word: The word to look for.

    Returns:
        The first occurrence of *word* joined with its existing neighbours
        by single spaces, or None when *word* is not in *text*.
    """
    words = text.split()
    try:
        pos = words.index(word)
    except ValueError:
        # list.index raises when the word is absent; report that as None.
        return None

    found = word
    if pos > 0:  # there is a word before the match
        found = words[pos - 1] + " " + found
    if pos < len(words) - 1:  # there is a word after the match
        found = found + " " + words[pos + 1]
    return found


pdf_content = "the quick brown fox jumps over the lazy dog"
word = "brown"
found = find_with_neighbors(pdf_content, word)
if found is not None:
    print(found)