import nltk
from nltk.tokenize import PunktSentenceTokenizer
from nltk.tokenize import word_tokenize
#Other lines of code
# Commands the program recognises, and the canned replies that go with them.
question = ['Add a device under the name karthik']
responses = ['Added']

# Only analyse the input when it exactly matches a known command.
# NOTE(review): `user_text` is expected to be defined by earlier code.
if user_text in question:
    token = word_tokenize(user_text)

    # Punkt's train_text must be a single string (or a PunktParameters
    # object), not a list, so join the known commands into one text.
    custom_sent_tokenizer = PunktSentenceTokenizer("\n".join(question))
    tokenized = custom_sent_tokenizer.tokenize(user_text)

    # POS-tag at most the first five sentences and print each tagged list
    # of (word, tag) tuples.
    for sentence in tokenized[:5]:
        words = word_tokenize(sentence)
        tagged = nltk.pos_tag(words)
        print(tagged)
执行后,我得到了带有对应 POS(词性)标签的分词结果。现在我想知道如何查找特定的 POS 标签,以便对文本进行过滤。
答案 0 :(得分:1)
您可以使用简单的列表推导式(list comprehension):
>>> sentence = "A screaming comes across the sky."
>>> tokenized = word_tokenize(sentence)
>>> tagged = nltk.pos_tag(tokenized)
>>> [x for x in tagged if x[1] == 'NN']
[('screaming', 'NN'), ('sky', 'NN')]
或者您可以使用内置函数 filter() 进行过滤:
>>> sentence = "I am seated in an office, surrounded by heads and bodies."
>>> tokenized = word_tokenize(sentence)
>>> tagged = nltk.pos_tag(tokenized)
>>> list(filter(lambda x: x[1] == 'NNS', tagged))
[('heads', 'NNS'), ('bodies', 'NNS')]