我可以使用以下代码使用SpaCy捕获短语:我得到的短语是: amazing_landscape 美丽 城市
我不想得到“美丽”,因为它不是名词。请帮忙。代码如下:
import spacy
nlp = spacy.load('en_core_web_sm')
f = 'I live in a city with amazing landscapes. It is beautiful. Can you guess?'
stop = ['indeed','regard',..... 'have']
doc = nlp(f)
words = []
tags = ['JJ','JJR','JJS','VBG','CD','AFX','VBN','HYPH','NNP','NNPS','NN','NNS']
nouns = ['NNP','NNPS','NN','NNS']
valid = []
tokens = []
for i in doc:
if nlp.vocab[i.text] in stop or i.tag_ not in tags:
if len(valid)>0:
words.append(valid)
valid = []
else:
valid.append(i.lemma_)
terms = set()
for i in words:
terms.add('_'.join(j for j in i))
for i in terms:
print(i)