我使用下面给出的 Python 代码来提取文本中的命名实体。现在我需要从包含命名实体的句子中获取形容词,即与命名实体一起使用的形容词。我是否可以修改代码,在树中存在 'NE' 时再检查是否也有 'JJ'?还是有其他方法?
def tokenize(text):
    """Split *text* into sentences and part-of-speech tag each of them.

    The text is cut into sentences with ``nltk.sent_tokenize``, every
    sentence is cut into word tokens with ``nltk.word_tokenize``, and each
    token list is handed to ``nltk.pos_tag``.

    Returns a list holding one ``nltk.pos_tag`` result per sentence.
    """
    tagged = []
    for sentence in nltk.sent_tokenize(text):
        words = nltk.word_tokenize(sentence)
        tagged.append(nltk.pos_tag(words))
    return tagged
# Read the whole input file. The ``with`` block closes the handle as soon as
# the read finishes (or fails) instead of leaving it open.
with open("file.txt", "r") as infile:
    text = infile.read()

# One POS-tagged token list per sentence.
sentences = tokenize(text)

# Named-entity chunking of every tagged sentence, called with binary=True.
# NOTE(review): batch_ne_chunk is the NLTK 2 name; NLTK 3 exposes the same
# function as ne_chunk_sents -- confirm against the installed version.
chunk_sent = nltk.batch_ne_chunk(sentences, binary=True)

# Show the chunk tree of the second sentence. The parenthesised
# single-argument form prints the same text on Python 2 and Python 3.
print(chunk_sent[1])
输出:
Tree('S', [("'", 'POS'), ('Accomplished', 'NNP'), ('in', 'IN'), ('speech', 'NN'), (',', ','), Tree('NE', [('Gautam', 'NNP')]), (',', ','), ('thus', 'RB'), ('questioned', 'VBD'), (',', ','), ('gave', 'VBD'), ('in', 'IN'), ('the', 'DT'), ('midst', 'NN'), ('of', 'IN'), ('that', 'DT'), ('big', 'JJ'), ('assemblage', 'NN'), ('of', 'IN'), ('contemplative', 'JJ'), ('sages', 'NNP'), ('a', 'DT'), ('full', 'JJ'), ('and', 'CC'), ('proper', 'NN'), ('answer', 'NN'), ('in', 'IN'), ('words', 'NNS'), ('consonant', 'JJ'), ('with', 'IN'), ('their', 'PRP$'), ('mode', 'NN'), ('of', 'IN'), ('life', 'NN'), ('.', '.')])
不过,这个句子在 NE 之前没有 JJ。我怎样才能获取与 NE 一起使用的 JJ?
def _node_label(tree):
    """Return the label of *tree*, or None when the object carries none."""
    label = getattr(tree, 'label', None)
    if callable(label):
        # Newer NLTK trees expose the label through the label() method.
        return label()
    # Older trees keep it in the ``node`` attribute; leaves have neither.
    return getattr(tree, 'node', None)


def ne(tree):
    """Collect the named-entity strings found in a chunk tree.

    Args:
        tree: a chunk tree or one of its children. Objects without a label
            (for example ``(word, tag)`` leaf tuples) and trees whose label
            is empty contribute nothing.

    Returns:
        A list of strings, one per subtree labelled ``'NE'``, each made of
        the first item of every child joined with single spaces. Subtrees
        carrying any other label are searched recursively.
    """
    tag = _node_label(tree)
    if not tag:
        return []
    if tag == 'NE':
        # The children of an NE node are (word, tag) pairs; keep the words.
        return [' '.join([child[0] for child in tree])]
    names = []
    for child in tree:
        names.extend(ne(child))
    return names
# Flatten the named entities of every chunked sentence into a single list.
names = []
for item in chunk_sent:
    names += ne(item)
print(names)
答案 0 :(得分:0)
>>> from nltk.corpus import brown
>>> from nltk import batch_ne_chunk as bnc
>>> from nltk.tree import Tree
>>> sentences = brown.tagged_sents()[0:5]
>>> chunk_sents = bnc(sentences)
>>>
>>> for sent in chunk_sents:
... for i,j in zip(sent[:-1], sent[1:]):
... if type(j) is Tree and i[1].startswith("JJ"):
... print i,j
...
('Grand', 'JJ-TL') (PERSON Jury/NN-TL)
('Executive', 'JJ-TL') (ORGANIZATION Committee/NN-TL)