Question

我使用下面给出的 Python 代码来提取文本中的命名实体。现在我需要从包含命名实体的句子中提取形容词，也就是与命名实体一起使用的形容词。我是否可以修改代码，在树中存在 'NE' 时检查是否也有 'JJ'？还是有其他方法？

def tokenize(text):
    """Split *text* into sentences of part-of-speech-tagged tokens.

    Returns a list with one entry per sentence found by
    ``nltk.sent_tokenize``; each entry is the result of ``nltk.pos_tag``
    applied to that sentence's ``nltk.word_tokenize`` output.
    """
    # Each stage feeds the next: raw text -> sentence strings -> word
    # tokens -> tagged tokens.
    return [
        nltk.pos_tag(nltk.word_tokenize(sent))
        for sent in nltk.sent_tokenize(text)
    ]

# Read the whole input up front; the context manager closes the file handle
# even if reading fails.
with open("file.txt", "r") as handle:
    text = handle.read()
sentences = tokenize(text)
# binary=True asks the chunker to mark entities with the generic 'NE' label
# instead of a specific entity type.
chunk_sent = nltk.batch_ne_chunk(sentences, binary=True)
# Show the chunk tree of the second sentence only.
print(chunk_sent[1])

输出：

Tree('S', [("'", 'POS'), ('Accomplished', 'NNP'), ('in', 'IN'), ('speech', 'NN'), (',', ','), Tree('NE', [('Gautam', 'NNP')]), (',', ','), ('thus', 'RB'), ('questioned', 'VBD'), (',', ','), ('gave', 'VBD'), ('in', 'IN'), ('the', 'DT'), ('midst', 'NN'), ('of', 'IN'), ('that', 'DT'), ('big', 'JJ'), ('assemblage', 'NN'), ('of', 'IN'), ('contemplative', 'JJ'), ('sages', 'NNP'), ('a', 'DT'), ('full', 'JJ'), ('and', 'CC'), ('proper', 'NN'), ('answer', 'NN'), ('in', 'IN'), ('words', 'NNS'), ('consonant', 'JJ'), ('with', 'IN'), ('their', 'PRP$'), ('mode', 'NN'), ('of', 'IN'), ('life', 'NN'), ('.', '.')])

这句话中 NE 之前并没有 JJ。那么，我该如何获取与 NE 一起使用的 JJ（形容词）？

def ne(tree):
    """Collect the text of every named-entity ('NE') subtree under *tree*.

    *tree* may be a chunk tree, one of its subtrees, or a leaf such as a
    ``(word, tag)`` tuple.  Returns a list of strings, one per 'NE' subtree,
    each made of that subtree's words joined by single spaces.  Leaves and
    nodes without a label yield an empty list.
    """
    # Newer NLTK trees expose the label through label(); older ones through
    # the .node attribute.  Prefer label() so .node is never touched when the
    # method exists.
    label_of = getattr(tree, 'label', None)
    if callable(label_of):
        label = label_of()
    else:
        label = getattr(tree, 'node', None)

    if not label:
        # A leaf or an unlabelled node: nothing to collect, and descending
        # into a tuple or string here would never terminate.
        return []

    if label == 'NE':
        # The first item of each child is its word.
        return [' '.join(child[0] for child in tree)]

    # Any other labelled node (e.g. the sentence root): search its children.
    names = []
    for child in tree:
        names.extend(ne(child))
    return names

# Gather the entity names of every chunked sentence into one flat list.
names = []
for item in chunk_sent:
    names += ne(item)
print(names)

Answer 1

>>> from nltk.corpus import brown
>>> from nltk import batch_ne_chunk as bnc
>>> from nltk.tree import Tree
>>> sentences = brown.tagged_sents()[0:5]
>>> chunk_sents = bnc(sentences)
>>> 
>>> for sent in chunk_sents:
...     for i,j in zip(sent[:-1], sent[1:]):
...             if type(j) is Tree and i[1].startswith("JJ"):
...                     print i,j
... 
('Grand', 'JJ-TL') (PERSON Jury/NN-TL)
('Executive', 'JJ-TL') (ORGANIZATION Committee/NN-TL)

与命名实体一起使用的形容词

1 个答案: