def test():
    """Run NLTK named-entity chunking on a sample sentence and print the result.

    The sample text is split into sentences, each sentence into word tokens,
    each token list is part-of-speech tagged, and the tagged sentences are
    passed to ``nltk.ne_chunk_sents``. The sample text and the list of
    resulting chunk trees are written to standard output.
    """
    sample = "hello my name is Shefali and I live in Nebraska."
    # Call form of print: valid on Python 3, and prints the same text on
    # Python 2 when given a single argument.
    print(sample)
    sentences = nltk.sent_tokenize(sample)
    tokenized_sentences = [nltk.word_tokenize(sentence) for sentence in sentences]
    tagged_sentences = [nltk.pos_tag(sentence) for sentence in tokenized_sentences]
    chunked_sentences = nltk.ne_chunk_sents(tagged_sentences)
    # The chunker's result can be a lazy generator (printing it directly shows
    # only the generator's repr), so materialize it into a list first.
    print(list(chunked_sentences))
输出是:
hello my name is Shefali and I live in Nebraska.
[Tree('S', [('hello', 'NN'), ('my', 'PRP$'), ('name', 'NN'), ('is', 'VBZ'), Tree('PERSON', [('Shefali', 'NNP')]), ('and', 'CC'), ('I', 'PRP'), ('live', 'VBP'), ('in', 'IN'), Tree('GPE', [('Nebraska', 'NNP')]), ('.', '.')])]
当我写 print(chunked_sentences) 时,它给出了以下输出:<generator object <genexpr> at 0x000000000CE18438>
我只想提取 PERSON 和 GPE 并打印它们。我该怎么做?另外,什么是生成器(generator)对象?