如何让 NLTK 的 pos_tag 按单词而不是按字符进行标注?

时间:2013-08-15 09:07:15

标签: python nltk


 # -*- coding: utf-8 -*-
# Tag every word of a sample passage with its part of speech using NLTK.
from nltk.corpus import wordnet as wn
from nltk import pos_tag, word_tokenize
import nltk

# Empty list; nothing in this snippet fills it (presumably meant for
# synonyms looked up via wordnet -- TODO confirm against the rest of the file).
syno = []

sentence = '''His father suggested he study to become a parson instead, but Darwin was far more inclined to study natural history.DarwinDar·win (där'wĭn),Charles Robert.1809-1882.British naturalist who revolutionized the study of biology with his theory ofevolutionbased on natural selection
Like several scientists before him, Darwin believed all the life on earth evolved (developed gradually) over millions of years from a few common ancestors.'''

# pos_tag works on a sequence of tokens. Passing the raw string would make it
# iterate character by character, so split the text into words first.
# NOTE: word_tokenize and pos_tag rely on NLTK data packages (tokenizer and
# tagger models) being installed -- see nltk.download().
sent = pos_tag(word_tokenize(sentence))


我得到的输出是按字符标注的,而不是按单词标注的:

[('H', 'NNP'), ('e', 'VBP'), ('l', 'NN'), ('l', 'NN'), ('o', 'NN'), (' ', ':'), ('m', 'NN'), ('y', 'NN'), (' ', ':'), ('n', 'NN'), ('a', 'DT'), ('m', 'NN'), ('e', 'NN'), (' ', ':'), ('i', 'PRP'), ('s', 'VBZ'), (' ', ':'), ('A', 'DT'), ('b', 'NN'), ('h', 'NN'), ('i', 'PRP'), ('s', 'VBZ'), ('h', 'JJ'), ('e', 'NN'), ('k', 'NN'), (' ', ':'), ('M', 'NNP'), ('i', 'PRP'), ('t', 'VBP'), ('r', 'JJ'), ('a', 'DT')]


1 个答案:

答案 0 :(得分:9)


# Interactive example: tokenize the sentence into words before tagging.
# pos_tag is given the list returned by word_tokenize, so each tuple in the
# result pairs a whole word (not a single character) with its tag.
>>> from nltk import pos_tag, word_tokenize
>>> sentence = "Hello my name is Derek. I live in Salt Lake city."
>>> pos_tag(word_tokenize(sentence))
[('Hello', 'NNP'), ('my', 'PRP$'), ('name', 'NN'), ('is', 'VBZ'), ('Derek.', 'NNP'), ('I', 'PRP'), ('live', 'VBP'), ('in', 'IN'), ('Salt', 'NNP'), ('Lake', 'NNP'), ('city', 'NN'), ('.', '.')]