import os
# Demo script: tag one sentence with the Stanford NER 3-class model via NLTK.
#
# The tagger below is constructed with only a model file name, so the jar,
# the model directory and the Java binary are all supplied through
# environment variables (NLTK looks these names up when the tagger is
# created and run -- see the NLTK `nltk.tag.stanford` documentation).

# Single root of the unpacked Stanford NER distribution; both paths below
# are derived from it so they cannot drift apart.
STANFORD_NER_HOME = "C:/Users/Anon/Desktop/ZAHID/Python/stanford-ner-2017-06-09/stanford-ner-2017-06-09"

os.environ['CLASSPATH'] = STANFORD_NER_HOME + "/stanford-ner.jar"
os.environ['STANFORD_MODELS'] = STANFORD_NER_HOME + "/classifiers/"
os.environ['JAVAHOME'] = "C:/Program Files/Java/jdk1.8.0_45/bin/java.exe"

# NLTK 3.1 renamed `NERTagger` to `StanfordNERTagger`. Try the current name
# first and fall back to the old one so the script runs on either version.
try:
    from nltk.tag.stanford import StanfordNERTagger as NERTagger
except ImportError:
    from nltk.tag.stanford import NERTagger

st = NERTagger('english.all.3class.distsim.crf.ser.gz')

# The tagger expects a pre-tokenised sentence; whitespace splitting is enough
# for this sample. The result is a list of (token, tag) pairs.
text = st.tag('Rami Eid is studying at Stony Brook University in NY'.split())
print(text)
Output: [(u'Rami', u'PERSON'), (u'Eid', u'PERSON'), (u'is', u'O'), (u'studying', u'O'), (u'at', u'O'), (u'Stony', u'ORGANIZATION'), (u'Brook', u'ORGANIZATION'), (u'University', u'ORGANIZATION'), (u'in', u'O'), (u'NY', u'O')]
但我希望输出为:
[('Rami', 'PERSON'), ('Eid', 'PERSON'), ('is', 'O'), ('studying', 'O'),
('at', 'O'), ('Stony', 'ORGANIZATION'), ('Brook', 'ORGANIZATION'),
('University', 'ORGANIZATION'), ('in', 'O'), ('NY', 'LOCATION')]