import re
import nltk
from nltk.corpus import stopwords
stop = stopwords.words('english')
from nltk.corpus import wordnet
inputfile = open('file.txt', 'r')
String= inputfile.read()
def last_name(resume_text):
tokenized_sentences = nltk.sent_tokenize(resume_text)
a_list=[]
for sentence in tokenized_sentences:
a_list=(sentence.split())
s1=a_list[1:]
sentence1=''.join(s1)
tokenized_sentences = nltk.sent_tokenize(sentence1)
for sentence in tokenized_sentences:
for chunk in nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sentence), tagset='universal')):
if hasattr(chunk, 'label') and chunk.label() == 'PERSON':
chunk = chunk[0]
(name, tag) = chunk
if tag == 'NOUN':
return name
if __name__ == '__main__':
lastname= last_name(String)
print(lastname)
我想从简历中提取姓氏。它正确返回第一个名称,但第二个名称错误。
我该如何解决这个问题?