import nltk
# Read the whole headlines file, then split the text into word and
# punctuation tokens with NLTK and print the resulting list.
# The path is a raw string so the backslash is a literal path separator
# rather than the start of the invalid "\h" escape sequence, and the `with`
# block closes the file handle as soon as the text has been read.
# NOTE(review): the file is still decoded with the platform default encoding,
# exactly as before — confirm whether an explicit encoding= should be passed.
with open(r"C:\headlines.txt") as headlines_file:
    file_content = headlines_file.read()
tokens = nltk.word_tokenize(file_content)
print(tokens)
# Output:
['West', 'Bengal', 'govt', '.', 'extends', 'food', 'security', 'to', '8.5',
'crore', 'people', ':', 'Mamata', 'Govt', 'committed', 'to', 'implementing',
'SC', 'verdict', 'on', 'Sabarimala', ',', 'says', 'Kerala', 'CM', 'Pinarayi',
'Vijayan', 'Polling', 'under', 'way', 'for', 'last', 'phase', 'of',
'municipal', 'polls', 'in', 'Jammu', 'and', 'Kashmir', '.']