import nltk
from nltk.stem import PorterStemmer
from nltk.corpus import stopwords
paragraph = ''' State-run Bharat Sanchar Nigam Ltd (BSNL) is readying to pay November salary in another two days, which will be raised from internal accruals and bank loans.'''
sentence = nltk.sent_tokenize(paragraph)
stemmer = PorterStemmer()
for i in range(len(sentence)):
words = nltk.word_tokenize(i)
words = [stemmer.stem(word) for word in words if word not in set(stopwords.words('english'))]
sentence[i] = ' '.join(words)
这部分我出错了
words = nltk.word_tokenize(i)
答案 0 :(得分:1)
range()
产生一个可迭代的整数。因此,当您将i
馈入nltk.word_tokenize()
时,就是在馈入一个整数。显然,整数不是字符串。
我个人不知道nltk.word_tokenize()
应该如何工作,但是基于上下文线索,您似乎可能希望在索引i
。 >而不只是索引i
:
words = nltk.word_tokenize(sentence[i])