When I run the code below, I get this error:

AttributeError: 'tokenizer' object has no attribute 'sent_tokenize'

on the line sentence_token = self.sent_tokenize(input_data). Why am I getting this error?
from nltk.tokenize import sent_tokenize, word_tokenize, WordPunctTokenizer
from nltk.corpus import brown

class tokenizer:
    input_data = ''

    def __init__(self, input_data=''):
        self.input_data = input_data

    def gen_SentTokenizers(self, input_data):
        sentence_token = self.sent_tokenize(input_data)  # Error!
        return sentence_token

    def gen_WordTokenizers(self, input_data):
        word_token = self.word_tokenize(input_data)
        return word_token

    def gen_WordPunctToken(self, input_data):
        word_PunctToken = self.WordPunctTokenizer().tokenize(input_data)
        return word_PunctToken

if __name__ == '__main__':
    input_data = ' '.join(brown.words())

    token1 = tokenizer()
    token2 = tokenizer()
    token3 = tokenizer()

    token1.gen_SentTokenizers(input_data)
    print("\nSentence tokenizer:")
    print(token1)

    token2.gen_WordPunctToken(input_data)
    print("\nWord punct tokenizer:")
    print(token2)

    token3.gen_WordTokenizers(input_data)
    print("\nWord tokenizer:")
    print(token3)
Answer 0 (score: 2):

sent_tokenize is a module-level function imported from nltk.tokenize, not an attribute of your tokenizer class, so self.sent_tokenize raises an AttributeError. The same problem will hit self.word_tokenize and self.WordPunctTokenizer: call the imported names directly instead. First, in the terminal:
pip install --upgrade nltk
python -m nltk.downloader popular
Then in the code:
from nltk.tokenize import sent_tokenize
text = 'This str has many sentences. A sentence here. A sentence there.'
sent_tokenize(text)
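Applying the same fix to the class from the question, a minimal sketch (method names kept from the question; the print calls in __main__ are also adjusted, since printing the tokenizer object itself only shows its repr, not the tokens):

from nltk.tokenize import sent_tokenize, word_tokenize, WordPunctTokenizer
from nltk.corpus import brown

class tokenizer:
    def __init__(self, input_data=''):
        self.input_data = input_data

    def gen_SentTokenizers(self, input_data):
        # Call the module-level function, not self.sent_tokenize
        return sent_tokenize(input_data)

    def gen_WordTokenizers(self, input_data):
        return word_tokenize(input_data)

    def gen_WordPunctToken(self, input_data):
        # WordPunctTokenizer is a class: instantiate it, then call .tokenize
        return WordPunctTokenizer().tokenize(input_data)

if __name__ == '__main__':
    input_data = ' '.join(brown.words())
    print("\nSentence tokenizer:")
    print(tokenizer().gen_SentTokenizers(input_data)[:3])  # print the returned tokens, not the object

Note that sent_tokenize and word_tokenize need the punkt data, which the nltk.downloader command above installs.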