I want to use NLTK's sent_tokenize to read a text and split it into sentences. How can I do this? I have already used wordpunct_tokenize and it works fine, but sent_tokenize does not.
from collections import defaultdict
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize, wordpunct_tokenize
import re
import os
import sys
from pathlib import Path
def stats():
    while True:
        try:
            file_to_open = Path(input("\nPlease, insert your file path: "))
            with open(file_to_open) as f:
                words = word_tokenize(f.read())
            break
        except FileNotFoundError:
            print("\nFile not found. Better try again")
        except IsADirectoryError:
            print("\nIncorrect directory path. Try again")
    print(words)
    print('\n\nThis text contains', len(words), 'tokens')

def sent_tokenize():
    while True:
        try:
            file_to_open = Path(input("\nPlease, insert your file path: "))
            with open(file_to_open) as f:
                words = sent_tokenize(f.read())
            break
        except FileNotFoundError:
            print("\nFile not found. Better try again")
        except IsADirectoryError:
            print("\nIncorrect directory path. Try again")
    print(words)
    print('\n\nThis text contains', len(words), 'sentences')

stats()
sent_tokenize()
I want to print the text split into sentences, but I get this error message:
Traceback (most recent call last):
  File "/Users/nataliaresende/Dropbox/PYTHON/stats.py", line 46, in <module>
    sent_tokenize()
  File "/Users/nataliaresende/Dropbox/PYTHON/stats.py", line 40, in sent_tokenize
    sent=sent_tokenize(words)
TypeError: sent_tokenize() takes 0 positional arguments but 1 was given
Can anyone help?
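
Edit: for reference, here is a minimal sketch of what I think should work, assuming the cause is that my own def sent_tokenize(): shadows NLTK's sent_tokenize, so the call inside the function resolves to my zero-argument function instead of the library one. The function name sentence_stats is just a placeholder I chose to avoid the clash:

import nltk
from pathlib import Path
from nltk.tokenize import sent_tokenize  # NLTK's sentence tokenizer

# nltk.download('punkt')  # needed once for the sentence tokenizer models

def sentence_stats():  # renamed so it no longer shadows sent_tokenize
    while True:
        try:
            file_to_open = Path(input("\nPlease, insert your file path: "))
            with open(file_to_open) as f:
                # sent_tokenize now refers to the NLTK function
                sentences = sent_tokenize(f.read())
            break
        except FileNotFoundError:
            print("\nFile not found. Better try again")
        except IsADirectoryError:
            print("\nIncorrect directory path. Try again")
    print(sentences)
    print('\n\nThis text contains', len(sentences), 'sentences')

sentence_stats()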