我正面临这个错误,我的功能据说没有定义即使我已经正确定义并调用了函数,这是我得到的错误请帮助:
文件" split_text.py",第80行,in split_pun(word)#passing split_pun中的words函数值以删除标点符号,但这给了我一个错误 NameError:name' split_pun'未定义
这是代码:
"""
Natural Language Toolkit: Urdu Language POS-Tagged (not yet) Corpus Reader
"""
import re
from six import string_types
from nltk.tag import str2tuple, map_tag
import os.path
from nltk.corpus.reader.util import *
from nltk.corpus.reader.api import *
class UrduCorpusReader(CorpusReader):
def words(self, fileids=None): #function for word tokenization
"""
List of words, one per line. Blank lines are ignored.
"""
words_list = []
for filepath in self.abspaths(fileids=fileids):
# print(filepath)
data = open(filepath, 'r').read()
data = data.replace('\n',' ')
words_list = data.split(' ')
#print( words_list) #printing the words after tokenization
return words_list
def split_pun(self,ifile): #function for punctuation removal
punctuations = [
u'\u06D4', # arabic full stop
'.',
u'\u061F', # Arabic question mark
u'\u061B', #ARABIC SEMICOLON
u'\u066D', #ARABIC FIVE POINTED STAR
u'\u2018' ,#LEFT SINGLE QUOTATION MARK
u'\u2019' ,#Right Single Quotation Mark
u'\u0027' ,#APOSTROPHE
'/',
':',
';',
'-',
'*',
')',
'(',
'/'
]
f = open(ifile,'r')
text = f.readlines()
f.close()
for x in range(0,len(text)):
s1 = ''.join(ch for ch in s if ch not in punctuations)
print(s1)
return s1
def raw(self, fileids=None):
if fileids is None:
fileids = self._fileids
elif isinstance(fileids, string_types):
fileids = [fileids]
return concat([self.open(f).read() for f in fileids])
if '__main__' == __name__:
word = ' '
corpus_root = os.path.abspath('../test_data')
wordlists = UrduCorpusReader(corpus_root, '.*')
print("Loaded corpus with file IDs: ")
print(wordlists.fileids())
list1 = wordlists.fileids()
for infile in (wordlists.fileids()):
print(infile)
word = wordlists.words(infile) #calling the words function and the save its output
split_pun(word) #passing the value of words function in split_pun to remove punctuation but this gives me an error
答案 0 :(得分:1)
由于os.path.sep
是split_pun
类中的实例方法,因此您需要从实例中调用它。
UrduCorpusReader
应为split_pun(word)
(就像您在其上方使用wordlists.split_pun(word)
一样)。