当我尝试执行某个特定的 Python 文件时,出现了以下错误:
Traceback (most recent call last):
File "<pyshell#4>", line 1, in <module>
g.stem(u"തുറക്കുക")
File "/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/core.py", line 48, in stem
self.rulesDict = self.LoadRules()
File "/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/core.py", line 81, in LoadRules
errors='ignore')
File "/usr/lib/python2.7/codecs.py", line 881, in open
file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 2] No such file or directory: '/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/stemmer_ml.rules'
这里有什么问题?
class Stemmer:
    """
    Rule-based suffix stemmer.

    The suffix rules are read lazily from ``stemmer_ml.rules``, a file that
    is expected to live in the same directory as this module, the first time
    :meth:`stem` is called.
    """

    def __init__(self):
        # The rules file is resolved relative to this module, so it must be
        # installed next to the code; otherwise LoadRules() raises IOError.
        self.rules_file = os.path.join(os.path.dirname(__file__),
                                       'stemmer_ml.rules')
        # Suffix -> replacement mapping, populated on first use by stem().
        self.rulesDict = None
        self.normalizer = normalizer.getInstance()

    def stem(self, text):
        """
        Stem every space-separated word of ``text``.

        :param text: unicode encoded malayalam string
        :returns: dictionary with words as the key and the stemmer result
                  as the values. A word for which no rule matches maps to
                  itself.
        """
        text = self.normalizer.normalize(text)
        if self.rulesDict is None:
            self.rulesDict = self.LoadRules()
        rules = self.rulesDict
        result_dict = dict()
        for word in text.split(" "):
            # NOTE(review): trim() is not defined in the visible part of the
            # class; presumably it is provided elsewhere -- confirm.
            word = self.trim(word)
            word = word.strip('!,.?:')
            word_stemmed = word
            # Try the longest candidate suffix first, always keeping at
            # least the first two characters of the word as the stem.
            for suffix_pos in range(2, len(word)):
                suffix = word[suffix_pos:]
                if suffix in rules:
                    word_stemmed = word[:suffix_pos] + rules[suffix]
                    break
            result_dict[word] = word_stemmed
        return result_dict

    def LoadRules(self):
        """
        Parse the rules file into a dictionary.

        Each rule is a line of the form ``lhs = rhs``. Either side may be
        wrapped in double quotes, ``#`` starts a comment, and blank lines are
        skipped. Lines that do not contain exactly one ``=`` are reported on
        stdout and ignored.

        :returns: dictionary mapping each suffix (lhs) to its replacement
                  (rhs)
        :raises IOError: if the rules file cannot be opened
        """
        rules_dict = dict()
        # The context manager guarantees the file handle is released even if
        # parsing fails part-way through.
        with codecs.open(self.rules_file, encoding='utf-8',
                         errors='ignore') as rules_file:
            for line_number, text in enumerate(rules_file, 1):
                if text.startswith('#'):
                    continue  # this is a comment - ignore
                text = text.split("#")[0]  # remove the comment part
                line = text.strip()  # remove unwanted space
                if line == "":
                    continue
                parts = line.split("=")
                if len(parts) != 2:
                    print("[Error] Syntax Error in the Rules. "
                          "Line number:  %d" % line_number)
                    print("Line: " + text)
                    continue
                lhs = self._strip_quotes(parts[0].strip())
                rhs = self._strip_quotes(parts[1].strip())
                rules_dict[lhs] = rhs
        return rules_dict

    def _strip_quotes(self, token):
        """Remove one leading and one trailing double quote, if present."""
        # Slicing instead of indexing keeps this safe for empty tokens.
        if token[:1] == '"':
            token = token[1:]
        if token[-1:] == '"':
            token = token[:-1]
        return token
设置文件
from setuptools import setup, find_packages

name = "indicstemmer"

setup(
    name=name,
    version="0.1",
    license="LGPL-3.0",
    description="Malayalam word stemmer",
    long_description="""This application helps you to stem the words
in the given text. Currently supports only
Note that this is very experimental and uses a rule based approach.
""",
    packages=find_packages(),
    # Declare the stemmer rule files explicitly. The stemmer opens
    # stemmer_ml.rules from its own package directory at runtime, and
    # include_package_data alone only ships files that the manifest or a
    # VCS plugin reports, so installs from other sources can miss it.
    package_data={name: ['*.rules']},
    include_package_data=True,
    setup_requires=['setuptools-git'],
    install_requires=['setuptools', 'normalizer'],
    test_suite="tests",
    zip_safe=False,
)
测试
import unittest
from indicstemmer import getInstance
class TestIndicStemmer(unittest.TestCase):
    """Checks the stemmer obtained from ``getInstance()``."""

    def setUp(self):
        # A fresh stemmer instance for every test method.
        self.instance = getInstance()

    def test_stemmer(self):
        # stem() returns a dict keyed by the input word, so look the
        # result up under the word that was passed in.
        inflected = u"തുറക്കുന്ന"
        stems = self.instance.stem(inflected)
        self.assertEqual(u"തുറക്കുക", stems[inflected])
def main():
    """Load the TestIndicStemmer cases and run them with verbose output."""
    loader = unittest.TestLoader()
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(loader.loadTestsFromTestCase(TestIndicStemmer))


if __name__ == "__main__":
    main()
我使用的是Ubuntu 12.04桌面版
答案 0 :(得分:1)
错误消息的重要行是
File "/usr/lib/python2.7/codecs.py", line 881, in open
file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 2] No such file or directory: '/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/stemmer_ml.rules'
这意味着indicstemmer模块未正确安装,因为无法找到所需的文件stemmer_ml.rules。
在调用 g.stem() 之前,请检查是否需要先设置默认值,并确认 Python 库目录的权限允许您读取规则文件。除此之外,可能需要重新安装该库包。我注意到,如果系统中存在多个版本的 Python,软件包有时会被安装到错误的版本下。不过在这种情况下我认为不是这个原因,因为程序一直运行到读取规则文件时才崩溃。