Question

当我尝试执行某个特定的 Python 文件时，出现了以下错误：

Traceback (most recent call last):
  File "<pyshell#4>", line 1, in <module>
    g.stem(u"തുറക്കുക")
  File "/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/core.py", line 48, in stem
    self.rulesDict = self.LoadRules()
  File "/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/core.py", line 81, in LoadRules
    errors='ignore')
  File "/usr/lib/python2.7/codecs.py", line 881, in open
    file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 2] No such file or directory: '/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/stemmer_ml.rules'

这里有什么问题？

class Stemmer:
    """
    Rule-based suffix stemmer.

    Instantiate the class to get the methods. The suffix rules are read
    lazily, on the first call to :meth:`stem`, from the ``stemmer_ml.rules``
    file located next to this module.
    """

    def __init__(self):
        # The rules file is resolved relative to this module's directory,
        # so it must be installed alongside the package's code.
        self.rules_file = os.path.join(os.path.dirname(__file__),
                                       'stemmer_ml.rules')
        # Filled in by the first call to stem().
        self.rulesDict = None
        self.normalizer = normalizer.getInstance()

    def stem(self, text):
        """
        Stem all the space-separated words in the given text.

        :param text: unicode encoded malayalam string
        :returns: dictionary with the (trimmed, punctuation-stripped) words
            as the keys and the stemmer results as the values. A word for
            which no rule matches maps to itself.
        """
        text = self.normalizer.normalize(text)
        if self.rulesDict is None:
            self.rulesDict = self.LoadRules()
        result_dict = dict()
        for word in text.split(" "):
            word = self.trim(word)
            word = word.strip('!,.?:')
            word_stemmed = word
            # Try the longest candidate suffix first; the first two
            # characters of the word are never treated as part of a suffix.
            for suffix_pos in range(2, len(word)):
                suffix = word[suffix_pos:]
                if suffix in self.rulesDict:
                    word_stemmed = word[:suffix_pos] + self.rulesDict[suffix]
                    break
            result_dict[word] = word_stemmed
        return result_dict

    def LoadRules(self):
        """
        Parse the rules file into a ``suffix -> replacement`` dictionary.

        Each rule line has the form ``suffix = replacement``; either side
        may be wrapped in double quotes. Everything after a ``#`` is a
        comment and blank lines are skipped. A line that does not contain
        exactly one ``=`` is reported on standard output and skipped.

        :returns: dictionary mapping each suffix to its replacement
        :raises IOError: if the rules file cannot be opened
        """
        rules_dict = dict()
        # codecs.open() already yields decoded text, and the ``with`` block
        # guarantees the file is closed even if parsing fails.
        with codecs.open(self.rules_file, encoding='utf-8',
                         errors='ignore') as rules_file:
            for line_number, raw_line in enumerate(rules_file, 1):
                text = raw_line.split("#")[0]  # remove the comment part
                line = text.strip()
                if line == "":
                    continue
                parts = line.split("=")
                if len(parts) != 2:
                    print("%s %s" % (
                        "[Error] Syntax Error in the Rules. Line number: ",
                        line_number))
                    print("Line: " + text)
                    continue
                lhs = self._unquote(parts[0].strip())
                rhs = self._unquote(parts[1].strip())
                rules_dict[lhs] = rhs
        return rules_dict

    @staticmethod
    def _unquote(value):
        """Drop one leading and one trailing double quote, if present."""
        if value.startswith('"'):
            value = value[1:]
        if value.endswith('"'):
            value = value[:-1]
        return value

设置文件

from setuptools import setup, find_packages

name = "indicstemmer"

setup(
    name=name,
    version="0.1",
    license="LGPL-3.0",
    description="Malayalam word stemmer",

    long_description="""This application helps you to stem the words
    in the given text. Currently supports only Malayalam.
    Note that this is very experimental and uses a rule based approach.

    """,
    packages=find_packages(),
    # Declare the stemmer rule files explicitly. include_package_data on
    # its own only picks up files named in MANIFEST.in or reported by a
    # revision-control plugin, so an install built outside a git checkout
    # would otherwise ship without stemmer_ml.rules.
    package_data={name: ['*.rules']},
    include_package_data=True,
    setup_requires=['setuptools-git'],
    install_requires=['setuptools', 'normalizer'],
    test_suite="tests",
    zip_safe=False,
)

测试

import unittest
from indicstemmer import getInstance


class TestIndicStemmer(unittest.TestCase):
    """Checks the stemmer object returned by ``getInstance``."""

    def setUp(self):
        # A fresh stemmer is obtained before every test method.
        self.instance = getInstance()

    def test_stemmer(self):
        """The inflected form must be mapped to its expected stem."""
        word = u"തുറക്കുന്ന"
        stems = self.instance.stem(word)
        self.assertEqual(u"തുറക്കുക", stems[word])

def main():
    """Load the TestIndicStemmer cases and run them with verbose output."""
    loader = unittest.TestLoader()
    suite = loader.loadTestsFromTestCase(TestIndicStemmer)
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(suite)


if __name__ == "__main__":
    main()

我使用的是Ubuntu 12.04桌面版

Answer 1

错误消息的重要行是

File "/usr/lib/python2.7/codecs.py", line 881, in open
    file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 2] No such file or directory: '/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/stemmer_ml.rules'

这意味着indicstemmer模块未正确安装，因为无法找到所需的文件stemmer_ml.rules。

在调用 g.stem() 之前，请检查是否需要先设置默认值，并确认 Python 库目录的权限允许读取规则文件。除此之外，可能还需要重新安装该库包。我注意到，如果系统中存在多个版本的 Python，软件包有时会被安装到错误的版本下。不过在这个例子中我认为不是这个原因，因为程序在崩溃之前已经一路执行到了读取规则文件这一步。

在Python中获取IO错误

1 个答案: