UnicodeDecodeError:Python

时间:2014-11-06 01:09:19

标签: python unicode

我收到此错误。 我不确定这是我的错误还是别的什么。 我现在正在使用python 3.X版本。

Traceback (most recent call last):
  File "/Users/Administrator/Desktop/A2_b/author_program.py", line 104, in <module>
    signature = read_signature(dir_name + "/" + this_file)
  File "/Users/Administrator/Desktop/A2_b/author_program.py", line 48, in read_signature
    result = [sig_file.readline().strip()]
  File "/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/encodings/ascii.py", line 26, in decode
    return codecs.ascii_decode(input, self.errors)[0]
UnicodeDecodeError: 'ascii' codec can't decode byte 0x80 in position 3131: ordinal not in range(128)

这是给我这个错误的代码。我只需要在这个文件中完成第一和第二个功能。

import author_functions, os.path

def get_valid_filename(msg):
    """ (str) -> str

    Prompt the user, using msg, to type the name of a file. This file should 
    exist in the same directory as the starter code. If the file does not
    exist, keep re-prompting until they give a valid filename.
    Return the name of that file.
    """
    filename = input(msg)
    while not os.path.exists(filename):
        print("That file does not exist.")
        filename = input(msg)

    return filename


def get_valid_directory_name(msg):
    """ (str) -> str

    Prompt the user, using msg, to type the name of a directory. If
    the directory does not exist, keep re-prompting until they give a valid
    directory. 
    Return the name of that directory.
    """
    dirname = input(msg)

    while not os.path.isdir(dirname):
        print("That directory does not exist.")
        dirname = input(msg)

    return dirname


### Provided helper function ###

def read_signature(filename):
    """ (str) -> list

    Read a linguistic signature from filename and return it as 
    a list of features. 
    """

    sig_file = open(filename, 'r')

    # Read the first feature.
    result = [sig_file.readline().strip()]

    # Read each remaining feature and convert each one to float.
    for line in sig_file:
        result.append(float(line.strip()))

    sig_file.close()

    return result    


# #############################
# The main program begins here 
# #############################

if __name__ == '__main__':

    prompt = 'Enter the name of the file with unknown author: '
    mystery_filename = get_valid_filename(prompt)

    prompt = 'Enter the name of the directory of signature files: '
    dir_name = get_valid_directory_name(prompt)

    # Every file in the dir_name directory must be a linguistic signature. 
    # We assume there is a minimum of one file.
    files = os.listdir(dir_name)

    # ####################################################################
    # The following code parses the mystery file and calculates its 
    # linguistic signature.                                         
    # ####################################################################

    mystery_file = open(mystery_filename, 'r')
    # readlines() gives us a list of strings, one for each line of the file
    text = mystery_file.readlines()
    mystery_file.close()

    # Calculate the signature for the mystery file
    mystery_signature = [mystery_filename]
    mystery_signature.append(author_functions.avg_word_length(text))
    mystery_signature.append(author_functions.type_token_ratio(text))
    mystery_signature.append(author_functions.hapax_legomena_ratio(text))
    mystery_signature.append(author_functions.avg_sentence_length(text))
    mystery_signature.append(author_functions.avg_sentence_complexity(text))

    # ####################################################
    # The following code reads the linguistic signatures, 
    # compares them with the mystery_signature,           
    # and reports the author that was the best match.                   
    # ####################################################

    # Weights of linguistic features.
    weights = [0, 11, 33, 50, 0.4, 4]

    # We assume there is at least one signature in the dir_name directory
    this_file = files[0]
    signature = read_signature(dir_name + "/" + this_file)
    best_score = author_functions.compare_signatures(mystery_signature,
                                                     signature, weights)
    best_author = signature[0]

    for this_file in files[1:]:
        signature = read_signature(dir_name + "/" + this_file)
        score = author_functions.compare_signatures(mystery_signature,
                                                    signature, weights)
        if score < best_score:
            best_score = score
            best_author = signature[0]

    if type(best_score) != float:
        print("Error! No score could be computed")
    else:
        print("Best author match:", best_author, "with score", best_score)

1 个答案:

答案 0 :(得分:2)

尝试sig_file = open(filename, 'rb')

b表示文件中存在二进制数据(不仅仅是ascii)

可能会解决您的问题