我正在编写此代码段:
def fileLoc(self,filename):
md5_data_with_commented_lines = hashlib.md5()
md5_data_without_commented_lines = hashlib.md5()
line_of_code =0
line_of_comments = 0
no_of_blank_lines = 0
flag = 0
with open(filename, 'r') as source_file:
for line in source_file:
if flag == 1:
md5_data_with_commented_lines.update(line.encode("iso-8859-1"))
#md5_data_with_commented_lines.update(line.encode('utf-8'))
if line.find('-->')==-1:
line_of_comments = line_of_comments + 1
else:
line_of_comments = line_of_comments + 1
flag = 0
else:
if len(line) == 1:
no_of_blank_lines =no_of_blank_lines + 1
elif line.find('<!--')!=-1:
md5_data_with_commented_lines.update(line.encode("iso-8859-1"))
#md5_data_with_commented_lines.update(line.encode('utf-8'))
line_of_comments = line_of_comments + 1
flag = 1
if line.find('-->')!=-1 and line.find('-->') > line.find('<!--'):
flag =0
else:
#md5_data_with_commented_lines.update(line.encode('utf-8'))
#md5_data_without_commented_lines.update(line.encode('utf-8'))
md5_data_with_commented_lines.update(line.encode("iso-8859-1"))
md5_data_without_commented_lines.update(line.encode("iso-8859-1"))
line_of_code = line_of_code +1
self.check_sum_with_commented_lines = str(md5_data_with_commented_lines.hexdigest())
self.check_sum_without_commented_lines = str(md5_data_without_commented_lines.hexdigest())
return [line_of_comments,line_of_code]
但我得到的是UnicodeEncodeError
-
UnicodeEncodeError: 'charmap' codec can't encode characters in position 1334-1335: character maps to <undefined>
答案 0 :(得分:0)
为什么要用单字节编码对数据进行编码? 理想情况下它应该是line.decode(&#34; iso-8859-1&#34;)。然后可以作为line.upper()
访问