import hashlib
# Verify a file against a known MD5 checksum.
file_name = "test.apk"
original_md5 = '5d41402abc4b2a76b9719d911017c592'

# Open in binary mode ("rb"): an APK is not text, and a text-mode read
# tries to decode the bytes with the platform's default codec, which
# raises UnicodeDecodeError on bytes that codec cannot map.
with open(file_name, "rb") as file_to_check:
    # Read the raw contents of the file as bytes.
    data = file_to_check.read()

# hashlib.md5() operates on bytes, which is exactly what a binary read yields.
md5_returned = hashlib.md5(data).hexdigest()

if original_md5 == md5_returned:
    print("MD5 verified.")
else:
    print("MD5 verification failed!.")
错误
File "newhash.py", line 15, in <module>
data = file_to_check.read()
File "C:\Users\AppData\Local\Programs\Python\Python35-32\lib\encodings\cp1252.py", line 23, in decode
return codecs.charmap_decode(input,self.errors,decoding_table)[0]
UnicodeDecodeError: 'charmap' codec can't decode byte 0x8f in position 286: character maps to <undefined>
答案 0 :(得分:0)
问题是您将文件读取为文本文件而不是二进制文件。不知道你是使用python2还是python3,但是对于这两种情况你都可以找到如何从文件中读取字节而不是将它们解释为字符串。
答案 1 :(得分:0)
要计算校验和(md5、sha1 等),您必须以二进制模式打开文件,因为校验和是根据文件的原始字节值计算的:
为了同时兼容 Python 2.7 和 Python 3,您应该使用 io 包,如下所示:
import hashlib
import io
def md5sum(src):
    """Compute the MD5 hash of the file at *src*.

    The whole file is read into memory with a single ``read()`` call, so
    this variant is only appropriate for files that fit in memory.

    Args:
        src: Path of the file to hash.

    Returns:
        The ``hashlib.md5`` hash object after the file contents have been
        fed to it; call ``.hexdigest()`` or ``.digest()`` on the result.
    """
    md5 = hashlib.md5()
    # Binary mode: the hash is computed over raw bytes, so the content
    # must not go through any text decoding.
    with io.open(src, mode="rb") as fd:
        md5.update(fd.read())
    return md5
如果您的文件很大,您可能更喜欢按块读取文件,以避免将整个文件内容存储在内存中:
def md5sum(src, length=io.DEFAULT_BUFFER_SIZE):
    """Compute the MD5 hash of the file at *src*, reading it in chunks.

    Only one chunk of at most *length* bytes is held in memory at a time,
    which makes this variant suitable for large files.

    Args:
        src: Path of the file to hash.
        length: Number of bytes requested per read. Defaults to
            ``io.DEFAULT_BUFFER_SIZE``.

    Returns:
        The ``hashlib.md5`` hash object after the file contents have been
        fed to it; call ``.hexdigest()`` or ``.digest()`` on the result.

    Raises:
        ValueError: If *length* is 0.
    """
    # read(0) returns b'' immediately, which would end the loop before any
    # data is hashed and silently yield the MD5 of empty input.
    if length == 0:
        raise ValueError("length must be non-zero")

    md5 = hashlib.md5()
    with io.open(src, mode="rb") as fd:
        # iter(callable, sentinel) keeps calling read() until it returns
        # b'', i.e. until end of file.
        for chunk in iter(lambda: fd.read(length), b''):
            md5.update(chunk)
    return md5
如果您的文件非常大,您可能还需要显示进度信息。您可以通过调用回调函数来实现,该函数打印或记录已计算的字节数:
def md5sum(src, callback=None, length=io.DEFAULT_BUFFER_SIZE):
    """Compute the MD5 hash of the file at *src*, reporting progress.

    The file is read in chunks of at most *length* bytes. After each chunk
    is hashed, *callback* is invoked with the running total of bytes
    processed so far.

    Args:
        src: Path of the file to hash.
        callback: Optional callable taking one argument, the cumulative
            number of bytes hashed. When ``None``, no progress is reported.
        length: Number of bytes requested per read. Defaults to
            ``io.DEFAULT_BUFFER_SIZE``.

    Returns:
        The ``hashlib.md5`` hash object after the file contents have been
        fed to it; call ``.hexdigest()`` or ``.digest()`` on the result.

    Raises:
        ValueError: If *length* is 0.
    """
    # read(0) returns b'' immediately, which would end the loop before any
    # data is hashed and silently yield the MD5 of empty input.
    if length == 0:
        raise ValueError("length must be non-zero")

    calculated = 0
    md5 = hashlib.md5()
    with io.open(src, mode="rb") as fd:
        # iter(callable, sentinel) keeps calling read() until it returns
        # b'', i.e. until end of file.
        for chunk in iter(lambda: fd.read(length), b''):
            md5.update(chunk)
            calculated += len(chunk)
            if callback is not None:
                callback(calculated)
    return md5