下面是我的代码:我遍历一个列出各操作系统镜像版本的 JSON 文件,并尝试匹配每个镜像的 MD5 哈希值。镜像文件太大,执行大约需要 12 秒。有什么办法可以缩短处理时间?例如把镜像分割成块再分别计算?
Python版本:2.7
calculate_hash.py
import ast
import hashlib
import json
import multiprocessing
import os
import timeit
def md5_for_file(file, block_size=2**20):
    """Compute and return the raw MD5 digest of *file*.

    The file is read in ``block_size``-byte chunks (default 1 MiB) so that
    arbitrarily large image files are hashed without loading them fully
    into memory.  Wall-clock time for the read+hash pass is printed so the
    per-file cost can be profiled.

    :param file: path of the file to hash (name kept for caller
        compatibility, although it shadows the ``file`` builtin).
    :param block_size: number of bytes read per chunk.
    :return: the 16-byte binary MD5 digest.
    """
    start_time = timeit.default_timer()
    md5 = hashlib.md5()
    with open(file, "rb") as f:
        while True:
            data = f.read(block_size)
            if not data:  # EOF reached
                break
            md5.update(data)
    digest = md5.digest()
    elapsed = timeit.default_timer()
    print("finished in {} seconds for image file path {}".format(
        round(elapsed - start_time, 2), file))
    return digest
def main():
    """Hash every image listed in /images/hashfile.json in parallel.

    Spawns one worker process per image (MD5 hashing is CPU-bound, so
    processes sidestep the GIL), waits for all of them, and prints the
    total wall-clock time.

    Fixes applied: ``start_time`` was misspelled ``tart_time`` (NameError
    at the final print); ``q.start(`` was missing its closing parenthesis
    (SyntaxError); the metadata file is parsed with ``json.load`` instead
    of ``ast.literal_eval``; ``dict.items()`` replaces the Python-2-only
    ``iteritems()`` (works on both 2.7 and 3.x).
    """
    start_time = timeit.default_timer()
    processes = []
    with open("/images/hashfile.json", "r") as f:
        metadata = json.load(f)
    # NOTE(review): the expected hash from the JSON is currently unused —
    # the worker only computes the digest, it never compares it.  Pass the
    # value to the worker if verification is the actual goal.
    for image, expected_hash in metadata.items():
        image_path = "/images/{}".format(image)
        worker = multiprocessing.Process(target=md5_for_file,
                                         args=[image_path])
        worker.start()
        processes.append(worker)
    for process in processes:
        process.join()
    elapsed = timeit.default_timer()
    print("finished in {} seconds".format(round(elapsed - start_time, 2)))


if __name__ == "__main__":
    main()
hashfile.json
{
"ubuntu-1204.iso": "31767f19b2b07073492321a49b985fd5",
"ubuntu-1604.iso": "c4bd70ae44cea3d6456dfe2eed77a5b7",
"ubuntu-1804.iso": "f1f5ee66f9ecf2ba34ea0e211578e83b",
"ubuntu-2004.iso": "3bf6e6a7fe78159745ca9f303a863317"
}