我正在尝试编写一个 Python 脚本:搜索当前目录,通过文件头(header)识别 JPG 文件,然后计算这些文件的哈希值。我现在思路很乱,任何建议都将不胜感激。
from os import listdir, getcwd
from os.path import isfile, join, normpath, basename
import hashlib
# Four-byte signature that checkJPG compares against the first bytes read
# from a file to decide whether it is a JPEG.
# NOTE(review): FF D8 FF E0 looks like the JFIF (APP0) form of the header;
# JPEGs whose fourth byte differs would not match -- confirm this is intended.
jpgHead = b'\xff\xd8\xff\xe0'
def get_files():
    """Return the full paths of the files directly inside the working directory.

    Entries for which ``isfile`` is false (sub-directories and the like) are
    skipped, and the listing is not recursive.

    Returns:
        list[str]: one path per file, each joined onto the normalised
        working directory, in the order ``listdir`` yields them.
    """
    current_path = normpath(getcwd())
    # Join each entry once and reuse the result for both the test and the
    # returned value.
    candidates = (join(current_path, entry) for entry in listdir(current_path))
    return [candidate for candidate in candidates if isfile(candidate)]
def checkJPG():
    """Return the paths of the files in the working directory that look like JPEGs.

    A file qualifies when its first four bytes equal ``jpgHead``.  The
    returned list is what ``get_hashes`` iterates over, so this function must
    not call ``get_hashes`` itself.

    Returns:
        list[str]: the paths from ``get_files()`` whose header matches.
    """
    jpg_files = []
    for path in get_files():
        with open(path, 'rb') as f:
            # Only the header is read; a file shorter than four bytes gives a
            # shorter read and therefore fails the comparison.
            if f.read(4) == jpgHead:
                jpg_files.append(path)
    return jpg_files
def get_hashes():
    """Build one report line per JPEG file returned by ``checkJPG()``.

    Returns:
        list[str]: lines made of the file's base name and the hexadecimal
        MD5 digest of its contents, each terminated by a newline.
    """
    files = checkJPG()
    list_of_hashes = []
    for each_file in files:
        hash_md5 = hashlib.md5()
        with open(each_file, "rb") as f:
            # Feed the file to the hash in fixed-size chunks; without this
            # the digest would be that of empty input for every file.
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        list_of_hashes.append('Filename: {}\tHash: {}\n'.format(
            basename(each_file), hash_md5.hexdigest()))
    return list_of_hashes
def write_jpgHashes():
    """Write the lines produced by ``get_hashes()`` to ``list_of_hashes.txt``.

    The file is created in (or overwritten in) the working directory.  Each
    entry from ``get_hashes()`` is written as-is, with no separator added.
    """
    hashes = get_hashes()
    with open('list_of_hashes.txt', 'w') as f:
        f.writelines(hashes)
# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    write_jpgHashes()
答案 0 :(得分:0)
我稍微修改了你的几个函数,试一试:
from os import listdir, getcwd
from os.path import isfile, join, normpath, basename
import hashlib
# Four-byte signature that checkJPG compares against the first bytes read
# from a file to decide whether it is a JPEG.
# NOTE(review): FF D8 FF E0 looks like the JFIF (APP0) form of the header;
# JPEGs whose fourth byte differs would not match -- confirm this is intended.
jpgHead = b'\xff\xd8\xff\xe0'
def get_files(path=None):
    """Return the full paths of the files directly inside *path*.

    Entries for which ``isfile`` is false (sub-directories and the like) are
    skipped, and the listing is not recursive.

    Args:
        path: directory to list.  ``None`` (the default) means the working
            directory at the time of the call.

    Returns:
        list[str]: one path per file, each joined onto the normalised
        directory, in the order ``listdir`` yields them.
    """
    # Resolve the default here rather than in the signature: a
    # ``path=getcwd()`` default is evaluated once, when the function is
    # defined, and would go stale after a later ``chdir``.
    current_path = normpath(getcwd() if path is None else path)
    candidates = (join(current_path, entry) for entry in listdir(current_path))
    return [candidate for candidate in candidates if isfile(candidate)]
def checkJPG(path):
    """Tell whether the file at *path* starts with the ``jpgHead`` signature.

    Args:
        path: file to inspect; it is opened in binary mode.

    Returns:
        bool: ``True`` when the first four bytes of the file equal
        ``jpgHead``, ``False`` otherwise.
    """
    with open(path, 'rb') as f:
        # A file shorter than four bytes gives a shorter read and therefore
        # compares unequal.
        return f.read(4) == jpgHead
def get_hashes():
    """Build one report line per file from ``get_files()`` that passes ``checkJPG``.

    Returns:
        list[str]: lines made of the file's path as returned by
        ``get_files()`` and its ``md5hf`` digest, each terminated by a
        newline.
    """
    return [
        'Filename: {}\tHash: {}\n'.format(each_file, md5hf(each_file))
        for each_file in get_files()
        if checkJPG(each_file)
    ]
def md5hf(path, chunk_size=4096):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in pieces of ``chunk_size`` bytes so that a large file
    is never held in memory all at once.  For small files, hashing the whole
    content in one ``hashlib.md5(data).hexdigest()`` call gives the same
    result.

    Args:
        path: file to hash; it is opened in binary mode.
        chunk_size: number of bytes requested per read (default 4096).

    Returns:
        str: the digest as lowercase hexadecimal digits.

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        # read(0) would return b"" at once and silently hash nothing.
        raise ValueError("chunk_size must be a positive integer")
    hash_md5 = hashlib.md5()
    with open(path, "rb") as f:
        # iter() with a sentinel keeps calling read() until it returns b"",
        # which is what a binary file yields at end of file.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
def write_jpgHashes():
    """Save the report lines from ``get_hashes()`` into ``list_of_hashes.txt``.

    The output file is opened for writing in the working directory, so any
    previous content is replaced.  Lines are written exactly as returned.
    """
    hashes = get_hashes()
    with open('list_of_hashes.txt', 'w') as f:
        f.writelines(hashes)
# Run the export only when this file is executed as a script, not on import.
if __name__ == '__main__':
    write_jpgHashes()
注意:
- `checkJPG` 现在是一个返回布尔值的函数,用于判断文件是否以 JPG 文件头开头。
- `list_of_hashes` 改为在 `get_hashes` 中构建,且只收录通过 `checkJPG` 检查的文件。
- 新增 `md5hf` 函数,用于获取文件的 md5 校验和。