通过前4个字节识别文件来进行文件散列

时间:2017-05-06 15:29:27

标签: python

我想编写一个 Python 脚本:搜索当前目录,通过文件头(header)识别 jpg 文件,然后计算这些文件的哈希值。我现在思路很乱,代码也写得七零八落。任何建议都将不胜感激。

from os import listdir, getcwd
from os.path import isfile, join, normpath, basename
import hashlib

jpgHead = b'\xff\xd8\xff\xe0'

def get_files():
    current_path = normpath(getcwd())
    return [join(current_path, f) for f in listdir(current_path) if 
isfile(join(current_path, f))] 

def checkJPG():
    checkJPG=checkJPG.read(4)
    if checkJPG==jpgHead
    get_hashes()

def get_hashes():
    files = checkJPG()
    list_of_hashes = []
    for each_file in files:
        hash_md5 = hashlib.md5()
        with open(each_file, "rb") as f: 
        list_of_hashes.append('Filename: {}\tHash: 
        {}\n'.format(basename(each_file), hash_md5.hexdigest()))
        return list_of_hashes

def write_jpgHashes():
    hashes=get_hashes()
    with open('list_of_hashes.txt', 'w') as f:
        for md5_hash in hashes:
        f.write(md5_hash)


if __name__ == '__main__':

write_jpgHashes()

1 个答案:

答案 0 :(得分:0)

我稍微修改了你的几个函数,试一试:

from os import listdir, getcwd
from os.path import isfile, join, normpath, basename
import hashlib

# Leading four bytes of a JFIF-style JPEG: the SOI marker (FF D8) followed by
# the APP0 marker (FF E0).
jpgHead = b'\xff\xd8\xff\xe0'

def get_files(path=None):
    """Return the full paths of the regular files directly inside *path*.

    Args:
        path: Directory to list. When omitted (``None``), the working
            directory at the time of the call is used.

    Returns:
        A list with one normalised, joined path per directory entry for
        which ``isfile`` is true. Subdirectories are skipped and are not
        descended into. Entries appear in the order ``listdir`` yields them.
    """
    # Resolve the default here rather than in the signature: a default of
    # ``getcwd()`` is evaluated once, when the function is defined, and would
    # keep pointing at the old directory after a later ``os.chdir``.
    if path is None:
        path = getcwd()
    current_path = normpath(path)
    candidates = (join(current_path, name) for name in listdir(current_path))
    return [candidate for candidate in candidates if isfile(candidate)]

def checkJPG(path):
    """Report whether the file at *path* starts like a JPEG stream.

    Args:
        path: Path of the file to inspect; it is opened in binary mode.

    Returns:
        ``True`` when the file has at least four bytes and the first three
        are FF D8 FF, otherwise ``False``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Every JPEG begins with the SOI marker (FF D8) and then the FF that
    # introduces the next marker. Only the fourth byte varies by flavour
    # (E0 for JFIF, E1 for Exif, DB for a bare quantisation table, ...), so
    # comparing all four bytes to the JFIF value would reject valid JPEGs.
    jpeg_prefix = b'\xff\xd8\xff'
    with open(path, 'rb') as f:
        header = f.read(4)
    return len(header) == 4 and header.startswith(jpeg_prefix)

def get_hashes():
    """Build one report line for every file that passes ``checkJPG``.

    Returns:
        A list of strings, one per accepted file returned by ``get_files()``.
        Each line holds the file path and its ``md5hf`` digest, separated by
        a tab and terminated by a newline.
    """
    return [
        'Filename: {}\tHash: {}\n'.format(candidate, md5hf(candidate))
        for candidate in get_files()
        if checkJPG(candidate)
    ]

def md5hf(path):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in 4096-byte pieces and fed to the hash incrementally,
    so the whole file is never held in memory at once.
    """
    digest = hashlib.md5()
    with open(path, "rb") as stream:
        while True:
            block = stream.read(4096)
            # An empty read means end of file.
            if block == b"":
                break
            digest.update(block)
    return digest.hexdigest()

def write_jpgHashes():
    """Write the lines from ``get_hashes()`` to ``list_of_hashes.txt``.

    The file name is relative, so it is resolved against the working
    directory. Opening in ``'w'`` mode replaces any previous contents.
    """
    # Collect the report before opening the output file, as the original did.
    report_lines = get_hashes()
    with open('list_of_hashes.txt', 'w') as report:
        report.writelines(report_lines)

# Produce the hash report only when this file is run as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    write_jpgHashes()

注意:

  1. 修正了一些语法和缩进错误
  2. checkJPG 改为返回布尔值
  3. 在 get_hashes 中将文件的 md5 哈希添加到 list_of_hashes
  4. 添加了 md5hf 函数,用于计算文件的 md5 校验和