I'm putting together a script to fingerprint a directory of up to 8 TB containing over 1 million files (including some ~50 GB files), and export the results to a .csv as "md5", "LastWriteTime", "filesize", "fullpath\file.ext", e.g.:
"md5","YYYYMMDDHHMMSS","12345","A:\aaa\bb\c\file1.ext"
I'm stuck on the coding; the output .csv comes out empty:
def md5(fname):
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(2 ** 20), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()

def getSize(filename):
    st = os.stat(filename)
    return st.st_size()

with open('md5_filelist.csv', 'w') as md5_filelist:
    file.write('hash_md5.hexdigest','timestamp','st.st_size','os.path.abspath')
What am I doing wrong (I'm new to Python)? Thanks.
Answer 0: (score: 2)
Try this:
import hashlib
import os
import time

your_target_folder = "."

def get_size(filename):
    st = os.stat(filename)
    return str(st.st_size)

def get_last_write_time(filename):
    st = os.stat(filename)
    # convert the modification time into a human-readable timestamp
    convert_time_to_human_readable = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(st.st_mtime))
    return convert_time_to_human_readable

def get_md5(fname):
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        # read in 1 MiB chunks so large files are never loaded whole into memory
        for chunk in iter(lambda: f.read(2 ** 20), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()

for dirpath, _, filenames in os.walk(your_target_folder):
    for items in filenames:
        file_full_path = os.path.abspath(os.path.join(dirpath, items))
        try:
            my_last_data = get_md5(file_full_path) + ", " + get_last_write_time(file_full_path) + ", " + get_size(
                file_full_path) + ", " + file_full_path + "\n"
            with open("md5_filelist.csv", "a") as my_save_file:
                my_save_file.write(my_last_data)
            print(str(file_full_path) + " ||| Done")
        except:
            print("Error On " + str(file_full_path))
I changed the way the full path is built, and I added time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(st.st_mtime)) to convert the time into a human-readable format.
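If you need the exact quoted layout from the question ("md5","YYYYMMDDHHMMSS","12345","fullpath"), the csv module can handle the quoting for you. Below is a minimal sketch along the same lines as the code above; the "%Y%m%d%H%M%S" format string, QUOTE_ALL, and opening the output file once instead of per row are my own assumptions made to match the sample line in the question:

import csv
import hashlib
import os
import time

your_target_folder = "."

def get_md5(fname):
    # read in 1 MiB chunks so ~50 GB files are never loaded whole into memory
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        for chunk in iter(lambda: f.read(2 ** 20), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()

# open the output once and let csv.writer put double quotes around every field
with open("md5_filelist.csv", "w", newline="") as out:
    writer = csv.writer(out, quoting=csv.QUOTE_ALL)
    for dirpath, _, filenames in os.walk(your_target_folder):
        for name in filenames:
            full_path = os.path.abspath(os.path.join(dirpath, name))
            try:
                st = os.stat(full_path)
                writer.writerow([
                    get_md5(full_path),
                    # assumed timestamp format, chosen to match "YYYYMMDDHHMMSS" in the question
                    time.strftime("%Y%m%d%H%M%S", time.localtime(st.st_mtime)),
                    st.st_size,
                    full_path,
                ])
            except OSError as err:
                print("Error on " + full_path + ": " + str(err))

With QUOTE_ALL every field, including the file size, is wrapped in double quotes, which matches the sample row in the question.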
Good luck...