您好,我写了一个小程序,用来获取脚本所在目录中所有文件的 MD5 哈希值。
我的问题是:当我用 get_size() 生成文件列表时,filelist.md5.txt 中只有一行,而且 get_size() 输出的似乎是整个目录的大小总和,而不是每个文件各自的大小。
如何在此脚本中输出单个文件大小?
我在 filelist.md5.txt 文件中得到了这个输出:
#
# GENERATE_FILELIST
# (filename) (filesize) (md5) (major_version) (minor_version)
#
Test_2.txt 190 dea9fe052f1abf71bac7421c732b0475 ---- ----
但是我想得到这个输出:
#
# GENERATE_FILELIST
# (filename) (filesize) (md5) (major_version) (minor_version)
#
MD5.bat filesize b9a7c825517002e0da8e980c2c2c2cef ---- ----
MD5.py filesize b61124e8bef473d377f59aa0964174ce ---- ----
MD5test.bat filesize f29d68f9721c57d648164cae79dac71b ---- ----
MD5test.py filesize a7a3c45ebe1aca82f57591c7fccd6cfc ---- ----
MD5v1.bat filesize e5e7407117845a2413fe667fe7a2f681 ---- ----
MD5v1.py filesize 55ab90b5a623548825a0b40406fcdde2 ---- ----
MD5v2.bat filesize e9e31aaa62f6f37572cf89a03860cb96 ---- ----
MD5v3.bat filesize 559c0e9ed05fc9b4884c83bc3e04f8fd ---- ----
MD5v3.py filesize d20a8841f3c37d28fd3b74847731e212 ---- ----
Test_2.txt filesize dea9fe052f1abf71bac7421c732b0475 ---- ----
到目前为止的代码:
import glob
import hashlib
import sys
import os
# Collect every name in the current working directory that matches '*.*'.
filenames = glob.glob('*.*')
# truncate the file to zero length before opening
# NOTE(review): f1 is never written to or closed in the visible code. It targets
# sys.path[0], while the later open() calls use a path relative to the current
# working directory -- confirm both refer to the same file.
f1 = open(os.path.expanduser(sys.path[0]) + '\\filelist.md5.txt', 'w')
#'a' will append the file, rather than write over whatever else you put in it like 'w'
with open('filelist.md5.txt', 'a') as f:
# Python 2 "print >> file" syntax: write the header lines to the list file.
print >> f,''
print >> f,'#'
print >> f,'# GENERATE_FILELIST'
print >> f,'# (filename) (filesize) (md5) (major_version) (minor_version)'
print >> f,'#'
print >> f,''
# NOTE(review): presumably redundant -- a with-block closes its file on exit.
f.close()
# print to console
for filename in filenames:
# Read the whole file into memory and show its MD5 digest on the console.
with open(filename, 'rb') as inputfile:
data = inputfile.read()
print '. -- ',filename, ' ---------- ', hashlib.md5(data).hexdigest()
# get the size of each file
# NOTE(review): despite the comment above, this walks start_path and adds up
# os.path.getsize() for every file found, so it returns the total size of the
# directory tree rather than the size of a single file.
def get_size(start_path = '.'):
total_size = 0
for dirpath, dirnames, filenames in os.walk(start_path):
for fn in filenames:
fp = os.path.join(dirpath, fn)
total_size += os.path.getsize(fp)
return total_size
#'a' will append the file, rather than write over whatever else you put in it like 'w'
# NOTE(review): indentation was lost in this listing. get_size() is called with
# no argument, so the size column is the total for '.'. If this block runs
# after the for loop rather than inside it, only the last filename/data pair is
# written -- verify the intended nesting.
with open('filelist.md5.txt', 'a') as f:
print >> f,'{:44}'.format(filename), get_size(),' ', hashlib.md5(data).hexdigest(),' ','----',' ','----'
# NOTE(review): presumably redundant -- a with-block closes its file on exit.
f.close()
答案 0 :(得分:1)
您的 get_size() 的写法返回的是整个目录的大小,这并不是您想要的。
# Directory whose files are listed; replace the placeholder with a real path.
# (Named target_dir so the builtin dir() is not shadowed.)
target_dir = r'specify\path\here'

# 'w' truncates any previous listing. The with-block closes the file on exit,
# so no explicit close() call is needed.
with open('filelist.md5.txt', 'w') as fx:
    # os.listdir() returns names in arbitrary order; sort for a stable listing.
    for name in sorted(os.listdir(target_dir)):
        path = os.path.join(target_dir, name)
        # Only plain files get a row; directories are skipped.
        if os.path.isfile(path):
            # specify anything else you want to write inside fx.write()
            fx.write(name + "\t\t" + str(os.path.getsize(path)) + "\n")
上面的代码将文件名和大小用制表符分隔,每个文件占一行。
使用 with open('filelist.md5.txt', 'a') as f: 时,您不必再手动关闭文件。
答案 1 :(得分:1)
试试这个(适用于大文件和非 ASCII 文件名,不依赖 glob 模块,并带有错误处理):
import hashlib, os, hashlib, sys
your_target_folder = "." # root folder passed to os.walk(); "." means the current working directory
def get_size(filename):
    """Return the size of *filename* in bytes as a decimal string.

    The value is returned as text (not an int) so it can be padded and
    joined with the other text columns of a listing row.

    Raises:
        OSError: if *filename* cannot be stat'ed (missing, no permission).
    """
    return str(os.stat(filename).st_size)
def get_minor_version(filename):
    """Placeholder: return the literal text ``"minor_version"``.

    *filename* is accepted but not used yet; replace the body with real
    version-detection logic.
    """
    # Your Code ...
    return "minor_version"
def get_major_version(filename):
    """Placeholder: return the literal text ``"major_version"``.

    *filename* is accepted but not used yet; replace the body with real
    version-detection logic.
    """
    # Your Code ...
    return "major_version"
def get_md5(fname):
    """Return the hexadecimal MD5 digest of the file at *fname*.

    The file is read in 1 MiB (2 ** 20 byte) chunks, so large files are
    hashed without being loaded into memory all at once.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        # iter(callable, sentinel) keeps calling f.read() until it returns
        # b"", i.e. until end of file.
        for chunk in iter(lambda: f.read(2 ** 20), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
# Console output helper that tolerates non-ASCII file names (e.g. Chinese).
def sys_out(out_data):
    """Print *out_data*, degrading gracefully if the console cannot encode it.

    A plain ``print`` is tried first. If the console encoding cannot
    represent some characters (UnicodeEncodeError), those characters are
    printed as backslash escapes instead of aborting the run.
    """
    try:
        print(out_data)
    except UnicodeEncodeError:
        if sys.version_info >= (3,):
            # Encode with the console's own encoding and escape whatever it
            # cannot represent. Decoding UTF-8 bytes with the console
            # encoding would raise UnicodeDecodeError or print mojibake.
            # stdout.encoding may be None when output is redirected.
            console_encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
            safe = out_data.encode(console_encoding, errors="backslashreplace")
            print(safe.decode(console_encoding))
        else:
            # Python 2: print the raw UTF-8 bytes.
            print(out_data.encode('utf8'))
def make_beautiful_terminal_output(get_list):
    """Format a row of text fields as equal-width, left-justified columns.

    Every field is padded to the length of the longest field plus three
    spaces, and the padded fields are concatenated into one string.

    Args:
        get_list: sequence of strings making up one row.

    Returns:
        The formatted row, or ``""`` when *get_list* is empty.
    """
    # An empty row has no widest field; max() would raise ValueError.
    if not get_list:
        return ""
    col_width = max(len(word) for word in get_list) + 3  # padding
    # The row is built once; the original rebuilt the identical string for
    # every field and then returned only the first copy.
    return "".join(word.ljust(col_width) for word in get_list)
def print_header():
    """Append the column-header line to ``filelist.md5.txt`` and echo it.

    The file is opened in append mode, so an existing listing is kept and
    the header is added after it. The same line is also printed to the
    console.
    """
    header_tag = "(filename) (filesize) (md5) (major_version) (minor_version)\n"
    with open("filelist.md5.txt", "a") as my_header:
        my_header.write(header_tag)
    print(header_tag)
# Write the column header first, then one row per file found under the
# target folder (sub-folders included, via os.walk).
print_header()

for dirpath, _, filenames in os.walk(your_target_folder):
    for items in filenames:
        file_full_path = os.path.abspath(os.path.join(dirpath, items))
        try:
            my_last_data = [
                items,
                get_size(file_full_path),
                get_md5(file_full_path),
                get_major_version(file_full_path),
                get_minor_version(file_full_path),
            ]
            terminal_output = make_beautiful_terminal_output(my_last_data)
            sys_out(terminal_output)
            # UTF-8 keeps non-ASCII file names intact in the listing.
            with open("filelist.md5.txt", "a", encoding='utf-8') as my_save_file:
                my_save_file.write(terminal_output + "\n")
        except Exception as exc:
            # Best effort: report the failing file and its reason, then carry
            # on. "except Exception" (not a bare except) lets Ctrl-C and
            # SystemExit still stop the run.
            sys_out("Error On " + str(file_full_path) + ": " + str(exc))