我想比较在 Python 中读取二进制文件的两种方法。如果调用 `seek`,可以看到两者的耗时相差大约 1 秒;不调用时则差别不大。但我真的需要调用 `seek` 吗?
from xmlrpclib import Binary
from os.path import join
from hashlib import md5
import timeit
import os
import glob
class DefLogFile2(object):
    """Lazy reader for a binary log file.

    Nothing is read when the object is created; the file is opened and read
    each time :meth:`b64_encoded` is called.
    """

    def __init__(self, path, filename):
        """Remember the location of the file.

        Args:
            path: Directory that contains the file.
            filename: Name of the file inside ``path``.
        """
        self.name = filename
        self.path = path

    def b64_encoded(self):
        """Read the file and return its payload together with its MD5 digest.

        Returns:
            A ``(Binary(data), md5_hexdigest)`` tuple, where ``data`` is the
            complete content of the file read in binary mode.

        Raises:
            IOError/OSError: If the file cannot be opened or read.
        """
        # A freshly opened file is already positioned at offset 0, so no
        # seek(0) is required before the first read.
        with open(join(self.path, self.name), 'rb') as f:
            data = f.read()
        return Binary(data), md5(data).hexdigest()
class DefLogFile(object):
    """Eager reader for a binary log file.

    The file is read once while the object is constructed. Afterwards the
    instance exposes:

    * ``name``       -- the file name passed to the constructor,
    * ``md5``        -- hex MD5 digest of the file content,
    * ``b64encoded`` -- the file content wrapped in ``Binary``,
    * ``logfile``    -- the (already closed) file object that was read.
    """

    def __init__(self, path, filename):
        """Read ``filename`` from ``path`` and cache its digest and payload.

        Args:
            path: Directory that contains the file.
            filename: Name of the file inside ``path``.

        Raises:
            IOError/OSError: If the file cannot be opened or read.
        """
        self.name = filename
        # Read the content a single time and reuse the bytes for both the
        # digest and the payload. Reading twice from the same handle would
        # require a seek(0) in between, otherwise the second read() returns
        # an empty string because the position is already at end-of-file.
        # The ``with`` block guarantees the handle is closed even if read()
        # raises.
        with open(os.path.join(path, filename), 'rb') as handle:
            data = handle.read()
        # Kept for backward compatibility: callers still find a closed file
        # object on the instance.
        self.logfile = handle
        self.md5 = md5(data).hexdigest()
        self.b64encoded = Binary(data)
if __name__ == "__main__":

    def z():
        """Benchmark body for DefLogFile2: build the readers, then read each file."""
        root = r'C:\MyFiles'
        matches = glob.glob("{}\\*.BIN".format(root))
        readers = [
            DefLogFile2(os.path.dirname(match), os.path.basename(match))
            for match in matches
        ]
        for reader in readers:
            reader.b64_encoded()

    def b():
        """Benchmark body for DefLogFile: the constructor itself reads each file."""
        root = r'C:\MyFiles'
        matches = glob.glob("{}\\*.BIN".format(root))
        loaded_files = [
            DefLogFile(os.path.dirname(match), os.path.basename(match))
            for match in matches
        ]
        for loaded in loaded_files:
            # Plain attribute access; the payload was computed in __init__.
            loaded.b64encoded

    # Ten rounds of 1000 calls each; z and b must be importable from
    # __main__ because timeit runs the statement in its own namespace.
    for _ in xrange(10):
        z_seconds = timeit.timeit(stmt = 'z()', setup = 'from __main__ import z', number=1000)
        print("z => {}".format(z_seconds))
        b_seconds = timeit.timeit(stmt='b()', setup='from __main__ import b', number=1000)
        print("b => {}".format(b_seconds))
当我不使用seek时,这就是我得到的:
z => 4.74381579487
b => 4.61342728205
z => 4.60564184615
b => 4.43551179487
z => 4.52421620513
如果我使用 seek,得到的结果是:
z => 4.59685374359
b => 5.35988594872
z => 4.45907282051
b => 5.22442707692
答案 0 :(得分:0)
不,刚打开文件后你不需要 `seek`:它不应该损害性能,但它只是多余的,因为 `open` 会自动将文件指针放在文件的开头(只有当同一个文件对象已经被读取过、需要再次从头读取时,才需要 `seek(0)`)。所以这样做:
f = open("stuff")
f.seek(0)
……就相当于这样做:
# open the file, blah, blah
current_position = 0
# the file is now open
# hey, let's change the current position with `f.seek(0)`!
current_position = 0 # doesn't do anything useful here