Question

我想比较两种在 Python 中读取二进制文件的方法。如果使用 seek，可以看到大约 1 秒的时间差异；否则两者相差不大。但我真的需要使用 seek 吗？

from xmlrpclib import Binary
from os.path import join
from hashlib import md5
import timeit
import os
import glob
class DefLogFile2(object):
    """Log file wrapper that reads its contents lazily, on request.

    The constructor only records where the file lives; the file is opened
    each time ``b64_encoded`` is called.
    """

    def __init__(self, path, filename):
        """Remember the directory ``path`` and the ``filename`` inside it."""
        self.path = path
        self.name = filename

    def b64_encoded(self):
        """Read the whole file and return ``(Binary(data), md5_hexdigest)``."""
        full_path = join(self.path, self.name)
        with open(full_path, 'rb') as handle:
            # A file freshly opened in 'rb' mode already starts at offset 0,
            # so this seek is a no-op; it is the call being benchmarked.
            handle.seek(0)
            payload = handle.read()
        return Binary(payload), md5(payload).hexdigest()
class DefLogFile(object):
    """Log file wrapper that reads and digests its contents eagerly.

    Attributes set by the constructor:
        name: the file name as given.
        logfile: the file object; it is closed by the time ``__init__``
            returns (or raises).
        md5: hex MD5 digest of the file contents.
        b64encoded: ``Binary`` wrapping the file contents.
    """

    def __init__(self, path, filename):
        """Open ``filename`` under ``path`` and load its digest and payload.

        Raises:
            IOError/OSError: if the file cannot be opened or read.
        """
        self.name = filename
        self.logfile = open(os.path.join(path, filename), 'rb')
        try:
            self.md5 = md5(self.logfile.read()).hexdigest()
            # Required here: the read() above left the pointer at EOF, so
            # without rewinding the second read() would return no data.
            self.logfile.seek(0)
            self.b64encoded = Binary(self.logfile.read())
        finally:
            # Close the handle even when reading or hashing fails, so a
            # failed construction does not leak an open file.
            self.logfile.close()
if __name__ == "__main__":
    def z():
        """Wrap every *.BIN file in DefLogFile2, then read each on demand."""
        root = r'C:\MyFiles'
        wrappers = [DefLogFile2(*os.path.split(found))
                    for found in glob.glob("{}\\*.BIN".format(root))]
        for wrapper in wrappers:
            wrapper.b64_encoded()

    def b():
        """Wrap every *.BIN file in DefLogFile, which reads at construction."""
        root = r'C:\MyFiles'
        wrappers = [DefLogFile(*os.path.split(found))
                    for found in glob.glob("{}\\*.BIN".format(root))]
        for wrapper in wrappers:
            # Plain attribute access: the payload was loaded in __init__.
            wrapper.b64encoded

    # Ten rounds, each timing 1000 calls of both variants. timeit imports
    # z and b by name from __main__, so those names must stay as they are.
    for _ in xrange(10):
        z_seconds = timeit.timeit(stmt='z()', setup='from __main__ import z', number=1000)
        print("z => {}".format(z_seconds))
        b_seconds = timeit.timeit(stmt='b()', setup='from __main__ import b', number=1000)
        print("b => {}".format(b_seconds))

当我不使用seek时，这就是我得到的：

z => 4.74381579487
b => 4.61342728205
z => 4.60564184615
b => 4.43551179487
z => 4.52421620513

如果我使用 seek：

z => 4.59685374359
b => 5.35988594872
z => 4.45907282051
b => 5.22442707692

Answer 1

不，你在这里不需要 seek：它不会损害性能，只是多余的，因为 open 会自动把文件指针放在文件的开头，所以这样做：

# Illustration: the file has just been opened and nothing has been read yet.
f = open("stuff")
f.seek(0)  # moves the pointer to offset 0, where it already is

……就相当于这样做：

# Illustrative model only (not real file-object internals):
# opening the file sets its position to the start.
current_position = 0
# the file is now open

# calling `f.seek(0)` at this point assigns the same value again
current_position = 0 # no effect: the position was already 0

Python 打开二进制文件时使用 seek 的差异

1 个答案: