是否有办法将文件描述符(不是类似IO的对象)直接读入bytearray?
现在我使用临时FileIO
对象进行调解,例如:
def fd_readinto(fd, ba):
fio = io.FileIO(fd, closefd = False)
return fio.readinto(ba)
答案 0 :(得分:1)
没有这样做的功能,你的方法是迄今为止最快的方法。
我打算使用bytearray(mmap)
和array.fromfile
建议os.read()
,bytearray
,甚至是自制memoryview
,但FileIO.readinto
是快速尖叫。
import os
import mmap, io, array
import timeit
fn = 'path_to_largish_file'
def fd_readinto_mmap(fd, ba):
m = mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
ba.extend(m)
m.close()
def fd_readinto_fio(fd, ba):
with io.FileIO(fd, closefd = False) as fio:
fio.readinto(ba)
def fd_readinto_array(fd, ba):
ar = array.array('c')
sz = os.fstat(fd).st_size
fp = os.fdopen(fd, 'rb')
ar.fromfile(fp, sz)
ba.extend(ar)
def fd_readinto_mv(fd, ba):
stat = os.fstat(fd)
blksize = getattr(stat, 'st_blksize', 4096)
bufsize = stat.st_size
buf = bytearray(bufsize)
m = memoryview(buf)
while True:
b = os.read(fd, blksize)
s = len(b)
if not s: break
m[:s], m = b, m[s:]
writtenbytes = buffer(buf, 0, bufsize-len(m))
ba.extend(writtenbytes)
setup = """
from __main__ import fn, fd_readinto_mmap, fd_readinto_fio, fd_readinto_array, fd_readinto_mv, reset_fd
import os
openfd = lambda : os.open(fn, os.O_RDONLY)
closefd = lambda fd: os.close(fd)
"""
reps = 2
tests = {
'fio' : "fd=openfd(); fd_readinto_fio(fd, bytearray()); closefd(fd)",
'mmap': "fd=openfd(); fd_readinto_mmap(fd, bytearray()); closefd(fd)",
'array': "fd=openfd(); fd_readinto_array(fd, bytearray());",
'mv' : "fd=openfd(); fd_readinto_mv(fd, bytearray()); closefd(fd)",
}
width = max(map(len, tests))
for n,t in tests.iteritems():
time = timeit.timeit(t, setup, number=reps)
print ("{:%s} {}" % width).format(n, time)
在我的系统上,FileIO
的速度提高了几个数量级。关于文件大小,FileIO.readinto
似乎几乎是恒定的时间。 (不确定这怎么可能。)
mmap 4.86922478676
array 4.19783091545
mv 7.75051403046
fio 9.29832458496e-05