`zipfile.ZipFile` 似乎需要对文件对象进行随机访问,而 `urllib2` 返回的“类文件”对象并不支持随机访问。
我尝试用 `io.BufferedRandom` 对它进行包装,但得到了如下错误:
AttributeError: addinfourl instance has no attribute 'seekable'
答案 0 :(得分:1)
由于没有其他回复,我最终采用了下面这个自制的解决方案。在读取 zip 文件时,它未必能减少内存占用,但在需要先读取 zip 头信息的情况下,可能会改善延迟。
from io import BytesIO, SEEK_SET, SEEK_END
def _ceil_div(a, b):
    """Return ``a / b`` rounded up to the nearest integer.

    Intended for integer ``a`` and positive integer ``b``.

    Floor division (``//``) is used on purpose: it behaves exactly like the
    classic ``/`` for integers on Python 2, and keeps the result an ``int`` on
    Python 3, where ``/`` would produce a (possibly fractional) float.
    """
    return (a + b - 1) // b
def _align_up(a, b):
    """Round ``a`` up to a multiple of ``b``.

    The number of ``b``-sized units needed to cover ``a`` is obtained from
    ``_ceil_div`` and then scaled back by ``b``.
    """
    units = _ceil_div(a, b)
    return units * b
class BufferedRandomReader(object):
    """Read-only, seekable adapter over a sequential input stream.

    Wraps a stream that only supports forward reads (no ``seek()``) and
    caches everything read so far in an in-memory ``BytesIO``. Reads and
    seeks are served from that cache; the wrapped stream is only consulted
    when data beyond the cached region is requested.

    Note that seeking relative to the end, or reading without a size, pulls
    the whole remaining stream into memory.

    Example::

        >>> stream = BufferedRandomReader(BytesIO(b'abc'))
        >>> stream.read(2) == b'ab'
        True
        >>> stream.seek(0) == 0
        True
        >>> stream.read() == b'abc'
        True
    """

    def __init__(self, fin, chunk_size=512):
        """Wrap ``fin``.

        :param fin: source object providing ``read([n])`` and ``close()``.
        :param chunk_size: granularity, in bytes, to which read requests
            issued to ``fin`` are rounded up.
        """
        self._fin = fin
        self._buf = BytesIO()   # cache of every byte read from ``fin`` so far
        self._eof = False       # True once ``fin`` has been exhausted
        self._chunk_size = chunk_size

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def readable(self):
        """Report that the adapter supports ``read()``."""
        return True

    def seekable(self):
        """Report that the adapter supports ``seek()`` and ``tell()``."""
        return True

    def tell(self):
        """Return the current stream position."""
        return self._buf.tell()

    def _drain(self):
        """Append everything still left in the source to the cache.

        Leaves the cache position at the end; callers reposition afterwards.
        """
        if not self._eof:
            self._buf.seek(0, SEEK_END)
            self._buf.write(self._fin.read())
            self._eof = True

    def _fill_to(self, target):
        """Grow the cache until it holds ``target`` bytes or the source ends.

        Leaves the cache position at the end; callers reposition afterwards.
        """
        cached = self._buf.seek(0, SEEK_END)
        # Loop rather than read once: a source may legitimately return fewer
        # bytes than requested without being exhausted, so only an empty
        # read is treated as end-of-file.
        while cached < target and not self._eof:
            want = _align_up(target - cached, self._chunk_size)
            chunk = self._fin.read(want)
            if chunk:
                self._buf.write(chunk)
                cached += len(chunk)
            else:
                self._eof = True

    def read(self, n=-1):
        """Read at most ``n`` bytes from the current position.

        Fewer bytes are returned only if end-of-file is reached first. If
        ``n`` is negative, ``None`` or omitted, all data up to end-of-file is
        returned. An empty bytes object is returned when the position is
        already at (or past) end-of-file.
        """
        pos = self._buf.tell()
        try:
            if n is None or n < 0:
                n = -1
                self._drain()
            else:
                self._fill_to(pos + n)
        finally:
            # Filling moves the cache position to its end; always restore the
            # caller's logical position, even if the source read failed.
            self._buf.seek(pos)
        return self._buf.read(n)

    def seek(self, offset, whence=SEEK_SET):
        """Move to ``offset`` interpreted according to ``whence``.

        Returns whatever the underlying ``BytesIO.seek`` returns (the new
        absolute position). Seeking relative to the end first loads the rest
        of the source, because the total length is not known before that.
        """
        if whence == SEEK_END:
            self._drain()
        return self._buf.seek(offset, whence)

    def close(self):
        """Close the wrapped source and release the cache."""
        try:
            self._fin.close()
        finally:
            # Release the cache even when closing the source fails.
            self._buf.close()
用法示例:
# Usage sketch: wrap the sequential response object so zipfile can seek in it.
import urllib2
import zipfile

req = urllib2.urlopen('http://test/file.zip')
seekable_req = BufferedRandomReader(req)
zf = zipfile.ZipFile(seekable_req, 'r')
...