我正在使用 Python 的 requests 库通过 HTTP 获取内容。
我的代码是这样的:
# Request only the inclusive byte range byteFrom..byteTo and stream it to disk.
# The Range header value must be written "bytes=<first>-<last>"; without the
# "=" it is not a valid byte-range specifier and a server may ignore it.
r = requests.get('http://blabla/mybigfile', headers={"Range": "bytes=%d-%d" % (byteFrom, byteTo)}, stream=True)
bytesRead = 0
with open(filename, 'wb') as fd:
    for chunk in r.iter_content(1024):
        fd.write(chunk)
        bytesRead = bytesRead + len(chunk)
# Both ends of the range are inclusive, so a complete transfer carries
# byteTo - byteFrom + 1 bytes (the original check was one byte too lenient).
if bytesRead < (byteTo - byteFrom + 1):
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("WHAT??? WHY???")
问题是:有时 requests 没有迭代完所有数据块,实际读取的总字节数少于预期。这是为什么呢?
以下是服务器返回的响应头:
{'accept-ranges': 'bytes',
'cache-control': 'max-age=28800',
'connection': 'close',
'content-disposition': 'attachment',
'content-length': '262144',
'content-range': 'bytes 262144-524287/339619249',
'content-type': 'application/octet-stream',
'date': 'Sun, 07 Sep 2014 19:04:01 GMT',
'etag': '"1491643023"',
'expires': 'Mon, 08 Sep 2014 03:04:01 GMT',
'last-modified': 'Thu, 04 Sep 2014 13:59:44 GMT',
'server': 'lighttpd'}
实际代码如下:
def downloadPart(self, byteFrom=0, byteTo=0, async=True):
tname = threading.currentThread().getName()
if byteTo is 0:
byteTo = self.fileSize - 1
print "Thread %s: download from %d to %d" % (tname, byteFrom, byteTo)
self.req = requests.get(self.url, headers={"Range":"bytes=%d-%d" % (byteFrom, byteTo)}, stream=True)
required = int(self.req.headers["content-length"])
b = byteFrom
bytes = 0
chunksize = 8 * RemoteFile.KB
with open(self.filename, 'r+b') as fd:
fd.seek(b)
for chunk in self.req.iter_content(chunksize):
if self._stop:
print "%s: stopped" % tname
return
fd.write(chunk)
fd.flush()
os.fsync(fd)
dsize = len(chunk)
bytes = bytes + dsize
b = b + dsize
self.lock.acquire()
self.progress = self.progress + dsize
self.lock.release()
# print "%s: %d [%d%%] @ %f KB" % (tname, b, self.getProgressPerc(), self.getDownloadSpeed())
if bytes < (byteTo - byteFrom):
print self.req.raw.tell()
print "End thread: %s: less required size %d instead %d" % (tname, bytes, byteTo - byteFrom)
return self.downloadPart(b, byteTo, async)
else:
print "End thread: %s, downloaded %d bytes" % (tname, bytes)
self.lock.acquire()
if self.currentByte < self.lastByte:
if async:
self.threads.remove(threading.currentThread())
t = self.createThread(self.currentByte + 1)
if t: t.start()
else:
print "Download complete!"
self._complete = True
self.lock.release()