我目前正在使用pycurl和python开发一个简单的多线程下载器。现在我可以暂停下载,但这样会占用太多内存,而且如果进程被杀死,我就无法直接恢复下载。因此,我想出了一种解决方法(尚未实现):停止下载,将字节位置(可以通过progress函数获得)保存到XML文件,然后通过指定该字节位置从那里继续下载。
我卡在的地方是我以某种方式需要在执行perform()时关闭连接,因为perform()阻塞了,我以为我可以从另一个线程对其调用close(),但这只会引发异常。在pycurl或libcurl文档中似乎找不到任何东西可以帮助我完成我想要的事情。
至于为什么我使用pycurl而不是更简单的requests库:我已经使用pycurl进行基本的自动化有一段时间了,并且已经习惯了。如果没有其他方法可以用pycurl来完成我想做的事情,那么我想requests可能是最后的选择。
class Downloader:
    """Download one file in several byte ranges, one thread per range.

    Each range is fetched with its own pycurl handle into
    ``<fileName><n>.part``; once every thread has finished the parts are
    concatenated into ``fileName`` and removed.
    """

    def __init__(self, url, parts):
        """Store the target and ask the server for the file size.

        Args:
            url: Address of the file to download.
            parts: Number of ranges (and threads) to split the download into.
        """
        self.url = url
        # The file name is the trailing, slash-free tail of the URL.
        self.fileName = re.search(r"(?:[^/][\d\w.]+)+$", self.url, flags=re.IGNORECASE).group(0)
        self.parts = parts
        self.pause = False
        self.fileSize = round(self._getSize())
        self.partSize = round(self.fileSize / self.parts)
        self.threads = list()
        self.curlObjs = list()

    # Get file size by only downloading the HEADER and then calling getinfo for the length.
    def _getSize(self):
        """Return the content length reported for ``self.url``."""
        curl = pycurl.Curl()
        try:
            curl.setopt(curl.URL, self.url)
            curl.setopt(curl.FOLLOWLOCATION, True)
            # NOBODY: request the headers only, no response body.
            curl.setopt(curl.NOBODY, True)
            curl.perform()
            return curl.getinfo(curl.CONTENT_LENGTH_DOWNLOAD)
        finally:
            # Release the handle even when the request fails.
            curl.close()

    # Track individual file part download progress.
    def _trackProgress(self, totalDown, currentDown, totalUp, currentUp):
        """Progress callback installed on every range transfer.

        Currently a no-op.

        TODO: report completion once ``currentDown`` reaches ``totalDown``.
        NOTE(review): per the libcurl documentation, returning a non-zero
        value from this callback aborts the running transfer; that is the
        supported way to stop a blocking ``perform()`` (e.g. when
        ``self.pause`` is set) - confirm before relying on it.
        """
        pass

    def _partRanges(self):
        """Return the inclusive ``(start, end)`` byte range of every part.

        Parts are ``partSize`` bytes apart; the final part always ends at the
        last byte of the file so that rounding of ``partSize`` can never drop
        trailing bytes.
        """
        ranges = []
        partStart = 0
        partEnd = self.partSize
        for part in range(1, self.parts + 1):
            if part == self.parts:
                partEnd = self.fileSize - 1
            ranges.append((partStart, partEnd))
            # HTTP ranges are inclusive, so the next part starts one byte on.
            partStart = partEnd + 1
            partEnd += self.partSize
        return ranges

    # Calculate the part size, execute _downloadRange in separate threads, merge file parts on download completion.
    def download(self):
        """Download every part concurrently, then merge them into one file."""
        for part, (partStart, partEnd) in enumerate(self._partRanges(), start=1):
            t = threading.Thread(target=self._downloadRange, args=(partStart, partEnd, part))
            self.threads.append(t)
            t.start()
        for t in self.threads:
            t.join()
        self._mergeFiles(self.fileName)

    # Download the specified range and write it to a file part.
    def _downloadRange(self, startRange, endRange, fileNo):
        """Fetch bytes ``startRange``-``endRange`` into part file ``fileNo``.

        Args:
            startRange: First byte of the range (inclusive).
            endRange: Last byte of the range (inclusive).
            fileNo: 1-based part number, used in the part file name.
        """
        with open(f"{self.fileName}{fileNo}.part", "wb") as f:
            curl = pycurl.Curl()
            # Keep a reference so other code can reach the handle.
            self.curlObjs.append(curl)
            try:
                curl.setopt(curl.URL, self.url)
                curl.setopt(curl.FOLLOWLOCATION, True)
                curl.setopt(curl.RANGE, f"{startRange}-{endRange}")
                curl.setopt(curl.WRITEDATA, f)
                # Progress callbacks are only invoked when NOPROGRESS is off.
                curl.setopt(curl.NOPROGRESS, False)
                curl.setopt(curl.XFERINFOFUNCTION, self._trackProgress)
                curl.perform()
            finally:
                # Release the handle even when the transfer fails.
                curl.close()

    # Merge the file parts into one and delete the parts.
    def _mergeFiles(self, fileName):
        """Concatenate the part files, in order, into ``fileName``.

        Each part is deleted as soon as it has been appended.

        Args:
            fileName: Path of the merged output file.
        """
        import shutil

        with open(fileName, "wb") as o:
            for part in range(1, self.parts + 1):
                partName = f"{self.fileName}{part}.part"
                with open(partName, "rb") as p:
                    # Copy in chunks instead of loading the whole part into memory.
                    shutil.copyfileobj(p, o)
                os.remove(partName)