我正在构建一个异步下载文件的类。但是我面临一个奇怪的错误。
import pandas as pd
import requests
from requests_futures.sessions import FuturesSession
import os
import pathlib
class AsyncDownloader:
"""Download files asynchronously"""
__urls = set()
__dest_path = None
__user_agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0'
__read_timeout = 60
__connection_timeout = 30
def setSourceCSV(self, source_path, column_name):
self.source_path = source_path
self.column_name = column_name
try:
my_csv = pd.read_csv(source_path, usecols=[self.column_name], chunksize=10)
except ValueError:
print("The column name doesn't exist")
return
else:
# No exception whatsoever
for chunk in my_csv:
AsyncDownloader.__urls.update(set(getattr(chunk, self.column_name)))
def setDestinationPath(self, dest_path):
if dest_path.endswith('/'):
dest_path = dest_path[:-1]
self.dest_path = dest_path
# Make directory if not exist
# TODO Add exception in case we can't create the directory
pathlib.Path(self.dest_path).mkdir(parents=True, exist_ok=True)
if os.access(self.dest_path, os.W_OK):
AsyncDownloader.__dest_path = pathlib.Path(self.dest_path).resolve()
def setUserAgent(self, useragent):
self.useragent = useragent
AsyncDownloader.__user_agent = self.useragent
def setConnectionTimeout(self, ctimeout_secs):
self.timeout_secs = ctimeout_secs
AsyncDownloader.__connection_timeout = self.timeout_secs
def setReadTimeout(self, rtimeout_secs):
self.timeout_secs = rtimeout_secs
AsyncDownloader.__read_timeout = self.timeout_secs
def download(self):
try:
session = FuturesSession(max_workers=10)
session.headers.update({'user-agent': AsyncDownloader.__user_agent})
session.request(AsyncDownloader.__connection_timeout,
AsyncDownloader.__connection_timeout)
results = []
for url in AsyncDownloader.__urls:
results.append(session.get(url))
for result in results:
response = result.result()
filename = os.path.basename(response.url)
if AsyncDownloader.__dest_path is None:
AsyncDownloader.__dest_path = pathlib.Path(filename)
else:
AsyncDownloader.__dest_path = pathlib.Path(str(AsyncDownloader.__dest_path) + os.path.sep + filename).resolve()
# save file in directory
print(AsyncDownloader.__dest_path) # Shows correct path
with open(AsyncDownloader.__dest_path, 'wb') as fd:
for chunk in response.iter_content(chunk_size=128):
fd.write(chunk)
except requests.exceptions.HTTPError as errh:
print("Http Error:", errh)
except requests.exceptions.ConnectionError as errc:
print("Error Connecting:", errc)
except requests.exceptions.Timeout as errt:
print("Timeout Error:", errt)
except requests.exceptions.RequestException as err:
print("OOps: Something Else", err)
else:
return
def printURLs(self):
print(AsyncDownloader.__urls)
打印显示正确的路径
C:\Users\XYZ\PycharmProjects\AsyncDownloaderTest\images\Spring-Landscape-HD-Wallpapers-25912.jpg
但是open
看到了错误的路径
with open(AsyncDownloader.__dest_path, 'wb') as fd:
FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\XYZ\\PycharmProjects\\AsyncDownloaderTest\\images\\Spring-Landscape-HD-Wallpapers-25912.jpg\\FUE7XiFApEqWZQ85wYcAfM.jpg'`
我觉得身份确定没问题,所以我想知道出了什么问题。
答案 0 :(得分:0)
更改:
AsyncDownloader.__dest_path = pathlib.Path(str(AsyncDownloader.__dest_path)
+ os.path.sep + filename).resolve()
为:
AsyncDownloader.__dest_path = pathlib.Path(
os.path.split(str(AsyncDownloader.__dest_path))[0] + os.path.sep + filename).resolve()
这会将新文件名添加到目录,而不是上一个文件的完整路径名。
答案 1 :(得分:0)
更改以下行
AsyncDownloader.__dest_path = pathlib.Path(str(AsyncDownloader.__dest_path)
+ os.path.sep + filename).resolve()
为:
AsyncDownloader.__dest_path = pathlib.Path(os.path.join(os.path.dirname(AsyncDownloader.__dest_path), filename)).resolve()