我想从这个网站(website)下载数据。
我检查了源代码,发现它使用以下链接格式下载数据。
# URL template; the placeholders are, in order: start date, start time,
# end date, end time. Times are given as HH:MM and the template appends ":00".
url = 'http://current.hydro.gov.hk/en/download_csv.php?start_dt={}%20{}:00&end_dt={}%20{}:00&mode=Surface'
start_date, start_time = "2018-01-02", "00:00"
end_date, end_time = "2018-01-02", "23:45"
url_filled = url.format(start_date, start_time, end_date, end_time)
然后我尝试使用 requests 库下载 CSV 数据。
import requests
# Issue a plain GET for the formatted URL; no timeout or proxy arguments are passed.
r = requests.get(url_filled)
但后来我收到了错误。
TimeoutError Traceback (most recent call last)
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\packages\urllib3\connection.py in _new_conn(self)
140 conn = connection.create_connection(
--> 141 (self.host, self.port), self.timeout, **extra_kw)
142
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
82 if err is not None:
---> 83 raise err
84
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\packages\urllib3\util\connection.py in create_connection(address, timeout, source_address, socket_options)
72 sock.bind(source_address)
---> 73 sock.connect(sa)
74 return sock
TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
599 body=body, headers=headers,
--> 600 chunked=chunked)
601
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
355 else:
--> 356 conn.request(method, url, **httplib_request_kw)
357
~\AppData\Local\Continuum\Anaconda3\lib\http\client.py in request(self, method, url, body, headers)
1106 """Send a complete request to the server."""
-> 1107 self._send_request(method, url, body, headers)
1108
~\AppData\Local\Continuum\Anaconda3\lib\http\client.py in _send_request(self, method, url, body, headers)
1151 body = _encode(body, 'body')
-> 1152 self.endheaders(body)
1153
~\AppData\Local\Continuum\Anaconda3\lib\http\client.py in endheaders(self, message_body)
1102 raise CannotSendHeader()
-> 1103 self._send_output(message_body)
1104
~\AppData\Local\Continuum\Anaconda3\lib\http\client.py in _send_output(self, message_body)
933
--> 934 self.send(msg)
935 if message_body is not None:
~\AppData\Local\Continuum\Anaconda3\lib\http\client.py in send(self, data)
876 if self.auto_open:
--> 877 self.connect()
878 else:
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\packages\urllib3\connection.py in connect(self)
165 def connect(self):
--> 166 conn = self._new_conn()
167 self._prepare_conn(conn)
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\packages\urllib3\connection.py in _new_conn(self)
149 raise NewConnectionError(
--> 150 self, "Failed to establish a new connection: %s" % e)
151
NewConnectionError: <requests.packages.urllib3.connection.HTTPConnection object at 0x00000000081F5D30>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
437 retries=self.max_retries,
--> 438 timeout=timeout
439 )
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\packages\urllib3\connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
648 retries = retries.increment(method, url, error=e, _pool=self,
--> 649 _stacktrace=sys.exc_info()[2])
650 retries.sleep()
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\packages\urllib3\util\retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
387 if new_retry.is_exhausted():
--> 388 raise MaxRetryError(_pool, url, error or ResponseError(cause))
389
MaxRetryError: HTTPConnectionPool(host='current.hydro.gov.hk', port=80): Max retries exceeded with url: /en/download_csv.php?start_dt=2018-01-02%2000:00:00&end_dt=2018-01-02%2023:45:00&mode=Surface (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x00000000081F5D30>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond',))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
<ipython-input-39-ac5f4cccaa6a> in <module>()
----> 1 r = requests.get(url_filled)
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\api.py in get(url, params, **kwargs)
70
71 kwargs.setdefault('allow_redirects', True)
---> 72 return request('get', url, params=params, **kwargs)
73
74
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\api.py in request(method, url, **kwargs)
56 # cases, and look like a memory leak in others.
57 with sessions.Session() as session:
---> 58 return session.request(method=method, url=url, **kwargs)
59
60
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
516 }
517 send_kwargs.update(settings)
--> 518 resp = self.send(prep, **send_kwargs)
519
520 return resp
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\sessions.py in send(self, request, **kwargs)
637
638 # Send the request
--> 639 r = adapter.send(request, **kwargs)
640
641 # Total elapsed time of the request (approximately)
~\AppData\Local\Continuum\Anaconda3\lib\site-packages\requests\adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
500 raise ProxyError(e, request=request)
501
--> 502 raise ConnectionError(e, request=request)
503
504 except ClosedPoolError as e:
ConnectionError: HTTPConnectionPool(host='current.hydro.gov.hk', port=80): Max retries exceeded with url: /en/download_csv.php?start_dt=2018-01-02%2000:00:00&end_dt=2018-01-02%2023:45:00&mode=Surface (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x00000000081F5D30>: Failed to establish a new connection: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond',))
当我尝试在谷歌浏览器中打开链接时,它可以正常工作。出现一个对话框并询问下载位置。
有人能帮忙吗?谢谢。
答案 0 :(得分:1)
您是否在通过代理服务器上网?如果是,可以查看 http://docs.python-requests.org/en/master/user/advanced/ 中的代理(Proxies)部分。我试过您的代码,运行正常。
答案 1 :(得分:0)
Web 服务器发送文件数据包的速度较慢,我们需要考虑到这一点。因此,可以使用
requests.get
的流式(分块)下载机制。
import requests
# URL of the CSV file (adjacent string literals are concatenated).
csv_url = ('http://current.hydro.gov.hk/en/download_csv.php?start_dt='
           '2018-01-05%2000:00:00&end_dt=2018-01-06%2000:00:00&mode=Surface')
# Sample output filename for the downloaded CSV.
csv_filename = 'test.csv'
# Stream the body instead of loading it all at once.
# The timeout is (connect seconds, read seconds between bytes), so a host that
# never answers raises an exception instead of blocking forever.
csv_body = requests.get(csv_url, stream=True, timeout=(10, 60))
try:
    # Fail loudly on 4xx/5xx rather than saving an error page as CSV.
    csv_body.raise_for_status()
    with open(csv_filename, 'wb') as fd:
        # Write the response to disk in 1 KiB chunks as they arrive.
        for chunk in csv_body.iter_content(chunk_size=1024):
            fd.write(chunk)
finally:
    # A streamed response holds its connection until it is closed.
    csv_body.close()