我正在尝试从GES DISC下载多个netcdf4文件,但是授权似乎有问题。
'fpath' 是 netcdf4 文件的 URL。如果把它粘贴到浏览器地址栏中,会弹出一个来自“https://urs.earthdata.nasa.gov”的对话框,要求输入用户名和密码;输入正确后,文件即可下载。但是,在 requests.get() 中使用 'fpath' 却不起作用。
如果我使用 'https://urs.earthdata.nasa.gov' 而不是 fpath,requests.get() 可以成功连接,但这样我无法下载 netcdf4 文件。
我尝试过此处(here)提到的解决方案,但没有成功。
如能提供任何帮助,不胜感激。
下面的代码示例
import requests
from requests.auth import HTTPBasicAuth
from datetime import timedelta, date
def daterange(start_date, end_date):
    """Yield every day from start_date up to, but not including, end_date.

    Args:
        start_date: First date to yield.
        end_date: Exclusive upper bound; it is never yielded itself.

    Yields:
        start_date, start_date + 1 day, ... for each whole day in the span.
        Nothing is yielded when end_date is on or before start_date.
    """
    # timedelta.days is already an int, so no conversion is needed.
    for n in range((end_date - start_date).days):
        yield start_date + timedelta(n)
# Date span to request; daterange() stops the day before end_date,
# so this covers 2016-01-01 only.
start_date = date(2016, 1, 1)
end_date = date(2016, 1, 2)

for single_date in daterange(start_date, end_date):
    YYYY = single_date.strftime("%Y")
    MM = single_date.strftime("%m")
    DD = single_date.strftime("%d")

    # Directory for the year/month, then the per-day file name, then the
    # subsetting expression that selects variables and index windows.
    fpath1 = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I1NXASM.5.12.4/' + YYYY + '/' + MM + '/'
    fpath2 = 'MERRA2_400.inst1_2d_asm_Nx.' + YYYY + MM + DD + '.nc4.nc?'
    fpath3 = (
        'U2M[0:23][94:160][469:534],TROPT[0:23][94:160][469:534],TROPPB[0:23][94:160][469:534],'
        'T2M[0:23][94:160][469:534],TQL[0:23][94:160][469:534],TOX[0:23][94:160][469:534],'
        'PS[0:23][94:160][469:534],V50M[0:23][94:160][469:534],DISPH[0:23][94:160][469:534],'
        'TO3[0:23][94:160][469:534],TS[0:23][94:160][469:534],T10M[0:23][94:160][469:534],'
        'TROPPT[0:23][94:160][469:534],TQI[0:23][94:160][469:534],SLP[0:23][94:160][469:534],'
        'TQV[0:23][94:160][469:534],V2M[0:23][94:160][469:534],TROPQ[0:23][94:160][469:534],'
        'V10M[0:23][94:160][469:534],U50M[0:23][94:160][469:534],U10M[0:23][94:160][469:534],'
        'QV2M[0:23][94:160][469:534],TROPPV[0:23][94:160][469:534],'
        'QV10M[0:23][94:160][469:534],time,lat[94:160],lon[469:534]'
    )
    fpath = fpath1 + fpath2 + fpath3

    print(fpath)

    # This successfully connects
    # response = requests.get('https://urs.earthdata.nasa.gov', auth=HTTPBasicAuth('username', 'password'))
    # print(response)

    # This one does not
    # NOTE: kept as-is on purpose; this failing call is the subject of the question.
    response = requests.get(fpath, auth=HTTPBasicAuth('username', 'password'))
    print(response)
注意:任何人都可以在该网站(this)上创建免费帐户来访问此数据。
答案 0 :(得分:0)
感谢@Stovfl向我指出正确的方向。
他的指引让我找到了这个网站(This website),其中介绍了如何为 Earthdata 设置会话。
更新后的完整代码如下
import requests
from datetime import timedelta, date
def daterange(start_date, end_date):
    """Yield every day from start_date up to, but not including, end_date.

    Args:
        start_date: First date to yield.
        end_date: Exclusive upper bound; it is never yielded itself.

    Yields:
        start_date, start_date + 1 day, ... for each whole day in the span.
        Nothing is yielded when end_date is on or before start_date.
    """
    # timedelta.days is already an int, so no conversion is needed.
    for n in range((end_date - start_date).days):
        yield start_date + timedelta(n)
# Span of days to download. daterange() stops the day before end_date,
# so the last day requested is 2019-07-30.
start_date = date(year=2016, month=1, day=1)
end_date = date(year=2019, month=7, day=31)
# ***********************
# overriding requests.Session.rebuild_auth to maintain headers when redirected
# ***********************
class SessionWithHeaderRedirection(requests.Session):
    """requests.Session that keeps credentials across Earthdata login redirects.

    The session is created with a username/password pair stored as
    ``self.auth``. ``rebuild_auth`` is overridden so the ``Authorization``
    header is kept whenever a redirect goes to, or comes from, ``AUTH_HOST``.
    """

    # Host name of the NASA Earthdata login service.
    AUTH_HOST = 'urs.earthdata.nasa.gov'

    def __init__(self, username, password):
        """Create the session and store the credentials for every request.

        Args:
            username: Earthdata login user name.
            password: Earthdata login password.
        """
        super().__init__()
        self.auth = (username, password)

    # Overrides from the library to keep headers when redirected to or from the NASA auth host.
    def rebuild_auth(self, prepared_request, response):
        """Decide whether the Authorization header survives a redirect.

        Args:
            prepared_request: The request about to be sent to the redirect
                target; its headers are modified in place.
            response: The redirect response; ``response.request.url`` is the
                URL that was originally requested.
        """
        headers = prepared_request.headers
        url = prepared_request.url

        if 'Authorization' in headers:
            original_parsed = requests.utils.urlparse(response.request.url)
            redirect_parsed = requests.utils.urlparse(url)

            # Drop the credentials only when the redirect changes host and
            # neither end of the hop is the auth host.
            if (original_parsed.hostname != redirect_parsed.hostname
                    and redirect_parsed.hostname != self.AUTH_HOST
                    and original_parsed.hostname != self.AUTH_HOST):
                del headers['Authorization']
# Create a session with the user credentials that will be used to
# authenticate access to the data (replace the placeholders with real ones).
username = "USERNAME"
password = "PASSWORD"
session = SessionWithHeaderRedirection(username, password)

# Subsetting expression appended to every URL: the list of variables, each
# with the same three index windows, plus the time/lat/lon coordinates.
# It does not depend on the date, so it is built once outside the loop.
fpath3 = (
    'U2M[0:23][94:160][469:534],TROPT[0:23][94:160][469:534],TROPPB[0:23][94:160][469:534],'
    'T2M[0:23][94:160][469:534],TQL[0:23][94:160][469:534],TOX[0:23][94:160][469:534],'
    'PS[0:23][94:160][469:534],V50M[0:23][94:160][469:534],DISPH[0:23][94:160][469:534],'
    'TO3[0:23][94:160][469:534],TS[0:23][94:160][469:534],T10M[0:23][94:160][469:534],'
    'TROPPT[0:23][94:160][469:534],TQI[0:23][94:160][469:534],SLP[0:23][94:160][469:534],'
    'TQV[0:23][94:160][469:534],V2M[0:23][94:160][469:534],TROPQ[0:23][94:160][469:534],'
    'V10M[0:23][94:160][469:534],U50M[0:23][94:160][469:534],U10M[0:23][94:160][469:534],'
    'QV2M[0:23][94:160][469:534],TROPPV[0:23][94:160][469:534],'
    'QV10M[0:23][94:160][469:534],time,lat[94:160],lon[469:534]'
)

# ***********************
# Loop through Files
# ***********************
for single_date in daterange(start_date, end_date):
    YYYY = single_date.strftime("%Y")
    MM = single_date.strftime("%m")
    DD = single_date.strftime("%d")

    # The per-day file name is both the last path component of the URL and
    # the name of the local file the download is saved to.
    filename = 'MERRA2_400.inst1_2d_asm_Nx.' + YYYY + MM + DD + '.nc4.nc'
    fpath1 = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I1NXASM.5.12.4/' + YYYY + '/' + MM + '/'
    url = fpath1 + filename + '?' + fpath3
    # print(url)
    print(filename)

    try:
        # Submit the request using the session. With stream=True the
        # connection stays open until the body is consumed or the response
        # is closed, so the context manager releases it even when
        # raise_for_status() raises before the body is read.
        with session.get(url, stream=True) as response:
            print(response.status_code)

            # raise an exception in case of http errors
            response.raise_for_status()

            # save the file, 1 MiB at a time
            with open(filename, 'wb') as fd:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    fd.write(chunk)

    except requests.exceptions.HTTPError as e:
        # Report the failed day and carry on with the next one.
        print(e)