Question

我正在尝试从GES DISC下载多个netcdf4文件，但是授权似乎有问题。

'fpath'是netcdf4文件的位置。如果我把它粘贴到浏览器地址栏中，会出现一个弹出框，显示“https://urs.earthdata.nasa.gov”，并要求输入用户名和密码。输入正确后，文件就会开始下载。但是，在requests.get()中使用'fpath'不起作用。

如果我使用'https://urs.earthdata.nasa.gov'而不是fpath，requests.get()可以成功连接，但是我无法下载netcdf4文件。

我尝试过这里（here）提到的解决方案，但没有成功。

如能提供任何帮助，我将不胜感激。

下面的代码示例

import requests
from requests.auth import HTTPBasicAuth
from datetime import timedelta, date


def daterange(start_date, end_date):
    """Yield each day from ``start_date`` up to, but not including, ``end_date``.

    Nothing is yielded when the two dates are less than one whole day apart
    or when ``end_date`` precedes ``start_date``.
    """
    total_days = (end_date - start_date).days
    day_offset = 0
    while day_offset < total_days:
        yield start_date + timedelta(days=day_offset)
        day_offset += 1


# Date window to request. daterange() stops before end_date, so this window
# covers the single day 2016-01-01.
start_date = date(2016, 1, 1)
end_date = date(2016, 1, 2)

for single_date in daterange(start_date, end_date):
    # Zero-padded year / month / day strings used to assemble the remote path.
    YYYY = single_date.strftime("%Y")
    MM = single_date.strftime("%m")
    DD = single_date.strftime("%d")
    # Directory part of the URL: .../<year>/<month>/
    fpath1 = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I1NXASM.5.12.4/' + YYYY + '/' + MM + '/'
    # File name for the day; the trailing '?' introduces the query string below.
    fpath2 = 'MERRA2_400.inst1_2d_asm_Nx.' + YYYY + MM + DD + '.nc4.nc?'
    # Query string: comma-separated variable names, each followed by bracketed
    # index ranges (presumably OPeNDAP time/lat/lon subsets -- confirm against
    # the dataset documentation).
    fpath3 = 'U2M[0:23][94:160][469:534],TROPT[0:23][94:160][469:534],TROPPB[0:23][94:160][469:534],' \
             'T2M[0:23][94:160][469:534],TQL[0:23][94:160][469:534],TOX[0:23][94:160][469:534],' \
             'PS[0:23][94:160][469:534],V50M[0:23][94:160][469:534],DISPH[0:23][94:160][469:534],' \
             'TO3[0:23][94:160][469:534],TS[0:23][94:160][469:534],T10M[0:23][94:160][469:534],' \
             'TROPPT[0:23][94:160][469:534],TQI[0:23][94:160][469:534],SLP[0:23][94:160][469:534],' \
             'TQV[0:23][94:160][469:534],V2M[0:23][94:160][469:534],TROPQ[0:23][94:160][469:534],' \
             'V10M[0:23][94:160][469:534],U50M[0:23][94:160][469:534],U10M[0:23][94:160][469:534],' \
             'QV2M[0:23][94:160][469:534],TROPPV[0:23][94:160][469:534],' \
             'QV10M[0:23][94:160][469:534],time,lat[94:160],lon[469:534]'
    # Full request URL = directory + file name + query string.
    fpath = fpath1 + fpath2 + fpath3
    print(fpath)

    # This successfully connects
    # response = requests.get('https://urs.earthdata.nasa.gov', auth=HTTPBasicAuth('username', 'password'))
    # print(response)

    # This one does not
    # NOTE(review): the data host differs from the login host named in the
    # question (urs.earthdata.nasa.gov); the basic-auth credentials are
    # presumably not carried across the redirect between them -- confirm.
    response = requests.get(fpath, auth=HTTPBasicAuth('username', 'password'))
    print(response)

注意：任何人都可以在此（this）网站上创建免费帐户来访问这些数据。

Answer 1

感谢@Stovfl向我指出正确的方向。

他的指引让我找到了这个网站（This website），其中介绍了如何为Earthdata设置会话。

更新后的完整代码如下

import requests
from datetime import timedelta, date

def daterange(start_date, end_date):
    """Generate consecutive days starting at ``start_date``; ``end_date`` is excluded.

    The generator is empty when fewer than one whole day separates the bounds.
    """
    span = end_date - start_date
    yield from (start_date + timedelta(offset) for offset in range(span.days))

# First day to download (inclusive).
start_date = date(2016, 1, 1)
# Day at which the download loop stops; daterange() never yields end_date itself.
end_date = date(2019, 7, 31)

# ***********************
# overriding requests.Session.rebuild_auth to maintain headers when redirected
# ***********************
class SessionWithHeaderRedirection(requests.Session):
    """A ``requests.Session`` that keeps credentials across login-host redirects.

    The ``Authorization`` header is preserved whenever a redirect goes to or
    comes from ``AUTH_HOST``; it is dropped on any other cross-host redirect.
    """

    # Host name of the NASA Earthdata login service.
    AUTH_HOST = 'urs.earthdata.nasa.gov'

    def __init__(self, username, password):
        """Create the session and store ``(username, password)`` as its auth."""
        super().__init__()
        self.auth = (username, password)

    def rebuild_auth(self, prepared_request, response):
        """Decide whether the ``Authorization`` header survives a redirect.

        ``prepared_request`` is the follow-up request about to be sent and
        ``response`` is the redirect response that triggered it. The header is
        removed in place from ``prepared_request``; nothing is returned.
        """
        request_headers = prepared_request.headers
        if 'Authorization' not in request_headers:
            return

        previous_host = requests.utils.urlparse(response.request.url).hostname
        next_host = requests.utils.urlparse(prepared_request.url).hostname

        # Same host on both sides of the redirect: keep the credentials.
        if previous_host == next_host:
            return

        # Redirects into or out of the login host must keep the credentials.
        if self.AUTH_HOST in (previous_host, next_host):
            return

        # Any other cross-host redirect must not receive the credentials.
        del request_headers['Authorization']


# Create the session with the user credentials that will be used to
# authenticate access to the data. Replace the placeholders with real
# Earthdata Login credentials (and avoid committing them to version control).
username = "USERNAME"
password = "PASSWORD"
session = SessionWithHeaderRedirection(username, password)

# ***********************
# Loop through Files
# ***********************
# Everything below up to the loop is identical for every day, so it is built
# once instead of on every iteration.
base_url = 'https://goldsmr4.gesdisc.eosdis.nasa.gov/opendap/MERRA2/M2I1NXASM.5.12.4/'
# Index ranges appended to every data variable in the query.
grid_subset = '[0:23][94:160][469:534]'
variable_names = (
    'U2M', 'TROPT', 'TROPPB', 'T2M', 'TQL', 'TOX',
    'PS', 'V50M', 'DISPH', 'TO3', 'TS', 'T10M',
    'TROPPT', 'TQI', 'SLP', 'TQV', 'V2M', 'TROPQ',
    'V10M', 'U50M', 'U10M', 'QV2M', 'TROPPV', 'QV10M',
)
# Comma-separated query: every data variable with its subset, then the
# coordinate variables.
query = ','.join(
    [name + grid_subset for name in variable_names]
    + ['time', 'lat[94:160]', 'lon[469:534]']
)

for single_date in daterange(start_date, end_date):
    # The same name is used for the remote resource and for the local file,
    # so it is built once and reused in the URL.
    filename = 'MERRA2_400.inst1_2d_asm_Nx.' + single_date.strftime("%Y%m%d") + '.nc4.nc'
    # Remote files are organised as <base>/<year>/<month>/<filename>.
    url = base_url + single_date.strftime("%Y/%m/") + filename + '?' + query
    # print(url)
    print(filename)

    try:
        # Submit the request using the session. With stream=True the
        # connection stays open until the body is consumed or the response is
        # closed, so the context manager guarantees it is released even when
        # raise_for_status() raises.
        with session.get(url, stream=True) as response:
            print(response.status_code)

            # raise an exception in case of http errors
            response.raise_for_status()

            # save the file, one 1 MiB chunk at a time
            with open(filename, 'wb') as fd:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    fd.write(chunk)

    except requests.exceptions.HTTPError as e:
        # handle any errors here; the loop then continues with the next day
        print(e)

无法使用Python requests获得授权

1 个答案: