from earthaccess import Auth, DataCollections, DataGranules, Store
# Create the Auth instance that the login calls below operate on.
auth = Auth()
Auth()¶
earthaccess's Auth class provides 3 different strategies to authenticate ourselves with NASA EDL.
- netrc: Do we have a .netrc file with our EDL credentials? If so, we can use it with earthaccess. If we don't have one and want to create it, earthaccess allows users to type their credentials and persist them into a .netrc file.
- environment: If we have our EDL credentials as environment variables
  - EARTHDATA_USERNAME
  - EARTHDATA_PASSWORD
- interactive: We will be asked for our EDL credentials with optional persistence to .netrc
To persist our credentials to a .netrc file we have to do the following:
auth.login(strategy="interactive", persist=True)
In this notebook we'll use the environment strategy followed by the netrc strategy. You can of course use the interactive strategy if you don't have a .netrc file.
# Try the environment-variable strategy first.
# NOTE(review): login() can raise (e.g. on a network timeout), in which case
# the netrc fallback below is never reached.
auth.login(strategy="environment")
# are we authenticated?
if not auth.authenticated:
    # Fall back to the credentials stored in a .netrc file.
    auth.login(strategy="netrc")
--------------------------------------------------------------------------- TimeoutError Traceback (most recent call last) File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/urllib3/connectionpool.py:534, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length) 533 try: --> 534 response = conn.getresponse() 535 except (BaseSSLError, OSError) as e: File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/urllib3/connection.py:565, in HTTPConnection.getresponse(self) 564 # Get the response from http.client.HTTPConnection --> 565 httplib_response = super().getresponse() 567 try: File ~/.asdf/installs/python/3.11.12/lib/python3.11/http/client.py:1395, in HTTPConnection.getresponse(self) 1394 try: -> 1395 response.begin() 1396 except ConnectionError: File ~/.asdf/installs/python/3.11.12/lib/python3.11/http/client.py:325, in HTTPResponse.begin(self) 324 while True: --> 325 version, status, reason = self._read_status() 326 if status != CONTINUE: File ~/.asdf/installs/python/3.11.12/lib/python3.11/http/client.py:286, in HTTPResponse._read_status(self) 285 def _read_status(self): --> 286 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1") 287 if len(line) > _MAXLINE: File ~/.asdf/installs/python/3.11.12/lib/python3.11/socket.py:718, in SocketIO.readinto(self, b) 717 try: --> 718 return self._sock.recv_into(b) 719 except timeout: File ~/.asdf/installs/python/3.11.12/lib/python3.11/ssl.py:1314, in SSLSocket.recv_into(self, buffer, nbytes, flags) 1311 raise ValueError( 1312 "non-zero flags not allowed in calls to recv_into() on %s" % 1313 self.__class__) -> 1314 return self.read(nbytes, buffer) 1315 else: File ~/.asdf/installs/python/3.11.12/lib/python3.11/ssl.py:1166, in SSLSocket.read(self, len, buffer) 1165 if buffer is not None: -> 1166 return self._sslobj.read(len, 
buffer) 1167 else: TimeoutError: The read operation timed out The above exception was the direct cause of the following exception: ReadTimeoutError Traceback (most recent call last) File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/requests/adapters.py:644, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies) 643 try: --> 644 resp = conn.urlopen( 645 method=request.method, 646 url=url, 647 body=request.body, 648 headers=request.headers, 649 redirect=False, 650 assert_same_host=False, 651 preload_content=False, 652 decode_content=False, 653 retries=self.max_retries, 654 timeout=timeout, 655 chunked=chunked, 656 ) 658 except (ProtocolError, OSError) as err: File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/urllib3/connectionpool.py:841, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw) 839 new_e = ProtocolError("Connection aborted.", new_e) --> 841 retries = retries.increment( 842 method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2] 843 ) 844 retries.sleep() File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/urllib3/util/retry.py:474, in Retry.increment(self, method, url, response, error, _pool, _stacktrace) 473 if read is False or method is None or not self._is_method_retryable(method): --> 474 raise reraise(type(error), error, _stacktrace) 475 elif read is not None: File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/urllib3/util/util.py:39, in reraise(tp, value, tb) 38 raise value.with_traceback(tb) ---> 39 raise value 40 finally: File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/urllib3/connectionpool.py:787, in HTTPConnectionPool.urlopen(self, method, 
url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw) 786 # Make the request on the HTTPConnection object --> 787 response = self._make_request( 788 conn, 789 method, 790 url, 791 timeout=timeout_obj, 792 body=body, 793 headers=headers, 794 chunked=chunked, 795 retries=retries, 796 response_conn=response_conn, 797 preload_content=preload_content, 798 decode_content=decode_content, 799 **response_kw, 800 ) 802 # Everything went great! File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/urllib3/connectionpool.py:536, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length) 535 except (BaseSSLError, OSError) as e: --> 536 self._raise_timeout(err=e, url=url, timeout_value=read_timeout) 537 raise File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/urllib3/connectionpool.py:367, in HTTPConnectionPool._raise_timeout(self, err, url, timeout_value) 366 if isinstance(err, SocketTimeout): --> 367 raise ReadTimeoutError( 368 self, url, f"Read timed out. (read timeout={timeout_value})" 369 ) from err 371 # See the above comment about EAGAIN in Python 3. ReadTimeoutError: HTTPSConnectionPool(host='urs.earthdata.nasa.gov', port=443): Read timed out. (read timeout=10) During handling of the above exception, another exception occurred: ReadTimeout Traceback (most recent call last) Cell In[2], line 1 ----> 1 auth.login(strategy="environment") 2 # are we authenticated? 
3 if not auth.authenticated: File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1109/earthaccess/auth.py:148, in Auth.login(self, strategy, persist, system) 146 self._netrc() 147 elif strategy == "environment": --> 148 self._environment() 150 return self File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1109/earthaccess/auth.py:300, in Auth._environment(self) 293 raise LoginStrategyUnavailable( 294 "Either the environment variables EARTHDATA_USERNAME and " 295 "EARTHDATA_PASSWORD must both be set, or EARTHDATA_TOKEN must be set for " 296 "the 'environment' login strategy." 297 ) 299 logger.debug("Using environment variables for EDL") --> 300 return self._get_credentials(username, password, token) File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1109/earthaccess/auth.py:314, in Auth._get_credentials(self, username, password, user_token) 312 self.username = username 313 self.password = password --> 314 token_resp = self._find_or_create_token() 316 if not (token_resp.ok): # type: ignore 317 msg = f"Authentication with Earthdata Login failed with:\n{token_resp.text}" File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1109/earthaccess/auth.py:332, in Auth._find_or_create_token(self) 330 def _find_or_create_token(self) -> requests.Response: 331 with self.get_session() as session: --> 332 return session.post( 333 self.EDL_FIND_OR_CREATE_TOKEN_URL, 334 headers={"Accept": "application/json"}, 335 timeout=10, 336 ) File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/requests/sessions.py:637, in Session.post(self, url, data, json, **kwargs) 626 def post(self, url, data=None, json=None, **kwargs): 627 r"""Sends a POST request. Returns :class:`Response` object. 628 629 :param url: URL for the new :class:`Request` object. (...) 
634 :rtype: requests.Response 635 """ --> 637 return self.request("POST", url, data=data, json=json, **kwargs) File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json) 584 send_kwargs = { 585 "timeout": timeout, 586 "allow_redirects": allow_redirects, 587 } 588 send_kwargs.update(settings) --> 589 resp = self.send(prep, **send_kwargs) 591 return resp File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs) 700 start = preferred_clock() 702 # Send the request --> 703 r = adapter.send(request, **kwargs) 705 # Total elapsed time of the request (approximately) 706 elapsed = preferred_clock() - start File ~/checkouts/readthedocs.org/user_builds/earthaccess/envs/1109/lib/python3.11/site-packages/requests/adapters.py:690, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies) 688 raise SSLError(e, request=request) 689 elif isinstance(e, ReadTimeoutError): --> 690 raise ReadTimeout(e, request=request) 691 elif isinstance(e, _InvalidHeader): 692 raise InvalidHeader(e, request=request) ReadTimeout: HTTPSConnectionPool(host='urs.earthdata.nasa.gov', port=443): Read timed out. (read timeout=10)
Querying for restricted datasets¶
The DataCollections client can query CMR for any collection (dataset) using all of CMR's Query parameters and has built-in functions to extract useful information from the response.
# NOTE: recent earthaccess releases emit a DeprecationWarning for
# refresh_tokens(), because tokens are now refreshed automatically.
auth.refresh_tokens()
If we belong to an early adopter group within NASA we can pass the Auth object to the other classes when we instantiate them.
# An anonymous query to CMR (no Auth object is passed)
Query = DataCollections().keyword('elevation')
# An authenticated query to CMR (the Auth object is passed to the constructor)
Query = DataCollections(auth).keyword('elevation')
And it's the same with DataGranules:
# An anonymous query to CMR (no Auth object is passed)
Query = DataGranules().keyword('elevation')
# An authenticated query to CMR (the Auth object is passed to the constructor)
Query = DataGranules(auth).keyword('elevation')
Note: Some collections under an access control list are flagged by CMR and won't count when asking about results with hits().
# The first step is to create a DataCollections query
Query = DataCollections()
# Use chain methods to customize our query
Query.short_name("ATL06").version("006")
print(f"Collections found: {Query.hits()}")
# filtering what UMM fields to print, to see the full record we omit the fields filters
# meta is always included in each result
collections = Query.fields(["ShortName", "Version"]).get(5)
# Inspect some results printing just the ShortName and Version
collections
Collections found: 1
[{
"meta": {
"concept-id": "C2670138092-NSIDC_CPRD",
"provider-id": "NSIDC_CPRD"
},
"umm": {
"ShortName": "ATL06",
"Version": "006"
}
}]
# NOTE: recent earthaccess releases emit a DeprecationWarning for
# refresh_tokens(), because tokens are now refreshed automatically.
if not auth.refresh_tokens():
    print("Something went wrong, we may need to regenerate our tokens manually")
Something went wrong, we may need to regenerate our tokens manually
/tmp/ipykernel_1551/2137380767.py:1: DeprecationWarning: No replacement, as tokens are now refreshed automatically. if not auth.refresh_tokens():
Query = DataCollections(auth)
# Use chain methods to customize our query
Query.short_name("ATL06").version("006")
# hits() only counts publicly available collections, so it can report fewer
# results than get() actually returns (e.g. 1 even though we get 2 back).
print(f"Collections found: {Query.hits()}")
collections = Query.fields(["ShortName", "Version"]).get()
# Inspect some results printing just the ShortName and Version
collections
Collections found: 1
[{
"meta": {
"concept-id": "C2670138092-NSIDC_CPRD",
"provider-id": "NSIDC_CPRD"
},
"umm": {
"ShortName": "ATL06",
"Version": "006"
}
}]
Oh no! What!? only 1 collection found even though we got 2 results back?!
Interpreting the results¶
The hits() method above will tell you the number of query hits, but only for publicly available data sets.
In this case, because cloud-hosted ICESat-2 data are not yet publicly available, CMR will return “1” hit; if you filtered DataCollections by provider = NSIDC_CPRD you'd get 0 hits. For now we need an alternative method of seeing how many cloud data sets are available at NSIDC. This is only temporary until cloud-hosted ICESat-2 data become publicly available. We can create a collections object (we’re going to want one of these soon anyhow) and print the len() of the collections object to see the true number of hits.
Note: Since we cannot rely on hits(), we need to be aware that get() may get us too many metadata records depending on the dataset and how broad our query is.
# Build the granule query one filter at a time: a single collection
# (by concept id), a bounding box, and a temporal range.
Query = DataGranules(auth)
Query = Query.concept_id("C2153572614-NSIDC_CPRD")
Query = Query.bounding_box(-134.7, 58.9, -133.9, 59.2)
Query = Query.temporal("2020-03-01", "2020-03-30")

# hits() has the same limitation for granule queries as for collections,
# so compare it against the number of records get() actually returns.
print(f"Granules found with hits(): {Query.hits()}")
cloud_granules = Query.get()
print(f"Actual number found: {len(cloud_granules)}")
Granules found with hits(): 0
Actual number found: 0
# Build a Store from our Auth object and fetch the granules found above into
# a local directory. NOTE: this needs a successful login; with an
# unauthenticated Auth, Store() fails with an AttributeError.
store = Store(auth)
files = store.get(cloud_granules, "./data/C2153572614-NSIDC_CPRD/")
The current session is not authenticated with NASA
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) Cell In[7], line 1 ----> 1 store = Store(auth) 2 files = store.get(cloud_granules, "./data/C2153572614-NSIDC_CPRD/") File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1109/earthaccess/store.py:210, in Store.__init__(self, auth, pre_authorize) 208 logger.warning("The current session is not authenticated with NASA") 209 self.auth = None --> 210 self.in_region = self._running_in_us_west_2() File ~/checkouts/readthedocs.org/user_builds/earthaccess/checkouts/1109/earthaccess/store.py:235, in Store._running_in_us_west_2(self) 234 def _running_in_us_west_2(self) -> bool: --> 235 session = self.auth.get_session() 236 try: 237 # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html 238 token_ = session.put( 239 "http://169.254.169.254/latest/api/token", 240 headers={"X-aws-ec2-metadata-token-ttl-seconds": "21600"}, 241 timeout=1, 242 ) AttributeError: 'NoneType' object has no attribute 'get_session'
NASA Earthdata API Client 🌍