From 6892007a0b91b33280398f0a47eaadc81d3abc72 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Tue, 6 Feb 2024 12:49:14 -0700 Subject: [PATCH 01/16] Clarify that temporal and bbox args must be tuples in docstrings Addresses #279, but does not resolve it. --- earthaccess/api.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/earthaccess/api.py b/earthaccess/api.py index 8e518912..500a2be4 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -50,14 +50,19 @@ def search_datasets( * **provider**: particular to each DAAC, e.g. POCLOUD, LPDAAC etc. - * **temporal**: ("yyyy-mm-dd", "yyyy-mm-dd") + * **temporal**: a tuple representing temporal bounds in the form + ("yyyy-mm-dd", "yyyy-mm-dd") + + * **bounding_box**: a tuple representing spatial bounds in the form + (lower_left_lon, lower_left_lat , upper_right_lon, upper_right_lat) - * **bounding_box**: (lower_left_lon, lower_left_lat , - upper_right_lon, upper_right_lat) Returns: - an list of DataCollection results that can be used to get + + a list of DataCollection results that can be used to get information such as concept_id, doi, etc. about a dataset. + Examples: + ```python datasets = earthaccess.search_datasets( keyword="sea surface anomaly", @@ -103,14 +108,19 @@ def search_data( * **provider**: particular to each DAAC, e.g. POCLOUD, LPDAAC etc. - * **temporal**: ("yyyy-mm-dd", "yyyy-mm-dd") + * **temporal**: a tuple representing temporal bounds in the form + ("yyyy-mm-dd", "yyyy-mm-dd") + + * **bounding_box**: a tuple representing spatial bounds in the form + (lower_left_lon, lower_left_lat , upper_right_lon, upper_right_lat) - * **bounding_box**: (lower_left_lon, lower_left_lat , - upper_right_lon, upper_right_lat) Returns: + Granules: a list of DataGranules that can be used to access - the granule files by using `download()` or `open()`. + the granule files by using `download()` or `open()`. + Examples: + ```python datasets = earthaccess.search_data( doi="10.5067/SLREF-CDRV2", From 14cbfefd2e03ad45516e57542e92be6f05fe94aa Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Tue, 6 Feb 2024 14:04:36 -0700 Subject: [PATCH 02/16] Move shebang to first line --- scripts/docs-live.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/scripts/docs-live.sh b/scripts/docs-live.sh index 286f2a7c..6ed6a4be 100755 --- a/scripts/docs-live.sh +++ b/scripts/docs-live.sh @@ -1,7 +1,4 @@ - #!/usr/bin/env bash - -set -e -set -x +set -ex mkdocs serve --dev-addr 0.0.0.0:8008 --dirtyreload From 29ad00e13e2b80e1cdef0d2368778958d3ace8e0 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Tue, 6 Feb 2024 14:09:03 -0700 Subject: [PATCH 03/16] Fixup docstring formatting issues --- earthaccess/api.py | 71 ++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 46 deletions(-) diff --git a/earthaccess/api.py b/earthaccess/api.py index 500a2be4..077a6ced 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -31,38 +31,30 @@ def _normalize_location(location: Union[str, None]) -> Union[str, None]: def search_datasets( count: int = -1, **kwargs: Any ) -> List[earthaccess.results.DataCollection]: - """Search datasets using NASA's CMR + """Search datasets using NASA's CMR. [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) Parameters: - count (Integer): Number of records to get, -1 = all - kwargs (Dict): arguments to CMR: + kwargs (Dict): + arguments to CMR: * **keyword**: case insensitive and support wild cards ? and *, - * **short_name**: e.g. ATL08 - * **doi**: DOI for a dataset - * **daac**: e.g. NSIDC or PODAAC - * **provider**: particular to each DAAC, e.g. POCLOUD, LPDAAC etc. - * **temporal**: a tuple representing temporal bounds in the form - ("yyyy-mm-dd", "yyyy-mm-dd") - + `("yyyy-mm-dd", "yyyy-mm-dd")` * **bounding_box**: a tuple representing spatial bounds in the form - (lower_left_lon, lower_left_lat , upper_right_lon, upper_right_lat) + `(lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)` Returns: - - a list of DataCollection results that can be used to get - information such as concept_id, doi, etc. about a dataset. + a list of DataCollection results that can be used to get information about a + dataset, e.g. concept_id, doi, etc. Examples: - ```python datasets = earthaccess.search_datasets( keyword="sea surface anomaly", @@ -94,33 +86,25 @@ def search_data( [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) Parameters: - count (Integer): Number of records to get, -1 = all - kwargs (Dict): arguments to CMR: + kwargs (Dict): + arguments to CMR: * **short_name**: dataset short name e.g. ATL08 - * **version**: dataset version - * **doi**: DOI for a dataset - * **daac**: e.g. NSIDC or PODAAC - * **provider**: particular to each DAAC, e.g. POCLOUD, LPDAAC etc. - * **temporal**: a tuple representing temporal bounds in the form - ("yyyy-mm-dd", "yyyy-mm-dd") - + `("yyyy-mm-dd", "yyyy-mm-dd")` * **bounding_box**: a tuple representing spatial bounds in the form - (lower_left_lon, lower_left_lat , upper_right_lon, upper_right_lat) + `(lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)` Returns: - - Granules: a list of DataGranules that can be used to access - the granule files by using `download()` or `open()`. + a list of DataGranules that can be used to access the granule files by using + `download()` or `open()`. Examples: - ```python datasets = earthaccess.search_data( doi="10.5067/SLREF-CDRV2", @@ -144,17 +128,15 @@ def login(strategy: str = "all", persist: bool = False) -> Auth: """Authenticate with Earthdata login (https://urs.earthdata.nasa.gov/) Parameters: + strategy (String): + authentication method. - strategy (String): authentication method. - - "all": (default) try all methods until one works - - "interactive": enter username and password. - - "netrc": retrieve username and password from ~/.netrc. - - "environment": retrieve username and password from $EARTHDATA_USERNAME and $EARTHDATA_PASSWORD. + * **"all"**: (default) try all methods until one works + * **"interactive"**: enter username and password. + * **"netrc"**: retrieve username and password from ~/.netrc. + * **"environment"**: retrieve username and password from `$EARTHDATA_USERNAME` and `$EARTHDATA_PASSWORD`. persist (Boolean): will persist credentials in a .netrc file + Returns: an instance of Auth. """ @@ -218,8 +200,10 @@ def open( hosted on S3 or HTTPS by third party libraries like xarray. Parameters: - granules: a list of granule instances **or** list of URLs, e.g. s3://some-granule, - if a list of URLs is passed we need to specify the data provider e.g. POCLOUD, NSIDC_CPRD etc. + granules: a list of granule instances **or** list of URLs, e.g. + `s3://some-granule`, if a list of URLs is passed we need to specify the data + provider e.g. POCLOUD, NSIDC_CPRD etc. + Returns: a list of s3fs "file pointers" to s3 files. """ @@ -242,6 +226,7 @@ def get_s3_credentials( daac (String): a DAAC short_name like NSIDC or PODAAC etc provider (String: if we know the provider for the DAAC e.g. POCLOUD, LPCLOUD etc. results (list[earthaccess.results.DataGranule]): List of results from search_data() + Returns: a dictionary with S3 credentials for the DAAC or provider """ @@ -256,8 +241,6 @@ def get_s3_credentials( def collection_query() -> Type[CollectionQuery]: """Returns a query builder instance for NASA collections (datasets) - Parameters: - cloud_hosted (Boolean): initializes the query builder for cloud hosted collections. Returns: class earthaccess.DataCollections: a query builder instance for data collections. """ @@ -271,9 +254,6 @@ class earthaccess.DataCollections: a query builder instance for data collections def granule_query() -> Type[GranuleQuery]: """Returns a query builder instance for data granules - Parameters: - cloud_hosted (Boolean): initializes the query builder for a particular DOI - if we have it. Returns: class earthaccess.DataGranules: a query builder instance for data granules. """ @@ -359,7 +339,6 @@ def get_edl_token() -> str: Returns: str: EDL token - """ token = earthaccess.__auth__.token return token From fc2a379beb99d030885a314e241c421def83aa81 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Tue, 6 Feb 2024 14:09:16 -0700 Subject: [PATCH 04/16] Enable building docs with dev conda env --- binder/environment-dev.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/binder/environment-dev.yml b/binder/environment-dev.yml index 202d825b..6783a808 100644 --- a/binder/environment-dev.yml +++ b/binder/environment-dev.yml @@ -10,6 +10,16 @@ dependencies: - ipyleaflet>=0.13 - h5netcdf>=0.11 - cartopy + + - mkdocs>=1.2 + - mkdocs-material>=7.1,<9.0 + - markdown-include>=0.6 + - mkdocstrings>=0.19.0 + - mkdocstrings-python + - mkdocs-jupyter>=0.19.0 + - pymdown-extensions>=9.2 + - pip - pip: - poetry + - markdown-callouts>=0.2.0 From 71fb424eef7b6bf8cfc10c3b130ce1a4a35d24ec Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Tue, 6 Feb 2024 14:17:47 -0700 Subject: [PATCH 05/16] Remove redundant docstring type info --- earthaccess/api.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/earthaccess/api.py b/earthaccess/api.py index 077a6ced..21845d7d 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -36,7 +36,7 @@ def search_datasets( [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) Parameters: - count (Integer): Number of records to get, -1 = all + count: Number of records to get, -1 = all kwargs (Dict): arguments to CMR: @@ -86,7 +86,7 @@ def search_data( [https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html) Parameters: - count (Integer): Number of records to get, -1 = all + count: Number of records to get, -1 = all kwargs (Dict): arguments to CMR: @@ -128,14 +128,14 @@ def login(strategy: str = "all", persist: bool = False) -> Auth: """Authenticate with Earthdata login (https://urs.earthdata.nasa.gov/) Parameters: - strategy (String): + strategy: authentication method. * **"all"**: (default) try all methods until one works * **"interactive"**: enter username and password. * **"netrc"**: retrieve username and password from ~/.netrc. * **"environment"**: retrieve username and password from `$EARTHDATA_USERNAME` and `$EARTHDATA_PASSWORD`. - persist (Boolean): will persist credentials in a .netrc file + persist: will persist credentials in a .netrc file Returns: an instance of Auth. @@ -223,9 +223,9 @@ def get_s3_credentials( this is useful for missions that do not use the same endpoint as their DAACs e.g. SWOT Parameters: - daac (String): a DAAC short_name like NSIDC or PODAAC etc - provider (String: if we know the provider for the DAAC e.g. POCLOUD, LPCLOUD etc. - results (list[earthaccess.results.DataGranule]): List of results from search_data() + daac: a DAAC short_name like NSIDC or PODAAC etc + provider: if we know the provider for the DAAC e.g. POCLOUD, LPCLOUD etc. + results: List of results from search_data() Returns: a dictionary with S3 credentials for the DAAC or provider @@ -242,7 +242,7 @@ def collection_query() -> Type[CollectionQuery]: """Returns a query builder instance for NASA collections (datasets) Returns: - class earthaccess.DataCollections: a query builder instance for data collections. + a query builder instance for data collections. """ if earthaccess.__auth__.authenticated: query_builder = DataCollections(earthaccess.__auth__) @@ -255,7 +255,7 @@ def granule_query() -> Type[GranuleQuery]: """Returns a query builder instance for data granules Returns: - class earthaccess.DataGranules: a query builder instance for data granules. + a query builder instance for data granules. """ if earthaccess.__auth__.authenticated: query_builder = DataGranules(earthaccess.__auth__) @@ -268,7 +268,7 @@ def get_fsspec_https_session() -> AbstractFileSystem: """Returns a fsspec session that can be used to access datafiles across many different DAACs Returns: - class AbstractFileSystem: an fsspec instance able to access data across DAACs + an fsspec instance able to access data across DAACs Examples: ```python @@ -291,7 +291,7 @@ def get_requests_https_session() -> requests.Session: require authentication with NASA EDL. Returns: - class requests.Session: an authenticated requests Session instance. + an authenticated requests Session instance. Examples: ```python @@ -316,12 +316,12 @@ def get_s3fs_session( """Returns a fsspec s3fs file session for direct access when we are in us-west-2 Parameters: - daac (String): Any DAAC short name e.g. NSIDC, GES_DISC - provider (String): Each DAAC can have a cloud provider, if the DAAC is specified, there is no need to use provider - results (list[class earthaccess.results.DataGranule]): A list of results from search_data(), earthaccess will use the metadata form CMR to obtain the S3 Endpoint + daac: Any DAAC short name e.g. NSIDC, GES_DISC + provider: Each DAAC can have a cloud provider, if the DAAC is specified, there is no need to use provider + results: A list of results from search_data(), earthaccess will use the metadata form CMR to obtain the S3 Endpoint Returns: - class s3fs.S3FileSystem: an authenticated s3fs session valid for 1 hour + an authenticated s3fs session valid for 1 hour """ daac = _normalize_location(daac) provider = _normalize_location(provider) @@ -338,7 +338,7 @@ def get_edl_token() -> str: """Returns the current token used for EDL Returns: - str: EDL token + EDL token """ token = earthaccess.__auth__.token return token From 48c88703be1b57dfa6a1b97f0c7458f36f66d1e4 Mon Sep 17 00:00:00 2001 From: Matt Fisher Date: Tue, 6 Feb 2024 14:21:14 -0700 Subject: [PATCH 06/16] Prefer Optional over Union with None for consistency in docs --- earthaccess/api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/earthaccess/api.py b/earthaccess/api.py index 21845d7d..ed858ac7 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -13,7 +13,7 @@ from .utils import _validation as validate -def _normalize_location(location: Union[str, None]) -> Union[str, None]: +def _normalize_location(location: Optional[str]) -> Optional[str]: """Handle user-provided `daac` and `provider` values These values must have a capital letter as the first character @@ -160,7 +160,7 @@ def login(strategy: str = "all", persist: bool = False) -> Auth: def download( granules: Union[DataGranule, List[DataGranule], str, List[str]], - local_path: Union[str, None], + local_path: Optional[str], provider: Optional[str] = None, threads: int = 8, ) -> List[str]: From 898f0cd5ad4946642fb374ad393208a2157e1861 Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Wed, 7 Feb 2024 12:26:15 -0500 Subject: [PATCH 07/16] docstring cleanup --- earthaccess/results.py | 52 ++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/earthaccess/results.py b/earthaccess/results.py index a466e1c3..5569f670 100644 --- a/earthaccess/results.py +++ b/earthaccess/results.py @@ -56,9 +56,7 @@ def _filter_related_links(self, filter: str) -> List[str]: class DataCollection(CustomDict): - """ - Dictionary-like object to represent a data collection from CMR - """ + """Dictionary-like object to represent a data collection from CMR.""" _basic_meta_fields_ = [ "concept-id", @@ -78,10 +76,10 @@ class DataCollection(CustomDict): ] def summary(self) -> Dict[str, Any]: - """Summary containing short_name, concept-id, file-type, and cloud-info if the dataset is cloud hosted. + """Summary containing short_name, concept-id, file-type, and cloud-info (if cloud-hosted). Returns: - Returns a sumary of the collection metadata + A summary of the collection metadata. """ # we can print only the concept-id @@ -101,6 +99,7 @@ def get_umm(self, umm_field: str) -> Union[str, Dict[str, Any]]: """ Parameters: umm_field: Valid UMM item, i.e. `TemporalExtent` + Returns: Returns the value of a given field inside the UMM (Unified Metadata Model) """ @@ -111,14 +110,15 @@ def get_umm(self, umm_field: str) -> Union[str, Dict[str, Any]]: def concept_id(self) -> str: """ Returns: - Retrurns a collection's `concept_id`, this id is the most relevant search field on granule queries. + A collection's `concept_id`. + This id is the most relevant search field on granule queries. """ return self["meta"]["concept-id"] def data_type(self) -> str: """ Returns: - If available, it returns the collection data type, i.e. HDF5, CSV etc + The collection data type, i.e. HDF5, CSV etc., if available. """ if "ArchiveAndDistributionInformation" in self["umm"]: return str( @@ -131,7 +131,7 @@ def data_type(self) -> str: def version(self) -> str: """ Returns: - returns the collection's version. + The collection's version. """ if "Version" in self["umm"]: return self["umm"]["Version"] @@ -140,7 +140,7 @@ def version(self) -> str: def abstract(self) -> str: """ Returns: - Returns the abstract of a collection + The abstract of a collection """ if "Abstract" in self["umm"]: return self["umm"]["Abstract"] @@ -149,7 +149,7 @@ def abstract(self) -> str: def landing_page(self) -> str: """ Returns: - if available it returns the first landing page for the collection, can be many. + The first landing page for the collection (can be many), if available. """ links = self._filter_related_links("LANDING PAGE") if len(links) > 0: @@ -159,7 +159,7 @@ def landing_page(self) -> str: def get_data(self) -> List[str]: """ Returns: - Returns the GET DATA links, usually a link to a landing page, a DAAC portal or an FTP location. + The GET DATA links (usually a landing page link, a DAAC portal, or an FTP location). """ links = self._filter_related_links("GET DATA") return links @@ -167,7 +167,8 @@ def get_data(self) -> List[str]: def s3_bucket(self) -> Dict[str, Any]: """ Returns: - Returns the S3 bucket information if the collection has it (**cloud hosted collections only**) + The S3 bucket information if the collection has it. + (**cloud hosted collections only**) """ if "DirectDistributionInformation" in self["umm"]: return self["umm"]["DirectDistributionInformation"] @@ -180,9 +181,7 @@ def __repr__(self) -> str: class DataGranule(CustomDict): - """ - Dictionary-like object to represent a granule from CMR - """ + """Dictionary-like object to represent a granule from CMR.""" _basic_meta_fields_ = [ "concept-id", @@ -219,7 +218,7 @@ def __init__( def __repr__(self) -> str: """ Returns: - returns a basic representation of a data granule + A basic representation of a data granule. """ data_links = [link for link in self.data_links()] rep_str = f""" @@ -234,7 +233,7 @@ def __repr__(self) -> str: def _repr_html_(self) -> str: """ Returns: - Returns a rich representation for a data granule if we are in a Jupyter notebook. + A rich representation for a data granule if we are in a Jupyter notebook. """ granule_html_repr = _repr_granule_html(self) return granule_html_repr @@ -248,7 +247,7 @@ def get_s3_credentials_endpoint(self) -> Union[str, None]: def size(self) -> float: """ Returns: - Returns the total size for the granule in MB + The total size for the granule in MB. """ try: data_granule = self["umm"]["DataGranule"] @@ -290,17 +289,20 @@ def data_links( """Returns the data links form a granule Parameters: - access: direct or external, direct means in-region access for cloud hosted collections. - in_region: if we are running in us-west-2, meant for the store class, default is False + access: direct or external. + direct means in-region access for cloud-hosted collections. + in_region: True if we are running in us-west-2. + It is meant for the store class. + Returns: - the data link for the requested access type + The data link for the requested access type. """ https_links = self._filter_related_links("GET DATA") s3_links = self._filter_related_links("GET DATA VIA DIRECT ACCESS") if in_region: # we are in us-west-2 if self.cloud_hosted and access in (None, "direct"): - # this is a cloud collection and we didn't specify the access type + # this is a cloud collection, and we didn't specify the access type # default to S3 links if len(s3_links) == 0 and len(https_links) > 0: # This is guessing the S3 links for some cloud collections that for @@ -310,14 +312,14 @@ def data_links( # we have the s3 links so we return those return s3_links else: - # Even though we are in us-west-2 the user wants the HTTPS links + # Even though we are in us-west-2, the user wants the HTTPS links # used in region they are S3 signed links from TEA # https://github.com/asfadmin/thin-egress-app return https_links else: # we are not in region if access == "direct": - # maybe the user wants to collect S3 links ans use them later + # maybe the user wants to collect S3 links and use them later # from the cloud return s3_links else: @@ -327,7 +329,7 @@ def data_links( def dataviz_links(self) -> List[str]: """ Returns: - Returns the data visualization links, usually the browse images. + The data visualization links, usually the browse images. """ links = self._filter_related_links("GET RELATED VISUALIZATION") return links From ec33f43fb19b5ad89c2d5d5774c5a7be2181393b Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Wed, 7 Feb 2024 14:04:07 -0500 Subject: [PATCH 08/16] more docstring cleanup in search.py --- earthaccess/api.py | 2 +- earthaccess/search.py | 125 +++++++++++++++++++++++------------------- 2 files changed, 71 insertions(+), 56 deletions(-) diff --git a/earthaccess/api.py b/earthaccess/api.py index ed858ac7..81e3c377 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -201,7 +201,7 @@ def open( Parameters: granules: a list of granule instances **or** list of URLs, e.g. - `s3://some-granule`, if a list of URLs is passed we need to specify the data + `s3://some-granule`, if a list of URLs is passed, we need to specify the data provider e.g. POCLOUD, NSIDC_CPRD etc. Returns: diff --git a/earthaccess/search.py b/earthaccess/search.py index 8392ca51..22a26923 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -15,7 +15,7 @@ class DataCollections(CollectionQuery): """ ???+ Info The DataCollection class queries against https://cmr.earthdata.nasa.gov/search/collections.umm_json, - the response has to be in umm_json in order to use the result classes. + the response has to be in umm_json to use the result classes. """ _fields = None @@ -37,13 +37,13 @@ def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> No """Builds an instance of DataCollections to query CMR Parameters: - auth (Auth): An authenticated `Auth` instance, this is an optional parameter - for queries that need authentication e.g. restricted datasets + auth: An authenticated `Auth` instance. This is an optional parameter + for queries that need authentication, e.g. restricted datasets. """ super().__init__(*args, **kwargs) self.session = session() if auth is not None and auth.authenticated: - # To search we need the new bearer tokens from NASA Earthdata + # To search, we need the new bearer tokens from NASA Earthdata self.session = auth.get_session(bearer_token=True) self._debug = False @@ -54,16 +54,17 @@ def __init__(self, auth: Optional[Auth] = None, *args: Any, **kwargs: Any) -> No def hits(self) -> int: """Returns the number of hits the current query will return. This is done by making a lightweight query to CMR and inspecting the returned headers. - Restricted datasets will always return 0 results even if there are results. + Restricted datasets will always return zero results even if there are results. Returns: - number of results reported by CMR + The number of results reported by CMR. """ return super().hits() def concept_id(self, IDs: List[str]) -> Type[CollectionQuery]: - """Filter by concept ID (ex: C1299783579-LPDAAC_ECS or G1327299284-LPDAAC_ECS, S12345678-LPDAAC_ECS) + """Filter by concept ID. + For example: C1299783579-LPDAAC_ECS or G1327299284-LPDAAC_ECS, S12345678-LPDAAC_ECS Collections, granules, tools, services are uniquely identified with this ID. > @@ -73,24 +74,24 @@ def concept_id(self, IDs: List[str]) -> Type[CollectionQuery]: * If providing a service's concept ID here, it will uniquely identify those services. Parameters: - IDs (String, List): ID(s) to search by. Can be provided as a string or list of strings. + IDs: ID(s) to search by. Can be provided as a string or list of strings. """ super().concept_id(IDs) return self def keyword(self, text: str) -> Type[CollectionQuery]: - """Case insentive and wildcard (*) search through over two dozen fields in + """Case-insensitive and wildcard (*) search through over two dozen fields in a CMR collection record. This allows for searching against fields like summary and science keywords. Parameters: - text (String): text to search for + text: text to search for """ super().keyword(text) return self def doi(self, doi: str) -> Type[CollectionQuery]: - """Searh datasets by DOI + """Search datasets by DOI. ???+ Tip Not all datasets have an associated DOI, also DOI search works @@ -98,7 +99,7 @@ def doi(self, doi: str) -> Type[CollectionQuery]: We need to search by DOI, grab the concept_id and then get the data. Parameters: - doi (String): DOI of a datasets, e.g. 10.5067/AQR50-3Q7CS + doi: DOI of a datasets, e.g. 10.5067/AQR50-3Q7CS """ if not isinstance(doi, str): raise TypeError("doi must be of type str") @@ -137,13 +138,14 @@ def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: return self def print_help(self, method: str = "fields") -> None: - """Prints the help information for a given method""" + """Prints the help information for a given method.""" print("Class components: \n") print([method for method in dir(self) if method.startswith("_") is False]) help(getattr(self, method)) def fields(self, fields: Optional[List[str]] = None) -> Type[CollectionQuery]: - """Masks the response by only showing the fields included in this list + """Masks the response by only showing the fields included in this list. + Parameters: fields (List): list of fields to show, these fields come from the UMM model e.g. Abstract, Title """ @@ -152,6 +154,7 @@ def fields(self, fields: Optional[List[str]] = None) -> Type[CollectionQuery]: def debug(self, debug: bool = True) -> Type[CollectionQuery]: """If True, prints the actual query to CMR, notice that the pagination happens in the headers. + Parameters: debug (Boolean): Print CMR query. """ @@ -166,7 +169,7 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: Restricted collections will not be matched using this parameter Parameters: - cloud_hosted (Boolean): True to require granules only be online + cloud_hosted: True to require granules only be online """ if not isinstance(cloud_hosted, bool): raise TypeError("cloud_hosted must be of type bool") @@ -178,27 +181,31 @@ def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: return self def provider(self, provider: str = "") -> Type[CollectionQuery]: - """Only match collections from a given provider, a NASA datacenter or DAAC can have 1 or more providers - i.e. PODAAC is a data center or DAAC, PODAAC is the default provider for on prem data, POCLOUD is - the PODAAC provider for their data in the cloud. + """Only match collections from a given provider. + + A NASA datacenter or DAAC can have one or more providers. + E.g., PODAAC is a data center or DAAC; PODAAC is the default provider for on-premises data, + POCLOUD is the PODAAC provider for their data in the cloud. Parameters: - provider (String): a provider code for any DAAC. e.g. POCLOUD, NSIDC_CPRD, etc. + provider: a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc. """ self.params["provider"] = provider return self def data_center(self, data_center_name: str = "") -> Type[CollectionQuery]: - """An alias name for `daac()` + """An alias name for `daac()`. + Parameters: - data_center_name (String): DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + data_center_name: DAAC shortname, e.g. NSIDC, PODAAC, GESDISC """ return self.daac(data_center_name) def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: - """Only match collections for a given DAAC, by default the on-prem collections for the DAAC + """Only match collections for a given DAAC, by default the on-prem collections for the DAAC. + Parameters: - daac_short_name (String): a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + daac_short_name: a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC """ if "cloud_hosted" in self.params: cloud_hosted = self.params["cloud_hosted"] @@ -218,7 +225,8 @@ def get(self, limit: int = 2000) -> list: they can be potentially millions of them. Parameters: - limit (Integer): The number of results to return + limit: The number of results to return + Returns: query results as a list of `DataCollection` instances. """ @@ -269,9 +277,9 @@ def temporal( to this method before calling execute(). Parameters: - date_from (String): earliest date of temporal range - date_to (string): latest date of temporal range - exclude_boundary (Boolean): whether or not to exclude the date_from/to in the matched range + date_from: earliest date of temporal range + date_to: latest date of temporal range + exclude_boundary: whether to exclude the date_from/to in the matched range """ DEFAULT = dt.datetime(1979, 1, 1) if date_from is not None: @@ -291,8 +299,7 @@ def temporal( class DataGranules(GranuleQuery): - """ - A Granule oriented client for NASA CMR + """A Granule oriented client for NASA CMR. API: https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html """ @@ -316,17 +323,17 @@ def __init__(self, auth: Any = None, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) self.session = session() if auth is not None and auth.authenticated: - # To search we need the new bearer tokens from NASA Earthdata + # To search, we need the new bearer tokens from NASA Earthdata self.session = auth.get_session(bearer_token=True) self._debug = False def hits(self) -> int: - """ - Returns the number of hits the current query will return. This is done by - making a lightweight query to CMR and inspecting the returned headers. + """Returns the number of hits the current query will return. + This is done by making a lightweight query to CMR and inspecting the returned headers. - :returns: number of results reported by CMR + Returns: + The number of results reported by CMR. """ url = self._build_url() @@ -353,6 +360,7 @@ def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: temporal=("2015-01","2015-02"), point=(42.5, -101.25)) ``` + Returns: Query instance """ @@ -379,22 +387,24 @@ def provider(self, provider: str = "") -> Type[CollectionQuery]: the PODAAC provider for their data in the cloud. Parameters: - provider (String): a provider code for any DAAC. e.g. POCLOUD, NSIDC_CPRD, etc. + provider: a provider code for any DAAC. e.g. POCLOUD, NSIDC_CPRD, etc. """ self.params["provider"] = provider return self def data_center(self, data_center_name: str = "") -> Type[CollectionQuery]: - """An alias name for `daac()` + """An alias name for `daac()`. + Parameters: data_center_name (String): DAAC shortname, e.g. NSIDC, PODAAC, GESDISC """ return self.daac(data_center_name) def daac(self, daac_short_name: str = "") -> Type[CollectionQuery]: - """Only match collections for a given DAAC, by default the on-prem collections for the DAAC + """Only match collections for a given DAAC. Default to on-prem collections for the DAAC. + Parameters: - daac_short_name (String): a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC + daac_short_name: a DAAC shortname, e.g. NSIDC, PODAAC, GESDISC """ if "cloud_hosted" in self.params: cloud_hosted = self.params["cloud_hosted"] @@ -446,7 +456,7 @@ def granule_name(self, granule_name: str) -> Type[CollectionQuery]: i.e. MODGRNLD.*.daily.* Parameters: - granule_name (String): granule name (accepts wildcards) + granule_name: granule name (accepts wildcards) """ if not isinstance(granule_name, str): raise TypeError("granule_name must be of type string") @@ -477,7 +487,7 @@ def instrument(self, instrument: str = "") -> Type[GranuleQuery]: """Filter by the instrument associated with the granule. Parameters: - instrument (str): name of the instrument + instrument: name of the instrument """ super().instrument(instrument) return self @@ -497,8 +507,8 @@ def cloud_cover( """Filter by the percentage of cloud cover present in the granule. Parameters: - min_cover (int): minimum percentage of cloud cover - max_cover (int): maximum percentage of cloud cover + min_cover: minimum percentage of cloud cover + max_cover: maximum percentage of cloud cover """ super().cloud_cover(min_cover, max_cover) return self @@ -527,10 +537,13 @@ def _is_cloud_hosted(self, granule: Any) -> bool: return False def short_name(self, short_name: str = "") -> Type[GranuleQuery]: - """ - Filter by short name (aka product or collection name). - :param short_name: name of collection - :returns: Query instance + """Filter by short name (aka product or collection name). + + Parameters: + short_name: name of a collection + + Returns: + Query instance """ super().short_name(short_name) return self @@ -545,7 +558,8 @@ def get(self, limit: int = 2000) -> list: they can be potentially millions of them. Parameters: - limit (Integer): The number of results to return + limit: The number of results to return + Returns: query results as a list of `DataCollection` instances. """ @@ -606,8 +620,9 @@ def get(self, limit: int = 2000) -> list: def debug(self, debug: bool = True) -> Type[GranuleQuery]: """If True, prints the actual query to CMR, notice that the pagination happens in the headers. + Parameters: - debug (Boolean): Print CMR query. + debug: Print CMR query. """ self._debug = True return self @@ -623,9 +638,9 @@ def temporal( ranges can be provided by successive calls to this method before calling execute(). Parameters: - date_from (Date, String): earliest date of temporal range - date_to (Date, String): latest date of temporal range - exclude_boundary (Boolean): whether or not to exclude the date_from/to in the matched range + date_from: earliest date of temporal range + date_to: latest date of temporal range + exclude_boundary: whether to exclude the date_from/to in the matched range """ DEFAULT = dt.datetime(1979, 1, 1) if date_from is not None: @@ -668,7 +683,7 @@ def polygon(self, coordinates: List[Tuple[str, str]]) -> Type[GranuleQuery]: collection filtering parameter such as short_name or entry_title. Parameters: - coordinates (List): list of (lon, lat) tuples + coordinates: list of (lon, lat) tuples """ super().polygon(coordinates) return self @@ -699,7 +714,7 @@ def line(self, coordinates: List[Tuple[str, str]]) -> Type[GranuleQuery]: with a collection filtering parameter such as short_name or entry_title. Parameters: - coordinates (List): a list of (lon, lat) tuples + coordinates: a list of (lon, lat) tuples """ super().line(coordinates) return self @@ -715,14 +730,14 @@ def downloadable(self, downloadable: bool = True) -> Type[GranuleQuery]: return self def doi(self, doi: str) -> Type[GranuleQuery]: - """Searh data granules by DOI + """Search data granules by DOI ???+ Tip Not all datasets have an associated DOI, internally if a DOI is found earthaccess will grab the concept_id for the query to CMR. Parameters: - doi (String): DOI of a datasets, e.g. 10.5067/AQR50-3Q7CS + doi: DOI of a datasets, e.g. 10.5067/AQR50-3Q7CS """ collection = DataCollections().doi(doi).get() if len(collection) > 0: From 5a6bd14e33a00505cba8d7b129d2ae2c5b5e0f40 Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Wed, 7 Feb 2024 14:14:00 -0500 Subject: [PATCH 09/16] more docstring cleanup in store.py --- earthaccess/store.py | 86 +++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/earthaccess/store.py b/earthaccess/store.py index 645721ad..27ee8814 100644 --- a/earthaccess/store.py +++ b/earthaccess/store.py @@ -79,7 +79,7 @@ def make_instance( def _get_url_granule_mapping( granules: List[DataGranule], access: str ) -> Mapping[str, DataGranule]: - """Construct a mapping between file urls and granules""" + """Construct a mapping between file urls and granules.""" url_mapping = {} for granule in granules: for url in granule.data_links(access=access): @@ -88,15 +88,13 @@ def _get_url_granule_mapping( class Store(object): - """ - Store class to access granules on-prem or in the cloud. - """ + """Store class to access granules on-prem or in the cloud.""" def __init__(self, auth: Any, pre_authorize: bool = False) -> None: - """Store is the class to access data + """Store is the class to access data. Parameters: - auth (Auth): Required, Auth instance to download and access data. + auth: Auth instance to download and access data. """ if auth.authenticated is True: self.auth = auth @@ -169,9 +167,9 @@ def set_requests_session( This HTTPS session can be used to download granules if we want to use a direct, lower level API Parameters: - url (String): used to test the credentials and populate the class auth cookies - method (String): HTTP method to test. default: "GET" - bearer_token (Boolean): if true will be used for authenticated queries on CMR + url: used to test the credentials and populate the class auth cookies + method: HTTP method to test. default: "GET" + bearer_token: if true will be used for authenticated queries on CMR Returns: fsspec HTTPFileSystem (aiohttp client session) @@ -202,13 +200,13 @@ def get_s3fs_session( provider: Optional[str] = None, endpoint: Optional[str] = None, ) -> s3fs.S3FileSystem: - """ - Returns a s3fs instance for a given cloud provider / DAAC + """Returns a s3fs instance for a given cloud provider / DAAC. Parameters: daac: any of the DAACs e.g. NSIDC, PODAAC - provider: a data provider if we know them, e.g PODAAC -> POCLOUD + provider: a data provider if we know them, e.g. PODAAC -> POCLOUD endpoint: pass the URL for the credentials directly + Returns: a s3fs file instance """ @@ -284,7 +282,7 @@ def get_requests_session(self, bearer_token: bool = True) -> requests.Session: This HTTPS session can be used to download granules if we want to use a direct, lower level API Parameters: - bearer_token (Boolean): if true will be used for authenticated queries on CMR + bearer_token: if true will be used for authenticated queries on CMR Returns: requests Session @@ -300,7 +298,9 @@ def open( hosted on S3 or HTTPS by third party libraries like xarray. Parameters: - granules (List): a list of granules(DataGranule) instances or list of URLs, e.g. s3://some-granule + granules: a list of granules(DataGranule) instances or list of URLs, + e.g. s3://some-granule + Returns: a list of s3fs "file pointers" to s3 files. """ @@ -318,7 +318,8 @@ def _open( hosted on S3 or HTTPS by third party libraries like xarray. Parameters: - granules (List): a list of granules(DataGranule) instances or list of URLs, e.g. s3://some-granule + granules: a list of granules(DataGranule) instances or list of URLs, e.g. s3://some-granule + Returns: a list of s3fs "file pointers" to s3 files. """ @@ -442,17 +443,19 @@ def get( ) -> List[str]: """Retrieves data granules from a remote storage system. - * If we run this in the cloud we are moving data from S3 to a cloud compute instance (EC2, AWS Lambda) + * If we run this in the cloud, + we are moving data from S3 to a cloud compute instance (EC2, AWS Lambda). * If we run it outside the us-west-2 region and the data granules are part of a cloud-based - collection the method will not get any files. - * If we requests data granules from an on-prem collection the data will be effectively downloaded - to a local directory. + collection, the method will not get any files. + * If we request data granules from an on-prem collection, + the data will be effectively downloaded to a local directory. Parameters: granules: a list of granules(DataGranule) instances or a list of granule links (HTTP) local_path: local directory to store the remote data granules access: direct or on_prem, if set it will use it for the access method. - threads: parallel number of threads to use to download the files, adjust as necessary, default = 8 + threads: parallel number of threads to use to download the files; + adjust as necessary, default = 8 Returns: List of downloaded files @@ -479,11 +482,12 @@ def _get( ) -> List[str]: """Retrieves data granules from a remote storage system. - * If we run this in the cloud we are moving data from S3 to a cloud compute instance (EC2, AWS Lambda) + * If we run this in the cloud, + we are moving data from S3 to a cloud compute instance (EC2, AWS Lambda). * If we run it outside the us-west-2 region and the data granules are part of a cloud-based - collection the method will not get any files. - * If we requests data granules from an on-prem collection the data will be effectively downloaded - to a local directory. + collection, the method will not get any files. + * If we request data granules from an on-prem collection, + the data will be effectively downloaded to a local directory. Parameters: granules: a list of granules(DataGranule) instances or a list of granule links (HTTP) @@ -568,15 +572,19 @@ def _get_granules( downloaded_files.append(file_name) return downloaded_files else: - # if the data is cloud based bu we are not in AWS it will be downloaded as if it was on prem + # if the data are cloud-based, but we are not in AWS, + # it will be downloaded as if it was on prem return self._download_onprem_granules(data_links, local_path, threads) def _download_file(self, url: str, directory: str) -> str: - """ - download a single file from an on-prem location, a DAAC data center. - :param url: the granule url - :param directory: local directory - :returns: local filepath or an exception + """Download a single file from an on-prem location, a DAAC data center. + + Parameters: + url: the granule url + directory: local directory + + Returns: + a local filepath or an exception """ # If the get data link is an Opendap location if "opendap" in url and url.endswith(".html"): @@ -595,7 +603,7 @@ def _download_file(self, url: str, directory: str) -> str: r.raise_for_status() with open(local_path, "wb") as f: # This is to cap memory usage for large files at 1MB per write to disk per thread - # https://docs.python-requests.org/en/master/user/quickstart/#raw-response-content + # https://docs.python-requests.org/en/latest/user/quickstart/#raw-response-content shutil.copyfileobj(r.raw, f, length=1024 * 1024) except Exception: print(f"Error while downloading the file {local_filename}") @@ -608,12 +616,16 @@ def _download_file(self, url: str, directory: str) -> str: def _download_onprem_granules( self, urls: List[str], directory: str, threads: int = 8 ) -> List[Any]: - """ - downloads a list of URLS into the data directory. - :param urls: list of granule URLs from an on-prem collection - :param directory: local directory to store the files - :param threads: parallel number of threads to use to download the files, adjust as necessary, default = 8 - :returns: None + """Downloads a list of URLS into the data directory. + + Parameters: + urls: list of granule URLs from an on-prem collection + directory: local directory to store the downloaded files + threads: parallel number of threads to use to download the files; + adjust as necessary, default = 8 + + Returns: + a list of local filepaths to which the files were downloaded """ if urls is None: raise ValueError("The granules didn't provide a valid GET DATA link") From 8b540d6096bc8e41122f643a9d702febab8ff571 Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Wed, 7 Feb 2024 14:34:49 -0500 Subject: [PATCH 10/16] further docstring cleanup --- earthaccess/api.py | 6 +++--- earthaccess/auth.py | 28 ++++++++++++++-------------- earthaccess/search.py | 3 ++- earthaccess/store.py | 9 ++++++--- 4 files changed, 25 insertions(+), 21 deletions(-) diff --git a/earthaccess/api.py b/earthaccess/api.py index 81e3c377..c8140770 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -200,9 +200,9 @@ def open( hosted on S3 or HTTPS by third party libraries like xarray. Parameters: - granules: a list of granule instances **or** list of URLs, e.g. - `s3://some-granule`, if a list of URLs is passed, we need to specify the data - provider e.g. POCLOUD, NSIDC_CPRD etc. + granules: a list of granule instances **or** list of URLs, e.g. `s3://some-granule`. + If a list of URLs is passed, we need to specify the data provider. + provider: e.g. POCLOUD, NSIDC_CPRD etc. Returns: a list of s3fs "file pointers" to s3 files. diff --git a/earthaccess/auth.py b/earthaccess/auth.py index 9a3b22cb..90e72c82 100644 --- a/earthaccess/auth.py +++ b/earthaccess/auth.py @@ -49,9 +49,7 @@ def rebuild_auth(self, prepared_request: Any, response: Any) -> None: class Auth(object): - """ - Authentication class for operations that require Earthdata login (EDL) - """ + """Authentication class for operations that require Earthdata login (EDL).""" def __init__(self) -> None: # Maybe all these predefined URLs should be in a constants.py file @@ -63,18 +61,18 @@ def __init__(self) -> None: self.EDL_REVOKE_TOKEN = "https://urs.earthdata.nasa.gov/api/users/revoke_token" def login(self, strategy: str = "netrc", persist: bool = False) -> Any: - """Authenticate with Earthdata login + """Authenticate with Earthdata login. Parameters: - - strategy (String): authentication method. + strategy: authentication method. "interactive": enter username and password. "netrc": (default) retrieve username and password from ~/.netrc. "environment": retrieve username and password from $EARTHDATA_USERNAME and $EARTHDATA_PASSWORD. - persist (Boolean): will persist credentials in a .netrc file + persist: will persist credentials in a .netrc file + Returns: an instance of Auth. """ @@ -90,8 +88,9 @@ def login(self, strategy: str = "netrc", persist: bool = False) -> Any: return self def refresh_tokens(self) -> bool: - """Refresh CMR tokens - Tokens are used to do authenticated queries on CMR for restricted and early access datastes + """Refresh CMR tokens. + + Tokens are used to do authenticated queries on CMR for restricted and early access datasets. This method renews the tokens to make sure we can query the collections allowed to our EDL user. """ if len(self.tokens) == 0: @@ -146,7 +145,7 @@ def get_s3_credentials( provider: Optional[str] = None, endpoint: Optional[str] = None, ) -> Dict[str, str]: - """Gets AWS S3 credentials for a given NASA cloud provider, the + """Gets AWS S3 credentials for a given NASA cloud provider. The easier way is to use the DAAC short name. provider is optional if we know it. Parameters: @@ -154,7 +153,7 @@ def get_s3_credentials( daac: the name of a NASA DAAC, i.e. NSIDC or PODAAC endpoint: getting the credentials directly from the S3Credentials URL - Rreturns: + Returns: A Python dictionary with the temporary AWS S3 credentials """ @@ -199,14 +198,15 @@ def get_s3_credentials( print(f"Credentials for the cloud provider {daac} are not available") return {} else: - print("We need to auhtenticate with EDL first") + print("We need to authenticate with EDL first") return {} def get_session(self, bearer_token: bool = True) -> requests.Session: - """Returns a new request session instance + """Returns a new request session instance. Parameters: - bearer_token (Boolean): boolean, include bearer token + bearer_token: boolean, include bearer token + Returns: class Session instance with Auth and bearer token headers """ diff --git a/earthaccess/search.py b/earthaccess/search.py index 22a26923..15f0b49f 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -468,8 +468,9 @@ def granule_name(self, granule_name: str) -> Type[CollectionQuery]: def online_only(self, online_only: bool = True) -> Type[GranuleQuery]: """Only match granules that are listed online and not available for download. The opposite of this method is downloadable(). + Parameters: - online_only (Boolean): True to require granules only be online + online_only: True to require granules only be online """ super().online_only(online_only) return self diff --git a/earthaccess/store.py b/earthaccess/store.py index 27ee8814..1a615631 100644 --- a/earthaccess/store.py +++ b/earthaccess/store.py @@ -270,7 +270,7 @@ def get_fsspec_session(self) -> fsspec.AbstractFileSystem: token = self.auth.token["access_token"] client_kwargs = { "headers": {"Authorization": f"Bearer {token}"}, - # This is important! if we trust the env end send a bearer token + # This is important! If we trust the env and send a bearer token, # auth will fail! "trust_env": False, } @@ -282,7 +282,7 @@ def get_requests_session(self, bearer_token: bool = True) -> requests.Session: This HTTPS session can be used to download granules if we want to use a direct, lower level API Parameters: - bearer_token: if true will be used for authenticated queries on CMR + bearer_token: if true, will be used for authenticated queries on CMR Returns: requests Session @@ -300,6 +300,7 @@ def open( Parameters: granules: a list of granules(DataGranule) instances or list of URLs, e.g. s3://some-granule + provider: an option Returns: a list of s3fs "file pointers" to s3 files. @@ -318,7 +319,9 @@ def _open( hosted on S3 or HTTPS by third party libraries like xarray. Parameters: - granules: a list of granules(DataGranule) instances or list of URLs, e.g. s3://some-granule + granules: a list of granules(DataGranule) instances or list of URLs, + e.g. s3://some-granule + provider: an option Returns: a list of s3fs "file pointers" to s3 files. From 6ec1081413ffd27ab5a2593ffe991608ddd3a289 Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Wed, 7 Feb 2024 14:36:01 -0500 Subject: [PATCH 11/16] typo --- earthaccess/auth.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/earthaccess/auth.py b/earthaccess/auth.py index 90e72c82..4d9eb921 100644 --- a/earthaccess/auth.py +++ b/earthaccess/auth.py @@ -228,13 +228,13 @@ def get_user_profile(self) -> Dict[str, Any]: else: return {} - def _interactive(self, presist_credentials: bool = False) -> bool: + def _interactive(self, persist_credentials: bool = False) -> bool: username = input("Enter your Earthdata Login username: ") password = getpass.getpass(prompt="Enter your Earthdata password: ") authenticated = self._get_credentials(username, password) if authenticated: logger.debug("Using user provided credentials for EDL") - if presist_credentials: + if persist_credentials: print("Persisting credentials to .netrc") self._persist_user_credentials(username, password) return authenticated From f35c4ea9d3aa166be42f35ede0d2cc839fc41788 Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Wed, 7 Feb 2024 14:38:11 -0500 Subject: [PATCH 12/16] add indentation --- earthaccess/results.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/earthaccess/results.py b/earthaccess/results.py index 5569f670..ecfbc606 100644 --- a/earthaccess/results.py +++ b/earthaccess/results.py @@ -111,7 +111,7 @@ def concept_id(self) -> str: """ Returns: A collection's `concept_id`. - This id is the most relevant search field on granule queries. + This id is the most relevant search field on granule queries. """ return self["meta"]["concept-id"] @@ -168,7 +168,7 @@ def s3_bucket(self) -> Dict[str, Any]: """ Returns: The S3 bucket information if the collection has it. - (**cloud hosted collections only**) + (**cloud hosted collections only**) """ if "DirectDistributionInformation" in self["umm"]: return self["umm"]["DirectDistributionInformation"] From 2e57d2fbc8749e46b72d32c0e556288cacf52abf Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Wed, 7 Feb 2024 14:44:55 -0500 Subject: [PATCH 13/16] further docstring cleanup in results.py --- earthaccess/results.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/earthaccess/results.py b/earthaccess/results.py index ecfbc606..4861f781 100644 --- a/earthaccess/results.py +++ b/earthaccess/results.py @@ -44,9 +44,7 @@ def _filter_fields_(self, fields: List[str]) -> Dict[str, Any]: return basic_dict def _filter_related_links(self, filter: str) -> List[str]: - """ - Filter RelatedUrls from the UMM fields on CMR - """ + """Filter RelatedUrls from the UMM fields on CMR.""" matched_links: List = [] if "RelatedUrls" in self["umm"]: for link in self["umm"]["RelatedUrls"]: @@ -101,7 +99,7 @@ def get_umm(self, umm_field: str) -> Union[str, Dict[str, Any]]: umm_field: Valid UMM item, i.e. `TemporalExtent` Returns: - Returns the value of a given field inside the UMM (Unified Metadata Model) + The value of a given field inside the UMM (Unified Metadata Model). """ if umm_field in self["umm"]: return self["umm"][umm_field] @@ -290,12 +288,12 @@ def data_links( Parameters: access: direct or external. - direct means in-region access for cloud-hosted collections. + direct means in-region access for cloud-hosted collections. in_region: True if we are running in us-west-2. - It is meant for the store class. + It is meant for the store class. Returns: - The data link for the requested access type. + The data links for the requested access type. """ https_links = self._filter_related_links("GET DATA") s3_links = self._filter_related_links("GET DATA VIA DIRECT ACCESS") From 8b6e58b1b89b02d37b67d60d044cb1a4bc966eab Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Wed, 7 Feb 2024 15:33:59 -0500 Subject: [PATCH 14/16] further docstring cleanup in api.py and auth.py --- earthaccess/api.py | 35 ++++++++++++++++++----------------- earthaccess/auth.py | 17 +++++++---------- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/earthaccess/api.py b/earthaccess/api.py index c8140770..9177b37c 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -14,7 +14,7 @@ def _normalize_location(location: Optional[str]) -> Optional[str]: - """Handle user-provided `daac` and `provider` values + """Handle user-provided `daac` and `provider` values. These values must have a capital letter as the first character followed by capital letters, numbers, or an underscore. Here we @@ -40,7 +40,7 @@ def search_datasets( kwargs (Dict): arguments to CMR: - * **keyword**: case insensitive and support wild cards ? and *, + * **keyword**: case-insensitive and support wildcards ? and *, * **short_name**: e.g. ATL08 * **doi**: DOI for a dataset * **daac**: e.g. NSIDC or PODAAC @@ -125,7 +125,7 @@ def search_data( def login(strategy: str = "all", persist: bool = False) -> Auth: - """Authenticate with Earthdata login (https://urs.earthdata.nasa.gov/) + """Authenticate with Earthdata login (https://urs.earthdata.nasa.gov/). Parameters: strategy: @@ -202,7 +202,7 @@ def open( Parameters: granules: a list of granule instances **or** list of URLs, e.g. `s3://some-granule`. If a list of URLs is passed, we need to specify the data provider. - provider: e.g. POCLOUD, NSIDC_CPRD etc. + provider: e.g. POCLOUD, NSIDC_CPRD, etc. Returns: a list of s3fs "file pointers" to s3 files. @@ -217,14 +217,14 @@ def get_s3_credentials( provider: Optional[str] = None, results: Optional[List[earthaccess.results.DataGranule]] = None, ) -> Dict[str, Any]: - """Returns temporary (1 hour) credentials for direct access to NASA S3 buckets, we can - use the daac name, the provider or a list of results from earthaccess.search_data() - if we use results earthaccess will use the metadata on the response to get the credentials, - this is useful for missions that do not use the same endpoint as their DAACs e.g. SWOT + """Returns temporary (1 hour) credentials for direct access to NASA S3 buckets. We can + use the daac name, the provider, or a list of results from earthaccess.search_data(). + If we use results, earthaccess will use the metadata on the response to get the credentials, + which is useful for missions that do not use the same endpoint as their DAACs, e.g. SWOT. Parameters: - daac: a DAAC short_name like NSIDC or PODAAC etc - provider: if we know the provider for the DAAC e.g. POCLOUD, LPCLOUD etc. + daac: a DAAC short_name like NSIDC or PODAAC, etc. + provider: if we know the provider for the DAAC, e.g. POCLOUD, LPCLOUD etc. results: List of results from search_data() Returns: @@ -239,7 +239,7 @@ def get_s3_credentials( def collection_query() -> Type[CollectionQuery]: - """Returns a query builder instance for NASA collections (datasets) + """Returns a query builder instance for NASA collections (datasets). Returns: a query builder instance for data collections. @@ -265,7 +265,7 @@ def granule_query() -> Type[GranuleQuery]: def get_fsspec_https_session() -> AbstractFileSystem: - """Returns a fsspec session that can be used to access datafiles across many different DAACs + """Returns a fsspec session that can be used to access datafiles across many different DAACs. Returns: an fsspec instance able to access data across DAACs @@ -286,8 +286,8 @@ def get_fsspec_https_session() -> AbstractFileSystem: def get_requests_https_session() -> requests.Session: - """Returns a requests Session instance with an authorized bearer token - this is useful to make requests to restricted URLs like data granules or services that + """Returns a requests Session instance with an authorized bearer token. + This is useful to make requests to restricted URLs like data granules or services that require authentication with NASA EDL. Returns: @@ -313,11 +313,12 @@ def get_s3fs_session( provider: Optional[str] = None, results: Optional[earthaccess.results.DataGranule] = None, ) -> s3fs.S3FileSystem: - """Returns a fsspec s3fs file session for direct access when we are in us-west-2 + """Returns a fsspec s3fs file session for direct access when we are in us-west-2. Parameters: daac: Any DAAC short name e.g. NSIDC, GES_DISC - provider: Each DAAC can have a cloud provider, if the DAAC is specified, there is no need to use provider + provider: Each DAAC can have a cloud provider. + If the DAAC is specified, there is no need to use provider. results: A list of results from search_data(), earthaccess will use the metadata form CMR to obtain the S3 Endpoint Returns: @@ -335,7 +336,7 @@ def get_s3fs_session( def get_edl_token() -> str: - """Returns the current token used for EDL + """Returns the current token used for EDL. Returns: EDL token diff --git a/earthaccess/auth.py b/earthaccess/auth.py index 4d9eb921..9b94e8d5 100644 --- a/earthaccess/auth.py +++ b/earthaccess/auth.py @@ -64,13 +64,12 @@ def login(self, strategy: str = "netrc", persist: bool = False) -> Any: """Authenticate with Earthdata login. Parameters: - strategy: authentication method. + strategy: + authentication method. - "interactive": enter username and password. - - "netrc": (default) retrieve username and password from ~/.netrc. - - "environment": retrieve username and password from $EARTHDATA_USERNAME and $EARTHDATA_PASSWORD. + * **"interactive"**: enter username and password. + * **"netrc"**: (default) retrieve username and password from ~/.netrc. + * **"environment"**: retrieve username and password from $EARTHDATA_USERNAME and $EARTHDATA_PASSWORD. persist: will persist credentials in a .netrc file Returns: @@ -89,7 +88,6 @@ def login(self, strategy: str = "netrc", persist: bool = False) -> Any: def refresh_tokens(self) -> bool: """Refresh CMR tokens. - Tokens are used to do authenticated queries on CMR for restricted and early access datasets. This method renews the tokens to make sure we can query the collections allowed to our EDL user. """ @@ -149,13 +147,12 @@ def get_s3_credentials( easier way is to use the DAAC short name. provider is optional if we know it. Parameters: - provider: A valid cloud provider, each DAAC has a provider code for their cloud distributions daac: the name of a NASA DAAC, i.e. NSIDC or PODAAC + provider: A valid cloud provider. Each DAAC has a provider code for their cloud distributions. endpoint: getting the credentials directly from the S3Credentials URL Returns: A Python dictionary with the temporary AWS S3 credentials - """ if self.authenticated: session = SessionWithHeaderRedirection(self.username, self.password) @@ -205,7 +202,7 @@ def get_session(self, bearer_token: bool = True) -> requests.Session: """Returns a new request session instance. Parameters: - bearer_token: boolean, include bearer token + bearer_token: whether to include bearer token Returns: class Session instance with Auth and bearer token headers From 9aea057578f6051e2f266ea50e3b7691a3a9e4ad Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Wed, 7 Feb 2024 15:55:04 -0500 Subject: [PATCH 15/16] further docstring cleanup --- earthaccess/api.py | 12 +++++++----- earthaccess/auth.py | 23 ++++++++++++----------- earthaccess/daac.py | 2 +- earthaccess/kerchunk.py | 12 ++++++------ earthaccess/results.py | 4 ++-- earthaccess/search.py | 14 ++++++++------ earthaccess/store.py | 6 +++--- tests/integration/test_kerchunk.py | 20 ++++++++++---------- 8 files changed, 49 insertions(+), 44 deletions(-) diff --git a/earthaccess/api.py b/earthaccess/api.py index 9177b37c..9a6422a8 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -129,7 +129,7 @@ def login(strategy: str = "all", persist: bool = False) -> Auth: Parameters: strategy: - authentication method. + An authentication method. * **"all"**: (default) try all methods until one works * **"interactive"**: enter username and password. @@ -138,7 +138,7 @@ def login(strategy: str = "all", persist: bool = False) -> Auth: persist: will persist credentials in a .netrc file Returns: - an instance of Auth. + An instance of Auth. """ if strategy == "all": for strategy in ["environment", "netrc", "interactive"]: @@ -166,8 +166,9 @@ def download( ) -> List[str]: """Retrieves data granules from a remote storage system. - * If we run this in the cloud, we will be using S3 to move data to `local_path` - * If we run it outside AWS (us-west-2 region) and the dataset is cloud hostes we'll use HTTP links + * If we run this in the cloud, we will be using S3 to move data to `local_path`. + * If we run it outside AWS (us-west-2 region) and the dataset is cloud hosted, + we'll use HTTP links. Parameters: granules: a granule, list of granules, a granule link (HTTP), or a list of granule links (HTTP) @@ -319,7 +320,8 @@ def get_s3fs_session( daac: Any DAAC short name e.g. NSIDC, GES_DISC provider: Each DAAC can have a cloud provider. If the DAAC is specified, there is no need to use provider. - results: A list of results from search_data(), earthaccess will use the metadata form CMR to obtain the S3 Endpoint + results: A list of results from search_data(). + `earthaccess` will use the metadata from CMR to obtain the S3 Endpoint. Returns: an authenticated s3fs session valid for 1 hour diff --git a/earthaccess/auth.py b/earthaccess/auth.py index 9b94e8d5..004efa7c 100644 --- a/earthaccess/auth.py +++ b/earthaccess/auth.py @@ -65,15 +65,16 @@ def login(self, strategy: str = "netrc", persist: bool = False) -> Any: Parameters: strategy: - authentication method. + The authentication method. - * **"interactive"**: enter username and password. - * **"netrc"**: (default) retrieve username and password from ~/.netrc. - * **"environment"**: retrieve username and password from $EARTHDATA_USERNAME and $EARTHDATA_PASSWORD. - persist: will persist credentials in a .netrc file + * **"interactive"**: Enter a username and password. + * **"netrc"**: (default) Retrieve a username and password from ~/.netrc. + * **"environment"**: + Retrieve a username and password from $EARTHDATA_USERNAME and $EARTHDATA_PASSWORD. + persist: Will persist credentials in a `.netrc` file. Returns: - an instance of Auth. + An instance of Auth. """ if self.authenticated: logger.debug("We are already authenticated with NASA EDL") @@ -143,16 +144,16 @@ def get_s3_credentials( provider: Optional[str] = None, endpoint: Optional[str] = None, ) -> Dict[str, str]: - """Gets AWS S3 credentials for a given NASA cloud provider. The - easier way is to use the DAAC short name. provider is optional if we know it. + """Gets AWS S3 credentials for a given NASA cloud provider. + The easier way is to use the DAAC short name; provider is optional if we know it. Parameters: - daac: the name of a NASA DAAC, i.e. NSIDC or PODAAC + daac: The name of a NASA DAAC, e.g. NSIDC or PODAAC. provider: A valid cloud provider. Each DAAC has a provider code for their cloud distributions. - endpoint: getting the credentials directly from the S3Credentials URL + endpoint: Getting the credentials directly from the S3Credentials URL. Returns: - A Python dictionary with the temporary AWS S3 credentials + A Python dictionary with the temporary AWS S3 credentials. """ if self.authenticated: session = SessionWithHeaderRedirection(self.username, self.password) diff --git a/earthaccess/daac.py b/earthaccess/daac.py index a15972c1..4f0e99f3 100644 --- a/earthaccess/daac.py +++ b/earthaccess/daac.py @@ -128,7 +128,7 @@ def find_provider( if len(daac["cloud-providers"]) > 0: return daac["cloud-providers"][0] else: - # We found the DAAC but it does not have cloud data + # We found the DAAC, but it does not have cloud data return daac["on-prem-providers"][0] else: # return on prem provider code diff --git a/earthaccess/kerchunk.py b/earthaccess/kerchunk.py index eb3f4cae..02533a0e 100644 --- a/earthaccess/kerchunk.py +++ b/earthaccess/kerchunk.py @@ -7,14 +7,14 @@ def _get_chunk_metadata( - granuale: earthaccess.results.DataGranule, + granule: earthaccess.results.DataGranule, fs: fsspec.AbstractFileSystem | s3fs.S3FileSystem, ) -> list[dict]: from kerchunk.hdf import SingleHdf5ToZarr metadata = [] access = "direct" if isinstance(fs, s3fs.S3FileSystem) else "indirect" - for url in granuale.data_links(access=access): + for url in granule.data_links(access=access): with fs.open(url) as inf: h5chunks = SingleHdf5ToZarr(inf, url) m = h5chunks.translate() @@ -23,7 +23,7 @@ def _get_chunk_metadata( def consolidate_metadata( - granuales: list[earthaccess.results.DataGranule], + granules: list[earthaccess.results.DataGranule], kerchunk_options: dict | None = None, access: str = "direct", outfile: str | None = None, @@ -39,13 +39,13 @@ def consolidate_metadata( ) from e if access == "direct": - fs = earthaccess.get_s3fs_session(provider=granuales[0]["meta"]["provider-id"]) + fs = earthaccess.get_s3fs_session(provider=granules[0]["meta"]["provider-id"]) else: fs = earthaccess.get_fsspec_https_session() - # Get metadata for each granuale + # Get metadata for each granule get_chunk_metadata = dask.delayed(_get_chunk_metadata) - chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granuales]) + chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granules]) chunks = sum(chunks, start=[]) # Get combined metadata object diff --git a/earthaccess/results.py b/earthaccess/results.py index 4861f781..3faaf8fc 100644 --- a/earthaccess/results.py +++ b/earthaccess/results.py @@ -284,11 +284,11 @@ def _derive_s3_link(self, links: List[str]) -> List[str]: def data_links( self, access: Optional[str] = None, in_region: bool = False ) -> List[str]: - """Returns the data links form a granule + """Returns the data links from a granule. Parameters: access: direct or external. - direct means in-region access for cloud-hosted collections. + Direct means in-region access for cloud-hosted collections. in_region: True if we are running in us-west-2. It is meant for the store class. diff --git a/earthaccess/search.py b/earthaccess/search.py index 15f0b49f..e8074368 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -273,7 +273,7 @@ def temporal( self, date_from: str, date_to: str, exclude_boundary: bool = False ) -> Type[CollectionQuery]: """Filter by an open or closed date range. Dates can be provided as datetime objects - or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls. + or ISO 8601 formatted strings. Multiple ranges can be provided by successive calls to this method before calling execute(). Parameters: @@ -382,12 +382,14 @@ def parameters(self, **kwargs: Any) -> Type[CollectionQuery]: return self def provider(self, provider: str = "") -> Type[CollectionQuery]: - """Only match collections from a given provider, a NASA datacenter or DAAC can have 1 or more providers - i.e. PODAAC is a data center or DAAC, PODAAC is the default provider for on prem data, POCLOUD is + """Only match collections from a given provider. + A NASA datacenter or DAAC can have one or more providers. + For example, PODAAC is a data center or DAAC, + PODAAC is the default provider for on-prem data, and POCLOUD is the PODAAC provider for their data in the cloud. Parameters: - provider: a provider code for any DAAC. e.g. POCLOUD, NSIDC_CPRD, etc. + provider: a provider code for any DAAC, e.g. POCLOUD, NSIDC_CPRD, etc. """ self.params["provider"] = provider return self @@ -452,8 +454,8 @@ def granule_name(self, granule_name: str) -> Type[CollectionQuery]: queries using the readable_granule_name metadata field. ???+ Tip - We can use wirldcards on a granule name to further refine our search - i.e. MODGRNLD.*.daily.* + We can use wildcards on a granule name to further refine our search, + e.g. MODGRNLD.*.daily.* Parameters: granule_name: granule name (accepts wildcards) diff --git a/earthaccess/store.py b/earthaccess/store.py index 1a615631..e20ef10e 100644 --- a/earthaccess/store.py +++ b/earthaccess/store.py @@ -106,7 +106,7 @@ def __init__(self, auth: Any, pre_authorize: bool = False) -> None: self._requests_cookies: Dict[str, Any] = {} self.set_requests_session(oauth_profile) if pre_authorize: - # collect cookies from other daacs + # collect cookies from other DAACs for url in DAAC_TEST_URLS: self.set_requests_session(url) @@ -348,7 +348,7 @@ def _open_granules( if granules[0].cloud_hosted: access = "direct" provider = granules[0]["meta"]["provider-id"] - # if the data has its own S3 credentials endpoint we'll use it + # if the data has its own S3 credentials endpoint, we will use it endpoint = self._own_s3_credentials(granules[0]["umm"]["RelatedUrls"]) if endpoint is not None: print(f"using endpoint: {endpoint}") @@ -473,7 +473,7 @@ def get( files = self._get(granules, local_path, provider, threads) return files else: - raise ValueError("List of URLs or DataGranule isntances expected") + raise ValueError("List of URLs or DataGranule instances expected") @singledispatchmethod def _get( diff --git a/tests/integration/test_kerchunk.py b/tests/integration/test_kerchunk.py index 39c95e99..58f93077 100644 --- a/tests/integration/test_kerchunk.py +++ b/tests/integration/test_kerchunk.py @@ -20,21 +20,21 @@ @pytest.fixture(scope="module") -def granuales(): - granuales = earthaccess.search_data( +def granules(): + granules = earthaccess.search_data( count=2, short_name="SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205", cloud_hosted=True, ) - return granuales + return granules @pytest.mark.parametrize("protocol", ["", "file://"]) -def test_consolidate_metadata_outfile(tmp_path, granuales, protocol): +def test_consolidate_metadata_outfile(tmp_path, granules, protocol): outfile = f"{protocol}{tmp_path / 'metadata.json'}" assert not os.path.exists(outfile) result = earthaccess.consolidate_metadata( - granuales, + granules, outfile=outfile, access="indirect", kerchunk_options={"concat_dims": "Time"}, @@ -43,9 +43,9 @@ def test_consolidate_metadata_outfile(tmp_path, granuales, protocol): assert result == outfile -def test_consolidate_metadata_memory(tmp_path, granuales): +def test_consolidate_metadata_memory(tmp_path, granules): result = earthaccess.consolidate_metadata( - granuales, + granules, access="indirect", kerchunk_options={"concat_dims": "Time"}, ) @@ -54,10 +54,10 @@ def test_consolidate_metadata_memory(tmp_path, granuales): @pytest.mark.parametrize("output", ["file", "memory"]) -def test_consolidate_metadata(tmp_path, granuales, output): +def test_consolidate_metadata(tmp_path, granules, output): xr = pytest.importorskip("xarray") # Open directly with `earthaccess.open` - expected = xr.open_mfdataset(earthaccess.open(granuales)) + expected = xr.open_mfdataset(earthaccess.open(granules)) # Open with kerchunk consolidated metadata file if output == "file": @@ -65,7 +65,7 @@ def test_consolidate_metadata(tmp_path, granuales, output): else: kwargs = {} metadata = earthaccess.consolidate_metadata( - granuales, access="indirect", kerchunk_options={"concat_dims": "Time"}, **kwargs + granules, access="indirect", kerchunk_options={"concat_dims": "Time"}, **kwargs ) fs = earthaccess.get_fsspec_https_session() From ebb15a16cb66d4503465ef6e2f3e83cadcf88a80 Mon Sep 17 00:00:00 2001 From: danielfromearth Date: Wed, 7 Feb 2024 16:23:33 -0500 Subject: [PATCH 16/16] further docstring cleanup, grammar --- earthaccess/api.py | 17 ++++++++-------- earthaccess/results.py | 8 ++++---- earthaccess/search.py | 13 ++++++------ earthaccess/store.py | 46 ++++++++++++++++++++++-------------------- 4 files changed, 43 insertions(+), 41 deletions(-) diff --git a/earthaccess/api.py b/earthaccess/api.py index 9a6422a8..a7d35fb0 100644 --- a/earthaccess/api.py +++ b/earthaccess/api.py @@ -40,7 +40,7 @@ def search_datasets( kwargs (Dict): arguments to CMR: - * **keyword**: case-insensitive and support wildcards ? and *, + * **keyword**: case-insensitive and supports wildcards ? and * * **short_name**: e.g. ATL08 * **doi**: DOI for a dataset * **daac**: e.g. NSIDC or PODAAC @@ -51,7 +51,7 @@ def search_datasets( `(lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)` Returns: - a list of DataCollection results that can be used to get information about a + A list of DataCollection results that can be used to get information about a dataset, e.g. concept_id, doi, etc. Examples: @@ -90,7 +90,7 @@ def search_data( kwargs (Dict): arguments to CMR: - * **short_name**: dataset short name e.g. ATL08 + * **short_name**: dataset short name, e.g. ATL08 * **version**: dataset version * **doi**: DOI for a dataset * **daac**: e.g. NSIDC or PODAAC @@ -173,7 +173,7 @@ def download( Parameters: granules: a granule, list of granules, a granule link (HTTP), or a list of granule links (HTTP) local_path: local directory to store the remote data granules - provider: if we download a list of URLs we need to specify the provider. + provider: if we download a list of URLs, we need to specify the provider. threads: parallel number of threads to use to download the files, adjust as necessary, default = 8 Returns: @@ -269,7 +269,7 @@ def get_fsspec_https_session() -> AbstractFileSystem: """Returns a fsspec session that can be used to access datafiles across many different DAACs. Returns: - an fsspec instance able to access data across DAACs + An fsspec instance able to access data across DAACs. Examples: ```python @@ -280,7 +280,6 @@ def get_fsspec_https_session() -> AbstractFileSystem: with fs.open(DAAC_GRANULE) as f: f.read(10) ``` - """ session = earthaccess.__store__.get_fsspec_session() return session @@ -288,11 +287,11 @@ def get_fsspec_https_session() -> AbstractFileSystem: def get_requests_https_session() -> requests.Session: """Returns a requests Session instance with an authorized bearer token. - This is useful to make requests to restricted URLs like data granules or services that + This is useful for making requests to restricted URLs, such as data granules or services that require authentication with NASA EDL. Returns: - an authenticated requests Session instance. + An authenticated requests Session instance. Examples: ```python @@ -324,7 +323,7 @@ def get_s3fs_session( `earthaccess` will use the metadata from CMR to obtain the S3 Endpoint. Returns: - an authenticated s3fs session valid for 1 hour + An authenticated s3fs session valid for 1 hour. """ daac = _normalize_location(daac) provider = _normalize_location(provider) diff --git a/earthaccess/results.py b/earthaccess/results.py index 3faaf8fc..6d91ca7e 100644 --- a/earthaccess/results.py +++ b/earthaccess/results.py @@ -310,12 +310,12 @@ def data_links( # we have the s3 links so we return those return s3_links else: - # Even though we are in us-west-2, the user wants the HTTPS links - # used in region they are S3 signed links from TEA - # https://github.com/asfadmin/thin-egress-app + # Even though we are in us-west-2, the user wants the HTTPS links used in-region. + # They are S3 signed links from TEA. + # return https_links else: - # we are not in region + # we are not in-region if access == "direct": # maybe the user wants to collect S3 links and use them later # from the cloud diff --git a/earthaccess/search.py b/earthaccess/search.py index e8074368..aa410985 100644 --- a/earthaccess/search.py +++ b/earthaccess/search.py @@ -428,15 +428,16 @@ def orbit_number(self, orbit1: int, orbit2: int) -> Type[GranuleQuery]: return self def cloud_hosted(self, cloud_hosted: bool = True) -> Type[CollectionQuery]: - """Only match granules that are hosted in the cloud. This is valid for public - collections and if we are using the short_name parameter. Concept-Id is unambiguous. + """Only match granules that are hosted in the cloud. + This is valid for public collections and when using the short_name parameter. + Concept-Id is unambiguous. ???+ Tip - Cloud hosted collections can be public or restricted. - Restricted collections will not be matched using this parameter + Cloud-hosted collections can be public or restricted. + Restricted collections will not be matched using this parameter. Parameters: - cloud_hosted (Boolean): True to require granules only be online + cloud_hosted: True to require granules only be online """ if not isinstance(cloud_hosted, bool): raise TypeError("cloud_hosted must be of type bool") @@ -455,7 +456,7 @@ def granule_name(self, granule_name: str) -> Type[CollectionQuery]: ???+ Tip We can use wildcards on a granule name to further refine our search, - e.g. MODGRNLD.*.daily.* + e.g. `MODGRNLD.*.daily.*`. Parameters: granule_name: granule name (accepts wildcards) diff --git a/earthaccess/store.py b/earthaccess/store.py index e20ef10e..4981fe50 100644 --- a/earthaccess/store.py +++ b/earthaccess/store.py @@ -155,7 +155,7 @@ def _running_in_us_west_2(self) -> bool: return False if resp.status_code == 200 and b"us-west-2" == resp.content: - # On AWS in region us-west-2 + # On AWS, in region us-west-2 return True return False @@ -163,13 +163,14 @@ def set_requests_session( self, url: str, method: str = "get", bearer_token: bool = False ) -> None: """Sets up a `requests` session with bearer tokens that are used by CMR. - Mainly used to get the authentication cookies from different DAACs and URS - This HTTPS session can be used to download granules if we want to use a direct, lower level API + Mainly used to get the authentication cookies from different DAACs and URS. + This HTTPS session can be used to download granules if we want to use a direct, + lower level API. Parameters: url: used to test the credentials and populate the class auth cookies - method: HTTP method to test. default: "GET" - bearer_token: if true will be used for authenticated queries on CMR + method: HTTP method to test, default: "GET" + bearer_token: if true, will be used for authenticated queries on CMR Returns: fsspec HTTPFileSystem (aiohttp client session) @@ -203,7 +204,7 @@ def get_s3fs_session( """Returns a s3fs instance for a given cloud provider / DAAC. Parameters: - daac: any of the DAACs e.g. NSIDC, PODAAC + daac: any of the DAACs, e.g. NSIDC, PODAAC provider: a data provider if we know them, e.g. PODAAC -> POCLOUD endpoint: pass the URL for the credentials directly @@ -262,7 +263,8 @@ def get_s3fs_session( @lru_cache def get_fsspec_session(self) -> fsspec.AbstractFileSystem: """Returns a fsspec HTTPS session with bearer tokens that are used by CMR. - This HTTPS session can be used to download granules if we want to use a direct, lower level API + This HTTPS session can be used to download granules if we want to use a direct, + lower level API. Returns: fsspec HTTPFileSystem (aiohttp client session) @@ -279,7 +281,8 @@ def get_fsspec_session(self) -> fsspec.AbstractFileSystem: def get_requests_session(self, bearer_token: bool = True) -> requests.Session: """Returns a requests HTTPS session with bearer tokens that are used by CMR. - This HTTPS session can be used to download granules if we want to use a direct, lower level API + This HTTPS session can be used to download granules if we want to use a direct, + lower level API. Parameters: bearer_token: if true, will be used for authenticated queries on CMR @@ -303,7 +306,7 @@ def open( provider: an option Returns: - a list of s3fs "file pointers" to s3 files. + A list of s3fs "file pointers" to s3 files. """ if len(granules): return self._open(granules, provider) @@ -324,7 +327,7 @@ def _open( provider: an option Returns: - a list of s3fs "file pointers" to s3 files. + A list of s3fs "file pointers" to s3 files. """ raise NotImplementedError("granules should be a list of DataGranule or URLs") @@ -454,11 +457,10 @@ def get( the data will be effectively downloaded to a local directory. Parameters: - granules: a list of granules(DataGranule) instances or a list of granule links (HTTP) - local_path: local directory to store the remote data granules - access: direct or on_prem, if set it will use it for the access method. - threads: parallel number of threads to use to download the files; - adjust as necessary, default = 8 + granules: A list of granules(DataGranule) instances or a list of granule links (HTTP). + local_path: Local directory to store the remote data granules. + threads: Parallel number of threads to use to download the files; + adjust as necessary, default = 8. Returns: List of downloaded files @@ -493,10 +495,10 @@ def _get( the data will be effectively downloaded to a local directory. Parameters: - granules: a list of granules(DataGranule) instances or a list of granule links (HTTP) - local_path: local directory to store the remote data granules - access: direct or on_prem, if set it will use it for the access method. - threads: parallel number of threads to use to download the files, adjust as necessary, default = 8 + granules: A list of granules (DataGranule) instances or a list of granule links (HTTP). + local_path: Local directory to store the remote data granules + threads: Parallel number of threads to use to download the files; + adjust as necessary, default = 8. Returns: None @@ -548,7 +550,7 @@ def _get_granules( cloud_hosted = granules[0].cloud_hosted access = "direct" if (cloud_hosted and self.in_region) else "external" data_links = list( - # we are not in region + # we are not in-region chain.from_iterable( granule.data_links(access=access, in_region=self.in_region) for granule in granules @@ -587,7 +589,7 @@ def _download_file(self, url: str, directory: str) -> str: directory: local directory Returns: - a local filepath or an exception + A local filepath or an exception. """ # If the get data link is an Opendap location if "opendap" in url and url.endswith(".html"): @@ -628,7 +630,7 @@ def _download_onprem_granules( adjust as necessary, default = 8 Returns: - a list of local filepaths to which the files were downloaded + A list of local filepaths to which the files were downloaded. """ if urls is None: raise ValueError("The granules didn't provide a valid GET DATA link")