From 0095fe606796185e48479c48d3ea36a94bb0d685 Mon Sep 17 00:00:00 2001 From: Guillaume Maze Date: Tue, 17 Sep 2024 11:03:34 +0200 Subject: [PATCH] Allows for low level http fetching errors to be ignored - useful for a Dask client as a parallel option --- argopy/data_fetchers/erddap_data.py | 10 ++--- argopy/stores/filesystems.py | 63 ++++++++++++++++++++++------- 2 files changed, 53 insertions(+), 20 deletions(-) diff --git a/argopy/data_fetchers/erddap_data.py b/argopy/data_fetchers/erddap_data.py index c0b27980..111d29c3 100644 --- a/argopy/data_fetchers/erddap_data.py +++ b/argopy/data_fetchers/erddap_data.py @@ -432,11 +432,11 @@ def _bgc_vlist_avail(self): for p in params: if p.lower() in self._bgc_vlist_erddap: results.append(p) - else: - log.error( - "Removed '%s' because it's not available on the erddap, but it must !" - % p - ) + # else: + # log.error( + # "Removed '%s' because it is not available on the erddap server (%s), but it should !" + # % (p, self._server) + # ) return results diff --git a/argopy/stores/filesystems.py b/argopy/stores/filesystems.py index 482fbcbd..9cf27acc 100644 --- a/argopy/stores/filesystems.py +++ b/argopy/stores/filesystems.py @@ -673,6 +673,7 @@ def download_url( n_attempt: int = 1, max_attempt: int = 5, cat_opts: dict = {}, + errors: str = 'raise', *args, **kwargs, ): @@ -683,19 +684,27 @@ def download_url( """ def make_request( - ffs, url, n_attempt: int = 1, max_attempt: int = 5, cat_opts: dict = {} + ffs, url, n_attempt: int = 1, max_attempt: int = 5, cat_opts: dict = {}, errors: str = 'raise', ): data = None if n_attempt <= max_attempt: try: data = ffs.cat_file(url, **cat_opts) + except FileNotFoundError as e: + if errors == 'raise': + raise e + elif errors == 'ignore': + log.error('FileNotFoundError raised from: %s' % url) except aiohttp.ClientResponseError as e: if e.status == 413: - log.debug( - "Error %i (Payload Too Large) raised with %s" - % (e.status, url) - ) - raise + if errors == 'raise': + raise e + elif 
errors == 'ignore': + log.error( + "Error %i (Payload Too Large) raised with %s" + % (e.status, url) + ) + elif e.status == 429: retry_after = int(e.headers.get("Retry-After", 5)) log.debug( @@ -707,14 +716,26 @@ def make_request( else: # Handle other client response errors print(f"Error: {e}") - except aiohttp.ClientError: - # Handle other request exceptions - # print(f"Error: {e}") - raise + + except aiohttp.ClientError as e: + if errors == 'raise': + raise e + elif errors == 'ignore': + log.error(f"Error: {e}") + + except fsspec.FSTimeoutError as e: + if errors == 'raise': + raise e + elif errors == 'ignore': + log.error(f"Error: {e}") else: - raise ValueError( - f"Error: All attempts failed to download this url: {url}" - ) + if errors == 'raise': + raise ValueError( + f"Error: All attempts failed to download this url: {url}" + ) + elif errors == 'ignore': + log.error(f"Error: All attempts failed to download this url: {url}") + return data, n_attempt url = self.curateurl(url) @@ -724,14 +745,18 @@ def make_request( n_attempt=n_attempt, max_attempt=max_attempt, cat_opts=cat_opts, + errors=errors, ) if data is None: - raise FileNotFoundError(url) + if errors == 'raise': + raise FileNotFoundError(url) + elif errors == 'ignore': + log.error("FileNotFoundError: %s" % url) return data - def open_dataset(self, url, **kwargs): + def open_dataset(self, url, errors: str = 'raise', **kwargs): """Open and decode a xarray dataset from an url Parameters @@ -749,6 +774,14 @@ def open_dataset(self, url, **kwargs): if "download_url_opts" in kwargs: dwn_opts.update(kwargs["download_url_opts"]) data = self.download_url(url, **dwn_opts) + log.info(dwn_opts) + + if data is None: + if errors == 'raise': + raise DataNotFound(url) + elif errors == 'ignore': + log.error("DataNotFound: %s" % url) + return None if data[0:3] != b"CDF": raise TypeError(