From ad84bf81996c0ed9e581754124fd6a7569772ea4 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:27:13 -0500 Subject: [PATCH 01/15] add pooch to dependencies, add helpers file, first pass at a pytest-xdist-compatible implementation of pooch-based testdata fetching --- environment-dev.yml | 1 + environment.yml | 1 + pyproject.toml | 7 +- xhydro/testing/__init__.py | 5 +- xhydro/testing/helpers.py | 166 ++++++++++++++++++++++++++++++++++++ xhydro/testing/registry.txt | 0 6 files changed, 177 insertions(+), 3 deletions(-) create mode 100644 xhydro/testing/helpers.py create mode 100644 xhydro/testing/registry.txt diff --git a/environment-dev.yml b/environment-dev.yml index 60487057..82299013 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -6,6 +6,7 @@ dependencies: # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! # Main packages - numpy + - pooch >=1.8.0 - statsmodels - xarray - xclim >=0.45.0 diff --git a/environment.yml b/environment.yml index ffbcea69..73d977fb 100644 --- a/environment.yml +++ b/environment.yml @@ -6,6 +6,7 @@ dependencies: # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! # Main packages - numpy + - pooch >=1.8.0 - statsmodels - xarray - xclim >=0.45.0 diff --git a/pyproject.toml b/pyproject.toml index b474e9fd..58d3547d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ dynamic = ["description", "version"] dependencies = [ # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! 
"numpy", + "pooch>=1.8.0", "statsmodels", "xarray", "xclim>=0.45.0", @@ -146,7 +147,8 @@ include = [ "docs/make.bat", "tests/*.py", "tox.ini", - "xhydro" + "xhydro", + "xhydro/testing/registry.txt" ] exclude = [ "*.py[co]", @@ -161,7 +163,8 @@ exclude = [ "Makefile", "docs/_*", "docs/apidoc/modules.rst", - "docs/apidoc/xhydro*.rst" + "docs/apidoc/xhydro*.rst", + "xhydro/testing/data/*" ] [tool.isort] diff --git a/xhydro/testing/__init__.py b/xhydro/testing/__init__.py index 92c8062e..8bb3e9cf 100644 --- a/xhydro/testing/__init__.py +++ b/xhydro/testing/__init__.py @@ -1 +1,4 @@ -"""Helpers for testing.""" +"""Testing utilities and helper functions.""" + +from .helpers import * +from .utils import * diff --git a/xhydro/testing/helpers.py b/xhydro/testing/helpers.py new file mode 100644 index 00000000..51983367 --- /dev/null +++ b/xhydro/testing/helpers.py @@ -0,0 +1,166 @@ +"""Helper functions for testing data management.""" + +import importlib.resources as ilr +import logging +import os +from pathlib import Path +from typing import Optional +from urllib.parse import urljoin + +import pooch + +from xhydro import __version__ as __xhydro_version__ + +__all__ = [ + "DATA_DIR", + "DATA_URL", + "DEVEREAUX", + "generate_registry", + "populate_testing_data", +] + +_default_cache_dir = pooch.os_cache("xhydro-testdata") + +DATA_DIR = os.getenv("XHYDRO_DATA_DIR", _default_cache_dir) +"""Sets the directory to store the testing datasets. + +If not set, the default location will be used (based on ``platformdirs``, see :func:`pooch.os_cache`). + +Notes +----- +When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable: + +.. code-block:: console + + $ export XHYDRO_DATA_DIR="/path/to/my/data" + +or setting the variable at runtime: + +.. 
code-block:: console + + $ env XHYDRO_DATA_DIR="/path/to/my/data" pytest +""" + +TESTDATA_BRANCH = os.getenv("XHYDRO_TESTDATA_BRANCH", "main") +"""Sets the branch of hydrologie/xhydro-testdata to use when fetching testing datasets. + +Notes +----- +When running tests locally, this can be set for both `pytest` and `tox` by exporting the variable: + +.. code-block:: console + + $ export XHYDRO_TESTDATA_BRANCH="my_testing_branch" + +or setting the variable at runtime: + +.. code-block:: console + + $ env XHYDRO_TESTDATA_BRANCH="my_testing_branch" pytest +""" + +DATA_URL = f"https://github.com/hydrologie/xhydro-testdata/raw/{TESTDATA_BRANCH}" + + +def generate_registry( + filenames: Optional[list[str]] = None, base_url: str = DATA_URL +) -> None: + """Generate a registry file for the test data. + + Parameters + ---------- + filenames : list of str, optional + List of filenames to generate the registry file for. + If not provided, all files under xhydro/testing/data will be used. + base_url : str, optional + Base URL to the test data repository. 
+ """ + # Gather the data folder and registry file locations from installed package_data + data_folder = ilr.files("xhydro").joinpath("testing/data") + registry_file = ilr.files("xhydro").joinpath("testing/registry.txt") + + # Download the files to the installed xhydro/testing/data folder + if filenames is None: + with ilr.as_file(data_folder) as data: + for file in data.rglob("*"): + filename = file.relative_to(data).as_posix() + pooch.retrieve( + url=urljoin(base_url, filename), + known_hash=None, + fname=filename, + path=data_folder, + ) + + # Generate the registry file + with ilr.as_file(data_folder) as data, ilr.as_file(registry_file) as registry: + pooch.make_registry(data.as_posix(), registry.as_posix()) + + +def load_registry() -> dict[str, str]: + # Get registry file from package_data + registry_file = ilr.files("xhydro").joinpath("testing/registry.txt") + if registry_file.is_file(): + logging.info("Registry file found in package_data: %s", registry_file) + + registry = dict() + with registry_file.open() as buffer: + for entry in buffer.readlines(): + registry[entry.split()[0]] = entry.split()[1] + + return registry + + +DEVEREAUX = pooch.create( + path=pooch.os_cache("xhydro-testdata"), + base_url=DATA_URL, + version=__xhydro_version__, + version_dev="main", + env="XHYDRO_DATA_DIR", + allow_updates="XHYDRO_DATA_UPDATES", + registry=load_registry(), +) +"""Pooch registry instance for xhydro test data. + +Notes +----- +There are two environment variables that can be used to control the behavior of this registry: +- ``XHYDRO_DATA_DIR``: If this environment variable is set, it will be used as the base directory to store the data + files. The directory should be an absolute path (i.e., it should start with ``/``). Otherwise, + the default location will be used (based on ``platformdirs``, see :func:`pooch.os_cache`). 
+- ``XHYDRO_DATA_UPDATES``: If this environment variable is set, then the data files will be downloaded even if the + upstream hashes do not match. This is useful if you want to always use the latest version of the data files. + +Examples +-------- +Using the registry to download a file: + +.. code-block:: python + + from xhydro.testing.helpers import DEVEREAUX + import xarray as xr + + example_file = DEVEREAUX.fetch("example.nc") + data = xr.open_dataset(example_file) +""" + + +def populate_testing_data( + temp_folder: Optional[Path] = None, + branch: str = TESTDATA_BRANCH, + _local_cache: Path = _default_cache_dir, +): + """Populate the local cache with the testing data.""" + # Get registry file from package_data + registry = load_registry() + + # Set the local cache to the temp folder + if temp_folder is not None: + _local_cache = temp_folder + # Set the branch + DEVEREAUX.version_dev = branch + # Set the local cache + DEVEREAUX.path = _local_cache + + # Download the files + for filename in registry.keys(): + DEVEREAUX.fetch(filename) diff --git a/xhydro/testing/registry.txt b/xhydro/testing/registry.txt new file mode 100644 index 00000000..e69de29b From 2f8f25cc14422f4f38384f99d05d80ea6fdc597b Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:27:40 -0500 Subject: [PATCH 02/15] add __all__, pass ESMFMKFILE to tox --- tox.ini | 1 + xhydro/testing/utils.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/tox.ini b/tox.ini index f53c4dcd..638d60e1 100644 --- a/tox.ini +++ b/tox.ini @@ -40,6 +40,7 @@ setenv = PYTHONPATH = {toxinidir} passenv = CI + ESMFMKFILE COVERALLS_* GITHUB_* extras = diff --git a/xhydro/testing/utils.py b/xhydro/testing/utils.py index 419eaa2a..dde7fd41 100644 --- a/xhydro/testing/utils.py +++ b/xhydro/testing/utils.py @@ -6,6 +6,10 @@ from pathlib import Path from typing import Optional, TextIO, Union +__all__ = [ + "publish_release_notes", +] + def publish_release_notes( 
style: str = "md", From db805da3b123ecd9b8e804420f52b4cb9fd8988e Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:30:08 -0500 Subject: [PATCH 03/15] expose load_registry --- xhydro/testing/helpers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xhydro/testing/helpers.py b/xhydro/testing/helpers.py index 51983367..03df96de 100644 --- a/xhydro/testing/helpers.py +++ b/xhydro/testing/helpers.py @@ -16,6 +16,7 @@ "DATA_URL", "DEVEREAUX", "generate_registry", + "load_registry", "populate_testing_data", ] @@ -97,11 +98,13 @@ def generate_registry( def load_registry() -> dict[str, str]: + """Load the registry file for the test data.""" # Get registry file from package_data registry_file = ilr.files("xhydro").joinpath("testing/registry.txt") if registry_file.is_file(): logging.info("Registry file found in package_data: %s", registry_file) + # Load the registry file registry = dict() with registry_file.open() as buffer: for entry in buffer.readlines(): From d2f20e78897e7e43a8a5fc52eb41e9f6aced9daf Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:33:16 -0500 Subject: [PATCH 04/15] update CHANGES.rst --- CHANGES.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.rst b/CHANGES.rst index 408d42f4..87acfd02 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -10,6 +10,7 @@ Contributors to this version: Trevor James Smith (:user:`Zeitsperre`). New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * Added French language support to the documentation. (:issue:`53`, :pull:`55`). +* Added a new set of functions to support creating and updating `pooch` registries, caching testing datasets from `hydrologie/xhydro-testdata`, and ensuring that testing datasets can be loaded into temporary directories. 
v0.3.0 (2023-12-01) ------------------- From 3ac0e3b885cd61793c1329daae179480f5797982 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:47:22 -0500 Subject: [PATCH 05/15] allow passing custom registry --- xhydro/testing/helpers.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/xhydro/testing/helpers.py b/xhydro/testing/helpers.py index 03df96de..e34de59d 100644 --- a/xhydro/testing/helpers.py +++ b/xhydro/testing/helpers.py @@ -4,7 +4,7 @@ import logging import os from pathlib import Path -from typing import Optional +from typing import Optional, Union from urllib.parse import urljoin import pooch @@ -97,12 +97,17 @@ def generate_registry( pooch.make_registry(data.as_posix(), registry.as_posix()) -def load_registry() -> dict[str, str]: +def load_registry(file: Optional[Union[str, Path]] = None) -> dict[str, str]: """Load the registry file for the test data.""" # Get registry file from package_data - registry_file = ilr.files("xhydro").joinpath("testing/registry.txt") - if registry_file.is_file(): - logging.info("Registry file found in package_data: %s", registry_file) + if file is None: + registry_file = ilr.files("xhydro").joinpath("testing/registry.txt") + if registry_file.is_file(): + logging.info("Registry file found in package_data: %s", registry_file) + else: + registry_file = Path(file) + if not registry_file.is_file(): + raise FileNotFoundError(f"Registry file not found: {registry_file}") # Load the registry file registry = dict() From 30efafc48ea54736c71455a2376eaec3e5398608 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 19 Dec 2023 15:51:21 -0500 Subject: [PATCH 06/15] allow passing custom registry to populate_testing_data --- xhydro/testing/helpers.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xhydro/testing/helpers.py b/xhydro/testing/helpers.py index e34de59d..7ae68913 
100644 --- a/xhydro/testing/helpers.py +++ b/xhydro/testing/helpers.py @@ -153,13 +153,14 @@ def load_registry(file: Optional[Union[str, Path]] = None) -> dict[str, str]: def populate_testing_data( + registry: Optional[Union[str, Path]] = None, temp_folder: Optional[Path] = None, branch: str = TESTDATA_BRANCH, _local_cache: Path = _default_cache_dir, ): """Populate the local cache with the testing data.""" - # Get registry file from package_data - registry = load_registry() + # Get registry file from package_data or provided path + registry = load_registry(registry) # Set the local cache to the temp folder if temp_folder is not None: From 4107b5bb4f6995f66e4108b8bd276607ff06ac13 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Dec 2023 21:01:31 +0000 Subject: [PATCH 07/15] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f4426575..1252f70a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -122,7 +122,6 @@ filename = ".cruft.json" search = "\"version\": \"{current_version}\"" replace = "\"version\": \"{new_version}\"" - [tool.coverage.run] relative_files = true include = ["xhydro/*"] From cce41e4369d4e4c95b2d7c27669353c127e25139 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 19 Dec 2023 16:13:44 -0500 Subject: [PATCH 08/15] update pre-commit configuration --- .pre-commit-config.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 8a024c74..c2312456 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,20 +29,20 @@ repos: hooks: - id: rst-inline-touching-normal - repo: https://github.com/psf/black-pre-commit-mirror - rev: 23.11.0 + rev: 23.12.0 hooks: - id: black - repo: 
https://github.com/PyCQA/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort exclude: ^docs/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.4 + rev: v0.1.8 hooks: - id: ruff - repo: https://github.com/pycqa/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 additional_dependencies: [ 'flake8-alphabetize', 'flake8-rst-docstrings' ] @@ -51,14 +51,14 @@ repos: rev: v0.3.9 hooks: - id: blackdoc - additional_dependencies: [ 'black==23.11.0' ] + additional_dependencies: [ 'black==23.12.0' ] - repo: https://github.com/adrienverge/yamllint.git rev: v1.33.0 hooks: - id: yamllint args: ['--config-file', '.yamllint.yaml'] - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.27.1 + rev: 0.27.3 hooks: - id: check-github-workflows - id: check-readthedocs From 16bd484a8d450c6101f90c0a36f1e298d9e5aa45 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 22 Dec 2023 13:57:18 -0500 Subject: [PATCH 09/15] add numpydoc checking hook --- .pre-commit-config.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c2312456..8a86b810 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -38,7 +38,7 @@ repos: - id: isort exclude: ^docs/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.8 + rev: v0.1.9 hooks: - id: ruff - repo: https://github.com/pycqa/flake8 @@ -47,6 +47,11 @@ repos: - id: flake8 additional_dependencies: [ 'flake8-alphabetize', 'flake8-rst-docstrings' ] args: [ '--config=.flake8' ] + - repo: https://github.com/numpy/numpydoc + rev: v1.6.0 + hooks: + - id: numpydoc-validation + exclude: 'tests|docs/conf.py' - repo: https://github.com/keewis/blackdoc rev: v0.3.9 hooks: From f859bf2a6364a5a5a93eef04242bf48618517579 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 22 Dec 2023 13:57:34 -0500 Subject: [PATCH 10/15] adjust docstrings and 
numpydoc configuration --- pyproject.toml | 16 ++++++++++++++++ xhydro/cc.py | 17 +++++++++++++++-- xhydro/indicators.py | 31 ++++++++++++++++--------------- xhydro/testing/helpers.py | 34 +++++++++++++++++++++++++++++++--- xhydro/testing/utils.py | 3 ++- 5 files changed, 80 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1252f70a..f2da0e27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -181,6 +181,22 @@ warn_unused_configs = true module = [] ignore_missing_imports = true +[tool.numpydoc_validation] +checks = [ + "all", # report on all checks, except the below + "ES01", + "EX01", + "GL01", + "SA01" +] +exclude = [ + # don't report on objects that match any of these regex + '\.undocumented_method$', + '\.__repr__$', + # any object starting with an underscore is a private object + '\._\w+' +] + [tool.pytest.ini_options] addopts = [ "--verbose", diff --git a/xhydro/cc.py b/xhydro/cc.py index faf92334..0723b93a 100644 --- a/xhydro/cc.py +++ b/xhydro/cc.py @@ -1,4 +1,5 @@ """Module to compute climate change statistics using xscen functions.""" +import xarray # Special imports from xscen from xscen import ( # FIXME: To be replaced with climatological_op once available @@ -17,8 +18,20 @@ # FIXME: To be deleted once climatological_op is available in xscen -def climatological_op(ds, **kwargs): - """Compute climatological operation. +def climatological_op(ds: xarray.Dataset, **kwargs: dict) -> xarray.Dataset: + r"""Compute climatological operation. + + Parameters + ---------- + ds : xarray.Dataset + Input dataset. + \*\*kwargs : dict + Keyword arguments passed to :py:func:`xscen.aggregate.climatological_mean`. + + Returns + ------- + xarray.Dataset + Output dataset. 
Notes ----- diff --git a/xhydro/indicators.py b/xhydro/indicators.py index 9d0a3b8f..455a657b 100644 --- a/xhydro/indicators.py +++ b/xhydro/indicators.py @@ -64,36 +64,37 @@ def get_yearly_op( missing_options: Optional[dict] = None, interpolate_na: bool = False, ) -> xr.Dataset: - """ - Compute yearly operations on a variable. + """Compute yearly operations on a variable. Parameters ---------- - ds: xr.Dataset + ds : xr.Dataset Dataset containing the variable to compute the operation on. - op: str + op : str Operation to compute. One of ["max", "min", "mean", "sum"]. - input_var: str + input_var : str Name of the input variable. Defaults to "streamflow". - window: int + window : int Size of the rolling window. A "mean" operation is performed on the rolling window before the call to xclim. This parameter cannot be used with the "sum" operation. - timeargs: dict, optional + timeargs : dict, optional Dictionary of time arguments for the operation. Keys are the name of the period that will be added to the results (e.g. "winter", "summer", "annual"). Values are up to two dictionaries, with both being optional. The first is {'freq': str}, where str is a frequency supported by xarray (e.g. "YS", "AS-JAN", "AS-DEC"). It needs to be a yearly frequency. Defaults to "AS-JAN". - The second is an indexer as supported by :py:func:`xclim.core.calendar.select_time`. Defaults to {}, which means the whole year. + The second is an indexer as supported by :py:func:`xclim.core.calendar.select_time`. + Defaults to {}, which means the whole year. See :py:func:`xclim.core.calendar.select_time` for more information. - Examples: {"winter": {"freq": "AS-DEC", "date_bounds": ['12-01', '02-28']}}, {"jan": {"freq": "YS", "month": 1}}, {"annual": {}}. - missing: str + Examples: {"winter": {"freq": "AS-DEC", "date_bounds": ["12-01", "02-28"]}}, {"jan": {"freq": "YS", "month": 1}}, {"annual": {}}. + missing : str How to handle missing values. One of "skip", "any", "at_least_n", "pct", "wmo". 
See :py:func:`xclim.core.missing` for more information. - missing_options: dict, optional + missing_options : dict, optional Dictionary of options for the missing values' method. See :py:func:`xclim.core.missing` for more information. - interpolate_na: bool - Whether to interpolate missing values before computing the operation. Only used with the "sum" operation. Defaults to False. + interpolate_na : bool + Whether to interpolate missing values before computing the operation. Only used with the "sum" operation. + Defaults to False. Returns ------- @@ -105,7 +106,6 @@ def get_yearly_op( ----- If you want to perform a frequency analysis on a frequency that is finer than annual, simply use multiple timeargs (e.g. 1 per month) to create multiple distinct variables. - """ missing_options = missing_options or {} timeargs = timeargs or {"annual": {}} @@ -174,7 +174,8 @@ def get_yearly_op( and freq != "AS-DEC" ): warnings.warn( - "The frequency is not AS-DEC, but the season indexer includes DJF. This will lead to misleading results." + "The frequency is not AS-DEC, but the season indexer includes DJF. " + "This will lead to misleading results." ) elif ( "doy_bounds" in indexer.keys() diff --git a/xhydro/testing/helpers.py b/xhydro/testing/helpers.py index 7ae68913..287243df 100644 --- a/xhydro/testing/helpers.py +++ b/xhydro/testing/helpers.py @@ -98,7 +98,18 @@ def generate_registry( def load_registry(file: Optional[Union[str, Path]] = None) -> dict[str, str]: - """Load the registry file for the test data.""" + """Load the registry file for the test data. + + Parameters + ---------- + file : str or Path, optional + Path to the registry file. If not provided, the registry file found within the package data will be used. + + Returns + ------- + dict + Dictionary of filenames and hashes. 
+ """ # Get registry file from package_data if file is None: registry_file = ilr.files("xhydro").joinpath("testing/registry.txt") @@ -157,8 +168,25 @@ def populate_testing_data( temp_folder: Optional[Path] = None, branch: str = TESTDATA_BRANCH, _local_cache: Path = _default_cache_dir, -): - """Populate the local cache with the testing data.""" +) -> None: + """Populate the local cache with the testing data. + + Parameters + ---------- + registry : str or Path, optional + Path to the registry file. If not provided, the registry file from package_data will be used. + temp_folder : Path, optional + Path to a temporary folder to use as the local cache. If not provided, the default location will be used. + branch : str, optional + Branch of hydrologie/xhydro-testdata to use when fetching testing datasets. + _local_cache : Path, optional + Path to the local cache. Defaults to the default location. + + Returns + ------- + None + The testing data will be downloaded to the local cache. + """ # Get registry file from package_data or provided path registry = load_registry(registry) diff --git a/xhydro/testing/utils.py b/xhydro/testing/utils.py index dde7fd41..ff52d2f4 100644 --- a/xhydro/testing/utils.py +++ b/xhydro/testing/utils.py @@ -30,7 +30,8 @@ def publish_release_notes( Returns ------- - str, optional + str or None + Formatted release notes as a string, if `file` is not provided. 
Notes ----- From d73d472769a41f0b4086964c7b322d57d4b4a5e2 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:16:41 -0500 Subject: [PATCH 11/15] temporary pin for pydantic --- environment-dev.yml | 1 + environment.yml | 3 ++- pyproject.toml | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/environment-dev.yml b/environment-dev.yml index 82299013..957732bd 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -7,6 +7,7 @@ dependencies: # Main packages - numpy - pooch >=1.8.0 + - pydantic >=2.0,<2.5.3 - statsmodels - xarray - xclim >=0.45.0 diff --git a/environment.yml b/environment.yml index 73d977fb..5f6dcca3 100644 --- a/environment.yml +++ b/environment.yml @@ -6,7 +6,8 @@ dependencies: # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! # Main packages - numpy - - pooch >=1.8.0 + - pooch >=1.8.0\ + - pydantic >=2.0,<2.5.3 - statsmodels - xarray - xclim >=0.45.0 diff --git a/pyproject.toml b/pyproject.toml index f2da0e27..266672f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ # Don't forget to sync changes between environment.yml, environment-dev.yml, and pyproject.toml! 
"numpy", "pooch>=1.8.0", + "pydantic>=2.0,<2.5.3", "statsmodels", "xarray", "xclim>=0.45.0", From 9654d97847aba7be033d10110c58b0fbfd590999 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:29:41 -0500 Subject: [PATCH 12/15] address docstring formatting --- xhydro/testing/helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xhydro/testing/helpers.py b/xhydro/testing/helpers.py index 287243df..7e662b9f 100644 --- a/xhydro/testing/helpers.py +++ b/xhydro/testing/helpers.py @@ -144,10 +144,10 @@ def load_registry(file: Optional[Union[str, Path]] = None) -> dict[str, str]: ----- There are two environment variables that can be used to control the behavior of this registry: - ``XHYDRO_DATA_DIR``: If this environment variable is set, it will be used as the base directory to store the data - files. The directory should be an absolute path (i.e., it should start with ``/``). Otherwise, - the default location will be used (based on ``platformdirs``, see :func:`pooch.os_cache`). + files. The directory should be an absolute path (i.e., it should start with ``/``). Otherwise, + the default location will be used (based on ``platformdirs``, see :func:`pooch.os_cache`). - ``XHYDRO_DATA_UPDATES``: If this environment variable is set, then the data files will be downloaded even if the - upstream hashes do not match. This is useful if you want to always use the latest version of the data files. + upstream hashes do not match. This is useful if you want to always use the latest version of the data files. 
Examples -------- From 3ddb151ca654902dfd4870676a4f5b4fdd0cc562 Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Fri, 22 Dec 2023 14:55:25 -0500 Subject: [PATCH 13/15] update CHANGES.rst and fix docstring --- CHANGES.rst | 13 +++++++++++++ xhydro/testing/helpers.py | 14 ++++++++------ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 3e9d4f59..00aedb6d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -11,6 +11,19 @@ New features and enhancements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ * Added French language support to the documentation. (:issue:`53`, :pull:`55`). * Added a new set of functions to support creating and updating `pooch` registries, caching testing datasets from `hydrologie/xhydro-testdata`, and ensuring that testing datasets can be loaded into temporary directories. +* `xhydro` is now configured to use `pooch` to download and cache testing datasets from `hydrologie/xhydro-testdata`. (:pull:`62`). + +Breaking changes +^^^^^^^^^^^^^^^^ +* Added `pooch` as an installation dependency. (:pull:`62`). + +Internal changes +^^^^^^^^^^^^^^^^ +* Added a new module for testing purposes: `xhydro.testing.helpers` with some new functions. (:pull:`62`): + * `generate_registry`: Parses data found in package (`xhydro.testing.data`), and adds it to the `registry.txt` + * `load_registry`: Loads installed (or custom) registry and returns dictionary + * `populate_testing_data`: Fetches the registry and optionally caches files at a different location (helpful for `pytest-xdist`). +* Added a `pre-commit` hook (`numpydoc`) to ensure that `numpy` docstrings are formatted correctly. (:pull:`62`). 
v0.3.0 (2023-12-01) ------------------- diff --git a/xhydro/testing/helpers.py b/xhydro/testing/helpers.py index 7e662b9f..b5d32142 100644 --- a/xhydro/testing/helpers.py +++ b/xhydro/testing/helpers.py @@ -142,12 +142,14 @@ def load_registry(file: Optional[Union[str, Path]] = None) -> dict[str, str]: Notes ----- -There are two environment variables that can be used to control the behavior of this registry: -- ``XHYDRO_DATA_DIR``: If this environment variable is set, it will be used as the base directory to store the data - files. The directory should be an absolute path (i.e., it should start with ``/``). Otherwise, - the default location will be used (based on ``platformdirs``, see :func:`pooch.os_cache`). -- ``XHYDRO_DATA_UPDATES``: If this environment variable is set, then the data files will be downloaded even if the - upstream hashes do not match. This is useful if you want to always use the latest version of the data files. +There are two environment variables that can be used to control the behaviour of this registry: + + - ``XHYDRO_DATA_DIR``: If this environment variable is set, it will be used as the base directory to store the data + files. The directory should be an absolute path (i.e., it should start with ``/``). Otherwise, + the default location will be used (based on ``platformdirs``, see :func:`pooch.os_cache`). + + - ``XHYDRO_DATA_UPDATES``: If this environment variable is set, then the data files will be downloaded even if the + upstream hashes do not match. This is useful if you want to always use the latest version of the data files. 
Examples -------- From 5073fc88b0312100c81dd7f402b932b1f6d182de Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 8 Jan 2024 14:56:57 -0500 Subject: [PATCH 14/15] Update environment-dev.yml Co-authored-by: RondeauG <38501935+RondeauG@users.noreply.github.com> --- environment-dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment-dev.yml b/environment-dev.yml index 957732bd..4049c6e1 100644 --- a/environment-dev.yml +++ b/environment-dev.yml @@ -7,7 +7,7 @@ dependencies: # Main packages - numpy - pooch >=1.8.0 - - pydantic >=2.0,<2.5.3 + - pydantic >=2.0,<2.5.3 # FIXME: Remove pin once our dependencies (xclim, xscen) support pydantic 2.5.3 - statsmodels - xarray - xclim >=0.45.0 From 04dcc16681f7bb5d572b0da3c09381807f1e90ed Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Mon, 8 Jan 2024 17:22:30 -0500 Subject: [PATCH 15/15] add notices and warnings about the testdata env vars --- CONTRIBUTING.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index f8205915..dbfff75f 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -106,6 +106,14 @@ Ready to contribute? Here's how to set up ``xhydro`` for local development. # Or, to run multiple build tests $ tox + .. note:: + + Running `pytest` or `tox` will automatically fetch and cache the testing data for the package to your local cache (using the `platformdirs` library). On Linux, this is located at ``XDG_CACHE_HOME`` (usually ``~/.cache``). On Windows, this is located at ``%LOCALAPPDATA%`` (usually ``C:\Users\username\AppData\Local``). On MacOS, this is located at ``~/Library/Caches``. + + If for some reason you wish to cache this data elsewhere, you can set the ``XHYDRO_DATA_DIR`` environment variable to a different location before running the tests. 
For example, to cache the data in the current working directory, run:: + + $ export XHYDRO_DATA_DIR=$(pwd)/.cache + #. Commit your changes and push your branch to GitHub:: $ git add . @@ -134,6 +142,12 @@ Ready to contribute? Here's how to set up ``xhydro`` for local development. You will have contributed your first changes to ``xhydro``! +.. warning:: + + If your Pull Request relies on modifications to the testing data of `xhydro`, you will need to update the testing data repository as well. As a preliminary testing measure, the branch of the testing data can be modified at testing time (from `main`) by setting the ``XHYDRO_TESTDATA_BRANCH`` environment variable to the branch name of the ``xhydro-testdata`` repository. + + Be sure to consult the ReadMe found at https://github.com/hydrologie/xhydro-testdata as well. + Pull Request Guidelines -----------------------