diff --git a/.github/workflows/ci-pre-commit.yml b/.github/workflows/ci-pre-commit.yml new file mode 100644 index 0000000..92a3c07 --- /dev/null +++ b/.github/workflows/ci-pre-commit.yml @@ -0,0 +1,16 @@ +name: linting + +on: + push: + branches: "*" + pull_request: + branches: "*" + +jobs: + linting: + name: "pre-commit hooks" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + - uses: pre-commit/action@v2.0.3 diff --git a/.github/workflows/ci-pypi-deploy.yml b/.github/workflows/ci-pypi-deploy.yml new file mode 100644 index 0000000..23baf9e --- /dev/null +++ b/.github/workflows/ci-pypi-deploy.yml @@ -0,0 +1,24 @@ +name: package-release + +on: + workflow_dispatch: + pull_request: + push: + branches: + - master + release: + types: + - published + +jobs: + build: + name: build and upload release to pypi + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + - uses: casperdcl/deploy-pypi@v2 + with: + password: ${{ secrets.PYPI_TOKEN }} + pip: wheel -w dist/ --no-deps . + upload: ${{ github.event_name == 'release' && github.event.action == 'published' }} diff --git a/.github/workflows/python-package-pip.yml b/.github/workflows/python-package-pip.yml new file mode 100644 index 0000000..39a687b --- /dev/null +++ b/.github/workflows/python-package-pip.yml @@ -0,0 +1,22 @@ +name: test +on: [push, pull_request] + +jobs: + test: + name: Test pip install python ${{ matrix.python-version }} on ${{ matrix.os }} + + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + python-version: [3.9] + steps: + - uses: actions/checkout@v4 + - name: Setup PDM + uses: pdm-project/setup-pdm@v4 + - name: Install package with pdm + run: | + pdm install + - name: Run tests + run: | + pdm run python -m pytest diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bbeb990..d692f1c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -16,6 +16,6 @@ repos: hooks: - id: black - repo: https://github.com/PyCQA/flake8 - rev: 4.0.1 + rev: 5.0.4 hooks: - id: flake8 diff --git a/pdm.lock b/pdm.lock index d1d84d8..765fa95 100644 --- a/pdm.lock +++ b/pdm.lock @@ -5,7 +5,7 @@ groups = ["default", "dev"] strategy = ["cross_platform", "inherit_metadata"] lock_version = "4.4.1" -content_hash = "sha256:4321bf917a10814487c5a62140f6ed9a7a0e92b783685e14e21f81f6886d50f0" +content_hash = "sha256:ce53a3f96edae1d60600f6186192517c0cc7c36b3ba9eb2d570b5dc20b8cd8eb" [[package]] name = "asciitree" @@ -32,7 +32,7 @@ name = "colorama" version = "0.4.6" requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" summary = "Cross-platform colored terminal text." -groups = ["default"] +groups = ["default", "dev"] marker = "sys_platform == \"win32\"" files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, @@ -49,6 +49,18 @@ files = [ {file = "distlib-0.3.8.tar.gz", hash = "sha256:1530ea13e350031b6312d8580ddb6b27a104275a31106523b8f123787f494f64"}, ] +[[package]] +name = "exceptiongroup" +version = "1.2.0" +requires_python = ">=3.7" +summary = "Backport of PEP 654 (exception groups)" +groups = ["dev"] +marker = "python_version < \"3.11\"" +files = [ + {file = "exceptiongroup-1.2.0-py3-none-any.whl", hash = "sha256:4bfd3996ac73b41e9b9628b04e079f193850720ea5945fc96a08633c66912f14"}, + {file = "exceptiongroup-1.2.0.tar.gz", hash = "sha256:91f5c769735f051a4290d52edd0858999b57e5876e9f85937691bd4c9fa3ed68"}, +] + [[package]] name = "fasteners" version = "0.19" @@ -83,6 +95,17 @@ files = [ {file = "identify-2.5.35.tar.gz", hash = "sha256:10a7ca245cfcd756a554a7288159f72ff105ad233c7c4b9c6f0f4d108f5f6791"}, ] +[[package]] +name = "iniconfig" +version = "2.0.0" +requires_python = ">=3.7" +summary = "brain-dead simple config-ini parsing" +groups = ["dev"] +files = [ + {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, + {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, +] + [[package]] name = "loguru" version = "0.7.2" @@ -186,6 +209,63 @@ files = [ {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, ] +[[package]] +name = "packaging" +version = "24.0" +requires_python = ">=3.7" +summary = "Core utilities for Python packages" +groups = ["default", "dev"] +files = [ + {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, + {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, +] + +[[package]] +name = "pandas" +version = "2.2.1" +requires_python = ">=3.9" +summary = "Powerful data structures for data analysis, time series, and statistics" +groups = ["default"] +dependencies = [ + "numpy<2,>=1.22.4; python_version < \"3.11\"", + "numpy<2,>=1.23.2; python_version == \"3.11\"", + "numpy<2,>=1.26.0; python_version >= \"3.12\"", + "python-dateutil>=2.8.2", + "pytz>=2020.1", + "tzdata>=2022.7", +] +files = [ + {file = "pandas-2.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8df8612be9cd1c7797c93e1c5df861b2ddda0b48b08f2c3eaa0702cf88fb5f88"}, + {file = "pandas-2.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0f573ab277252ed9aaf38240f3b54cfc90fff8e5cab70411ee1d03f5d51f3944"}, + {file = "pandas-2.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f02a3a6c83df4026e55b63c1f06476c9aa3ed6af3d89b4f04ea656ccdaaaa359"}, + {file = "pandas-2.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c38ce92cb22a4bea4e3929429aa1067a454dcc9c335799af93ba9be21b6beb51"}, + {file = "pandas-2.2.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:c2ce852e1cf2509a69e98358e8458775f89599566ac3775e70419b98615f4b06"}, + {file = "pandas-2.2.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:53680dc9b2519cbf609c62db3ed7c0b499077c7fefda564e330286e619ff0dd9"}, + {file = "pandas-2.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:94e714a1cca63e4f5939cdce5f29ba8d415d85166be3441165edd427dc9f6bc0"}, + {file = "pandas-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f821213d48f4ab353d20ebc24e4faf94ba40d76680642fb7ce2ea31a3ad94f9b"}, + {file = "pandas-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c70e00c2d894cb230e5c15e4b1e1e6b2b478e09cf27cc593a11ef955b9ecc81a"}, + {file = "pandas-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e97fbb5387c69209f134893abc788a6486dbf2f9e511070ca05eed4b930b1b02"}, + {file = "pandas-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:101d0eb9c5361aa0146f500773395a03839a5e6ecde4d4b6ced88b7e5a1a6403"}, + {file = "pandas-2.2.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:7d2ed41c319c9fb4fd454fe25372028dfa417aacb9790f68171b2e3f06eae8cd"}, + {file = "pandas-2.2.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:af5d3c00557d657c8773ef9ee702c61dd13b9d7426794c9dfeb1dc4a0bf0ebc7"}, + {file = "pandas-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:06cf591dbaefb6da9de8472535b185cba556d0ce2e6ed28e21d919704fef1a9e"}, + {file = "pandas-2.2.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:88ecb5c01bb9ca927ebc4098136038519aa5d66b44671861ffab754cae75102c"}, + {file = "pandas-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:04f6ec3baec203c13e3f8b139fb0f9f86cd8c0b94603ae3ae8ce9a422e9f5bee"}, + {file = "pandas-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a935a90a76c44fe170d01e90a3594beef9e9a6220021acfb26053d01426f7dc2"}, + {file = "pandas-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c391f594aae2fd9f679d419e9a4d5ba4bce5bb13f6a989195656e7dc4b95c8f0"}, + {file = "pandas-2.2.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:9d1265545f579edf3f8f0cb6f89f234f5e44ba725a34d86535b1a1d38decbccc"}, + {file = "pandas-2.2.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:11940e9e3056576ac3244baef2fedade891977bcc1cb7e5cc8f8cc7d603edc89"}, + {file = "pandas-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:4acf681325ee1c7f950d058b05a820441075b0dd9a2adf5c4835b9bc056bf4fb"}, + {file = "pandas-2.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9bd8a40f47080825af4317d0340c656744f2bfdb6819f818e6ba3cd24c0e1397"}, + {file = "pandas-2.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:df0c37ebd19e11d089ceba66eba59a168242fc6b7155cba4ffffa6eccdfb8f16"}, + {file = "pandas-2.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:739cc70eaf17d57608639e74d63387b0d8594ce02f69e7a0b046f117974b3019"}, + {file = "pandas-2.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9d3558d263073ed95e46f4650becff0c5e1ffe0fc3a015de3c79283dfbdb3df"}, + {file = "pandas-2.2.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:4aa1d8707812a658debf03824016bf5ea0d516afdea29b7dc14cf687bc4d4ec6"}, + {file = "pandas-2.2.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:76f27a809cda87e07f192f001d11adc2b930e93a2b0c4a236fde5429527423be"}, + {file = "pandas-2.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:1ba21b1d5c0e43416218db63037dbe1a01fc101dc6e6024bcad08123e48004ab"}, + {file = "pandas-2.2.1.tar.gz", hash = "sha256:0ab90f87093c13f3e8fa45b48ba9f39181046e8f3317d3aadb2fffbb1b978572"}, +] + [[package]] name = "platformdirs" version = "4.2.0" @@ -197,6 +277,17 @@ files = [ {file = "platformdirs-4.2.0.tar.gz", hash = "sha256:ef0cc731df711022c174543cb70a9b5bd22e5a9337c8624ef2c2ceb8ddad8768"}, ] +[[package]] +name = "pluggy" +version = "1.4.0" +requires_python = ">=3.8" +summary = "plugin and hook calling mechanisms for python" +groups = ["dev"] +files = [ + {file = "pluggy-1.4.0-py3-none-any.whl", hash = "sha256:7db9f7b503d67d1c5b95f59773ebb58a8c1c288129a88665838012cfb07b8981"}, + {file = "pluggy-1.4.0.tar.gz", hash = "sha256:8c85c2876142a764e5b7548e7d9a0e0ddb46f5185161049a79b7e974454223be"}, +] + [[package]] name = "pre-commit" version = "3.7.0" @@ -215,6 +306,49 @@ files = [ {file = "pre_commit-3.7.0.tar.gz", hash = "sha256:e209d61b8acdcf742404408531f0c37d49d2c734fd7cff2d6076083d191cb060"}, ] +[[package]] +name = "pytest" +version = "8.1.1" +requires_python = ">=3.8" +summary = "pytest: simple powerful testing with Python" +groups = ["dev"] +dependencies = [ + "colorama; sys_platform == \"win32\"", + "exceptiongroup>=1.0.0rc8; python_version < \"3.11\"", + "iniconfig", + "packaging", + "pluggy<2.0,>=1.4", + "tomli>=1; python_version < \"3.11\"", +] +files = [ + {file = "pytest-8.1.1-py3-none-any.whl", hash = "sha256:2a8386cfc11fa9d2c50ee7b2a57e7d898ef90470a7a34c4b949ff59662bb78b7"}, + {file = "pytest-8.1.1.tar.gz", hash = "sha256:ac978141a75948948817d360297b7aae0fcb9d6ff6bc9ec6d514b85d5a65c044"}, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +summary = "Extensions to the standard Python datetime module" +groups = ["default"] +dependencies = [ + "six>=1.5", +] +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[[package]] +name = "pytz" +version = "2024.1" +summary = "World timezone definitions, modern and historical" +groups = ["default"] +files = [ + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, +] + [[package]] name = "pyyaml" version = "6.0.1" @@ -267,6 +401,40 @@ files = [ {file = "setuptools-69.2.0.tar.gz", hash = "sha256:0ff4183f8f42cd8fa3acea16c45205521a4ef28f73c6391d8a25e92893134f2e"}, ] +[[package]] +name = "six" +version = "1.16.0" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +summary = "Python 2 and 3 compatibility utilities" +groups = ["default"] +files = [ + {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, + {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, +] + +[[package]] +name = "tomli" +version = "2.0.1" +requires_python = ">=3.7" +summary = "A lil' TOML parser" +groups = ["dev"] +marker = "python_version < \"3.11\"" +files = [ + {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, + {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, +] + +[[package]] +name = "tzdata" +version = "2024.1" +requires_python = ">=2" +summary = "Provider of IANA time zone data" +groups = ["default"] +files = [ + {file = "tzdata-2024.1-py2.py3-none-any.whl", hash = "sha256:9068bc196136463f5245e51efda838afa15aaeca9903f49050dfa2679db4d252"}, + {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, +] + [[package]] name = "virtualenv" version = "20.25.1" @@ -295,6 +463,22 @@ files = [ {file = "win32_setctime-1.1.0.tar.gz", hash = "sha256:15cf5750465118d6929ae4de4eb46e8edae9a5634350c01ba582df868e932cb2"}, ] +[[package]] +name = "xarray" +version = "2024.3.0" +requires_python = ">=3.9" +summary = "N-D labeled arrays and datasets in Python" +groups = ["default"] +dependencies = [ + "numpy>=1.23", + "packaging>=22", + "pandas>=1.5", +] +files = [ + {file = "xarray-2024.3.0-py3-none-any.whl", hash = "sha256:ca2bc4da2bf2e7879e15862a7a7c3fc76ad19f6a08931d030220cef39a29118d"}, + {file = "xarray-2024.3.0.tar.gz", hash = "sha256:5c1db19efdde61db7faedad8fc944f4e29698fb6fbd578d352668b63598bd1d8"}, +] + [[package]] name = "zarr" version = "2.17.2" diff --git a/pyproject.toml b/pyproject.toml index 891bacc..0f65d1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,6 +9,7 @@ dependencies = [ "pyyaml>=6.0.1", "loguru>=0.7.2", "zarr>=2.17.2", + "xarray>=2024.3.0", ] requires-python = ">=3.9" readme = "README.md" @@ -25,6 +26,7 @@ distribution = true [tool.pdm.dev-dependencies] dev = [ "pre-commit>=3.7.0", + "pytest>=8.1.1", ] [tool.isort] diff --git a/src/mllam_data_interface/check.py b/src/mllam_data_interface/check.py index c4895cc..c88a4d5 100644 --- a/src/mllam_data_interface/check.py +++ b/src/mllam_data_interface/check.py @@ -1,8 +1,8 @@ +import importlib from pathlib import Path import xarray as xr import yaml -import importlib SPECS_ROOT_PATH = Path(__file__).parent.parent.parent / "specs" @@ -30,13 +30,18 @@ def load_spec(spec_identifier: str): spec_name, spec_version = spec_identifier.split(":") # fp = SPECS_ROOT_PATH / spec_name / f"{spec_version}.yaml" - fp = importlib.resources.files(__package__) / "specs" / spec_name / f"{spec_version}.yaml" + fp = ( + importlib.resources.files(__package__) + / "specs" + / spec_name + / f"{spec_version}.yaml" + ) if not fp.exists(): raise ValueError(f"Spec file {fp} for {spec_identifier} does not exist.") with open(fp, "r") as f: spec = yaml.safe_load(f) - + return spec diff --git a/tests/test_specs.py b/tests/test_specs.py new file mode 100644 index 0000000..c0e92f8 --- /dev/null +++ b/tests/test_specs.py @@ -0,0 +1,51 @@ +import importlib.resources + +import numpy as np +import pytest +import xarray as xr + +from mllam_data_interface.check import check_dataset, load_spec + +SPEC_IDENTIFIER_FORMAT = "{spec_name}:{spec_version}" + + +def _find_all_specs(): + fps = list( + (importlib.resources.files("mllam_data_interface") / "specs").rglob("*.yaml") + ) + + # parse spec name and version from each path + spec_identifiers = [] + for fp in fps: + spec_name = fp.parent.name + spec_version = fp.stem + spec_identifier = SPEC_IDENTIFIER_FORMAT.format( + spec_name=spec_name, spec_version=spec_version + ) + spec_identifiers.append(spec_identifier) + + return spec_identifiers + + +@pytest.mark.parametrize("spec_identifier", _find_all_specs()) +def test_parsing(spec_identifier): + spec = load_spec(spec_identifier=spec_identifier) + + # generate a fake dataset that matches the spec + ds = xr.Dataset() + for var_name, var_parts in spec.get("variables", {}).items(): + dims = var_parts.get("dims", []) + shape = [ + 3, + ] * len(dims) + ds[var_name] = xr.DataArray(np.random.rand(*shape), dims=dims) + + for attr in spec.get("attributes", []): + ds.attrs[attr] = "some_value" + + check_dataset(ds=ds, spec_identifier=spec_identifier) + + # check that an exception is raised with an empty dataset that doesn't match the + # spec + with pytest.raises(ValueError): + check_dataset(ds=xr.Dataset(), spec_identifier=spec_identifier)