Skip to content

Commit

Permalink
Merge pull request #55 from ecmwf/develop
Browse files Browse the repository at this point in the history
Update main for release
  • Loading branch information
JesperDramsch authored Sep 25, 2024
2 parents 4c11a33 + ae555d6 commit 1f8fed1
Show file tree
Hide file tree
Showing 43 changed files with 1,634 additions and 241 deletions.
6 changes: 6 additions & 0 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# CODEOWNERS file

# Protect workflow files
/.github/ @theissenhelen @jesperdramsch @gmertes @b8raoult @floriankrb
/.pre-commit-config.yaml @theissenhelen @jesperdramsch @gmertes @b8raoult @floriankrb
/pyproject.toml @theissenhelen @jesperdramsch @gmertes @b8raoult @floriankrb
18 changes: 18 additions & 0 deletions .github/ci-hpc-config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
build:
python: '3.10'
modules:
- ninja
dependencies:
- ecmwf/ecbuild@develop
- ecmwf/eccodes@develop
- ecmwf/eckit@develop
- ecmwf/odc@develop
python_dependencies:
- ecmwf/anemoi-utils@develop
- ecmwf/earthkit-data@develop
- ecmwf/earthkit-meteo@develop
- ecmwf/earthkit-geo@develop
parallel: 64

pytest_cmd: |
python -m pytest -vv -m 'not notebook and not no_cache_init' --cov=. --cov-report=xml
3 changes: 3 additions & 0 deletions .github/workflows/changelog-pr-update.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ on:
branches:
- main
- develop
paths-ignore:
- .pre-commit-config.yaml
- .readthedocs.yaml
jobs:
Check-Changelog:
name: Check Changelog Action
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/changelog-release-update.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ name: "Update Changelog"
on:
release:
types: [released]
workflow_dispatch: ~

permissions:
pull-requests: write
Expand Down
17 changes: 15 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,29 @@ on:
- 'develop'
tags-ignore:
- '**'
paths-ignore:
- "docs/**"
- "CHANGELOG.md"
- "README.md"

# Trigger the workflow on pull request
pull_request: ~
pull_request:
paths-ignore:
- "docs/**"
- "CHANGELOG.md"
- "README.md"

# Trigger the workflow manually
workflow_dispatch: ~

# Trigger after public PR approved for CI
pull_request_target:
types: [labeled]
paths-ignore:
- "docs/**"
- "CHANGELOG.md"
- "README.md"


jobs:
# Run CI including downstream packages on self-hosted runners
Expand All @@ -34,7 +47,7 @@ jobs:
downstream-ci-hpc:
name: downstream-ci-hpc
if: ${{ !github.event.pull_request.head.repo.fork && github.event.action != 'labeled' || github.event.label.name == 'approved-for-ci' }}
uses: ecmwf-actions/downstream-ci/.github/workflows/downstream-ci.yml@main
uses: ecmwf-actions/downstream-ci/.github/workflows/downstream-ci-hpc.yml@main
with:
anemoi-datasets: ecmwf/anemoi-datasets@${{ github.event.pull_request.head.sha || github.sha }}
secrets: inherit
2 changes: 1 addition & 1 deletion .github/workflows/python-pull-request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Code Quality checks for PRs

on:
push:
pull_request_target:
pull_request:
types: [opened, synchronize, reopened]

jobs:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ celerybeat.pid
*.sage.py

# Environments
.envrc
.env
.venv
env/
Expand Down
20 changes: 12 additions & 8 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ repos:
- id: no-commit-to-branch # Prevent committing to main / master
- id: check-added-large-files # Check for large files added to git
- id: check-merge-conflict # Check for files that contain merge conflict
- repo: https://github.com/pre-commit/pygrep-hooks
rev: v1.10.0 # Use the ref you want to point at
hooks:
- id: python-use-type-annotations # Check for missing type annotations
- id: python-check-blanket-noqa # Check for # noqa: all
- id: python-no-log-warn # Check for log.warn
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.8.0
hooks:
Expand All @@ -37,15 +43,15 @@ repos:
rev: v0.6.4
hooks:
- id: ruff
# Next line is to exclude for documentation code snippets
exclude: 'docs/(.*/)?[a-z]\w+_.py$'
# Next line is for documentation code snippets
exclude: '^[^_].*_\.py$'
args:
- --line-length=120
- --fix
- --exit-non-zero-on-fix
- --preview
- repo: https://github.com/sphinx-contrib/sphinx-lint
rev: v0.9.1
rev: v1.0.0
hooks:
- id: sphinx-lint
# For now, we use it. But it does not support a lot of sphinx features
Expand All @@ -59,12 +65,10 @@ repos:
hooks:
- id: docconvert
args: ["numpy"]
- repo: https://github.com/b8raoult/optional-dependencies-all
rev: "0.0.6"
hooks:
- id: optional-dependencies-all
args: ["--inplace", "--exclude-keys=dev,docs,tests", "--group=dev=all,docs,tests"]
- repo: https://github.com/tox-dev/pyproject-fmt
rev: "2.2.3"
hooks:
- id: pyproject-fmt

ci:
autoupdate_schedule: monthly
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,21 @@ Keep it human-readable, your future self will thank you!
## [Unreleased]

### Added

- New `rescale` keyword in `open_dataset` to change units of variables #36
- Simplify imports

### Changed

- Added incremental building of datasets
- Add missing dependency for documentation building
- Fix failing test due to previous merge
- Bug fix when creating dataset from zarr
- Bug fix with area selection in cutout operation
- add paths-ignore to ci workflow

### Removed
- pytest for notebooks

## [0.4.5]

Expand All @@ -25,6 +35,7 @@ Keep it human-readable, your future self will thank you!
- CI workflow to update the changelog on release
- adds the reusable cd pypi workflow
- merge strategy for changelog in .gitattributes #25
- adds ci hpc config (#43)

### Changed

Expand Down Expand Up @@ -74,6 +85,9 @@ Keep it human-readable, your future self will thank you!
- combine datasets

## Git Diffs:
[Unreleased]: https://github.com/ecmwf/anemoi-datasets/compare/0.4.5...HEAD
[0.4.5]: https://github.com/ecmwf/anemoi-datasets/compare/0.4.4...0.4.5
[0.4.4]: https://github.com/ecmwf/anemoi-datasets/compare/0.4.0...0.4.4
[0.4.0]: https://github.com/ecmwf/anemoi-datasets/compare/0.3.0...0.4.0
[0.3.0]: https://github.com/ecmwf/anemoi-datasets/compare/0.2.0...0.3.0
[0.2.0]: https://github.com/ecmwf/anemoi-datasets/compare/0.1.0...0.2.0
Expand Down
6 changes: 3 additions & 3 deletions docs/building/incremental.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ To add statistics for 6h increments:

.. code:: bash
anemoi-datasets init-additions dataset.zarr --delta 6h anemoi-datasets
anemoi-datasets load-additions dataset.zarr --part 1/2 --delta 6h anemoi-datasets
anemoi-datasets init-additions dataset.zarr --delta 6h
anemoi-datasets load-additions dataset.zarr --part 1/2 --delta 6h
anemoi-datasets load-additions dataset.zarr --part 2/2 --delta 6h
anemoi-datasets finalise-additions dataset.zarr --delta 6h
Expand All @@ -96,7 +96,7 @@ To add statistics for 12h increments:
.. code:: bash
anemoi-datasets init-additions dataset.zarr --delta 12h
anemoi-datasets load-additions dataset.zarr --part 1/2 --delta 12h anemoi-datasets
anemoi-datasets load-additions dataset.zarr --part 1/2 --delta 12h
anemoi-datasets load-additions dataset.zarr --part 2/2 --delta 12h
anemoi-datasets finalise-additions dataset.zarr --delta 12h
Expand Down
30 changes: 30 additions & 0 deletions docs/using/code/rescale_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Scale and offset can be passed as a dictionary...

ds = open_dataset(
dataset,
rescale={"2t": {"scale": 1.0, "offset": -273.15}},
)

# ... a tuple of floating points ....

ds = open_dataset(
dataset,
rescale={"2t": (1.0, -273.15)},
)

# ... or a tuple of strings representing units.

ds = open_dataset(
dataset,
rescale={"2t": ("K", "degC")},
)

# Several variables can be rescaled at once.

ds = open_dataset(
dataset,
rescale={
"2t": ("K", "degC"),
"tp": ("m", "mm"),
},
)
25 changes: 25 additions & 0 deletions docs/using/selecting.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,28 @@ You can also rename variables:

This will be useful when you join datasets and do not want variables
from one dataset to override the ones from the other.

*********
rescale
*********

When combining datasets, you may want to rescale the variables so that
they have matching units. This can be done with the `rescale` option:

.. literalinclude:: code/rescale_.py
:language: python

The `rescale` option will also rescale the statistics. The rescaling is
currently limited to simple linear conversions.

When provided with units, the `rescale` option uses the cfunits_ package
to find the `scale` and `offset` attributes of the units and uses these to
rescale the data.

.. warning::

When providing units, the library assumes that the mapping between
them is a linear transformation. No check is done to ensure this is
the case.

.. _cfunits: https://github.com/NCAS-CMS/cfunits
34 changes: 14 additions & 20 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,44 +51,42 @@ dynamic = [
]
dependencies = [
"anemoi-utils[provenance]>=0.3.15",
"cfunits",
"numpy",
"pyyaml",
"semantic-version",
"tqdm",
"zarr",
"zarr<=2.17",
]

optional-dependencies.all = [
"aiohttp",
"boto3",
"earthkit-data[mars]>=0.9",
"earthkit-geo>=0.2",
"earthkit-meteo",
"eccodes>=2.37",
"ecmwflibs>=0.6.3",
"entrypoints",
"gcsfs",
"kerchunk",
"pyproj",
"requests",
"s3fs",
]

optional-dependencies.create = [
"earthkit-data[mars]>=0.9",
"earthkit-geo>=0.2",
"earthkit-meteo",
"eccodes>=2.37",
"ecmwflibs>=0.6.3",
"entrypoints",
"pyproj",
]

optional-dependencies.dev = [
"aiohttp",
"boto3",
"earthkit-data[mars]>=0.9",
"earthkit-geo>=0.2",
"earthkit-meteo",
"eccodes>=2.37",
"ecmwflibs>=0.6.3",
"entrypoints",
"gcsfs",
"kerchunk",
Expand All @@ -97,39 +95,32 @@ optional-dependencies.dev = [
"pyproj",
"pytest",
"requests",
"rstfmt",
"s3fs",
"sphinx",
"sphinx-argparse<0.5",
"sphinx-rtd-theme",
]

optional-dependencies.docs = [
"nbsphinx",
"pandoc",
"rstfmt",
"sphinx",
"sphinx-argparse<0.5",
"sphinx-argparse",
"sphinx-rtd-theme",
]

optional-dependencies.kerchunk = [
"gcsfs",
"kerchunk",
"s3fs",
]

optional-dependencies.remote = [
"aiohttp",
"boto3",
"requests",
"s3fs",
]

optional-dependencies.tests = [
"pytest",
]

optional-dependencies.xarray = [
"gcsfs",
"kerchunk",
]

urls.Documentation = "https://anemoi-datasets.readthedocs.io/"
urls.Homepage = "https://github.com/ecmwf/anemoi-datasets/"
urls.Issues = "https://github.com/ecmwf/anemoi-datasets/issues"
Expand All @@ -145,3 +136,6 @@ scripts.anemoi-datasets = "anemoi.datasets.__main__:main"

[tool.setuptools_scm]
version_file = "src/anemoi/datasets/_version.py"

[tool.isort]
profile = "black"
5 changes: 3 additions & 2 deletions src/anemoi/datasets/commands/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def task(what, options, *args, **kwargs):
"""

now = datetime.datetime.now()
LOG.info(f"Task {what}({args},{kwargs}) starting")
LOG.info(f"🎬 Task {what}({args},{kwargs}) starting")

from anemoi.datasets.create import creator_factory

Expand All @@ -28,7 +28,7 @@ def task(what, options, *args, **kwargs):
c = creator_factory(what.replace("-", "_"), **options)
result = c.run()

LOG.debug(f"Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
LOG.info(f"🏁 Task {what}({args},{kwargs}) completed ({datetime.datetime.now()-now})")
return result


Expand Down Expand Up @@ -57,6 +57,7 @@ def add_arguments(self, command_parser):
command_parser.add_argument("--trace", action="store_true")

def run(self, args):

now = time.time()
if args.threads + args.processes:
self.parallel_create(args)
Expand Down
Loading

0 comments on commit 1f8fed1

Please sign in to comment.