Skip to content

Commit

Permalink
Merge branch 'develop' of github.com:MeteoSwiss-APN/anemoi-datasets into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
icedoom888 committed Dec 11, 2024
2 parents a4586d5 + 2c1e6b1 commit b8471d3
Show file tree
Hide file tree
Showing 9 changed files with 49 additions and 6 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ repos:
- --force-single-line-imports
- --profile black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.2
rev: v0.8.1
hooks:
- id: ruff
args:
Expand Down
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Keep it human-readable, your future self will thank you!
## Changed

- Fix metadata serialization handling of numpy.integer (#140)
- Fix negative variance for constant variables (#148)
- Fix cutout slicing of grid dimension (#145)

### Added
Expand Down
2 changes: 2 additions & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@ datasets <building-introduction>`.

- :doc:`overview`
- :doc:`installing`
- :doc:`naming_conventions`

.. toctree::
:maxdepth: 1
:hidden:

overview
installing
naming_conventions

**Using training datasets**

Expand Down
8 changes: 8 additions & 0 deletions docs/naming_conventions.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
####################
Naming conventions
####################

These are the current naming conventions for datasets:

`Anemoi Naming Conventions
<https://anemoi-registry.readthedocs.io/en/latest/naming-conventions.html>`_
2 changes: 1 addition & 1 deletion src/anemoi/datasets/create/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def raise_if_not_valid(self, print=print):
raise ValueError(self.error_message)

def _parse(self, name):
pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)?$"
pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h|\d+m)-v(\d+)-?([a-zA-Z0-9-]+)?$"
match = re.match(pattern, name)

if not match:
Expand Down
16 changes: 15 additions & 1 deletion src/anemoi/datasets/create/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def assert_is_fieldlist(obj):
def import_function(name, kind):

from anemoi.transform.filters import filter_registry
from anemoi.transform.sources import source_registry

name = name.replace("-", "_")

Expand All @@ -45,7 +46,20 @@ def import_function(name, kind):
if filter_registry.lookup(name, return_none=True):

def proc(context, data, *args, **kwargs):
return filter_registry.create(name, *args, **kwargs)(data)
filter = filter_registry.create(name, *args, **kwargs)
filter.context = context
# filter = filter_registry.create(context, name, *args, **kwargs)
return filter.forward(data)

return proc

if kind == "sources":
if source_registry.lookup(name, return_none=True):

def proc(context, data, *args, **kwargs):
source = source_registry.create(name, *args, **kwargs)
# source = source_registry.create(context, name, *args, **kwargs)
return source.forward(data)

return proc

Expand Down
18 changes: 18 additions & 0 deletions src/anemoi/datasets/create/input/repeated_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@ def transform(self, group_of_dates):
end += self.frequency

to_try = sorted(to_try - self.tried)
info = {k: "no-data" for k in to_try}

if not to_try:
LOG.warning(f"No new dates to try for {group_of_dates} in {self.source}")
# return []

if to_try:
result = self.source.select(
Expand All @@ -82,19 +87,32 @@ def transform(self, group_of_dates):
)
)

cnt = 0
for f in result.datasource:
cnt += 1
# We could keep the fields in a dictionary, but we don't want to keep the fields in memory
date = as_datetime(f.metadata("valid_datetime"))

if self.skip_all_nans:
if np.isnan(f.to_numpy()).all():
LOG.warning(f"Skipping {date} because all values are NaN")
info[date] = "all-nans"
continue

info[date] = "ok"
self.found.add(date)

if cnt == 0:
raise ValueError(f"No data found for {group_of_dates} in {self.source}")

self.tried.update(to_try)

if not self.found:
for k, v in info.items():
LOG.warning(f"{k}: {v}")

raise ValueError(f"No matching data found for {asked_dates} in {self.source}")

new_dates = defaultdict(list)

for date in asked_dates:
Expand Down
4 changes: 2 additions & 2 deletions src/anemoi/datasets/create/statistics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def fix_variance(x, name, count, sums, squares):

variances = squares / count - mean * mean
assert variances.shape == squares.shape == mean.shape
if all(variances >= 0):
if np.all(variances >= 0):
LOG.warning(f"All individual variances for {name} are positive, setting variance to 0.")
return 0

Expand All @@ -108,7 +108,7 @@ def fix_variance(x, name, count, sums, squares):
# return 0

LOG.warning(f"ERROR at least one individual variance is negative ({np.nanmin(variances)}).")
return x
return 0


def check_variance(x, variables_names, minimum, maximum, mean, count, sums, squares):
Expand Down
2 changes: 1 addition & 1 deletion src/anemoi/datasets/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,7 @@ def _compute_constant_fields_from_a_few_samples(self):
sample_count = min(4, len(indices))
count = len(indices)

p = slice(0, count, count // (sample_count - 1))
p = slice(0, count, count // max(1, sample_count - 1))
samples = list(range(*p.indices(count)))

samples.append(count - 1) # Add last
Expand Down

0 comments on commit b8471d3

Please sign in to comment.