Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/develop' into feature/anemoi-obs…
Browse files Browse the repository at this point in the history
…ervations
  • Loading branch information
floriankrb committed Sep 2, 2024
2 parents 78b78bd + 2558df2 commit 40a3b77
Show file tree
Hide file tree
Showing 34 changed files with 569 additions and 418 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -188,3 +188,5 @@ _build/
*.sync
*.dot
_dev/
*.to_upload
*.tmp
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ Keep it human-readable, your future self will thank you!

### Changed

- Support sub-hourly datasets.
- Change negative variance detection to make it less restrictive.
- Fix cutout bug that left some global grid points in the LAM part.

### Removed

## [0.4.4] Bug fixes
Expand Down
25 changes: 25 additions & 0 deletions docs/using/combining.rst
Original file line number Diff line number Diff line change
Expand Up @@ -155,3 +155,28 @@ cutout:
:width: 75%
:align: center
:alt: Cutout

You can also pass a `min_distance_km` parameter to the `cutout`
function. Any grid points in the global dataset that are closer than
this distance to a grid point in the LAM dataset will be removed. This
can be useful to control the behaviour of the algorithm at the edge of
the cutout area. If no value is provided, the algorithm will compute its
value as the smallest distance between two grid points in the global
dataset over the cutout area. If you do not want to use this feature,
you can set `min_distance_km=0`, or provide your own value.

The plots below illustrate how the cutout differs if `min_distance_km`
is not given (top) or if `min_distance_km` is set to `0` (bottom).
The difference can be seen at the boundary between the two grids:

.. image:: images/cutout-5.png
:align: center
:alt: Cutout

.. image:: images/cutout-6.png
:align: center
:alt: Cutout

To debug the combination, you can pass `plot=True` to the `cutout`
function (when running from a Notebook), or use `plot="prefix"` to save
the plots to a series of PNG files in the current directory.
Binary file added docs/using/images/cutout-5.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/using/images/cutout-6.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ dynamic = [
"version",
]
dependencies = [
"anemoi-utils[provenance]>=0.3.13",
"anemoi-utils[provenance]>=0.3.15",
"numpy",
"pyyaml",
"semantic-version",
Expand Down
2 changes: 1 addition & 1 deletion src/anemoi/datasets/create/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def raise_if_not_valid(self, print=print):
raise ValueError(self.error_message)

def _parse(self, name):
pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)$"
pattern = r"^(\w+)-([\w-]+)-(\w+)-(\w+)-(\d\d\d\d)-(\d\d\d\d)-(\d+h)-v(\d+)-?([a-zA-Z0-9-]+)?$"
match = re.match(pattern, name)

assert match, (name, pattern)
Expand Down
14 changes: 12 additions & 2 deletions src/anemoi/datasets/create/functions/sources/xarray/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,19 @@ def load_one(emoji, context, dates, dataset, options={}, flavour=None, **kwargs)
result = MultiFieldList([fs.sel(valid_datetime=date, **kwargs) for date in dates])

if len(result) == 0:
LOG.warning(f"No data found for {dataset} and dates {dates}")
LOG.warning(f"No data found for {dataset} and dates {dates} and {kwargs}")
LOG.warning(f"Options: {options}")
LOG.warning(data)

for i, k in enumerate(fs):
a = ["valid_datetime", k.metadata("valid_datetime", default=None)]
for n in kwargs.keys():
a.extend([n, k.metadata(n, default=None)])
print([str(x) for x in a])

if i > 16:
break

# LOG.warning(data)

return result

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class Coordinate:
is_time = False
is_step = False
is_date = False
is_member = False

def __init__(self, variable):
self.variable = variable
Expand Down Expand Up @@ -201,8 +202,14 @@ def normalise(self, value):


class EnsembleCoordinate(Coordinate):
is_member = True
mars_names = ("number",)

def normalise(self, value):
    """Return ``value`` as an ``int`` when it is integral, else unchanged.

    A value that compares equal to its ``int()`` conversion (e.g. ``3.0``)
    is returned as that integer; any other value is returned as given.
    """
    as_int = int(value)
    return as_int if as_int == value else value


class LongitudeCoordinate(Coordinate):
is_grid = True
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def to_numpy(self, flatten=False, dtype=None):
return values.reshape(self.shape)

def _make_metadata(self):
return XArrayMetadata(self, self.owner.mapping)
return XArrayMetadata(self)

def grid_points(self):
return self.owner.grid_points()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,6 @@ def sel(self, **kwargs):

for v in self.variables:

v.update_metadata_mapping(kwargs)

# First, select matching variables
# This will consume 'param' or 'variable' from kwargs
# and return the rest
Expand Down
22 changes: 21 additions & 1 deletion src/anemoi/datasets/create/functions/sources/xarray/flavour.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@


from .coordinates import DateCoordinate
from .coordinates import EnsembleCoordinate
from .coordinates import LatitudeCoordinate
from .coordinates import LevelCoordinate
from .coordinates import LongitudeCoordinate
Expand Down Expand Up @@ -135,6 +136,17 @@ def _guess(self, c, coord):
if d is not None:
return d

d = self._is_number(
c,
axis=axis,
name=name,
long_name=long_name,
standard_name=standard_name,
units=units,
)
if d is not None:
return d

if c.shape in ((1,), tuple()):
return ScalarCoordinate(c)

Expand Down Expand Up @@ -249,9 +261,13 @@ def _is_level(self, c, *, axis, name, long_name, standard_name, units):
if standard_name == "depth":
return LevelCoordinate(c, "depth")

if name == "pressure":
if name == "vertical" and units == "hPa":
return LevelCoordinate(c, "pl")

def _is_number(self, c, *, axis, name, long_name, standard_name, units):
if name in ("realization", "number"):
return EnsembleCoordinate(c)


class FlavourCoordinateGuesser(CoordinateGuesser):
def __init__(self, ds, flavour):
Expand Down Expand Up @@ -328,3 +344,7 @@ def _levtype(self, c, *, axis, name, long_name, standard_name, units):
return self.flavour["levtype"]

raise NotImplementedError(f"levtype for {c=}")

def _is_number(self, c, *, axis, name, long_name, standard_name, units):
    """Return an ``EnsembleCoordinate`` when the flavour's "number" rule matches ``c``.

    The keyword arguments are forwarded to ``self._match`` through
    ``locals()``, so no extra local variable may be introduced before that
    call. Returns ``None`` (implicitly) when the rule does not match.
    """
    if self._match(c, "number", locals()):
        # An ensemble member axis must be wrapped in EnsembleCoordinate,
        # not DateCoordinate.
        return EnsembleCoordinate(c)
56 changes: 27 additions & 29 deletions src/anemoi/datasets/create/functions/sources/xarray/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,53 +10,49 @@
import logging
from functools import cached_property

from anemoi.utils.dates import as_datetime
from earthkit.data.core.geography import Geography
from earthkit.data.core.metadata import RawMetadata
from earthkit.data.utils.dates import to_datetime
from earthkit.data.utils.projections import Projection

LOG = logging.getLogger(__name__)


class MDMapping:
class _MDMapping:

def __init__(self, mapping):
self.user_to_internal = mapping
def __init__(self, variable):
self.variable = variable
self.time = variable.time
self.mapping = dict(param="variable")
for c in variable.coordinates:
for v in c.mars_names:
assert v not in self.mapping, f"Duplicate key '{v}' in {c}"
self.mapping[v] = c.variable.name

def from_user(self, kwargs):
if isinstance(kwargs, str):
return self.user_to_internal.get(kwargs, kwargs)
return {self.user_to_internal.get(k, k): v for k, v in kwargs.items()}
def _from_user(self, key):
return self.mapping.get(key, key)

def __len__(self):
return len(self.user_to_internal)
def from_user(self, kwargs):
    """Translate the keys of a user-supplied selection to internal names.

    Every key of ``kwargs`` is passed through ``self._from_user``; values are
    kept unchanged. A new dict is returned and ``kwargs`` is not modified.
    """
    # No debug print here: writing to stdout on every call pollutes the
    # output of whatever program is using the library.
    return {self._from_user(k): v for k, v in kwargs.items()}

def __repr__(self):
return f"MDMapping({self.user_to_internal})"
return f"MDMapping({self.mapping})"

def fill_time_metadata(self, field, md):
md["valid_datetime"] = as_datetime(self.variable.time.fill_time_metadata(field._md, md)).isoformat()


class XArrayMetadata(RawMetadata):
LS_KEYS = ["variable", "level", "valid_datetime", "units"]
NAMESPACES = ["default", "mars"]
MARS_KEYS = ["param", "step", "levelist", "levtype", "number", "date", "time"]

def __init__(self, field, mapping):
def __init__(self, field):
self._field = field
md = field._md.copy()

self._mapping = mapping
if mapping is None:
time_coord = [c for c in field.owner.coordinates if c.is_time]
if len(time_coord) == 1:
time_key = time_coord[0].name
else:
time_key = "time"
else:
time_key = mapping.from_user("valid_datetime")
self._time = to_datetime(md.pop(time_key))
self._field.owner.time.fill_time_metadata(self._time, md)
md["valid_datetime"] = self._time.isoformat()

self._mapping = _MDMapping(field.owner)
self._mapping.fill_time_metadata(field, md)
super().__init__(md)

@cached_property
Expand Down Expand Up @@ -88,10 +84,13 @@ def _base_datetime(self):
return self._field.forecast_reference_time

def _valid_datetime(self):
return self._time
return self._get("valid_datetime")

def _get(self, key, **kwargs):

if key in self._d:
return self._d[key]

if key.startswith("mars."):
key = key[5:]
if key not in self.MARS_KEYS:
Expand All @@ -100,8 +99,7 @@ def _get(self, key, **kwargs):
else:
return kwargs.get("default", None)

if self._mapping is not None:
key = self._mapping.from_user(key)
key = self._mapping._from_user(key)

return super()._get(key, **kwargs)

Expand Down
Loading

0 comments on commit 40a3b77

Please sign in to comment.