Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
floriankrb committed May 17, 2024
2 parents 9421cbd + 3e5fdcc commit 66fd11d
Show file tree
Hide file tree
Showing 16 changed files with 565 additions and 90 deletions.
2 changes: 1 addition & 1 deletion docs/building/sources.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ The following `sources` are currently available:
sources/opendap
sources/forcings
sources/accumulations
sources/perturbations
sources/recentre
29 changes: 29 additions & 0 deletions docs/building/sources/hindcasts.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
###########
hindcasts
###########

.. note::

The `hindcasts` source is currently using the `mars` source
internally. This will be changed in the future.

Hindcasts data, also known as reforecasts, are unique because they are
run for a specific day of year (such as 1 January or 11 June) for
multiple years. So, for a given reference date like 2022-05-12, we can
have hindcasts for 2001-05-12, 2002-05-12, 2003-05-12, and so on. This
is useful in many cases. For more details, please refer to this ECMWF
documentation.

The `hindcasts` source has a special argument called `reference_year`,
which represents the year of the reference date. Based on the requested
valid datetime and on the `reference_year`, the `hindcasts` source will
calculate the `hdate` and `date` and `time` appropriately.

For example, if the `reference_year` is 2022, then the data for
2002-05-12 will use data with `hdate=2002-05-12` and `date=2022-05-12`.

.. literalinclude:: yaml/hindcasts.yaml
:language: yaml

Using `step` in the `hindcasts` source is implemented and works as
expected.
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
.. _perturbations:
.. _recentre:

###############
perturbations
###############
##########
recentre
##########

Perturbations refers to the small variations centered around a nominal
value of a parameter. When dealing with `ensemble forecasting`_, the
perturbations are related to the difference between `ensemble members`
and their given `center`.
and their given `centre`.

The `perturbations` function computes a set of new ensemble members
centered on a different center from previous ensemble members using the
following formula:
The `recentre` function computes a set of new ensemble members centered
on a different centre from previous ensemble members using the following
formula:

.. math::
members_{new} = center + ( members - \overline{members} )
members_{new} = centre + ( members - \overline{members} )
Additionally, some variables must be non-negative to have a physical
meaning (e.g. accumulated variables or `specific humidity`). To ensure
this, positive clipping is performed using the alternative fomula :

.. math::
members_{new} = max(0, center + ( members - \overline{members} ))
members_{new} = max(0, centre + ( members - \overline{members} ))
The current implementation enforces that following variables are
positive when using the `perturbations` function :
Expand All @@ -45,12 +45,12 @@ It uses the following arguments:
members
A :ref:`reference <yaml-reference>` to the ensemble members.

center
centre
A :ref:`reference <yaml-reference>` to the new center requested.

Examples

.. literalinclude:: yaml/perturbations.yaml
.. literalinclude:: yaml/recentre.yaml
:language: yaml

.. _convective precipitation: https://codes.ecmwf.int/grib/param-db/?id=143
Expand Down
8 changes: 8 additions & 0 deletions docs/building/yaml/hindcasts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
input:
hindcasts:
levtype: sfc
param: [2t, msl]
grid: [0.25, 0.25]
stream: enfh
type: cf
reference_year: 2022
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
SKIP = ("class", "stream", "type", "number", "expver", "_leg_number", "anoffset")


def check_compatible(f1, f2, center_field_as_mars, ensemble_field_as_mars):
def check_compatible(f1, f2, centre_field_as_mars, ensemble_field_as_mars):
assert f1.mars_grid == f2.mars_grid, (f1.mars_grid, f2.mars_grid)
assert f1.mars_area == f2.mars_area, (f1.mars_area, f2.mars_area)
assert f1.shape == f2.shape, (f1.shape, f2.shape)
Expand All @@ -43,21 +43,22 @@ def check_compatible(f1, f2, center_field_as_mars, ensemble_field_as_mars):
f2.metadata("valid_datetime"),
)

for k in set(center_field_as_mars.keys()) | set(ensemble_field_as_mars.keys()):
for k in set(centre_field_as_mars.keys()) | set(ensemble_field_as_mars.keys()):
if k in SKIP:
continue
assert center_field_as_mars[k] == ensemble_field_as_mars[k], (
assert centre_field_as_mars[k] == ensemble_field_as_mars[k], (
k,
center_field_as_mars[k],
centre_field_as_mars[k],
ensemble_field_as_mars[k],
)


def perturbations(
def recentre(
*,
members,
center,
centre,
clip_variables=CLIP_VARIABLES,
alpha=1.0,
output=None,
):

Expand All @@ -70,16 +71,16 @@ def perturbations(

LOG.info("Ordering fields")
members = members.order_by(*keys)
center = center.order_by(*keys)
centre = centre.order_by(*keys)
LOG.info("Done")

if len(center) * n_numbers != len(members):
LOG.error("%s %s %s", len(center), n_numbers, len(members))
if len(centre) * n_numbers != len(members):
LOG.error("%s %s %s", len(centre), n_numbers, len(members))
for f in members:
LOG.error("Member: %r", f)
for f in center:
LOG.error("Center: %r", f)
raise ValueError(f"Inconsistent number of fields: {len(center)} * {n_numbers} != {len(members)}")
for f in centre:
LOG.error("centre: %r", f)
raise ValueError(f"Inconsistent number of fields: {len(centre)} * {n_numbers} != {len(members)}")

if output is None:
# prepare output tmp file so we can read it back
Expand All @@ -93,44 +94,44 @@ def perturbations(

seen = set()

for i, center_field in enumerate(center):
param = center_field.metadata("param")
center_field_as_mars = center_field.as_mars()
for i, centre_field in enumerate(centre):
param = centre_field.metadata("param")
centre_field_as_mars = centre_field.as_mars()

# load the center field
center_np = center_field.to_numpy()
# load the centre field
centre_np = centre_field.to_numpy()

# load the ensemble fields and compute the mean
members_np = np.zeros((n_numbers, *center_np.shape))
members_np = np.zeros((n_numbers, *centre_np.shape))

for j in range(n_numbers):
ensemble_field = members[i * n_numbers + j]
ensemble_field_as_mars = ensemble_field.as_mars()
check_compatible(center_field, ensemble_field, center_field_as_mars, ensemble_field_as_mars)
check_compatible(centre_field, ensemble_field, centre_field_as_mars, ensemble_field_as_mars)
members_np[j] = ensemble_field.to_numpy()

ensemble_field_as_mars = tuple(sorted(ensemble_field_as_mars.items()))
assert ensemble_field_as_mars not in seen, ensemble_field_as_mars
seen.add(ensemble_field_as_mars)

# cmin=np.amin(center_np)
# cmin=np.amin(centre_np)
# emin=np.amin(members_np)

# if cmin < 0 and emin >= 0:
# LOG.warning(f"Negative values in {param} cmin={cmin} emin={emin}")
# LOG.warning(f"Center: {center_field_as_mars}")
# LOG.warning(f"centre: {centre_field_as_mars}")

mean_np = members_np.mean(axis=0)

for j in range(n_numbers):
template = members[i * n_numbers + j]
e = members_np[j]
m = mean_np
c = center_np
c = centre_np

assert e.shape == c.shape == m.shape, (e.shape, c.shape, m.shape)

x = c - m + e
x = c + (e - m) * alpha

if param in clip_variables:
# LOG.warning(f"Clipping {param} to be positive")
Expand Down
8 changes: 7 additions & 1 deletion src/anemoi/datasets/create/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,16 @@ def __init__(self, config, *args, **kwargs):
self.setdefault("build", Config())
self.build.setdefault("group_by", "monthly")
self.build.setdefault("use_grib_paramid", False)
self.build.setdefault("variable_naming", "default")
variable_naming = dict(
param="{param}",
param_levelist="{param}_{levelist}",
default="{param}_{levelist}",
).get(self.build.variable_naming, self.build.variable_naming)

self.setdefault("output", Config())
self.output.setdefault("order_by", ["valid_datetime", "param_level", "number"])
self.output.setdefault("remapping", Config(param_level="{param}_{levelist}"))
self.output.setdefault("remapping", Config(param_level=variable_naming))
self.output.setdefault("statistics", "param_level")
self.output.setdefault("chunking", Config(dates=1, ensembles=1))
self.output.setdefault("dtype", "float32")
Expand Down
10 changes: 7 additions & 3 deletions src/anemoi/datasets/create/functions/sources/accumulations.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# nor does it submit to any jurisdiction.
#
import datetime
import logging
import warnings
from copy import deepcopy

Expand All @@ -18,7 +19,7 @@

from anemoi.datasets.create.utils import to_datetime_list

DEBUG = True
LOG = logging.getLogger(__name__)


def member(field):
Expand Down Expand Up @@ -73,7 +74,10 @@ def check(self, field):
def write(self, template):

assert self.startStep != self.endStep, (self.startStep, self.endStep)
assert np.all(self.values >= 0), (np.amin(self.values), np.amax(self.values))
if np.all(self.values < 0):
LOG.warning(
f"Negative values when computing accumutation for {self.param} ({self.date} {self.time}): min={np.amin(self.values)} max={np.amax(self.values)}"
)

self.out.write(
self.values,
Expand Down Expand Up @@ -395,6 +399,7 @@ def accumulations(context, dates, **request):

KWARGS = {
("od", "oper"): dict(patch=scda),
("od", "elda"): dict(base_times=(6, 18)),
("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)),
("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)),
}
Expand Down Expand Up @@ -431,6 +436,5 @@ def accumulations(context, dates, **request):
dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
dates = to_datetime_list(dates)

DEBUG = True
for f in accumulations(None, dates, **config):
print(f, f.to_numpy().mean())
Loading

0 comments on commit 66fd11d

Please sign in to comment.