diff --git a/docs/building/sources.rst b/docs/building/sources.rst
index d81990e1..99fb8415 100644
--- a/docs/building/sources.rst
+++ b/docs/building/sources.rst
@@ -15,4 +15,4 @@ The following `sources` are currently available:
    sources/opendap
    sources/forcings
    sources/accumulations
-   sources/perturbations
+   sources/recentre
diff --git a/docs/building/sources/hindcasts.rst b/docs/building/sources/hindcasts.rst
new file mode 100644
index 00000000..fc7c60af
--- /dev/null
+++ b/docs/building/sources/hindcasts.rst
@@ -0,0 +1,29 @@
+###########
+ hindcasts
+###########
+
+.. note::
+
+   The `hindcasts` source currently uses the `mars` source
+   internally. This will change in the future.
+
+Hindcast data, also known as reforecasts, are unique because they are
+run for a specific day of the year (such as 1 January or 11 June) for
+multiple years. So, for a given reference date such as 2022-05-12, we
+can have hindcasts for 2001-05-12, 2002-05-12, 2003-05-12, and so on.
+This is useful in many cases. For more details, please refer to the
+ECMWF documentation.
+
+The `hindcasts` source has a special argument called `reference_year`,
+which represents the year of the reference date. Based on the requested
+valid datetime and on the `reference_year`, the `hindcasts` source
+computes `hdate`, `date` and `time` accordingly.
+
+For example, if the `reference_year` is 2022, then the data for
+2002-05-12 will use data with `hdate=2002-05-12` and `date=2022-05-12`.
+
+.. literalinclude:: yaml/hindcasts.yaml
+   :language: yaml
+
+Using `step` in the `hindcasts` source is supported and works as
+expected.
diff --git a/docs/building/sources/perturbations.rst b/docs/building/sources/recentre.rst
similarity index 79%
rename from docs/building/sources/perturbations.rst
rename to docs/building/sources/recentre.rst
index 400b12ad..f095ef26 100644
--- a/docs/building/sources/perturbations.rst
+++ b/docs/building/sources/recentre.rst
@@ -1,21 +1,21 @@
-.. _perturbations:
+.. _recentre:
 
-###############
- perturbations
-###############
+##########
+ recentre
+##########
 
 Perturbations refers to the small variations centered around a nominal
 value of a parameter. When dealing with `ensemble forecasting`_, the
 perturbations are related to the difference between `ensemble members`
-and their given `center`.
+and their given `centre`.
 
-The `perturbations` function computes a set of new ensemble members
-centered on a different center from previous ensemble members using the
-following formula:
+The `recentre` function computes a set of new ensemble members centred
+on a different centre from the previous ensemble members using the
+following formula:
 
 .. math::
 
-   members_{new} = center + ( members - \overline{members} )
+   members_{new} = centre + ( members - \overline{members} )
 
 Additionally, some variables must be non-negative to have a physical
 meaning (e.g. accumulated variables or `specific humidity`). To ensure
 this, positive clipping is performed using the alternative fomula :
 
 .. math::
 
-   members_{new} = max(0, center + ( members - \overline{members} ))
+   members_{new} = max(0, centre + ( members - \overline{members} ))
 
 The current implementation enforces that following variables are
 positive when using the `perturbations` function :
@@ -45,12 +45,12 @@ It uses the following arguments:
 
 members
    A :ref:`reference ` to the ensemble members.
 
-center
+centre
    A :ref:`reference ` to the new center requested.
 
 Examples
 
-.. literalinclude:: yaml/perturbations.yaml
+..
literalinclude:: yaml/recentre.yaml :language: yaml .. _convective precipitation: https://codes.ecmwf.int/grib/param-db/?id=143 diff --git a/docs/building/yaml/hindcasts.yaml b/docs/building/yaml/hindcasts.yaml new file mode 100644 index 00000000..2766a3d3 --- /dev/null +++ b/docs/building/yaml/hindcasts.yaml @@ -0,0 +1,8 @@ +input: + hindcasts: + levtype: sfc + param: [2t, msl] + grid: [0.25, 0.25] + stream: enfh + type: cf + reference_year: 2022 diff --git a/src/anemoi/datasets/compute/perturbations.py b/src/anemoi/datasets/compute/recentre.py similarity index 77% rename from src/anemoi/datasets/compute/perturbations.py rename to src/anemoi/datasets/compute/recentre.py index f86c1256..4759fafd 100644 --- a/src/anemoi/datasets/compute/perturbations.py +++ b/src/anemoi/datasets/compute/recentre.py @@ -32,7 +32,7 @@ SKIP = ("class", "stream", "type", "number", "expver", "_leg_number", "anoffset") -def check_compatible(f1, f2, center_field_as_mars, ensemble_field_as_mars): +def check_compatible(f1, f2, centre_field_as_mars, ensemble_field_as_mars): assert f1.mars_grid == f2.mars_grid, (f1.mars_grid, f2.mars_grid) assert f1.mars_area == f2.mars_area, (f1.mars_area, f2.mars_area) assert f1.shape == f2.shape, (f1.shape, f2.shape) @@ -43,21 +43,22 @@ def check_compatible(f1, f2, center_field_as_mars, ensemble_field_as_mars): f2.metadata("valid_datetime"), ) - for k in set(center_field_as_mars.keys()) | set(ensemble_field_as_mars.keys()): + for k in set(centre_field_as_mars.keys()) | set(ensemble_field_as_mars.keys()): if k in SKIP: continue - assert center_field_as_mars[k] == ensemble_field_as_mars[k], ( + assert centre_field_as_mars[k] == ensemble_field_as_mars[k], ( k, - center_field_as_mars[k], + centre_field_as_mars[k], ensemble_field_as_mars[k], ) -def perturbations( +def recentre( *, members, - center, + centre, clip_variables=CLIP_VARIABLES, + alpha=1.0, output=None, ): @@ -70,16 +71,16 @@ def perturbations( LOG.info("Ordering fields") members = members.order_by(*keys) - center = center.order_by(*keys) + centre = centre.order_by(*keys) LOG.info("Done") - if len(center) * n_numbers != len(members): - LOG.error("%s %s %s", len(center), n_numbers, len(members)) + if len(centre) * n_numbers != len(members): + LOG.error("%s %s %s", len(centre), n_numbers, len(members)) for f in members: LOG.error("Member: %r", f) - for f in center: - LOG.error("Center: %r", f) - raise ValueError(f"Inconsistent number of fields: {len(center)} * {n_numbers} != {len(members)}") + for f in centre: + LOG.error("centre: %r", f) + raise ValueError(f"Inconsistent number of fields: {len(centre)} * {n_numbers} != {len(members)}") if output is None: # prepare output tmp file so we can read it back @@ -93,32 +94,32 @@ def perturbations( seen = set() - for i, center_field in enumerate(center): - param = center_field.metadata("param") - center_field_as_mars = center_field.as_mars() + for i, centre_field in enumerate(centre): + param = centre_field.metadata("param") + centre_field_as_mars = centre_field.as_mars() - # load the center field - center_np = center_field.to_numpy() + # load the centre field + centre_np = centre_field.to_numpy() # load the ensemble fields and compute the mean - members_np = np.zeros((n_numbers, *center_np.shape)) + members_np = np.zeros((n_numbers, *centre_np.shape)) for j in range(n_numbers): ensemble_field = members[i * n_numbers + j] ensemble_field_as_mars = ensemble_field.as_mars() - check_compatible(center_field, ensemble_field, center_field_as_mars, ensemble_field_as_mars) + 
check_compatible(centre_field, ensemble_field, centre_field_as_mars, ensemble_field_as_mars) members_np[j] = ensemble_field.to_numpy() ensemble_field_as_mars = tuple(sorted(ensemble_field_as_mars.items())) assert ensemble_field_as_mars not in seen, ensemble_field_as_mars seen.add(ensemble_field_as_mars) - # cmin=np.amin(center_np) + # cmin=np.amin(centre_np) # emin=np.amin(members_np) # if cmin < 0 and emin >= 0: # LOG.warning(f"Negative values in {param} cmin={cmin} emin={emin}") - # LOG.warning(f"Center: {center_field_as_mars}") + # LOG.warning(f"centre: {centre_field_as_mars}") mean_np = members_np.mean(axis=0) @@ -126,11 +127,11 @@ def perturbations( template = members[i * n_numbers + j] e = members_np[j] m = mean_np - c = center_np + c = centre_np assert e.shape == c.shape == m.shape, (e.shape, c.shape, m.shape) - x = c - m + e + x = c + (e - m) * alpha if param in clip_variables: # LOG.warning(f"Clipping {param} to be positive") diff --git a/src/anemoi/datasets/create/config.py b/src/anemoi/datasets/create/config.py index 6acc17ac..93db4a17 100644 --- a/src/anemoi/datasets/create/config.py +++ b/src/anemoi/datasets/create/config.py @@ -154,10 +154,16 @@ def __init__(self, config, *args, **kwargs): self.setdefault("build", Config()) self.build.setdefault("group_by", "monthly") self.build.setdefault("use_grib_paramid", False) + self.build.setdefault("variable_naming", "default") + variable_naming = dict( + param="{param}", + param_levelist="{param}_{levelist}", + default="{param}_{levelist}", + ).get(self.build.variable_naming, self.build.variable_naming) self.setdefault("output", Config()) self.output.setdefault("order_by", ["valid_datetime", "param_level", "number"]) - self.output.setdefault("remapping", Config(param_level="{param}_{levelist}")) + self.output.setdefault("remapping", Config(param_level=variable_naming)) self.output.setdefault("statistics", "param_level") self.output.setdefault("chunking", Config(dates=1, ensembles=1)) self.output.setdefault("dtype", "float32") diff --git a/src/anemoi/datasets/create/functions/sources/accumulations.py b/src/anemoi/datasets/create/functions/sources/accumulations.py index 83910016..da7611b7 100644 --- a/src/anemoi/datasets/create/functions/sources/accumulations.py +++ b/src/anemoi/datasets/create/functions/sources/accumulations.py @@ -7,6 +7,7 @@ # nor does it submit to any jurisdiction. 
 #
 import datetime
+import logging
 import warnings
 
 from copy import deepcopy
@@ -18,7 +19,7 @@
 
 from anemoi.datasets.create.utils import to_datetime_list
 
-DEBUG = True
+LOG = logging.getLogger(__name__)
 
 
 def member(field):
@@ -73,7 +74,10 @@ def check(self, field):
 
     def write(self, template):
         assert self.startStep != self.endStep, (self.startStep, self.endStep)
-        assert np.all(self.values >= 0), (np.amin(self.values), np.amax(self.values))
+        if not np.all(self.values >= 0):
+            LOG.warning(
+                f"Negative values when computing accumulation for {self.param} ({self.date} {self.time}): min={np.amin(self.values)} max={np.amax(self.values)}"
+            )
 
         self.out.write(
             self.values,
@@ -395,6 +399,7 @@ def accumulations(context, dates, **request):
 
     KWARGS = {
         ("od", "oper"): dict(patch=scda),
+        ("od", "elda"): dict(base_times=(6, 18)),
         ("ea", "oper"): dict(data_accumulation_period=1, base_times=(6, 18)),
         ("ea", "enda"): dict(data_accumulation_period=3, base_times=(6, 18)),
     }
@@ -431,6 +436,5 @@ def accumulations(context, dates, **request):
     dates = yaml.safe_load("[2022-12-30 18:00, 2022-12-31 00:00, 2022-12-31 06:00, 2022-12-31 12:00]")
     dates = to_datetime_list(dates)
 
-    DEBUG = True
     for f in accumulations(None, dates, **config):
         print(f, f.to_numpy().mean())
diff --git a/src/anemoi/datasets/create/functions/sources/hindcasts.py b/src/anemoi/datasets/create/functions/sources/hindcasts.py
new file mode 100644
index 00000000..3d0ff0e7
--- /dev/null
+++ b/src/anemoi/datasets/create/functions/sources/hindcasts.py
@@ -0,0 +1,437 @@
+# (C) Copyright 2024 ECMWF.
+#
+# This software is licensed under the terms of the Apache Licence Version 2.0
+# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
+# In applying this licence, ECMWF does not waive the privileges and immunities
+# granted to it by virtue of its status as an intergovernmental organisation
+# nor does it submit to any jurisdiction.
+# +import datetime +import warnings +from copy import deepcopy + +import climetlab as cml +import numpy as np +from climetlab.core.temporary import temp_file +from climetlab.readers.grib.output import new_grib_output +from climetlab.utils.availability import Availability + +from anemoi.datasets.create.functions.sources.mars import mars + +DEBUG = True + + +def member(field): + # Bug in eccodes has number=0 randomly + number = field.metadata("number") + if number is None: + number = 0 + return number + + +class Accumulation: + def __init__(self, out, /, param, date, time, number, step, frequency, **kwargs): + self.out = out + self.param = param + self.date = date + self.time = time + self.steps = step + self.number = number + self.values = None + self.seen = set() + self.startStep = None + self.endStep = None + self.done = False + self.frequency = frequency + self._check = None + + @property + def key(self): + return (self.param, self.date, self.time, self.steps, self.number) + + def check(self, field): + if self._check is None: + self._check = field.as_mars() + + assert self.param == field.metadata("param"), (self.param, field.metadata("param")) + assert self.date == field.metadata("date"), (self.date, field.metadata("date")) + assert self.time == field.metadata("time"), (self.time, field.metadata("time")) + assert self.number == member(field), (self.number, member(field)) + + return + + mars = field.as_mars() + keys1 = sorted(self._check.keys()) + keys2 = sorted(mars.keys()) + + assert keys1 == keys2, (keys1, keys2) + + for k in keys1: + if k not in ("step",): + assert self._check[k] == mars[k], (k, self._check[k], mars[k]) + + def write(self, template): + + assert self.startStep != self.endStep, (self.startStep, self.endStep) + assert np.all(self.values >= 0), (np.amin(self.values), np.amax(self.values)) + + self.out.write( + self.values, + template=template, + stepType="accum", + startStep=self.startStep, + endStep=self.endStep, + ) + self.values = None + self.done = True + + def add(self, field, values): + + self.check(field) + + step = field.metadata("step") + if step not in self.steps: + return + + if not np.all(values >= 0): + warnings.warn(f"Negative values for {field}: {np.amin(values)} {np.amax(values)}") + + assert not self.done, (self.key, step) + assert step not in self.seen, (self.key, step) + + startStep = field.metadata("startStep") + endStep = field.metadata("endStep") + + if self.buggy_steps and startStep == endStep: + startStep = 0 + + assert step == endStep, (startStep, endStep, step) + + self.compute(values, startStep, endStep) + + self.seen.add(step) + + if len(self.seen) == len(self.steps): + self.write(template=field) + + @classmethod + def mars_date_time_steps(cls, dates, step1, step2, frequency, base_times, adjust_step): + + # assert step1 > 0, (step1, step2, frequency) + + for valid_date in dates: + base_date = valid_date - datetime.timedelta(hours=step2) + add_step = 0 + if base_date.hour not in base_times: + if not adjust_step: + raise ValueError( + f"Cannot find a base time in {base_times} that validates on {valid_date.isoformat()} for step={step2}" + ) + + while base_date.hour not in base_times: + # print(f'{base_date=}, {base_times=}, {add_step=} {frequency=}') + base_date -= datetime.timedelta(hours=1) + add_step += 1 + + yield cls._mars_date_time_step(base_date, step1, step2, add_step, frequency) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.key})" + + +class AccumulationFromStart(Accumulation): + buggy_steps = True + + def 
compute(self, values, startStep, endStep): + + assert startStep == 0, startStep + + if self.values is None: + + self.values = np.copy(values) + self.startStep = 0 + self.endStep = endStep + + else: + assert endStep != self.endStep, (self.endStep, endStep) + + if endStep > self.endStep: + # assert endStep - self.endStep == self.stepping, (self.endStep, endStep, self.stepping) + self.values = values - self.values + self.startStep = self.endStep + self.endStep = endStep + else: + # assert self.endStep - endStep == self.stepping, (self.endStep, endStep, self.stepping) + self.values = self.values - values + self.startStep = endStep + + if not np.all(self.values >= 0): + warnings.warn(f"Negative values for {self.param}: {np.amin(self.values)} {np.amax(self.values)}") + self.values = np.maximum(self.values, 0) + + @classmethod + def _mars_date_time_step(cls, base_date, step1, step2, add_step, frequency): + assert not frequency, frequency + + steps = (step1 + add_step, step2 + add_step) + if steps[0] == 0: + steps = (steps[1],) + + return ( + base_date.year * 10000 + base_date.month * 100 + base_date.day, + base_date.hour * 100 + base_date.minute, + steps, + ) + + +class AccumulationFromLastStep(Accumulation): + buggy_steps = False + + def compute(self, values, startStep, endStep): + + assert endStep - startStep == self.frequency, (startStep, endStep, self.frequency) + + if self.startStep is None: + self.startStep = startStep + else: + self.startStep = min(self.startStep, startStep) + + if self.endStep is None: + self.endStep = endStep + else: + self.endStep = max(self.endStep, endStep) + + if self.values is None: + self.values = np.zeros_like(values) + + self.values += values + + @classmethod + def _mars_date_time_step(cls, base_date, step1, step2, add_step, frequency): + assert frequency > 0, frequency + # assert step1 > 0, (step1, step2, frequency, add_step, base_date) + + steps = [] + for step in range(step1 + frequency, step2 + frequency, frequency): + steps.append(step + add_step) + return ( + base_date.year * 10000 + base_date.month * 100 + base_date.day, + base_date.hour * 100 + base_date.minute, + tuple(steps), + ) + + +def identity(x): + return x + + +def compute_accumulations( + dates, + request, + user_accumulation_period=6, + data_accumulation_period=None, + patch=identity, + base_times=None, +): + adjust_step = isinstance(user_accumulation_period, int) + + if not isinstance(user_accumulation_period, (list, tuple)): + user_accumulation_period = (0, user_accumulation_period) + + assert len(user_accumulation_period) == 2, user_accumulation_period + step1, step2 = user_accumulation_period + assert step1 < step2, user_accumulation_period + + if base_times is None: + base_times = [0, 6, 12, 18] + + base_times = [t // 100 if t > 100 else t for t in base_times] + + AccumulationClass = AccumulationFromStart if data_accumulation_period in (0, None) else AccumulationFromLastStep + + mars_date_time_steps = AccumulationClass.mars_date_time_steps( + dates, + step1, + step2, + data_accumulation_period, + base_times, + adjust_step, + ) + + request = deepcopy(request) + + param = request["param"] + if not isinstance(param, (list, tuple)): + param = [param] + + number = request.get("number", [0]) + assert isinstance(number, (list, tuple)) + + frequency = data_accumulation_period + + type_ = request.get("type", "an") + if type_ == "an": + type_ = "fc" + + request.update({"type": type_, "levtype": "sfc"}) + + tmp = temp_file() + path = tmp.path + out = new_grib_output(path) + + requests = [] + + 
accumulations = {} + + for date, time, steps in mars_date_time_steps: + for p in param: + for n in number: + requests.append( + patch( + { + "param": p, + "date": date, + "time": time, + "step": sorted(steps), + "number": n, + } + ) + ) + + compressed = Availability(requests) + ds = cml.load_source("empty") + for r in compressed.iterate(): + request.update(r) + print("🌧️", request) + ds = ds + cml.load_source("mars", **request) + + accumulations = {} + for a in [AccumulationClass(out, frequency=frequency, **r) for r in requests]: + for s in a.steps: + key = (a.param, a.date, a.time, s, a.number) + accumulations.setdefault(key, []).append(a) + + for field in ds: + key = ( + field.metadata("param"), + field.metadata("date"), + field.metadata("time"), + field.metadata("step"), + member(field), + ) + values = field.values # optimisation + assert accumulations[key], key + for a in accumulations[key]: + a.add(field, values) + + for acc in accumulations.values(): + for a in acc: + assert a.done, (a.key, a.seen, a.steps) + + out.close() + + ds = cml.load_source("file", path) + + assert len(ds) / len(param) / len(number) == len(dates), ( + len(ds), + len(param), + len(dates), + ) + ds._tmp = tmp + + return ds + + +def to_list(x): + if isinstance(x, (list, tuple)): + return x + return [x] + + +def normalise_time_to_hours(r): + r = deepcopy(r) + if "time" not in r: + return r + + times = [] + for t in to_list(r["time"]): + assert len(t) == 4, r + assert t.endswith("00"), r + times.append(int(t) // 100) + r["time"] = tuple(times) + return r + + +def normalise_number(r): + if "number" not in r: + return r + number = r["number"] + number = to_list(number) + + if len(number) > 4 and (number[1] == "to" and number[3] == "by"): + return list(range(int(number[0]), int(number[2]) + 1, int(number[4]))) + + if len(number) > 2 and number[1] == "to": + return list(range(int(number[0]), int(number[2]) + 1)) + + r["number"] = number + return r + + +class HindcastCompute: + def __init__(self, base_times, available_steps, request): + self.base_times = base_times + self.available_steps = available_steps + self.request = request + + def compute_hindcast(self, date): + for step in self.available_steps: + start_date = date - datetime.timedelta(hours=step) + hours = start_date.hour + if hours in self.base_times: + r = deepcopy(self.request) + r["date"] = start_date + r["time"] = f"{start_date.hour:02d}00" + r["step"] = step + return r + raise ValueError( + f"Cannot find data for {self.request} for {date} (base_times={self.base_times}, available_steps={self.available_steps})" + ) + + +def use_reference_year(reference_year, request): + request = deepcopy(request) + hdate = request.pop("date") + date = datetime.datetime(reference_year, hdate.month, hdate.day) + request.update(date=date.strftime("%Y-%m-%d"), hdate=hdate.strftime("%Y-%m-%d")) + return request + + +def hindcasts(context, dates, **request): + request["param"] = to_list(request["param"]) + request["step"] = to_list(request["step"]) + request["step"] = [int(_) for _ in request["step"]] + + if request.get("stream") == "enfh" and "base_times" not in request: + request["base_times"] = [0] + + available_steps = request.pop("step") + available_steps = to_list(available_steps) + + base_times = request.pop("base_times") + + reference_year = request.pop("reference_year") + + context.trace("H️", f"hindcast {request} {base_times} {available_steps} {reference_year}") + + c = HindcastCompute(base_times, available_steps, request) + requests = [] + for d in dates: + req = 
c.compute_hindcast(d) + req = use_reference_year(reference_year, req) + + requests.append(req) + return mars(context, dates, *requests, date_key="hdate") + + +execute = hindcasts diff --git a/src/anemoi/datasets/create/functions/sources/mars.py b/src/anemoi/datasets/create/functions/sources/mars.py index e2a9fe24..73096a99 100644 --- a/src/anemoi/datasets/create/functions/sources/mars.py +++ b/src/anemoi/datasets/create/functions/sources/mars.py @@ -42,15 +42,21 @@ def normalise_time_delta(t): return t -def _expand_mars_request(request, date): +def _expand_mars_request(request, date, date_key="date"): requests = [] step = to_list(request.get("step", [0])) for s in step: r = deepcopy(request) - base = date - datetime.timedelta(hours=int(s)) + + if isinstance(s, str) and "-" in s: + assert s.count("-") == 1, s + # this takes care of the cases where the step is a period such as 0-24 or 12-24 + hours = int(str(s).split("-")[-1]) + + base = date - datetime.timedelta(hours=hours) r.update( { - "date": base.strftime("%Y%m%d"), + date_key: base.strftime("%Y%m%d"), "time": base.strftime("%H%M"), "step": s, } @@ -66,13 +72,13 @@ def _expand_mars_request(request, date): return requests -def factorise_requests(dates, *requests): +def factorise_requests(dates, *requests, date_key="date"): updates = [] for req in requests: # req = normalise_request(req) for d in dates: - updates += _expand_mars_request(req, date=d) + updates += _expand_mars_request(req, date=d, date_key=date_key) compressed = Availability(updates) for r in compressed.iterate(): @@ -96,11 +102,11 @@ def use_grib_paramid(r): return r -def mars(context, dates, *requests, **kwargs): +def mars(context, dates, *requests, date_key="date", **kwargs): if not requests: requests = [kwargs] - requests = factorise_requests(dates, *requests) + requests = factorise_requests(dates, *requests, date_key=date_key) ds = load_source("empty") for r in requests: r = {k: v for k, v in r.items() if v != ("-",)} diff --git a/src/anemoi/datasets/create/functions/sources/perturbations.py b/src/anemoi/datasets/create/functions/sources/recentre.py similarity index 83% rename from src/anemoi/datasets/create/functions/sources/perturbations.py rename to src/anemoi/datasets/create/functions/sources/recentre.py index 53428da8..563b38a6 100644 --- a/src/anemoi/datasets/create/functions/sources/perturbations.py +++ b/src/anemoi/datasets/create/functions/sources/recentre.py @@ -8,7 +8,7 @@ # from copy import deepcopy -from anemoi.datasets.compute.perturbations import perturbations as compute_perturbations +from anemoi.datasets.compute.recentre import recentre as _recentre from .mars import mars @@ -50,10 +50,10 @@ def load_if_needed(context, dates, dict_or_dataset): return dict_or_dataset -def perturbations(context, dates, members, center, remapping={}, patches={}): +def recentre(context, dates, members, centre, alpha=1.0, remapping={}, patches={}): members = load_if_needed(context, dates, members) - center = load_if_needed(context, dates, center) - return compute_perturbations(members, center) + centre = load_if_needed(context, dates, centre) + return _recentre(members=members, centre=centre, alpha=alpha) -execute = perturbations +execute = recentre diff --git a/src/anemoi/datasets/create/input.py b/src/anemoi/datasets/create/input.py index ab08798d..faef1b19 100644 --- a/src/anemoi/datasets/create/input.py +++ b/src/anemoi/datasets/create/input.py @@ -353,11 +353,6 @@ def data_request(self): """Returns a dictionary with the parameters needed to retrieve the data.""" 
return _data_request(self.datasource) - @property - def variables_with_nans(self): - print("❌❌HERE") - return - def get_cube(self): trace("🧊", f"getting cube from {self.__class__.__name__}") ds = self.datasource diff --git a/src/anemoi/datasets/create/loaders.py b/src/anemoi/datasets/create/loaders.py index 2e359c37..4ebc2943 100644 --- a/src/anemoi/datasets/create/loaders.py +++ b/src/anemoi/datasets/create/loaders.py @@ -615,6 +615,11 @@ def finalise(self): assert len(found) + len(missing) == len(self.dates), (len(found), len(missing), len(self.dates)) assert found.union(missing) == set(self.dates), (found, missing, set(self.dates)) + if len(ifound) < 2: + LOG.warn(f"Not enough data found in {self.path} to compute {self.__class__.__name__}. Skipped.") + self.tmp_storage.delete() + return + mask = sorted(list(ifound)) for k in ["minimum", "maximum", "sums", "squares", "count", "has_nans"]: agg[k] = agg[k][mask, ...] diff --git a/src/anemoi/datasets/create/statistics/__init__.py b/src/anemoi/datasets/create/statistics/__init__.py index cc3d8095..e8d9935f 100644 --- a/src/anemoi/datasets/create/statistics/__init__.py +++ b/src/anemoi/datasets/create/statistics/__init__.py @@ -84,25 +84,15 @@ def check_variance(x, variables_names, minimum, maximum, mean, count, sums, squa return print(x) print(variables_names) - print(count) for i, (name, y) in enumerate(zip(variables_names, x)): if y >= 0: continue print("---") - print( - name, - y, - maximum[i], - minimum[i], - mean[i], - count[i], - sums[i], - squares[i], - ) - - print(name, np.min(sums[i]), np.max(sums[i]), np.argmin(sums[i])) - print(name, np.min(squares[i]), np.max(squares[i]), np.argmin(squares[i])) - print(name, np.min(count[i]), np.max(count[i]), np.argmin(count[i])) + print(f"❗ Negative variance for {name=}, variance={y}") + print(f" max={maximum[i]} min={minimum[i]} mean={mean[i]} count={count[i]} sum={sums[i]} square={squares[i]}") + print(f" -> sums: min={np.min(sums[i])}, max={np.max(sums[i])}, argmin={np.argmin(sums[i])}") + print(f" -> squares: min={np.min(squares[i])}, max={np.max(squares[i])}, argmin={np.argmin(squares[i])}") + print(f" -> count: min={np.min(count[i])}, max={np.max(count[i])}, argmin={np.argmin(count[i])}") raise ValueError("Negative variance") diff --git a/src/anemoi/datasets/grids.py b/src/anemoi/datasets/grids.py index 1646eedb..e4df47ee 100644 --- a/src/anemoi/datasets/grids.py +++ b/src/anemoi/datasets/grids.py @@ -178,7 +178,7 @@ def cutout_mask( min_dlats = np.min(np.diff(glats)) min_dlons = np.min(np.diff(glons)) - # Use the center of the LAM grid as the reference point + # Use the centre of the LAM grid as the reference point centre = np.mean(lats), np.mean(lons) centre_xyz = np.array(latlon_to_xyz(*centre)) @@ -198,7 +198,7 @@ def cutout_mask( t = Triangle3D(lam_points[index[0]], lam_points[index[1]], lam_points[index[2]]) # distance = np.min(distance) # The point is inside the triangle if the intersection with the ray - # from the point to the center of the Earth is not None + # from the point to the centre of the Earth is not None # (the direction of the ray is not important) intersect = t.intersect(zero, global_point) diff --git a/tests/create/perturbations.yaml b/tests/create/recentre.yaml similarity index 79% rename from tests/create/perturbations.yaml rename to tests/create/recentre.yaml index 4bf70abb..1203d51f 100644 --- a/tests/create/perturbations.yaml +++ b/tests/create/recentre.yaml @@ -36,7 +36,7 @@ common: type: an number: [1, 2, 4] # number: [1, 2, 3, 4, 5, 6, 7, 8, 9] - center: ¢er 
+ centre: ¢re stream: oper type: an @@ -53,27 +53,27 @@ data_sources: - accumulations: <<: *ensembles <<: *acc - center: + centre: join: - mars: - <<: *center + <<: *centre <<: *sfc - mars: - <<: *center + <<: *centre <<: *pl - accumulations: - <<: *center + <<: *centre <<: *acc input: join: - - perturbations: + - recentre: # the ensemble data which has one additional dimension members: ${data_sources.ensembles} - # the new center of the data - center: ${data_sources.center} + # the new centre of the data + centre: ${data_sources.centre} - forcings: - template: ${input.join.0.perturbations} + template: ${input.join.0.recentre} param: - cos_latitude - cos_longitude @@ -84,9 +84,3 @@ input: - sin_julian_day - sin_local_time - insolation - -output: - order_by: [valid_datetime, param_level, number] - statistics: param_level - remapping: - param_level: "{param}_{levelist}" diff --git a/tests/create/test_create.py b/tests/create/test_create.py index 7edb68f8..0c33908d 100755 --- a/tests/create/test_create.py +++ b/tests/create/test_create.py @@ -29,7 +29,7 @@ HERE = os.path.dirname(__file__) # find_yamls NAMES = sorted([os.path.basename(path).split(".")[0] for path in glob.glob(os.path.join(HERE, "*.yaml"))]) -SKIP = ["perturbations"] +SKIP = ["recentre"] NAMES = [name for name in NAMES if name not in SKIP] assert NAMES, "No yaml files found in " + HERE
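
Note on the `hindcasts` source: the `reference_year` handling can be hard to follow from the diff alone. A minimal sketch of the date mapping, mirroring the `use_reference_year` helper added in `hindcasts.py` (the example request is hypothetical)::

    import datetime
    from copy import deepcopy

    def use_reference_year(reference_year, request):
        # Move the hindcast date (e.g. 2002-05-12) to "hdate" and replace
        # "date" with the same day and month in the reference year.
        request = deepcopy(request)
        hdate = request.pop("date")
        date = datetime.datetime(reference_year, hdate.month, hdate.day)
        request.update(date=date.strftime("%Y-%m-%d"), hdate=hdate.strftime("%Y-%m-%d"))
        return request

    req = use_reference_year(2022, {"param": "2t", "date": datetime.datetime(2002, 5, 12)})
    # -> {'param': '2t', 'date': '2022-05-12', 'hdate': '2002-05-12'}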
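
Note on the `recentre` formula: `compute/recentre.py` operates field by field on GRIB data, but the underlying maths reduces to a few NumPy operations. A minimal sketch assuming plain arrays rather than fields (`recentre_members` is an illustrative name, not the library API)::

    import numpy as np

    def recentre_members(members, centre, alpha=1.0, clip=False):
        # members: (n_members, *grid), centre: (*grid)
        mean = members.mean(axis=0)
        new = centre + (members - mean) * alpha
        if clip:
            # accumulated variables, specific humidity, etc. must stay non-negative
            new = np.maximum(new, 0)
        return new

    members = np.array([[1.0, 2.0], [3.0, 6.0]])  # two members, two grid points
    centre = np.array([2.0, 3.0])
    print(recentre_members(members, centre))
    # [[1. 1.]
    #  [3. 5.]]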
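
Note on the new `build.variable_naming` option in `create/config.py`: named presets are resolved to a remapping template, and any other value is used as a template verbatim. A small sketch of that lookup (the custom template in the last call is a hypothetical user value)::

    def resolve_variable_naming(variable_naming="default"):
        presets = {
            "param": "{param}",
            "param_levelist": "{param}_{levelist}",
            "default": "{param}_{levelist}",
        }
        # Unknown values fall through and are used directly as the template.
        return presets.get(variable_naming, variable_naming)

    print(resolve_variable_naming())                     # {param}_{levelist}
    print(resolve_variable_naming("param"))              # {param}
    print(resolve_variable_naming("{param}{levelist}"))  # returned as-is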
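
Note on the `step` handling added to `_expand_mars_request` in `mars.py`: steps given as a period such as `0-24` or `12-24` are reduced to the hours at the end of the period before computing the base date. A sketch of that parsing, with an explicit fallback for plain integer steps (the hunk above appears to rely on such a fallback implicitly; `step_to_hours` is an illustrative name)::

    def step_to_hours(step):
        # "0-24" or "12-24" -> hours at the end of the period; plain steps -> int(step)
        if isinstance(step, str) and "-" in step:
            assert step.count("-") == 1, step
            return int(step.split("-")[-1])
        return int(step)

    assert step_to_hours("0-24") == 24
    assert step_to_hours("12-24") == 24
    assert step_to_hours(6) == 6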