Skip to content

Commit

Permalink
Merge branch 'release/0.15.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
floriankrb committed May 15, 2023
2 parents 17af3a0 + e9196ac commit 2733167
Show file tree
Hide file tree
Showing 14 changed files with 301 additions and 161 deletions.
2 changes: 1 addition & 1 deletion climetlab/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def load_dataset(name: str, *args, **kwargs) -> Dataset:
klass = get_dataset.lookup(name)

if name not in TERMS_OF_USE_SHOWN:
if klass.terms_of_use is not None:
if hasattr(klass, "terms_of_use") and klass.terms_of_use is not None:
print(klass.terms_of_use)
TERMS_OF_USE_SHOWN.add(name)

Expand Down
2 changes: 1 addition & 1 deletion climetlab/datasets/sample-bufr-data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
dataset:
source: url
args:
url: http://download.ecmwf.int/test-data/metview/gallery/temp.bufr
url: http://get.ecmwf.int/test-data/metview/gallery/temp.bufr

metadata:
documentation: Sample BUFR file containing TEMP messages
15 changes: 10 additions & 5 deletions climetlab/indexing/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,18 +78,23 @@ def __init__(
if math.prod(self.user_shape) != len(self.source):
details = []
for k, v in self.user_coords.items():
details += f"{k=}, {len(v)}, {v}"
details.append(f"{k=}, {len(v)}, {v}")
assert not isinstance(
self.source, str
), f"Not expecting a str here ({self.source})"
for i, f in enumerate(self.source):
details.append(f"{i}={f}")
if i > 30:
details.append("...")
break
raise ValueError(

msg = (
f"Shape {self.user_shape} [{math.prod(self.user_shape):,}]"
f" does not match number of fields {len(self.source):,}. "
f"Difference: {len(self.source)-math.prod(self.user_shape):,}"
"\n".join(details)
+ f" does not match number of available fields {len(self.source):,}. "
+ f"Difference: {len(self.source)-math.prod(self.user_shape):,}"
+ "\n".join(details)
)
raise ValueError(msg)

@property
def field_shape(self):
Expand Down
156 changes: 15 additions & 141 deletions climetlab/loaders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,104 +9,23 @@


import datetime
import itertools
import json
import logging
import os
import re
import time
import warnings

import numpy as np

import climetlab as cml
from climetlab.core.order import build_remapping, normalize_order_by
from climetlab.utils import load_json_or_yaml, progress_bar
from climetlab.core.order import build_remapping # noqa:F401
from climetlab.utils import progress_bar
from climetlab.utils.config import LoadersConfig
from climetlab.utils.humanize import bytes, seconds

LOG = logging.getLogger(__name__)


class Config:
    """Loader configuration parsed from a dict or a JSON/YAML file path.

    Exposes the ``input``/``output`` sections of the config plus derived
    settings (ordering, remapping, chunking, statistics collection) used by
    the zarr loaders in this module.
    """

    def __init__(self, config, **kwargs):
        # Accept either a path to a JSON/YAML file or an already-parsed dict.
        if isinstance(config, str):
            config = load_json_or_yaml(config)
        self.config = config
        self.input = config["input"]
        self.output = config["output"]
        self.constants = config.get("constants")
        # Canonical ordering of the output dimensions (project helper).
        self.order = normalize_order_by(self.output["order"])
        # Optional renaming/merging of coordinate keys (project helper).
        self.remapping = build_remapping(self.output.get("remapping"))

        # Loop specification: one dataset load per combination of loop values.
        self.loop = self.config.get("loop")
        self.chunking = self.output.get("chunking", {})
        self.dtype = self.output.get("dtype", "float32")

        self.reading_chunks = config.get("reading_chunks")
        self.flatten_values = self.output.get("flatten_values", False)
        self.grid_points_first = self.output.get("grid_points_first", False)
        # grid_points_first requires a flat (1D) grid-point axis.
        if self.grid_points_first and not self.flatten_values:
            raise NotImplementedError(
                "For now, grid_points_first is only valid if flatten_values"
            )

        # The axis along which we append new data
        # TODO: assume grid points can be 2d as well
        self.append_axis = 1 if self.grid_points_first else 0

        self.collect_statistics = False
        if "statistics" in self.output:
            # Locate the position of the statistics dimension within the
            # ordered output dimensions.
            statistics_axis_name = self.output["statistics"]
            statistics_axis = -1
            for i, k in enumerate(self.order):
                if k == statistics_axis_name:
                    statistics_axis = i

            assert statistics_axis >= 0, (statistics_axis_name, self.order)

            self.statistics_names = self.order[statistics_axis_name]

            # TODO: consider 2D grid points
            # Shift by one when the grid-point axis is prepended first.
            self.statistics_axis = (
                statistics_axis + 1 if self.grid_points_first else statistics_axis
            )
            self.collect_statistics = True

    def substitute(self, vars):
        """Return a new Config with ``$name`` placeholders substituted.

        ``$UPPERCASE`` placeholders are replaced from the process environment;
        lowercase ones are looked up in the *vars* dict.
        """

        def substitute(x, vars):
            # Recurse into containers, substituting each element.
            if isinstance(x, (tuple, list)):
                return [substitute(y, vars) for y in x]

            if isinstance(x, dict):
                return {k: substitute(v, vars) for k, v in x.items()}

            if isinstance(x, str):
                # Fast path: no placeholder at the start of the string.
                # NOTE(review): re.match only anchors at the beginning, so a
                # string like "a$b" is returned unchanged — confirm intended.
                if not re.match(r"\$(\w+)", x):
                    return x
                lst = []
                # re.split with a capturing group yields alternating
                # literal/placeholder pieces; odd indices are placeholders.
                for i, bit in enumerate(re.split(r"\$(\w+)", x)):
                    if i % 2:
                        if bit.upper() == bit:
                            # substitute by the var env if $UPPERCASE
                            lst.append(os.environ[bit])
                        else:
                            # substitute by the value in the 'vars' dict
                            lst.append(vars[bit])
                    else:
                        lst.append(bit)

                lst = [e for e in lst if e != ""]

                # A lone substituted value keeps its original type
                # (e.g. an int stays an int instead of being stringified).
                if len(lst) == 1:
                    return lst[0]

                return "".join(str(_) for _ in lst)

            return x

        return Config(substitute(self.config, vars))


def _tidy(o):
if isinstance(o, dict):
return {k: _tidy(v) for k, v in o.items()}
Expand Down Expand Up @@ -202,8 +121,8 @@ def create_array(self, config, cube, append):
self.statistics = []

shape = cube.extended_user_shape
chunks = cube.chunking(config.chunking)
dtype = config.dtype
chunks = cube.chunking(config.output.chunking)
dtype = config.output.dtype

print(
f"Creating ZARR file '{self.path}', with {shape=}, "
Expand All @@ -216,7 +135,7 @@ def create_array(self, config, cube, append):
original_shape = self.z.shape
assert len(shape) == len(original_shape)

axis = config.append_axis
axis = config.output.append_axis

new_shape = []
for i, (o, s) in enumerate(zip(original_shape, shape)):
Expand Down Expand Up @@ -306,7 +225,7 @@ def add_metadata(self, config):
statistics_by_index["maximum"] = list(maximum)
statistics_by_index["minimum"] = list(minimum)

metadata["config"] = _tidy(config.config)
metadata["config"] = _tidy(config)

self.z.attrs["climetlab"] = metadata

Expand Down Expand Up @@ -378,19 +297,19 @@ def _load(loader, config, append, **kwargs):
print("Loading input", config.input)

data = cml.load_source("loader", config.input)
if config.constants:
data = data + cml.load_source("constants", data, config.constants)
if "constant" in config.input:
data = data + cml.load_source("constants", data, config.input.constants)

assert len(data)
print(f"Done in {seconds(time.time()-start)}, length: {len(data):,}.")

start = time.time()
print("Sort dataset")
cube = data.cube(
config.order,
remapping=config.remapping,
flatten_values=config.flatten_values,
grid_points_first=config.grid_points_first,
config.output.order_by,
remapping=config.output.remapping,
flatten_values=config.output.flatten_values,
grid_points_first=config.output.grid_points_first,
)
cube = cube.squeeze()
print(f"Done in {seconds(time.time()-start)}.")
Expand Down Expand Up @@ -431,45 +350,8 @@ def _load(loader, config, append, **kwargs):
)


def expand(values):
    """Expand a loop specification into a concrete sequence of values.

    A list is returned unchanged. A dict with ``start``/``stop`` (and an
    optional ``step``) yields an inclusive integer range. A dict with a
    ``monthly`` entry (whose ``start``/``stop`` are dates) yields one list of
    ISO-formatted date strings per calendar month. Anything else raises
    ``ValueError``.
    """
    if isinstance(values, list):
        return values

    if isinstance(values, dict):
        if "start" in values and "stop" in values:
            # Inclusive numeric range: stop is part of the result.
            return range(
                values["start"],
                values["stop"] + 1,
                values.get("step", 1),
            )

        if "monthly" in values:
            first = values["monthly"]["start"]
            final = values["monthly"]["stop"]
            groups = []
            # The start date is always included, even if it is the only day.
            current = [first]
            day = first
            while True:
                day = day + datetime.timedelta(days=1)
                if day > final:
                    break
                if (day.year, day.month) != (current[-1].year, current[-1].month):
                    # Month boundary crossed: flush the finished month.
                    groups.append([d.isoformat() for d in current])
                    current = []
                current.append(day)
            if current:
                groups.append([d.isoformat() for d in current])
            return groups

    raise ValueError(f"Cannot expand loop from {values}")


def load(loader, config, append=False, metadata_only=False, **kwargs):
config = Config(config)
config = LoadersConfig(config)

if metadata_only:
loader.add_metadata(config)
Expand All @@ -481,15 +363,7 @@ def load(loader, config, append=False, metadata_only=False, **kwargs):
loader.add_metadata(config)
return

def loops():
yield from (
dict(zip(config.loop.keys(), items))
for items in itertools.product(
expand(*list(config.loop.values())),
)
)

for vars in loops():
for vars in config._iter_loops():
print(vars)
_load(loader, config.substitute(vars), append=append, **kwargs)
loader.add_metadata(config)
Expand Down
5 changes: 3 additions & 2 deletions climetlab/readers/grib/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def write(
self,
values,
check_nans=False,
missing_value=1e36,
metadata={},
template=None,
**kwarg,
Expand Down Expand Up @@ -106,8 +105,10 @@ def write(
import numpy as np

if np.isnan(values).any():
missing_value = np.finfo(values.dtype).max
# missing_value = np.finfo(values.dtype).max
missing_value = 9999
values = np.nan_to_num(values, nan=missing_value)
metadata["missingValue"] = missing_value
metadata["bitmapPresent"] = 1

LOG.debug("GribOutput.metadata %s, other %s", metadata, other)
Expand Down
8 changes: 6 additions & 2 deletions climetlab/scripts/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from .grib import GribCmd
from .grib_info import GribInfoCmd
from .settings import SettingsCmd
from .test_data import TestDataCmd

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -68,6 +69,7 @@ class CliMetLabApp(
GribInfoCmd,
AvailabilityCmd,
LoadersCmd,
TestDataCmd,
*get_plugins(),
):
# intro = 'Welcome to climetlab. Type ? to list commands.\n'
Expand Down Expand Up @@ -126,7 +128,7 @@ def replace_dashes(txt):
print(colored(str(e), "red"))
except Exception:
traceback.print_exc()
return False
return 33


def main():
Expand Down Expand Up @@ -160,7 +162,9 @@ def main():
app = CliMetLabApp()

if cmdline:
return app.onecmd(" ".join(cmdline))
res = app.onecmd(" ".join(cmdline))
if res:
sys.exit(res)
else:
app.cmdloop()

Expand Down
37 changes: 37 additions & 0 deletions climetlab/scripts/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# (C) Copyright 2021 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

from .tools import parse_args


class TestDataCmd:
    """CLI mixin adding the ``test_data`` command to the climetlab app.

    The command downloads/builds the sample data used by the climetlab
    test suite into a local directory.
    """

    @parse_args(
        directory=(
            None,
            dict(
                metavar="DIRECTORY",
                # Fixed: the previous help text was copy-pasted from the
                # shell-completion command and described shells, not this
                # argument.
                help="Directory in which to create the test data "
                "(default: ./test-data).",
                nargs="?",
            ),
        ),
    )
    def do_test_data(self, args):
        """
        Create a directory with data used to test climetlab.
        """
        from climetlab.testing import build_testdata

        # Default to ./test-data when no directory is given on the
        # command line (nargs="?" makes the argument optional).
        directory = args.directory
        if not directory:
            directory = "./test-data"

        print(f"Adding testdata in {directory}")
        build_testdata(directory)
        print(f"Added testdata in {directory}")
1 change: 0 additions & 1 deletion climetlab/sources/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ def __init__(self, source_or_dataset, request={}, repeat=1, **kwargs):
request.setdefault("time", [None])

self.request = self._request(request)
print(self.request)

if "date" in self.request:
self.dates = [
Expand Down
1 change: 1 addition & 0 deletions climetlab/sources/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def execute(self, v, data, last, inherit):
one = last
print(f"Using data from: {name}, {one}")
source = self.load(name, **one)

assert len(source), f"No data for {(name, one)}"
data.append(source)

Expand Down
Loading

0 comments on commit 2733167

Please sign in to comment.