Skip to content

Commit

Permalink
Merge branch 'release/0.15.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
floriankrb committed May 15, 2023
2 parents 17af3a0 + e9196ac commit 2733167
Show file tree
Hide file tree
Showing 14 changed files with 301 additions and 161 deletions.
2 changes: 1 addition & 1 deletion climetlab/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def load_dataset(name: str, *args, **kwargs) -> Dataset:
klass = get_dataset.lookup(name)

if name not in TERMS_OF_USE_SHOWN:
if klass.terms_of_use is not None:
if hasattr(klass, "terms_of_use") and klass.terms_of_use is not None:
print(klass.terms_of_use)
TERMS_OF_USE_SHOWN.add(name)

Expand Down
2 changes: 1 addition & 1 deletion climetlab/datasets/sample-bufr-data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
dataset:
source: url
args:
url: http://download.ecmwf.int/test-data/metview/gallery/temp.bufr
url: http://get.ecmwf.int/test-data/metview/gallery/temp.bufr

metadata:
documentation: Sample BUFR file containing TEMP messages
15 changes: 10 additions & 5 deletions climetlab/indexing/cube.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,18 +78,23 @@ def __init__(
if math.prod(self.user_shape) != len(self.source):
details = []
for k, v in self.user_coords.items():
details += f"{k=}, {len(v)}, {v}"
details.append(f"{k=}, {len(v)}, {v}")
assert not isinstance(
self.source, str
), f"Not expecting a str here ({self.source})"
for i, f in enumerate(self.source):
details.append(f"{i}={f}")
if i > 30:
details.append("...")
break
raise ValueError(

msg = (
f"Shape {self.user_shape} [{math.prod(self.user_shape):,}]"
f" does not match number of fields {len(self.source):,}. "
f"Difference: {len(self.source)-math.prod(self.user_shape):,}"
"\n".join(details)
+ f" does not match number of available fields {len(self.source):,}. "
+ f"Difference: {len(self.source)-math.prod(self.user_shape):,}"
+ "\n".join(details)
)
raise ValueError(msg)

@property
def field_shape(self):
Expand Down
156 changes: 15 additions & 141 deletions climetlab/loaders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,104 +9,23 @@


import datetime
import itertools
import json
import logging
import os
import re
import time
import warnings

import numpy as np

import climetlab as cml
from climetlab.core.order import build_remapping, normalize_order_by
from climetlab.utils import load_json_or_yaml, progress_bar
from climetlab.core.order import build_remapping # noqa:F401
from climetlab.utils import progress_bar
from climetlab.utils.config import LoadersConfig
from climetlab.utils.humanize import bytes, seconds

LOG = logging.getLogger(__name__)


class Config:
    """Loader configuration parsed from a dict or a JSON/YAML file path.

    Exposes the ``input``/``output`` sections of the config plus derived
    settings (ordering, remapping, chunking, statistics collection) used by
    the zarr loaders in this module.
    """

    def __init__(self, config, **kwargs):
        # Accept either a path to a JSON/YAML file or an already-parsed dict.
        if isinstance(config, str):
            config = load_json_or_yaml(config)
        self.config = config
        self.input = config["input"]
        self.output = config["output"]
        self.constants = config.get("constants")
        # Canonical ordering of the output dimensions (project helper).
        self.order = normalize_order_by(self.output["order"])
        # Optional renaming/merging of coordinate keys (project helper).
        self.remapping = build_remapping(self.output.get("remapping"))

        # Loop specification: one dataset load per combination of loop values.
        self.loop = self.config.get("loop")
        self.chunking = self.output.get("chunking", {})
        self.dtype = self.output.get("dtype", "float32")

        self.reading_chunks = config.get("reading_chunks")
        self.flatten_values = self.output.get("flatten_values", False)
        self.grid_points_first = self.output.get("grid_points_first", False)
        # grid_points_first requires a flat (1D) grid-point axis.
        if self.grid_points_first and not self.flatten_values:
            raise NotImplementedError(
                "For now, grid_points_first is only valid if flatten_values"
            )

        # The axis along which we append new data
        # TODO: assume grid points can be 2d as well
        self.append_axis = 1 if self.grid_points_first else 0

        self.collect_statistics = False
        if "statistics" in self.output:
            # Locate the position of the statistics dimension within the
            # ordered output dimensions.
            statistics_axis_name = self.output["statistics"]
            statistics_axis = -1
            for i, k in enumerate(self.order):
                if k == statistics_axis_name:
                    statistics_axis = i

            assert statistics_axis >= 0, (statistics_axis_name, self.order)

            self.statistics_names = self.order[statistics_axis_name]

            # TODO: consider 2D grid points
            # Shift by one when the grid-point axis is prepended first.
            self.statistics_axis = (
                statistics_axis + 1 if self.grid_points_first else statistics_axis
            )
            self.collect_statistics = True

    def substitute(self, vars):
        """Return a new Config with ``$name`` placeholders substituted.

        ``$UPPERCASE`` placeholders are replaced from the process environment;
        lowercase ones are looked up in the *vars* dict.
        """

        def substitute(x, vars):
            # Recurse into containers, substituting each element.
            if isinstance(x, (tuple, list)):
                return [substitute(y, vars) for y in x]

            if isinstance(x, dict):
                return {k: substitute(v, vars) for k, v in x.items()}

            if isinstance(x, str):
                # Fast path: no placeholder at the start of the string.
                # NOTE(review): re.match only anchors at the beginning, so a
                # string like "a$b" is returned unchanged — confirm intended.
                if not re.match(r"\$(\w+)", x):
                    return x
                lst = []
                # re.split with a capturing group yields alternating
                # literal/placeholder pieces; odd indices are placeholders.
                for i, bit in enumerate(re.split(r"\$(\w+)", x)):
                    if i % 2:
                        if bit.upper() == bit:
                            # substitute by the var env if $UPPERCASE
                            lst.append(os.environ[bit])
                        else:
                            # substitute by the value in the 'vars' dict
                            lst.append(vars[bit])
                    else:
                        lst.append(bit)

                lst = [e for e in lst if e != ""]

                # A lone substituted value keeps its original type
                # (e.g. an int stays an int instead of being stringified).
                if len(lst) == 1:
                    return lst[0]

                return "".join(str(_) for _ in lst)

            return x

        return Config(substitute(self.config, vars))


def _tidy(o):
if isinstance(o, dict):
return {k: _tidy(v) for k, v in o.items()}
Expand Down Expand Up @@ -202,8 +121,8 @@ def create_array(self, config, cube, append):
self.statistics = []

shape = cube.extended_user_shape
chunks = cube.chunking(config.chunking)
dtype = config.dtype
chunks = cube.chunking(config.output.chunking)
dtype = config.output.dtype

print(
f"Creating ZARR file '{self.path}', with {shape=}, "
Expand All @@ -216,7 +135,7 @@ def create_array(self, config, cube, append):
original_shape = self.z.shape
assert len(shape) == len(original_shape)

axis = config.append_axis
axis = config.output.append_axis

new_shape = []
for i, (o, s) in enumerate(zip(original_shape, shape)):
Expand Down Expand Up @@ -306,7 +225,7 @@ def add_metadata(self, config):
statistics_by_index["maximum"] = list(maximum)
statistics_by_index["minimum"] = list(minimum)

metadata["config"] = _tidy(config.config)
metadata["config"] = _tidy(config)

self.z.attrs["climetlab"] = metadata

Expand Down Expand Up @@ -378,19 +297,19 @@ def _load(loader, config, append, **kwargs):
print("Loading input", config.input)

data = cml.load_source("loader", config.input)
if config.constants:
data = data + cml.load_source("constants", data, config.constants)
if "constant" in config.input:
data = data + cml.load_source("constants", data, config.input.constants)

assert len(data)
print(f"Done in {seconds(time.time()-start)}, length: {len(data):,}.")

start = time.time()
print("Sort dataset")
cube = data.cube(
config.order,
remapping=config.remapping,
flatten_values=config.flatten_values,
grid_points_first=config.grid_points_first,
config.output.order_by,
remapping=config.output.remapping,
flatten_values=config.output.flatten_values,
grid_points_first=config.output.grid_points_first,
)
cube = cube.squeeze()
print(f"Done in {seconds(time.time()-start)}.")
Expand Down Expand Up @@ -431,45 +350,8 @@ def _load(loader, config, append, **kwargs):
)


def expand(values):
    """Expand a loop specification into a concrete sequence of values.

    A list is returned unchanged. A dict with ``start``/``stop`` (and an
    optional ``step``) yields an inclusive integer range. A dict with a
    ``monthly`` entry (whose ``start``/``stop`` are dates) yields one list of
    ISO-formatted date strings per calendar month. Anything else raises
    ``ValueError``.
    """
    if isinstance(values, list):
        return values

    if isinstance(values, dict):
        if "start" in values and "stop" in values:
            # Inclusive numeric range: stop is part of the result.
            return range(
                values["start"],
                values["stop"] + 1,
                values.get("step", 1),
            )

        if "monthly" in values:
            first = values["monthly"]["start"]
            final = values["monthly"]["stop"]
            groups = []
            # The start date is always included, even if it is the only day.
            current = [first]
            day = first
            while True:
                day = day + datetime.timedelta(days=1)
                if day > final:
                    break
                if (day.year, day.month) != (current[-1].year, current[-1].month):
                    # Month boundary crossed: flush the finished month.
                    groups.append([d.isoformat() for d in current])
                    current = []
                current.append(day)
            if current:
                groups.append([d.isoformat() for d in current])
            return groups

    raise ValueError(f"Cannot expand loop from {values}")


def load(loader, config, append=False, metadata_only=False, **kwargs):
config = Config(config)
config = LoadersConfig(config)

if metadata_only:
loader.add_metadata(config)
Expand All @@ -481,15 +363,7 @@ def load(loader, config, append=False, metadata_only=False, **kwargs):
loader.add_metadata(config)
return

def loops():
yield from (
dict(zip(config.loop.keys(), items))
for items in itertools.product(
expand(*list(config.loop.values())),
)
)

for vars in loops():
for vars in config._iter_loops():
print(vars)
_load(loader, config.substitute(vars), append=append, **kwargs)
loader.add_metadata(config)
Expand Down
5 changes: 3 additions & 2 deletions climetlab/readers/grib/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def write(
self,
values,
check_nans=False,
missing_value=1e36,
metadata={},
template=None,
**kwarg,
Expand Down Expand Up @@ -106,8 +105,10 @@ def write(
import numpy as np

if np.isnan(values).any():
missing_value = np.finfo(values.dtype).max
# missing_value = np.finfo(values.dtype).max
missing_value = 9999
values = np.nan_to_num(values, nan=missing_value)
metadata["missingValue"] = missing_value
metadata["bitmapPresent"] = 1

LOG.debug("GribOutput.metadata %s, other %s", metadata, other)
Expand Down
8 changes: 6 additions & 2 deletions climetlab/scripts/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from .grib import GribCmd
from .grib_info import GribInfoCmd
from .settings import SettingsCmd
from .test_data import TestDataCmd

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -68,6 +69,7 @@ class CliMetLabApp(
GribInfoCmd,
AvailabilityCmd,
LoadersCmd,
TestDataCmd,
*get_plugins(),
):
# intro = 'Welcome to climetlab. Type ? to list commands.\n'
Expand Down Expand Up @@ -126,7 +128,7 @@ def replace_dashes(txt):
print(colored(str(e), "red"))
except Exception:
traceback.print_exc()
return False
return 33


def main():
Expand Down Expand Up @@ -160,7 +162,9 @@ def main():
app = CliMetLabApp()

if cmdline:
return app.onecmd(" ".join(cmdline))
res = app.onecmd(" ".join(cmdline))
if res:
sys.exit(res)
else:
app.cmdloop()

Expand Down
37 changes: 37 additions & 0 deletions climetlab/scripts/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# (C) Copyright 2021 ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.
#

from .tools import parse_args


class TestDataCmd:
    """CLI mixin adding the ``test_data`` command to the climetlab app.

    The command downloads/builds the sample data used by the climetlab
    test suite into a local directory.
    """

    @parse_args(
        directory=(
            None,
            dict(
                metavar="DIRECTORY",
                # Fixed: the previous help text was copy-pasted from the
                # shell-completion command and described shells, not this
                # argument.
                help="Directory in which to create the test data "
                "(default: ./test-data).",
                nargs="?",
            ),
        ),
    )
    def do_test_data(self, args):
        """
        Create a directory with data used to test climetlab.
        """
        from climetlab.testing import build_testdata

        # Default to ./test-data when no directory is given on the
        # command line (nargs="?" makes the argument optional).
        directory = args.directory
        if not directory:
            directory = "./test-data"

        print(f"Adding testdata in {directory}")
        build_testdata(directory)
        print(f"Added testdata in {directory}")
1 change: 0 additions & 1 deletion climetlab/sources/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ def __init__(self, source_or_dataset, request={}, repeat=1, **kwargs):
request.setdefault("time", [None])

self.request = self._request(request)
print(self.request)

if "date" in self.request:
self.dates = [
Expand Down
1 change: 1 addition & 0 deletions climetlab/sources/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def execute(self, v, data, last, inherit):
one = last
print(f"Using data from: {name}, {one}")
source = self.load(name, **one)

assert len(source), f"No data for {(name, one)}"
data.append(source)

Expand Down
Loading

0 comments on commit 2733167

Please sign in to comment.