Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated instance_id utility function #290

Open
wants to merge 23 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 83 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import pytest
import xarray as xr

from xmip.utils import cmip6_dataset_id, google_cmip_col, model_id_match
from xmip.utils import (
cmip6_dataset_id,
google_cmip_col,
instance_id_from_dataset,
model_id_match,
)


def test_google_cmip_col():
Expand Down Expand Up @@ -68,3 +73,80 @@ def test_cmip6_dataset_id():
cmip6_dataset_id(ds, id_attrs=["grid_label", "activity_id", "wrong_attrs"])
== "gl.ai.none"
)


class Test_instance_id_from_dataset:
    """Tests for `instance_id_from_dataset` covering schema, separator and missing facets."""

    def test_default_cmip6(self):
        # All ten facets of the default schema are present; the id lists their
        # values in schema order, not in attribute order.
        attributes = dict(
            mip_era="a",
            grid_label="b",
            version="c",
            activity_id="d",
            institution_id="e",
            source_id="f",
            experiment_id="g",
            member_id="h",
            table_id="i",
            variable_id="j",
        )
        dataset = xr.Dataset(attrs=attributes)
        result = instance_id_from_dataset(dataset)
        assert result == "a.d.e.f.g.h.i.j.b.c"

    def test_custom_sep(self):
        # The schema is still written with dots; only the output separator changes.
        dataset = xr.Dataset(attrs=dict(a="a", b="b"))
        assert instance_id_from_dataset(dataset, id_schema="a.b", sep="-") == "a-b"

    def test_custom_schema(self):
        # Arbitrary attribute names can serve as facets.
        attributes = {"some": "thing", "totally": "unrelated"}
        dataset = xr.Dataset(attrs=attributes)
        result = instance_id_from_dataset(dataset, id_schema="some.totally")
        assert result == "thing.unrelated"

    @pytest.mark.parametrize("missing_value", ["none", "some"])
    def test_missing_attrs_print_missing(self, missing_value):
        # Facet `c` is absent and must show up as the placeholder.
        dataset = xr.Dataset(attrs=dict(a="a", b="b"))
        expected = f"a.b.{missing_value}"
        result = instance_id_from_dataset(
            dataset,
            id_schema="a.b.c",
            print_missing=True,
            missing_value=missing_value,
        )
        assert result == expected

    def test_missing_attrs_omit(self):
        # Facet `c` is absent and must be dropped from the id entirely.
        dataset = xr.Dataset(attrs=dict(a="a", b="b"))
        result = instance_id_from_dataset(
            dataset, id_schema="a.b.c", print_missing=False
        )
        assert result == "a.b"

    def test_missing_attrs_warning(self):
        # `grid_label` and `version` are left out on purpose so that the
        # default schema reports them in a warning.
        attributes = dict(
            mip_era="a",
            activity_id="d",
            institution_id="e",
            source_id="f",
            experiment_id="g",
            member_id="h",
            table_id="i",
            variable_id="j",
        )
        dataset = xr.Dataset(attrs=attributes)
        expected_message = r"Could not find dataset attributes for facets: \['grid_label', 'version'\]"
        with pytest.warns(UserWarning, match=expected_message):
            instance_id_from_dataset(dataset)
34 changes: 34 additions & 0 deletions xmip/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
except ImportError:
intake = None

import warnings

import xarray as xr


def google_cmip_col(catalog="main"):
"""A tiny utility function to point to the 'official' pangeo cmip6 cloud files."""
Expand Down Expand Up @@ -56,9 +60,38 @@ def model_id_match(match_list, id_tuple):


def _key_from_attrs(ds, attrs, sep="."):
raise
return sep.join([ds.attrs[i] if i in ds.attrs.keys() else "none" for i in attrs])


# Default facet schema used by `instance_id_from_dataset` when no `id_schema` is
# passed: dot-separated names of dataset attributes, in output order.
cmip_instance_id_schema = "mip_era.activity_id.institution_id.source_id.experiment_id.member_id.table_id.variable_id.grid_label.version"


def instance_id_from_dataset(
    ds: "xr.Dataset",
    id_schema: str = None,
    print_missing=True,
    missing_value="none",
    sep=".",
) -> str:
    """
    Format a CMIP6 compatible instance id from `ds` attributes according to `id_schema`.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset whose `attrs` provide the facet values.
    id_schema : str, optional
        Dot-separated facet (attribute) names in output order. Defaults to
        `cmip_instance_id_schema` (the official CMIP naming schema).
    print_missing : bool
        If true, facets that are not found in `ds.attrs` are replaced with
        `missing_value`; otherwise they are omitted from the result.
    missing_value : str
        Placeholder used for facets that are not found.
    sep : str
        Separator placed between facet values in the returned id.

    Returns
    -------
    str
        The facet values joined with `sep`.

    Warns
    -----
    UserWarning
        If one or more facets of the schema are not found in `ds.attrs`.
    """
    if id_schema is None:
        id_schema = cmip_instance_id_schema
    facets = id_schema.split(".")
    # Absence is decided by key membership rather than by comparing values to
    # `missing_value`, so an attribute whose real value happens to equal the
    # placeholder is neither reported as missing nor dropped.
    # `dict.fromkeys` keeps each facet once, in schema order, for the message.
    missing_facets = [f for f in dict.fromkeys(facets) if f not in ds.attrs]
    if missing_facets:
        warnings.warn(
            f"Could not find dataset attributes for facets: {missing_facets}"
        )
    if not print_missing:
        facets = [f for f in facets if f in ds.attrs]
    return sep.join([ds.attrs.get(f, missing_value) for f in facets])


def cmip6_dataset_id(
ds,
sep=".",
Expand Down Expand Up @@ -88,6 +121,7 @@ def cmip6_dataset_id(
str
Concatenated
"""
raise
return _key_from_attrs(ds, id_attrs, sep=sep)


Expand Down
Loading