From dd032541da805ef4b52e1c2c7ab38edef8989861 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Fri, 25 Oct 2024 13:26:02 -0400 Subject: [PATCH 1/5] better attributes, manage weights on time, optional delta_types --- src/xhydro/cc.py | 115 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 93 insertions(+), 22 deletions(-) diff --git a/src/xhydro/cc.py b/src/xhydro/cc.py index 9750737a..eab31223 100644 --- a/src/xhydro/cc.py +++ b/src/xhydro/cc.py @@ -1,9 +1,10 @@ """Module to compute climate change statistics using xscen functions.""" -from typing import Optional, Union +from datetime import datetime import numpy as np import xarray as xr +import xscen as xs # Special imports from xscen from xscen import climatological_op, compute_deltas, ensemble_stats, produce_horizon @@ -20,8 +21,8 @@ def sampled_indicators( ds: xr.Dataset, deltas: xr.Dataset, - delta_type: str, *, + delta_type: str | dict | None = None, ds_weights: xr.DataArray | None = None, delta_weights: xr.DataArray | None = None, n: int = 50000, @@ -37,8 +38,10 @@ def sampled_indicators( The percentiles should be stored in either a dimension called "percentile" [0, 100] or "quantile" [0, 1]. deltas : xr.Dataset Dataset containing the future deltas to apply to the historical indicators. - delta_type : str - Type of delta provided. Must be one of ['absolute', 'percentage']. + delta_type : str or dict, optional + Type of delta provided. Recognized values are: ['absolute', 'abs.', '+'], ['percentage', 'pct.', '%']. + If a dict is provided, it should map the variable names to their respective delta type. + If None, the variables should have a 'delta_kind' attribute. ds_weights : xr.DataArray, optional Weights to use when sampling the historical indicators, for dimensions other than 'percentile'/'quantile'. Dimensions not present in this Dataset, or if None, will be sampled uniformly unless they are shared with 'deltas'. @@ -65,6 +68,9 @@ def sampled_indicators( Notes ----- + Weights along the 'time' or 'horizon' dimensions are supported, but behave differently than other dimensions. They will not + be stacked alongside other dimensions in the new 'sample' dimension. Rather, a separate sampling will be done for each time/horizon, + The future percentiles are computed as follows: 1. Sample 'n' values from the historical distribution, weighting the percentiles by their associated coverage. 2. Sample 'n' values from the delta distribution, using the provided weights. @@ -119,27 +125,63 @@ def sampled_indicators( _, pdim, mult = _perc_or_quantile(ds) ds_dist = ( _weighted_sampling(ds, percentile_weights, n=n, seed=seed) - .drop_vars(["sample", *percentile_weights.dims]) + .drop_vars("sample", errors="ignore") .assign_coords({"sample": np.arange(n)}) ) deltas_dist = ( _weighted_sampling(deltas, delta_weights, n=n, seed=seed) - .drop_vars(["sample", *delta_weights.dims]) + .drop_vars("sample", errors="ignore") .assign_coords({"sample": np.arange(n)}) ) - # Element-wise multiplication of the ref_dist and ens_dist - if delta_type == "percentage": - fut_dist = ds_dist * (1 + deltas_dist / 100) - elif delta_type == "absolute": - fut_dist = ds_dist + deltas_dist - else: - raise ValueError( - f"Unknown operation '{delta_type}', expected one of ['absolute', 'percentage']." - ) + # Prepare the operation to apply on the variables + if delta_type is None: + delta_type = {var: deltas[var].attrs["delta_kind"] for var in deltas.data_vars} + elif isinstance(delta_type, str): + delta_type = {var: delta_type for var in deltas.data_vars} + + # Apply the deltas element-wise to the historical distribution + fut_dist = xr.Dataset() + for var in deltas.data_vars: + with xr.set_options(keep_attrs=True): + if delta_type[var] in ["absolute", "abs.", "+"]: + fut_dist[var] = ds_dist[var] + deltas_dist[var] + elif delta_type[var] in ["percentage", "pct.", "%"]: + fut_dist[var] = ds_dist[var] * (1 + deltas_dist[var] / 100) + else: + raise ValueError( + f"Unknown operation '{delta_type[var]}', expected one of ['absolute', 'abs.', '+'], ['percentage', 'pct.', '%']." + ) + fut_dist[var].attrs = ds_dist[var].attrs + fut_dist[var].attrs["sampling_kind"] = delta_type[var] + fut_dist[var].attrs["sampling_n"] = n + fut_dist[var].attrs["sampling_seed"] = seed + fut_dist[var].attrs[ + "description" + ] = f"Future distribution of {fut_dist[var].attrs.get('long_name', var)} constructed from a perturbation approach and random sampling." + fut_dist[var].attrs[ + "long_name" + ] = f"Future distribution of {fut_dist[var].attrs.get('long_name', var)}." + old_history = ds_dist[var].attrs.get("history", "") + deltas_dist[ + var + ].attrs.get("history", "") + fut_dist[var].attrs["history"] = ( + f"[{datetime.now().isoformat()}] Perturbation approach and random sampling applied to historical distribution and deltas " + f"using 'xhydro.sampled_indicators' and {n} samples.\n {old_history}" + ) + # Build the attributes based on common attributes between the historical and delta distributions + fut_dist = xs.clean_up(fut_dist, common_attrs_only=[ds, deltas]) # Compute future percentiles - fut_pct = fut_dist.quantile(ds.percentile / mult, dim="sample") + with xr.set_options(keep_attrs=True): + fut_pct = fut_dist.quantile(ds.percentile / mult, dim="sample") + for var in fut_pct.data_vars: + fut_pct[var].attrs["description"] = ( + fut_pct[var].attrs["description"].replace("distribution", f"{pdim}s") + ) + fut_pct[var].attrs["long_name"] = ( + fut_pct[var].attrs["long_name"].replace("distribution", f"{pdim}s") + ) if pdim == "percentile": fut_pct = fut_pct.rename({"quantile": "percentile"}) @@ -185,6 +227,7 @@ def _weighted_sampling( ds: xr.Dataset, weights: xr.DataArray, n: int = 50000, seed: int | None = None ) -> xr.Dataset: """Sample from a distribution with weights. + In the case of weights on a 'time' or 'horizon' dimension, the sampling is done separately for each time/horizon, but with the same seed. Parameters ---------- @@ -205,23 +248,51 @@ def _weighted_sampling( # Prepare the weights if weights.isnull().any(): raise ValueError("The weights contain NaNs.") - weights = weights / weights.sum() # Must equal 1 + + # Weights on the time dimension need to be handled differently + time_dim = [dim for dim in weights.dims if dim in ["time", "horizon"]] + if len(time_dim) > 1: + raise NotImplementedError( + "Weights on multiple time dimensions are not supported." + ) + time_dim = time_dim[0] if time_dim else "" + other_dims = list(set(weights.dims).difference([time_dim])) + + # Must equal 1 + weights = weights / weights.sum(other_dims) # For performance reasons, remove the chunking along impacted dimensions ds = ds.chunk({dim: -1 for dim in weights.dims}) weights = weights.chunk({dim: -1 for dim in weights.dims}) # Stack the dimensions containing weights - ds = ds.stack({"sample": sorted(list(weights.dims))}) - weights = weights.stack({"sample": sorted(list(weights.dims))}) + ds = ds.stack({"sample": sorted(other_dims)}) + weights = weights.stack({"sample": sorted(other_dims)}) weights = weights.reindex_like(ds) # Sample the dimensions with weights rng = np.random.default_rng(seed=seed) - idx = rng.choice(weights.size, size=n, p=weights) - # Create the distribution dataset - ds_dist = ds.isel({"sample": idx}).chunk({"sample": -1}) + if not time_dim: + idx = rng.choice(len(weights.sample), size=n, p=weights) + # Create the distribution dataset + ds_dist = ds.isel({"sample": idx}).chunk({"sample": -1}) + else: + # 2D weights are not supported by np.random.choice, so we need to loop over the time dimension + ds_tmp = [ + ds.sel({time_dim: time}).isel( + { + "sample": rng.choice( + len(weights.sample), size=n, p=weights.sel({time_dim: time}) + ) + } + ) + for time in ds[time_dim].values + ] + # Remove the coordinates from the sample dimension to allow concatenation + for i, ds_ in enumerate(ds_tmp): + ds_tmp[i] = ds_.reset_index("sample", drop=True) + ds_dist = xr.concat(ds_tmp, dim=time_dim).chunk({time_dim: -1, "sample": -1}) return ds_dist From fd87791c7ee2be117d5492322db84e4a336dfa10 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Mon, 28 Oct 2024 14:01:30 -0400 Subject: [PATCH 2/5] add tests --- src/xhydro/cc.py | 109 +++++++++++++++-------- tests/test_cc.py | 220 ++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 280 insertions(+), 49 deletions(-) diff --git a/src/xhydro/cc.py b/src/xhydro/cc.py index eab31223..711a168b 100644 --- a/src/xhydro/cc.py +++ b/src/xhydro/cc.py @@ -18,11 +18,11 @@ ] -def sampled_indicators( +def sampled_indicators( # noqa: C901 ds: xr.Dataset, deltas: xr.Dataset, *, - delta_type: str | dict | None = None, + delta_kind: str | dict | None = None, ds_weights: xr.DataArray | None = None, delta_weights: xr.DataArray | None = None, n: int = 50000, @@ -38,7 +38,7 @@ def sampled_indicators( The percentiles should be stored in either a dimension called "percentile" [0, 100] or "quantile" [0, 1]. deltas : xr.Dataset Dataset containing the future deltas to apply to the historical indicators. - delta_type : str or dict, optional + delta_kind : str or dict, optional Type of delta provided. Recognized values are: ['absolute', 'abs.', '+'], ['percentage', 'pct.', '%']. If a dict is provided, it should map the variable names to their respective delta type. If None, the variables should have a 'delta_kind' attribute. @@ -96,7 +96,9 @@ def sampled_indicators( ) } ) + deltas_weighted = True if delta_weights is None: + deltas_weighted = False dims = set(deltas.dims).difference(list(shared_dims) + exclude_dims) delta_weights = xr.DataArray( np.ones([deltas.sizes[dim] for dim in dims]), @@ -121,6 +123,43 @@ def sampled_indicators( f"Dimension(s) {problem_dims} is shared between 'ds' and 'deltas', but not between 'ds_weights' and 'delta_weights'." ) + # Prepare the operation to apply on the variables + if delta_kind is None: + try: + delta_kind = { + var: deltas[var].attrs["delta_kind"] for var in deltas.data_vars + } + except KeyError: + raise KeyError( + "The 'delta_kind' argument is None, but the variables within the 'deltas' Dataset are missing a 'delta_kind' attribute." + ) + elif isinstance(delta_kind, str): + delta_kind = {var: delta_kind for var in deltas.data_vars} + elif isinstance(delta_kind, dict): + if not all([var in delta_kind for var in deltas.data_vars]): + raise ValueError( + f"If 'delta_kind' is a dict, it should contain all the variables in 'deltas'. Missing variables: " + f"{list(set(deltas.data_vars) - set(delta_kind))}." + ) + + def _add_sampling_attrs(d, prefix, history, ds_for_attrs=None): + for var in d.data_vars: + if ds_for_attrs is not None: + d[var].attrs = ds_for_attrs[var].attrs + d[var].attrs["sampling_kind"] = delta_kind[var] + d[var].attrs["sampling_n"] = n + d[var].attrs["sampling_seed"] = seed + d[var].attrs[ + "description" + ] = f"{prefix} of {d[var].attrs.get('long_name', var)} constructed from a perturbation approach and random sampling." + d[var].attrs[ + "long_name" + ] = f"{prefix} of {d[var].attrs.get('long_name', var)}" + old_history = d[var].attrs.get("history", "") + d[var].attrs[ + "history" + ] = f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {history}\n {old_history}" + # Sample the distributions _, pdim, mult = _perc_or_quantile(ds) ds_dist = ( @@ -128,60 +167,60 @@ def sampled_indicators( .drop_vars("sample", errors="ignore") .assign_coords({"sample": np.arange(n)}) ) + _add_sampling_attrs( + ds_dist, + "Historical distribution", + f"{'Weighted' if ds_weights is not None else 'Uniform'} random sampling " + f"applied to the historical distribution using 'xhydro.sampled_indicators' " + f"and {n} samples.", + ds_for_attrs=ds, + ) + deltas_dist = ( _weighted_sampling(deltas, delta_weights, n=n, seed=seed) .drop_vars("sample", errors="ignore") .assign_coords({"sample": np.arange(n)}) ) - - # Prepare the operation to apply on the variables - if delta_type is None: - delta_type = {var: deltas[var].attrs["delta_kind"] for var in deltas.data_vars} - elif isinstance(delta_type, str): - delta_type = {var: delta_type for var in deltas.data_vars} + _add_sampling_attrs( + deltas_dist, + "Future delta distribution", + f"{'Weighted' if deltas_weighted else 'Uniform'} random sampling " + f"applied to the delta distribution using 'xhydro.sampled_indicators' " + f"and {n} samples.", + ds_for_attrs=deltas, + ) # Apply the deltas element-wise to the historical distribution fut_dist = xr.Dataset() for var in deltas.data_vars: with xr.set_options(keep_attrs=True): - if delta_type[var] in ["absolute", "abs.", "+"]: + if delta_kind[var] in ["absolute", "abs.", "+"]: fut_dist[var] = ds_dist[var] + deltas_dist[var] - elif delta_type[var] in ["percentage", "pct.", "%"]: + elif delta_kind[var] in ["percentage", "pct.", "%"]: fut_dist[var] = ds_dist[var] * (1 + deltas_dist[var] / 100) else: raise ValueError( - f"Unknown operation '{delta_type[var]}', expected one of ['absolute', 'abs.', '+'], ['percentage', 'pct.', '%']." + f"Unknown operation '{delta_kind[var]}', expected one of ['absolute', 'abs.', '+'], ['percentage', 'pct.', '%']." ) - fut_dist[var].attrs = ds_dist[var].attrs - fut_dist[var].attrs["sampling_kind"] = delta_type[var] - fut_dist[var].attrs["sampling_n"] = n - fut_dist[var].attrs["sampling_seed"] = seed - fut_dist[var].attrs[ - "description" - ] = f"Future distribution of {fut_dist[var].attrs.get('long_name', var)} constructed from a perturbation approach and random sampling." - fut_dist[var].attrs[ - "long_name" - ] = f"Future distribution of {fut_dist[var].attrs.get('long_name', var)}." - old_history = ds_dist[var].attrs.get("history", "") + deltas_dist[ - var - ].attrs.get("history", "") - fut_dist[var].attrs["history"] = ( - f"[{datetime.now().isoformat()}] Perturbation approach and random sampling applied to historical distribution and deltas " - f"using 'xhydro.sampled_indicators' and {n} samples.\n {old_history}" + _add_sampling_attrs( + fut_dist, + "Future distribution", + f"Future distribution constructed from a perturbation approach and random sampling with {n} samples.", + ds_for_attrs=ds, ) + # Build the attributes based on common attributes between the historical and delta distributions fut_dist = xs.clean_up(fut_dist, common_attrs_only=[ds, deltas]) # Compute future percentiles with xr.set_options(keep_attrs=True): fut_pct = fut_dist.quantile(ds.percentile / mult, dim="sample") - for var in fut_pct.data_vars: - fut_pct[var].attrs["description"] = ( - fut_pct[var].attrs["description"].replace("distribution", f"{pdim}s") - ) - fut_pct[var].attrs["long_name"] = ( - fut_pct[var].attrs["long_name"].replace("distribution", f"{pdim}s") - ) + _add_sampling_attrs( + fut_pct, + "Future percentiles", + f"Future percentiles computed from the future distribution using 'xhydro.sampled_indicators' and {n} samples.", + ds_for_attrs=ds, + ) if pdim == "percentile": fut_pct = fut_pct.rename({"quantile": "percentile"}) diff --git a/tests/test_cc.py b/tests/test_cc.py index ad734c58..18e2e897 100644 --- a/tests/test_cc.py +++ b/tests/test_cc.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd import pytest import xarray as xr @@ -20,8 +21,8 @@ def test_xscen_imported(): class TestSampledIndicators: - @pytest.mark.parametrize("delta_type", ["absolute", "percentage", "foo"]) - def test_sampled_indicators_type(self, delta_type): + @pytest.mark.parametrize("delta_kind", ["absolute", "percentage", "foo", None]) + def test_sampled_indicators_type(self, delta_kind): ds = xr.DataArray( np.arange(1, 8), coords={"percentile": [1, 10, 20, 50, 80, 90, 99]} ).to_dataset(name="QMOYAN") @@ -29,9 +30,19 @@ def test_sampled_indicators_type(self, delta_type): np.arange(-10, 55, 5), coords={"realization": np.arange(13)}, ).to_dataset(name="QMOYAN") - if delta_type in ["absolute", "percentage"]: + + if delta_kind is None: + with pytest.raises( + KeyError, match="argument is None, but the variables within the" + ): + xh.cc.sampled_indicators( + ds, deltas, delta_kind=delta_kind, n=10, seed=42 + ) + deltas["QMOYAN"].attrs["delta_kind"] = "absolute" + + if delta_kind in ["absolute", "percentage"] or delta_kind is None: out = xh.cc.sampled_indicators( - ds, deltas, delta_type, n=10, seed=42, return_dist=True + ds, deltas, delta_kind=delta_kind, n=10, seed=42, return_dist=True ) np.testing.assert_array_equal( @@ -45,7 +56,7 @@ def test_sampled_indicators_type(self, delta_type): for chosen in np.unique(out[2].QMOYAN.values) ) - if delta_type == "absolute": + if delta_kind == "absolute" or delta_kind is None: assert ( np.min(out[3].QMOYAN) >= 1 - 10 ) # Min of historical minus min of deltas @@ -68,9 +79,79 @@ def test_sampled_indicators_type(self, delta_type): else: with pytest.raises( - ValueError, match=f"Unknown operation '{delta_type}', expected one" + ValueError, match=f"Unknown operation '{delta_kind}', expected one" ): - xh.cc.sampled_indicators(ds, deltas, delta_type, n=10, seed=42) + xh.cc.sampled_indicators( + ds, deltas, delta_kind=delta_kind, n=10, seed=42 + ) + + @pytest.mark.parametrize("dk", ["dict", "dict_bad", None]) + def test_delta_dict(self, dk): + ds = xr.DataArray( + np.arange(1, 8), coords={"percentile": [1, 10, 20, 50, 80, 90, 99]} + ).to_dataset(name="QMOYAN") + ds["QMOYABS"] = ds["QMOYAN"].copy() + deltas = xr.DataArray( + np.arange(-10, 55, 5), + coords={"realization": np.arange(13)}, + ).to_dataset(name="QMOYAN") + deltas["QMOYABS"] = deltas["QMOYAN"].copy() + + if dk in ["dict", "dict_bad"]: + delta_kind = ( + {"QMOYAN": "pct.", "QMOYABS": "abs."} + if dk == "dict" + else {"QMOYAN": "pct."} + ) + else: + delta_kind = None + ds["QMOYAN"].attrs["delta_kind"] = "pct." + ds["QMOYABS"].attrs["delta_kind"] = "abs." + deltas["QMOYAN"].attrs["delta_kind"] = "pct." + deltas["QMOYABS"].attrs["delta_kind"] = "abs." + + if dk == "dict_bad": + with pytest.raises( + ValueError, match="is a dict, it should contain all the variables" + ): + xh.cc.sampled_indicators( + ds, deltas, delta_kind=delta_kind, n=10, seed=42, return_dist=True + ) + else: + out = xh.cc.sampled_indicators( + ds, deltas, delta_kind=delta_kind, n=10, seed=42, return_dist=True + ) + + np.testing.assert_array_equal( + out[0]["percentile"], [1, 10, 20, 50, 80, 90, 99] + ) + assert all( + chosen in np.arange(1, 8) for chosen in np.unique(out[1].QMOYAN.values) + ) + assert all( + chosen in np.arange(-10, 55, 5) + for chosen in np.unique(out[2].QMOYAN.values) + ) + + assert ( + np.min(out[3].QMOYABS) >= 1 - 10 + ) # Min of historical minus min of deltas + assert ( + np.max(out[3].QMOYABS) <= 7 + 50 + ) # Max of historical plus max of deltas + np.testing.assert_array_almost_equal( + out[0]["QMOYABS"].values, [-3.0, -3.0, 14.6, 40.0, 46.2, 51.6, 56.46] + ) + + assert np.min(out[3].QMOYAN) >= 1 * ( + 1 - 10 / 100 + ) # Min of historical minus min of deltas + assert np.max(out[3].QMOYAN) <= 7 * ( + 1 + 50 / 100 + ) # Max of historical plus max of deltas + np.testing.assert_array_almost_equal( + out[0]["QMOYAN"].values, [1.9, 1.9, 4.06, 6.75, 7.34, 8.88, 10.338] + ) def test_sampled_indicators_return(self): ds = xr.DataArray( @@ -82,12 +163,12 @@ def test_sampled_indicators_return(self): ).to_dataset(name="QMOYAN") out1 = xh.cc.sampled_indicators( - ds, deltas, "absolute", n=10, seed=42, return_dist=False + ds, deltas, delta_kind="absolute", n=10, seed=42, return_dist=False ) assert isinstance(out1, xr.Dataset) out2 = xh.cc.sampled_indicators( - ds, deltas, "absolute", n=10, seed=42, return_dist=True + ds, deltas, delta_kind="absolute", n=10, seed=42, return_dist=True ) assert isinstance(out2, tuple) assert len(out2) == 4 @@ -156,7 +237,7 @@ def test_sampled_indicators_weights(self, weights): if weights is None: out = xh.cc.sampled_indicators( - ds, deltas, "percentage", n=10, seed=42, return_dist=True + ds, deltas, delta_kind="percentage", n=10, seed=42, return_dist=True ) np.testing.assert_array_almost_equal( out[0].QMOYAN.isel(station=0).values, [1.809, 5.5, 11.8, 12.0, 15.8155] @@ -165,7 +246,7 @@ def test_sampled_indicators_weights(self, weights): out = xh.cc.sampled_indicators( ds, deltas, - "percentage", + delta_kind="percentage", n=10, seed=42, return_dist=True, @@ -179,7 +260,7 @@ def test_sampled_indicators_weights(self, weights): out = xh.cc.sampled_indicators( ds, deltas, - "percentage", + delta_kind="percentage", n=10, seed=42, return_dist=True, @@ -195,7 +276,7 @@ def test_sampled_indicators_weights(self, weights): out = xh.cc.sampled_indicators( ds, deltas, - "percentage", + delta_kind="percentage", n=10, seed=42, return_dist=True, @@ -220,6 +301,102 @@ def test_sampled_indicators_weights(self, weights): "station" in o.dims for o in out ) # "station" is a shared dimension, so it should be in all outputs + def test_weighted_time(self): + ds = xr.DataArray( + np.array( + [ + [[1, 2, 3, 4, 5], [101, 102, 103, 104, 105]], + [[6, 7, 8, 9, 10], [106, 107, 108, 109, 110]], + ] + ), + dims=("foo", "station", "percentile"), + coords={ + "percentile": [1, 25, 50, 75, 99], + "station": ["a", "b"], + "foo": ["bar", "baz"], + }, + ).to_dataset(name="QMOYAN") + ds["QMOYAN"].attrs["units"] = "m3/s" + deltas = xr.DataArray( + np.array( + [ + [[-10, -5, 0, 5, 10], [-25, -20, -15, -10, -5]], + [[40, 45, 50, 55, 60], [115, 120, 125, 130, 135]], + ] + ), + dims=("time", "station", "realization"), + coords={ + "realization": [1, 2, 3, 4, 5], + "station": ["a", "b"], + "time": pd.date_range("2000-01-01", periods=2), + }, + ).to_dataset(name="QMOYAN") + deltas["QMOYAN"].attrs["units"] = "%" + ds_weights = xr.DataArray( + np.array([0, 1]), + dims="foo", + coords={ + "foo": ["bar", "baz"], + }, + ) + delta_weights = xr.DataArray( + np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0]]), + dims=("time", "realization"), + coords={ + "realization": [1, 2, 3, 4, 5], + "time": pd.date_range("2000-01-01", periods=2), + }, + ) + + out = xh.cc.sampled_indicators( + ds, + deltas, + delta_kind="absolute", + n=10, + seed=42, + return_dist=True, + ds_weights=ds_weights, + delta_weights=delta_weights, + ) + np.testing.assert_array_almost_equal( + out[0].QMOYAN.isel(station=0, time=0).values, + [ + 11.0, + 13.0, + 14.0, + 14.0, + 14.91, + ], # Roughly ds.QMOYAN.isel(station=0, foo=1).values + 5 + ) + np.testing.assert_array_almost_equal( + out[0].QMOYAN.isel(station=0, time=1).values, + [ + 51.0, + 53.0, + 54.0, + 54.0, + 54.91, + ], # Roughly ds.QMOYAN.isel(station=0, foo=1).values + 45 + ) + + assert all( + "station" in o.dims for o in out + ) # "station" is a shared dimension, so it should be in all outputs + assert all( + "time" in o.dims for o in [out[0], out[2], out[3]] + ) # Time dimension should never be removed + + # Check a few attributes + assert all(o.QMOYAN.attrs["sampling_kind"] == "absolute" for o in out) + assert all(o.QMOYAN.attrs["sampling_n"] == 10 for o in out) + assert all(o.QMOYAN.attrs["sampling_seed"] == 42 for o in out) + assert out[0].QMOYAN.attrs["long_name"] == "Future percentiles of QMOYAN" + assert out[1].QMOYAN.attrs["long_name"] == "Historical distribution of QMOYAN" + assert out[2].QMOYAN.attrs["long_name"] == "Future delta distribution of QMOYAN" + assert out[3].QMOYAN.attrs["long_name"] == "Future distribution of QMOYAN" + assert all(o.QMOYAN.attrs["units"] == "m3/s" for o in [out[0], out[1], out[3]]) + assert out[2].QMOYAN.attrs["units"] == "%" + def test_sampled_indicators_weight_err(self): ds = xr.DataArray( np.array( @@ -263,9 +440,24 @@ def test_sampled_indicators_weight_err(self): match="is shared between 'ds' and 'deltas', but not between 'ds_weights' and 'delta_weights'.", ): xh.cc.sampled_indicators( - ds, deltas, "percentage", n=10, seed=42, delta_weights=delta_weights + ds, + deltas, + delta_kind="percentage", + n=10, + seed=42, + delta_weights=delta_weights, ) + deltas = deltas.rename({"platform": "time", "station": "horizon"}) + delta_weights = delta_weights.rename( + {"platform": "time", "realization": "horizon"} + ) + with pytest.raises( + NotImplementedError, + match="Weights on multiple time dimensions are not supported.", + ): + xh.cc._weighted_sampling(deltas, delta_weights, n=10, seed=42) + def test_p_weights(self): def _make_da(arr): return xr.DataArray(arr, dims="percentile", coords={"percentile": arr}) From 0bb7008837bebac00867fd4bdfeb5d76921dc7a4 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Mon, 28 Oct 2024 14:18:29 -0400 Subject: [PATCH 3/5] fix notebook --- docs/notebooks/climate_change.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/notebooks/climate_change.ipynb b/docs/notebooks/climate_change.ipynb index 4a6d532d..0fa73cd2 100644 --- a/docs/notebooks/climate_change.ipynb +++ b/docs/notebooks/climate_change.ipynb @@ -516,7 +516,7 @@ "fut_pct, hist_dist, delta_dist, fut_dist = xh.cc.sampled_indicators(\n", " ref,\n", " deltas=deltas,\n", - " delta_type=\"percentage\",\n", + " delta_kind=\"percentage\",\n", " delta_weights=weights,\n", " n=n,\n", " seed=0,\n", From e26f32e1459a9524df4a527cde3bd4e7f64525e9 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Mon, 28 Oct 2024 14:25:22 -0400 Subject: [PATCH 4/5] upd changelog --- CHANGELOG.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index e8adf3d6..1293cdb9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,11 +6,17 @@ v0.5.0 (unreleased) ------------------- Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Trevor James Smith (:user:`Zeitsperre`). +New features and enhancements +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +* In ``xhydro.cc.sampled_indicators``, the `delta_type` argument can now be a dictionary or None, in which case the attribute `delta_kind` is used. (:pull:`220`). +* In ``xhydro.cc.sampled_indicators``, weights along a `time` or `horizon` dimension will no longer reduce that dimension. (:pull:`220`). + Breaking changes ^^^^^^^^^^^^^^^^ * The `xhydro` testing utilities have been rewritten to use `pooch` for downloading and caching testing datasets from `hydrologie/xhydro-testdata`. (:pull:`212`). * The `xhydro` testing utilities now require `pytest-xdist` as a development dependency. (:pull:`212`). * Many core dependencies have been updated to more modern versions. (:pull:`218`). +* The `delta_type` argument in ``xhydro.cc.sampled_indicators`` has been renamed to `delta_kind` and is no longer positional. (:pull:`220`). Internal changes ^^^^^^^^^^^^^^^^ From e7368db41c0dfb277bcc85610a32c73d3cce79f2 Mon Sep 17 00:00:00 2001 From: RondeauG Date: Tue, 5 Nov 2024 15:35:22 -0500 Subject: [PATCH 5/5] upd notebook --- docs/notebooks/climate_change.ipynb | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/docs/notebooks/climate_change.ipynb b/docs/notebooks/climate_change.ipynb index 0fa73cd2..23b22587 100644 --- a/docs/notebooks/climate_change.ipynb +++ b/docs/notebooks/climate_change.ipynb @@ -16,8 +16,6 @@ "outputs": [], "source": [ "# Imports\n", - "# Hide INFO-level logs\n", - "import logging\n", "from pathlib import Path\n", "\n", "import hvplot.xarray # noqa\n", @@ -29,20 +27,17 @@ "import xhydro as xh\n", "from xhydro.testing.helpers import deveraux\n", "\n", - "logger = logging.getLogger()\n", - "logger.setLevel(logging.CRITICAL)\n", - "\n", "D = deveraux()\n", "\n", - "# Streamflow file (1 file - Hydrotel driven by BCC-CSM-1.1(m))\n", + "# Future streamflow file (1 file - Hydrotel driven by BCC-CSM-1.1(m))\n", "streamflow_file = D.fetch(\n", " \"cc_indicators/streamflow_BCC-CSM1.1-m_rcp45.nc\",\n", ")\n", "\n", - "# Reference QMOYAN (6 platforms)\n", + "# Reference mean annual streamflow (QMOYAN) for 6 calibrations of Hydrotel\n", "reference_files = D.fetch(\"cc_indicators/reference.zip\", pooch.Unzip())\n", "\n", - "# QMOYAN deltas (63 simulations x 6 platforms)\n", + "# Future deltas of QMOYAN (63 simulations x 6 calibrations of Hydrotel)\n", "deltas_files = D.fetch(\"cc_indicators/deltas.zip\", pooch.Unzip())" ] }, @@ -492,7 +487,7 @@ "\n", "- *ds*: Dataset containing the historical indicators. The indicators are expected to be represented by a distribution of pre-computed percentiles.\n", "- *deltas*: Dataset containing the future deltas to apply to the historical indicators.\n", - "- *delta_type*: Type of delta provided. Must be one of ['absolute', 'percentage'].\n", + "- *delta_kind*: Kind of delta provided. Must be one of ['absolute', 'percentage'].\n", "- *ds_weights* (optional): Weights to use when sampling the historical indicators, for dimensions other than 'percentile'/'quantile'. Dimensions not present in this Dataset, or if None, will be sampled uniformly unless they are shared with 'deltas'.\n", "- *delta_weights* (optional): Weights to use when sampling the deltas, such as along the 'realization' dimension. Dimensions not present in this Dataset, or if None, will be sampled uniformly unless they are shared with 'ds'.\n", "- *n*: Number of samples to generate.\n",