Add a periods argument to local.fit (#219)

### Pull Request Checklist: - [x] This PR addresses an already opened issue (for bug fixes / features) - This PR fixes #xyz - [ ] (If applicable) Documentation has been added / updated (for bug fixes / features). - [x] (If applicable) Tests have been added. - [x] CHANGELOG.rst has been updated (with summary of main changes). - [x] Link to issue (:issue:`number`) and pull request (:pull:`number`) has been added. ### What kind of change does this PR introduce? * Add a `periods` argument to `local.fit`, to compute multiple separate periods with a single call. Both `periods` and `horizon` follow `xscen` conventions. ### Does this PR introduce a breaking change? - No. When `periods=None`, the function acts as before. ### Other information:
hydrologie · Oct 28, 2024 · 21c0025 · 21c0025
2 parents ac808b5 + 1ddc5f6
commit 21c0025
Show file tree

Hide file tree

Showing 3 changed files with 106 additions and 28 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -6,6 +6,10 @@ v0.5.0 (unreleased)
 -------------------
 Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Trevor James Smith (:user:`Zeitsperre`).
 
+New features and enhancements
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+* A `periods` parameter was added to ``frequency_analysis.local.fit`` to compute multiple separate periods with a single call. (:pull:`219`).
+
 Breaking changes
 ^^^^^^^^^^^^^^^^
 * The `xhydro` testing utilities have been rewritten to use `pooch` for downloading and caching testing datasets from `hydrologie/xhydro-testdata`. (:pull:`212`).

diff --git a/src/xhydro/frequency_analysis/local.py b/src/xhydro/frequency_analysis/local.py
@@ -1,14 +1,14 @@
 """Local frequency analysis functions and utilities."""
 
 import datetime
-from typing import Optional, Union
 
 import numpy as np
 import statsmodels
 import xarray as xr
 import xclim.indices.stats
 from scipy.stats.mstats import plotting_positions
 from statsmodels.tools import eval_measures
+from xscen.utils import standardize_periods
 
 __all__ = [
     "criteria",
@@ -19,9 +19,11 @@
 
 def fit(
     ds,
+    *,
     distributions: list[str] | None = None,
     min_years: int | None = None,
     method: str = "ML",
+    periods: list[str] | list[list[str]] | None = None,
 ) -> xr.Dataset:
     """Fit multiple distributions to data.
 
@@ -36,6 +38,10 @@ def fit(
         Minimum number of years required for a distribution to be fitted.
     method : str
         Fitting method. Defaults to "ML" (maximum likelihood).
+    periods : list of str or list of list of str, optional
+        Either [start, end] or list of [start, end] of periods to be considered.
+        If multiple periods are given, the output will have a `horizon` dimension.
+        If None, all data is used.
 
     Returns
     -------
@@ -48,38 +54,56 @@ def fit(
     maximum number of unique parameters between the distributions.
     """
     distributions = distributions or ["genextreme", "pearson3", "gumbel_r", "expon"]
+
+    periods = (
+        standardize_periods(periods, multiple=True)
+        if periods is not None
+        else [[str(int(ds.time.dt.year.min())), str(int(ds.time.dt.year.max()))]]
+    )
+
     out = []
-    for v in ds.data_vars:
-        p = []
-        for d in distributions:
-            p.append(  # noqa: PERF401
-                xclim.indices.stats.fit(
-                    ds[v].chunk({"time": -1}), dist=d, method=method
+    for period in periods:
+        outp = []
+        ds_subset = ds.sel(time=slice(period[0], period[1]))
+        for v in ds.data_vars:
+            p = []
+            for d in distributions:
+                p.append(  # noqa: PERF401
+                    xclim.indices.stats.fit(
+                        ds_subset[v].chunk({"time": -1}), dist=d, method=method
+                    )
+                    .assign_coords(scipy_dist=d)
+                    .expand_dims("scipy_dist")
                 )
-                .assign_coords(scipy_dist=d)
-                .expand_dims("scipy_dist")
-            )
-        params = xr.concat(p, dim="scipy_dist")
-
-        # Reorder dparams to match the order of the parameters across all distributions, since subsequent operations rely on this.
-        p_order = sorted(set(params.dparams.values).difference(["loc", "scale"])) + [
-            "loc",
-            "scale",
-        ]
-        params = params.sel(dparams=p_order)
-
-        if min_years is not None:
-            params = params.where(ds[v].notnull().sum("time") >= min_years)
-        params.attrs["scipy_dist"] = distributions
-        params.attrs["description"] = "Parameters of the distributions"
-        params.attrs["long_name"] = "Distribution parameters"
-        params.attrs["min_years"] = min_years
-        out.append(params)
-
-    out = xr.merge(out)
+            params = xr.concat(p, dim="scipy_dist")
+
+            # Reorder dparams to match the order of the parameters across all distributions, since subsequent operations rely on this.
+            p_order = sorted(
+                set(params.dparams.values).difference(["loc", "scale"])
+            ) + [
+                "loc",
+                "scale",
+            ]
+            params = params.sel(dparams=p_order)
+
+            if min_years is not None:
+                params = params.where(ds_subset[v].notnull().sum("time") >= min_years)
+            params = params.expand_dims({"horizon": ["-".join(period)]})
+            params.attrs["scipy_dist"] = distributions
+            params.attrs["description"] = "Parameters of the distributions"
+            params.attrs["long_name"] = "Distribution parameters"
+            params.attrs["min_years"] = min_years
+            outp.append(params)
+        out.append(xr.merge(outp))
+
+    out = xr.concat(out, dim="horizon")
     out = out.chunk({"dparams": -1})
     out.attrs = ds.attrs
 
+    if len(out.horizon) == 1:
+        # If only one period was asked, remove the horizon dimension, but keep the period in the coordinates.
+        out = out.squeeze("horizon")
+
     return out
 
 

diff --git a/tests/test_local.py b/tests/test_local.py
@@ -73,6 +73,56 @@ def test_min_years(self, miny):
                 [[np.nan, np.nan]],
             )
 
+    @pytest.mark.parametrize(
+        "periods",
+        [
+            None,
+            ["1961", "1980"],
+            ["1962", "2010"],
+            ["2001", "2020"],
+            [["2001", "2020"], ["2041", "2060"]],
+        ],
+    )
+    def test_periods(self, periods):
+        ds = timeseries(
+            np.arange(1, 100) * 5 + 350,
+            variable="streamflow",
+            start="2001-01-01",
+            freq="YS",
+            as_dataset=True,
+        )
+
+        min_years = 30 if periods == ["2001", "2020"] else None
+        params = xhfa.local.fit(
+            ds, distributions=["gumbel_r"], periods=periods, min_years=min_years
+        )
+
+        if periods is None:
+            np.testing.assert_array_almost_equal(
+                params.streamflow.squeeze(), [528.9, 129.23], decimal=2
+            )
+
+        elif isinstance(periods[0], str):
+            np.testing.assert_array_equal(
+                params.horizon, [f"{periods[0]}-{periods[1]}"]
+            )
+            results = {
+                "1961": np.array([np.nan, np.nan]),
+                "1962": np.array([370.35, 12.96]),
+                "2001": np.array([np.nan, np.nan]),
+            }
+            np.testing.assert_array_almost_equal(
+                params.streamflow.squeeze(), results[periods[0]], decimal=2
+            )
+
+        elif periods[0] == ["2001", "2020"]:
+            np.testing.assert_array_equal(params.horizon, ["2001-2020", "2041-2060"])
+            np.testing.assert_array_almost_equal(
+                params.streamflow.squeeze(),
+                np.array([[388.15, 26.06], [588.15, 26.06]]),
+                decimal=2,
+            )
+
 
 @pytest.mark.parametrize("mode", ["max", "min", "foo"])
 def test_quantiles(mode):