Skip to content

Commit

Permalink
Add a periods argument to local.fit (#219)
Browse files Browse the repository at this point in the history
<!-- Please ensure the PR fulfills the following requirements! -->
<!-- If this is your first PR, make sure to add your details to the
AUTHORS.rst! -->
### Pull Request Checklist:
- [x] This PR addresses an already opened issue (for bug fixes /
features)
  - This PR fixes #xyz
- [ ] (If applicable) Documentation has been added / updated (for bug
fixes / features).
- [x] (If applicable) Tests have been added.
- [x] CHANGELOG.rst has been updated (with summary of main changes).
- [x] Link to issue (:issue:`number`) and pull request (:pull:`number`)
has been added.

### What kind of change does this PR introduce?

* Add a `periods` argument to `local.fit`, to compute multiple separate
periods with a single call. Both `periods` and `horizon` follow `xscen`
conventions.

### Does this PR introduce a breaking change?

- No. When `periods=None`, the function acts as before.


### Other information:
  • Loading branch information
RondeauG authored Oct 28, 2024
2 parents ac808b5 + 1ddc5f6 commit 21c0025
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 28 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ v0.5.0 (unreleased)
-------------------
Contributors to this version: Gabriel Rondeau-Genesse (:user:`RondeauG`), Trevor James Smith (:user:`Zeitsperre`).

New features and enhancements
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
* A `periods` parameter was added to ``frequency_analysis.local.fit`` to compute multiple separate periods with a single call. (:pull:`219`).

Breaking changes
^^^^^^^^^^^^^^^^
* The `xhydro` testing utilities have been rewritten to use `pooch` for downloading and caching testing datasets from `hydrologie/xhydro-testdata`. (:pull:`212`).
Expand Down
80 changes: 52 additions & 28 deletions src/xhydro/frequency_analysis/local.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
"""Local frequency analysis functions and utilities."""

import datetime
from typing import Optional, Union

import numpy as np
import statsmodels
import xarray as xr
import xclim.indices.stats
from scipy.stats.mstats import plotting_positions
from statsmodels.tools import eval_measures
from xscen.utils import standardize_periods

__all__ = [
"criteria",
Expand All @@ -19,9 +19,11 @@

def fit(
ds,
*,
distributions: list[str] | None = None,
min_years: int | None = None,
method: str = "ML",
periods: list[str] | list[list[str]] | None = None,
) -> xr.Dataset:
"""Fit multiple distributions to data.
Expand All @@ -36,6 +38,10 @@ def fit(
Minimum number of years required for a distribution to be fitted.
method : str
Fitting method. Defaults to "ML" (maximum likelihood).
periods : list of str or list of list of str, optional
Either [start, end] or list of [start, end] of periods to be considered.
If multiple periods are given, the output will have a `horizon` dimension.
If None, all data is used.
Returns
-------
Expand All @@ -48,38 +54,56 @@ def fit(
maximum number of unique parameters between the distributions.
"""
distributions = distributions or ["genextreme", "pearson3", "gumbel_r", "expon"]

periods = (
standardize_periods(periods, multiple=True)
if periods is not None
else [[str(int(ds.time.dt.year.min())), str(int(ds.time.dt.year.max()))]]
)

out = []
for v in ds.data_vars:
p = []
for d in distributions:
p.append( # noqa: PERF401
xclim.indices.stats.fit(
ds[v].chunk({"time": -1}), dist=d, method=method
for period in periods:
outp = []
ds_subset = ds.sel(time=slice(period[0], period[1]))
for v in ds.data_vars:
p = []
for d in distributions:
p.append( # noqa: PERF401
xclim.indices.stats.fit(
ds_subset[v].chunk({"time": -1}), dist=d, method=method
)
.assign_coords(scipy_dist=d)
.expand_dims("scipy_dist")
)
.assign_coords(scipy_dist=d)
.expand_dims("scipy_dist")
)
params = xr.concat(p, dim="scipy_dist")

# Reorder dparams to match the order of the parameters across all distributions, since subsequent operations rely on this.
p_order = sorted(set(params.dparams.values).difference(["loc", "scale"])) + [
"loc",
"scale",
]
params = params.sel(dparams=p_order)

if min_years is not None:
params = params.where(ds[v].notnull().sum("time") >= min_years)
params.attrs["scipy_dist"] = distributions
params.attrs["description"] = "Parameters of the distributions"
params.attrs["long_name"] = "Distribution parameters"
params.attrs["min_years"] = min_years
out.append(params)

out = xr.merge(out)
params = xr.concat(p, dim="scipy_dist")

# Reorder dparams to match the order of the parameters across all distributions, since subsequent operations rely on this.
p_order = sorted(
set(params.dparams.values).difference(["loc", "scale"])
) + [
"loc",
"scale",
]
params = params.sel(dparams=p_order)

if min_years is not None:
params = params.where(ds_subset[v].notnull().sum("time") >= min_years)
params = params.expand_dims({"horizon": ["-".join(period)]})
params.attrs["scipy_dist"] = distributions
params.attrs["description"] = "Parameters of the distributions"
params.attrs["long_name"] = "Distribution parameters"
params.attrs["min_years"] = min_years
outp.append(params)
out.append(xr.merge(outp))

out = xr.concat(out, dim="horizon")
out = out.chunk({"dparams": -1})
out.attrs = ds.attrs

if len(out.horizon) == 1:
# If only one period was asked, remove the horizon dimension, but keep the period in the coordinates.
out = out.squeeze("horizon")

return out


Expand Down
50 changes: 50 additions & 0 deletions tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,56 @@ def test_min_years(self, miny):
[[np.nan, np.nan]],
)

@pytest.mark.parametrize(
"periods",
[
None,
["1961", "1980"],
["1962", "2010"],
["2001", "2020"],
[["2001", "2020"], ["2041", "2060"]],
],
)
def test_periods(self, periods):
ds = timeseries(
np.arange(1, 100) * 5 + 350,
variable="streamflow",
start="2001-01-01",
freq="YS",
as_dataset=True,
)

min_years = 30 if periods == ["2001", "2020"] else None
params = xhfa.local.fit(
ds, distributions=["gumbel_r"], periods=periods, min_years=min_years
)

if periods is None:
np.testing.assert_array_almost_equal(
params.streamflow.squeeze(), [528.9, 129.23], decimal=2
)

elif isinstance(periods[0], str):
np.testing.assert_array_equal(
params.horizon, [f"{periods[0]}-{periods[1]}"]
)
results = {
"1961": np.array([np.nan, np.nan]),
"1962": np.array([370.35, 12.96]),
"2001": np.array([np.nan, np.nan]),
}
np.testing.assert_array_almost_equal(
params.streamflow.squeeze(), results[periods[0]], decimal=2
)

elif periods[0] == ["2001", "2020"]:
np.testing.assert_array_equal(params.horizon, ["2001-2020", "2041-2060"])
np.testing.assert_array_almost_equal(
params.streamflow.squeeze(),
np.array([[388.15, 26.06], [588.15, 26.06]]),
decimal=2,
)


@pytest.mark.parametrize("mode", ["max", "min", "foo"])
def test_quantiles(mode):
Expand Down

0 comments on commit 21c0025

Please sign in to comment.