From f97c84a82773e2715bad1f5ad00c2641c37ff7cc Mon Sep 17 00:00:00 2001 From: TC-FF Date: Wed, 10 May 2023 16:09:58 -0400 Subject: [PATCH 01/24] replaced github username by organisation's name --- CONTRIBUTING.rst | 4 ++-- docs/installation.rst | 8 ++++---- setup.py | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 7423e59f..879d3544 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -15,7 +15,7 @@ Types of Contributions Report Bugs ~~~~~~~~~~~ -Report bugs at https://github.com/TC-FF/xhydro/issues. +Report bugs at https://github.com/hydrologie/xhydro/issues. If you are reporting a bug, please include: @@ -45,7 +45,7 @@ articles, and such. Submit Feedback ~~~~~~~~~~~~~~~ -The best way to send feedback is to file an issue at https://github.com/TC-FF/xhydro/issues. +The best way to send feedback is to file an issue at https://github.com/hydrologie/xhydro/issues. If you are proposing a feature: diff --git a/docs/installation.rst b/docs/installation.rst index 56a1d0b5..cd028f16 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -32,13 +32,13 @@ You can either clone the public repository: .. code-block:: console - $ git clone git://github.com/TC-FF/xhydro + $ git clone git://github.com/hydrologie/xhydro Or download the `tarball`_: .. code-block:: console - $ curl -OJL https://github.com/TC-FF/xhydro/tarball/master + $ curl -OJL https://github.com/hydrologie/xhydro/tarball/master Once you have a copy of the source, you can install it with: @@ -47,5 +47,5 @@ Once you have a copy of the source, you can install it with: $ python setup.py install -.. _Github repo: https://github.com/TC-FF/xhydro -.. _tarball: https://github.com/TC-FF/xhydro/tarball/master +.. _Github repo: https://github.com/hydrologie/xhydro +.. _tarball: https://github.com/hydrologie/xhydro/tarball/master diff --git a/setup.py b/setup.py index 33d673aa..890abfcd 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,7 @@ packages=find_packages(include=['xhydro', 'xhydro.*']), test_suite='tests', tests_require=test_requirements, - url='https://github.com/TC-FF/xhydro', + url='https://github.com/hydrologie/xhydro', version='0.1.0', zip_safe=False, ) From 7a40b366af5df4e0415a8f0c6b772b3a45c93c8c Mon Sep 17 00:00:00 2001 From: TC-FF Date: Wed, 10 May 2023 16:10:40 -0400 Subject: [PATCH 02/24] changed layout in documentation --- docs/index.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 830a706e..42178b11 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,9 +1,10 @@ -Welcome to xHydro's documentation! +xHydro's documentation ====================================== .. toctree:: :maxdepth: 2 :caption: Contents: + :hidden: readme installation @@ -13,8 +14,8 @@ Welcome to xHydro's documentation! authors history -Indices and tables -================== -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` +.. Indices and tables +.. ================== +.. * :ref:`genindex` +.. * :ref:`modindex` +.. 
* :ref:`search` From f29380f553a56476ea8838657efc2082c9a93b09 Mon Sep 17 00:00:00 2001 From: TC-FF Date: Thu, 1 Jun 2023 14:51:14 -0400 Subject: [PATCH 03/24] first functions --- xhydro/frequency_analysis/local.py | 35 ++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 xhydro/frequency_analysis/local.py diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py new file mode 100644 index 00000000..1698151c --- /dev/null +++ b/xhydro/frequency_analysis/local.py @@ -0,0 +1,35 @@ +import xarray as xr +import copy + +class Data: + + def __init__(self, + ds: xr.Dataset): + """init function takes a dataset as input and initialised an empty season dictionary and a list of catchments from dimension id + + Parameters + ---------- + ds : xr.Dataset + _description_ + """ + self.data = ds + self._season = {} + self._catchments = self.data.id.to_numpy().tolist() + + def _repr_html_(self): + """Function to show xr.Dataset._repr_html_ when looking at class Data + + Returns + ------- + xr.Dataset._repr_html_() + """ + return self.data._repr_html_() + + def copy(self): + """makes a copy of itself using copy library + + Returns + ------- + xhydro.frequency_analysis.local.Data() + """ + return copy.copy(self) \ No newline at end of file From b9dff837c2c5ccbea61bae50ca8b306a7265f38b Mon Sep 17 00:00:00 2001 From: TC-FF Date: Thu, 1 Jun 2023 14:58:44 -0400 Subject: [PATCH 04/24] added select_catchments function --- xhydro/frequency_analysis/local.py | 47 +++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index 1698151c..b3d40dc7 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -32,4 +32,49 @@ def copy(self): ------- xhydro.frequency_analysis.local.Data() """ - return copy.copy(self) \ No newline at end of file + return copy.copy(self) + + def select_catchments(self, + catchment_list: list): + """ + select specified catchements from attribute data. Also supports the use of a wildcard (*). 
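The wildcard selection described above reduces to fnmatch-style pattern matching. A minimal sketch of that matching with hypothetical station ids (not taken from the dataset):

import fnmatch

ids = ["051001", "051005", "023301", "023401"]   # hypothetical ids
patterns = ["05*", "023301"]

# Keep every id that matches at least one pattern; this is the same idea
# as the multi_filter helper defined lower in this patch.
selected = [i for i in ids if any(fnmatch.fnmatch(i, p) for p in patterns)]
print(selected)  # ['051001', '051005', '023301']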
+ + Parameters + ---------- + catchment_list : List + List of catchments that will be selcted along the id dimension + + Returns + ------- + ds : xarray.DataSet + New dataset with only specified catchments + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> filtered_ds = donnees.select_catchments(catchment_list = ['051001','051005']) + >>> filtered_ds = donnees.select_catchments(catchment_list = ['05*','0234*', '023301']) + """ + + # Create a copy of the object + obj = self.copy() + + + + # sub function to select complete list based on wilcards + def multi_filter(names, + patterns: list): + return [name for name in names for pattern in patterns if fnmatch.fnmatch(name, pattern) ] + + # Getting the full list + catchment_list = multi_filter(obj.catchments, catchment_list) + + # Setting the list as a class attribute + obj._catchments = catchment_list + + # Filtering over the list + obj.data = obj.data.sel(id=self.data.id.isin(catchment_list)) + return obj \ No newline at end of file From d09c450852404b971b10d07af7c36e0af71897ac Mon Sep 17 00:00:00 2001 From: TC-FF Date: Thu, 1 Jun 2023 15:19:31 -0400 Subject: [PATCH 05/24] custom_group_by --- xhydro/frequency_analysis/local.py | 41 ++++++++++++++++++++++++++++-- 1 file changed, 39 insertions(+), 2 deletions(-) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index b3d40dc7..d5467b1a 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -1,4 +1,5 @@ import xarray as xr +import numpy as np import copy class Data: @@ -74,7 +75,43 @@ def multi_filter(names, # Setting the list as a class attribute obj._catchments = catchment_list - + # Filtering over the list obj.data = obj.data.sel(id=self.data.id.isin(catchment_list)) - return obj \ No newline at end of file + return obj + + def custom_group_by(self, + beg: int, + end: int): + """ + a custum fonction to groupby with specified julian days. 
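The grouping described above is a day-of-year mask followed by a yearly groupby. A small sketch on a synthetic daily series; the real data also carries an id dimension, omitted here:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", "2001-12-31", freq="D")
ds = xr.Dataset({"value": ("time", np.random.rand(time.size))}, coords={"time": time})

beg, end = 60, 182  # Julian days, e.g. March through June
# Keep only the requested days of year, then group by calendar year.
grouped = ds.sel(time=np.isin(ds.time.dt.dayofyear, range(beg, end + 1))).groupby("time.year")
print(grouped.max().value)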
+ + Parameters + ---------- + beg : Int + Julian day of the begining of the period + + end : Int + Julian day of the end of the period + + Returns + ------- + ds : xarray.DataSet + dataset with data grouped by time over specified dates + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> grouped_ds = donnees.custom_group_by(1, 90) + + """ + + if beg > end: + # TODO chevauchement d'années ex : année hydrologique, hiver de décembre à mars, etc + pass + else: + # +1 to include the end + return self.data.sel(time=np.isin(self.data.time.dt.dayofyear, range(beg, end + 1))).groupby('time.year') \ No newline at end of file From fd58a7110e0cd42e2141d71a7483c4732e4d3bbf Mon Sep 17 00:00:00 2001 From: TC-FF Date: Thu, 1 Jun 2023 15:21:16 -0400 Subject: [PATCH 06/24] custom_group_by comment in english --- xhydro/frequency_analysis/local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index d5467b1a..43a5261d 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -110,7 +110,7 @@ def custom_group_by(self, """ if beg > end: - # TODO chevauchement d'années ex : année hydrologique, hiver de décembre à mars, etc + # TODO when beg `end, it means it has to overlap years, for example, from octobre to march pass else: # +1 to include the end From 46f640d959b7ff2e6ff8c1b023925c69d0060a89 Mon Sep 17 00:00:00 2001 From: TC-FF Date: Thu, 1 Jun 2023 15:35:28 -0400 Subject: [PATCH 07/24] seasons related functions --- xhydro/frequency_analysis/local.py | 116 ++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index 43a5261d..4167e9df 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -1,5 +1,6 @@ import xarray as xr import numpy as np +import warnings import copy class Data: @@ -114,4 +115,117 @@ def custom_group_by(self, pass else: # +1 to include the end - return self.data.sel(time=np.isin(self.data.time.dt.dayofyear, range(beg, end + 1))).groupby('time.year') \ No newline at end of file + return self.data.sel(time=np.isin(self.data.time.dt.dayofyear, range(beg, end + 1))).groupby('time.year') + + + @property + def season(self): + return self._season + + @season.setter + def season(self, + liste: list): + """ + The setter for the season property Issues a Warining if a new season is overlapping with another one. + Alos issues a warning if the season name was already used, mand then overwrites it. 
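The overlap warning described above is an interval test on Julian days. A sketch with hypothetical seasons; the two-sided test used here also catches the case where the new season fully contains an existing one, which the endpoint checks in the setter below do not:

def overlaps(a_beg, a_end, b_beg, b_end):
    # Non-wrapping Julian-day intervals overlap when each starts
    # before the other ends.
    return a_beg <= b_end and b_beg <= a_end

existing = {"Spring": (60, 182)}             # hypothetical season
new_name, new_beg, new_end = "Freshet", 50, 200

for name, (beg, end) in existing.items():
    if overlaps(beg, end, new_beg, new_end):
        print(f"Warning, {new_name} overlapping with {name}")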
+ + Parameters + ---------- + liste : List + List of Name, begining in Julian day, end in Julian day + + Returns + ------- + ds : xarray.DataSet + appends Dict self._season with name as key, and begining and end as values + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> donnees.season = ['Yearly', 1, 365] + """ + + #TODO a dictionary would probalby be more suited + name = liste[0] + beg = liste[1] + end = liste[2] + for season, dates in self.season.items(): + if not isinstance(dates, xr.Dataset): + # We dont check for overlapping if season is a dataset + if name == season: + warnings.warn('Warning, ' + name + ' was already defined and has been overwritten') + elif dates[0] <= beg and dates[1] >= beg or dates[0] <= end and dates[1] >= end: + warnings.warn('Warning, ' + name + ' overlapping with ' + season) + + self._season[name] = [beg, end] + + + def rm_season(self, + name: str): + """ + Fonction to remove a season. Isues a Warining if the name is not a valid season. + + Parameters + ---------- + name : Str + Name of the season to be removed + + Returns + ------- + ds : xarray.DataSet + The dataset is returned with =out the season specified. + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> donnees.season = ['Yearly', 1, 365] + >>> donnees.rm_season('Yearly') + """ + + try: + del self._season[name] + except: + print('No season named ' + name) + + def get_seasons(self): + """ + Function to list the seasons. + + Returns + ------- + list : List + a list of the season names + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> donnees.season = ['Yearly', 1, 365] + >>> donnees.season = ['Spring', 60, 120] + >>> ['Yearly', 'Spring'] + """ + return list(self.season.keys()) + + def _get_season_values(self, + season: str): + """Function to get the values of a given season + + Parameters + ---------- + season : str + name of a previously defined season + + Returns + ------- + list + return a list of julian day of begining and end of season + """ + return self._season[season] \ No newline at end of file From 9abd34a332891cbcf2fa78c26fa69eaeedf9ef46 Mon Sep 17 00:00:00 2001 From: TC-FF Date: Thu, 1 Jun 2023 16:38:08 -0400 Subject: [PATCH 08/24] get_bool_over_tolerence function added --- xhydro/frequency_analysis/local.py | 45 +++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index 4167e9df..1d5a4603 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -228,4 +228,47 @@ def _get_season_values(self, list return a list of julian day of begining and end of season """ - return self._season[season] \ No newline at end of file + return self._season[season] + + def get_bool_over_tolerence(self, + tol: float, + season = None): + """ + Fonction to check if a season has enough values to be used. For each season True will be returned if there is les missing dats then the specified tolerence. 
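The tolerance test described above counts valid (non-NaN) observations per year and compares them with the number expected for the season. A sketch with synthetic daily data, assuming a daily time step:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", "2001-12-31", freq="D")
values = np.random.rand(time.size)
values[:200] = np.nan            # year 2000 is mostly missing
ds = xr.Dataset({"value": ("time", values)}, coords={"time": time})

tol = 0.15
# count() skips NaN, so a year passes only if at least (1 - tol) of the
# expected 365 daily values are present (the seasonal case scales 365
# down to the season length).
ok = ds.value.groupby("time.year").count() > 365 * (1 - tol)
print(ok)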
+ + Parameters + ---------- + tol : Float + Tolerance in decimal form (0.15 for 15%) + + season : String + Name of the season to be checked + + Returns + ------- + da : xr.DataArray + DataArray of boolean + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> donnees.custom_group_by(1, 365.max().where(grouped_ds.get_bool_over_tolerence(tolerence, season), drop=True) + """ + ds = copy.copy(self) + if season is None: + # If no season is specified, tolerence will be based on 365 values per year + # TODO generalize for different time step + tolerence = 365 * tol + grouped_ds = ds.data.groupby('time.year').count() + + else: + season_vals = ds._get_season_values(season) + season_size = season_vals[1] - season_vals[0] + 1 + # TODO generalize for different time step + grouped_ds = ds.custom_group_by(season_vals[0], season_vals[1]).count() + tolerence = season_size * (1-tol) + + return (grouped_ds.value > tolerence).load() \ No newline at end of file From e187afab3eed970dd37d7612b58956d3101dfbe1 Mon Sep 17 00:00:00 2001 From: TC-FF Date: Thu, 1 Jun 2023 16:40:59 -0400 Subject: [PATCH 09/24] added functions to get maximum and represent them --- xhydro/frequency_analysis/local.py | 136 ++++++++++++++++++++++++++++- 1 file changed, 135 insertions(+), 1 deletion(-) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index 1d5a4603..030af027 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -271,4 +271,138 @@ def get_bool_over_tolerence(self, grouped_ds = ds.custom_group_by(season_vals[0], season_vals[1]).count() tolerence = season_size * (1-tol) - return (grouped_ds.value > tolerence).load() \ No newline at end of file + return (grouped_ds.value > tolerence).load() + + +def get_maximum(self, + tolerence: float = None, seasons = None): + """ + Fonction to tiddy _get_max results. + + Parameters + ---------- + tolerence : Float + Tolerance in decimal form (0.15 for 15%) + + seasons : List + List of season's name to be checked + + Returns + ------- + df : pd.Dataframe + Dataframe organised with id, season, year, Maximums + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) + >>> catchment_list = ['023301'] + >>> sub_set = donnees.select_catchments(catchment_list = catchment_list) + >>> sub_set.season = ['Spring', 60, 182] + >>> sub_set.get_maximum(tolerence=0.15, seasons=['Spring']) + >>> id season year Maximums + 0 023301 Spring 1928 231.000000 + 1 023301 Spring 1929 241.000000 + 2 023301 Spring 1930 317.000000 + ... + """ + return self._get_max(tolerence = tolerence, seasons = seasons).to_dataframe(name = 'Maximums').reset_index()[['id', 'season', 'year', 'start_date','end_date','Maximums']].dropna() + +def _get_max(self, tolerence = None, seasons = []): + """ + Fonction to get maximum value per season, according to a tolerence. 
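Combining the day-of-year grouping with the tolerance mask yields the seasonal maxima. A compact sketch under the same synthetic-data assumptions:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("2000-01-01", "2002-12-31", freq="D")
ds = xr.Dataset({"value": ("time", np.random.rand(time.size))}, coords={"time": time})

beg, end, tol = 60, 182, 0.15
season = ds.sel(time=np.isin(ds.time.dt.dayofyear, range(beg, end + 1)))
enough = season.groupby("time.year").count().value > (end - beg + 1) * (1 - tol)
# Annual maxima over the season, dropping years with too much missing data.
print(season.groupby("time.year").max().value.where(enough, drop=True))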
+ + Parameters + ---------- + tolerence : Float + Tolerance in decimal form (0.15 for 15%) + + seasons : List + List of season's name to be checked + + Returns + ------- + da : xr.DataArray + DataArray of maximums + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) + >>> catchment_list = ['023301'] + >>> sub_set = donnees.select_catchments(catchment_list = catchment_list) + >>> sub_set.season = ['Spring', 60, 182] + >>> sub_set._get_max(tolerence=0.15, seasons=['Spring']) + xarray.DataArray 'value' (season: 1, year: 52, id: 1) + """ + grouped_ds = self.copy() + # Function to get the max over one season + def max_over_one_season(grouped_ds, tolerence, season): + season_vals = grouped_ds._get_season_values(season) + if isinstance(season_vals, xr.Dataset): + years = np.unique(season_vals.year) + bvs = np.unique(season_vals.id) + max = np.empty((len(years), len(bvs)), dtype=object) + beg = np.empty((len(years), len(bvs)), dtype=object) + end = np.empty((len(years), len(bvs)), dtype=object) + for y, year in enumerate(years): + for b, bv in enumerate(bvs): + dd = season_vals.sel(year=year, id=bv).value.to_numpy().tolist() + beg[y, b] = pd.to_datetime(str(year)+str(dd[0]), format='%Y%j') + end[y, b] = pd.to_datetime(str(year)+str(dd[1]), format='%Y%j') + ds_year = grouped_ds.data.where(grouped_ds.data.time.dt.year==year, drop=True) + ds_year = ds_year.sel(id=bv) + + # +1 to include end + ds_period = ds_year.sel(time=np.isin(ds_year.time.dt.dayofyear, range(dd[0], dd[1] + 1))) + + d = ds_period.value.values + timestep = float(ds_year.time.dt.dayofyear.timestep.values.tolist()) + nb_expected = (dd[1]+1-dd[0]) / timestep + # nb_expected is used to accound for missing values as well as nan + if np.count_nonzero(~np.isnan(d)) / nb_expected > (1 - tolerence): + max[y, b] = np.nanmax(d)#.tolist() + else: + max[y, b] = np.nan + + + max_ds = xr.Dataset() + + max_ds.coords['year'] = xr.DataArray(years, dims=('year',)) + max_ds.coords['id'] = xr.DataArray(bvs, dims=('id',)) + max_ds.coords['start_date'] = xr.DataArray(beg, dims=('year', 'id')) + max_ds.coords['end_date'] = xr.DataArray(end, dims=('year', 'id')) + max_ds['value'] = xr.DataArray(max.astype(float), dims=('year', 'id')) + # For each bv + # For each year + #check for tolerence + #get max + #create a DS + return max_ds + else: + #TODO add year from grouped_ds.data.dt.year and make full str start_date and end_date + grouped_ds.data.coords['start_date'] = pd.to_datetime(str(season_vals[0]), format='%j').strftime("%m-%d") + grouped_ds.data.coords['end_date'] = pd.to_datetime(str(season_vals[1]), format='%j').strftime("%m-%d") + + return grouped_ds.custom_group_by(season_vals[0], season_vals[1]).max().where(grouped_ds.get_bool_over_tolerence(tolerence, season), drop=True) + + + if seasons: + # Creating a new dimension of season and merging all Dataset from max_over_one_season + return xr.concat( \ + [max_over_one_season(grouped_ds, tolerence, season) \ + .assign_coords(season=season) \ + .expand_dims('season') \ + for season in seasons], dim='season' + ).value + + else: + #TODO Tolerence not used if no period is defined + return grouped_ds.data.groupby('time.year').max().value.assign_coords(season='Whole year') \ + .expand_dims('season') \ No newline at end of file From 505700398a2e247305a3be5a5504d389c8742eab Mon Sep 17 00:00:00 
2001 From: TC-FF Date: Tue, 13 Jun 2023 13:54:52 -0400 Subject: [PATCH 10/24] added imports and function to calculate volume --- xhydro/frequency_analysis/local.py | 85 +++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index 030af027..0100edd8 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -2,6 +2,8 @@ import numpy as np import warnings import copy +from typing import Union +import pandas as pd class Data: @@ -405,4 +407,85 @@ def max_over_one_season(grouped_ds, tolerence, season): else: #TODO Tolerence not used if no period is defined return grouped_ds.data.groupby('time.year').max().value.assign_coords(season='Whole year') \ - .expand_dims('season') \ No newline at end of file + .expand_dims('season') + + +def calculate_volume(self, + dates: Union[list, xr.Dataset] =None, + tolerence=0.15): + ds = self.copy() + + def conversion_factor_to_hm3(timestep): + #timestep in days + # TODO check if last date is included + return float(timestep) * 60 * 60 * 24 + + if dates is None: + # TODO use season dates + pass + elif isinstance(dates, list): + #TODO bool over tolerence takes season, generalise + with warnings.catch_warnings(): # Removes overlaping warning + warnings.simplefilter("ignore") + self.season = ['Volumes', dates[0], dates[1]] + grouped_ds = ds.custom_group_by(dates[0], dates[1]).sum().where(ds.get_bool_over_tolerence(tolerence, 'Volumes'), drop=True) + self.rm_season('Volumes') + # Transform tp hm³ + # TODO add start and end and clear other attributes + grouped_ds = grouped_ds * xr.apply_ufunc(conversion_factor_to_hm3, grouped_ds['timestep'], input_core_dims=[[]], vectorize=True) \ + * (dates[1] - dates[0]) \ + / 1000000 + + df = grouped_ds.year.to_dataframe() + df['beg']=dates[0] + df['end']=dates[1] + + grouped_ds['start_date'] = pd.to_datetime(df['year'] * 1000 + df['beg'], format='%Y%j') + grouped_ds['end_date'] = pd.to_datetime(df['year'] * 1000 + df['end'], format='%Y%j') + + grouped_ds['units'] = 'hm³' + + return grouped_ds.drop_vars (['_last_update_timestamp', + 'aggregation', + 'data_type', + 'data_type', + 'drainage_area', + 'latitude', + 'longitude', + 'name', + 'source', + 'timestep', + 'province', + 'regulated']).rename_vars({'value':'volume'}) + elif isinstance(dates, xr.Dataset): + #TODO Make sure DS has same dimensions than target + vol = np.empty((len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object) + beg = np.empty((len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object) + end = np.empty((len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object) + for y, year in enumerate(np.unique(ds.data.time.dt.year)): + for b, bv in enumerate(ds.data.id): + dd = dates.sel(year=year, id=bv).value.to_numpy().tolist() + beg[y, b] = pd.to_datetime(str(year)+str(dd[0]), format='%Y%j') + end[y, b] = pd.to_datetime(str(year)+str(dd[1]), format='%Y%j') + ds_year = ds.data.where(ds.data.time.dt.year==year, drop=True) + ds_year = ds_year.sel(id=bv) + # +1 pou inclure la fin, + # TODO si une seule journe dans ds_period, ¸a donne 0 + # TODO check for tolerence + ds_period = ds_year.sel(time=np.isin(ds_year.time.dt.dayofyear, range(dd[0], dd[1] + 1))) + # delta en ns, à rapporter en s (1000000000) puis le tout en hm³ (1000000) + delta = ds_period.time[-1]-ds_period.time[0] + delta = delta.to_numpy().tolist() / (1000000000 * 1000000) + vol[y, b] = sum(ds_period.value.values).tolist() * delta + 
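                # Unit note: a mean daily flow of 1 m³/s sustained for one day is
                # 86 400 m³, i.e. 0.0864 hm³ once divided by 1 000 000; the time
                # delta above comes out in nanoseconds, hence the extra factor
                # of 1 000 000 000 in the conversion.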
+ + vol_ds = xr.Dataset() + + vol_ds.coords['year'] = xr.DataArray(np.unique(ds.data.time.dt.year), dims=('year',)) + vol_ds.coords['id'] = xr.DataArray(ds.data.id.to_numpy(), dims=('id',)) + vol_ds.coords['units'] = 'hm³' + vol_ds.coords['start_date'] = xr.DataArray(beg, dims=('year', 'id')) + vol_ds.coords['end_date'] = xr.DataArray(end, dims=('year', 'id')) + vol_ds['volume'] = xr.DataArray(vol, dims=('year', 'id')) + + return vol_ds \ No newline at end of file From c910dc24b77c4c975114c7bac654f05550ce3c27 Mon Sep 17 00:00:00 2001 From: TC-FF Date: Tue, 13 Jun 2023 16:26:21 -0400 Subject: [PATCH 11/24] changed to comply with flake8 standards --- xhydro/frequency_analysis/local.py | 503 ++++++++++++++++------------- 1 file changed, 284 insertions(+), 219 deletions(-) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index 0100edd8..8edab82e 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -4,12 +4,15 @@ import copy from typing import Union import pandas as pd +import fnmatch + class Data: - def __init__(self, + def __init__(self, ds: xr.Dataset): - """init function takes a dataset as input and initialised an empty season dictionary and a list of catchments from dimension id + """init function takes a dataset as input and initialize an empty + season dictionary and a list of catchments from dimension id Parameters ---------- @@ -21,7 +24,7 @@ def __init__(self, self._catchments = self.data.id.to_numpy().tolist() def _repr_html_(self): - """Function to show xr.Dataset._repr_html_ when looking at class Data + """Function to show xr.Dataset._repr_html_ when looking at class Data Returns ------- @@ -37,41 +40,45 @@ def copy(self): xhydro.frequency_analysis.local.Data() """ return copy.copy(self) - + def select_catchments(self, - catchment_list: list): + catchment_list: list): """ - select specified catchements from attribute data. Also supports the use of a wildcard (*). - + select specified catchements from attribute data. + Also supports the use of a wildcard (*). 
+ Parameters ---------- catchment_list : List List of catchments that will be selcted along the id dimension - + Returns ------- ds : xarray.DataSet New dataset with only specified catchments - + Examples -------- >>> import xarray as xr - >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> cehq_data_path = + '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) - >>> filtered_ds = donnees.select_catchments(catchment_list = ['051001','051005']) - >>> filtered_ds = donnees.select_catchments(catchment_list = ['05*','0234*', '023301']) + >>> filtered_ds = donnees.select_catchments( + catchment_list = ['051001','051005']) + >>> filtered_ds = donnees.select_catchments( + catchment_list = ['05*','0234*', '023301']) """ # Create a copy of the object obj = self.copy() - - - # sub function to select complete list based on wilcards def multi_filter(names, - patterns: list): - return [name for name in names for pattern in patterns if fnmatch.fnmatch(name, pattern) ] + patterns: list): + # sub function to select complete list based on wilcards + return [name for name in names + for pattern in patterns + if fnmatch.fnmatch(name, pattern)] # Getting the full list catchment_list = multi_filter(obj.catchments, catchment_list) @@ -82,13 +89,13 @@ def multi_filter(names, # Filtering over the list obj.data = obj.data.sel(id=self.data.id.isin(catchment_list)) return obj - - def custom_group_by(self, - beg: int, - end: int): + + def custom_group_by(self, + beg: int, + end: int): """ a custum fonction to groupby with specified julian days. - + Parameters ---------- beg : Int @@ -101,36 +108,42 @@ def custom_group_by(self, ------- ds : xarray.DataSet dataset with data grouped by time over specified dates - + Examples -------- >>> import xarray as xr - >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> cehq_data_path = + '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> grouped_ds = donnees.custom_group_by(1, 90) - + """ - + if beg > end: - # TODO when beg `end, it means it has to overlap years, for example, from octobre to march + # TODO when beg `end, it means it has to overlap years, + # for example, from octobre to march pass else: - # +1 to include the end - return self.data.sel(time=np.isin(self.data.time.dt.dayofyear, range(beg, end + 1))).groupby('time.year') - + # +1 to include the end + return self.data.sel( + time=np.isin( + self.data.time.dt.dayofyear, range(beg, end + 1))) \ + .groupby('time.year') @property def season(self): return self._season - + @season.setter - def season(self, - liste: list): + def season(self, + liste: list): """ - The setter for the season property Issues a Warining if a new season is overlapping with another one. - Alos issues a warning if the season name was already used, mand then overwrites it. - + The setter for the season property Issues a Warining + if a new season is overlapping with another one. + AlsO issues a warning if the season name was already used, + and then overwrites it. 
+ Parameters ---------- liste : List @@ -139,18 +152,20 @@ def season(self, Returns ------- ds : xarray.DataSet - appends Dict self._season with name as key, and begining and end as values - + appends Dict self._season with name as key, + and begining and end as values + Examples -------- >>> import xarray as xr - >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> cehq_data_path = + '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.season = ['Yearly', 1, 365] """ - #TODO a dictionary would probalby be more suited + # TODO a dictionary would probalby be more suited name = liste[0] beg = liste[1] end = liste[2] @@ -158,18 +173,22 @@ def season(self, if not isinstance(dates, xr.Dataset): # We dont check for overlapping if season is a dataset if name == season: - warnings.warn('Warning, ' + name + ' was already defined and has been overwritten') - elif dates[0] <= beg and dates[1] >= beg or dates[0] <= end and dates[1] >= end: - warnings.warn('Warning, ' + name + ' overlapping with ' + season) - - self._season[name] = [beg, end] + warnings.warn( + 'Warning, ' + name + + ' was already defined and has been overwritten') + elif dates[0] <= beg and dates[1] >= beg or \ + dates[0] <= end and dates[1] >= end: + warnings.warn( + 'Warning, ' + name + ' overlapping with ' + season) + self._season[name] = [beg, end] - def rm_season(self, - name: str): + def rm_season(self, + name: str): """ - Fonction to remove a season. Isues a Warining if the name is not a valid season. - + Fonction to remove a season. + Isues a Warining if the name is not a valid season. + Parameters ---------- name : Str @@ -178,12 +197,13 @@ def rm_season(self, Returns ------- ds : xarray.DataSet - The dataset is returned with =out the season specified. - + The dataset is returned with =out the season specified. + Examples -------- >>> import xarray as xr - >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> cehq_data_path = + '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.season = ['Yearly', 1, 365] @@ -194,20 +214,21 @@ def rm_season(self, del self._season[name] except: print('No season named ' + name) - + def get_seasons(self): """ Function to list the seasons. - + Returns ------- list : List - a list of the season names - + a list of the season names + Examples -------- >>> import xarray as xr - >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> cehq_data_path = + '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.season = ['Yearly', 1, 365] @@ -215,9 +236,9 @@ def get_seasons(self): >>> ['Yearly', 'Spring'] """ return list(self.season.keys()) - - def _get_season_values(self, - season: str): + + def _get_season_values(self, + season: str): """Function to get the values of a given season Parameters @@ -231,13 +252,15 @@ def _get_season_values(self, return a list of julian day of begining and end of season """ return self._season[season] - - def get_bool_over_tolerence(self, - tol: float, - season = None): + + def get_bool_over_tolerence(self, + tol: float, + season=None): """ - Fonction to check if a season has enough values to be used. For each season True will be returned if there is les missing dats then the specified tolerence. 
- + Fonction to check if a season has enough values to be used. + For each season True will be returned if there is less missing data + than the specified tolerence. + Parameters ---------- tol : Float @@ -249,19 +272,22 @@ def get_bool_over_tolerence(self, Returns ------- da : xr.DataArray - DataArray of boolean - + DataArray of boolean + Examples -------- >>> import xarray as xr - >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> cehq_data_path = + '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) - >>> donnees.custom_group_by(1, 365.max().where(grouped_ds.get_bool_over_tolerence(tolerence, season), drop=True) + >>> donnees.custom_group_by(1, 365.max().where( + grouped_ds.get_bool_over_tolerence(tolerence, season), drop=True) """ ds = copy.copy(self) if season is None: - # If no season is specified, tolerence will be based on 365 values per year + # If no season is specified, + # tolerence will be based on 365 values per year # TODO generalize for different time step tolerence = 365 * tol grouped_ds = ds.data.groupby('time.year').count() @@ -270,17 +296,18 @@ def get_bool_over_tolerence(self, season_vals = ds._get_season_values(season) season_size = season_vals[1] - season_vals[0] + 1 # TODO generalize for different time step - grouped_ds = ds.custom_group_by(season_vals[0], season_vals[1]).count() + grouped_ds = ds.custom_group_by(season_vals[0], + season_vals[1]).count() tolerence = season_size * (1-tol) - + return (grouped_ds.value > tolerence).load() - -def get_maximum(self, - tolerence: float = None, seasons = None): + +def get_maximum(self, + tolerence: float = None, seasons=None): """ Fonction to tiddy _get_max results. - + Parameters ---------- tolerence : Float @@ -293,11 +320,12 @@ def get_maximum(self, ------- df : pd.Dataframe Dataframe organised with id, season, year, Maximums - + Examples -------- >>> import xarray as xr - >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> cehq_data_path = + '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) @@ -311,12 +339,17 @@ def get_maximum(self, 2 023301 Spring 1930 317.000000 ... """ - return self._get_max(tolerence = tolerence, seasons = seasons).to_dataframe(name = 'Maximums').reset_index()[['id', 'season', 'year', 'start_date','end_date','Maximums']].dropna() - -def _get_max(self, tolerence = None, seasons = []): + return self._get_max(tolerence=tolerence, seasons=seasons). \ + to_dataframe(name='Maximums'). \ + reset_index()\ + [['id', 'season', 'year', 'start_date', 'end_date', 'Maximums']]. \ + dropna() + + +def _get_max(self, tolerence=None, seasons=[]): """ Fonction to get maximum value per season, according to a tolerence. 
- + Parameters ---------- tolerence : Float @@ -328,12 +361,13 @@ def _get_max(self, tolerence = None, seasons = []): Returns ------- da : xr.DataArray - DataArray of maximums - + DataArray of maximums + Examples -------- >>> import xarray as xr - >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> cehq_data_path = + '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) @@ -344,148 +378,179 @@ def _get_max(self, tolerence = None, seasons = []): xarray.DataArray 'value' (season: 1, year: 52, id: 1) """ grouped_ds = self.copy() - # Function to get the max over one season + def max_over_one_season(grouped_ds, tolerence, season): - season_vals = grouped_ds._get_season_values(season) - if isinstance(season_vals, xr.Dataset): - years = np.unique(season_vals.year) - bvs = np.unique(season_vals.id) - max = np.empty((len(years), len(bvs)), dtype=object) - beg = np.empty((len(years), len(bvs)), dtype=object) - end = np.empty((len(years), len(bvs)), dtype=object) - for y, year in enumerate(years): - for b, bv in enumerate(bvs): - dd = season_vals.sel(year=year, id=bv).value.to_numpy().tolist() - beg[y, b] = pd.to_datetime(str(year)+str(dd[0]), format='%Y%j') - end[y, b] = pd.to_datetime(str(year)+str(dd[1]), format='%Y%j') - ds_year = grouped_ds.data.where(grouped_ds.data.time.dt.year==year, drop=True) - ds_year = ds_year.sel(id=bv) - - # +1 to include end - ds_period = ds_year.sel(time=np.isin(ds_year.time.dt.dayofyear, range(dd[0], dd[1] + 1))) - - d = ds_period.value.values - timestep = float(ds_year.time.dt.dayofyear.timestep.values.tolist()) - nb_expected = (dd[1]+1-dd[0]) / timestep - # nb_expected is used to accound for missing values as well as nan - if np.count_nonzero(~np.isnan(d)) / nb_expected > (1 - tolerence): - max[y, b] = np.nanmax(d)#.tolist() - else: - max[y, b] = np.nan - - - max_ds = xr.Dataset() - - max_ds.coords['year'] = xr.DataArray(years, dims=('year',)) - max_ds.coords['id'] = xr.DataArray(bvs, dims=('id',)) - max_ds.coords['start_date'] = xr.DataArray(beg, dims=('year', 'id')) - max_ds.coords['end_date'] = xr.DataArray(end, dims=('year', 'id')) - max_ds['value'] = xr.DataArray(max.astype(float), dims=('year', 'id')) - # For each bv - # For each year - #check for tolerence - #get max - #create a DS - return max_ds - else: - #TODO add year from grouped_ds.data.dt.year and make full str start_date and end_date - grouped_ds.data.coords['start_date'] = pd.to_datetime(str(season_vals[0]), format='%j').strftime("%m-%d") - grouped_ds.data.coords['end_date'] = pd.to_datetime(str(season_vals[1]), format='%j').strftime("%m-%d") - - return grouped_ds.custom_group_by(season_vals[0], season_vals[1]).max().where(grouped_ds.get_bool_over_tolerence(tolerence, season), drop=True) + season_vals = grouped_ds._get_season_values(season) + if isinstance(season_vals, xr.Dataset): + years = np.unique(season_vals.year) + bvs = np.unique(season_vals.id) + max = np.empty((len(years), len(bvs)), dtype=object) + beg = np.empty((len(years), len(bvs)), dtype=object) + end = np.empty((len(years), len(bvs)), dtype=object) + for y, year in enumerate(years): + for b, bv in enumerate(bvs): + dd = season_vals.sel(year=year, id=bv). 
\ + value.to_numpy().tolist() + beg[y, b] = pd.to_datetime(str(year) + str(dd[0]), + format='%Y%j') + end[y, b] = pd.to_datetime(str(year) + str(dd[1]), + format='%Y%j') + ds_year = grouped_ds.data.where( + grouped_ds.data.time.dt.year == year, drop=True) + ds_year = ds_year.sel(id=bv) + + # +1 to include end + ds_period = ds_year.sel(time=np.isin( + ds_year.time.dt.dayofyear, range(dd[0], dd[1] + 1))) + + d = ds_period.value.values + timestep = float( + ds_year.time.dt.dayofyear.timestep.values.tolist()) + nb_expected = (dd[1]+1-dd[0]) / timestep + # nb_expected is used to account for missing and nan + if np.count_nonzero( + ~np.isnan(d)) / nb_expected > (1 - tolerence): + max[y, b] = np.nanmax(d) # .tolist() + else: + max[y, b] = np.nan + + max_ds = xr.Dataset() + + max_ds.coords['year'] = xr.DataArray(years, dims=('year',)) + max_ds.coords['id'] = xr.DataArray(bvs, dims=('id',)) + max_ds.coords['start_date'] = xr.DataArray( + beg, dims=('year', 'id')) + max_ds.coords['end_date'] = xr.DataArray( + end, dims=('year', 'id')) + max_ds['value'] = xr.DataArray( + max.astype(float), dims=('year', 'id')) + # For each bv + # For each year + # check for tolerence + # get max + # create a DS + return max_ds + else: + # TODO add year from grouped_ds.data.dt.year + # and make full str start_date and end_date + grouped_ds.data.coords['start_date'] = pd.to_datetime( + str(season_vals[0]), format='%j').strftime("%m-%d") + grouped_ds.data.coords['end_date'] = pd.to_datetime( + str(season_vals[1]), format='%j').strftime("%m-%d") + + return grouped_ds.custom_group_by( + season_vals[0], season_vals[1]).max().\ + where(grouped_ds.get_bool_over_tolerence(tolerence, season), + drop=True) if seasons: - # Creating a new dimension of season and merging all Dataset from max_over_one_season - return xr.concat( \ - [max_over_one_season(grouped_ds, tolerence, season) \ - .assign_coords(season=season) \ - .expand_dims('season') \ - for season in seasons], dim='season' + # Creating a new dimension of season and + # merging all Dataset from max_over_one_season + return xr.concat( + [max_over_one_season(grouped_ds, tolerence, season) + .assign_coords(season=season) + .expand_dims('season') + for season in seasons], dim='season' ).value - - else: - #TODO Tolerence not used if no period is defined - return grouped_ds.data.groupby('time.year').max().value.assign_coords(season='Whole year') \ - .expand_dims('season') - - -def calculate_volume(self, - dates: Union[list, xr.Dataset] =None, - tolerence=0.15): + + else: + # TODO Tolerence not used if no period is defined + return grouped_ds.data.groupby('time.year').\ + max().value.assign_coords(season='Whole year') \ + .expand_dims('season') + + +def calculate_volume(self, + dates: Union[list, xr.Dataset] = None, + tolerence=0.15): ds = self.copy() - + def conversion_factor_to_hm3(timestep): - #timestep in days - # TODO check if last date is included - return float(timestep) * 60 * 60 * 24 + # timestep in days + # TODO check if last date is included + return float(timestep) * 60 * 60 * 24 if dates is None: - # TODO use season dates - pass + # TODO use season dates + pass elif isinstance(dates, list): - #TODO bool over tolerence takes season, generalise - with warnings.catch_warnings(): # Removes overlaping warning - warnings.simplefilter("ignore") - self.season = ['Volumes', dates[0], dates[1]] - grouped_ds = ds.custom_group_by(dates[0], dates[1]).sum().where(ds.get_bool_over_tolerence(tolerence, 'Volumes'), drop=True) - self.rm_season('Volumes') - # Transform tp hm³ + # TODO 
bool over tolerence takes season, generalise + with warnings.catch_warnings(): # Removes overlaping warning + warnings.simplefilter("ignore") + self.season = ['Volumes', dates[0], dates[1]] + grouped_ds = ds.custom_group_by(dates[0], dates[1])\ + .sum().where( + ds.get_bool_over_tolerence(tolerence, 'Volumes'), drop=True) + self.rm_season('Volumes') + # Transform tp hm³ # TODO add start and end and clear other attributes - grouped_ds = grouped_ds * xr.apply_ufunc(conversion_factor_to_hm3, grouped_ds['timestep'], input_core_dims=[[]], vectorize=True) \ - * (dates[1] - dates[0]) \ - / 1000000 - - df = grouped_ds.year.to_dataframe() - df['beg']=dates[0] - df['end']=dates[1] - - grouped_ds['start_date'] = pd.to_datetime(df['year'] * 1000 + df['beg'], format='%Y%j') - grouped_ds['end_date'] = pd.to_datetime(df['year'] * 1000 + df['end'], format='%Y%j') - - grouped_ds['units'] = 'hm³' - - return grouped_ds.drop_vars (['_last_update_timestamp', - 'aggregation', - 'data_type', - 'data_type', - 'drainage_area', - 'latitude', - 'longitude', - 'name', - 'source', - 'timestep', - 'province', - 'regulated']).rename_vars({'value':'volume'}) + grouped_ds = grouped_ds * xr.apply_ufunc( + conversion_factor_to_hm3, grouped_ds['timestep'], + input_core_dims=[[]], vectorize=True) \ + * (dates[1] - dates[0]) \ + / 1000000 + + df = grouped_ds.year.to_dataframe() + df['beg'] = dates[0] + df['end'] = dates[1] + + grouped_ds['start_date'] = pd.to_datetime( + df['year'] * 1000 + df['beg'], format='%Y%j') + grouped_ds['end_date'] = pd.to_datetime( + df['year'] * 1000 + df['end'], format='%Y%j') + + grouped_ds['units'] = 'hm³' + + return grouped_ds.drop_vars(['_last_update_timestamp', + 'aggregation', + 'data_type', + 'data_type', + 'drainage_area', + 'latitude', + 'longitude', + 'name', + 'source', + 'timestep', + 'province', + 'regulated'])\ + .rename_vars({'value': 'volume'}) elif isinstance(dates, xr.Dataset): - #TODO Make sure DS has same dimensions than target - vol = np.empty((len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object) - beg = np.empty((len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object) - end = np.empty((len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object) - for y, year in enumerate(np.unique(ds.data.time.dt.year)): - for b, bv in enumerate(ds.data.id): - dd = dates.sel(year=year, id=bv).value.to_numpy().tolist() - beg[y, b] = pd.to_datetime(str(year)+str(dd[0]), format='%Y%j') - end[y, b] = pd.to_datetime(str(year)+str(dd[1]), format='%Y%j') - ds_year = ds.data.where(ds.data.time.dt.year==year, drop=True) - ds_year = ds_year.sel(id=bv) - # +1 pou inclure la fin, - # TODO si une seule journe dans ds_period, ¸a donne 0 - # TODO check for tolerence - ds_period = ds_year.sel(time=np.isin(ds_year.time.dt.dayofyear, range(dd[0], dd[1] + 1))) - # delta en ns, à rapporter en s (1000000000) puis le tout en hm³ (1000000) - delta = ds_period.time[-1]-ds_period.time[0] - delta = delta.to_numpy().tolist() / (1000000000 * 1000000) - vol[y, b] = sum(ds_period.value.values).tolist() * delta - - - vol_ds = xr.Dataset() - - vol_ds.coords['year'] = xr.DataArray(np.unique(ds.data.time.dt.year), dims=('year',)) - vol_ds.coords['id'] = xr.DataArray(ds.data.id.to_numpy(), dims=('id',)) - vol_ds.coords['units'] = 'hm³' - vol_ds.coords['start_date'] = xr.DataArray(beg, dims=('year', 'id')) - vol_ds.coords['end_date'] = xr.DataArray(end, dims=('year', 'id')) - vol_ds['volume'] = xr.DataArray(vol, dims=('year', 'id')) - - return vol_ds \ No newline at end of file + # TODO 
Make sure DS has same dimensions than target + vol = np.empty((len(np.unique( + ds.data.time.dt.year)), len(ds.data.id)), dtype=object) + beg = np.empty((len(np.unique( + ds.data.time.dt.year)), len(ds.data.id)), dtype=object) + end = np.empty((len(np.unique( + ds.data.time.dt.year)), len(ds.data.id)), dtype=object) + for y, year in enumerate(np.unique(ds.data.time.dt.year)): + for b, bv in enumerate(ds.data.id): + dd = dates.sel(year=year, id=bv).value.to_numpy().tolist() + beg[y, b] = pd.to_datetime(str(year)+str(dd[0]), format='%Y%j') + end[y, b] = pd.to_datetime(str(year)+str(dd[1]), format='%Y%j') + ds_year = ds.data.where( + ds.data.time.dt.year == year, drop=True) + ds_year = ds_year.sel(id=bv) + # +1 pou inclure la fin, + # TODO si une seule journe dans ds_period, ¸a donne 0 + # TODO check for tolerence + ds_period = ds_year.sel(time=np.isin( + ds_year.time.dt.dayofyear, + range(dd[0], dd[1] + 1))) + # delta en ns, à rapporter en s (1000000000) + # puis le tout en hm³ (1000000) + delta = ds_period.time[-1]-ds_period.time[0] + delta = delta.to_numpy().tolist() / (1000000000 * 1000000) + vol[y, b] = sum(ds_period.value.values).tolist() * delta + + vol_ds = xr.Dataset() + + vol_ds.coords['year'] = xr.DataArray(np.unique( + ds.data.time.dt.year), dims=('year',)) + vol_ds.coords['id'] = xr.DataArray(ds.data.id.to_numpy(), dims=('id',)) + vol_ds.coords['units'] = 'hm³' + vol_ds.coords['start_date'] = xr.DataArray(beg, dims=('year', 'id')) + vol_ds.coords['end_date'] = xr.DataArray(end, dims=('year', 'id')) + vol_ds['volume'] = xr.DataArray(vol, dims=('year', 'id')) + + return vol_ds From ce72f2f09a81bfed721f3e3e8901d86519275353 Mon Sep 17 00:00:00 2001 From: TC-FF Date: Wed, 14 Jun 2023 08:13:12 -0400 Subject: [PATCH 12/24] ran black formatter --- xhydro/frequency_analysis/local.py | 289 +++++++++++++++-------------- 1 file changed, 150 insertions(+), 139 deletions(-) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index 8edab82e..9334015a 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -8,9 +8,7 @@ class Data: - - def __init__(self, - ds: xr.Dataset): + def __init__(self, ds: xr.Dataset): """init function takes a dataset as input and initialize an empty season dictionary and a list of catchments from dimension id @@ -41,8 +39,7 @@ def copy(self): """ return copy.copy(self) - def select_catchments(self, - catchment_list: list): + def select_catchments(self, catchment_list: list): """ select specified catchements from attribute data. Also supports the use of a wildcard (*). @@ -73,12 +70,14 @@ def select_catchments(self, # Create a copy of the object obj = self.copy() - def multi_filter(names, - patterns: list): + def multi_filter(names, patterns: list): # sub function to select complete list based on wilcards - return [name for name in names - for pattern in patterns - if fnmatch.fnmatch(name, pattern)] + return [ + name + for name in names + for pattern in patterns + if fnmatch.fnmatch(name, pattern) + ] # Getting the full list catchment_list = multi_filter(obj.catchments, catchment_list) @@ -90,9 +89,7 @@ def multi_filter(names, obj.data = obj.data.sel(id=self.data.id.isin(catchment_list)) return obj - def custom_group_by(self, - beg: int, - end: int): + def custom_group_by(self, beg: int, end: int): """ a custum fonction to groupby with specified julian days. 
@@ -127,17 +124,15 @@ def custom_group_by(self, else: # +1 to include the end return self.data.sel( - time=np.isin( - self.data.time.dt.dayofyear, range(beg, end + 1))) \ - .groupby('time.year') + time=np.isin(self.data.time.dt.dayofyear, range(beg, end + 1)) + ).groupby("time.year") @property def season(self): return self._season @season.setter - def season(self, - liste: list): + def season(self, liste: list): """ The setter for the season property Issues a Warining if a new season is overlapping with another one. @@ -174,17 +169,21 @@ def season(self, # We dont check for overlapping if season is a dataset if name == season: warnings.warn( - 'Warning, ' + name + - ' was already defined and has been overwritten') - elif dates[0] <= beg and dates[1] >= beg or \ - dates[0] <= end and dates[1] >= end: - warnings.warn( - 'Warning, ' + name + ' overlapping with ' + season) + "Warning, " + + name + + " was already defined and has been overwritten" + ) + elif ( + dates[0] <= beg + and dates[1] >= beg + or dates[0] <= end + and dates[1] >= end + ): + warnings.warn("Warning, " + name + " overlapping with " + season) self._season[name] = [beg, end] - def rm_season(self, - name: str): + def rm_season(self, name: str): """ Fonction to remove a season. Isues a Warining if the name is not a valid season. @@ -213,7 +212,7 @@ def rm_season(self, try: del self._season[name] except: - print('No season named ' + name) + print("No season named " + name) def get_seasons(self): """ @@ -237,8 +236,7 @@ def get_seasons(self): """ return list(self.season.keys()) - def _get_season_values(self, - season: str): + def _get_season_values(self, season: str): """Function to get the values of a given season Parameters @@ -253,9 +251,7 @@ def _get_season_values(self, """ return self._season[season] - def get_bool_over_tolerence(self, - tol: float, - season=None): + def get_bool_over_tolerence(self, tol: float, season=None): """ Fonction to check if a season has enough values to be used. For each season True will be returned if there is less missing data @@ -290,21 +286,19 @@ def get_bool_over_tolerence(self, # tolerence will be based on 365 values per year # TODO generalize for different time step tolerence = 365 * tol - grouped_ds = ds.data.groupby('time.year').count() + grouped_ds = ds.data.groupby("time.year").count() else: season_vals = ds._get_season_values(season) season_size = season_vals[1] - season_vals[0] + 1 # TODO generalize for different time step - grouped_ds = ds.custom_group_by(season_vals[0], - season_vals[1]).count() - tolerence = season_size * (1-tol) + grouped_ds = ds.custom_group_by(season_vals[0], season_vals[1]).count() + tolerence = season_size * (1 - tol) return (grouped_ds.value > tolerence).load() -def get_maximum(self, - tolerence: float = None, seasons=None): +def get_maximum(self, tolerence: float = None, seasons=None): """ Fonction to tiddy _get_max results. @@ -339,11 +333,12 @@ def get_maximum(self, 2 023301 Spring 1930 317.000000 ... """ - return self._get_max(tolerence=tolerence, seasons=seasons). \ - to_dataframe(name='Maximums'). \ - reset_index()\ - [['id', 'season', 'year', 'start_date', 'end_date', 'Maximums']]. 
\ - dropna() + return ( + self._get_max(tolerence=tolerence, seasons=seasons) + .to_dataframe(name="Maximums") + .reset_index()[["id", "season", "year", "start_date", "end_date", "Maximums"]] + .dropna() + ) def _get_max(self, tolerence=None, seasons=[]): @@ -380,7 +375,6 @@ def _get_max(self, tolerence=None, seasons=[]): grouped_ds = self.copy() def max_over_one_season(grouped_ds, tolerence, season): - season_vals = grouped_ds._get_season_values(season) if isinstance(season_vals, xr.Dataset): years = np.unique(season_vals.year) @@ -390,41 +384,35 @@ def max_over_one_season(grouped_ds, tolerence, season): end = np.empty((len(years), len(bvs)), dtype=object) for y, year in enumerate(years): for b, bv in enumerate(bvs): - dd = season_vals.sel(year=year, id=bv). \ - value.to_numpy().tolist() - beg[y, b] = pd.to_datetime(str(year) + str(dd[0]), - format='%Y%j') - end[y, b] = pd.to_datetime(str(year) + str(dd[1]), - format='%Y%j') + dd = season_vals.sel(year=year, id=bv).value.to_numpy().tolist() + beg[y, b] = pd.to_datetime(str(year) + str(dd[0]), format="%Y%j") + end[y, b] = pd.to_datetime(str(year) + str(dd[1]), format="%Y%j") ds_year = grouped_ds.data.where( - grouped_ds.data.time.dt.year == year, drop=True) + grouped_ds.data.time.dt.year == year, drop=True + ) ds_year = ds_year.sel(id=bv) # +1 to include end - ds_period = ds_year.sel(time=np.isin( - ds_year.time.dt.dayofyear, range(dd[0], dd[1] + 1))) + ds_period = ds_year.sel( + time=np.isin(ds_year.time.dt.dayofyear, range(dd[0], dd[1] + 1)) + ) d = ds_period.value.values - timestep = float( - ds_year.time.dt.dayofyear.timestep.values.tolist()) - nb_expected = (dd[1]+1-dd[0]) / timestep + timestep = float(ds_year.time.dt.dayofyear.timestep.values.tolist()) + nb_expected = (dd[1] + 1 - dd[0]) / timestep # nb_expected is used to account for missing and nan - if np.count_nonzero( - ~np.isnan(d)) / nb_expected > (1 - tolerence): + if np.count_nonzero(~np.isnan(d)) / nb_expected > (1 - tolerence): max[y, b] = np.nanmax(d) # .tolist() else: max[y, b] = np.nan max_ds = xr.Dataset() - max_ds.coords['year'] = xr.DataArray(years, dims=('year',)) - max_ds.coords['id'] = xr.DataArray(bvs, dims=('id',)) - max_ds.coords['start_date'] = xr.DataArray( - beg, dims=('year', 'id')) - max_ds.coords['end_date'] = xr.DataArray( - end, dims=('year', 'id')) - max_ds['value'] = xr.DataArray( - max.astype(float), dims=('year', 'id')) + max_ds.coords["year"] = xr.DataArray(years, dims=("year",)) + max_ds.coords["id"] = xr.DataArray(bvs, dims=("id",)) + max_ds.coords["start_date"] = xr.DataArray(beg, dims=("year", "id")) + max_ds.coords["end_date"] = xr.DataArray(end, dims=("year", "id")) + max_ds["value"] = xr.DataArray(max.astype(float), dims=("year", "id")) # For each bv # For each year # check for tolerence @@ -434,36 +422,43 @@ def max_over_one_season(grouped_ds, tolerence, season): else: # TODO add year from grouped_ds.data.dt.year # and make full str start_date and end_date - grouped_ds.data.coords['start_date'] = pd.to_datetime( - str(season_vals[0]), format='%j').strftime("%m-%d") - grouped_ds.data.coords['end_date'] = pd.to_datetime( - str(season_vals[1]), format='%j').strftime("%m-%d") - - return grouped_ds.custom_group_by( - season_vals[0], season_vals[1]).max().\ - where(grouped_ds.get_bool_over_tolerence(tolerence, season), - drop=True) + grouped_ds.data.coords["start_date"] = pd.to_datetime( + str(season_vals[0]), format="%j" + ).strftime("%m-%d") + grouped_ds.data.coords["end_date"] = pd.to_datetime( + str(season_vals[1]), format="%j" + 
).strftime("%m-%d") + + return ( + grouped_ds.custom_group_by(season_vals[0], season_vals[1]) + .max() + .where(grouped_ds.get_bool_over_tolerence(tolerence, season), drop=True) + ) if seasons: # Creating a new dimension of season and # merging all Dataset from max_over_one_season return xr.concat( - [max_over_one_season(grouped_ds, tolerence, season) - .assign_coords(season=season) - .expand_dims('season') - for season in seasons], dim='season' - ).value + [ + max_over_one_season(grouped_ds, tolerence, season) + .assign_coords(season=season) + .expand_dims("season") + for season in seasons + ], + dim="season", + ).value else: # TODO Tolerence not used if no period is defined - return grouped_ds.data.groupby('time.year').\ - max().value.assign_coords(season='Whole year') \ - .expand_dims('season') + return ( + grouped_ds.data.groupby("time.year") + .max() + .value.assign_coords(season="Whole year") + .expand_dims("season") + ) -def calculate_volume(self, - dates: Union[list, xr.Dataset] = None, - tolerence=0.15): +def calculate_volume(self, dates: Union[list, xr.Dataset] = None, tolerence=0.15): ds = self.copy() def conversion_factor_to_hm3(timestep): @@ -478,79 +473,95 @@ def conversion_factor_to_hm3(timestep): # TODO bool over tolerence takes season, generalise with warnings.catch_warnings(): # Removes overlaping warning warnings.simplefilter("ignore") - self.season = ['Volumes', dates[0], dates[1]] - grouped_ds = ds.custom_group_by(dates[0], dates[1])\ - .sum().where( - ds.get_bool_over_tolerence(tolerence, 'Volumes'), drop=True) - self.rm_season('Volumes') + self.season = ["Volumes", dates[0], dates[1]] + grouped_ds = ( + ds.custom_group_by(dates[0], dates[1]) + .sum() + .where(ds.get_bool_over_tolerence(tolerence, "Volumes"), drop=True) + ) + self.rm_season("Volumes") # Transform tp hm³ # TODO add start and end and clear other attributes - grouped_ds = grouped_ds * xr.apply_ufunc( - conversion_factor_to_hm3, grouped_ds['timestep'], - input_core_dims=[[]], vectorize=True) \ - * (dates[1] - dates[0]) \ + grouped_ds = ( + grouped_ds + * xr.apply_ufunc( + conversion_factor_to_hm3, + grouped_ds["timestep"], + input_core_dims=[[]], + vectorize=True, + ) + * (dates[1] - dates[0]) / 1000000 + ) df = grouped_ds.year.to_dataframe() - df['beg'] = dates[0] - df['end'] = dates[1] - - grouped_ds['start_date'] = pd.to_datetime( - df['year'] * 1000 + df['beg'], format='%Y%j') - grouped_ds['end_date'] = pd.to_datetime( - df['year'] * 1000 + df['end'], format='%Y%j') - - grouped_ds['units'] = 'hm³' - - return grouped_ds.drop_vars(['_last_update_timestamp', - 'aggregation', - 'data_type', - 'data_type', - 'drainage_area', - 'latitude', - 'longitude', - 'name', - 'source', - 'timestep', - 'province', - 'regulated'])\ - .rename_vars({'value': 'volume'}) + df["beg"] = dates[0] + df["end"] = dates[1] + + grouped_ds["start_date"] = pd.to_datetime( + df["year"] * 1000 + df["beg"], format="%Y%j" + ) + grouped_ds["end_date"] = pd.to_datetime( + df["year"] * 1000 + df["end"], format="%Y%j" + ) + + grouped_ds["units"] = "hm³" + + return grouped_ds.drop_vars( + [ + "_last_update_timestamp", + "aggregation", + "data_type", + "data_type", + "drainage_area", + "latitude", + "longitude", + "name", + "source", + "timestep", + "province", + "regulated", + ] + ).rename_vars({"value": "volume"}) elif isinstance(dates, xr.Dataset): # TODO Make sure DS has same dimensions than target - vol = np.empty((len(np.unique( - ds.data.time.dt.year)), len(ds.data.id)), dtype=object) - beg = np.empty((len(np.unique( - 
ds.data.time.dt.year)), len(ds.data.id)), dtype=object) - end = np.empty((len(np.unique( - ds.data.time.dt.year)), len(ds.data.id)), dtype=object) + vol = np.empty( + (len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object + ) + beg = np.empty( + (len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object + ) + end = np.empty( + (len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object + ) for y, year in enumerate(np.unique(ds.data.time.dt.year)): for b, bv in enumerate(ds.data.id): dd = dates.sel(year=year, id=bv).value.to_numpy().tolist() - beg[y, b] = pd.to_datetime(str(year)+str(dd[0]), format='%Y%j') - end[y, b] = pd.to_datetime(str(year)+str(dd[1]), format='%Y%j') - ds_year = ds.data.where( - ds.data.time.dt.year == year, drop=True) + beg[y, b] = pd.to_datetime(str(year) + str(dd[0]), format="%Y%j") + end[y, b] = pd.to_datetime(str(year) + str(dd[1]), format="%Y%j") + ds_year = ds.data.where(ds.data.time.dt.year == year, drop=True) ds_year = ds_year.sel(id=bv) # +1 pou inclure la fin, # TODO si une seule journe dans ds_period, ¸a donne 0 # TODO check for tolerence - ds_period = ds_year.sel(time=np.isin( - ds_year.time.dt.dayofyear, - range(dd[0], dd[1] + 1))) + ds_period = ds_year.sel( + time=np.isin(ds_year.time.dt.dayofyear, range(dd[0], dd[1] + 1)) + ) # delta en ns, à rapporter en s (1000000000) # puis le tout en hm³ (1000000) - delta = ds_period.time[-1]-ds_period.time[0] + delta = ds_period.time[-1] - ds_period.time[0] delta = delta.to_numpy().tolist() / (1000000000 * 1000000) vol[y, b] = sum(ds_period.value.values).tolist() * delta vol_ds = xr.Dataset() - vol_ds.coords['year'] = xr.DataArray(np.unique( - ds.data.time.dt.year), dims=('year',)) - vol_ds.coords['id'] = xr.DataArray(ds.data.id.to_numpy(), dims=('id',)) - vol_ds.coords['units'] = 'hm³' - vol_ds.coords['start_date'] = xr.DataArray(beg, dims=('year', 'id')) - vol_ds.coords['end_date'] = xr.DataArray(end, dims=('year', 'id')) - vol_ds['volume'] = xr.DataArray(vol, dims=('year', 'id')) + vol_ds.coords["year"] = xr.DataArray( + np.unique(ds.data.time.dt.year), dims=("year",) + ) + vol_ds.coords["id"] = xr.DataArray(ds.data.id.to_numpy(), dims=("id",)) + vol_ds.coords["units"] = "hm³" + vol_ds.coords["start_date"] = xr.DataArray(beg, dims=("year", "id")) + vol_ds.coords["end_date"] = xr.DataArray(end, dims=("year", "id")) + vol_ds["volume"] = xr.DataArray(vol, dims=("year", "id")) return vol_ds From 62c0869ef0887a3b43dc01d044e59801c4da21d0 Mon Sep 17 00:00:00 2001 From: TC-FF Date: Tue, 20 Jun 2023 09:26:50 -0400 Subject: [PATCH 13/24] added class local --- xhydro/frequency_analysis/local.py | 333 +++++++++++++++++++++++++++++ 1 file changed, 333 insertions(+) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index 9334015a..d6b7529f 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -5,6 +5,10 @@ from typing import Union import pandas as pd import fnmatch +import scipy.stats + +from statsmodels.tools import eval_measures +from xclim.indices.stats import fit, parametric_quantile class Data: @@ -565,3 +569,332 @@ def conversion_factor_to_hm3(timestep): vol_ds["volume"] = xr.DataArray(vol, dims=("year", "id")) return vol_ds + + +class Local: + def __init__( + self, + data_ds, + return_period, + dates_vol=None, + dist_list=[ + "expon", + "gamma", + "genextreme", + "genpareto", + "gumbel_r", + "pearson3", + "weibull_min", + ], + tolerence=0.15, + seasons=None, + min_year=15, + vars_of_interest=["max", "vol"], + 
calculated=False, + ): + # TODO if type of data is object instead of float, it will crash, better to convert or to raise a warning ? + # data_ds.data = data_ds.astype(float) + self.data = data_ds.astype(float) + self.return_period = return_period + self.dist_list = dist_list + self.dates_vol = dates_vol + self.tolerence = tolerence + self.seasons = seasons + self.min_year = min_year + self.analyse_max = None + self.analyse_vol = None + + if "max" in vars_of_interest: + self.analyse_max = self._freq_analys(calculated, var_of_interest="max") + if "vol" in vars_of_interest: + self.analyse_vol = self._freq_analys(calculated, var_of_interest="vol") + + def _freq_analys(self, calculated: bool, var_of_interest: str): + """ + This function is executed upon initialization of calss Local. It performs multiple frequency analysis over the data provided. + + Parameters + ---------- + self.data : xhydro.Data + a dataset containing hydrological data + + self.return_period : list + list of return periods as float + + self.dist_list : list + list of distribution supported by scypy.stat + + self.tolerence : float + percentage of missing value tolerence in decimal form (0.15 for 15%), if above within the season, the maximum for that year will be skipped + + self.seasons : list + list of seasons names, begining and end of said seasons must have been set previously in the object xhydro.Data + + self.min_year : int + Minimum number of year. If a station has less year than the minimum, for any given season, the station will be skipped + + Returns + ------- + self.analyse : xr.Dataset + A Dataset with dimensions id, season, scipy_dist and return_period indexes with variables Criterions and Quantiles + + Examples + -------- + >>> TODO Not sure how to set example here + + + """ + + def get_criterions(data, params, dist): + data = data[~np.isnan(data)] + # params = params[~np.isnan(params)] + + LLH = dist.logpdf(data, *params).sum() # log-likelihood + + aic = eval_measures.aic(llf=LLH, nobs=len(data), df_modelwc=len(params)) + bic = eval_measures.bic(llf=LLH, nobs=len(data), df_modelwc=len(params)) + try: + aicc = eval_measures.aicc( + llf=LLH, nobs=len(data), df_modelwc=len(params) + ) + except: + aicc = np.nan + + # logLik = np.sum( stats.gamma.logpdf(data, fitted_params[0], loc=fitted_params[1], scale=fitted_params[2]) ) + # k = len(fitted_params) + # aic = 2*k - 2*(logLik) + return {"aic": aic, "bic": bic, "aicc": aicc} + + def fit_one_distrib(ds_max, dist): + return ( + fit(ds_max.chunk(dict(time=-1)), dist=dist) + .assign_coords(scipy_dist=dist) + .expand_dims("scipy_dist") + ) # .rename('value') + + if calculated: + ds_calc = self.data.rename({"year": "time"}).load() + else: + if var_of_interest == "max": + ds_calc = ( + self.data._get_max(self.tolerence, self.seasons) + .rename({"year": "time"}) + .load() + ) + elif var_of_interest == "vol": + ds_calc = ( + self.data.calculate_volume( + tolerence=self.tolerence, dates=self.dates_vol + ) + .rename({"year": "time", "volume": "value"}) + .astype(float) + .load() + ) + ds_calc = ds_calc.value + ds_calc = ds_calc.dropna(dim="id", thresh=self.min_year) + + quantiles = [] + criterions = [] + parameters = [] + for dist in self.dist_list: + # FIXME .load() causes issues, but it was added to fix something + + params = fit_one_distrib(ds_calc, dist).load() + parameters.append(params) + # quantiles.append(xr.merge([parametric_quantile(params, q=1 - 1.0 / T).rename('value') for T in self.return_period])) + quantiles.append( + xr.merge( + [ + parametric_quantile(params, 
q=1 - 1.0 / T) + for T in self.return_period + ] + ) + ) + dist_obj = getattr(scipy.stats, dist) + # criterions.append(xr.apply_ufunc(get_criterions, ds_calc, params, dist_obj, input_core_dims=[['time'], ['dparams'], []], vectorize=True).to_dataset(name='Criterions')) + + crit = xr.apply_ufunc( + get_criterions, + ds_calc, + params, + dist_obj, + input_core_dims=[["time"], ["dparams"], []], + vectorize=True, + ) + # If crit is a DataArray, the variable we name it value, if it's a DataSet, it will already have variables names + if isinstance(crit, xr.DataArray): + crit.name = "value" + criterions.append(crit) + + def append_var_names(ds, str): + try: + var_list = list(ds.keys()) + except: + new_name = ds.name + str + return ds.rename(new_name) + dict_names = dict(zip(var_list, [s + str for s in var_list])) + return ds.rename(dict_names) + + # ds_paramters = xr.Dataset() + ds_paramters = xr.concat( + parameters, dim="scipy_dist", combine_attrs="drop_conflicts" + ) + ds_paramters = append_var_names(ds_paramters, "_parameters") + + ds_quantiles = xr.merge(quantiles) + ds_quantiles = append_var_names(ds_quantiles, "_quantiles") + + ds_criterions = xr.merge(criterions) + ds_criterions = append_var_names(ds_criterions, "_criterions") + + ds_quantiles = ds_quantiles.rename({"quantile": "return_period"}) + ds_quantiles["return_period"] = 1.0 / (1 - ds_quantiles.return_period) + + return xr.merge([ds_criterions, ds_quantiles, ds_calc, ds_paramters]) + + def view_criterions(self, var_of_interest): + """ + Fonction to get Criterions results from a xhydro.Local object. Output is rounded for easiser visualisation. + + Returns + ------- + df : pd.Dataframe + Dataframe organised with id, season, year, scipy_dist + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) + >>> catchment_list = ['023301'] + >>> sub_set = donnees.select_catchments(catchment_list = catchment_list) + >>> sub_set.season = ['Spring', 60, 182] + >>> return_period = np.array([1.01, 2, 2.33, 5, 10, 20, 50, 100, 200, 500, 1000, 10000]) + >>> dist_list = ['expon', 'gamma', 'genextreme', 'genpareto', 'gumbel_r', 'pearson3', 'weibull_min'] + >>> fa = xh.Local(data_ds = sub_set, return_period = return_period, dist_list = dist_list, tolerence = 0.15, seasons = ['Automne', 'Printemps'], min_year = 15) + >>> fa.view_quantiles() + >>> id season level_2 return_period Quantiles + 0 023301 Spring expon 1.01 22.157376 + 1 023301 Spring expon 2.00 87.891419 + 2 023301 Spring expon 2.33 102.585536 + 3 023301 Spring expon 5.00 176.052678 + 4 023301 Spring expon 10.00 242.744095 + ... 
+ """ + # dataarray to_dataframe uses first diemnsion as nameless index, so depending on the position in dim_order, dimension gets names level_x + var_list = [s for s in self.analyse_max.keys() if "criterions" in s] + + if var_of_interest == "vol": + return ( + self.analyse_vol[var_list] + .to_dataframe(dim_order=["id", "scipy_dist"])[var_list] + .reset_index() + .rename(columns={"level_1": "scipy_dist"}) + .round() + ) + elif var_of_interest == "max": + return ( + self.analyse_max[var_list] + .to_dataframe(dim_order=["id", "season", "scipy_dist"])[var_list] + .reset_index() + .rename(columns={"level_2": "scipy_dist"}) + .round() + ) + else: + return print('use "vol" for volumes or "max" for maximums ') + + def view_quantiles(self, var_of_interest): + """ + Fonction to get Quantiles results from a xhydro.Local object. + + Returns + ------- + df : pd.Dataframe + Dataframe organised with id, season, year, scipy_dist, return_period + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) + >>> catchment_list = ['023301'] + >>> sub_set = donnees.select_catchments(catchment_list = catchment_list) + >>> sub_set.season = ['Spring', 60, 182] + >>> return_period = np.array([1.01, 2, 2.33, 5, 10, 20, 50, 100, 200, 500, 1000, 10000]) + >>> dist_list = ['expon', 'gamma', 'genextreme', 'genpareto', 'gumbel_r', 'pearson3', 'weibull_min'] + >>> fa = xh.Local(data_ds = sub_set, return_period = return_period, dist_list = dist_list, tolerence = 0.15, seasons = ['Automne', 'Printemps'], min_year = 15) + >>> fa.view_criterions() + >>> id season level_2 Criterions + >>> 0 023301 Spring expon {'aic': 582.9252842821857, 'bic': 586.82777171... + >>> 1 023301 Spring gamma {'aic': 1739.77441499742, 'bic': 1745.62814615... + ... + """ + + var_list = [s for s in self.analyse_max.keys() if "quantiles" in s] + if var_of_interest == "vol": + return ( + self.analyse_vol[var_list] + .to_dataframe(dim_order=["id", "scipy_dist", "return_period"])[var_list] + .reset_index() + .rename(columns={"level_1": "scipy_dist"}) + .round() + ) + elif var_of_interest == "max": + return ( + self.analyse_max[var_list] + .to_dataframe( + dim_order=["id", "season", "scipy_dist", "return_period"] + )[var_list] + .reset_index() + .rename(columns={"level_2": "scipy_dist"}) + .round() + ) + else: + return print('use "vol" for volumes or "max" for maximums ') + + def view_values(self, var_of_interest): + """ + Fonction to get values results from a xhydro.Local object. 
+ + Returns + ------- + df : pd.Dataframe + Dataframe organised with id, season, year, scipy_dist, return_period + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) + >>> catchment_list = ['023301'] + >>> sub_set = donnees.select_catchments(catchment_list = catchment_list) + >>> sub_set.season = ['Spring', 60, 182] + >>> return_period = np.array([1.01, 2, 2.33, 5, 10, 20, 50, 100, 200, 500, 1000, 10000]) + >>> dist_list = ['expon', 'gamma', 'genextreme', 'genpareto', 'gumbel_r', 'pearson3', 'weibull_min'] + >>> fa = xh.Local(data_ds = sub_set, return_period = return_period, dist_list = dist_list, tolerence = 0.15, seasons = ['Automne', 'Printemps'], min_year = 15) + >>> fa.view_criterions() + >>> id season level_2 Criterions + >>> 0 023301 Spring expon {'aic': 582.9252842821857, 'bic': 586.82777171... + >>> 1 023301 Spring gamma {'aic': 1739.77441499742, 'bic': 1745.62814615... + ... + """ + # TODO Output as dict is ugly + + if var_of_interest == "vol": + return self.analyse_vol.value.to_dataframe().dropna().reset_index() + elif var_of_interest == "max": + return ( + self.analyse_max.value.to_dataframe(name="Maximums") + .reset_index()[ + ["id", "season", "time", "start_date", "end_date", "Maximums"] + ] + .dropna() + ) + else: + return print('use "vol" for volumes or "max" for maximums ') From 60c55d194f80f067bc45760b54504136c2d7dbc8 Mon Sep 17 00:00:00 2001 From: TC-FF Date: Fri, 23 Jun 2023 11:52:42 -0400 Subject: [PATCH 14/24] Added utils functions to go with local and data classes --- xhydro/utils.py | 52 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 xhydro/utils.py diff --git a/xhydro/utils.py b/xhydro/utils.py new file mode 100644 index 00000000..bdad1a75 --- /dev/null +++ b/xhydro/utils.py @@ -0,0 +1,52 @@ +import datetime + + +def get_julian_day(month, day, year=None): + """ + Return julian day for a specified date, if year is not specified, uses curent year + + Parameters + ---------- + month : int + integer of the target month + + day : int + integer of the target day + + year : int + integer of the target year + + Returns + ------- + int + julian day (1 - 366) + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> jj = donnees.get_julian_day(month = 9, day = 1) + >>> jj: 244 + >>> jj = donnees.get_julian_day(month = 9, day = 1, year = 2000) + >>> jj: 245 + """ + if year is None: + year = datetime.date.today().year + + return datetime.datetime(year, month, day).timetuple().tm_yday + + +def get_timestep(array_in): + if len(array_in) < 2: + # Returns a timestep of one for a one timestep array + return 1 + timestep = ((array_in[-1] - array_in[0]) / (array_in.size - 1)).values.astype( + "timedelta64[m]" + ) + if timestep >= 60 and timestep < 24 * 60: + timestep = timestep.astype("timedelta64[h]") + elif timestep >= 24 * 60: + timestep = timestep.astype("timedelta64[D]") + return timestep From 40e74ad639aa3d8333ab4d3d6bb7b162f1441744 Mon Sep 17 00:00:00 2001 From: TC-FF Date: Fri, 23 Jun 2023 11:53:12 -0400 Subject: [PATCH 15/24] updated init to run first Notebook --- xhydro/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/xhydro/__init__.py b/xhydro/__init__.py index f97393f5..7ebf9f12 100644 --- a/xhydro/__init__.py +++ b/xhydro/__init__.py @@ -3,3 +3,6 @@ __author__ = """Thomas-Charles Fortier Filion""" __email__ = "tcff_hydro@outlook.com" __version__ = "0.1.5" + +from .frequency_analysis.local import * +from .utils import get_julian_day, get_timestep From 02b94bba36d422a167a5b11ed70b3f5be09c0a72 Mon Sep 17 00:00:00 2001 From: TC-FF Date: Fri, 23 Jun 2023 11:53:54 -0400 Subject: [PATCH 16/24] made changes to adrees new dataset dimensions --- xhydro/frequency_analysis/local.py | 545 +++++++++++++++-------------- 1 file changed, 288 insertions(+), 257 deletions(-) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index d6b7529f..ed68d760 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -6,6 +6,7 @@ import pandas as pd import fnmatch import scipy.stats +import xhydro as xh from statsmodels.tools import eval_measures from xclim.indices.stats import fit, parametric_quantile @@ -84,7 +85,7 @@ def multi_filter(names, patterns: list): ] # Getting the full list - catchment_list = multi_filter(obj.catchments, catchment_list) + catchment_list = multi_filter(obj._catchments, catchment_list) # Setting the list as a class attribute obj._catchments = catchment_list @@ -299,279 +300,304 @@ def get_bool_over_tolerence(self, tol: float, season=None): grouped_ds = ds.custom_group_by(season_vals[0], season_vals[1]).count() tolerence = season_size * (1 - tol) - return (grouped_ds.value > tolerence).load() - - -def get_maximum(self, tolerence: float = None, seasons=None): - """ - Fonction to tiddy _get_max results. - - Parameters - ---------- - tolerence : Float - Tolerance in decimal form (0.15 for 15%) - - seasons : List - List of season's name to be checked - - Returns - ------- - df : pd.Dataframe - Dataframe organised with id, season, year, Maximums - - Examples - -------- - >>> import xarray as xr - >>> cehq_data_path = - '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' - >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) - >>> donnees = Data(ds) - >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) - >>> catchment_list = ['023301'] - >>> sub_set = donnees.select_catchments(catchment_list = catchment_list) - >>> sub_set.season = ['Spring', 60, 182] - >>> sub_set.get_maximum(tolerence=0.15, seasons=['Spring']) - >>> id season year Maximums - 0 023301 Spring 1928 231.000000 - 1 023301 Spring 1929 241.000000 - 2 023301 Spring 1930 317.000000 - ... - """ - return ( - self._get_max(tolerence=tolerence, seasons=seasons) - .to_dataframe(name="Maximums") - .reset_index()[["id", "season", "year", "start_date", "end_date", "Maximums"]] - .dropna() - ) - - -def _get_max(self, tolerence=None, seasons=[]): - """ - Fonction to get maximum value per season, according to a tolerence. 
- - Parameters - ---------- - tolerence : Float - Tolerance in decimal form (0.15 for 15%) - - seasons : List - List of season's name to be checked - - Returns - ------- - da : xr.DataArray - DataArray of maximums - - Examples - -------- - >>> import xarray as xr - >>> cehq_data_path = - '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' - >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) - >>> donnees = Data(ds) - >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) - >>> catchment_list = ['023301'] - >>> sub_set = donnees.select_catchments(catchment_list = catchment_list) - >>> sub_set.season = ['Spring', 60, 182] - >>> sub_set._get_max(tolerence=0.15, seasons=['Spring']) - xarray.DataArray 'value' (season: 1, year: 52, id: 1) - """ - grouped_ds = self.copy() - - def max_over_one_season(grouped_ds, tolerence, season): - season_vals = grouped_ds._get_season_values(season) - if isinstance(season_vals, xr.Dataset): - years = np.unique(season_vals.year) - bvs = np.unique(season_vals.id) - max = np.empty((len(years), len(bvs)), dtype=object) - beg = np.empty((len(years), len(bvs)), dtype=object) - end = np.empty((len(years), len(bvs)), dtype=object) - for y, year in enumerate(years): - for b, bv in enumerate(bvs): - dd = season_vals.sel(year=year, id=bv).value.to_numpy().tolist() - beg[y, b] = pd.to_datetime(str(year) + str(dd[0]), format="%Y%j") - end[y, b] = pd.to_datetime(str(year) + str(dd[1]), format="%Y%j") - ds_year = grouped_ds.data.where( - grouped_ds.data.time.dt.year == year, drop=True - ) - ds_year = ds_year.sel(id=bv) + return (grouped_ds[list(grouped_ds.keys())[0]] > tolerence).load() - # +1 to include end - ds_period = ds_year.sel( - time=np.isin(ds_year.time.dt.dayofyear, range(dd[0], dd[1] + 1)) - ) + def get_maximum(self, tolerence: float = None, seasons=None): + """ + Fonction to tiddy _get_max results. 
- d = ds_period.value.values - timestep = float(ds_year.time.dt.dayofyear.timestep.values.tolist()) - nb_expected = (dd[1] + 1 - dd[0]) / timestep - # nb_expected is used to account for missing and nan - if np.count_nonzero(~np.isnan(d)) / nb_expected > (1 - tolerence): - max[y, b] = np.nanmax(d) # .tolist() - else: - max[y, b] = np.nan - - max_ds = xr.Dataset() - - max_ds.coords["year"] = xr.DataArray(years, dims=("year",)) - max_ds.coords["id"] = xr.DataArray(bvs, dims=("id",)) - max_ds.coords["start_date"] = xr.DataArray(beg, dims=("year", "id")) - max_ds.coords["end_date"] = xr.DataArray(end, dims=("year", "id")) - max_ds["value"] = xr.DataArray(max.astype(float), dims=("year", "id")) - # For each bv - # For each year - # check for tolerence - # get max - # create a DS - return max_ds - else: - # TODO add year from grouped_ds.data.dt.year - # and make full str start_date and end_date - grouped_ds.data.coords["start_date"] = pd.to_datetime( - str(season_vals[0]), format="%j" - ).strftime("%m-%d") - grouped_ds.data.coords["end_date"] = pd.to_datetime( - str(season_vals[1]), format="%j" - ).strftime("%m-%d") + Parameters + ---------- + tolerence : Float + Tolerance in decimal form (0.15 for 15%) - return ( - grouped_ds.custom_group_by(season_vals[0], season_vals[1]) - .max() - .where(grouped_ds.get_bool_over_tolerence(tolerence, season), drop=True) - ) + seasons : List + List of season's name to be checked - if seasons: - # Creating a new dimension of season and - # merging all Dataset from max_over_one_season - return xr.concat( - [ - max_over_one_season(grouped_ds, tolerence, season) - .assign_coords(season=season) - .expand_dims("season") - for season in seasons - ], - dim="season", - ).value + Returns + ------- + df : pd.Dataframe + Dataframe organised with id, season, year, Maximums - else: - # TODO Tolerence not used if no period is defined + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = + '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) + >>> catchment_list = ['023301'] + >>> sub_set = donnees.select_catchments(catchment_list = catchment_list) + >>> sub_set.season = ['Spring', 60, 182] + >>> sub_set.get_maximum(tolerence=0.15, seasons=['Spring']) + >>> id season year Maximums + 0 023301 Spring 1928 231.000000 + 1 023301 Spring 1929 241.000000 + 2 023301 Spring 1930 317.000000 + ... + """ return ( - grouped_ds.data.groupby("time.year") - .max() - .value.assign_coords(season="Whole year") - .expand_dims("season") + self._get_max(tolerence=tolerence, seasons=seasons) + .to_dataframe() + .reset_index()[ + [ + "id", + "season", + "year", + "start_date", + "end_date", + list(self.data.keys())[0], + ] + ] + .dropna() ) + def _get_max(self, tolerence=None, seasons=[]): + """ + Fonction to get maximum value per season, according to a tolerence. 
-def calculate_volume(self, dates: Union[list, xr.Dataset] = None, tolerence=0.15): - ds = self.copy() - - def conversion_factor_to_hm3(timestep): - # timestep in days - # TODO check if last date is included - return float(timestep) * 60 * 60 * 24 - - if dates is None: - # TODO use season dates - pass - elif isinstance(dates, list): - # TODO bool over tolerence takes season, generalise - with warnings.catch_warnings(): # Removes overlaping warning - warnings.simplefilter("ignore") - self.season = ["Volumes", dates[0], dates[1]] - grouped_ds = ( - ds.custom_group_by(dates[0], dates[1]) - .sum() - .where(ds.get_bool_over_tolerence(tolerence, "Volumes"), drop=True) - ) - self.rm_season("Volumes") - # Transform tp hm³ - # TODO add start and end and clear other attributes - grouped_ds = ( - grouped_ds - * xr.apply_ufunc( - conversion_factor_to_hm3, - grouped_ds["timestep"], - input_core_dims=[[]], - vectorize=True, + Parameters + ---------- + tolerence : Float + Tolerance in decimal form (0.15 for 15%) + + seasons : List + List of season's name to be checked + + Returns + ------- + da : xr.DataArray + DataArray of maximums + + Examples + -------- + >>> import xarray as xr + >>> cehq_data_path = + '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) + >>> donnees = Data(ds) + >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) + >>> catchment_list = ['023301'] + >>> sub_set = donnees.select_catchments(catchment_list = catchment_list) + >>> sub_set.season = ['Spring', 60, 182] + >>> sub_set._get_max(tolerence=0.15, seasons=['Spring']) + xarray.DataArray 'value' (season: 1, year: 52, id: 1) + """ + grouped_ds = self.copy() + + def max_over_one_season(grouped_ds, tolerence, season): + season_vals = grouped_ds._get_season_values(season) + if isinstance(season_vals, xr.Dataset): + years = np.unique(season_vals.year) + bvs = np.unique(season_vals.id) + max = np.empty((len(years), len(bvs)), dtype=object) + beg = np.empty((len(years), len(bvs)), dtype=object) + end = np.empty((len(years), len(bvs)), dtype=object) + for y, year in enumerate(years): + for b, bv in enumerate(bvs): + dd = season_vals.sel(year=year, id=bv).value.to_numpy().tolist() + beg[y, b] = pd.to_datetime( + str(year) + str(dd[0]), format="%Y%j" + ) + end[y, b] = pd.to_datetime( + str(year) + str(dd[1]), format="%Y%j" + ) + ds_year = grouped_ds.data.where( + grouped_ds.data.time.dt.year == year, drop=True + ) + ds_year = ds_year.sel(id=bv) + + # +1 to include end + ds_period = ds_year.sel( + time=np.isin( + ds_year.time.dt.dayofyear, range(dd[0], dd[1] + 1) + ) + ) + + d = ds_period[list(ds_period.keys())[0]].values + timestep = xh.get_timestep(ds_year) + # timestep = float( + # ds_year.time.dt.dayofyear.timestep.values.tolist() + # ) + nb_expected = (dd[1] + 1 - dd[0]) / timestep + # nb_expected is used to account for missing and nan + if np.count_nonzero(~np.isnan(d)) / nb_expected > ( + 1 - tolerence + ): + max[y, b] = np.nanmax(d) # .tolist() + else: + max[y, b] = np.nan + + max_ds = xr.Dataset() + + max_ds.coords["year"] = xr.DataArray(years, dims=("year",)) + max_ds.coords["id"] = xr.DataArray(bvs, dims=("id",)) + max_ds.coords["start_date"] = xr.DataArray(beg, dims=("year", "id")) + max_ds.coords["end_date"] = xr.DataArray(end, dims=("year", "id")) + max_ds["value"] = xr.DataArray(max.astype(float), dims=("year", "id")) + # For each bv + # For each year + # check for tolerence + # get max + # create a DS + return max_ds + else: + # TODO add year from 
grouped_ds.data.dt.year + # and make full str start_date and end_date + grouped_ds.data.coords["start_date"] = pd.to_datetime( + str(season_vals[0]), format="%j" + ).strftime("%m-%d") + grouped_ds.data.coords["end_date"] = pd.to_datetime( + str(season_vals[1]), format="%j" + ).strftime("%m-%d") + + return ( + grouped_ds.custom_group_by(season_vals[0], season_vals[1]) + .max() + .where( + grouped_ds.get_bool_over_tolerence(tolerence, season), drop=True + ) + ) + + if seasons: + # Creating a new dimension of season and + # merging all Dataset from max_over_one_season + ds = xr.concat( + [ + max_over_one_season(grouped_ds, tolerence, season) + .assign_coords(season=season) + .expand_dims("season") + for season in seasons + ], + dim="season", ) - * (dates[1] - dates[0]) - / 1000000 - ) + return ds[list(ds.keys())[0]] - df = grouped_ds.year.to_dataframe() - df["beg"] = dates[0] - df["end"] = dates[1] + else: + # TODO Tolerence not used if no period is defined + return ( + grouped_ds.data.groupby("time.year") + .max() + .assign_coords(season="Whole year") + .expand_dims("season") + ) - grouped_ds["start_date"] = pd.to_datetime( - df["year"] * 1000 + df["beg"], format="%Y%j" - ) - grouped_ds["end_date"] = pd.to_datetime( - df["year"] * 1000 + df["end"], format="%Y%j" - ) + def calculate_volume(self, dates: Union[list, xr.Dataset] = None, tolerence=0.15): + ds = self.copy() - grouped_ds["units"] = "hm³" - - return grouped_ds.drop_vars( - [ - "_last_update_timestamp", - "aggregation", - "data_type", - "data_type", - "drainage_area", - "latitude", - "longitude", - "name", - "source", - "timestep", - "province", - "regulated", - ] - ).rename_vars({"value": "volume"}) - elif isinstance(dates, xr.Dataset): - # TODO Make sure DS has same dimensions than target - vol = np.empty( - (len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object - ) - beg = np.empty( - (len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object - ) - end = np.empty( - (len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object - ) - for y, year in enumerate(np.unique(ds.data.time.dt.year)): - for b, bv in enumerate(ds.data.id): - dd = dates.sel(year=year, id=bv).value.to_numpy().tolist() - beg[y, b] = pd.to_datetime(str(year) + str(dd[0]), format="%Y%j") - end[y, b] = pd.to_datetime(str(year) + str(dd[1]), format="%Y%j") - ds_year = ds.data.where(ds.data.time.dt.year == year, drop=True) - ds_year = ds_year.sel(id=bv) - # +1 pou inclure la fin, - # TODO si une seule journe dans ds_period, ¸a donne 0 - # TODO check for tolerence - ds_period = ds_year.sel( - time=np.isin(ds_year.time.dt.dayofyear, range(dd[0], dd[1] + 1)) + def conversion_factor_to_hm3(timestep): + # flow is in m³/s and we want m³, so we multiply by seconds + # TODO check if last date is included + return pd.to_timedelta(1, unit=timestep).total_seconds() + + if dates is None: + # TODO use season dates + pass + elif isinstance(dates, list): + # TODO bool over tolerence takes season, generalise + with warnings.catch_warnings(): # Removes overlaping warning + warnings.simplefilter("ignore") + self.season = ["Volumes", dates[0], dates[1]] + grouped_ds = ( + ds.custom_group_by(dates[0], dates[1]) + .sum() + .where(ds.get_bool_over_tolerence(tolerence, "Volumes"), drop=True) + ) + self.rm_season("Volumes") + # Transform tp hm³ + # TODO add start and end and clear other attributes + grouped_ds = ( + grouped_ds + * xr.apply_ufunc( + conversion_factor_to_hm3, + grouped_ds["timestep"], + input_core_dims=[[]], + vectorize=True, ) - # delta en 
ns, à rapporter en s (1000000000) - # puis le tout en hm³ (1000000) - delta = ds_period.time[-1] - ds_period.time[0] - delta = delta.to_numpy().tolist() / (1000000000 * 1000000) - vol[y, b] = sum(ds_period.value.values).tolist() * delta + * (dates[1] - dates[0]) + / 1000000 # from m³ to hm³ + ) - vol_ds = xr.Dataset() + df = grouped_ds.year.to_dataframe() + df["beg"] = dates[0] + df["end"] = dates[1] - vol_ds.coords["year"] = xr.DataArray( - np.unique(ds.data.time.dt.year), dims=("year",) - ) - vol_ds.coords["id"] = xr.DataArray(ds.data.id.to_numpy(), dims=("id",)) - vol_ds.coords["units"] = "hm³" - vol_ds.coords["start_date"] = xr.DataArray(beg, dims=("year", "id")) - vol_ds.coords["end_date"] = xr.DataArray(end, dims=("year", "id")) - vol_ds["volume"] = xr.DataArray(vol, dims=("year", "id")) + grouped_ds["start_date"] = pd.to_datetime( + df["year"] * 1000 + df["beg"], format="%Y%j" + ) + grouped_ds["end_date"] = pd.to_datetime( + df["year"] * 1000 + df["end"], format="%Y%j" + ) + grouped_ds["units"] = "hm³" + + return grouped_ds.drop_vars( + [ + "drainage_area", + "latitude", + "longitude", + "name", + "source", + "timestep", + "province", + "regulated", + ] + ).rename_vars({list(grouped_ds.keys())[0]: "volume"}) + elif isinstance(dates, xr.Dataset): + # TODO Make sure DS has same dimensions than target + vol = np.empty( + (len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object + ) + beg = np.empty( + (len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object + ) + end = np.empty( + (len(np.unique(ds.data.time.dt.year)), len(ds.data.id)), dtype=object + ) + for y, year in enumerate(np.unique(ds.data.time.dt.year)): + for b, bv in enumerate(ds.data.id): + try: + dd = dates.sel(year=year, id=bv).value.to_numpy().tolist() + except KeyError: + # KeyError can occur if ds is incomplete + pass + beg[y, b] = pd.to_datetime(str(year) + str(dd[0]), format="%Y%j") + end[y, b] = pd.to_datetime(str(year) + str(dd[1]), format="%Y%j") + ds_year = ds.data.where(ds.data.time.dt.year == year, drop=True) + ds_year = ds_year.sel(id=bv) + # +1 pou inclure la fin, + # TODO si une seule journe dans ds_period, ¸a donne 0 + # TODO check for tolerence + ds_period = ds_year.sel( + time=np.isin(ds_year.time.dt.dayofyear, range(dd[0], dd[1] + 1)) + ) + # delta en ns, à rapporter en s (1000000000) + # puis le tout en hm³ (1000000) + delta = ds_period.time[-1] - ds_period.time[0] + delta = delta.to_numpy().tolist() / (1000000000 * 1000000) + vol[y, b] = ( + sum( + ds_period.squeeze()[list(ds_period.keys())[0]].values + ).tolist() + * delta + ) + + vol_ds = xr.Dataset() - return vol_ds + vol_ds.coords["year"] = xr.DataArray( + np.unique(ds.data.time.dt.year), dims=("year",) + ) + vol_ds.coords["id"] = xr.DataArray(ds.data.id.to_numpy(), dims=("id",)) + vol_ds.coords["units"] = "hm³" + vol_ds.coords["start_date"] = xr.DataArray(beg, dims=("year", "id")) + vol_ds.coords["end_date"] = xr.DataArray(end, dims=("year", "id")) + vol_ds["volume"] = xr.DataArray(vol, dims=("year", "id")) + + return vol_ds class Local: + # TODO list(ds.keys())[0] used multiples time, genewralise ofr all var, not just [0] and do it a better way, ie, in the init def __init__( self, data_ds, @@ -593,8 +619,13 @@ def __init__( calculated=False, ): # TODO if type of data is object instead of float, it will crash, better to convert or to raise a warning ? 
- # data_ds.data = data_ds.astype(float) - self.data = data_ds.astype(float) + try: + # if data is provided + self.data = data_ds.astype(float) + except AttributeError: + # if not + self.data = data_ds.data.astype(float) + self.data = data_ds self.return_period = return_period self.dist_list = dist_list self.dates_vol = dates_vol @@ -890,7 +921,7 @@ def view_values(self, var_of_interest): return self.analyse_vol.value.to_dataframe().dropna().reset_index() elif var_of_interest == "max": return ( - self.analyse_max.value.to_dataframe(name="Maximums") + self.analyse_max.value.to_dataframe() .reset_index()[ ["id", "season", "time", "start_date", "end_date", "Maximums"] ] From db600c5edef9ae817b3cc3078b96bf1dc86d70be Mon Sep 17 00:00:00 2001 From: TC-FF Date: Fri, 23 Jun 2023 12:12:16 -0400 Subject: [PATCH 17/24] NB's first comit. Code still need upgrades for end --- docs/notebooks/local_frequency_analysis.ipynb | 1221 +++++++++++++++++ xhydro/frequency_analysis/local.py | 38 +- 2 files changed, 1241 insertions(+), 18 deletions(-) create mode 100644 docs/notebooks/local_frequency_analysis.ipynb diff --git a/docs/notebooks/local_frequency_analysis.ipynb b/docs/notebooks/local_frequency_analysis.ipynb new file mode 100644 index 00000000..0fd5e7f2 --- /dev/null +++ b/docs/notebooks/local_frequency_analysis.ipynb @@ -0,0 +1,1221 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The autoreload extension is already loaded. To reload it, use:\n", + " %reload_ext autoreload\n" + ] + } + ], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Frequency analysis \n", + "## Class Data()\n", + "This class takes a xarray.Dataset to initialise\n", + "After the class is initialised, it can be used to prepare the data\n", + "in this example, we extract the value form a melcc dataset and we filter on all catchments statirng with 0234- and catchemnt 051001\n", + "The function get_julian_day helps us defining our seasons dates which we then set using the attribute .season of our Data() DataSet\n", + "\n", + "- [loading the dataset Cell](#data_load)\n", + "- [Catchement selection](#data_catchments)\n", + "- [Seasons](#data_seasons)\n", + "- [rm Seasons](#data_rmseasons)\n", + "- [Maximum](#data_getMaximum)\n", + "\n", + "## Class Local()\n", + "- [Init](#fa_init)\n", + "- [View quantiles](#fa_qunatiles)\n", + "\n", + "\n", + "\n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Load the dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "import xarray as xr\n", + "import xhydro as xh\n", + "import numpy as np\n", + "import xdatasets as xd" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:        (id: 37, time: 19523, variable: 1, spatial_agg: 1,\n",
+       "                    timestep: 1, time_agg: 1, source: 1)\n",
+       "Coordinates: (12/15)\n",
+       "    drainage_area  (id) float32 1.09e+03 647.0 59.8 ... 227.0 2.163e+03 48.1\n",
+       "    end_date       (variable, id, spatial_agg, timestep, time_agg, source) datetime64[ns] ...\n",
+       "  * id             (id) object '020302' '020404' '020502' ... '024014' '024015'\n",
+       "    latitude       (id) float32 48.77 48.81 48.98 48.98 ... 46.05 46.2 46.18\n",
+       "    longitude      (id) float32 -64.52 -64.92 -64.43 ... -71.45 -72.1 -71.75\n",
+       "    name           (id) object 'Saint' 'York' ... 'Bécancour' 'Bourbon'\n",
+       "    ...             ...\n",
+       "  * spatial_agg    (spatial_agg) object 'watershed'\n",
+       "    start_date     (variable, id, spatial_agg, timestep, time_agg, source) datetime64[ns] ...\n",
+       "  * time           (time) datetime64[ns] 1970-01-01 1970-01-02 ... 2023-06-14\n",
+       "  * time_agg       (time_agg) object 'mean'\n",
+       "  * timestep       (timestep) object 'D'\n",
+       "  * variable       (variable) object 'streamflow'\n",
+       "Data variables:\n",
+       "    streamflow     (id, time, variable, spatial_agg, timestep, time_agg, source) float32 dask.array<chunksize=(1, 19523, 1, 1, 1, 1, 1), meta=np.ndarray>
" + ], + "text/plain": [ + "\n", + "Dimensions: (id: 37, time: 19523, variable: 1, spatial_agg: 1,\n", + " timestep: 1, time_agg: 1, source: 1)\n", + "Coordinates: (12/15)\n", + " drainage_area (id) float32 1.09e+03 647.0 59.8 ... 227.0 2.163e+03 48.1\n", + " end_date (variable, id, spatial_agg, timestep, time_agg, source) datetime64[ns] ...\n", + " * id (id) object '020302' '020404' '020502' ... '024014' '024015'\n", + " latitude (id) float32 48.77 48.81 48.98 48.98 ... 46.05 46.2 46.18\n", + " longitude (id) float32 -64.52 -64.92 -64.43 ... -71.45 -72.1 -71.75\n", + " name (id) object 'Saint' 'York' ... 'Bécancour' 'Bourbon'\n", + " ... ...\n", + " * spatial_agg (spatial_agg) object 'watershed'\n", + " start_date (variable, id, spatial_agg, timestep, time_agg, source) datetime64[ns] ...\n", + " * time (time) datetime64[ns] 1970-01-01 1970-01-02 ... 2023-06-14\n", + " * time_agg (time_agg) object 'mean'\n", + " * timestep (timestep) object 'D'\n", + " * variable (variable) object 'streamflow'\n", + "Data variables:\n", + " streamflow (id, time, variable, spatial_agg, timestep, time_agg, source) float32 dask.array" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = xd.Query(\n", + " **{\n", + " \"datasets\":{\n", + " \"deh\":{\n", + " \"id\" :[\"02*\"],\n", + " \"regulated\":[\"Natural\"],\n", + " \"variables\":[\"streamflow\"],\n", + " }\n", + " }, \"time\":{\"start\": \"1970-01-01\", \n", + " \"minimum_duration\":(15*365, 'd')},\n", + "\n", + " }\n", + ").data\n", + "ds" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Use the function select_catchments. to select the catchment, \n", + "
this function can use a wildcard.\n", + "
This will create a new Data object\n",
    "
.catchments. is used to show the cathcments, " + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "donnees = xh.Data(ds)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [], + "source": [ + "catchment_list = ['02*','03*', '05*']\n", + "sub_set = donnees.select_catchments(catchment_list = catchment_list)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "listing Data's seasons" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "module 'xhydro' has no attribute 'get_julian_day'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[48], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m a_beg \u001b[39m=\u001b[39m xh\u001b[39m.\u001b[39;49mget_julian_day(month \u001b[39m=\u001b[39m \u001b[39m9\u001b[39m,day \u001b[39m=\u001b[39m \u001b[39m1\u001b[39m)\n\u001b[0;32m 2\u001b[0m a_end \u001b[39m=\u001b[39m xh\u001b[39m.\u001b[39mget_julian_day(month \u001b[39m=\u001b[39m \u001b[39m12\u001b[39m,day \u001b[39m=\u001b[39m \u001b[39m1\u001b[39m)\n\u001b[0;32m 4\u001b[0m p_beg \u001b[39m=\u001b[39m xh\u001b[39m.\u001b[39mget_julian_day(month \u001b[39m=\u001b[39m \u001b[39m2\u001b[39m,day \u001b[39m=\u001b[39m \u001b[39m11\u001b[39m)\n", + "\u001b[1;31mAttributeError\u001b[0m: module 'xhydro' has no attribute 'get_julian_day'" + ] + } + ], + "source": [ + "a_beg = xh.get_julian_day(month = 9,day = 1)\n", + "a_end = xh.get_julian_day(month = 12,day = 1)\n", + "\n", + "p_beg = xh.get_julian_day(month = 2,day = 11)\n", + "p_end = xh.get_julian_day(month = 6,day = 19)\n", + "\n", + "\n", + "#Passer dictionnaire\n", + "sub_set.season = ['Automne', a_beg, a_end]\n", + "sub_set.season = ['Spring', p_beg, p_end]\n", + "sub_set.season = ['Annuelle', 1, 365]\n", + "\n", + "sub_set.get_seasons()\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Use rm_season to remove a season" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sub_set.rm_season('Annuelle')\n", + "sub_set.get_seasons()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "La cellule suivante ne sert qu'a générer un dataset aléatoire avec des dates de début et de fin de crue. \n", + "­
Normalement ces dates seront chargées ou déterminées visuellement pas l'utilisateur" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "# Generating random flood dates for all year and one catchment\n", + "m = np.empty((1, len(range(1910, 2023))), dtype=object)\n", + "for i in np.ndindex(m.shape): m[i] = [random.randint(70, 70), random.randint(139, 139)]\n", + "\n", + "\n", + "dates_ds = xr.Dataset()\n", + "\n", + "dates_ds.coords['year'] = xr.DataArray(range(1910, 2023), dims=('year',))\n", + "dates_ds.coords['id'] = xr.DataArray(['051001'], dims=('id',))\n", + "\n", + "dates_ds['value'] = xr.DataArray(m, dims=('id', 'year'))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sub_set._season['Spring_ds'] = dates_ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "mm1 = sub_set._get_max(tolerence=0.15, seasons=['Spring_ds'])\n", + "mm2 = sub_set._get_max(tolerence=0.15, seasons=['Spring'])\n", + "# TODO max are the same, but first and last years are mission from Spring_ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mm1.to_dataframe(name = 'Maximums').reset_index()[['id', 'season', 'year', 'start_date','end_date','Maximums']].dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mm2.to_dataframe(name = 'Maximums').reset_index()[['id', 'season', 'year', 'start_date','end_date','Maximums']].dropna()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On calcul les volume de crues à dates fixes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vol = sub_set.calculate_volume(dates=[35, 36])\n", + "vol.volume.to_dataframe()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On calcul les volumes de crues avec un DataSet" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sub_set_example = sub_set.select_catchments(['051001'])\n", + "vol = sub_set_example.calculate_volume(dates=dates_ds)\n", + "vol.to_dataframe().dropna()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "vol.to_dataframe().dropna().reset_index()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Use get_maximum to get the maximums per season for selected catcment, if no period selected, anual maxmaximum will be fectch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sub_set.get_maximum(tolerence=.85)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "sub_set.get_maximum(tolerence=0.15, seasons=['Spring'])" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CLass Local()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Init local with a data Ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "return_period = np.array([2, 5, 10, 20, 50, 
100, 200, 1000, 2000, 10000])\n", + "dist_list = ['expon', 'gamma', 'genextreme', 'gennorm', 'gumbel_r', 'pearson3', 'weibull_min']\n", + "\n", + "fa = xh.Local(data_ds = sub_set, return_period = return_period, dist_list = dist_list, tolerence = 0.15, seasons = ['Spring'], min_year = 15, vars_of_interest = ['max'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fa.analyse_max" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fa.view_values('max')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fa.view_criterions('max')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fa.view_quantiles('max')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from xhydro.frequency_analysis.visualisation import *\n", + "plot_fa(fa, var='max', lang='fr', var_name='value')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fa_calc = xh.Local(data_ds = mm2, return_period = return_period, dist_list = dist_list, tolerence = 0.15, seasons = ['Spring'], min_year = 15, vars_of_interest = ['max'], dates_vol=dates_ds, calculated = True)\n", + "fa_calc.view_values('max')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fa.view_values('max')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fa.view_criterions('max')" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "view quantiles" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fa.view_quantiles('max')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fa_calc.view_quantiles('max')\n", + "max" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "view criterions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "vscode": { + "interpreter": { + "hash": "e28391989cdb8b31df72dd917935faad186df3329a743c813090fc6af977a1ca" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index ed68d760..bfe8106b 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -597,7 +597,6 @@ def conversion_factor_to_hm3(timestep): class Local: - # TODO list(ds.keys())[0] used multiples time, genewralise ofr all var, not just [0] and do it a better way, ie, in the init def __init__( self, data_ds, @@ -634,6 +633,10 @@ def __init__( self.min_year = min_year 
self.analyse_max = None self.analyse_vol = None + # TODO list(ds.keys())[0] used multiples time, use self.var_name instead and generalise for all var, not just [0] + self.var_name = list(self.data.data.keys())[0] + + data_ds if "max" in vars_of_interest: self.analyse_max = self._freq_analys(calculated, var_of_interest="max") @@ -820,17 +823,15 @@ def view_criterions(self, var_of_interest): if var_of_interest == "vol": return ( self.analyse_vol[var_list] - .to_dataframe(dim_order=["id", "scipy_dist"])[var_list] - .reset_index() - .rename(columns={"level_1": "scipy_dist"}) + .to_dataframe() + .reset_index()[["id", "scipy_dist"] + var_list] .round() ) elif var_of_interest == "max": return ( self.analyse_max[var_list] - .to_dataframe(dim_order=["id", "season", "scipy_dist"])[var_list] - .reset_index() - .rename(columns={"level_2": "scipy_dist"}) + .to_dataframe() + .reset_index()[["id", "season", "scipy_dist"] + var_list] .round() ) else: @@ -869,19 +870,19 @@ def view_quantiles(self, var_of_interest): if var_of_interest == "vol": return ( self.analyse_vol[var_list] - .to_dataframe(dim_order=["id", "scipy_dist", "return_period"])[var_list] - .reset_index() - .rename(columns={"level_1": "scipy_dist"}) + .to_dataframe() + .reset_index()[ + ["id", "season", "scipy_dist", "return_period"] + var_list + ] .round() ) elif var_of_interest == "max": return ( self.analyse_max[var_list] - .to_dataframe( - dim_order=["id", "season", "scipy_dist", "return_period"] - )[var_list] - .reset_index() - .rename(columns={"level_2": "scipy_dist"}) + .to_dataframe() + .reset_index()[ + ["id", "season", "scipy_dist", "return_period"] + var_list + ] .round() ) else: @@ -918,12 +919,13 @@ def view_values(self, var_of_interest): # TODO Output as dict is ugly if var_of_interest == "vol": - return self.analyse_vol.value.to_dataframe().dropna().reset_index() + return self.analyse_vol[self.var_name].to_dataframe().dropna().reset_index() elif var_of_interest == "max": return ( - self.analyse_max.value.to_dataframe() + self.analyse_max[self.var_name] + .to_dataframe() .reset_index()[ - ["id", "season", "time", "start_date", "end_date", "Maximums"] + ["id", "season", "time", "start_date", "end_date", self.var_name] ] .dropna() ) From c12355875de8741fd7aba33ff0a8485abc803381 Mon Sep 17 00:00:00 2001 From: sebastienlanglois Date: Mon, 28 Aug 2023 00:56:21 -0400 Subject: [PATCH 18/24] change paths to general format --- xhydro/frequency_analysis/local.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index bfe8106b..cbe80ed5 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -11,6 +11,10 @@ from statsmodels.tools import eval_measures from xclim.indices.stats import fit, parametric_quantile +__all__ = [ + "Data", + "Local", +] class Data: def __init__(self, ds: xr.Dataset): @@ -63,7 +67,7 @@ def select_catchments(self, catchment_list: list): -------- >>> import xarray as xr >>> cehq_data_path = - '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + '/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> filtered_ds = donnees.select_catchments( @@ -115,7 +119,7 @@ def custom_group_by(self, beg: int, end: int): -------- >>> import xarray as xr >>> cehq_data_path = - '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + '/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, 
consolidated=True) >>> donnees = Data(ds) >>> grouped_ds = donnees.custom_group_by(1, 90) @@ -159,13 +163,13 @@ def season(self, liste: list): -------- >>> import xarray as xr >>> cehq_data_path = - '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + '/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.season = ['Yearly', 1, 365] """ - # TODO a dictionary would probalby be more suited + # TODO Replace with dictionary instead name = liste[0] beg = liste[1] end = liste[2] @@ -207,7 +211,7 @@ def rm_season(self, name: str): -------- >>> import xarray as xr >>> cehq_data_path = - '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + '/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.season = ['Yearly', 1, 365] @@ -232,7 +236,7 @@ def get_seasons(self): -------- >>> import xarray as xr >>> cehq_data_path = - '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + '/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.season = ['Yearly', 1, 365] @@ -279,7 +283,7 @@ def get_bool_over_tolerence(self, tol: float, season=None): -------- >>> import xarray as xr >>> cehq_data_path = - '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + '/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.custom_group_by(1, 365.max().where( @@ -323,7 +327,7 @@ def get_maximum(self, tolerence: float = None, seasons=None): -------- >>> import xarray as xr >>> cehq_data_path = - '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + '/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) @@ -374,7 +378,7 @@ def _get_max(self, tolerence=None, seasons=[]): -------- >>> import xarray as xr >>> cehq_data_path = - '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + '/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) @@ -798,7 +802,7 @@ def view_criterions(self, var_of_interest): Examples -------- >>> import xarray as xr - >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> cehq_data_path = '/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) @@ -849,7 +853,7 @@ def view_quantiles(self, var_of_interest): Examples -------- >>> import xarray as xr - >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> cehq_data_path = '/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) @@ -900,7 +904,7 @@ def view_values(self, var_of_interest): Examples -------- >>> import xarray as xr - >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> cehq_data_path = '/datasets/xhydro/tests/cehq/zarr' >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) >>> donnees.get_maximum(tolerence=0.15, seasons=['Spring']) From 14368b55cee53cf0accfff92fffe6a8568bb9c11 Mon Sep 17 00:00:00 2001 From: sebastienlanglois Date: Mon, 28 Aug 2023 00:56:59 
-0400 Subject: [PATCH 19/24] add xdatasets to retrieve data in example nb --- environment.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/environment.yml b/environment.yml index 25affbc6..03258d4a 100644 --- a/environment.yml +++ b/environment.yml @@ -35,3 +35,5 @@ dependencies: - xarray >=0.17.0 - xclim >=0.43.0 - zarr >=2.11.1 + - pip: + - xdatasets From a3f6ced7813dfecdc2b6599aab015cafb3f46abd Mon Sep 17 00:00:00 2001 From: sebastienlanglois Date: Mon, 28 Aug 2023 00:58:02 -0400 Subject: [PATCH 20/24] first draft notebook for local frequency analysis --- docs/notebooks/local_frequency_analysis.ipynb | 3932 ++++++++++++++--- 1 file changed, 3365 insertions(+), 567 deletions(-) diff --git a/docs/notebooks/local_frequency_analysis.ipynb b/docs/notebooks/local_frequency_analysis.ipynb index 0fd5e7f2..9fb1a318 100644 --- a/docs/notebooks/local_frequency_analysis.ipynb +++ b/docs/notebooks/local_frequency_analysis.ipynb @@ -2,31 +2,21 @@ "cells": [ { "cell_type": "code", - "execution_count": 43, + "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Frequency analysis \n", - "## Class Data()\n", - "This class takes a xarray.Dataset to initialise\n", + "\n", + "This class takes a `xarray.Dataset` to initialise\n", "After the class is initialised, it can be used to prepare the data\n", "in this example, we extract the value form a melcc dataset and we filter on all catchments statirng with 0234- and catchemnt 051001\n", "The function get_julian_day helps us defining our seasons dates which we then set using the attribute .season of our Data() DataSet\n", @@ -37,7 +27,9 @@ "- [rm Seasons](#data_rmseasons)\n", "- [Maximum](#data_getMaximum)\n", "\n", - "## Class Local()\n", + "\n", + "\n", + "\n", "- [Init](#fa_init)\n", "- [View quantiles](#fa_qunatiles)\n", "\n", @@ -47,29 +39,603 @@ ] }, { - "attachments": {}, + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + " var py_version = '3.2.2'.replace('rc', '-rc.').replace('.dev', '-dev.');\n", + " var is_dev = py_version.indexOf(\"+\") !== -1 || py_version.indexOf(\"-\") !== -1;\n", + " var reloading = false;\n", + " var Bokeh = root.Bokeh;\n", + " var bokeh_loaded = Bokeh != null && (Bokeh.version === py_version || (Bokeh.versions !== undefined && Bokeh.versions.has(py_version)));\n", + "\n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks;\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, js_modules, js_exports, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + " if (js_modules == null) js_modules = [];\n", + " if 
}\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n" + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "ERROR 1: PROJ: proj_create_from_database: Open of /home/slanglois/mambaforge/envs/xhydro/share/proj failed\n" + ] + } + ], + "source": [ + "import xarray as xr\n", + "import xhydro as xh\n", + "import numpy as np\n", + "import xdatasets as xd" + ] + }, + { "cell_type": "markdown", "metadata": {}, "source": [ "\n", - "Load the dataset" + "## Prepare the data" ] }, { - "cell_type": "code", - "execution_count": 44, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "import xarray as xr\n", - "import xhydro as xh\n", - "import numpy as np\n", - "import xdatasets as xd" + "To conduct frequency analysis on historical time series from various sites, we begin by obtaining a dataset comprising hydrological information. \n", + "\n", + "Here, we use the [xdataset](https://hydrologie.github.io/xdatasets/notebooks/getting_started.html) library to acquire hydrological data from the [Ministère de l'Environnement, de la Lutte contre les changements climatiques, de la Faune et des Parcs](https://www.cehq.gouv.qc.ca/atlas-hydroclimatique/stations-hydrometriques/index.htm). Specifically, our query focuses on stations with IDs beginning with `02`, possessing a natural flow pattern and limited to streamflow data. \n", + "\n", + "Users may prefer to generate their own `xarray.DataArray` using their individual dataset. At a minimum, the `xarray.DataArray` used for frequency analysis needs to have an `id` and a `time` dimension." ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -439,97 +1005,74 @@ " fill: currentColor;\n", "}\n", "
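The notebook text above points out that a user-supplied dataset only needs an `id` and a `time` dimension to be usable for the frequency analysis. A minimal, hypothetical sketch of that structure (the station codes and the synthetic flows below are invented for illustration and are not part of the patch):

```python
# Hypothetical example of a user-built dataset with the two required
# dimensions, "id" and "time"; station codes and flow values are invented.
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range("1970-01-01", "1999-12-31", freq="D")
ids = ["012345", "067890"]  # made-up station identifiers

streamflow = xr.DataArray(
    np.random.default_rng(0).gamma(shape=2.0, scale=50.0, size=(len(ids), len(time))),
    coords={"id": ids, "time": time},
    dims=("id", "time"),
    name="streamflow",
    attrs={"units": "m3 s-1"},
)
ds = streamflow.to_dataset()  # carries an "id" and a "time" dimension, as required
```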
<xarray.Dataset>\n",
-       "Dimensions:        (id: 37, time: 19523, variable: 1, spatial_agg: 1,\n",
-       "                    timestep: 1, time_agg: 1, source: 1)\n",
+       "Dimensions:        (id: 37, time: 20454)\n",
        "Coordinates: (12/15)\n",
        "    drainage_area  (id) float32 1.09e+03 647.0 59.8 ... 227.0 2.163e+03 48.1\n",
-       "    end_date       (variable, id, spatial_agg, timestep, time_agg, source) datetime64[ns] ...\n",
+       "    end_date       (id) datetime64[ns] 2006-10-13 2023-06-14 ... 2023-06-14\n",
        "  * id             (id) object '020302' '020404' '020502' ... '024014' '024015'\n",
        "    latitude       (id) float32 48.77 48.81 48.98 48.98 ... 46.05 46.2 46.18\n",
        "    longitude      (id) float32 -64.52 -64.92 -64.43 ... -71.45 -72.1 -71.75\n",
        "    name           (id) object 'Saint' 'York' ... 'Bécancour' 'Bourbon'\n",
        "    ...             ...\n",
-       "  * spatial_agg    (spatial_agg) object 'watershed'\n",
-       "    start_date     (variable, id, spatial_agg, timestep, time_agg, source) datetime64[ns] ...\n",
-       "  * time           (time) datetime64[ns] 1970-01-01 1970-01-02 ... 2023-06-14\n",
-       "  * time_agg       (time_agg) object 'mean'\n",
-       "  * timestep       (timestep) object 'D'\n",
-       "  * variable       (variable) object 'streamflow'\n",
+       "    spatial_agg    <U9 'watershed'\n",
+       "    start_date     (id) datetime64[ns] 1989-08-12 1980-10-01 ... 2006-07-24\n",
+       "  * time           (time) datetime64[ns] 1970-01-01 1970-01-02 ... 2025-12-31\n",
+       "    time_agg       <U4 'mean'\n",
+       "    timestep       <U1 'D'\n",
+       "    variable       <U10 'streamflow'\n",
        "Data variables:\n",
-       "    streamflow     (id, time, variable, spatial_agg, timestep, time_agg, source) float32 dask.array<chunksize=(1, 19523, 1, 1, 1, 1, 1), meta=np.ndarray>
  • " ], "text/plain": [ "\n", - "Dimensions: (id: 37, time: 19523, variable: 1, spatial_agg: 1,\n", - " timestep: 1, time_agg: 1, source: 1)\n", + "Dimensions: (id: 37, time: 20454)\n", "Coordinates: (12/15)\n", " drainage_area (id) float32 1.09e+03 647.0 59.8 ... 227.0 2.163e+03 48.1\n", - " end_date (variable, id, spatial_agg, timestep, time_agg, source) datetime64[ns] ...\n", + " end_date (id) datetime64[ns] 2006-10-13 2023-06-14 ... 2023-06-14\n", " * id (id) object '020302' '020404' '020502' ... '024014' '024015'\n", " latitude (id) float32 48.77 48.81 48.98 48.98 ... 46.05 46.2 46.18\n", " longitude (id) float32 -64.52 -64.92 -64.43 ... -71.45 -72.1 -71.75\n", " name (id) object 'Saint' 'York' ... 'Bécancour' 'Bourbon'\n", " ... ...\n", - " * spatial_agg (spatial_agg) object 'watershed'\n", - " start_date (variable, id, spatial_agg, timestep, time_agg, source) datetime64[ns] ...\n", - " * time (time) datetime64[ns] 1970-01-01 1970-01-02 ... 2023-06-14\n", - " * time_agg (time_agg) object 'mean'\n", - " * timestep (timestep) object 'D'\n", - " * variable (variable) object 'streamflow'\n", + " spatial_agg " + " streamflow (id, time) float32 nan nan nan nan nan ... nan nan nan nan" ] }, - "execution_count": 45, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ds = xd.Query(\n", + "data = xd.Query(\n", " **{\n", " \"datasets\":{\n", " \"deh\":{\n", @@ -800,245 +1162,1706 @@ " \"minimum_duration\":(15*365, 'd')},\n", "\n", " }\n", - ").data\n", - "ds" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "Use the function select_catchments. to select the catchment, \n", - "
    this function can use a wildcard.\n", - "
    This will create a new object Data\n", - "
    .catchments. is used to show the cathcments, " - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [], - "source": [ - "donnees = xh.Data(ds)" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "metadata": {}, - "outputs": [], - "source": [ - "catchment_list = ['02*','03*', '05*']\n", - "sub_set = donnees.select_catchments(catchment_list = catchment_list)" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "listing Data's seasons" + ").data.squeeze().load()\n", + "data" ] }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 7, "metadata": {}, "outputs": [ { - "ename": "AttributeError", - "evalue": "module 'xhydro' has no attribute 'get_julian_day'", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[48], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m a_beg \u001b[39m=\u001b[39m xh\u001b[39m.\u001b[39;49mget_julian_day(month \u001b[39m=\u001b[39m \u001b[39m9\u001b[39m,day \u001b[39m=\u001b[39m \u001b[39m1\u001b[39m)\n\u001b[0;32m 2\u001b[0m a_end \u001b[39m=\u001b[39m xh\u001b[39m.\u001b[39mget_julian_day(month \u001b[39m=\u001b[39m \u001b[39m12\u001b[39m,day \u001b[39m=\u001b[39m \u001b[39m1\u001b[39m)\n\u001b[0;32m 4\u001b[0m p_beg \u001b[39m=\u001b[39m xh\u001b[39m.\u001b[39mget_julian_day(month \u001b[39m=\u001b[39m \u001b[39m2\u001b[39m,day \u001b[39m=\u001b[39m \u001b[39m11\u001b[39m)\n", - "\u001b[1;31mAttributeError\u001b[0m: module 'xhydro' has no attribute 'get_julian_day'" - ] + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": {}, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/vnd.holoviews_exec.v0+json": "", + "text/html": [ + "
    \n", + "
    \n", + "
    \n", + "" + ], + "text/plain": [ + "Column\n", + " [0] HoloViews(DynamicMap, height=300, sizing_mode='fixed', widget_location='bottom', width=700)\n", + " [1] WidgetBox(align=('center', 'end'))\n", + " [0] Select(margin=(20, 20, 20, 20), name='id', options=['020302', '020404', ...], value='020302', width=250)" + ] + }, + "execution_count": 7, + "metadata": { + "application/vnd.holoviews_exec.v0+json": { + "id": "p1002" + } + }, + "output_type": "execute_result" } ], "source": [ - "a_beg = xh.get_julian_day(month = 9,day = 1)\n", - "a_end = xh.get_julian_day(month = 12,day = 1)\n", - "\n", - "p_beg = xh.get_julian_day(month = 2,day = 11)\n", - "p_end = xh.get_julian_day(month = 6,day = 19)\n", - "\n", - "\n", - "#Passer dictionnaire\n", - "sub_set.season = ['Automne', a_beg, a_end]\n", - "sub_set.season = ['Spring', p_beg, p_end]\n", - "sub_set.season = ['Annuelle', 1, 365]\n", - "\n", - "sub_set.get_seasons()\n" + "(data\n", + " .streamflow\n", + " .dropna('time', 'all')\n", + " .hvplot(x='time',grid=True, widget_location='bottom', groupby='id')\n", + ")" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - "Use rm_season to remove a season" + "## Customize the analysis settings\n", + "\n", + "With a collection of hydrological data now at our disposal, we can provide the `xarray.Dataset` to the `Data` object. This step allows us to fine-tune certain configurations before proceeding with the frequency analysis." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ - "sub_set.rm_season('Annuelle')\n", - "sub_set.get_seasons()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "La cellule suivante ne sert qu'a générer un dataset aléatoire avec des dates de début et de fin de crue. \n", - "­
    Normalement ces dates seront chargées ou déterminées visuellement pas l'utilisateur" + "from xhydro.frequency_analysis.local import Data" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "import random\n", - "# Generating random flood dates for all year and one catchment\n", - "m = np.empty((1, len(range(1910, 2023))), dtype=object)\n", - "for i in np.ndindex(m.shape): m[i] = [random.randint(70, 70), random.randint(139, 139)]\n", - "\n", - "\n", - "dates_ds = xr.Dataset()\n", - "\n", - "dates_ds.coords['year'] = xr.DataArray(range(1910, 2023), dims=('year',))\n", - "dates_ds.coords['id'] = xr.DataArray(['051001'], dims=('id',))\n", - "\n", - "dates_ds['value'] = xr.DataArray(m, dims=('id', 'year'))\n" + "xfa = Data(data)" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "sub_set._season['Spring_ds'] = dates_ds" + "### a) Define the seasons\n", + "We can define seasons by supplying a season's name along with a range of Julian days." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ + "fall_start = xh.get_julian_day(month=9, day = 1)\n", + "fall_end = xh.get_julian_day(month=12, day=1)\n", "\n", - "mm1 = sub_set._get_max(tolerence=0.15, seasons=['Spring_ds'])\n", - "mm2 = sub_set._get_max(tolerence=0.15, seasons=['Spring'])\n", - "# TODO max are the same, but first and last years are mission from Spring_ds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mm1.to_dataframe(name = 'Maximums').reset_index()[['id', 'season', 'year', 'start_date','end_date','Maximums']].dropna()" + "spring_start = xh.get_julian_day(month=2, day=11)\n", + "spring_end = xh.get_julian_day(month=6, day=19)" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mm2.to_dataframe(name = 'Maximums').reset_index()[['id', 'season', 'year', 'start_date','end_date','Maximums']].dropna()" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", + "execution_count": 24, "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/slanglois/PycharmProjects/xhydro/xhydro/frequency_analysis/local.py:191: UserWarning: Warning, fall overlapping with Fall\n", + " warnings.warn(\"Warning, \" + name + \" overlapping with \" + season)\n", + "/home/slanglois/PycharmProjects/xhydro/xhydro/frequency_analysis/local.py:191: UserWarning: Warning, spring overlapping with Spring\n", + " warnings.warn(\"Warning, \" + name + \" overlapping with \" + season)\n" + ] + } + ], "source": [ - "On calcul les volume de crues à dates fixes" + "xfa.season = ['fall', fall_start, fall_end]\n", + "xfa.season = ['spring', spring_start, spring_end]\n", + "xfa.season = ['annual', 1, 365]" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Fall', 'Spring', 'spring_custom', 'fall', 'spring', 'annual']" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "vol = sub_set.calculate_volume(dates=[35, 36])\n", - "vol.volume.to_dataframe()" + "xfa.get_seasons()" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "On calcul les volumes de crues avec un DataSet" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sub_set_example = sub_set.select_catchments(['051001'])\n", - "vol = sub_set_example.calculate_volume(dates=dates_ds)\n", - "vol.to_dataframe().dropna()\n" + "If a season is no longer required, it can readily be remove like this : " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Fall', 'Spring', 'spring_custom', 'fall', 'spring']" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "vol.to_dataframe().dropna().reset_index()" + "xfa.rm_season('annual')\n", + "xfa.get_seasons()" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - "Use get_maximum to get the maximums per season for selected catcment, if no period selected, anual maxmaximum will be fectch" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sub_set.get_maximum(tolerence=.85)" + "In cases where distinct catchments necessitate individualized Julian Day ranges for each year, users can explicitly define these ranges" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset>\n",
    +       "Dimensions:  (year: 113, id: 1)\n",
    +       "Coordinates:\n",
    +       "  * year     (year) int64 1910 1911 1912 1913 1914 ... 2018 2019 2020 2021 2022\n",
    +       "  * id       (id) <U6 '020302'\n",
    +       "Data variables:\n",
    +       "    value    (id, year) object [70, 139] [70, 139] ... [70, 139] [70, 139]
    " + ], + "text/plain": [ + "\n", + "Dimensions: (year: 113, id: 1)\n", + "Coordinates:\n", + " * year (year) int64 1910 1911 1912 1913 1914 ... 2018 2019 2020 2021 2022\n", + " * id (id) \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    seasonyearidMaxima
    83spring_custom1993020302135.600006
    84spring_custom1994020302501.000000
    \n", + "" + ], + "text/plain": [ + " season year id Maxima\n", + "83 spring_custom 1993 020302 135.600006\n", + "84 spring_custom 1994 020302 501.000000" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coords_to_drop = set(maxima1.coords )- set(maxima1.dims)\n", + "maxima1.drop(coords_to_drop).to_dataframe(name='Maxima').reset_index().dropna(how='any')" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    seasonyearidMaxima
    4Spring1970020802266.000000
    6Spring1970021502204.000000
    7Spring1970021601413.000000
    8Spring197002170254.700001
    11Spring1970022003309.000000
    ...............
    1990Spring202302370284.209999
    1991Spring2023024003208.600006
    1992Spring202302400417.190001
    1996Spring2023024014405.500000
    1997Spring202302401511.680000
    \n", + "

    1302 rows × 4 columns

    \n", + "
    " + ], + "text/plain": [ + " season year id Maxima\n", + "4 Spring 1970 020802 266.000000\n", + "6 Spring 1970 021502 204.000000\n", + "7 Spring 1970 021601 413.000000\n", + "8 Spring 1970 021702 54.700001\n", + "11 Spring 1970 022003 309.000000\n", + "... ... ... ... ...\n", + "1990 Spring 2023 023702 84.209999\n", + "1991 Spring 2023 024003 208.600006\n", + "1992 Spring 2023 024004 17.190001\n", + "1996 Spring 2023 024014 405.500000\n", + "1997 Spring 2023 024015 11.680000\n", + "\n", + "[1302 rows x 4 columns]" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coords_to_drop = set(maxima2.coords )- set(maxima2.dims)\n", + "maxima2.drop(coords_to_drop).to_dataframe(name='Maxima').reset_index().dropna(how='any')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On calcul les volume de crues à dates fixes" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    end_datestart_datevolume
    yearidvariablespatial_aggtimesteptime_aggsource
    1970020302streamflowwatershed0mean01970-02-051970-02-04NaN
    020404streamflowwatershed0mean01970-02-051970-02-04NaN
    020502streamflowwatershed0mean01970-02-051970-02-04NaN
    020602streamflowwatershed0mean01970-02-051970-02-04NaN
    020802streamflowwatershed0mean01970-02-051970-02-046.851520
    ..............................
    2023024007streamflowwatershed0mean02023-02-052023-02-04NaN
    024010streamflowwatershed0mean02023-02-052023-02-04NaN
    024013streamflowwatershed0mean02023-02-052023-02-04NaN
    024014streamflowwatershed0mean02023-02-052023-02-042.327616
    024015streamflowwatershed0mean02023-02-052023-02-040.172256
    \n", + "

    1998 rows × 3 columns

    \n", + "
    " + ], + "text/plain": [ + " end_date \\\n", + "year id variable spatial_agg timestep time_agg source \n", + "1970 020302 streamflow watershed 0 mean 0 1970-02-05 \n", + " 020404 streamflow watershed 0 mean 0 1970-02-05 \n", + " 020502 streamflow watershed 0 mean 0 1970-02-05 \n", + " 020602 streamflow watershed 0 mean 0 1970-02-05 \n", + " 020802 streamflow watershed 0 mean 0 1970-02-05 \n", + "... ... \n", + "2023 024007 streamflow watershed 0 mean 0 2023-02-05 \n", + " 024010 streamflow watershed 0 mean 0 2023-02-05 \n", + " 024013 streamflow watershed 0 mean 0 2023-02-05 \n", + " 024014 streamflow watershed 0 mean 0 2023-02-05 \n", + " 024015 streamflow watershed 0 mean 0 2023-02-05 \n", + "\n", + " start_date \\\n", + "year id variable spatial_agg timestep time_agg source \n", + "1970 020302 streamflow watershed 0 mean 0 1970-02-04 \n", + " 020404 streamflow watershed 0 mean 0 1970-02-04 \n", + " 020502 streamflow watershed 0 mean 0 1970-02-04 \n", + " 020602 streamflow watershed 0 mean 0 1970-02-04 \n", + " 020802 streamflow watershed 0 mean 0 1970-02-04 \n", + "... ... \n", + "2023 024007 streamflow watershed 0 mean 0 2023-02-04 \n", + " 024010 streamflow watershed 0 mean 0 2023-02-04 \n", + " 024013 streamflow watershed 0 mean 0 2023-02-04 \n", + " 024014 streamflow watershed 0 mean 0 2023-02-04 \n", + " 024015 streamflow watershed 0 mean 0 2023-02-04 \n", + "\n", + " volume \n", + "year id variable spatial_agg timestep time_agg source \n", + "1970 020302 streamflow watershed 0 mean 0 NaN \n", + " 020404 streamflow watershed 0 mean 0 NaN \n", + " 020502 streamflow watershed 0 mean 0 NaN \n", + " 020602 streamflow watershed 0 mean 0 NaN \n", + " 020802 streamflow watershed 0 mean 0 6.851520 \n", + "... ... \n", + "2023 024007 streamflow watershed 0 mean 0 NaN \n", + " 024010 streamflow watershed 0 mean 0 NaN \n", + " 024013 streamflow watershed 0 mean 0 NaN \n", + " 024014 streamflow watershed 0 mean 0 2.327616 \n", + " 024015 streamflow watershed 0 mean 0 0.172256 \n", + "\n", + "[1998 rows x 3 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vol = xfa.calculate_volume(dates=[35, 36])\n", + "vol.volume.to_dataframe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On calcul les volumes de crues avec un DataSet" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    unitsstart_dateend_datevolume
    yearid
    1993020302hm³1993-03-111993-05-1922411.681368
    1994020302hm³1994-03-111994-05-1949109.812832
    \n", + "
    " + ], + "text/plain": [ + " units start_date end_date volume\n", + "year id \n", + "1993 020302 hm³ 1993-03-11 1993-05-19 22411.681368\n", + "1994 020302 hm³ 1994-03-11 1994-05-19 49109.812832" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sub_set_example = xfa.select_catchments(['020302'])\n", + "vol = sub_set_example.calculate_volume(dates=dates_ds)\n", + "vol.to_dataframe().dropna()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    yearidunitsstart_dateend_datevolume
    01993020302hm³1993-03-111993-05-1922411.681368
    11994020302hm³1994-03-111994-05-1949109.812832
    \n", + "
    " + ], + "text/plain": [ + " year id units start_date end_date volume\n", + "0 1993 020302 hm³ 1993-03-11 1993-05-19 22411.681368\n", + "1 1994 020302 hm³ 1994-03-11 1994-05-19 49109.812832" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vol.to_dataframe().dropna().reset_index()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "Use get_maximum to get the maximums per season for selected catcment, if no period selected, anual maxmaximum will be fectch" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    idseasonyearstart_dateend_datestreamflow
    3020602Whole year197002-1106-1937.900002
    4020802Whole year197002-1106-19266.000000
    6021502Whole year197002-1106-19204.000000
    7021601Whole year197002-1106-19413.000000
    8021702Whole year197002-1106-1954.700001
    .....................
    1990023702Whole year202302-1106-1984.209999
    1991024003Whole year202302-1106-19208.600006
    1992024004Whole year202302-1106-1917.190001
    1996024014Whole year202302-1106-19405.500000
    1997024015Whole year202302-1106-1911.680000
    \n", + "

    1412 rows × 6 columns

    \n", + "
    " + ], + "text/plain": [ + " id season year start_date end_date streamflow\n", + "3 020602 Whole year 1970 02-11 06-19 37.900002\n", + "4 020802 Whole year 1970 02-11 06-19 266.000000\n", + "6 021502 Whole year 1970 02-11 06-19 204.000000\n", + "7 021601 Whole year 1970 02-11 06-19 413.000000\n", + "8 021702 Whole year 1970 02-11 06-19 54.700001\n", + "... ... ... ... ... ... ...\n", + "1990 023702 Whole year 2023 02-11 06-19 84.209999\n", + "1991 024003 Whole year 2023 02-11 06-19 208.600006\n", + "1992 024004 Whole year 2023 02-11 06-19 17.190001\n", + "1996 024014 Whole year 2023 02-11 06-19 405.500000\n", + "1997 024015 Whole year 2023 02-11 06-19 11.680000\n", + "\n", + "[1412 rows x 6 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "xfa.get_maximum(tolerence=.85)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    idseasonyearstart_dateend_datestreamflow
    4020802Spring197002-1106-19266.000000
    6021502Spring197002-1106-19204.000000
    7021601Spring197002-1106-19413.000000
    8021702Spring197002-1106-1954.700001
    11022003Spring197002-1106-19309.000000
    .....................
    1990023702Spring202302-1106-1984.209999
    1991024003Spring202302-1106-19208.600006
    1992024004Spring202302-1106-1917.190001
    1996024014Spring202302-1106-19405.500000
    1997024015Spring202302-1106-1911.680000
    \n", + "

    1302 rows × 6 columns

    \n", + "
    " + ], + "text/plain": [ + " id season year start_date end_date streamflow\n", + "4 020802 Spring 1970 02-11 06-19 266.000000\n", + "6 021502 Spring 1970 02-11 06-19 204.000000\n", + "7 021601 Spring 1970 02-11 06-19 413.000000\n", + "8 021702 Spring 1970 02-11 06-19 54.700001\n", + "11 022003 Spring 1970 02-11 06-19 309.000000\n", + "... ... ... ... ... ... ...\n", + "1990 023702 Spring 2023 02-11 06-19 84.209999\n", + "1991 024003 Spring 2023 02-11 06-19 208.600006\n", + "1992 024004 Spring 2023 02-11 06-19 17.190001\n", + "1996 024014 Spring 2023 02-11 06-19 405.500000\n", + "1997 024015 Spring 2023 02-11 06-19 11.680000\n", + "\n", + "[1302 rows x 6 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "sub_set.get_maximum(tolerence=0.15, seasons=['Spring'])" + "xfa.get_maximum(tolerence=0.15, seasons=['Spring'])" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -1046,7 +2869,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -1056,147 +2878,1123 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, + "metadata": {}, + "outputs": [], + "source": [ + "from xhydro.frequency_analysis.local import Local" + ] + }, + { + "cell_type": "code", + "execution_count": 58, "metadata": {}, "outputs": [], "source": [ "return_period = np.array([2, 5, 10, 20, 50, 100, 200, 1000, 2000, 10000])\n", "dist_list = ['expon', 'gamma', 'genextreme', 'gennorm', 'gumbel_r', 'pearson3', 'weibull_min']\n", "\n", - "fa = xh.Local(data_ds = sub_set, return_period = return_period, dist_list = dist_list, tolerence = 0.15, seasons = ['Spring'], min_year = 15, vars_of_interest = ['max'])" + "fa = Local(data_ds=xfa,\n", + " return_period=return_period,\n", + " dist_list=dist_list,\n", + " tolerence=0.15,\n", + " seasons=['Spring'],\n", + " min_year=15,\n", + " vars_of_interest=['max'])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
    <xarray.Dataset>\n",
    +       "Dimensions:                (id: 33, season: 1, scipy_dist: 7,\n",
    +       "                            return_period: 10, time: 54, dparams: 6)\n",
    +       "Coordinates: (12/19)\n",
    +       "  * id                     (id) object '020404' '020502' ... '024014' '024015'\n",
    +       "  * season                 (season) <U6 'Spring'\n",
    +       "  * scipy_dist             (scipy_dist) <U11 'expon' 'gamma' ... 'weibull_min'\n",
    +       "    drainage_area          (id) float32 647.0 59.8 626.0 ... 2.163e+03 48.1\n",
    +       "    end_date               <U5 '06-19'\n",
    +       "    latitude               (id) float32 48.81 48.98 48.98 ... 46.05 46.2 46.18\n",
    +       "    ...                     ...\n",
    +       "    time_agg               <U4 'mean'\n",
    +       "    timestep               <U1 'D'\n",
    +       "    variable               <U10 'streamflow'\n",
    +       "  * return_period          (return_period) float64 2.0 5.0 10.0 ... 2e+03 1e+04\n",
    +       "  * time                   (time) int64 1970 1971 1972 1973 ... 2021 2022 2023\n",
    +       "  * dparams                (dparams) <U5 'a' 'beta' 'c' 'loc' 'scale' 'skew'\n",
    +       "Data variables:\n",
    +       "    value_criterions       (season, id, scipy_dist) object {'aic': 473.117809...\n",
    +       "    streamflow_quantiles   (scipy_dist, season, return_period, id) float64 11...\n",
    +       "    streamflow             (season, time, id) float32 nan nan ... 405.5 11.68\n",
    +       "    streamflow_parameters  (scipy_dist, season, dparams, id) float64 nan ... nan
    " + ], + "text/plain": [ + "\n", + "Dimensions: (id: 33, season: 1, scipy_dist: 7,\n", + " return_period: 10, time: 54, dparams: 6)\n", + "Coordinates: (12/19)\n", + " * id (id) object '020404' '020502' ... '024014' '024015'\n", + " * season (season) \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    idseasontimestart_dateend_datestreamflow
    3020802Spring197002-1106-19266.000000
    5021502Spring197002-1106-19204.000000
    6021601Spring197002-1106-19413.000000
    7021702Spring197002-1106-1954.700001
    10022003Spring197002-1106-19309.000000
    .....................
    1774023432Spring202302-1106-1966.580002
    1776023702Spring202302-1106-1984.209999
    1777024003Spring202302-1106-19208.600006
    1780024014Spring202302-1106-19405.500000
    1781024015Spring202302-1106-1911.680000
    \n", + "

    1287 rows × 6 columns

    \n", + "" + ], + "text/plain": [ + " id season time start_date end_date streamflow\n", + "3 020802 Spring 1970 02-11 06-19 266.000000\n", + "5 021502 Spring 1970 02-11 06-19 204.000000\n", + "6 021601 Spring 1970 02-11 06-19 413.000000\n", + "7 021702 Spring 1970 02-11 06-19 54.700001\n", + "10 022003 Spring 1970 02-11 06-19 309.000000\n", + "... ... ... ... ... ... ...\n", + "1774 023432 Spring 2023 02-11 06-19 66.580002\n", + "1776 023702 Spring 2023 02-11 06-19 84.209999\n", + "1777 024003 Spring 2023 02-11 06-19 208.600006\n", + "1780 024014 Spring 2023 02-11 06-19 405.500000\n", + "1781 024015 Spring 2023 02-11 06-19 11.680000\n", + "\n", + "[1287 rows x 6 columns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "fa.view_values('max')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    idseasonscipy_distvalue_criterions
    0020404Springexpon{'aic': 473.11780986942864, 'bic': 476.6402101...
    1020404Springgamma{'aic': 458.2985841195104, 'bic': 463.58218446...
    2020404Springgenextreme{'aic': 457.5956462573534, 'bic': 462.87924660...
    3020404Springgennorm{'aic': 458.6848317031879, 'bic': 463.96843205...
    4020404Springgumbel_r{'aic': 455.63163990069773, 'bic': 459.1540401...
    ...............
    226024015Springgenextreme{'aic': 102.98921985069077, 'bic': 105.4888598...
    227024015Springgennorm{'aic': 107.10226730923392, 'bic': 109.6019073...
    228024015Springgumbel_r{'aic': 101.55648839938996, 'bic': 103.2229150...
    229024015Springpearson3{'aic': 95.20549363483296, 'bic': 97.705133667...
    230024015Springweibull_min{'aic': 101.01183131846669, 'bic': 103.5114713...
    \n", + "

    231 rows × 4 columns

    \n", + "
    " + ], + "text/plain": [ + " id season scipy_dist \\\n", + "0 020404 Spring expon \n", + "1 020404 Spring gamma \n", + "2 020404 Spring genextreme \n", + "3 020404 Spring gennorm \n", + "4 020404 Spring gumbel_r \n", + ".. ... ... ... \n", + "226 024015 Spring genextreme \n", + "227 024015 Spring gennorm \n", + "228 024015 Spring gumbel_r \n", + "229 024015 Spring pearson3 \n", + "230 024015 Spring weibull_min \n", + "\n", + " value_criterions \n", + "0 {'aic': 473.11780986942864, 'bic': 476.6402101... \n", + "1 {'aic': 458.2985841195104, 'bic': 463.58218446... \n", + "2 {'aic': 457.5956462573534, 'bic': 462.87924660... \n", + "3 {'aic': 458.6848317031879, 'bic': 463.96843205... \n", + "4 {'aic': 455.63163990069773, 'bic': 459.1540401... \n", + ".. ... \n", + "226 {'aic': 102.98921985069077, 'bic': 105.4888598... \n", + "227 {'aic': 107.10226730923392, 'bic': 109.6019073... \n", + "228 {'aic': 101.55648839938996, 'bic': 103.2229150... \n", + "229 {'aic': 95.20549363483296, 'bic': 97.705133667... \n", + "230 {'aic': 101.01183131846669, 'bic': 103.5114713... \n", + "\n", + "[231 rows x 4 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "fa.view_criterions('max')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
    \n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    idseasonscipy_distreturn_periodstreamflow_quantiles
    0020404Springexpon2.0110.0
    1020502Springexpon2.019.0
    2020602Springexpon2.0154.0
    3020802Springexpon2.0255.0
    4021407Springexpon2.0195.0
    ..................
    2305024003Springweibull_min10000.0528.0
    2306024007Springweibull_min10000.01180.0
    2307024013Springweibull_min10000.0291.0
    2308024014Springweibull_min10000.01198.0
    2309024015Springweibull_min10000.053.0
    \n", + "

    2310 rows × 5 columns

    \n", + "
    " + ], + "text/plain": [ + " id season scipy_dist return_period streamflow_quantiles\n", + "0 020404 Spring expon 2.0 110.0\n", + "1 020502 Spring expon 2.0 19.0\n", + "2 020602 Spring expon 2.0 154.0\n", + "3 020802 Spring expon 2.0 255.0\n", + "4 021407 Spring expon 2.0 195.0\n", + "... ... ... ... ... ...\n", + "2305 024003 Spring weibull_min 10000.0 528.0\n", + "2306 024007 Spring weibull_min 10000.0 1180.0\n", + "2307 024013 Spring weibull_min 10000.0 291.0\n", + "2308 024014 Spring weibull_min 10000.0 1198.0\n", + "2309 024015 Spring weibull_min 10000.0 53.0\n", + "\n", + "[2310 rows x 5 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "fa.view_quantiles('max')" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from xhydro.frequency_analysis.visualisation import *\n", - "plot_fa(fa, var='max', lang='fr', var_name='value')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fa_calc = xh.Local(data_ds = mm2, return_period = return_period, dist_list = dist_list, tolerence = 0.15, seasons = ['Spring'], min_year = 15, vars_of_interest = ['max'], dates_vol=dates_ds, calculated = True)\n", - "fa_calc.view_values('max')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fa.view_values('max')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fa.view_criterions('max')" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "view quantiles" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fa.view_quantiles('max')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fa_calc.view_quantiles('max')\n", - "max" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\n", - "view criterions" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "xhydro_develop", "language": "python", - "name": "python3" + "name": "xhydro_develop" }, "language_info": { "codemirror_mode": { @@ -1208,7 +4006,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.12" }, "vscode": { "interpreter": { From dce507652c950e1d296c5d82d9c770d44c351f32 Mon Sep 17 00:00:00 2001 From: sebastienlanglois Date: Mon, 28 Aug 2023 00:58:23 -0400 Subject: [PATCH 21/24] make frequency_analysis a module --- xhydro/__init__.py | 1 - xhydro/frequency_analysis/__init__.py | 0 2 files changed, 1 deletion(-) create mode 100644 xhydro/frequency_analysis/__init__.py diff --git a/xhydro/__init__.py b/xhydro/__init__.py index 7ebf9f12..47c04ead 100644 --- a/xhydro/__init__.py +++ b/xhydro/__init__.py @@ -4,5 +4,4 @@ __email__ = "tcff_hydro@outlook.com" __version__ = "0.1.5" -from .frequency_analysis.local import * from .utils import get_julian_day, get_timestep diff --git a/xhydro/frequency_analysis/__init__.py b/xhydro/frequency_analysis/__init__.py new file mode 100644 index 
00000000..e69de29b From a7bb04c7ecd0399c44d0462a53a05efb2ce6f4ef Mon Sep 17 00:00:00 2001 From: sebastienlanglois Date: Mon, 28 Aug 2023 01:19:43 -0400 Subject: [PATCH 22/24] remove first paragraph --- docs/notebooks/local_frequency_analysis.ipynb | 27 +++---------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/docs/notebooks/local_frequency_analysis.ipynb b/docs/notebooks/local_frequency_analysis.ipynb index 9fb1a318..6835e867 100644 --- a/docs/notebooks/local_frequency_analysis.ipynb +++ b/docs/notebooks/local_frequency_analysis.ipynb @@ -16,26 +16,7 @@ "source": [ "# Frequency analysis \n", "\n", - "This class takes a `xarray.Dataset` to initialise\n", - "After the class is initialised, it can be used to prepare the data\n", - "in this example, we extract the value form a melcc dataset and we filter on all catchments statirng with 0234- and catchemnt 051001\n", - "The function get_julian_day helps us defining our seasons dates which we then set using the attribute .season of our Data() DataSet\n", - "\n", - "- [loading the dataset Cell](#data_load)\n", - "- [Catchement selection](#data_catchments)\n", - "- [Seasons](#data_seasons)\n", - "- [rm Seasons](#data_rmseasons)\n", - "- [Maximum](#data_getMaximum)\n", - "\n", - "\n", - "\n", - "\n", - "- [Init](#fa_init)\n", - "- [View quantiles](#fa_qunatiles)\n", - "\n", - "\n", - "\n", - "\n" + "Text\n" ] }, { @@ -3992,9 +3973,9 @@ ], "metadata": { "kernelspec": { - "display_name": "xhydro_develop", + "display_name": "xhydro", "language": "python", - "name": "xhydro_develop" + "name": "xhydro" }, "language_info": { "codemirror_mode": { @@ -4006,7 +3987,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.9.16" }, "vscode": { "interpreter": { From b47968250e22db489eccf79794a8a908a009f1be Mon Sep 17 00:00:00 2001 From: Zeitsperre <10819524+Zeitsperre@users.noreply.github.com> Date: Tue, 3 Oct 2023 11:04:47 -0400 Subject: [PATCH 23/24] exclude jupyter notebooks --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 937468d1..6804a642 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,7 @@ repos: - id: check-json - id: pretty-format-json args: [ '--autofix', '--no-ensure-ascii', '--no-sort-keys' ] - # exclude: .ipynb + exclude: .ipynb - id: check-yaml args: [ '--allow-multiple-documents' ] - repo: https://github.com/pre-commit/pygrep-hooks From 5b18b64399d04853d4afd0988392f0d278c9ff7e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 3 Oct 2023 15:05:15 +0000 Subject: [PATCH 24/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xhydro/frequency_analysis/local.py | 14 ++++++++------ xhydro/utils.py | 6 +++--- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/xhydro/frequency_analysis/local.py b/xhydro/frequency_analysis/local.py index cbe80ed5..abdc12ca 100644 --- a/xhydro/frequency_analysis/local.py +++ b/xhydro/frequency_analysis/local.py @@ -1,21 +1,23 @@ -import xarray as xr -import numpy as np -import warnings import copy +import fnmatch +import warnings from typing import Union + +import numpy as np import pandas as pd -import fnmatch import scipy.stats -import xhydro as xh - +import xarray as xr from statsmodels.tools import eval_measures from xclim.indices.stats import fit, 
parametric_quantile +import xhydro as xh + __all__ = [ "Data", "Local", ] + class Data: def __init__(self, ds: xr.Dataset): """init function takes a dataset as input and initialize an empty diff --git a/xhydro/utils.py b/xhydro/utils.py index bdad1a75..22c2daaa 100644 --- a/xhydro/utils.py +++ b/xhydro/utils.py @@ -24,12 +24,12 @@ def get_julian_day(month, day, year=None): Examples -------- >>> import xarray as xr - >>> cehq_data_path = '/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr' + >>> cehq_data_path = "/dbfs/mnt/devdlzxxkp01/datasets/xhydro/tests/cehq/zarr" >>> ds = xr.open_zarr(cehq_data_path, consolidated=True) >>> donnees = Data(ds) - >>> jj = donnees.get_julian_day(month = 9, day = 1) + >>> jj = donnees.get_julian_day(month=9, day=1) >>> jj: 244 - >>> jj = donnees.get_julian_day(month = 9, day = 1, year = 2000) + >>> jj = donnees.get_julian_day(month=9, day=1, year=2000) >>> jj: 245 """ if year is None:
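The last patches in this series are packaging and style changes rather than new behaviour: `frequency_analysis` becomes a regular subpackage that is no longer star-imported in `xhydro/__init__.py`, pre-commit now skips notebooks when prettifying JSON, and the `get_julian_day` docstring is reformatted. Under those changes, downstream code would import along these lines (a sketch only; the zarr path is a placeholder):

```python
import xarray as xr
import xhydro as xh
from xhydro.frequency_analysis.local import Data, Local  # no longer re-exported at the top level

ds = xr.open_zarr("path/to/streamflow.zarr", consolidated=True)  # placeholder path
xfa = Data(ds)

# The day-of-year helpers stay exposed on the top-level package.
day = xh.get_julian_day(month=9, day=1)                   # 244 in a non-leap year
day_2000 = xh.get_julian_day(month=9, day=1, year=2000)   # 245, since 2000 is a leap year
```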