From f567282bd47d0abcef323b72b4273aacb3a24fff Mon Sep 17 00:00:00 2001 From: anouksprong Date: Thu, 31 Aug 2023 16:05:02 +0200 Subject: [PATCH 01/23] add lizard functions --- hydropandas/io/lizard.py | 7 +++++++ tests/test_012_lizard.py | 7 +++++++ 2 files changed, 14 insertions(+) create mode 100644 hydropandas/io/lizard.py create mode 100644 tests/test_012_lizard.py diff --git a/hydropandas/io/lizard.py b/hydropandas/io/lizard.py new file mode 100644 index 00000000..4661123d --- /dev/null +++ b/hydropandas/io/lizard.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +""" +Spyder Editor + +This is a temporary script file. +""" + diff --git a/tests/test_012_lizard.py b/tests/test_012_lizard.py new file mode 100644 index 00000000..008b7fea --- /dev/null +++ b/tests/test_012_lizard.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- +""" +Created on Thu Aug 31 16:01:18 2023 + +@author: SprongA +""" + From 669dd89b2acd79f5b6ad607e51cdb941628ed347 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Wed, 1 Nov 2023 17:01:14 +0100 Subject: [PATCH 02/23] fix docstring --- hydropandas/extensions/plots.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hydropandas/extensions/plots.py b/hydropandas/extensions/plots.py index 7694425c..491a552b 100644 --- a/hydropandas/extensions/plots.py +++ b/hydropandas/extensions/plots.py @@ -44,7 +44,7 @@ def interactive_plots( if True plot multiple tubes at the same monitoring_well in one figure **kwargs : - will be passed to the Obs.to_interactive_plot method, options + will be passed to the Obs.interactive_plot method, options include: - cols : list of str or None @@ -55,7 +55,7 @@ def interactive_plots( - hoover_names : list of str - plot_colors : list of str - ylabel : str - - add_filter_to_legend : boolean + - add_screen_to_legend : boolean """ _color_cycle = ( "blue", @@ -184,7 +184,7 @@ def interactive_map( if True interactive plots will be created, if False the iplot_fname in the meta ditctionary of the observations is used. **kwargs : - will be passed to the to_interactive_plots method options are: + will be passed to the interactive_plots method options are: - cols : list of str or None - hoover_names : list of str @@ -194,7 +194,7 @@ def interactive_map( - hoover_names : list of str - plot_colors : list of str - ylabel : str - - add_filter_to_legend : boolean + - add_screen_to_legend : boolean - tmin : dt.datetime - tmax : dt.datetime From 793e1293a2722f72f3e5f44019e36f120286d08d Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Fri, 1 Dec 2023 09:24:24 +0100 Subject: [PATCH 03/23] some minor changes --- hydropandas/observation.py | 18 ++++++++---------- hydropandas/util.py | 26 +++++++++++++++----------- hydropandas/version.py | 2 +- 3 files changed, 24 insertions(+), 22 deletions(-) diff --git a/hydropandas/observation.py b/hydropandas/observation.py index 730e3882..9bcd6a0e 100644 --- a/hydropandas/observation.py +++ b/hydropandas/observation.py @@ -67,11 +67,10 @@ def __init__(self, *args, **kwargs): **kwargs can be one of the attributes listed in _metadata or keyword arguments for the constructor of a pandas.DataFrame. """ - if len(args) > 0: - if isinstance(args[0], Obs): - for key in args[0]._metadata: - if (key in Obs._metadata) and (key not in kwargs.keys()): - kwargs[key] = getattr(args[0], key) + if (len(args) > 0) and isinstance(args[0], Obs): + for key in args[0]._metadata: + if (key in Obs._metadata) and (key not in kwargs): + kwargs[key] = getattr(args[0], key) self.x = kwargs.pop("x", np.nan) self.y = kwargs.pop("y", np.nan) @@ -122,7 +121,7 @@ def __repr__(self) -> str: return buf.getvalue() - def _repr_html_(self, collapse=True): + def _repr_html_(self, collapse=False): """ Uses the pandas DataFrame html representation with the metadata prepended. @@ -714,8 +713,7 @@ def from_artdino_file(cls, path=None, **kwargs): @classmethod def from_wiski(cls, path, **kwargs): - """ - Read data from a WISKI file. + """Read data from a WISKI file. Parameters: ----------- @@ -779,7 +777,7 @@ def from_pastastore(cls, pstore, libname, name, metadata_mapping=None): class WaterQualityObs(Obs): - """class for water quality ((grond)watersamenstelling) point + """Class for water quality ((grond)watersamenstelling) point observations. Subclass of the Obs class @@ -814,7 +812,7 @@ def _constructor(self): @classmethod def from_dino(cls, path, **kwargs): - """read dino file with groundwater quality data. + """Read dino file with groundwater quality data. Parameters ---------- diff --git a/hydropandas/util.py b/hydropandas/util.py index bd4de076..0db30604 100644 --- a/hydropandas/util.py +++ b/hydropandas/util.py @@ -102,14 +102,16 @@ def get_files( Parameters ---------- file_or_dir : str - file or path to data + file or path to data. ext : str - extension of filenames to store in list + extension of filenames to store in list. + unpackdir : str + directory to story unpacked zip file, only used in case of a zipfile. force_unpack : bool, optional - force unzip, by default False + force unzip, by default False. preserve_datetime : bool, optional - preserve datetime of unzipped files, by default False - (useful for checking whether data has changed) + preserve datetime of unzipped files, by default False. Used for + checking whether data has changed). """ # check if unpackdir is same as file_or_dir, if same, this can cause # problems when the unpackdir still contains zips that will be unpacked @@ -117,6 +119,7 @@ def get_files( if unpackdir is not None: if os.path.normcase(unpackdir) == os.path.normcase(file_or_dir): raise ValueError("Please specify a different folder to unpack files!") + # identify whether file_or_dir started as zip if file_or_dir.endswith(".zip"): iszip = True @@ -260,17 +263,18 @@ def get_color_logger(level="INFO"): handler = logging.StreamHandler(sys.stdout) handler.setFormatter(formatter) - logger = logging.getLogger() - logger.handlers[:] = [] - logger.addHandler(handler) - logger.setLevel(getattr(logging, level)) + l = logging.getLogger() + l.handlers[:] = [] + l.addHandler(handler) + l.setLevel(getattr(logging, level)) logging.captureWarnings(True) - return logger + return l def oc_to_df(oc, col: Optional[str] = None) -> pd.DataFrame: - """convert an observation collection to + """convert an observation collection to a DataFrame where every column + has one observation. Parameters ---------- diff --git a/hydropandas/version.py b/hydropandas/version.py index a2fecb45..7db59c43 100644 --- a/hydropandas/version.py +++ b/hydropandas/version.py @@ -1 +1 @@ -__version__ = "0.9.2" +__version__ = "0.9.3b" From f4afac35838e9ee9646343445a4d95d7a297d7fa Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Fri, 1 Dec 2023 09:25:58 +0100 Subject: [PATCH 04/23] version bump --- hydropandas/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydropandas/version.py b/hydropandas/version.py index 7db59c43..9d4a2b65 100644 --- a/hydropandas/version.py +++ b/hydropandas/version.py @@ -1 +1 @@ -__version__ = "0.9.3b" +__version__ = "0.9.4b" From e4df99758c67743277b864adaa9e932c4df7c152 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Fri, 1 Dec 2023 09:28:49 +0100 Subject: [PATCH 05/23] text --- hydropandas/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydropandas/util.py b/hydropandas/util.py index 0db30604..97158195 100644 --- a/hydropandas/util.py +++ b/hydropandas/util.py @@ -111,7 +111,7 @@ def get_files( force unzip, by default False. preserve_datetime : bool, optional preserve datetime of unzipped files, by default False. Used for - checking whether data has changed). + checking whether data has changed. """ # check if unpackdir is same as file_or_dir, if same, this can cause # problems when the unpackdir still contains zips that will be unpacked From f179818c4d0308c6d7ce4fbb988af3e73afc9366 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Fri, 1 Dec 2023 09:31:14 +0100 Subject: [PATCH 06/23] ruff fix --- hydropandas/util.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hydropandas/util.py b/hydropandas/util.py index 97158195..3d7be189 100644 --- a/hydropandas/util.py +++ b/hydropandas/util.py @@ -263,13 +263,13 @@ def get_color_logger(level="INFO"): handler = logging.StreamHandler(sys.stdout) handler.setFormatter(formatter) - l = logging.getLogger() - l.handlers[:] = [] - l.addHandler(handler) - l.setLevel(getattr(logging, level)) + clogger = logging.getLogger() + clogger.handlers[:] = [] + clogger.addHandler(handler) + clogger.setLevel(getattr(logging, level)) logging.captureWarnings(True) - return l + return clogger def oc_to_df(oc, col: Optional[str] = None) -> pd.DataFrame: From 6e95ab3580189635d6b52d8e1c941336c562942c Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Thu, 7 Dec 2023 14:40:01 +0100 Subject: [PATCH 07/23] fix voor #176 --- hydropandas/io/fews.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hydropandas/io/fews.py b/hydropandas/io/fews.py index 736e786b..09fb061e 100644 --- a/hydropandas/io/fews.py +++ b/hydropandas/io/fews.py @@ -395,11 +395,13 @@ class of the observations, e.g. GroundwaterObs or WaterlvlObs [d + " " + t for d, t in zip(date, time)], errors="coerce" ) ts = pd.DataFrame(events, index=index) - ts.loc[:, "value"] = ts.loc[:, "value"].astype(float) - if remove_nan and (not ts.empty): - ts.dropna(subset=["value"], inplace=True) - header["unit"] = "m NAP" + if not ts.empty: + ts.loc[:, "value"] = ts.loc[:, "value"].astype(float) + + if remove_nan: + ts.dropna(subset=["value"], inplace=True) + header["unit"] = "m NAP" o, header = _obs_from_meta(ts, header, translate_dic, ObsClass) if locationIds is not None: From 3c9a03abde3a6c5ca81279fce75ba6c07ab3e9b3 Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Thu, 7 Dec 2023 15:04:14 +0100 Subject: [PATCH 08/23] remove loc to change the dtype of the column --- hydropandas/io/fews.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydropandas/io/fews.py b/hydropandas/io/fews.py index 09fb061e..240d7970 100644 --- a/hydropandas/io/fews.py +++ b/hydropandas/io/fews.py @@ -397,7 +397,7 @@ class of the observations, e.g. GroundwaterObs or WaterlvlObs ts = pd.DataFrame(events, index=index) if not ts.empty: - ts.loc[:, "value"] = ts.loc[:, "value"].astype(float) + ts["value"] = ts["value"].astype(float) if remove_nan: ts.dropna(subset=["value"], inplace=True) From b24ef3be6c80aa96234e5e33fbbe4b6efc17cf05 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Fri, 8 Dec 2023 14:51:20 +0100 Subject: [PATCH 09/23] trigger tests --- tests/test_011_bro.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_011_bro.py b/tests/test_011_bro.py index c7ee5ba7..d2d069c4 100644 --- a/tests/test_011_bro.py +++ b/tests/test_011_bro.py @@ -53,7 +53,7 @@ def test_get_gld_ids_from_gmw(): def test_obs_list_from_extent(): - extent = (102395, 103121, 434331, 434750) + # extent = (102395, 103121, 434331, 434750) extent = [116500, 120000, 439000, 442000] bro.get_obs_list_from_extent( extent, hpd.GroundwaterObs, tmin=None, tmax=None, epsg=28992, only_metadata=True From cda062e00012da5c164e7ba9a740c55f53bd5e62 Mon Sep 17 00:00:00 2001 From: anouksprong Date: Tue, 19 Dec 2023 15:20:06 +0100 Subject: [PATCH 10/23] add Lizard functionality to hydropandas Functions to extract metadata and timeseries from a Lizard-API --- hydropandas/__init__.py | 2 + hydropandas/io/lizard.py | 732 +++++++++++++++++++++++++++++++++- hydropandas/obs_collection.py | 187 +++++++++ hydropandas/observation.py | 47 +++ 4 files changed, 963 insertions(+), 5 deletions(-) diff --git a/hydropandas/__init__.py b/hydropandas/__init__.py index 99b85b2a..2ecde96a 100644 --- a/hydropandas/__init__.py +++ b/hydropandas/__init__.py @@ -14,6 +14,8 @@ read_fews, read_imod, read_knmi, + read_lizard_from_list, + read_lizard_from_extent, read_menyanthes, read_modflow, read_pickle, diff --git a/hydropandas/io/lizard.py b/hydropandas/io/lizard.py index 4661123d..35a5cca0 100644 --- a/hydropandas/io/lizard.py +++ b/hydropandas/io/lizard.py @@ -1,7 +1,729 @@ -# -*- coding: utf-8 -*- -""" -Spyder Editor +import pandas as pd +import requests +from pyproj import Transformer +import logging +import numpy as np +logger = logging.getLogger(__name__) +from shapely.geometry import Polygon +from tqdm import tqdm +import math +import geopandas +import concurrent.futures +#%% check_status_obs +def check_status_obs (metadata, timeseries): + ''' + checks if a monitoring well is still active -This is a temporary script file. -""" + Parameters + ---------- + metadata : pandas.DataFrame + metadata of the monitoring well + timeseries : pandas DataFrame + timeseries of the monitoring well + + Returns + ------- + metadata DataFrame including the status of the monitoring well + + ''' + if timeseries.empty: + metadata["status"] = 'geen tijdsserie beschikbaar' + return metadata + + last_measurement_date = timeseries.last_valid_index() + today = pd.to_datetime('today').normalize() + if today - last_measurement_date < pd.Timedelta(days= 180): + metadata["status"] = 'active' + + else: + metadata["status"] = 'niet actief' + + + return metadata +#%% extent_to_polygon +def extent_to_polygon (coordinates): + ''' + Translates a list of coordinates (xmin, ymin, xmax, ymax) to a polygon with + coordinate system WGS84 + + Parameters + ---------- + coordinates : lst + list of the modelextent within which the observations are collected + + Returns + ------- + polygon of the modelextent with coordinate system WGS84 + + ''' + transformer = Transformer.from_crs("EPSG:28992","WGS84") + + lon_min, lat_min = transformer.transform(coordinates[0], coordinates[1]) + lon_max, lat_max = transformer.transform(coordinates[2], coordinates[3]) + + poly_T = Polygon([ + (lat_min, lon_min), (lat_max, lon_min),(lat_max, lon_max), (lat_min,lon_max)]) + + return poly_T +#%% translate_flag +def translate_flag (timeseries): + ''' + Translates Vitens Lizard flags from interter to text + + Parameters + ---------- + timeseries : pandas.DataFrame + timeseries of a monitoring well with flags + + Returns + ------- + timeseries : pandas.DataFrame + timeseries with translated quality flags + + ''' + for idx, flag in enumerate(timeseries.loc[:,"flag"]): + if flag == 0 or flag ==1: + timeseries.loc[idx,'flag'] = 'betrouwbaar' + + elif flag == 3 or flag ==4: + timeseries.loc[idx,'flag'] = 'onbeslist' + + elif flag == 6 or flag ==7: + timeseries.loc[idx,'flag'] = 'onbetrouwbaar' + + elif flag == 99: + timeseries.loc[idx,'flag'] = 'onongevalideerd' + + elif flag == -99: + timeseries.loc[idx,'flag'] = 'verwijderd' + + return timeseries +#%% get_API_results_from_code +def get_API_results_from_code(code, url_lizard): + ''' + extracts the Groundwater Station parameters from a monitoring well based + on the code of the monitoring well + + Parameters + ---------- + code : str + code of the monitoring well + url_lizard : str + location of the LIZARD-API + + Raises + ------ + ValueError + if code of the monitoring well is not known + + Returns + ------- + groundwaterstation_metadata : dict + dictionary with all available metadata of the monitoring well and its filters + + ''' + lizard_GWS_endpoint = f'{url_lizard}groundwaterstations/' + url_groundwaterstation_code = f'{lizard_GWS_endpoint}?code={code}' + + try: + groundwaterstation_metadata = requests.get(url_groundwaterstation_code).json()["results"][0] + + except IndexError: + raise ValueError("Code is invalid") + + return groundwaterstation_metadata +#%% +def _prepare_API_input(nr_pages, url_groundwater): + ''' + prepare API data pages within the defined extent + + Parameters + ---------- + nr_pages : int + number of the pages on which the information is stored + url_groundwater : str + location of the used API to extract the data + + Returns + ------- + proces_input : list + list of the page number and the corresponding url + + ''' + proces_input = [] + for page in range(nr_pages): + true_page = page+1 # Het echte paginanummer wordt aan de import thread gekoppeld + url = url_groundwater+'&page={}'.format(true_page) + item = [true_page,url] + proces_input += [item] + return proces_input + +def _download_API(data): + ''' + Function to download the data from the API using the ThreadPoolExecutor + + Parameters + ---------- + data : list + list of the page number and the corresponding url + + Returns + ------- + None. + + ''' + page, url = data + try: + data = requests.get(url = url) + # succes += 1 + data = data.json()['results'] + except: + data = [] + return(data) + + +#%% get_API_results_from_extent +def get_API_results_from_extent(polygon_extent, url_lizard, page_size = 100, nr_threads = 10): + ''' + extracts the Groundwater Station parameters from a monitoring well based + on the code + + Parameters + ---------- + code : str + code of the monitoring well + url_lizard : str + location of the LIZARD-API + + Raises + ------ + ValueError + if code of the monitoring well is not known + + Returns + ------- + groundwaterstation_metadata :dict + dictionary with all available metadata of the monitoring well and its filters + + ''' + lizard_GWS_endpoint = f'{url_lizard}groundwaterstations/' + url_groundwaterstation_extent = f'{lizard_GWS_endpoint}?geometry__within={polygon_extent}&page_size={page_size}' + + try: + groundwaterstation_data = requests.get(url_groundwaterstation_extent).json() + nr_results = groundwaterstation_data['count'] + nr_pages = math.ceil(nr_results/page_size) + + print("Number of monitoring wells: {}".format(nr_results)) + print("Number of pages: {}".format(nr_pages)) + + + if nr_threads > nr_pages: + nr_threads = nr_pages + + proces_input = _prepare_API_input(nr_pages, url_groundwaterstation_extent) + groundwaterstation_results = pd.DataFrame() + + + with concurrent.futures.ThreadPoolExecutor(max_workers = nr_threads) as executor: + for result in tqdm(executor.map(_download_API,proces_input),total = nr_pages, desc="Page"): + groundwaterstation_results = pd.concat([groundwaterstation_results,pd.DataFrame(result)]) + + except IndexError: + raise ValueError("Extent is invalid") + + return nr_results, groundwaterstation_results + +#%% get_metadata_filter +def get_metadata_filter(API_result, tube_nr, url_lizard): + """ + extract the metadata for a specific location from the dict with all + groundwater station metadata + + Parameters + ---------- + API_result : dict + dictionary with all available metadata of the monitoring well and all its filters + tube_nr : int, optional + select metadata from a specific tube number + Default selects tube_nr = 1 + url_lizard : str + location of the LIZARD-API + + Raises + ------ + ValueError + if code of the monitoring well is invalid. + Returns + ------- + padndas DataFrame of metadata of the specific monitoring well + """ + + metadata = pd.DataFrame(columns= ["monitoring_well", "tube_nr", "name", "x", "y", + "tube_top", "ground_level","screen_top", "screen_bottom","status", + "timeseries_available", "uuid_hand", "start_hand", "uuid_diver", + 'start_diver', 'source', "unit"]) + + lon, lat, _ = API_result['geometry']['coordinates'] + transformer = Transformer.from_crs("WGS84","EPSG:28992") + x,y = transformer.transform(lat,lon) + + for idx, location in enumerate(API_result["filters"]): + metadata.loc[idx, 'unit'] = 'm NAP' + name = location['code'].replace("-","") + metadata.loc[idx, 'x'] = np.round(x,2) + metadata.loc[idx, 'y'] = np.round(y,2) + metadata.loc[idx,"name"] = location["code"] + metadata.loc[idx, "monitoring_well"] = API_result['name'] + metadata.loc[idx, + "ground_level"] = API_result['surface_level'] + + + metadata.loc[idx, "tube_nr"] = int(name[-3:]) + + metadata.loc[idx, "tube_top"] = location['top_level'] + metadata.loc[idx, + "screen_top"] = location['filter_top_level'] + metadata.loc[idx, + "screen_bottom"] = location['filter_bottom_level'] + metadata.loc[idx, + "source"] = 'lizard' + + if not location['timeseries']: + metadata.loc[idx, "timeseries_available"] = 'Nee' + + else: + timeseries = location['timeseries'] + # metadata.loc[name, "_wezig"] = 'Ja' + for series in timeseries: + series_info = requests.get(series).json() + if series_info["name"] == 'WNS9040.hand': + metadata.loc[idx, "uuid_hand"] = series_info["uuid"] + metadata.loc[idx, + "start_hand"] = series_info["start"] + elif series_info["name"] == 'WNS9040': + metadata.loc[idx, "uuid_diver"] = series_info["uuid"] + metadata.loc[idx, + "start_diver"] = series_info["start"] + + # geen tijdreeksen aanwezig + if pd.isna(metadata.loc[idx, "start_diver"]) and pd.isna(metadata.loc[idx, "start_hand"]): + metadata.loc[idx, "timeseries_available"] = 'Nee' + elif pd.notna(metadata.loc[idx, "start_diver"]) and pd.isna(metadata.loc[idx, "start_hand"]): + metadata.loc[idx, "timeseries_available"] = 'Diver' + elif pd.isna(metadata.loc[idx, "start_diver"]) and pd.notna(metadata.loc[idx, "start_hand"]): + metadata.loc[idx, "timeseries_available"] = 'handpeilingen' + elif pd.notna(metadata.loc[idx, "start_diver"]) and pd.notna(metadata.loc[idx, "start_hand"]): + metadata.loc[idx, "timeseries_available"] = 'Diver + hand' + + metadata.sort_values(by = ['tube_nr'], inplace = True, ignore_index = True) + + tube_nr = [tube_nr] if isinstance(tube_nr, int) else tube_nr + + if tube_nr is None: + metadata = metadata.loc[metadata["tube_nr"] == 1] + elif tube_nr is not None and tube_nr != 'all': + metadata =metadata[metadata['tube_nr'].isin(tube_nr)] + + metadata = metadata if isinstance(metadata, pd.DataFrame) else pd.DataFrame(metadata).T + return metadata +#%% get_timeseries +def get_timeseries (uuid, code, tube_nr, tmin , tmax ,url_lizard, page_size = 100000): + """ + Get the time series of a specific monitoring well + ---------- + uuid : str + Universally Unique Identifier of the monitoring well. + code : str + code or name of the monitoring well + tube_nr : int, optional + select specific tube number + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : int YYYY-m-d, optional + end of the observations, by default the entire serie is returned + url_lizard : str + location of the LIZARD-API. + page_size : int, optional + Query parameter which can extend the response size. The default is 100000. + + Returns + ------- + pandas DataFrame with the timeseries of the monitoring well + + """ + + url_timeseries = url_lizard+'timeseries/{}'.format(uuid) + + if tmin != None: + tmin = pd.to_datetime(tmin).isoformat('T') + + if tmax != None: + tmax = pd.to_datetime(tmax).isoformat('T') + + + params= {'start':tmin, 'end': tmax, 'page_size': page_size} + url = url_timeseries + '/events/' + + time_series_events = requests.get(url=url, params=params).json()['results'] + time_series_df = pd.DataFrame(time_series_events) + + if time_series_df.empty: + # raise ValueError("{} doesn't have measurements in the selected period between {} and {}".format(code, tmin, tmax)) + # print("{} doesn't have measurements in the selected period between {} and {}".format(code, tmin, tmax)) + return pd.DataFrame() + + else: + time_series_df = translate_flag(time_series_df) + + timeseries_sel = time_series_df.loc[:,['time','value', "flag","comment"]] + timeseries_sel['time'] = pd.to_datetime(timeseries_sel['time'], format = '%Y-%m-%dT%H:%M:%SZ', + errors = 'coerce') + pd.DateOffset(hours = 1) + + timeseries_sel = timeseries_sel[~timeseries_sel['time'].isnull()] + + timeseries_sel.set_index('time', inplace = True) + timeseries_sel["name"] = code + timeseries_sel["filter_nr"] = tube_nr + timeseries_sel.index.rename("peil_datum_tijd", inplace = True) + timeseries_sel = timeseries_sel.loc[:,['name', 'filter_nr','value','flag']] + timeseries_sel.dropna(inplace = True) + + + return timeseries_sel +#%% merge_timeseries +def merge_timeseries(hand_measurements, diver_measurements): + """ + merges the timeseries of the hand and diver measurements into one timeserie + + Parameters + ---------- + hand_measurements : DataFrame + DataFrame containing the hand measurements of the monitoring well + diver_measurements : DataFrame + DataFrame containing the Diver measurements of the monitoring well + + Returns + ------- + DataFrame where hand and diver measurements are merged in one timeseries + + """ + if hand_measurements.empty and diver_measurements.empty: + measurements = pd.DataFrame() + + elif diver_measurements.first_valid_index() == None: + measurements = hand_measurements + print("no diver measuremets available for {}".format(hand_measurements.iloc[0]['name'])) + + else: + + + hand_measurements_sel = hand_measurements.loc[hand_measurements.index < diver_measurements.first_valid_index()] + measurements = pd.concat([hand_measurements_sel, diver_measurements], axis = 0) + + return measurements +#%% combine_timeseries +def combine_timeseries (hand_measurements, diver_measurements): + """ + combines the timeseries of the hand and diver measurements into one DataFrame + + Parameters + ---------- + hand_measurements : DataFrame + DataFrame containing the hand measurements of the monitoring well + diver_measurements : DataFrame + DataFrame containing the Diver measurements of the monitoring well + + Returns + ------- + a combined DataFrame with both hand, and diver measurements + DESCRIPTION. + + """ + hand_measurements.rename(columns = {"value": "value_hand", "flag": "flag_hand"}, inplace = True) + diver_measurements.rename(columns = {"value": "value_diver", "flag": "flag_diver"}, inplace = True) + + measurements = pd.concat([hand_measurements, diver_measurements], axis = 1) + measurements = measurements.loc[:,[ "value_hand","value_diver", "flag_hand","flag_diver"]] + measurements.loc[:,"name"] = hand_measurements.loc[:,"name"][0] + measurements.loc[:,"filter_nr"] = hand_measurements.loc[:,"filter_nr"][0] + + return measurements + +#%% extract_timeseries_from_API +def extract_timeseries_from_API (metadata_df, tmin, tmax, type_timeseries, url_lizard): + ''' + extracts timeseries for a specific monitoring well + + Parameters + ---------- + metadata_df : pandas DataFrame + metadata dataframe of the monitoring wel + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + type of timeseries to; + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (merge; default) or + combine: keeps hand and diver measurements separeted + The default is merge. + url_lizard : str + location of the LIZARD-API. + + Returns + ------- + measurements : pandas DataFrame + dataframe with the timeseries of the monitoring well + metadata_df : pandas DataFrame + dataframe with the metadata of the monitoring well + + ''' + + metadata_df = metadata_df.squeeze() + if metadata_df["timeseries_available"] != "Nee": + + if metadata_df["timeseries_available"] == 'Diver + hand': + hand_measurements = get_timeseries(metadata_df['uuid_hand'], metadata_df["name"],metadata_df["tube_nr"], tmin, tmax, url_lizard) + diver_measurements = get_timeseries(metadata_df['uuid_diver'],metadata_df["name"],metadata_df["tube_nr"], tmin, tmax, url_lizard) + + if type_timeseries == "hand": + measurements = hand_measurements + elif type_timeseries == "diver": + measurements = diver_measurements + + elif type_timeseries == "merge": + measurements = merge_timeseries(hand_measurements, diver_measurements) + elif type_timeseries =="combine": + measurements = combine_timeseries(hand_measurements, diver_measurements) + + # Diver + elif metadata_df ["timeseries_available"] == 'Diver': + measurements = get_timeseries(metadata_df['uuid_diver'],metadata_df["name"], metadata_df["tube_nr"], + tmin, tmax, url_lizard) + + # HAND + elif metadata_df["timeseries_available"] == 'handpeilingen': + measurements = get_timeseries(metadata_df['uuid_hand'],metadata_df["name"], metadata_df["tube_nr"], tmin, tmax, url_lizard) + + elif metadata_df["timeseries_available"] == "Nee": + measurements = pd.DataFrame() + + + + metadata_df.drop(['uuid_hand','uuid_diver'], inplace = True) + + return measurements, metadata_df + +#%% read_lizard_groundwater +def read_lizard_groundwater_from_code (code, tube_nr=None, + tmin = None, tmax = None, + type_timeseries = 'merge', + url_lizard='https://vitens.lizard.net/api/v4/'): + """ + extracts the metadata and timeseries of a observation well from a LIZARD-API based on + the code of a monitoring well + + Parameters + ---------- + code : str + code of the measuring well + tube_nr : int, optional + select specific tube top + Default selects tube_nr = 1 + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (merge; default) or + combine: keeps hand and diver measurements separeted + The default is merge. + url_lizard : str + location of the LIZARD-API. + + Returns + ------- + returns a DataFrame with metadata and timeseries + """ + + groundwaterstation_metadata = get_API_results_from_code(code, url_lizard) + + obs_df = get_metadata_filter(groundwaterstation_metadata,tube_nr, url_lizard) + + if obs_df.empty: + raise ValueError("{} doesn't have a tube number {}".format(code,tube_nr)) + + measurements, obs_df = extract_timeseries_from_API (obs_df, tmin, tmax,type_timeseries, url_lizard) + obs_df = check_status_obs(obs_df,measurements) + + + return measurements,obs_df.to_dict() + +#%% get_obs_list_from_code +def get_obs_list_from_code (code, tube_nr='all', + tmin = None, tmax = None, + type_timeseries = 'merge', + url_lizard='https://vitens.lizard.net/api/v4/'): + """ + get all observations from a list of codes of the monitoring wells and a + list of tube numbers + + Parameters + ---------- + code : lst of str + codes of the monitoring wells + tube_nr : lst of str + list of tube numbers of the monitoring wells that should be selected. + By default 'all' available tubes are selected. + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (merge; default) or + combine: keeps hand and diver measurements separeted + The default is merge. + url_lizard : str + location of the LIZARD-API. + Returns + ------- + ObsCollection + ObsCollection DataFrame with the 'obs' column + + """ + + obs_col = pd.DataFrame(columns = ['monitoring_well', 'tube_nr', 'name', 'x', 'y', 'tube_top', + 'ground_level', 'screen_top', 'screen_bottom', 'status', + 'timeseries_available', 'start_hand', 'start_diver', 'source', + 'unit','obs']) + if not isinstance(code, list): + raise ValueError("Code should be a list") + + if len(code) == 1: + code = code[0] + groundwaterstation_metadata = get_API_results_from_code(code, url_lizard) + obs_df = get_metadata_filter(groundwaterstation_metadata,tube_nr, url_lizard) + + for idx, row in obs_df.iterrows(): + measurements, obs_series = extract_timeseries_from_API (row, tmin, tmax,type_timeseries, url_lizard) + obs_series['obs'] = measurements.squeeze() + obs_series = check_status_obs(obs_series, measurements) + obs_col.loc[idx] = obs_series + + + else: + for elem in code: + groundwaterstation_metadata = get_API_results_from_code(elem, url_lizard) + obs_df = get_metadata_filter(groundwaterstation_metadata,tube_nr, url_lizard) + obs_col = pd.concat([obs_df,obs_col], axis = 0, ignore_index = True) + + for idx, row in obs_df.iterrows(): + measurements, obs_series = extract_timeseries_from_API (row, tmin, tmax,type_timeseries, url_lizard) + obs_series['obs'] = measurements.squeeze() + obs_series = check_status_obs(obs_series, measurements) + obs_col.loc[idx] = obs_series + + return obs_col + + +#%% get_obs_list_from_extent + +def get_obs_list_from_extent(extent, extract_timeseries = True, + tmin = None, tmax = None, + type_timeseries = 'merge', + url_lizard='https://vitens.lizard.net/api/v4/'): + ''' + get all observations within a specified extent + Parameters + ---------- + extent : list or a shapefile + get groundwater monitoring wells wihtin this extent [xmin, ymin, xmax, ymax] + or within a predefined Polygon from a shapefile + extract_timeseries : Bool, optional + Extract timeseries or not, if not only metadata are returned + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + merge: the hand and diver measurements into one time series (merge; default) or + combine: keeps hand and diver measurements separeted + The default is merge. + url_lizard : str + location of the LIZARD-API. + + Returns + ------- + obs_col : TYPE + ObsCollection DataFrame with the 'obs' column + + ''' + + if type(extent) == list: + polygon_T = extent_to_polygon(extent) + + + + elif extent.endswith('.shp'): + polygon = geopandas.read_file(extent) + polygon_T = polygon.to_crs("WGS84","EPSG:28992").loc[0,"geometry"] + + + else: + print("Extent should be a shapefile or a list of coordinates") + return + + r_results, groundwaterstation_info= get_API_results_from_extent(polygon_T, url_lizard) + + obs_col = pd.DataFrame(columns = ['monitoring_well', 'tube_nr', 'name', 'x', 'y', 'tube_top', + 'ground_level', 'screen_top', 'screen_bottom', 'status', + 'timeseries_available', 'start_hand', 'start_diver', 'source', + 'unit','obs']) + + groundwaterstation_info = [(series, 'all', url_lizard) for _, series in groundwaterstation_info.iterrows()] + groundwaterstation_filters = pd.DataFrame() + + nr_threads = 10 + if nr_threads > r_results: + nr_threads = r_results + + with concurrent.futures.ThreadPoolExecutor(max_workers= nr_threads) as executor: + for result in tqdm(executor.map(lambda args : get_metadata_filter(*args), groundwaterstation_info), + total = r_results, desc='Monitoring well'): + groundwaterstation_filters = pd.concat([groundwaterstation_filters,pd.DataFrame(result)]) + + if extract_timeseries == True: + groundwaterstation_filters = [ + (series, tmin,tmax,type_timeseries, url_lizard) for _, series in groundwaterstation_filters.iterrows()] + + with concurrent.futures.ThreadPoolExecutor(max_workers= nr_threads) as executor: + for measurement, obs_series in tqdm(executor.map(lambda args : extract_timeseries_from_API(*args), groundwaterstation_filters), + total =len(groundwaterstation_filters), desc='Timeseries'): + obs_series['obs'] = measurement + obs_col = pd.concat([obs_col,pd.DataFrame([obs_series])]) + + else: + groundwaterstation_filters.drop(['uuid_hand','uuid_diver'], axis = 1, inplace = True) + + obs_col = groundwaterstation_filters + + obs_col.reset_index(drop = True, inplace = True) + + return obs_col + + + + + + + + + diff --git a/hydropandas/obs_collection.py b/hydropandas/obs_collection.py index 58af8842..046f6fd5 100644 --- a/hydropandas/obs_collection.py +++ b/hydropandas/obs_collection.py @@ -20,6 +20,96 @@ logger = logging.getLogger(__name__) +def read_lizard_from_list( + code, + tube_nr='all', + tmin = None, + tmax = None, + type_timeseries = 'merge', + url_lizard='https://vitens.lizard.net/api/v4/'): + """ + get all observations from a list of codes of the monitoring wells and a + list of tube numbers + + Parameters + ---------- + code : lst of str + codes of the monitoring wells + tube_nr : lst of str + list of tube numbers of the monitoring wells that should be selected. + By default 'all' available tubes are selected. + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (merge; default) or + combine: keeps hand and diver measurements separeted + The default is merge. + url_lizard : str + location of the LIZARD-API. + Returns + ------- + ObsCollection + ObsCollection DataFrame with the 'obs' column + + """ + oc = ObsCollection.from_lizard_list( + code = code, + tube_nr= tube_nr, + tmin = tmin, + tmax = tmax, + type_timeseries = type_timeseries, + url_lizard=url_lizard) + return oc + +def read_lizard_from_extent( + extent, + extract_timeseries = True, + tmin = None, + tmax = None, + type_timeseries = 'merge', + url_lizard='https://vitens.lizard.net/api/v4/'): + """ + get all observations within a specified extent + + Parameters + ---------- + extent : list or a shapefile + get groundwater monitoring wells wihtin this extent [xmin, ymin, xmax, ymax] + or within a predefined Polygon from a shapefile + extract_timeseries : Bool, optional + Extract timeseries or not, if not only metadata are returned + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (merge; default) or + combine: keeps hand and diver measurements separeted + The default is merge. + url_lizard : str + location of the LIZARD-API. + + Returns + ------- + obs_col : TYPE + ObsCollection DataFrame with the 'obs' column + + """ + oc = ObsCollection.from_lizard_extent( + extent = extent, + extract_timeseries = extract_timeseries, + tmin = tmin, + tmax = tmax, + type_timeseries = type_timeseries, + url_lizard=url_lizard) + return oc + def read_bro( extent=None, bro_id=None, @@ -1095,6 +1185,103 @@ def from_bro( obs_df = util._obslist_to_frame(obs_list) return cls(obs_df, name=name, meta=meta) + + @classmethod + def from_lizard_list( + cls, + code, + tube_nr='all', + tmin = None, tmax = None, + type_timeseries = 'merge', + url_lizard='https://vitens.lizard.net/api/v4/'): + """ + get all observations from a list of codes of the monitoring wells and a + list of tube numbers + + Parameters + ---------- + code : lst of str + codes of the monitoring wells + tube_nr : lst of str + list of tube numbers of the monitoring wells that should be selected. + By default 'all' available tubes are selected. + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (merge; default) or + combine: keeps hand and diver measurements separeted + The default is merge. + url_lizard : str + location of the LIZARD-API. + Returns + ------- + ObsCollection + ObsCollection DataFrame with the 'obs' column + + """ + + from .io.lizard import get_obs_list_from_code + + obs_df = get_obs_list_from_code(code, + tube_nr, + tmin, + tmax, + type_timeseries, + url_lizard) + + + return cls(obs_df) + @classmethod + def from_lizard_extent( + cls, + extent, + extract_timeseries = True, + tmin = None, tmax = None, + type_timeseries = 'merge', + url_lizard='https://vitens.lizard.net/api/v4/'): + ''' + get all observations within a specified extent + + Parameters + ---------- + extent : list or a shapefile + get groundwater monitoring wells wihtin this extent [xmin, ymin, xmax, ymax] + or within a predefined Polygon from a shapefile + extract_timeseries : Bool, optional + Extract timeseries or not, if not only metadata are returned + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (merge; default) or + combine: keeps hand and diver measurements separeted + The default is merge. + url_lizard : str + location of the LIZARD-API. + + Returns + ------- + obs_col : TYPE + ObsCollection DataFrame with the 'obs' column + + ''' + + from .io.lizard import get_obs_list_from_extent + + obs_df = get_obs_list_from_extent(extent, + extract_timeseries, + tmin, + tmax, + type_timeseries, + url_lizard) + return cls(obs_df) @classmethod def from_dataframe(cls, df, obs_list=None, ObsClass=obs.GroundwaterObs): diff --git a/hydropandas/observation.py b/hydropandas/observation.py index 264efc5b..daf6b506 100644 --- a/hydropandas/observation.py +++ b/hydropandas/observation.py @@ -557,6 +557,53 @@ def from_bro( tube_top=meta.pop("tube_top"), ) + @classmethod + def from_lizard_code(cls, + code, + tube_nr=None, + tmin = None, tmax = None, + type_timeseries = 'merge', + url_lizard='https://vitens.lizard.net/api/v4/'): + """ + extracts the metadata and timeseries of a observation well from a LIZARD-API based on + the code of a monitoring well + + Parameters + ---------- + code : str + code of the measuring well + tube_nr : int, optional + select specific tube top + Default selects tube_nr = 1 + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (merge; default) or + combine: keeps hand and diver measurements separeted + The default is merge. + url_lizard : str + location of the LIZARD-API. + + Returns + ------- + returns a DataFrame with metadata and timeseries + """ + + from .io import lizard + + measurements, meta = lizard.read_lizard_groundwater_from_code( + code, + tube_nr, + tmin, + tmax, + type_timeseries, + url_lizard) + return cls(measurements, meta=meta) + @classmethod def from_dino( cls, From e34631fc7919ba6019aeba605b728c06f2ad3a6d Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Wed, 3 Jan 2024 10:19:27 +0100 Subject: [PATCH 11/23] update lizard --- hydropandas/__init__.py | 3 +- hydropandas/io/lizard.py | 957 ++++++++++++++++------------------ hydropandas/obs_collection.py | 229 +++----- hydropandas/observation.py | 40 +- tests/test_012_lizard.py | 21 +- 5 files changed, 597 insertions(+), 653 deletions(-) diff --git a/hydropandas/__init__.py b/hydropandas/__init__.py index 14d79364..4c02fbe3 100644 --- a/hydropandas/__init__.py +++ b/hydropandas/__init__.py @@ -15,8 +15,7 @@ read_fews, read_imod, read_knmi, - read_lizard_from_list, - read_lizard_from_extent, + read_lizard, read_menyanthes, read_modflow, read_pastastore, diff --git a/hydropandas/io/lizard.py b/hydropandas/io/lizard.py index 35a5cca0..bb3a7e02 100644 --- a/hydropandas/io/lizard.py +++ b/hydropandas/io/lizard.py @@ -1,116 +1,120 @@ import pandas as pd -import requests -from pyproj import Transformer +import requests +from pyproj import Transformer import logging import numpy as np -logger = logging.getLogger(__name__) +import pathlib from shapely.geometry import Polygon from tqdm import tqdm import math import geopandas import concurrent.futures -#%% check_status_obs -def check_status_obs (metadata, timeseries): - ''' - checks if a monitoring well is still active + +logger = logging.getLogger(__name__) + + +URL_LIZARD = "https://vitens.lizard.net/api/v4/" + + +def check_status_obs(metadata, timeseries): + """ + checks if a monitoring tube is still active Parameters ---------- - metadata : pandas.DataFrame - metadata of the monitoring well + metadata : dict + metadata of the monitoring tube timeseries : pandas DataFrame - timeseries of the monitoring well - + timeseries of the monitoring well + Returns ------- - metadata DataFrame including the status of the monitoring well - - ''' + metadata DataFrame including the status of the monitoring well + + """ if timeseries.empty: - metadata["status"] = 'geen tijdsserie beschikbaar' + metadata["status"] = "no timeseries available" return metadata - + last_measurement_date = timeseries.last_valid_index() - today = pd.to_datetime('today').normalize() + today = pd.to_datetime("today").normalize() - if today - last_measurement_date < pd.Timedelta(days= 180): - metadata["status"] = 'active' + if today - last_measurement_date < pd.Timedelta(days=180): + metadata["status"] = "active" else: - metadata["status"] = 'niet actief' - - + metadata["status"] = "inactive" + return metadata -#%% extent_to_polygon -def extent_to_polygon (coordinates): - ''' - Translates a list of coordinates (xmin, ymin, xmax, ymax) to a polygon with - coordinate system WGS84 + + +def extent_to_wgs84_polygon(coordinates): + """ + Translates a list of coordinates (xmin,xmax, ymin, ymax) to a polygon with + coordinate system WGS84 Parameters ---------- - coordinates : lst - list of the modelextent within which the observations are collected + coordinates : lst or tuple + list of the modelextent in epsg 28992 within which the observations + are collected. Returns ------- polygon of the modelextent with coordinate system WGS84 - ''' - transformer = Transformer.from_crs("EPSG:28992","WGS84") + """ + transformer = Transformer.from_crs("EPSG:28992", "WGS84") + + lon_min, lat_min = transformer.transform(coordinates[0], coordinates[2]) + lon_max, lat_max = transformer.transform(coordinates[1], coordinates[3]) - lon_min, lat_min = transformer.transform(coordinates[0], coordinates[1]) - lon_max, lat_max = transformer.transform(coordinates[2], coordinates[3]) + poly_T = Polygon( + [(lat_min, lon_min), (lat_max, lon_min), (lat_max, lon_max), (lat_min, lon_max)] + ) - poly_T = Polygon([ - (lat_min, lon_min), (lat_max, lon_min),(lat_max, lon_max), (lat_min,lon_max)]) - return poly_T -#%% translate_flag -def translate_flag (timeseries): - ''' + + +def translate_flag(timeseries): + """ Translates Vitens Lizard flags from interter to text Parameters ---------- - timeseries : pandas.DataFrame + timeseries : pandas.DataFrame timeseries of a monitoring well with flags Returns ------- - timeseries : pandas.DataFrame + timeseries : pandas.DataFrame timeseries with translated quality flags - ''' - for idx, flag in enumerate(timeseries.loc[:,"flag"]): - if flag == 0 or flag ==1: - timeseries.loc[idx,'flag'] = 'betrouwbaar' - - elif flag == 3 or flag ==4: - timeseries.loc[idx,'flag'] = 'onbeslist' - - elif flag == 6 or flag ==7: - timeseries.loc[idx,'flag'] = 'onbetrouwbaar' - - elif flag == 99: - timeseries.loc[idx,'flag'] = 'onongevalideerd' - - elif flag == -99: - timeseries.loc[idx,'flag'] = 'verwijderd' - + """ + translate_dic = { + 0: "betrouwbaar", + 1: "betrouwbaar", + 3: "onbeslist", + 4: "onbeslist", + 6: "onbetrouwbaar", + 7: "onbetrouwbaar", + 99: "onongevalideerd", + -99: "verwijderd", + } + timeseries["flag"] = timeseries["flag"].replace(translate_dic) + return timeseries -#%% get_API_results_from_code -def get_API_results_from_code(code, url_lizard): - ''' - extracts the Groundwater Station parameters from a monitoring well based - on the code of the monitoring well - + + +def get_metadata_mw_from_code(code): + """ + extracts the Groundwater Station parameters from a monitoring well based + on the code of the monitoring well + Parameters ---------- code : str - code of the monitoring well - url_lizard : str - location of the LIZARD-API + code of the monitoring well Raises ------ @@ -122,46 +126,52 @@ def get_API_results_from_code(code, url_lizard): groundwaterstation_metadata : dict dictionary with all available metadata of the monitoring well and its filters - ''' - lizard_GWS_endpoint = f'{url_lizard}groundwaterstations/' - url_groundwaterstation_code = f'{lizard_GWS_endpoint}?code={code}' - + """ + lizard_GWS_endpoint = f"{URL_LIZARD}groundwaterstations/" + url_groundwaterstation_code = f"{lizard_GWS_endpoint}?code={code}" + try: - groundwaterstation_metadata = requests.get(url_groundwaterstation_code).json()["results"][0] - + groundwaterstation_metadata = requests.get(url_groundwaterstation_code).json()[ + "results" + ][0] + except IndexError: - raise ValueError("Code is invalid") - + raise ValueError("Code is invalid") + return groundwaterstation_metadata -#%% + + def _prepare_API_input(nr_pages, url_groundwater): - ''' - prepare API data pages within the defined extent + """ + prepare API data pages within the defined extent Parameters ---------- nr_pages : int number of the pages on which the information is stored url_groundwater : str - location of the used API to extract the data + location of the used API to extract the data Returns ------- proces_input : list - list of the page number and the corresponding url + list of the page number and the corresponding url - ''' + """ proces_input = [] for page in range(nr_pages): - true_page = page+1 # Het echte paginanummer wordt aan de import thread gekoppeld - url = url_groundwater+'&page={}'.format(true_page) - item = [true_page,url] + true_page = ( + page + 1 + ) # Het echte paginanummer wordt aan de import thread gekoppeld + url = url_groundwater + "&page={}".format(true_page) + item = [true_page, url] proces_input += [item] return proces_input -def _download_API(data): - ''' - Function to download the data from the API using the ThreadPoolExecutor + +def _download(data): + """ + Function to download the data from the API using the ThreadPoolExecutor Parameters ---------- @@ -172,268 +182,202 @@ def _download_API(data): ------- None. - ''' + """ page, url = data try: - data = requests.get(url = url) + data = requests.get(url=url) # succes += 1 - data = data.json()['results'] + data = data.json()["results"] except: data = [] - return(data) + return data -#%% get_API_results_from_extent -def get_API_results_from_extent(polygon_extent, url_lizard, page_size = 100, nr_threads = 10): - ''' - extracts the Groundwater Station parameters from a monitoring well based - on the code - - Parameters - ---------- - code : str - code of the monitoring well - url_lizard : str - location of the LIZARD-API - - Raises - ------ - ValueError - if code of the monitoring well is not known - - Returns - ------- - groundwaterstation_metadata :dict - dictionary with all available metadata of the monitoring well and its filters - - ''' - lizard_GWS_endpoint = f'{url_lizard}groundwaterstations/' - url_groundwaterstation_extent = f'{lizard_GWS_endpoint}?geometry__within={polygon_extent}&page_size={page_size}' - - try: - groundwaterstation_data = requests.get(url_groundwaterstation_extent).json() - nr_results = groundwaterstation_data['count'] - nr_pages = math.ceil(nr_results/page_size) - - print("Number of monitoring wells: {}".format(nr_results)) - print("Number of pages: {}".format(nr_pages)) - - - if nr_threads > nr_pages: - nr_threads = nr_pages - - proces_input = _prepare_API_input(nr_pages, url_groundwaterstation_extent) - groundwaterstation_results = pd.DataFrame() - - - with concurrent.futures.ThreadPoolExecutor(max_workers = nr_threads) as executor: - for result in tqdm(executor.map(_download_API,proces_input),total = nr_pages, desc="Page"): - groundwaterstation_results = pd.concat([groundwaterstation_results,pd.DataFrame(result)]) - - except IndexError: - raise ValueError("Extent is invalid") - - return nr_results, groundwaterstation_results - -#%% get_metadata_filter -def get_metadata_filter(API_result, tube_nr, url_lizard): +def get_metadata_tube(metadata_mw, tube_nr): """ - extract the metadata for a specific location from the dict with all - groundwater station metadata - + extract the metadata for a specific tube from the monitoring well metadata + Parameters ---------- - API_result : dict + metadata_mw : dict dictionary with all available metadata of the monitoring well and all its filters - tube_nr : int, optional + tube_nr : int or None select metadata from a specific tube number - Default selects tube_nr = 1 - url_lizard : str - location of the LIZARD-API - + Raises ------ ValueError if code of the monitoring well is invalid. + Returns ------- - padndas DataFrame of metadata of the specific monitoring well + dictionary with metadata of a specific tube """ - - metadata = pd.DataFrame(columns= ["monitoring_well", "tube_nr", "name", "x", "y", - "tube_top", "ground_level","screen_top", "screen_bottom","status", - "timeseries_available", "uuid_hand", "start_hand", "uuid_diver", - 'start_diver', 'source', "unit"]) - - lon, lat, _ = API_result['geometry']['coordinates'] - transformer = Transformer.from_crs("WGS84","EPSG:28992") - x,y = transformer.transform(lat,lon) - - for idx, location in enumerate(API_result["filters"]): - metadata.loc[idx, 'unit'] = 'm NAP' - name = location['code'].replace("-","") - metadata.loc[idx, 'x'] = np.round(x,2) - metadata.loc[idx, 'y'] = np.round(y,2) - metadata.loc[idx,"name"] = location["code"] - metadata.loc[idx, "monitoring_well"] = API_result['name'] - metadata.loc[idx, - "ground_level"] = API_result['surface_level'] - - - metadata.loc[idx, "tube_nr"] = int(name[-3:]) - - metadata.loc[idx, "tube_top"] = location['top_level'] - metadata.loc[idx, - "screen_top"] = location['filter_top_level'] - metadata.loc[idx, - "screen_bottom"] = location['filter_bottom_level'] - metadata.loc[idx, - "source"] = 'lizard' - - if not location['timeseries']: - metadata.loc[idx, "timeseries_available"] = 'Nee' - - else: - timeseries = location['timeseries'] - # metadata.loc[name, "_wezig"] = 'Ja' - for series in timeseries: - series_info = requests.get(series).json() - if series_info["name"] == 'WNS9040.hand': - metadata.loc[idx, "uuid_hand"] = series_info["uuid"] - metadata.loc[idx, - "start_hand"] = series_info["start"] - elif series_info["name"] == 'WNS9040': - metadata.loc[idx, "uuid_diver"] = series_info["uuid"] - metadata.loc[idx, - "start_diver"] = series_info["start"] - - # geen tijdreeksen aanwezig - if pd.isna(metadata.loc[idx, "start_diver"]) and pd.isna(metadata.loc[idx, "start_hand"]): - metadata.loc[idx, "timeseries_available"] = 'Nee' - elif pd.notna(metadata.loc[idx, "start_diver"]) and pd.isna(metadata.loc[idx, "start_hand"]): - metadata.loc[idx, "timeseries_available"] = 'Diver' - elif pd.isna(metadata.loc[idx, "start_diver"]) and pd.notna(metadata.loc[idx, "start_hand"]): - metadata.loc[idx, "timeseries_available"] = 'handpeilingen' - elif pd.notna(metadata.loc[idx, "start_diver"]) and pd.notna(metadata.loc[idx, "start_hand"]): - metadata.loc[idx, "timeseries_available"] = 'Diver + hand' - - metadata.sort_values(by = ['tube_nr'], inplace = True, ignore_index = True) - - tube_nr = [tube_nr] if isinstance(tube_nr, int) else tube_nr - + if tube_nr is None: - metadata = metadata.loc[metadata["tube_nr"] == 1] - elif tube_nr is not None and tube_nr != 'all': - metadata =metadata[metadata['tube_nr'].isin(tube_nr)] - - metadata = metadata if isinstance(metadata, pd.DataFrame) else pd.DataFrame(metadata).T + tube_nr = 1 + + metadata = { + "monitoring_well": metadata_mw["name"], + "ground_level": metadata_mw["surface_level"], + "source": "lizard", + "unit": "m NAP", + "metadata_available": True, + "status": None, + } + + for metadata_tube in metadata_mw["filters"]: + if metadata_tube["code"].endswith(str(tube_nr)): + break + else: + raise ValueError(f"{metadata_mw['name']} doesn't have a tube number {tube_nr}") + + metadata.update( + { + "tube_nr": tube_nr, + "name": metadata_tube["code"].replace("-", ""), + "tube_top": metadata_tube["top_level"], + "screen_top": metadata_tube["filter_top_level"], + "screen_bottom": metadata_tube["filter_bottom_level"], + } + ) + + lon, lat, _ = metadata_mw["geometry"]["coordinates"] + transformer = Transformer.from_crs("WGS84", "EPSG:28992") + metadata["x"], metadata["y"] = transformer.transform(lat, lon) + + if not metadata_tube["timeseries"]: + metadata["timeseries_type"] = None + else: + for series in metadata_tube["timeseries"]: + series_info = requests.get(series).json() + if series_info["name"] == "WNS9040.hand": + metadata["uuid_hand"] = series_info["uuid"] + metadata["start_hand"] = series_info["start"] + elif series_info["name"] == "WNS9040": + metadata["uuid_diver"] = series_info["uuid"] + metadata["start_diver"] = series_info["start"] + + if (metadata.get("start_hand") is None) and ( + metadata.get("start_diver") is None + ): + metadata["timeseries_type"] = None + elif (metadata.get("start_hand") is not None) and ( + metadata.get("start_diver") is not None + ): + metadata["timeseries_type"] = "diver + hand" + elif metadata.get("start_hand") is None: + metadata["timeseries_type"] = "diver" + elif metadata.get("start_diver") is None: + metadata["timeseries_type"] = "hand" + return metadata -#%% get_timeseries -def get_timeseries (uuid, code, tube_nr, tmin , tmax ,url_lizard, page_size = 100000): + + +def get_timeseries_uuid(uuid, code, tube_nr, tmin, tmax, page_size=100000): """ - Get the time series of a specific monitoring well + Get the time series (hand or diver) using the uuid. + ---------- uuid : str - Universally Unique Identifier of the monitoring well. + Universally Unique Identifier of the tube and type of time series. code : str - code or name of the monitoring well - tube_nr : int, optional + code or name of the monitoring well + tube_nr : int select specific tube number - tmin : str YYYY-m-d, optional + tmin : str YYYY-m-d start of the observations, by default the entire serie is returned - tmax : int YYYY-m-d, optional + tmax : int YYYY-m-d end of the observations, by default the entire serie is returned - url_lizard : str - location of the LIZARD-API. page_size : int, optional Query parameter which can extend the response size. The default is 100000. Returns ------- - pandas DataFrame with the timeseries of the monitoring well + pandas DataFrame with the timeseries of the monitoring well """ - - url_timeseries = url_lizard+'timeseries/{}'.format(uuid) - + + url_timeseries = URL_LIZARD + "timeseries/{}".format(uuid) + if tmin != None: - tmin = pd.to_datetime(tmin).isoformat('T') - + tmin = pd.to_datetime(tmin).isoformat("T") + if tmax != None: - tmax = pd.to_datetime(tmax).isoformat('T') - - - params= {'start':tmin, 'end': tmax, 'page_size': page_size} - url = url_timeseries + '/events/' - - time_series_events = requests.get(url=url, params=params).json()['results'] - time_series_df = pd.DataFrame(time_series_events) - + tmax = pd.to_datetime(tmax).isoformat("T") + + params = {"start": tmin, "end": tmax, "page_size": page_size} + url = url_timeseries + "/events/" + + time_series_events = requests.get(url=url, params=params).json()["results"] + time_series_df = pd.DataFrame(time_series_events) + if time_series_df.empty: - # raise ValueError("{} doesn't have measurements in the selected period between {} and {}".format(code, tmin, tmax)) - # print("{} doesn't have measurements in the selected period between {} and {}".format(code, tmin, tmax)) return pd.DataFrame() - - else: + + else: time_series_df = translate_flag(time_series_df) - - timeseries_sel = time_series_df.loc[:,['time','value', "flag","comment"]] - timeseries_sel['time'] = pd.to_datetime(timeseries_sel['time'], format = '%Y-%m-%dT%H:%M:%SZ', - errors = 'coerce') + pd.DateOffset(hours = 1) - - timeseries_sel = timeseries_sel[~timeseries_sel['time'].isnull()] - - timeseries_sel.set_index('time', inplace = True) - timeseries_sel["name"] = code - timeseries_sel["filter_nr"] = tube_nr - timeseries_sel.index.rename("peil_datum_tijd", inplace = True) - timeseries_sel = timeseries_sel.loc[:,['name', 'filter_nr','value','flag']] - timeseries_sel.dropna(inplace = True) - - + + timeseries_sel = time_series_df.loc[:, ["time", "value", "flag", "comment"]] + timeseries_sel["time"] = pd.to_datetime( + timeseries_sel["time"], format="%Y-%m-%dT%H:%M:%SZ", errors="coerce" + ) + pd.DateOffset(hours=1) + + timeseries_sel = timeseries_sel[~timeseries_sel["time"].isnull()] + + timeseries_sel.set_index("time", inplace=True) + timeseries_sel.index.rename("peil_datum_tijd", inplace=True) + # timeseries_sel.dropna(inplace=True) + return timeseries_sel -#%% merge_timeseries -def merge_timeseries(hand_measurements, diver_measurements): + + +def _merge_timeseries(hand_measurements, diver_measurements): """ merges the timeseries of the hand and diver measurements into one timeserie Parameters ---------- hand_measurements : DataFrame - DataFrame containing the hand measurements of the monitoring well + DataFrame containing the hand measurements of the monitoring well diver_measurements : DataFrame - DataFrame containing the Diver measurements of the monitoring well - + DataFrame containing the Diver measurements of the monitoring well + Returns ------- - DataFrame where hand and diver measurements are merged in one timeseries + DataFrame where hand and diver measurements are merged in one timeseries """ if hand_measurements.empty and diver_measurements.empty: measurements = pd.DataFrame() - + elif diver_measurements.first_valid_index() == None: measurements = hand_measurements - print("no diver measuremets available for {}".format(hand_measurements.iloc[0]['name'])) - + print( + "no diver measuremets available for {}".format( + hand_measurements.iloc[0]["name"] + ) + ) + else: - - - hand_measurements_sel = hand_measurements.loc[hand_measurements.index < diver_measurements.first_valid_index()] - measurements = pd.concat([hand_measurements_sel, diver_measurements], axis = 0) - + hand_measurements_sel = hand_measurements.loc[ + hand_measurements.index < diver_measurements.first_valid_index() + ] + measurements = pd.concat([hand_measurements_sel, diver_measurements], axis=0) + return measurements -#%% combine_timeseries -def combine_timeseries (hand_measurements, diver_measurements): + + +def _combine_timeseries(hand_measurements, diver_measurements): """ combines the timeseries of the hand and diver measurements into one DataFrame Parameters ---------- hand_measurements : DataFrame - DataFrame containing the hand measurements of the monitoring well + DataFrame containing the hand measurements of the monitoring well diver_measurements : DataFrame - DataFrame containing the Diver measurements of the monitoring well + DataFrame containing the Diver measurements of the monitoring well Returns ------- @@ -441,25 +385,32 @@ def combine_timeseries (hand_measurements, diver_measurements): DESCRIPTION. """ - hand_measurements.rename(columns = {"value": "value_hand", "flag": "flag_hand"}, inplace = True) - diver_measurements.rename(columns = {"value": "value_diver", "flag": "flag_diver"}, inplace = True) - - measurements = pd.concat([hand_measurements, diver_measurements], axis = 1) - measurements = measurements.loc[:,[ "value_hand","value_diver", "flag_hand","flag_diver"]] - measurements.loc[:,"name"] = hand_measurements.loc[:,"name"][0] - measurements.loc[:,"filter_nr"] = hand_measurements.loc[:,"filter_nr"][0] - - return measurements - -#%% extract_timeseries_from_API -def extract_timeseries_from_API (metadata_df, tmin, tmax, type_timeseries, url_lizard): - ''' - extracts timeseries for a specific monitoring well + hand_measurements.rename( + columns={"value": "value_hand", "flag": "flag_hand"}, inplace=True + ) + diver_measurements.rename( + columns={"value": "value_diver", "flag": "flag_diver"}, inplace=True + ) + + measurements = pd.concat([hand_measurements, diver_measurements], axis=1) + measurements = measurements.loc[ + :, ["value_hand", "value_diver", "flag_hand", "flag_diver"] + ] + measurements.loc[:, "name"] = hand_measurements.loc[:, "name"][0] + measurements.loc[:, "filter_nr"] = hand_measurements.loc[:, "filter_nr"][0] + + return measurements + + +def get_timeseries_tube(tube_metadata, tmin, tmax, type_timeseries): + """ + extracts multiple timeseries (hand and/or diver measurements) for a specific + tube using the Lizard API. Parameters ---------- - metadata_df : pandas DataFrame - metadata dataframe of the monitoring wel + tube_metadata : dict + metadata of a tube tmin : str YYYY-m-d, optional start of the observations, by default the entire serie is returned tmax : Ttr YYYY-m-d, optional @@ -467,70 +418,82 @@ def extract_timeseries_from_API (metadata_df, tmin, tmax, type_timeseries, url_l type_timeseries : str, optional type of timeseries to; hand: returns only hand measurements - diver: returns only diver measurements + diver: returns only diver measurements merge: the hand and diver measurements into one time series (merge; default) or - combine: keeps hand and diver measurements separeted + combine: keeps hand and diver measurements separeted The default is merge. - url_lizard : str - location of the LIZARD-API. Returns ------- measurements : pandas DataFrame - dataframe with the timeseries of the monitoring well - metadata_df : pandas DataFrame - dataframe with the metadata of the monitoring well - - ''' - - metadata_df = metadata_df.squeeze() - if metadata_df["timeseries_available"] != "Nee": - - if metadata_df["timeseries_available"] == 'Diver + hand': - hand_measurements = get_timeseries(metadata_df['uuid_hand'], metadata_df["name"],metadata_df["tube_nr"], tmin, tmax, url_lizard) - diver_measurements = get_timeseries(metadata_df['uuid_diver'],metadata_df["name"],metadata_df["tube_nr"], tmin, tmax, url_lizard) - - if type_timeseries == "hand": - measurements = hand_measurements - elif type_timeseries == "diver": - measurements = diver_measurements - - elif type_timeseries == "merge": - measurements = merge_timeseries(hand_measurements, diver_measurements) - elif type_timeseries =="combine": - measurements = combine_timeseries(hand_measurements, diver_measurements) - - # Diver - elif metadata_df ["timeseries_available"] == 'Diver': - measurements = get_timeseries(metadata_df['uuid_diver'],metadata_df["name"], metadata_df["tube_nr"], - tmin, tmax, url_lizard) - - # HAND - elif metadata_df["timeseries_available"] == 'handpeilingen': - measurements = get_timeseries(metadata_df['uuid_hand'],metadata_df["name"], metadata_df["tube_nr"], tmin, tmax, url_lizard) - - elif metadata_df["timeseries_available"] == "Nee": - measurements = pd.DataFrame() - - - - metadata_df.drop(['uuid_hand','uuid_diver'], inplace = True) - - return measurements, metadata_df - -#%% read_lizard_groundwater -def read_lizard_groundwater_from_code (code, tube_nr=None, - tmin = None, tmax = None, - type_timeseries = 'merge', - url_lizard='https://vitens.lizard.net/api/v4/'): + timeseries of the monitoring well + metadata_df : dict + metadata of the monitoring well + + """ + if tube_metadata["timeseries_type"] is None: + return pd.DataFrame(), tube_metadata + + if type_timeseries in ["hand", "merge", "combine"]: + if "hand" in tube_metadata["timeseries_type"]: + hand_measurements = get_timeseries_uuid( + tube_metadata.pop("uuid_hand"), + tube_metadata["name"], + tube_metadata["tube_nr"], + tmin, + tmax, + ) + else: + hand_measurements = None + + if type_timeseries in ["diver", "merge", "combine"]: + if "diver" in tube_metadata["timeseries_type"]: + diver_measurements = get_timeseries_uuid( + tube_metadata.pop("uuid_diver"), + tube_metadata["name"], + tube_metadata["tube_nr"], + tmin, + tmax, + ) + else: + diver_measurements = None + + if type_timeseries == "hand" and hand_measurements is not None: + measurements = hand_measurements + elif type_timeseries == "diver" and diver_measurements is not None: + measurements = diver_measurements + elif type_timeseries in ["merge", "combine"]: + if (hand_measurements is not None) and (diver_measurements is not None): + if type_timeseries == "merge": + measurements = _merge_timeseries(hand_measurements, diver_measurements) + elif type_timeseries == "combine": + measurements = _combine_timeseries( + hand_measurements, diver_measurements + ) + elif hand_measurements is not None: + measurements = hand_measurements + elif diver_measurements is not None: + measurements = diver_measurements + + return measurements, tube_metadata + + +def get_lizard_groundwater( + code, + tube_nr=None, + tmin=None, + tmax=None, + type_timeseries="merge", + only_metadata=False, +): """ - extracts the metadata and timeseries of a observation well from a LIZARD-API based on - the code of a monitoring well + extracts the metadata and timeseries of an observation well from a + LIZARD-API based on the code of a monitoring well Parameters ---------- code : str - code of the measuring well + code of the measuring well, e.g. '27B-0444' tube_nr : int, optional select specific tube top Default selects tube_nr = 1 @@ -540,45 +503,54 @@ def read_lizard_groundwater_from_code (code, tube_nr=None, end of the observations, by default the entire serie is returned type_timeseries : str, optional hand: returns only hand measurements - diver: returns only diver measurements + diver: returns only diver measurements merge: the hand and diver measurements into one time series (merge; default) or - combine: keeps hand and diver measurements separeted + combine: keeps hand and diver measurements separated The default is merge. - url_lizard : str - location of the LIZARD-API. + only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False. Returns ------- - returns a DataFrame with metadata and timeseries + returns a DataFrame with metadata and timeseries """ - - groundwaterstation_metadata = get_API_results_from_code(code, url_lizard) - - obs_df = get_metadata_filter(groundwaterstation_metadata,tube_nr, url_lizard) - - if obs_df.empty: - raise ValueError("{} doesn't have a tube number {}".format(code,tube_nr)) - - measurements, obs_df = extract_timeseries_from_API (obs_df, tmin, tmax,type_timeseries, url_lizard) - obs_df = check_status_obs(obs_df,measurements) - - - return measurements,obs_df.to_dict() - -#%% get_obs_list_from_code -def get_obs_list_from_code (code, tube_nr='all', - tmin = None, tmax = None, - type_timeseries = 'merge', - url_lizard='https://vitens.lizard.net/api/v4/'): + + groundwaterstation_metadata = get_metadata_mw_from_code(code) + + tube_metadata = get_metadata_tube(groundwaterstation_metadata, tube_nr) + + if only_metadata: + return pd.DataFrame(), tube_metadata + + measurements, tube_metadata = get_timeseries_tube( + tube_metadata, tmin, tmax, type_timeseries + ) + tube_metadata = check_status_obs(tube_metadata, measurements) + + return measurements, tube_metadata + + +def get_obs_list_from_codes( + codes, + ObsClass, + tube_nr="all", + tmin=None, + tmax=None, + type_timeseries="merge", + only_metadata=False, +): """ - get all observations from a list of codes of the monitoring wells and a - list of tube numbers + get all observations from a list of codes of the monitoring wells and a + list of tube numbers Parameters ---------- - code : lst of str - codes of the monitoring wells - tube_nr : lst of str + codes : lst of str or str + codes of the monitoring wells + ObsClass : type + class of the observations, e.g. GroundwaterObs + tube_nr : lst of str list of tube numbers of the monitoring wells that should be selected. By default 'all' available tubes are selected. tmin : str YYYY-m-d, optional @@ -587,143 +559,136 @@ def get_obs_list_from_code (code, tube_nr='all', end of the observations, by default the entire serie is returned type_timeseries : str, optional hand: returns only hand measurements - diver: returns only diver measurements + diver: returns only diver measurements merge: the hand and diver measurements into one time series (merge; default) or - combine: keeps hand and diver measurements separeted + combine: keeps hand and diver measurements separeted The default is merge. - url_lizard : str - location of the LIZARD-API. + only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False. + + Returns ------- ObsCollection ObsCollection DataFrame with the 'obs' column """ - - obs_col = pd.DataFrame(columns = ['monitoring_well', 'tube_nr', 'name', 'x', 'y', 'tube_top', - 'ground_level', 'screen_top', 'screen_bottom', 'status', - 'timeseries_available', 'start_hand', 'start_diver', 'source', - 'unit','obs']) - if not isinstance(code, list): - raise ValueError("Code should be a list") - - if len(code) == 1: - code = code[0] - groundwaterstation_metadata = get_API_results_from_code(code, url_lizard) - obs_df = get_metadata_filter(groundwaterstation_metadata,tube_nr, url_lizard) - - for idx, row in obs_df.iterrows(): - measurements, obs_series = extract_timeseries_from_API (row, tmin, tmax,type_timeseries, url_lizard) - obs_series['obs'] = measurements.squeeze() - obs_series = check_status_obs(obs_series, measurements) - obs_col.loc[idx] = obs_series - - - else: - for elem in code: - groundwaterstation_metadata = get_API_results_from_code(elem, url_lizard) - obs_df = get_metadata_filter(groundwaterstation_metadata,tube_nr, url_lizard) - obs_col = pd.concat([obs_df,obs_col], axis = 0, ignore_index = True) - - for idx, row in obs_df.iterrows(): - measurements, obs_series = extract_timeseries_from_API (row, tmin, tmax,type_timeseries, url_lizard) - obs_series['obs'] = measurements.squeeze() - obs_series = check_status_obs(obs_series, measurements) - obs_col.loc[idx] = obs_series - - return obs_col - - -#%% get_obs_list_from_extent - -def get_obs_list_from_extent(extent, extract_timeseries = True, - tmin = None, tmax = None, - type_timeseries = 'merge', - url_lizard='https://vitens.lizard.net/api/v4/'): - ''' - get all observations within a specified extent + + if isinstance(codes, str): + codes = [codes] + + if not hasattr(codes, "__iter__"): + raise TypeError("argument 'codes' should be an iterable") + + l = [] + for code in codes: + groundwaterstation_metadata = get_metadata_mw_from_code(code) + if tube_nr == "all": + for metadata_tube in groundwaterstation_metadata["filters"]: + tube_nr = int(metadata_tube["code"][-3:]) + o = ObsClass.from_lizard( + code, + tube_nr, + tmin, + tmax, + type_timeseries, + only_metadata=only_metadata, + ) + l.append(o) + else: + o = ObsClass.from_lizard( + code, tube_nr, tmin, tmax, type_timeseries, only_metadata=only_metadata + ) + l.append(o) + + return l + + +def get_obs_list_from_extent( + extent, + ObsClass, + tube_nr="all", + tmin=None, + tmax=None, + type_timeseries="merge", + only_metadata=False, + page_size=100, + nr_threads=10, +): + """ + get all observations within a specified extent Parameters ---------- - extent : list or a shapefile - get groundwater monitoring wells wihtin this extent [xmin, ymin, xmax, ymax] - or within a predefined Polygon from a shapefile - extract_timeseries : Bool, optional - Extract timeseries or not, if not only metadata are returned + extent : list or a shapefile + get groundwater monitoring wells wihtin this extent [xmin, xmax, ymin, ymax] + or within a predefined Polygon from a shapefile + ObsClass : type + class of the observations, e.g. GroundwaterObs + tube_nr : lst of str + list of tube numbers of the monitoring wells that should be selected. + By default 'all' available tubes are selected. tmin : str YYYY-m-d, optional start of the observations, by default the entire serie is returned tmax : Ttr YYYY-m-d, optional end of the observations, by default the entire serie is returned type_timeseries : str, optional merge: the hand and diver measurements into one time series (merge; default) or - combine: keeps hand and diver measurements separeted + combine: keeps hand and diver measurements separeted The default is merge. - url_lizard : str - location of the LIZARD-API. + only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False. + Returns ------- obs_col : TYPE ObsCollection DataFrame with the 'obs' column - ''' - - if type(extent) == list: - polygon_T = extent_to_polygon(extent) - + """ + + if isinstance(extent, (list, tuple)): + polygon_T = extent_to_wgs84_polygon(extent) - - elif extent.endswith('.shp'): + elif isinstance(extent, str) or isinstance(extent, pathlib.PurePath): polygon = geopandas.read_file(extent) - polygon_T = polygon.to_crs("WGS84","EPSG:28992").loc[0,"geometry"] - - - else: - print("Extent should be a shapefile or a list of coordinates") - return - - r_results, groundwaterstation_info= get_API_results_from_extent(polygon_T, url_lizard) - - obs_col = pd.DataFrame(columns = ['monitoring_well', 'tube_nr', 'name', 'x', 'y', 'tube_top', - 'ground_level', 'screen_top', 'screen_bottom', 'status', - 'timeseries_available', 'start_hand', 'start_diver', 'source', - 'unit','obs']) - - groundwaterstation_info = [(series, 'all', url_lizard) for _, series in groundwaterstation_info.iterrows()] - groundwaterstation_filters = pd.DataFrame() - - nr_threads = 10 - if nr_threads > r_results: - nr_threads = r_results - - with concurrent.futures.ThreadPoolExecutor(max_workers= nr_threads) as executor: - for result in tqdm(executor.map(lambda args : get_metadata_filter(*args), groundwaterstation_info), - total = r_results, desc='Monitoring well'): - groundwaterstation_filters = pd.concat([groundwaterstation_filters,pd.DataFrame(result)]) - - if extract_timeseries == True: - groundwaterstation_filters = [ - (series, tmin,tmax,type_timeseries, url_lizard) for _, series in groundwaterstation_filters.iterrows()] - - with concurrent.futures.ThreadPoolExecutor(max_workers= nr_threads) as executor: - for measurement, obs_series in tqdm(executor.map(lambda args : extract_timeseries_from_API(*args), groundwaterstation_filters), - total =len(groundwaterstation_filters), desc='Timeseries'): - obs_series['obs'] = measurement - obs_col = pd.concat([obs_col,pd.DataFrame([obs_series])]) - + polygon_T = polygon.to_crs("WGS84", "EPSG:28992").loc[0, "geometry"] else: - groundwaterstation_filters.drop(['uuid_hand','uuid_diver'], axis = 1, inplace = True) - - obs_col = groundwaterstation_filters - - obs_col.reset_index(drop = True, inplace = True) - - return obs_col - - - - - - - - - + raise TypeError("Extent should be a shapefile or a list of coordinates") + + lizard_GWS_endpoint = f"{URL_LIZARD}groundwaterstations/" + url_groundwaterstation_extent = ( + f"{lizard_GWS_endpoint}?geometry__within={polygon_T}&page_size={page_size}" + ) + + groundwaterstation_data = requests.get(url_groundwaterstation_extent).json() + nr_results = groundwaterstation_data["count"] + nr_pages = math.ceil(nr_results / page_size) + + print("Number of monitoring wells: {}".format(nr_results)) + print("Number of pages: {}".format(nr_pages)) + + if nr_threads > nr_pages: + nr_threads = nr_pages + + proces_input = _prepare_API_input(nr_pages, url_groundwaterstation_extent) + + arg_tuple = (ObsClass, tube_nr, tmin, tmax, type_timeseries, only_metadata) + codes = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=nr_threads) as executor: + for result in tqdm( + executor.map(_download, proces_input), total=nr_pages, desc="Page" + ): + codes += [(d["code"],) + arg_tuple for d in result] + + l = [] + with concurrent.futures.ThreadPoolExecutor() as executor: + for obs_list in tqdm( + executor.map(lambda args: get_obs_list_from_codes(*args), codes), + total=len(codes), + desc="monitoring well", + ): + l += obs_list + + return l diff --git a/hydropandas/obs_collection.py b/hydropandas/obs_collection.py index 39d8c5f6..34883af8 100644 --- a/hydropandas/obs_collection.py +++ b/hydropandas/obs_collection.py @@ -20,22 +20,28 @@ logger = logging.getLogger(__name__) -def read_lizard_from_list( - code, - tube_nr='all', - tmin = None, - tmax = None, - type_timeseries = 'merge', - url_lizard='https://vitens.lizard.net/api/v4/'): +def read_lizard( + extent=None, + codes=None, + name="", + tube_nr="all", + tmin=None, + tmax=None, + type_timeseries="merge", + only_metadata=False, +): """ - get all observations from a list of codes of the monitoring wells and a - list of tube numbers + get all observations from a list of codes of the monitoring wells and a + list of tube numbers Parameters ---------- - code : lst of str - codes of the monitoring wells - tube_nr : lst of str + extent : list, shapefile path or None + get groundwater monitoring wells wihtin this extent [xmin, ymin, xmax, ymax] + or within a predefined Polygon from a shapefile + codes : lst of str or None + codes of the monitoring wells + tube_nr : lst of str list of tube numbers of the monitoring wells that should be selected. By default 'all' available tubes are selected. tmin : str YYYY-m-d, optional @@ -44,71 +50,32 @@ def read_lizard_from_list( end of the observations, by default the entire serie is returned type_timeseries : str, optional hand: returns only hand measurements - diver: returns only diver measurements + diver: returns only diver measurements merge: the hand and diver measurements into one time series (merge; default) or - combine: keeps hand and diver measurements separeted + combine: keeps hand and diver measurements separeted The default is merge. - url_lizard : str - location of the LIZARD-API. + only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False. + Returns ------- ObsCollection ObsCollection DataFrame with the 'obs' column """ - oc = ObsCollection.from_lizard_list( - code = code, - tube_nr= tube_nr, - tmin = tmin, - tmax = tmax, - type_timeseries = type_timeseries, - url_lizard=url_lizard) + oc = ObsCollection.from_lizard( + extent=extent, + codes=codes, + name=name, + tube_nr=tube_nr, + tmin=tmin, + tmax=tmax, + type_timeseries=type_timeseries, + only_metadata=only_metadata, + ) return oc -def read_lizard_from_extent( - extent, - extract_timeseries = True, - tmin = None, - tmax = None, - type_timeseries = 'merge', - url_lizard='https://vitens.lizard.net/api/v4/'): - """ - get all observations within a specified extent - - Parameters - ---------- - extent : list or a shapefile - get groundwater monitoring wells wihtin this extent [xmin, ymin, xmax, ymax] - or within a predefined Polygon from a shapefile - extract_timeseries : Bool, optional - Extract timeseries or not, if not only metadata are returned - tmin : str YYYY-m-d, optional - start of the observations, by default the entire serie is returned - tmax : Ttr YYYY-m-d, optional - end of the observations, by default the entire serie is returned - type_timeseries : str, optional - hand: returns only hand measurements - diver: returns only diver measurements - merge: the hand and diver measurements into one time series (merge; default) or - combine: keeps hand and diver measurements separeted - The default is merge. - url_lizard : str - location of the LIZARD-API. - - Returns - ------- - obs_col : TYPE - ObsCollection DataFrame with the 'obs' column - - """ - oc = ObsCollection.from_lizard_extent( - extent = extent, - extract_timeseries = extract_timeseries, - tmin = tmin, - tmax = tmax, - type_timeseries = type_timeseries, - url_lizard=url_lizard) - return oc def read_bro( extent=None, @@ -1282,24 +1249,30 @@ def from_bro( obs_df = util._obslist_to_frame(obs_list) return cls(obs_df, name=name, meta=meta) - + @classmethod - def from_lizard_list( + def from_lizard( cls, - code, - tube_nr='all', - tmin = None, tmax = None, - type_timeseries = 'merge', - url_lizard='https://vitens.lizard.net/api/v4/'): + extent=None, + codes=None, + name="", + tube_nr="all", + tmin=None, + tmax=None, + type_timeseries="merge", + only_metadata=False, + ): """ - get all observations from a list of codes of the monitoring wells and a - list of tube numbers - + get all observations within a specified extent + Parameters ---------- - code : lst of str - codes of the monitoring wells - tube_nr : lst of str + extent : list, shapefile path or None + get groundwater monitoring wells wihtin this extent [xmin, ymin, xmax, ymax] + or within a predefined Polygon from a shapefile + codes : lst of str or None + codes of the monitoring wells + tube_nr : lst of str list of tube numbers of the monitoring wells that should be selected. By default 'all' available tubes are selected. tmin : str YYYY-m-d, optional @@ -1308,77 +1281,47 @@ def from_lizard_list( end of the observations, by default the entire serie is returned type_timeseries : str, optional hand: returns only hand measurements - diver: returns only diver measurements + diver: returns only diver measurements merge: the hand and diver measurements into one time series (merge; default) or - combine: keeps hand and diver measurements separeted + combine: keeps hand and diver measurements separeted The default is merge. - url_lizard : str - location of the LIZARD-API. + only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False. + Returns ------- ObsCollection ObsCollection DataFrame with the 'obs' column - + """ - - from .io.lizard import get_obs_list_from_code - - obs_df = get_obs_list_from_code(code, - tube_nr, - tmin, - tmax, - type_timeseries, - url_lizard) - - return cls(obs_df) - @classmethod - def from_lizard_extent( - cls, - extent, - extract_timeseries = True, - tmin = None, tmax = None, - type_timeseries = 'merge', - url_lizard='https://vitens.lizard.net/api/v4/'): - ''' - get all observations within a specified extent - - Parameters - ---------- - extent : list or a shapefile - get groundwater monitoring wells wihtin this extent [xmin, ymin, xmax, ymax] - or within a predefined Polygon from a shapefile - extract_timeseries : Bool, optional - Extract timeseries or not, if not only metadata are returned - tmin : str YYYY-m-d, optional - start of the observations, by default the entire serie is returned - tmax : Ttr YYYY-m-d, optional - end of the observations, by default the entire serie is returned - type_timeseries : str, optional - hand: returns only hand measurements - diver: returns only diver measurements - merge: the hand and diver measurements into one time series (merge; default) or - combine: keeps hand and diver measurements separeted - The default is merge. - url_lizard : str - location of the LIZARD-API. - - Returns - ------- - obs_col : TYPE - ObsCollection DataFrame with the 'obs' column - - ''' - - from .io.lizard import get_obs_list_from_extent - - obs_df = get_obs_list_from_extent(extent, - extract_timeseries, - tmin, - tmax, - type_timeseries, - url_lizard) - return cls(obs_df) + from .io.lizard import get_obs_list_from_extent, get_obs_list_from_codes + + if extent is not None: + obs_list = get_obs_list_from_extent( + extent, + obs.GroundwaterObs, + tube_nr, + tmin, + tmax, + type_timeseries, + only_metadata=only_metadata, + ) + elif codes is not None: + obs_list = get_obs_list_from_codes( + codes, + obs.GroundwaterObs, + tube_nr, + tmin, + tmax, + type_timeseries, + only_metadata=only_metadata, + ) + else: + raise ValueError("specify codes or extent") + + return cls(obs_list, name=name) @classmethod def from_bronhouderportaal_bro( diff --git a/hydropandas/observation.py b/hydropandas/observation.py index 55fc3a57..b92e5239 100644 --- a/hydropandas/observation.py +++ b/hydropandas/observation.py @@ -580,6 +580,9 @@ def from_bro( drop_duplicate_times : bool, optional if True rows with a duplicate time stamp are removed keeping only the first row. The default is True. + only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False Returns ------- @@ -618,14 +621,14 @@ def from_bro( ) @classmethod - def from_lizard_code( + def from_lizard( cls, code, tube_nr=None, tmin=None, tmax=None, type_timeseries="merge", - url_lizard="https://vitens.lizard.net/api/v4/", + only_metadata=False, ): """ extracts the metadata and timeseries of a observation well from a LIZARD-API based on @@ -648,8 +651,10 @@ def from_lizard_code( merge: the hand and diver measurements into one time series (merge; default) or combine: keeps hand and diver measurements separeted The default is merge. - url_lizard : str - location of the LIZARD-API. + only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False. + Returns ------- @@ -658,11 +663,32 @@ def from_lizard_code( from .io import lizard - measurements, meta = lizard.read_lizard_groundwater_from_code( - code, tube_nr, tmin, tmax, type_timeseries, url_lizard + measurements, meta = lizard.get_lizard_groundwater( + code, + tube_nr, + tmin, + tmax, + type_timeseries, + only_metadata=only_metadata, + ) + return cls( + measurements, + name=meta.pop("name"), + x=meta.pop("x"), + y=meta.pop("y"), + source=meta.pop("source"), + unit=meta.pop("unit"), + screen_bottom=meta.pop("screen_bottom"), + screen_top=meta.pop("screen_top"), + ground_level=meta.pop("ground_level"), + metadata_available=meta.pop("metadata_available"), + monitoring_well=meta.pop("monitoring_well"), + tube_nr=meta.pop("tube_nr"), + tube_top=meta.pop("tube_top"), + meta=meta, ) - return cls(measurements, meta=meta) + @classmethod def from_bronhouderportaal_bro( cls, path, diff --git a/tests/test_012_lizard.py b/tests/test_012_lizard.py index 008b7fea..299666b8 100644 --- a/tests/test_012_lizard.py +++ b/tests/test_012_lizard.py @@ -1,7 +1,18 @@ -# -*- coding: utf-8 -*- -""" -Created on Thu Aug 31 16:01:18 2023 +import hydropandas as hpd -@author: SprongA -""" +def test_single_observation(): + code = "27BP0003" + o = hpd.GroundwaterObs.from_lizard(code) + assert o.tube_nr == 1 + + +def test_extent(): + extent = [201500, 202000, 502000, 502200] + oc = hpd.read_lizard(extent) + assert not oc.empty + + +def test_codes(): + oc = hpd.read_lizard(codes="27BP0003") + assert not oc.empty From 4ff32a4f75fdfb8418b80ecd22aab6e363f9f9bf Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Wed, 3 Jan 2024 10:43:04 +0100 Subject: [PATCH 12/23] fix isort and linting --- hydropandas/io/lizard.py | 63 ++++++++++++++++------------------- hydropandas/obs_collection.py | 2 +- 2 files changed, 30 insertions(+), 35 deletions(-) diff --git a/hydropandas/io/lizard.py b/hydropandas/io/lizard.py index bb3a7e02..4136b405 100644 --- a/hydropandas/io/lizard.py +++ b/hydropandas/io/lizard.py @@ -1,14 +1,14 @@ +import concurrent.futures +import logging +import math +import pathlib + +import geopandas import pandas as pd import requests from pyproj import Transformer -import logging -import numpy as np -import pathlib from shapely.geometry import Polygon from tqdm import tqdm -import math -import geopandas -import concurrent.futures logger = logging.getLogger(__name__) @@ -136,14 +136,14 @@ def get_metadata_mw_from_code(code): ][0] except IndexError: - raise ValueError("Code is invalid") + raise ValueError(f"Code {code} is invalid") return groundwaterstation_metadata def _prepare_API_input(nr_pages, url_groundwater): """ - prepare API data pages within the defined extent + get API data pages within the defined extent Parameters ---------- @@ -154,42 +154,38 @@ def _prepare_API_input(nr_pages, url_groundwater): Returns ------- - proces_input : list + urls : list list of the page number and the corresponding url """ - proces_input = [] + urls = [] for page in range(nr_pages): true_page = ( page + 1 ) # Het echte paginanummer wordt aan de import thread gekoppeld - url = url_groundwater + "&page={}".format(true_page) - item = [true_page, url] - proces_input += [item] - return proces_input + urls = [url_groundwater + "&page={}".format(true_page)] + return urls -def _download(data): +def _download(url, timeout=1800): """ Function to download the data from the API using the ThreadPoolExecutor Parameters ---------- - data : list - list of the page number and the corresponding url + url : str + url of an API page + timeout : int, optional + number of seconds to wait before terminating request Returns ------- - None. + dictionary with timeseries data """ - page, url = data - try: - data = requests.get(url=url) - # succes += 1 - data = data.json()["results"] - except: - data = [] + data = requests.get(url=url, timeout=timeout) + data = data.json()["results"] + return data @@ -200,7 +196,8 @@ def get_metadata_tube(metadata_mw, tube_nr): Parameters ---------- metadata_mw : dict - dictionary with all available metadata of the monitoring well and all its filters + dictionary with all available metadata of the monitoring well and all its + filters tube_nr : int or None select metadata from a specific tube number @@ -300,10 +297,10 @@ def get_timeseries_uuid(uuid, code, tube_nr, tmin, tmax, page_size=100000): url_timeseries = URL_LIZARD + "timeseries/{}".format(uuid) - if tmin != None: + if tmin is not None: tmin = pd.to_datetime(tmin).isoformat("T") - if tmax != None: + if tmax is not None: tmax = pd.to_datetime(tmax).isoformat("T") params = {"start": tmin, "end": tmax, "page_size": page_size} @@ -351,7 +348,7 @@ def _merge_timeseries(hand_measurements, diver_measurements): if hand_measurements.empty and diver_measurements.empty: measurements = pd.DataFrame() - elif diver_measurements.first_valid_index() == None: + elif diver_measurements.first_valid_index() is None: measurements = hand_measurements print( "no diver measuremets available for {}".format( @@ -419,7 +416,7 @@ def get_timeseries_tube(tube_metadata, tmin, tmax, type_timeseries): type of timeseries to; hand: returns only hand measurements diver: returns only diver measurements - merge: the hand and diver measurements into one time series (merge; default) or + merge: the hand and diver measurements into one time series (default) combine: keeps hand and diver measurements separeted The default is merge. @@ -672,14 +669,12 @@ class of the observations, e.g. GroundwaterObs if nr_threads > nr_pages: nr_threads = nr_pages - proces_input = _prepare_API_input(nr_pages, url_groundwaterstation_extent) + urls = _prepare_API_input(nr_pages, url_groundwaterstation_extent) arg_tuple = (ObsClass, tube_nr, tmin, tmax, type_timeseries, only_metadata) codes = [] with concurrent.futures.ThreadPoolExecutor(max_workers=nr_threads) as executor: - for result in tqdm( - executor.map(_download, proces_input), total=nr_pages, desc="Page" - ): + for result in tqdm(executor.map(_download, urls), total=nr_pages, desc="Page"): codes += [(d["code"],) + arg_tuple for d in result] l = [] diff --git a/hydropandas/obs_collection.py b/hydropandas/obs_collection.py index 34883af8..406adf0f 100644 --- a/hydropandas/obs_collection.py +++ b/hydropandas/obs_collection.py @@ -1296,7 +1296,7 @@ def from_lizard( """ - from .io.lizard import get_obs_list_from_extent, get_obs_list_from_codes + from .io.lizard import get_obs_list_from_codes, get_obs_list_from_extent if extent is not None: obs_list = get_obs_list_from_extent( From 0c78693e79898cf7ae18574ca6fd0177bb9cecf3 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Wed, 3 Jan 2024 10:44:36 +0100 Subject: [PATCH 13/23] Update LICENSE (#178) --- LICENSE | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/LICENSE b/LICENSE index 21f1959a..ff3c8765 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2020 O.E. Ebbens, D.A. Brakenhoff, R. Calje +Copyright (c) 2020 O.N. Ebbens, D.A. Brakenhoff, R. Calje Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal From fb1a2f67ffd04557a0598ee472f8705be61ac74c Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Wed, 3 Jan 2024 10:46:09 +0100 Subject: [PATCH 14/23] line length --- hydropandas/obs_collection.py | 2 +- hydropandas/observation.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hydropandas/obs_collection.py b/hydropandas/obs_collection.py index 406adf0f..43e32f95 100644 --- a/hydropandas/obs_collection.py +++ b/hydropandas/obs_collection.py @@ -1282,7 +1282,7 @@ def from_lizard( type_timeseries : str, optional hand: returns only hand measurements diver: returns only diver measurements - merge: the hand and diver measurements into one time series (merge; default) or + merge: the hand and diver measurements into one time series (default) combine: keeps hand and diver measurements separeted The default is merge. only_metadata : bool, optional diff --git a/hydropandas/observation.py b/hydropandas/observation.py index b92e5239..7c195ed6 100644 --- a/hydropandas/observation.py +++ b/hydropandas/observation.py @@ -631,8 +631,8 @@ def from_lizard( only_metadata=False, ): """ - extracts the metadata and timeseries of a observation well from a LIZARD-API based on - the code of a monitoring well + extracts the metadata and timeseries of a observation well from a LIZARD-API + based on the code of a monitoring well Parameters ---------- @@ -648,7 +648,7 @@ def from_lizard( type_timeseries : str, optional hand: returns only hand measurements diver: returns only diver measurements - merge: the hand and diver measurements into one time series (merge; default) or + merge: the hand and diver measurements into one time series (default) combine: keeps hand and diver measurements separeted The default is merge. only_metadata : bool, optional From a440885627042d3afc56f8a02cb7ab463910e2c0 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Wed, 3 Jan 2024 10:48:47 +0100 Subject: [PATCH 15/23] change ambigious name --- hydropandas/io/lizard.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hydropandas/io/lizard.py b/hydropandas/io/lizard.py index 4136b405..cf726c88 100644 --- a/hydropandas/io/lizard.py +++ b/hydropandas/io/lizard.py @@ -677,13 +677,13 @@ class of the observations, e.g. GroundwaterObs for result in tqdm(executor.map(_download, urls), total=nr_pages, desc="Page"): codes += [(d["code"],) + arg_tuple for d in result] - l = [] + obs_list = [] with concurrent.futures.ThreadPoolExecutor() as executor: - for obs_list in tqdm( + for obs_list_mw in tqdm( executor.map(lambda args: get_obs_list_from_codes(*args), codes), total=len(codes), desc="monitoring well", ): - l += obs_list + obs_list += obs_list_mw - return l + return obs_list From 5cccc3b647b09ac0a87d3e02f9eaeb3ac5e1d998 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Wed, 3 Jan 2024 10:51:48 +0100 Subject: [PATCH 16/23] forgot one --- hydropandas/io/lizard.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hydropandas/io/lizard.py b/hydropandas/io/lizard.py index cf726c88..c413232d 100644 --- a/hydropandas/io/lizard.py +++ b/hydropandas/io/lizard.py @@ -567,8 +567,8 @@ class of the observations, e.g. GroundwaterObs Returns ------- - ObsCollection - ObsCollection DataFrame with the 'obs' column + obs_list + list of observations """ @@ -578,7 +578,7 @@ class of the observations, e.g. GroundwaterObs if not hasattr(codes, "__iter__"): raise TypeError("argument 'codes' should be an iterable") - l = [] + obs_list = [] for code in codes: groundwaterstation_metadata = get_metadata_mw_from_code(code) if tube_nr == "all": @@ -592,14 +592,14 @@ class of the observations, e.g. GroundwaterObs type_timeseries, only_metadata=only_metadata, ) - l.append(o) + obs_list.append(o) else: o = ObsClass.from_lizard( code, tube_nr, tmin, tmax, type_timeseries, only_metadata=only_metadata ) - l.append(o) + obs_list.append(o) - return l + return obs_list def get_obs_list_from_extent( From 0c5f44197d096a0313fd29328ecf1dac3765840d Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Wed, 3 Jan 2024 12:13:15 +0100 Subject: [PATCH 17/23] rename test file --- tests/{test_012_lizard.py => test_013_lizard.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{test_012_lizard.py => test_013_lizard.py} (100%) diff --git a/tests/test_012_lizard.py b/tests/test_013_lizard.py similarity index 100% rename from tests/test_012_lizard.py rename to tests/test_013_lizard.py From 510fa9f266ceca8314a16f92c7110506e5db180d Mon Sep 17 00:00:00 2001 From: Artesia Water <31697400+ArtesiaWater@users.noreply.github.com> Date: Mon, 8 Jan 2024 12:50:21 +0100 Subject: [PATCH 18/23] version bump (#180) --- hydropandas/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydropandas/version.py b/hydropandas/version.py index 9d4a2b65..61fb31ca 100644 --- a/hydropandas/version.py +++ b/hydropandas/version.py @@ -1 +1 @@ -__version__ = "0.9.4b" +__version__ = "0.10.0" From 766e38e3ffc48cc620ced80276ce45867421dc1a Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Tue, 9 Jan 2024 12:44:59 +0100 Subject: [PATCH 19/23] Patch tests (#181) * version bump * test gmn with less tubes --- tests/test_011_bro.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_011_bro.py b/tests/test_011_bro.py index d2d069c4..7c9ebf1d 100644 --- a/tests/test_011_bro.py +++ b/tests/test_011_bro.py @@ -19,7 +19,7 @@ def test_metadata_full(): def test_groundwater_monitoring_net_metadata(): - bro_id = "GMN000000000163" + bro_id = "GMN000000000001" bro.get_obs_list_from_gmn(bro_id, hpd.GroundwaterObs, only_metadata=True) From acb77ea82f1938083e66f4d6736c471d16c2fa24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 9 Jan 2024 17:10:10 +0100 Subject: [PATCH 20/23] use values instead of data to support dask delayed datasets --- hydropandas/extensions/gwobs.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hydropandas/extensions/gwobs.py b/hydropandas/extensions/gwobs.py index d88fb69e..09cab0ec 100644 --- a/hydropandas/extensions/gwobs.py +++ b/hydropandas/extensions/gwobs.py @@ -298,19 +298,19 @@ def get_zvec(x, y, gwf=None, ds=None): cid = nlmod.dims.xy_to_icell2d((x, y), ds) sel = ds.sel(icell2d=cid) - zvec = np.concatenate(([sel["top"].data], sel["botm"].data)) + zvec = np.concatenate(([sel["top"].values], sel["botm"].values)) mask = np.isnan(zvec) idx = np.where(~mask, np.arange(mask.size), 0) np.maximum.accumulate(idx, axis=0, out=idx) zvec[mask] = zvec[idx[mask]] else: sel = ds.sel(x=x, y=y, method="nearest") - first_notna = np.nonzero(np.isfinite(np.atleast_1d(sel["top"].data)))[0][0] - if sel["top"].data.shape == tuple(): - top = np.atleast_1d(sel["top"].data) + first_notna = np.nonzero(np.isfinite(np.atleast_1d(sel["top"].values)))[0][0] + if sel["top"].values.shape == tuple(): + top = np.atleast_1d(sel["top"].values) else: - top = np.atleast_1d(sel["top"].data[[first_notna]]) - zvec = np.concatenate([top, sel["botm"].data]) + top = np.atleast_1d(sel["top"].values[[first_notna]]) + zvec = np.concatenate([top, sel["botm"].values]) mask = np.isnan(zvec) idx = np.where(~mask, np.arange(mask.size), 0) np.maximum.accumulate(idx, axis=0, out=idx) From 794ef38264f0a7d6b4718218482a6261c470eb74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 9 Jan 2024 17:10:33 +0100 Subject: [PATCH 21/23] add series_per_unique_location plots --- hydropandas/extensions/plots.py | 51 +++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/hydropandas/extensions/plots.py b/hydropandas/extensions/plots.py index 491a552b..2c5f2044 100644 --- a/hydropandas/extensions/plots.py +++ b/hydropandas/extensions/plots.py @@ -4,7 +4,9 @@ import matplotlib.pyplot as plt import numpy as np from matplotlib.gridspec import GridSpec +from tqdm.auto import tqdm +from ..observation import GroundwaterObs from . import accessor logger = logging.getLogger(__name__) @@ -644,6 +646,55 @@ def section_plot( return fig, axes + def series_per_unique_location(self, plot_column, savefig=True, outputdir="."): + """Plot time series per unique location. + + Unique location is derived from unique x, y coordinates. + + Parameters + ---------- + plot_column : str + name of column containing time series data + savefig : bool, optional + save figures, by default True + outputdir : str, optional + path to output directory, by default the current directory (".") + """ + gr = self._obj.groupby(by=["x", "y"]) + for _, group in tqdm( + gr, desc="Plotting series per unique location", total=len(gr) + ): + f, ax = plt.subplots(1, 1, figsize=(10, 3)) + for name, row in group.iterrows(): + if isinstance(row.obs, GroundwaterObs): + lbl = ( + f"{name} (NAP{row['screen_top']:+.1f}" + f"-{row['screen_bottom']:+.1f}m)" + ) + else: + lbl = f"{name}" + ax.plot( + row.obs.index, + row.obs[plot_column], + label=lbl, + ) + ax.legend( + loc=(0, 1), + frameon=False, + ncol=min(group.index.size, 3), + fontsize="x-small", + ) + ax.set_ylabel(row["unit"]) + ax.grid(True) + f.tight_layout() + if savefig: + if isinstance(row.obs, GroundwaterObs): + name = name.split("-")[0] + f.savefig( + os.path.join(outputdir, f"{name}.png"), bbox_inches="tight", dpi=150 + ) + plt.close(f) + @accessor.register_obs_accessor("plots") class ObsPlots: From 83604877cadfd12ab383e2fd2d4996d72f2548df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 9 Jan 2024 17:27:39 +0100 Subject: [PATCH 22/23] rename series_per_unique_loc to series_per_group - make groupby parameter a kwarg - default to x,y location --- hydropandas/extensions/plots.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/hydropandas/extensions/plots.py b/hydropandas/extensions/plots.py index 2c5f2044..f53d099b 100644 --- a/hydropandas/extensions/plots.py +++ b/hydropandas/extensions/plots.py @@ -646,24 +646,28 @@ def section_plot( return fig, axes - def series_per_unique_location(self, plot_column, savefig=True, outputdir="."): - """Plot time series per unique location. + def series_per_group(self, plot_column, by=None, savefig=True, outputdir="."): + """Plot time series per group. - Unique location is derived from unique x, y coordinates. + The default groupby is based on identical x, y coordinates, so plots unique + time series per location. Parameters ---------- plot_column : str name of column containing time series data + by : (list of) str or (list of) array-like + groupby parameters, default is None which sets groupby to + columns ["x", "y"]. savefig : bool, optional save figures, by default True outputdir : str, optional path to output directory, by default the current directory (".") """ - gr = self._obj.groupby(by=["x", "y"]) - for _, group in tqdm( - gr, desc="Plotting series per unique location", total=len(gr) - ): + if by is None: + by = ["x", "y"] + gr = self._obj.groupby(by=by) + for _, group in tqdm(gr, desc="Plotting series per group", total=len(gr)): f, ax = plt.subplots(1, 1, figsize=(10, 3)) for name, row in group.iterrows(): if isinstance(row.obs, GroundwaterObs): From 337db2ab9c8ef062f5076d7711fbb84445a53369 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Tue, 9 Jan 2024 18:12:21 +0100 Subject: [PATCH 23/23] address my own comments on PR - add some notes to lizard.py - improve some docstrings, styling, etc. --- hydropandas/extensions/gwobs.py | 4 +- hydropandas/io/lizard.py | 163 ++++++++++++++------------------ hydropandas/obs_collection.py | 104 +++++++++----------- hydropandas/observation.py | 88 ++++++++--------- hydropandas/util.py | 10 +- 5 files changed, 162 insertions(+), 207 deletions(-) diff --git a/hydropandas/extensions/gwobs.py b/hydropandas/extensions/gwobs.py index 09cab0ec..3391ee3e 100644 --- a/hydropandas/extensions/gwobs.py +++ b/hydropandas/extensions/gwobs.py @@ -305,7 +305,9 @@ def get_zvec(x, y, gwf=None, ds=None): zvec[mask] = zvec[idx[mask]] else: sel = ds.sel(x=x, y=y, method="nearest") - first_notna = np.nonzero(np.isfinite(np.atleast_1d(sel["top"].values)))[0][0] + first_notna = np.nonzero(np.isfinite(np.atleast_1d(sel["top"].values)))[0][ + 0 + ] if sel["top"].values.shape == tuple(): top = np.atleast_1d(sel["top"].values) else: diff --git a/hydropandas/io/lizard.py b/hydropandas/io/lizard.py index c413232d..d93e6358 100644 --- a/hydropandas/io/lizard.py +++ b/hydropandas/io/lizard.py @@ -1,7 +1,7 @@ -import concurrent.futures import logging import math import pathlib +from concurrent.futures import ThreadPoolExecutor import geopandas import pandas as pd @@ -12,13 +12,21 @@ logger = logging.getLogger(__name__) +# TODO: +# - check transformation from EPSG:28992 to WGS84 (elsewhere in hydropandas we use +# another definition for EPSG:28992 that is provided in util.py) -URL_LIZARD = "https://vitens.lizard.net/api/v4/" +# NOTE: currently only the vitens API is officially supported. If/when new endpoints +# are added we should check whether we want to add the URL as argument or add supported +# sources to this dictionary: +LIZARD_APIS = {"vitens": "https://vitens.lizard.net/api/v4/"} def check_status_obs(metadata, timeseries): - """ - checks if a monitoring tube is still active + """Checks if a monitoring tube is still active. + + If there are no measurements in the last 180 days, the monitoring + tube is considered inactive. Parameters ---------- @@ -30,7 +38,6 @@ def check_status_obs(metadata, timeseries): Returns ------- metadata DataFrame including the status of the monitoring well - """ if timeseries.empty: metadata["status"] = "no timeseries available" @@ -48,26 +55,23 @@ def check_status_obs(metadata, timeseries): return metadata -def extent_to_wgs84_polygon(coordinates): - """ - Translates a list of coordinates (xmin,xmax, ymin, ymax) to a polygon with - coordinate system WGS84 +def extent_to_wgs84_polygon(extent): + """Translates an extent (xmin, xmax, ymin, ymax) to a polygon with coordinate system + WGS84. Parameters ---------- - coordinates : lst or tuple - list of the modelextent in epsg 28992 within which the observations - are collected. + extent : list or tuple + extent in epsg 28992 within which the observations are collected. Returns ------- - polygon of the modelextent with coordinate system WGS84 - + polygon of the extent with coordinate system WGS84 """ transformer = Transformer.from_crs("EPSG:28992", "WGS84") - lon_min, lat_min = transformer.transform(coordinates[0], coordinates[2]) - lon_max, lat_max = transformer.transform(coordinates[1], coordinates[3]) + lon_min, lat_min = transformer.transform(extent[0], extent[2]) + lon_max, lat_max = transformer.transform(extent[1], extent[3]) poly_T = Polygon( [(lat_min, lon_min), (lat_max, lon_min), (lat_max, lon_max), (lat_min, lon_max)] @@ -77,19 +81,17 @@ def extent_to_wgs84_polygon(coordinates): def translate_flag(timeseries): - """ - Translates Vitens Lizard flags from interter to text + """Translates Vitens Lizard flags from integer to text. Parameters ---------- timeseries : pandas.DataFrame - timeseries of a monitoring well with flags + timeseries of a monitoring well with flags Returns ------- timeseries : pandas.DataFrame timeseries with translated quality flags - """ translate_dic = { 0: "betrouwbaar", @@ -106,15 +108,16 @@ def translate_flag(timeseries): return timeseries -def get_metadata_mw_from_code(code): - """ - extracts the Groundwater Station parameters from a monitoring well based - on the code of the monitoring well +def get_metadata_mw_from_code(code, source="vitens"): + """Extracts the Groundwater Station parameters from a monitoring well based on the + code of the monitoring well. Parameters ---------- code : str code of the monitoring well + source : str + source indicating URL endpoint, currently only "vitens" is officially supported. Raises ------ @@ -125,9 +128,8 @@ def get_metadata_mw_from_code(code): ------- groundwaterstation_metadata : dict dictionary with all available metadata of the monitoring well and its filters - """ - lizard_GWS_endpoint = f"{URL_LIZARD}groundwaterstations/" + lizard_GWS_endpoint = f"{LIZARD_APIS[source]}groundwaterstations/" url_groundwaterstation_code = f"{lizard_GWS_endpoint}?code={code}" try: @@ -142,8 +144,7 @@ def get_metadata_mw_from_code(code): def _prepare_API_input(nr_pages, url_groundwater): - """ - get API data pages within the defined extent + """Get API data pages within the defined extent. Parameters ---------- @@ -156,20 +157,16 @@ def _prepare_API_input(nr_pages, url_groundwater): ------- urls : list list of the page number and the corresponding url - """ urls = [] for page in range(nr_pages): - true_page = ( - page + 1 - ) # Het echte paginanummer wordt aan de import thread gekoppeld + true_page = page + 1 # The real page number is attached to the import thread urls = [url_groundwater + "&page={}".format(true_page)] return urls def _download(url, timeout=1800): - """ - Function to download the data from the API using the ThreadPoolExecutor + """Function to download the data from the API using the ThreadPoolExecutor. Parameters ---------- @@ -181,7 +178,6 @@ def _download(url, timeout=1800): Returns ------- dictionary with timeseries data - """ data = requests.get(url=url, timeout=timeout) data = data.json()["results"] @@ -190,8 +186,7 @@ def _download(url, timeout=1800): def get_metadata_tube(metadata_mw, tube_nr): - """ - extract the metadata for a specific tube from the monitoring well metadata + """Extract the metadata for a specific tube from the monitoring well metadata. Parameters ---------- @@ -271,31 +266,29 @@ def get_metadata_tube(metadata_mw, tube_nr): return metadata -def get_timeseries_uuid(uuid, code, tube_nr, tmin, tmax, page_size=100000): +def get_timeseries_uuid(uuid, tmin, tmax, page_size=100000, source="vitens"): """ Get the time series (hand or diver) using the uuid. ---------- uuid : str Universally Unique Identifier of the tube and type of time series. - code : str - code or name of the monitoring well - tube_nr : int - select specific tube number tmin : str YYYY-m-d start of the observations, by default the entire serie is returned tmax : int YYYY-m-d end of the observations, by default the entire serie is returned page_size : int, optional Query parameter which can extend the response size. The default is 100000. + source : str, optional + source indicating URL endpoint, currently only "vitens" is officially supported Returns ------- - pandas DataFrame with the timeseries of the monitoring well - + pd.DataFrame + pandas DataFrame with the timeseries of the monitoring well """ - url_timeseries = URL_LIZARD + "timeseries/{}".format(uuid) + url_timeseries = LIZARD_APIS[source] + "timeseries/{}".format(uuid) if tmin is not None: tmin = pd.to_datetime(tmin).isoformat("T") @@ -330,8 +323,7 @@ def get_timeseries_uuid(uuid, code, tube_nr, tmin, tmax, page_size=100000): def _merge_timeseries(hand_measurements, diver_measurements): - """ - merges the timeseries of the hand and diver measurements into one timeserie + """Merges the timeseries of the hand and diver measurements into one timeserie. Parameters ---------- @@ -343,7 +335,6 @@ def _merge_timeseries(hand_measurements, diver_measurements): Returns ------- DataFrame where hand and diver measurements are merged in one timeseries - """ if hand_measurements.empty and diver_measurements.empty: measurements = pd.DataFrame() @@ -366,8 +357,7 @@ def _merge_timeseries(hand_measurements, diver_measurements): def _combine_timeseries(hand_measurements, diver_measurements): - """ - combines the timeseries of the hand and diver measurements into one DataFrame + """Combines the timeseries of the hand and diver measurements into one DataFrame. Parameters ---------- @@ -380,7 +370,6 @@ def _combine_timeseries(hand_measurements, diver_measurements): ------- a combined DataFrame with both hand, and diver measurements DESCRIPTION. - """ hand_measurements.rename( columns={"value": "value_hand", "flag": "flag_hand"}, inplace=True @@ -400,9 +389,8 @@ def _combine_timeseries(hand_measurements, diver_measurements): def get_timeseries_tube(tube_metadata, tmin, tmax, type_timeseries): - """ - extracts multiple timeseries (hand and/or diver measurements) for a specific - tube using the Lizard API. + """Extracts multiple timeseries (hand and/or diver measurements) for a specific tube + using the Lizard API. Parameters ---------- @@ -413,12 +401,10 @@ def get_timeseries_tube(tube_metadata, tmin, tmax, type_timeseries): tmax : Ttr YYYY-m-d, optional end of the observations, by default the entire serie is returned type_timeseries : str, optional - type of timeseries to; - hand: returns only hand measurements - diver: returns only diver measurements - merge: the hand and diver measurements into one time series (default) - combine: keeps hand and diver measurements separeted - The default is merge. + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (default) + combine: keeps hand and diver measurements separeted Returns ------- @@ -426,7 +412,6 @@ def get_timeseries_tube(tube_metadata, tmin, tmax, type_timeseries): timeseries of the monitoring well metadata_df : dict metadata of the monitoring well - """ if tube_metadata["timeseries_type"] is None: return pd.DataFrame(), tube_metadata @@ -435,8 +420,6 @@ def get_timeseries_tube(tube_metadata, tmin, tmax, type_timeseries): if "hand" in tube_metadata["timeseries_type"]: hand_measurements = get_timeseries_uuid( tube_metadata.pop("uuid_hand"), - tube_metadata["name"], - tube_metadata["tube_nr"], tmin, tmax, ) @@ -447,8 +430,6 @@ def get_timeseries_tube(tube_metadata, tmin, tmax, type_timeseries): if "diver" in tube_metadata["timeseries_type"]: diver_measurements = get_timeseries_uuid( tube_metadata.pop("uuid_diver"), - tube_metadata["name"], - tube_metadata["tube_nr"], tmin, tmax, ) @@ -483,9 +464,8 @@ def get_lizard_groundwater( type_timeseries="merge", only_metadata=False, ): - """ - extracts the metadata and timeseries of an observation well from a - LIZARD-API based on the code of a monitoring well + """Extracts the metadata and timeseries of an observation well from a LIZARD-API + based on the code of a monitoring well. Parameters ---------- @@ -501,16 +481,18 @@ def get_lizard_groundwater( type_timeseries : str, optional hand: returns only hand measurements diver: returns only diver measurements - merge: the hand and diver measurements into one time series (merge; default) or + merge: the hand and diver measurements into one time series (default) combine: keeps hand and diver measurements separated - The default is merge. only_metadata : bool, optional if True only metadata is returned and no time series data. The default is False. Returns ------- - returns a DataFrame with metadata and timeseries + measurements : pd.DataFrame + returns a DataFrame with metadata and timeseries + tube_metadata : dict + dictionary containing metadata """ groundwaterstation_metadata = get_metadata_mw_from_code(code) @@ -537,13 +519,12 @@ def get_obs_list_from_codes( type_timeseries="merge", only_metadata=False, ): - """ - get all observations from a list of codes of the monitoring wells and a - list of tube numbers + """Get all observations from a list of codes of the monitoring wells and a list of + tube numbers. Parameters ---------- - codes : lst of str or str + codes : list of str or str codes of the monitoring wells ObsClass : type class of the observations, e.g. GroundwaterObs @@ -557,19 +538,16 @@ class of the observations, e.g. GroundwaterObs type_timeseries : str, optional hand: returns only hand measurements diver: returns only diver measurements - merge: the hand and diver measurements into one time series (merge; default) or + merge: the hand and diver measurements into one time series (default) combine: keeps hand and diver measurements separeted - The default is merge. only_metadata : bool, optional if True only metadata is returned and no time series data. The default is False. - Returns ------- obs_list list of observations - """ if isinstance(codes, str): @@ -612,12 +590,13 @@ def get_obs_list_from_extent( only_metadata=False, page_size=100, nr_threads=10, + source="vitens", ): - """ - get all observations within a specified extent + """Get all observations within a specified extent. + Parameters ---------- - extent : list or a shapefile + extent : list or shapefile get groundwater monitoring wells wihtin this extent [xmin, xmax, ymin, ymax] or within a predefined Polygon from a shapefile ObsClass : type @@ -625,10 +604,12 @@ class of the observations, e.g. GroundwaterObs tube_nr : lst of str list of tube numbers of the monitoring wells that should be selected. By default 'all' available tubes are selected. - tmin : str YYYY-m-d, optional - start of the observations, by default the entire serie is returned - tmax : Ttr YYYY-m-d, optional - end of the observations, by default the entire serie is returned + tmin : str, optional + start of the observations (format YYYY-m-d), by default the entire series + is returned + tmax : str, optional + end of the observations (format YYYY-m-d), by default the entire series + is returned type_timeseries : str, optional merge: the hand and diver measurements into one time series (merge; default) or combine: keeps hand and diver measurements separeted @@ -636,13 +617,14 @@ class of the observations, e.g. GroundwaterObs only_metadata : bool, optional if True only metadata is returned and no time series data. The default is False. + source : str + source indicating URL endpoint, currently only "vitens" is officially supported. Returns ------- obs_col : TYPE ObsCollection DataFrame with the 'obs' column - """ if isinstance(extent, (list, tuple)): @@ -650,11 +632,12 @@ class of the observations, e.g. GroundwaterObs elif isinstance(extent, str) or isinstance(extent, pathlib.PurePath): polygon = geopandas.read_file(extent) + # TODO: check this transformation polygon_T = polygon.to_crs("WGS84", "EPSG:28992").loc[0, "geometry"] else: raise TypeError("Extent should be a shapefile or a list of coordinates") - lizard_GWS_endpoint = f"{URL_LIZARD}groundwaterstations/" + lizard_GWS_endpoint = f"{LIZARD_APIS[source]}groundwaterstations/" url_groundwaterstation_extent = ( f"{lizard_GWS_endpoint}?geometry__within={polygon_T}&page_size={page_size}" ) @@ -673,12 +656,12 @@ class of the observations, e.g. GroundwaterObs arg_tuple = (ObsClass, tube_nr, tmin, tmax, type_timeseries, only_metadata) codes = [] - with concurrent.futures.ThreadPoolExecutor(max_workers=nr_threads) as executor: + with ThreadPoolExecutor(max_workers=nr_threads) as executor: for result in tqdm(executor.map(_download, urls), total=nr_pages, desc="Page"): codes += [(d["code"],) + arg_tuple for d in result] obs_list = [] - with concurrent.futures.ThreadPoolExecutor() as executor: + with ThreadPoolExecutor() as executor: for obs_list_mw in tqdm( executor.map(lambda args: get_obs_list_from_codes(*args), codes), total=len(codes), diff --git a/hydropandas/obs_collection.py b/hydropandas/obs_collection.py index 43e32f95..70a2a6da 100644 --- a/hydropandas/obs_collection.py +++ b/hydropandas/obs_collection.py @@ -1,4 +1,4 @@ -"""module with ObsCollection class for a collection of observations. +"""Module with ObsCollection class for a collection of observations. The ObsCollection class is a subclass of a pandas DataFrame with additional attributes and methods. @@ -30,14 +30,13 @@ def read_lizard( type_timeseries="merge", only_metadata=False, ): - """ - get all observations from a list of codes of the monitoring wells and a - list of tube numbers + """Get all observations from a list of codes of the monitoring wells and a list of + tube numbers. Parameters ---------- extent : list, shapefile path or None - get groundwater monitoring wells wihtin this extent [xmin, ymin, xmax, ymax] + get groundwater monitoring wells within this extent [xmin, ymin, xmax, ymax] or within a predefined Polygon from a shapefile codes : lst of str or None codes of the monitoring wells @@ -45,15 +44,14 @@ def read_lizard( list of tube numbers of the monitoring wells that should be selected. By default 'all' available tubes are selected. tmin : str YYYY-m-d, optional - start of the observations, by default the entire serie is returned + start of the observations, by default the entire time series is returned tmax : Ttr YYYY-m-d, optional - end of the observations, by default the entire serie is returned + end of the observations, by default the entire time series is returned type_timeseries : str, optional hand: returns only hand measurements diver: returns only diver measurements - merge: the hand and diver measurements into one time series (merge; default) or + merge: the hand and diver measurements into one time series (default) combine: keeps hand and diver measurements separeted - The default is merge. only_metadata : bool, optional if True only metadata is returned and no time series data. The default is False. @@ -62,7 +60,6 @@ def read_lizard( ------- ObsCollection ObsCollection DataFrame with the 'obs' column - """ oc = ObsCollection.from_lizard( extent=extent, @@ -88,9 +85,7 @@ def read_bro( epsg=28992, ignore_max_obs=False, ): - """get all the observations within an extent or within a - groundwatermonitoring net. - + """Get all the observations within an extent or within a groundwatermonitoring net. Parameters ---------- @@ -122,7 +117,6 @@ def read_bro( ------- ObsCollection ObsCollection DataFrame with the 'obs' column - """ oc = ObsCollection.from_bro( @@ -182,8 +176,8 @@ def read_dino( name=None, **kwargs, ): - """Read dino observations within an extent from the server or from a - directory with downloaded files. + """Read dino observations within an extent from the server or from a directory with + downloaded files. Parameters ---------- @@ -224,9 +218,9 @@ class of the observations, so far only GroundwaterObs is supported def read_excel(path, meta_sheet_name="metadata"): - """Create an observation collection from an excel file. The excel file - should have the same format as excel files created with the `to_excel` - method of an ObsCollection. + """Create an observation collection from an excel file. The excel file should have + the same format as excel files created with the `to_excel` method of an + ObsCollection. Parameters ---------- @@ -397,8 +391,7 @@ def read_knmi( use_api=True, raise_exceptions=True, ): - """Get knmi observations from a list of locations or a list of - stations. + """Get knmi observations from a list of locations or a list of stations. Parameters ---------- @@ -563,7 +556,7 @@ class of the observations, can be PrecipitationObs, EvaporationObs def read_menyanthes( path, name="", ObsClass=obs.Obs, load_oseries=True, load_stresses=True ): - """read a Menyanthes file + """Read a Menyanthes file. Parameters ---------- @@ -653,7 +646,7 @@ def read_pickle( compression="infer", storage_options=None, ): - """wrapper around pd.read_pickle + """Wrapper around pd.read_pickle. Parameters ---------- @@ -816,7 +809,7 @@ def read_pastastore( class ObsCollection(pd.DataFrame): - """class for a collection of point observations. + """Class for a collection of point observations. An ObsCollection object is a subclass of a pandas.DataFrame and allows for additional attributes and methods. Additional attributes are @@ -830,7 +823,6 @@ class ObsCollection(pd.DataFrame): name of the observation collection meta : dic metadata of the observation collection - """ # temporary properties @@ -1022,8 +1014,8 @@ def _is_consistent(self, check_individual_obs=True): return True def add_observation(self, o, check_consistency=True, **kwargs): - """add an observation to an existing observation collection. If the - observation exists the two observations are merged. + """Add an observation to an existing observation collection. If the observation + exists the two observations are merged. Parameters ---------- @@ -1059,7 +1051,6 @@ def add_observation(self, o, check_consistency=True, **kwargs): Returns ------- None. - """ if check_consistency: if not self._is_consistent(): @@ -1086,8 +1077,8 @@ def add_observation(self, o, check_consistency=True, **kwargs): def add_obs_collection( self, obs_collection, check_consistency=True, inplace=False, **kwargs ): - """add one observation collection to another observation - collection. See add_observation method for more details + """Add one observation collection to another observation collection. See + add_observation method for more details. Parameters ---------- @@ -1125,7 +1116,6 @@ def add_obs_collection( ------- ObsCollection or None merged ObsCollection if ``inplace=True``. - """ if check_consistency: if not self._is_consistent(): @@ -1184,9 +1174,8 @@ def from_bro( epsg=28992, ignore_max_obs=False, ): - """get all the observations within an extent or within a - groundwatermonitoring net. - + """Get all the observations within an extent or within a groundwatermonitoring + net. Parameters ---------- @@ -1218,7 +1207,6 @@ def from_bro( ------- ObsCollection ObsCollection DataFrame with the 'obs' column - """ from .io.bro import get_obs_list_from_extent, get_obs_list_from_gmn @@ -1262,8 +1250,7 @@ def from_lizard( type_timeseries="merge", only_metadata=False, ): - """ - get all observations within a specified extent + """Get all observations within a specified extent. Parameters ---------- @@ -1293,7 +1280,6 @@ def from_lizard( ------- ObsCollection ObsCollection DataFrame with the 'obs' column - """ from .io.lizard import get_obs_list_from_codes, get_obs_list_from_extent @@ -1329,8 +1315,7 @@ def from_bronhouderportaal_bro( dirname, full_meta=False, ): - """get all the metadata from dirname. - + """Get all the metadata from dirname. Parameters ---------- @@ -1343,7 +1328,6 @@ def from_bronhouderportaal_bro( ------- ObsCollection ObsCollection DataFrame without the 'obs' column - """ from .io.bronhouderportaal_bro import get_obs_list_from_dir @@ -1360,8 +1344,8 @@ def from_bronhouderportaal_bro( @classmethod def from_dataframe(cls, df, obs_list=None, ObsClass=obs.GroundwaterObs): - """Create an observation collection from a DataFrame by adding a column - with empty observations. + """Create an observation collection from a DataFrame by adding a column with + empty observations. Parameters ---------- @@ -1391,9 +1375,9 @@ def from_dataframe(cls, df, obs_list=None, ObsClass=obs.GroundwaterObs): @classmethod def from_excel(cls, path, meta_sheet_name="metadata"): - """Create an observation collection from an excel file. The excel file - should have the same format as excel files created with the `to_excel` - method of an ObsCollection. + """Create an observation collection from an excel file. The excel file should + have the same format as excel files created with the `to_excel` method of an + ObsCollection. Parameters ---------- @@ -1445,8 +1429,8 @@ def from_dino( name=None, **kwargs, ): - """Read dino data within an extent from the server or from a directory - with downloaded files. + """Read dino data within an extent from the server or from a directory with + downloaded files. Parameters ---------- @@ -1749,8 +1733,7 @@ def from_knmi( use_api=True, raise_exceptions=True, ): - """Get knmi observations from a list of locations or a list of - stations. + """Get knmi observations from a list of locations or a list of stations. Parameters ---------- @@ -1862,7 +1845,7 @@ class of the observations, can be PrecipitationObs, EvaporationObs @classmethod def from_list(cls, obs_list, name=""): - """read observations from a list of obs objects. + """Read observations from a list of obs objects. Parameters ---------- @@ -2049,9 +2032,8 @@ def from_pastastore( return cls(obs_df, name=pstore.name, meta=meta) def to_excel(self, path, meta_sheet_name="metadata"): - """Write an ObsCollection to an excel, the first sheet in the - excel contains the metadata, the other tabs are the timeseries of each - observation. + """Write an ObsCollection to an excel, the first sheet in the excel contains the + metadata, the other tabs are the timeseries of each observation. The excel can be read using the read_excel function of hydropandas. @@ -2106,7 +2088,7 @@ def to_pi_xml(self, fname, timezone="", version="1.24"): fews.write_pi_xml(self, fname, timezone=timezone, version=version) def to_gdf(self, xcol="x", ycol="y", crs=28992, drop_obs=True): - """convert ObsCollection to GeoDataFrame. + """Convert ObsCollection to GeoDataFrame. Parameters ---------- @@ -2142,7 +2124,7 @@ def to_pastastore( conn=None, overwrite=False, ): - """add observations to a new or existing pastastore. + """Add observations to a new or existing pastastore. Parameters ---------- @@ -2185,7 +2167,7 @@ def to_pastastore( return pstore def to_shapefile(self, path, xcol="x", ycol="y"): - """save ObsCollection as shapefile. + """Save ObsCollection as shapefile. Parameters ---------- @@ -2220,8 +2202,8 @@ def to_shapefile(self, path, xcol="x", ycol="y"): gdf.to_file(path) def add_meta_to_df(self, key="all"): - """Get the values from the meta dictionary of each observation object - and add these to the ObsCollection as a column. + """Get the values from the meta dictionary of each observation object and add + these to the ObsCollection as a column. to the ObsCollection @@ -2284,8 +2266,8 @@ def interpolate( epsilon: Optional[int] = None, col: Optional[str] = None, ): - """Interpolation method for ObsCollections using the Scipy radial basis - function (RBF) + """Interpolation method for ObsCollections using the Scipy radial basis function + (RBF) Parameters ---------- diff --git a/hydropandas/observation.py b/hydropandas/observation.py index 7c195ed6..deb57d20 100644 --- a/hydropandas/observation.py +++ b/hydropandas/observation.py @@ -30,7 +30,7 @@ class Obs(pd.DataFrame): - """generic class for a time series with measurements at a certain location. + """Generic class for a time series with measurements at a certain location. Unless specified explicitly the first numeric column in the observation is used for analysis and plotting. @@ -61,11 +61,11 @@ class Obs(pd.DataFrame): _metadata = ["name", "x", "y", "meta", "filename", "source", "unit"] def __init__(self, *args, **kwargs): - """constructor of Obs class. + """Constructor of Obs class. - *args must be input for the pandas.DataFrame constructor, - **kwargs can be one of the attributes listed in _metadata or - keyword arguments for the constructor of a pandas.DataFrame. + *args must be input for the pandas.DataFrame constructor, **kwargs can be one of + the attributes listed in _metadata or keyword arguments for the constructor of a + pandas.DataFrame. """ if (len(args) > 0) and isinstance(args[0], Obs): for key in args[0]._metadata: @@ -122,10 +122,7 @@ def __repr__(self) -> str: return buf.getvalue() def _repr_html_(self, collapse=False): - """ - Uses the pandas DataFrame html representation with the metadata - prepended. - """ + """Uses the pandas DataFrame html representation with the metadata prepended.""" obs_type = f'

hydropandas.{type(self).__name__}

\n' metadata_dic = {key: getattr(self, key) for key in self._metadata} @@ -184,13 +181,12 @@ def _constructor(self): return Obs def _get_first_numeric_col_name(self): - """get the first numeric column name of the observations + """Get the first numeric column name of the observations. Returns ------- col : str or int column name. - """ if self.empty: return None @@ -202,7 +198,7 @@ def _get_first_numeric_col_name(self): return col def copy(self, deep=True): - """create a copy of the observation. + """Create a copy of the observation. When ``deep=True`` (default), a new object will be created with a copy of the calling object's data and indices. Modifications to @@ -240,8 +236,7 @@ def copy(self, deep=True): return o def to_collection_dict(self, include_meta=False): - """get dictionary with registered attributes and their values of an Obs - object. + """Get dictionary with registered attributes and their values of an Obs object. This method can be used to create a dataframe from a collection of Obs objects. @@ -271,8 +266,7 @@ def to_collection_dict(self, include_meta=False): return d def merge_metadata(self, right, overlap="error"): - """Merge the metadata of an Obs object with metadata from another Obs - object. + """Merge the metadata of an Obs object with metadata from another Obs object. Parameters ---------- @@ -346,7 +340,7 @@ def merge_metadata(self, right, overlap="error"): return new_metadata def _merge_timeseries(self, right, overlap="error"): - """merge two timeseries. + """Merge two timeseries. Parameters ---------- @@ -524,10 +518,13 @@ class GroundwaterObs(Obs): ] def __init__(self, *args, **kwargs): - """ - *args must be input for the pandas.DataFrame constructor, - **kwargs can be one of the attributes listed in _metadata or - keyword arguments for the constructor of a pandas.DataFrame. + """Constructor for ObsCollection. + + Parameters + ---------- + *args must be input for the pandas.DataFrame constructor + **kwargs can be one of the attributes listed in _metadata or keyword arguments + for the constructor of a pandas.DataFrame. """ if len(args) > 0: if isinstance(args[0], Obs): @@ -560,8 +557,7 @@ def from_bro( drop_duplicate_times=True, only_metadata=False, ): - """download BRO groundwater observations from the server. - + """Download BRO groundwater observations from the server. Parameters ---------- @@ -588,7 +584,6 @@ def from_bro( ------- TYPE DESCRIPTION. - """ from .io import bro @@ -630,9 +625,8 @@ def from_lizard( type_timeseries="merge", only_metadata=False, ): - """ - extracts the metadata and timeseries of a observation well from a LIZARD-API - based on the code of a monitoring well + """Extracts the metadata and timeseries of a observation well from a LIZARD-API + based on the code of a monitoring well. Parameters ---------- @@ -649,16 +643,15 @@ def from_lizard( hand: returns only hand measurements diver: returns only diver measurements merge: the hand and diver measurements into one time series (default) - combine: keeps hand and diver measurements separeted - The default is merge. + combine: keeps hand and diver measurements separated only_metadata : bool, optional if True only metadata is returned and no time series data. The default is False. - Returns ------- - returns a DataFrame with metadata and timeseries + ObsCollection + Returns a DataFrame with metadata and timeseries """ from .io import lizard @@ -695,9 +688,8 @@ def from_bronhouderportaal_bro( tube_nr, full_meta=False, ): - """load BRO groundwater metadata from XML file. Mind that - bro_id is applicable, because file is not yet imported in BRO - + """Load BRO groundwater metadata from XML file. Mind that bro_id is applicable, + because file is not yet imported in BRO. Parameters ---------- @@ -710,9 +702,8 @@ def from_bronhouderportaal_bro( Returns ------- - TYPE - DESCRIPTION. - + ObsCollection + ObsCollection containing observations from XML file. """ from .io import bronhouderportaal_bro @@ -747,7 +738,7 @@ def from_dino( path=None, **kwargs, ): - """download dino data from the server. + """Download dino data from the server. Parameters ---------- @@ -766,7 +757,7 @@ def from_dino( @classmethod def from_artdino_file(cls, path=None, **kwargs): - """read a dino csv file (artdiver style). + """Read a dino csv file (artdiver style). Parameters ---------- @@ -848,8 +839,7 @@ def from_pastastore(cls, pstore, libname, name, metadata_mapping=None): class WaterQualityObs(Obs): - """Class for water quality ((grond)watersamenstelling) point - observations. + """Class for water quality ((grond)watersamenstelling) point observations. Subclass of the Obs class """ @@ -901,7 +891,7 @@ def from_dino(cls, path, **kwargs): class WaterlvlObs(Obs): - """class for water level point observations. + """Class for water level point observations. Subclass of the Obs class """ @@ -926,7 +916,7 @@ def _constructor(self): @classmethod def from_dino(cls, path, **kwargs): - """read a dino file with waterlvl data. + """Read a dino file with waterlvl data. Parameters ---------- @@ -969,7 +959,7 @@ def from_waterinfo(cls, path, **kwargs): class ModelObs(Obs): - """class for model point results. + """Class for model point results. Subclass of the Obs class """ @@ -993,7 +983,7 @@ def _constructor(self): class MeteoObs(Obs): - """class for meteorological timeseries. + """Class for meteorological timeseries. Subclass of the Obs class """ @@ -1114,7 +1104,7 @@ def from_wow( start: Optional[pd.Timestamp] = None, end: Optional[pd.Timestamp] = None, ): - """Get a MeteoObs timeseries from a wow.knmi.nl station + """Get a MeteoObs timeseries from a wow.knmi.nl station. Parameters ---------- @@ -1153,7 +1143,7 @@ def from_wow( class EvaporationObs(MeteoObs): - """class for evaporation timeseries. + """Class for evaporation timeseries. Subclass of the MeteoObs class """ @@ -1244,7 +1234,7 @@ def from_knmi( class PrecipitationObs(MeteoObs): - """class for precipitation timeseries. + """Class for precipitation timeseries. Subclass of the MeteoObs class """ @@ -1366,7 +1356,7 @@ def from_wow( start: Optional[pd.Timestamp] = None, end: Optional[pd.Timestamp] = None, ): - """Get a PrecipitationObs timeseries from a wow.knmi.nl station + """Get a PrecipitationObs timeseries from a wow.knmi.nl station. Parameters ---------- diff --git a/hydropandas/util.py b/hydropandas/util.py index 3d7be189..35e7da51 100644 --- a/hydropandas/util.py +++ b/hydropandas/util.py @@ -27,7 +27,7 @@ def _obslist_to_frame(obs_list): - """convert a list of observations to a pandas DataFrame. + """Convert a list of observations to a pandas DataFrame. Parameters ---------- @@ -96,8 +96,7 @@ def unzip_file(src, dst, force=False, preserve_datetime=False): def get_files( file_or_dir, ext, unpackdir=None, force_unpack=False, preserve_datetime=False ): - """internal method to get list of files with specific extension from - dirname. + """Internal method to get list of files with specific extension from dirname. Parameters ---------- @@ -273,8 +272,8 @@ def get_color_logger(level="INFO"): def oc_to_df(oc, col: Optional[str] = None) -> pd.DataFrame: - """convert an observation collection to a DataFrame where every column - has one observation. + """Convert an observation collection to a DataFrame where every column has one + observation. Parameters ---------- @@ -309,7 +308,6 @@ def interpolate( ) -> pd.DataFrame: """Interpolation method using the Scipy radial basis function (RBF) - Parameters ---------- xy : List[List[float]]