From 986210484d159f5ae661b14e84760477abb267e6 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 19 Feb 2024 15:27:58 +0100 Subject: [PATCH 1/8] update pandas version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 26b7f91d..cbd5ebb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ maintainers = [ requires-python = ">=3.7" dependencies = [ "scipy", - "pandas<2.1.0", + "pandas", "matplotlib", "tqdm", "requests", From 0e7491a5b939501e2a190af0f882bae436a04aa6 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Thu, 22 Feb 2024 12:46:31 +0100 Subject: [PATCH 2/8] solve pandas 2.2.0 error --- hydropandas/util.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hydropandas/util.py b/hydropandas/util.py index 35e7da51..e1bb5c29 100644 --- a/hydropandas/util.py +++ b/hydropandas/util.py @@ -288,12 +288,13 @@ def oc_to_df(oc, col: Optional[str] = None) -> pd.DataFrame: _description_ """ df_list = [] - for obs in oc.obs.values: - if not obs.empty: + for o in oc.obs.values: + if not o.empty: if col is None: - vals = obs.loc[:, obs._get_first_numeric_col_name()] + vals = o.loc[:, o._get_first_numeric_col_name()] else: - vals = obs.loc[:, col] + vals = o.loc[:, col] + vals.name = o.name df_list.append(vals) return pd.concat(df_list, axis=1) From c91eca8ec7d48eade9f5e8f3262916db6eeac897 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Thu, 22 Feb 2024 13:45:06 +0100 Subject: [PATCH 3/8] no longer set crs for basemap --- examples/02_knmi_observations.ipynb | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/02_knmi_observations.ipynb b/examples/02_knmi_observations.ipynb index de33e34d..91a1630c 100644 --- a/examples/02_knmi_observations.ipynb +++ b/examples/02_knmi_observations.ipynb @@ -1477,11 +1477,10 @@ "oc = hpd.ObsCollection([precip1, precip2])\n", "gdf = oc.to_gdf()\n", "gdf = gdf.set_crs(28992)\n", - "gdf = gdf.to_crs(3857)\n", "gdf[\"name\"] = gdf.index\n", "ax = gdf.buffer(2000).plot(alpha=0, figsize=(8, 8))\n", "gdf.plot(\"name\", ax=ax, cmap=\"jet\", legend=True, markersize=100)\n", - "cx.add_basemap(ax)" + "cx.add_basemap(ax, crs=28992)" ] }, { @@ -3331,9 +3330,9 @@ ], "metadata": { "kernelspec": { - "display_name": "hpd_env", + "display_name": "dev", "language": "python", - "name": "python3" + "name": "dev" }, "language_info": { "codemirror_mode": { @@ -3345,7 +3344,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.13" } }, "nbformat": 4, From c8bc5ca2ac2f4dc7c94cefd5ef4a7b4c235e794a Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Fri, 1 Mar 2024 15:28:39 +0100 Subject: [PATCH 4/8] fixes for #188 --- hydropandas/io/lizard.py | 107 +++++++++++++++++++++++++++++++-------- tests/test_013_lizard.py | 19 ++++++- 2 files changed, 103 insertions(+), 23 deletions(-) diff --git a/hydropandas/io/lizard.py b/hydropandas/io/lizard.py index d93e6358..818e5eed 100644 --- a/hydropandas/io/lizard.py +++ b/hydropandas/io/lizard.py @@ -161,7 +161,7 @@ def _prepare_API_input(nr_pages, url_groundwater): urls = [] for page in range(nr_pages): true_page = page + 1 # The real page number is attached to the import thread - urls = [url_groundwater + "&page={}".format(true_page)] + urls += [url_groundwater + "&page={}".format(true_page)] return urls @@ -185,6 +185,32 @@ def _download(url, timeout=1800): return data +def _split_mw_tube_nr(code): + """get the tube number from a code that consists of the name and the tube number. + + Parameters + ---------- + code : str + name + tube_nr. e.g. 'BUWP014-11' or 'BUWP014012' + + Returns + ------- + monitoring well, tube_number (str, int) + + Notes + ----- + The format of the name + tube_nr is not very consistent and this function may need + further finetuning. + """ + + if code[-3:].isdigit(): + return code[:-3], int(code[-3:]) + else: + # assume there is a '-' to split name and filter number + tube_nr = code.split("-")[-1] + return code.strip(f"-{tube_nr}"), int(tube_nr) + + def get_metadata_tube(metadata_mw, tube_nr): """Extract the metadata for a specific tube from the monitoring well metadata. @@ -218,19 +244,52 @@ def get_metadata_tube(metadata_mw, tube_nr): "status": None, } + metadata_tube_list = [] for metadata_tube in metadata_mw["filters"]: - if metadata_tube["code"].endswith(str(tube_nr)): - break - else: + # check if name+filternr ends with three digits + code, tbnr = _split_mw_tube_nr(metadata_tube["code"]) + if tbnr == tube_nr: + metadata_tube_list.append(metadata_tube) + + if len(metadata_tube_list) == 0: raise ValueError(f"{metadata_mw['name']} doesn't have a tube number {tube_nr}") + elif len(metadata_tube_list) == 1: + mtd_tube = metadata_tube_list[0] + elif len(metadata_tube_list) > 1: + # tube has probably been replaced, multiple tubes with the same code and tube nr + # merge metadata from all tubes + logger.info( + f"there are {len(metadata_tube_list)} instances of {code} and tube {tube_nr}, trying to merge all in one observation object" + ) + mtd_tube = metadata_tube_list[0].copy() + relevant_keys = { + "top_level", + "filter_top_level", + "filter_bottom_level", + "timeseries", + } + for metadata_tube in metadata_tube_list: + for key in set(metadata_tube.keys()) & relevant_keys: + # check if properties are always the same for a tube number + val = metadata_tube[key] + if key in ["top_level", "filter_top_level", "filter_bottom_level"]: + if val != mtd_tube[key]: + logger.warning( + f"multiple {key} values found ({val} & {mtd_tube[key]}) for {code} and tube {tube_nr}, using {mtd_tube[key]}" + ) + # merge time series from all tubes with the same code and tube number + elif key == "timeseries": + mtd_tube[key] += val + + mtd_tube["code"] = f"{code}{tube_nr}" metadata.update( { "tube_nr": tube_nr, - "name": metadata_tube["code"].replace("-", ""), - "tube_top": metadata_tube["top_level"], - "screen_top": metadata_tube["filter_top_level"], - "screen_bottom": metadata_tube["filter_bottom_level"], + "name": mtd_tube["code"].replace("-", ""), + "tube_top": mtd_tube["top_level"], + "screen_top": mtd_tube["filter_top_level"], + "screen_bottom": mtd_tube["filter_bottom_level"], } ) @@ -238,10 +297,10 @@ def get_metadata_tube(metadata_mw, tube_nr): transformer = Transformer.from_crs("WGS84", "EPSG:28992") metadata["x"], metadata["y"] = transformer.transform(lat, lon) - if not metadata_tube["timeseries"]: + if not mtd_tube["timeseries"]: metadata["timeseries_type"] = None else: - for series in metadata_tube["timeseries"]: + for series in mtd_tube["timeseries"]: series_info = requests.get(series).json() if series_info["name"] == "WNS9040.hand": metadata["uuid_hand"] = series_info["uuid"] @@ -382,8 +441,6 @@ def _combine_timeseries(hand_measurements, diver_measurements): measurements = measurements.loc[ :, ["value_hand", "value_diver", "flag_hand", "flag_diver"] ] - measurements.loc[:, "name"] = hand_measurements.loc[:, "name"][0] - measurements.loc[:, "filter_nr"] = hand_measurements.loc[:, "filter_nr"][0] return measurements @@ -413,6 +470,7 @@ def get_timeseries_tube(tube_metadata, tmin, tmax, type_timeseries): metadata_df : dict metadata of the monitoring well """ + if tube_metadata["timeseries_type"] is None: return pd.DataFrame(), tube_metadata @@ -559,18 +617,23 @@ class of the observations, e.g. GroundwaterObs obs_list = [] for code in codes: groundwaterstation_metadata = get_metadata_mw_from_code(code) + tubes = [] if tube_nr == "all": for metadata_tube in groundwaterstation_metadata["filters"]: - tube_nr = int(metadata_tube["code"][-3:]) - o = ObsClass.from_lizard( - code, - tube_nr, - tmin, - tmax, - type_timeseries, - only_metadata=only_metadata, - ) - obs_list.append(o) + tnr = _split_mw_tube_nr(metadata_tube["code"])[-1] + if tnr not in tubes: + logger.info(f"get {code}{tnr}") + o = ObsClass.from_lizard( + code, + tnr, + tmin, + tmax, + type_timeseries, + only_metadata=only_metadata, + ) + obs_list.append(o) + tubes.append(tnr) + else: o = ObsClass.from_lizard( code, tube_nr, tmin, tmax, type_timeseries, only_metadata=only_metadata diff --git a/tests/test_013_lizard.py b/tests/test_013_lizard.py index 299666b8..d9402dc7 100644 --- a/tests/test_013_lizard.py +++ b/tests/test_013_lizard.py @@ -14,5 +14,22 @@ def test_extent(): def test_codes(): - oc = hpd.read_lizard(codes="27BP0003") + oc = hpd.read_lizard( + codes=["39F-0735", "39F-0736", "39F-0737"], type_timeseries="merge" + ) assert not oc.empty + + +def test_many_tubed_well(): + + oc = hpd.read_lizard(codes="EEWP004", tube_nr="all") + + +def test_complex_well(): + + oc = hpd.read_lizard(codes="BUWP014", tube_nr="all") + + +def test_combine(): + + hpd.GroundwaterObs.from_lizard("39F-0736", tube_nr=1, type_timeseries="combine") From e9ba85980ac4879466e5548e0d63bd2d782ce5e7 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Fri, 1 Mar 2024 16:32:14 +0100 Subject: [PATCH 5/8] fix flake8 --- hydropandas/io/lizard.py | 6 ++++-- tests/test_013_lizard.py | 2 ++ tests/{test_013_solinst.py => test_014_solinst.py} | 0 3 files changed, 6 insertions(+), 2 deletions(-) rename tests/{test_013_solinst.py => test_014_solinst.py} (100%) diff --git a/hydropandas/io/lizard.py b/hydropandas/io/lizard.py index 818e5eed..694e1b78 100644 --- a/hydropandas/io/lizard.py +++ b/hydropandas/io/lizard.py @@ -259,7 +259,8 @@ def get_metadata_tube(metadata_mw, tube_nr): # tube has probably been replaced, multiple tubes with the same code and tube nr # merge metadata from all tubes logger.info( - f"there are {len(metadata_tube_list)} instances of {code} and tube {tube_nr}, trying to merge all in one observation object" + f"there are {len(metadata_tube_list)} instances of {code} and tube " + f"{tube_nr}, trying to merge all in one observation object" ) mtd_tube = metadata_tube_list[0].copy() relevant_keys = { @@ -275,7 +276,8 @@ def get_metadata_tube(metadata_mw, tube_nr): if key in ["top_level", "filter_top_level", "filter_bottom_level"]: if val != mtd_tube[key]: logger.warning( - f"multiple {key} values found ({val} & {mtd_tube[key]}) for {code} and tube {tube_nr}, using {mtd_tube[key]}" + f"multiple {key} values found ({val} & {mtd_tube[key]})" + f" for {code} and tube {tube_nr}, using {mtd_tube[key]}" ) # merge time series from all tubes with the same code and tube number elif key == "timeseries": diff --git a/tests/test_013_lizard.py b/tests/test_013_lizard.py index d9402dc7..d7d65e2e 100644 --- a/tests/test_013_lizard.py +++ b/tests/test_013_lizard.py @@ -23,11 +23,13 @@ def test_codes(): def test_many_tubed_well(): oc = hpd.read_lizard(codes="EEWP004", tube_nr="all") + assert not oc.empty def test_complex_well(): oc = hpd.read_lizard(codes="BUWP014", tube_nr="all") + assert not oc.empty def test_combine(): diff --git a/tests/test_013_solinst.py b/tests/test_014_solinst.py similarity index 100% rename from tests/test_013_solinst.py rename to tests/test_014_solinst.py From 5d8d8ee89f4c529447730140d0c99b2437628521 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 4 Mar 2024 16:13:53 +0100 Subject: [PATCH 6/8] version bump --- hydropandas/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydropandas/version.py b/hydropandas/version.py index 61fb31ca..3f4edc33 100644 --- a/hydropandas/version.py +++ b/hydropandas/version.py @@ -1 +1 @@ -__version__ = "0.10.0" +__version__ = "0.10.1b" From 067048a0761595be7a740ff92d8729d81b17e5d1 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Thu, 7 Mar 2024 11:41:05 +0100 Subject: [PATCH 7/8] pandas future fixes and get_obs() method for collection --- hydropandas/io/bro.py | 16 +++---- hydropandas/obs_collection.py | 46 +++++++++++++++++++ tests/test_002_obs_objects.py | 84 ++++++++++++++++++++--------------- 3 files changed, 100 insertions(+), 46 deletions(-) diff --git a/hydropandas/io/bro.py b/hydropandas/io/bro.py index 01f1ae4b..7b249d91 100644 --- a/hydropandas/io/bro.py +++ b/hydropandas/io/bro.py @@ -148,14 +148,12 @@ def get_bro_groundwater(bro_id, tube_nr=None, only_metadata=False, **kwargs): empty_df = pd.DataFrame() return empty_df, meta + dfl = [] for i, gld_id in enumerate(gld_ids): - if i == 0: - df, meta_new = measurements_from_gld(gld_id, **kwargs) - meta.update(meta_new) - else: - df_new, meta_new = measurements_from_gld(gld_id, **kwargs) - df = pd.concat([df, df_new], axis=1) - meta.update(meta_new) + df, meta_new = measurements_from_gld(gld_id, **kwargs) + meta.update(meta_new) + dfl.append(df) + df = pd.concat(dfl, axis=0) return df, meta @@ -305,7 +303,7 @@ def measurements_from_gld( # to dataframe df = pd.DataFrame( - index=pd.to_datetime(times), + index=pd.to_datetime(times, utc=True).tz_convert("CET"), data={"values": values, "qualifier": qualifiers}, ) @@ -313,7 +311,7 @@ def measurements_from_gld( if to_wintertime: # remove time zone information by transforming to dutch winter time df.index = pd.to_datetime(df.index, utc=True).tz_localize(None) + pd.Timedelta( - 1, unit="H" + 1, unit="h" ) # duplicates diff --git a/hydropandas/obs_collection.py b/hydropandas/obs_collection.py index 70a2a6da..11a7f482 100644 --- a/hydropandas/obs_collection.py +++ b/hydropandas/obs_collection.py @@ -2031,6 +2031,52 @@ def from_pastastore( } return cls(obs_df, name=pstore.name, meta=meta) + def get_obs(self, name=None, **kwargs): + """get an observation object from a collection + + Parameters + ---------- + name : str or None, optional + name of the observation you want to select, by default None + **kwargs : any metadata, value pair e.g. for a collection of GroundwaterObs: + tube_nr = 1 or source = 'BRO' + + Returns + ------- + hpd.Obs + Observation object from the collection. + + Raises + ------ + ValueError + If multiple observations in the collection match the given attribute values. + ValueError + If no observation in the collection match the given attribute values. + + + """ + + # select by name + if name is None: + selected_obs = self + else: + selected_obs = self.loc[[name]] + + # select by condition + for key, item in kwargs.items(): + condition = selected_obs[key] == item + selected_obs = selected_obs.loc[condition] + + # return an Obs objet + if len(selected_obs) == 1: + return selected_obs["obs"].values[0] + elif len(selected_obs) == 0: + raise ValueError(f"no observations for given conditions") + else: + raise ValueError( + f"multiple observations for given conditions {selected_obs.index}" + ) + def to_excel(self, path, meta_sheet_name="metadata"): """Write an ObsCollection to an excel, the first sheet in the excel contains the metadata, the other tabs are the timeseries of each observation. diff --git a/tests/test_002_obs_objects.py b/tests/test_002_obs_objects.py index ca972f66..5b48ace3 100644 --- a/tests/test_002_obs_objects.py +++ b/tests/test_002_obs_objects.py @@ -1,19 +1,11 @@ import numpy as np import pandas as pd +import pytest import hydropandas as hpd -# import sys -# sys.path.insert(1, "..") - -# TEST_DIR = os.path.dirname(os.path.abspath(__file__)) -# PROJECT_DIR = os.path.abspath(os.path.join(TEST_DIR, os.pardir)) -# sys.path.insert(0, PROJECT_DIR) -# os.chdir(TEST_DIR) - - -def test_groundwater_obs(name="groundwaterobs_001", tube_nr=2): +def _get_groundwater_obs(name="groundwaterobs_001", tube_nr=2): df = pd.DataFrame( index=pd.date_range("2020-1-1", "2020-1-10"), data={"values": np.random.rand(10)}, @@ -41,7 +33,7 @@ def test_groundwater_obs(name="groundwaterobs_001", tube_nr=2): return o -def test_waterlvl_obs(): +def _get_waterlvl_obs(): df = pd.DataFrame( index=pd.date_range("2020-1-1", "2020-1-10"), data={"values": np.random.rand(10)}, @@ -60,6 +52,16 @@ def test_waterlvl_obs(): return o +def _obscollection_from_list(): + o_list = [] + for i in range(10): + o_list.append(_get_groundwater_obs(name=f"groundwaterobs_00{i}", tube_nr=i)) + + oc = hpd.ObsCollection.from_list(o_list) + + return oc + + def test_groundwater_quality_obs(): df = pd.DataFrame( index=pd.date_range("2020-1-1", "2020-1-10"), data={"pH": np.random.rand(10)} @@ -75,25 +77,15 @@ def test_groundwater_quality_obs(): ) -def test_obscollection_from_list(): - o_list = [] - for i in range(10): - o_list.append(test_groundwater_obs(name=f"groundwaterobs_00{i}", tube_nr=i)) - - oc = hpd.ObsCollection.from_list(o_list) - - return oc - - def test_add_meta_to_df(): - oc = test_obscollection_from_list() + oc = _obscollection_from_list() oc.add_meta_to_df(key="all") assert "info" in oc.columns, "unexpected result for add_meta_to_df" def test_copy_obs(): - o = test_groundwater_obs(name="groundwaterobs_001", tube_nr=2) + o = _get_groundwater_obs(name="groundwaterobs_001", tube_nr=2) o2 = o.copy() o.meta["hello"] = "world" @@ -133,10 +125,10 @@ def test_convert_waterlvl_groundwater_obs(): def test_merge_observations_same_timeseries(): # base - o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10) + o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10) # observation with different metadata, same time series - o2 = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10) + o2 = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10) o2.iloc[:, 0] = o.iloc[:, 0] omerged = o.merge_observation(o2, merge_metadata=False) @@ -146,7 +138,7 @@ def test_merge_observations_same_timeseries(): def test_merge_observations_different_timeseries(): # base - o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10) + o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10) # observation with different time series o2 = o.copy() @@ -163,7 +155,7 @@ def test_merge_observations_different_timeseries(): def test_merge_overlapping(): # base - o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10) + o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10) # observation with partially overlapping time series and extra columns o2 = o.copy() @@ -182,29 +174,47 @@ def test_merge_overlapping(): def test_merge_errors(): # base - o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10) + o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10) # observation with partially overlapping time series and extra columns - o2 = test_waterlvl_obs() + o2 = _get_waterlvl_obs() - try: + with pytest.raises(TypeError): o.merge_observation(o2) - except TypeError: - return - - raise RuntimeError("function should raise an error") def test_add_observation_to_oc(): - oc = test_obscollection_from_list() + oc = _obscollection_from_list() - o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10) + o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10) oc.add_observation(o) def test_interpolate_obscollection(): - oc = test_obscollection_from_list() + oc = _obscollection_from_list() xy = [[500, 11000], [9000, 18000]] oc.interpolate(xy) + + +def test_get_obs(): + oc = _obscollection_from_list() + + # by name + o = oc.get_obs(name="groundwaterobs_001") + assert isinstance(o, hpd.GroundwaterObs) + assert o.name == "groundwaterobs_001" + + # by attributes + o = oc.get_obs(monitoring_well="groundwaterobs", tube_nr=2) + assert isinstance(o, hpd.GroundwaterObs) + assert o.tube_nr == 2 + + # multiple observations + with pytest.raises(ValueError): + oc.get_obs(monitoring_well="groundwaterobs") + + # no observations + with pytest.raises(ValueError): + oc.get_obs(monitoring_well="I do not exist") From 2e2749ffcf03f50eaebadf4bfee1231397148e29 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Thu, 7 Mar 2024 11:50:31 +0100 Subject: [PATCH 8/8] flake 8 --- hydropandas/obs_collection.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hydropandas/obs_collection.py b/hydropandas/obs_collection.py index 11a7f482..26dbb7f2 100644 --- a/hydropandas/obs_collection.py +++ b/hydropandas/obs_collection.py @@ -2052,8 +2052,6 @@ def get_obs(self, name=None, **kwargs): If multiple observations in the collection match the given attribute values. ValueError If no observation in the collection match the given attribute values. - - """ # select by name @@ -2071,7 +2069,7 @@ def get_obs(self, name=None, **kwargs): if len(selected_obs) == 1: return selected_obs["obs"].values[0] elif len(selected_obs) == 0: - raise ValueError(f"no observations for given conditions") + raise ValueError("no observations for given conditions") else: raise ValueError( f"multiple observations for given conditions {selected_obs.index}"