def get_obs(self, name=None, **kwargs):
    """Get a single observation object from the collection.

    The collection is first narrowed down by `name` (if given) and then by
    every metadata/value pair in `kwargs`. Exactly one observation must
    remain, otherwise a ValueError is raised.

    Parameters
    ----------
    name : str or None, optional
        name of the observation you want to select, by default None, in
        which case no selection by name is made.
    **kwargs : any metadata, value pair e.g. for a collection of GroundwaterObs:
        tube_nr = 1 or source = 'BRO'. Every key is looked up as a column of
        the collection and only rows equal to the given value are kept.

    Returns
    -------
    hpd.Obs
        Observation object from the collection.

    Raises
    ------
    ValueError
        If multiple observations in the collection match the given name
        and attribute values.
    ValueError
        If no observation in the collection matches the given name and
        attribute values (this includes a `name` that is not in the index).
    KeyError
        If a keyword in `kwargs` is not a column of the collection (raised
        by the column lookup).
    """

    # select by name
    if name is None:
        selected_obs = self
    else:
        try:
            # list indexer so the selection stays a frame, also for one row
            selected_obs = self.loc[[name]]
        except KeyError:
            # an unknown name is simply "no match"; report it with the
            # documented exception type instead of leaking a KeyError
            raise ValueError(
                f"no observation named {name!r} in the collection"
            ) from None

    # select by condition
    for key, item in kwargs.items():
        condition = selected_obs[key] == item
        selected_obs = selected_obs.loc[condition]

    # return an Obs object
    if len(selected_obs) == 1:
        return selected_obs["obs"].values[0]
    elif len(selected_obs) == 0:
        raise ValueError("no observations for given conditions")
    else:
        raise ValueError(
            f"multiple observations for given conditions {selected_obs.index}"
        )
o_list.append(_get_groundwater_obs(name=f"groundwaterobs_00{i}", tube_nr=i)) + + oc = hpd.ObsCollection.from_list(o_list) + + return oc + + def test_groundwater_quality_obs(): df = pd.DataFrame( index=pd.date_range("2020-1-1", "2020-1-10"), data={"pH": np.random.rand(10)} @@ -75,25 +77,15 @@ def test_groundwater_quality_obs(): ) -def test_obscollection_from_list(): - o_list = [] - for i in range(10): - o_list.append(test_groundwater_obs(name=f"groundwaterobs_00{i}", tube_nr=i)) - - oc = hpd.ObsCollection.from_list(o_list) - - return oc - - def test_add_meta_to_df(): - oc = test_obscollection_from_list() + oc = _obscollection_from_list() oc.add_meta_to_df(key="all") assert "info" in oc.columns, "unexpected result for add_meta_to_df" def test_copy_obs(): - o = test_groundwater_obs(name="groundwaterobs_001", tube_nr=2) + o = _get_groundwater_obs(name="groundwaterobs_001", tube_nr=2) o2 = o.copy() o.meta["hello"] = "world" @@ -133,10 +125,10 @@ def test_convert_waterlvl_groundwater_obs(): def test_merge_observations_same_timeseries(): # base - o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10) + o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10) # observation with different metadata, same time series - o2 = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10) + o2 = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10) o2.iloc[:, 0] = o.iloc[:, 0] omerged = o.merge_observation(o2, merge_metadata=False) @@ -146,7 +138,7 @@ def test_merge_observations_same_timeseries(): def test_merge_observations_different_timeseries(): # base - o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10) + o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10) # observation with different time series o2 = o.copy() @@ -163,7 +155,7 @@ def test_merge_observations_different_timeseries(): def test_merge_overlapping(): # base - o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10) + o = 
def test_get_obs():
    """Check get_obs for a unique, an ambiguous and an empty selection."""
    collection = _obscollection_from_list()

    # selecting on the observation name
    wanted_name = "groundwaterobs_001"
    obs_by_name = collection.get_obs(name=wanted_name)
    assert isinstance(obs_by_name, hpd.GroundwaterObs)
    assert obs_by_name.name == wanted_name

    # selecting on a combination of metadata values
    wanted_tube = 2
    obs_by_attrs = collection.get_obs(
        monitoring_well="groundwaterobs", tube_nr=wanted_tube
    )
    assert isinstance(obs_by_attrs, hpd.GroundwaterObs)
    assert obs_by_attrs.tube_nr == wanted_tube

    # first value: multiple observations, second value: no observations;
    # both selections have to end in a ValueError
    for well in ("groundwaterobs", "I do not exist"):
        with pytest.raises(ValueError):
            collection.get_obs(monitoring_well=well)