Skip to content

Commit

Permalink
pandas future fixes and get_obs() method for collection
Browse files Browse the repository at this point in the history
  • Loading branch information
OnnoEbbens committed Mar 7, 2024
1 parent 5d8d8ee commit 067048a
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 46 deletions.
16 changes: 7 additions & 9 deletions hydropandas/io/bro.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,12 @@ def get_bro_groundwater(bro_id, tube_nr=None, only_metadata=False, **kwargs):
empty_df = pd.DataFrame()
return empty_df, meta

dfl = []
for i, gld_id in enumerate(gld_ids):
if i == 0:
df, meta_new = measurements_from_gld(gld_id, **kwargs)
meta.update(meta_new)
else:
df_new, meta_new = measurements_from_gld(gld_id, **kwargs)
df = pd.concat([df, df_new], axis=1)
meta.update(meta_new)
df, meta_new = measurements_from_gld(gld_id, **kwargs)
meta.update(meta_new)
dfl.append(df)
df = pd.concat(dfl, axis=0)

return df, meta

Expand Down Expand Up @@ -305,15 +303,15 @@ def measurements_from_gld(

# to dataframe
df = pd.DataFrame(
index=pd.to_datetime(times),
index=pd.to_datetime(times, utc=True).tz_convert("CET"),
data={"values": values, "qualifier": qualifiers},
)

# wintertime
if to_wintertime:
# remove time zone information by transforming to Dutch winter time
df.index = pd.to_datetime(df.index, utc=True).tz_localize(None) + pd.Timedelta(
1, unit="H"
1, unit="h"
)

# duplicates
Expand Down
46 changes: 46 additions & 0 deletions hydropandas/obs_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2031,6 +2031,52 @@ def from_pastastore(
}
return cls(obs_df, name=pstore.name, meta=meta)

def get_obs(self, name=None, **kwargs):
    """Get a single observation object from the collection.

    Parameters
    ----------
    name : str or None, optional
        name of the observation you want to select, by default None
    **kwargs
        any metadata, value pair used to filter the collection, e.g. for a
        collection of GroundwaterObs: tube_nr = 1 or source = 'BRO'

    Returns
    -------
    hpd.Obs
        Observation object from the collection.

    Raises
    ------
    ValueError
        If multiple observations in the collection match the given attribute
        values.
    ValueError
        If no observation in the collection matches the given attribute
        values.
    """
    # select by name; the list indexer keeps the selection two-dimensional
    # so the metadata columns stay available for the conditions below
    selected_obs = self if name is None else self.loc[[name]]

    # narrow the selection down with every metadata condition in turn
    for key, item in kwargs.items():
        selected_obs = selected_obs.loc[selected_obs[key] == item]

    n_selected = len(selected_obs)
    if n_selected == 0:
        raise ValueError("no observations for given conditions")
    if n_selected > 1:
        raise ValueError(
            f"multiple observations for given conditions {selected_obs.index}"
        )

    # exactly one row left: return the Obs object stored in the 'obs' column
    return selected_obs["obs"].values[0]

def to_excel(self, path, meta_sheet_name="metadata"):
"""Write an ObsCollection to an excel, the first sheet in the excel contains the
metadata, the other tabs are the timeseries of each observation.
Expand Down
84 changes: 47 additions & 37 deletions tests/test_002_obs_objects.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
import numpy as np
import pandas as pd
import pytest

import hydropandas as hpd

# import sys
# sys.path.insert(1, "..")


# TEST_DIR = os.path.dirname(os.path.abspath(__file__))
# PROJECT_DIR = os.path.abspath(os.path.join(TEST_DIR, os.pardir))
# sys.path.insert(0, PROJECT_DIR)
# os.chdir(TEST_DIR)


def test_groundwater_obs(name="groundwaterobs_001", tube_nr=2):
def _get_groundwater_obs(name="groundwaterobs_001", tube_nr=2):
df = pd.DataFrame(
index=pd.date_range("2020-1-1", "2020-1-10"),
data={"values": np.random.rand(10)},
Expand Down Expand Up @@ -41,7 +33,7 @@ def test_groundwater_obs(name="groundwaterobs_001", tube_nr=2):
return o


def test_waterlvl_obs():
def _get_waterlvl_obs():
df = pd.DataFrame(
index=pd.date_range("2020-1-1", "2020-1-10"),
data={"values": np.random.rand(10)},
Expand All @@ -60,6 +52,16 @@ def test_waterlvl_obs():
return o


def _obscollection_from_list():
    """Build an ObsCollection from ten generated groundwater observations.

    Helper for the tests in this module; the observations are named
    ``groundwaterobs_000`` up to ``groundwaterobs_009`` and get their
    sequence number as tube number.
    """
    observations = [
        _get_groundwater_obs(name=f"groundwaterobs_00{nr}", tube_nr=nr)
        for nr in range(10)
    ]
    return hpd.ObsCollection.from_list(observations)


def test_groundwater_quality_obs():
df = pd.DataFrame(
index=pd.date_range("2020-1-1", "2020-1-10"), data={"pH": np.random.rand(10)}
Expand All @@ -75,25 +77,15 @@ def test_groundwater_quality_obs():
)


def test_obscollection_from_list():
o_list = []
for i in range(10):
o_list.append(test_groundwater_obs(name=f"groundwaterobs_00{i}", tube_nr=i))

oc = hpd.ObsCollection.from_list(o_list)

return oc


def test_add_meta_to_df():
oc = test_obscollection_from_list()
oc = _obscollection_from_list()
oc.add_meta_to_df(key="all")

assert "info" in oc.columns, "unexpected result for add_meta_to_df"


def test_copy_obs():
o = test_groundwater_obs(name="groundwaterobs_001", tube_nr=2)
o = _get_groundwater_obs(name="groundwaterobs_001", tube_nr=2)
o2 = o.copy()

o.meta["hello"] = "world"
Expand Down Expand Up @@ -133,10 +125,10 @@ def test_convert_waterlvl_groundwater_obs():

def test_merge_observations_same_timeseries():
# base
o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10)

# observation with different metadata, same time series
o2 = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o2 = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o2.iloc[:, 0] = o.iloc[:, 0]

omerged = o.merge_observation(o2, merge_metadata=False)
Expand All @@ -146,7 +138,7 @@ def test_merge_observations_same_timeseries():

def test_merge_observations_different_timeseries():
# base
o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10)

# observation with different time series
o2 = o.copy()
Expand All @@ -163,7 +155,7 @@ def test_merge_observations_different_timeseries():

def test_merge_overlapping():
# base
o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10)

# observation with partially overlapping time series and extra columns
o2 = o.copy()
Expand All @@ -182,29 +174,47 @@ def test_merge_overlapping():

def test_merge_errors():
# base
o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10)

# observation with partially overlapping time series and extra columns
o2 = test_waterlvl_obs()
o2 = _get_waterlvl_obs()

try:
with pytest.raises(TypeError):
o.merge_observation(o2)
except TypeError:
return

raise RuntimeError("function should raise an error")


def test_add_observation_to_oc():
oc = test_obscollection_from_list()
oc = _obscollection_from_list()

o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10)

oc.add_observation(o)


def test_interpolate_obscollection():
oc = test_obscollection_from_list()
oc = _obscollection_from_list()

xy = [[500, 11000], [9000, 18000]]
oc.interpolate(xy)


def test_get_obs():
    """Check get_obs for a unique match, an ambiguous match and no match."""
    collection = _obscollection_from_list()

    # selecting on the observation name yields that single observation
    by_name = collection.get_obs(name="groundwaterobs_001")
    assert isinstance(by_name, hpd.GroundwaterObs)
    assert by_name.name == "groundwaterobs_001"

    # selecting on a combination of metadata attributes
    by_attributes = collection.get_obs(
        monitoring_well="groundwaterobs", tube_nr=2
    )
    assert isinstance(by_attributes, hpd.GroundwaterObs)
    assert by_attributes.tube_nr == 2

    # a condition shared by several observations is ambiguous
    with pytest.raises(ValueError):
        collection.get_obs(monitoring_well="groundwaterobs")

    # a condition that matches nothing is an error as well
    with pytest.raises(ValueError):
        collection.get_obs(monitoring_well="I do not exist")

0 comments on commit 067048a

Please sign in to comment.