Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pandas updates and get_obs method #190

Merged
merged 10 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions hydropandas/io/bro.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,12 @@ def get_bro_groundwater(bro_id, tube_nr=None, only_metadata=False, **kwargs):
empty_df = pd.DataFrame()
return empty_df, meta

dfl = []
for i, gld_id in enumerate(gld_ids):
if i == 0:
df, meta_new = measurements_from_gld(gld_id, **kwargs)
meta.update(meta_new)
else:
df_new, meta_new = measurements_from_gld(gld_id, **kwargs)
df = pd.concat([df, df_new], axis=1)
meta.update(meta_new)
df, meta_new = measurements_from_gld(gld_id, **kwargs)
meta.update(meta_new)
dfl.append(df)
df = pd.concat(dfl, axis=0)

return df, meta

Expand Down Expand Up @@ -305,15 +303,15 @@ def measurements_from_gld(

# to dataframe
df = pd.DataFrame(
index=pd.to_datetime(times),
index=pd.to_datetime(times, utc=True).tz_convert("CET"),
data={"values": values, "qualifier": qualifiers},
)

# wintertime
if to_wintertime:
# remove time zone information by transforming to dutch winter time
df.index = pd.to_datetime(df.index, utc=True).tz_localize(None) + pd.Timedelta(
1, unit="H"
1, unit="h"
)

# duplicates
Expand Down
44 changes: 44 additions & 0 deletions hydropandas/obs_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2031,6 +2031,50 @@ def from_pastastore(
}
return cls(obs_df, name=pstore.name, meta=meta)

def get_obs(self, name=None, **kwargs):
    """Get a single observation object from the collection.

    Parameters
    ----------
    name : str or None, optional
        name of the observation you want to select, by default None
    **kwargs : any metadata, value pair e.g. for a collection of GroundwaterObs:
        tube_nr = 1 or source = 'BRO'

    Returns
    -------
    hpd.Obs
        Observation object from the collection.

    Raises
    ------
    ValueError
        If multiple observations in the collection match the given name and
        attribute values.
    ValueError
        If no observation in the collection matches the given name and
        attribute values.
    """

    # select by name; a boolean mask is used instead of a label lookup so that
    # an unknown name gives an empty selection and ends in the documented
    # ValueError below rather than in a KeyError
    if name is None:
        selected_obs = self
    else:
        selected_obs = self.loc[self.index == name]

    # select by condition, every keyword narrows the selection further
    for key, item in kwargs.items():
        condition = selected_obs[key] == item
        selected_obs = selected_obs.loc[condition]

    # return an Obs object, only an unambiguous selection is accepted
    n_selected = len(selected_obs)
    if n_selected == 0:
        raise ValueError("no observations for given conditions")
    if n_selected > 1:
        raise ValueError(
            f"multiple observations for given conditions {selected_obs.index}"
        )
    return selected_obs["obs"].values[0]

def to_excel(self, path, meta_sheet_name="metadata"):
"""Write an ObsCollection to an Excel file. The first sheet contains the
metadata; the other sheets contain the time series of each observation.
Expand Down
84 changes: 47 additions & 37 deletions tests/test_002_obs_objects.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
import numpy as np
import pandas as pd
import pytest

import hydropandas as hpd

# import sys
# sys.path.insert(1, "..")


# TEST_DIR = os.path.dirname(os.path.abspath(__file__))
# PROJECT_DIR = os.path.abspath(os.path.join(TEST_DIR, os.pardir))
# sys.path.insert(0, PROJECT_DIR)
# os.chdir(TEST_DIR)


def test_groundwater_obs(name="groundwaterobs_001", tube_nr=2):
def _get_groundwater_obs(name="groundwaterobs_001", tube_nr=2):
df = pd.DataFrame(
index=pd.date_range("2020-1-1", "2020-1-10"),
data={"values": np.random.rand(10)},
Expand Down Expand Up @@ -41,7 +33,7 @@ def test_groundwater_obs(name="groundwaterobs_001", tube_nr=2):
return o


def test_waterlvl_obs():
def _get_waterlvl_obs():
df = pd.DataFrame(
index=pd.date_range("2020-1-1", "2020-1-10"),
data={"values": np.random.rand(10)},
Expand All @@ -60,6 +52,16 @@ def test_waterlvl_obs():
return o


def _obscollection_from_list():
    """Build an ObsCollection from ten generated groundwater observations.

    The observations are named groundwaterobs_000 .. groundwaterobs_009 and
    get the tube numbers 0 .. 9.
    """
    observations = [
        _get_groundwater_obs(name=f"groundwaterobs_00{nr}", tube_nr=nr)
        for nr in range(10)
    ]
    return hpd.ObsCollection.from_list(observations)


def test_groundwater_quality_obs():
df = pd.DataFrame(
index=pd.date_range("2020-1-1", "2020-1-10"), data={"pH": np.random.rand(10)}
Expand All @@ -75,25 +77,15 @@ def test_groundwater_quality_obs():
)


def test_obscollection_from_list():
o_list = []
for i in range(10):
o_list.append(test_groundwater_obs(name=f"groundwaterobs_00{i}", tube_nr=i))

oc = hpd.ObsCollection.from_list(o_list)

return oc


def test_add_meta_to_df():
oc = test_obscollection_from_list()
oc = _obscollection_from_list()
oc.add_meta_to_df(key="all")

assert "info" in oc.columns, "unexpected result for add_meta_to_df"


def test_copy_obs():
o = test_groundwater_obs(name="groundwaterobs_001", tube_nr=2)
o = _get_groundwater_obs(name="groundwaterobs_001", tube_nr=2)
o2 = o.copy()

o.meta["hello"] = "world"
Expand Down Expand Up @@ -133,10 +125,10 @@ def test_convert_waterlvl_groundwater_obs():

def test_merge_observations_same_timeseries():
# base
o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10)

# observation with different metadata, same time series
o2 = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o2 = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o2.iloc[:, 0] = o.iloc[:, 0]

omerged = o.merge_observation(o2, merge_metadata=False)
Expand All @@ -146,7 +138,7 @@ def test_merge_observations_same_timeseries():

def test_merge_observations_different_timeseries():
# base
o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10)

# observation with different time series
o2 = o.copy()
Expand All @@ -163,7 +155,7 @@ def test_merge_observations_different_timeseries():

def test_merge_overlapping():
# base
o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10)

# observation with partially overlapping time series and extra columns
o2 = o.copy()
Expand All @@ -182,29 +174,47 @@ def test_merge_overlapping():

def test_merge_errors():
# base
o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10)

# observation with partially overlapping time series and extra columns
o2 = test_waterlvl_obs()
o2 = _get_waterlvl_obs()

try:
with pytest.raises(TypeError):
o.merge_observation(o2)
except TypeError:
return

raise RuntimeError("function should raise an error")


def test_add_observation_to_oc():
oc = test_obscollection_from_list()
oc = _obscollection_from_list()

o = test_groundwater_obs(name="groundwaterobs_010", tube_nr=10)
o = _get_groundwater_obs(name="groundwaterobs_010", tube_nr=10)

oc.add_observation(o)


def test_interpolate_obscollection():
oc = test_obscollection_from_list()
oc = _obscollection_from_list()

xy = [[500, 11000], [9000, 18000]]
oc.interpolate(xy)


def test_get_obs():
    """Check ObsCollection.get_obs for selection by name and by metadata."""
    collection = _obscollection_from_list()

    # selecting on the observation name
    by_name = collection.get_obs(name="groundwaterobs_001")
    assert isinstance(by_name, hpd.GroundwaterObs)
    assert by_name.name == "groundwaterobs_001"

    # selecting on metadata values that together identify one observation
    by_attributes = collection.get_obs(monitoring_well="groundwaterobs", tube_nr=2)
    assert isinstance(by_attributes, hpd.GroundwaterObs)
    assert by_attributes.tube_nr == 2

    # a condition shared by several observations is ambiguous
    with pytest.raises(ValueError):
        collection.get_obs(monitoring_well="groundwaterobs")

    # a condition that matches nothing
    with pytest.raises(ValueError):
        collection.get_obs(monitoring_well="I do not exist")
Loading