Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to 0.9.2 #159

Merged
merged 16 commits into from
Oct 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@ sphinx:
# Optionally build your docs in additional formats such as PDF and ePub
formats: all

# Optionally set the version of Python and requirements required to build your docs
# Set the OS, Python version and other tools you might need
build:
os: ubuntu-22.04
tools:
python: "3.11"

# Optionally set the Python requirements required to build your docs
python:
version: 3.8
install:
- method: pip
path: .
extra_requirements:
- "rtd"

build:
image: latest
- "rtd"
3 changes: 3 additions & 0 deletions docs/examples/05_bronhouderportaal_bro.nblink
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"path": "../../examples/05_bronhouderportaal_bro.ipynb"
}
3 changes: 2 additions & 1 deletion examples/01_groundwater_observations.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@
"source": [
"# reading a dino csv file\n",
"path = \"data/Grondwaterstanden_Put/B33F0080001_1.csv\"\n",
"gw_dino = hpd.GroundwaterObs.from_dino(path=path)"
"gw_dino = hpd.GroundwaterObs.from_dino(path=path)\n",
"gw_dino"
]
},
{
Expand Down
22 changes: 18 additions & 4 deletions hydropandas/io/bro.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
import numpy as np
import pandas as pd
import requests
from pyproj import Transformer
from pyproj import Proj, Transformer
from tqdm import tqdm

from ..util import EPSG_28992

logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -422,7 +424,7 @@ def get_full_metadata_from_gmw(bro_id, tube_nr):
return meta


def get_metadata_from_gmw(bro_id, tube_nr):
def get_metadata_from_gmw(bro_id, tube_nr, epsg=28992):
"""get selection of metadata for a groundwater monitoring well.
coordinates, ground_level, tube_top and tube screen

Expand Down Expand Up @@ -473,8 +475,20 @@ def get_metadata_from_gmw(bro_id, tube_nr):
meta = {"monitoring_well": bro_id, "tube_nr": tube_nr, "source": "BRO"}

# x and y
xy = gmw.find("dsgmw:deliveredLocation//gmwcommon:location//gml:pos", ns)
meta["x"], meta["y"] = [float(val) for val in xy.text.split()]
xy_elem = gmw.find("dsgmw:deliveredLocation//gmwcommon:location//gml:pos", ns)
xy = [float(val) for val in xy_elem.text.split()]

# convert crs
srsname = gmw.find("dsgmw:deliveredLocation//gmwcommon:location", ns).attrib[
"srsName"
]
epsg_gwm = int(srsname.split(":")[-1])
proj_from = Proj(f"EPSG:{epsg_gwm}")
proj_to = Proj(EPSG_28992)
transformer = Transformer.from_proj(proj_from, proj_to)
xy = transformer.transform(xy[0], xy[1])

meta["x"], meta["y"] = xy

# ground_level
vert_pos = gmw.find("dsgmw:deliveredVerticalPosition", ns)
Expand Down
87 changes: 53 additions & 34 deletions hydropandas/io/knmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
URL_DAILY_PREC = "https://www.daggegevens.knmi.nl/klimatologie/monv/reeksen"
URL_DAILY_METEO = "https://www.daggegevens.knmi.nl/klimatologie/daggegevens"
URL_HOURLY_METEO = "https://www.daggegevens.knmi.nl/klimatologie/uurgegevens"
LOOK_BACK_DAYS = 365


def get_knmi_obs(
Expand All @@ -53,7 +52,7 @@ def get_knmi_obs(
RD coordinates of a location in the Netherlands. The station nearest
to this location is used. The default is None.
meteo_var : str or None, optional
meteo variable e.g. "RH" or "EV24". See list with al options in the
meteo variable e.g. "RH" or "EV24". See list with all options in the
hydropandas documentation.
start : str, datetime or None, optional
start date of observations. The default is None.
Expand Down Expand Up @@ -98,9 +97,12 @@ def get_knmi_obs(
if stn is not None:
stn = int(stn)

start_str = str(start).replace(" 00:00:00", "")
end_str = str(end).replace(" 00:00:00", "")

logger.info(
f"get KNMI data from station {stn} and meteo variable {meteo_var}"
f"from {start} to {end}"
f"get data from station {stn} and variable {meteo_var} "
f"from {start_str} to {end_str}"
)
ts, meta = get_knmi_timeseries_stn(stn, meteo_var, settings, start, end)
elif fname is not None:
Expand Down Expand Up @@ -173,7 +175,7 @@ def _get_default_settings(settings=None):
only the non-existing settings are added with their default value.

The default settings are:
fill_missing_obs = True
fill_missing_obs = False
nan values in time series are filled with nearby time series.
interval = 'daily'
desired time interval for observations. Can be 'daily' or 'hourly'.
Expand Down Expand Up @@ -433,18 +435,6 @@ def fill_missing_measurements(stn, meteo_var, start, end, settings, stn_name=Non
if stn_name is None:
stn_name = get_station_name(stn=stn, stations=stations)

# check latest date at which measurements are available at De Bilt
if (meteo_var in ["RD", "RH"]) and (
end > (dt.datetime.now() - pd.Timedelta(LOOK_BACK_DAYS, unit="D"))
):
end = min(
end,
_check_latest_measurement_date_RD_debilt(
meteo_var, use_api=settings["use_api"]
),
)
logger.info(f'changing end_date to {end.strftime("%Y-%m-%d")}')

# download data from station
knmi_df, variables, station_meta = download_knmi_data(
stn, meteo_var, start, end, settings, stn_name
Expand Down Expand Up @@ -473,11 +463,23 @@ def fill_missing_measurements(stn, meteo_var, start, end, settings, stn_name=Non
)
ignore.append(stn)

if end > knmi_df.index[-1]:
# check latest date at which measurements are available at De Bilt
new_end = _check_latest_measurement_date_de_bilt(
meteo_var,
use_api=settings["use_api"],
start=start if knmi_df.empty else knmi_df.index[-1],
end=end,
)
if new_end < end:
end = new_end
logger.warning(f'changing end_date to {end.strftime("%Y-%m-%d")}')

# find missing values
knmi_df = _add_missing_indices(knmi_df, stn, start, end)

missing = knmi_df[meteo_var].isna()
logger.info(f"station {stn} has {missing.sum()} missing measurements")
logger.debug(f"station {stn} has {missing.sum()} missing measurements")

knmi_df.loc[~missing, "station"] = str(stn)

Expand All @@ -487,20 +489,22 @@ def fill_missing_measurements(stn, meteo_var, start, end, settings, stn_name=Non
stations.loc[[stn]], meteo_var=meteo_var, ignore=ignore
)

logger.info(
f"trying to fill {missing.sum()} " f"measurements with station {stn_comp}"
)

if stn_comp is None:
logger.info(
"could not fill all missing measurements there are "
"could not fill all missing measurements as there are "
"no stations left to check"
)

missing[:] = False
break
else:
stn_comp = stn_comp[0]

n_missing = missing.sum()
logger.info(
f"trying to fill {n_missing} missing measurements with station {stn_comp}"
)

stn_name_comp = get_station_name(stn_comp, stations)
knmi_df_comp, _, __ = download_knmi_data(
stn_comp, meteo_var, start, end, settings, stn_name_comp
Expand Down Expand Up @@ -583,8 +587,8 @@ def download_knmi_data(stn, meteo_var, start, end, settings, stn_name=None):
information about the measurement station.
"""

logger.info(
f"download knmi {meteo_var} data from station "
logger.debug(
f"download KNMI {meteo_var} data from station "
f"{stn}-{stn_name} between {start} and {end}"
)

Expand Down Expand Up @@ -639,7 +643,7 @@ def download_knmi_data(stn, meteo_var, start, end, settings, stn_name=None):
raise ValueError(e)

if knmi_df.empty:
logger.info(
logger.debug(
"no measurements found for station "
f"{stn}-{stn_name} between {start} and {end}"
)
Expand Down Expand Up @@ -1376,7 +1380,9 @@ def read_knmi_hourly(f, meteo_var, start=None, end=None):
return df.loc[start:end, [meteo_var]], variables


def _check_latest_measurement_date_RD_debilt(meteo_var, use_api=True):
def _check_latest_measurement_date_de_bilt(
meteo_var, use_api=True, start=None, end=None
):
"""According to the website of the knmi it can take up to 3 weeks before
precipitation data is updated. If you use the fill_missing_measurements
method to fill a time series until today, it will keep looking at all
Expand All @@ -1396,15 +1402,22 @@ def _check_latest_measurement_date_RD_debilt(meteo_var, use_api=True):
if True the api is used to obtain the data, API documentation is here:
https://www.knmi.nl/kennis-en-datacentrum/achtergrond/data-ophalen-vanuit-een-script
Default is True.
start : pd.TimeStamp or None, optional
start date of observations. Set to 365 days before today when None. The default
is None.
end : pd.TimeStamp or None, optional
end date of observations. Set to 10 days after today when None. The default is
None.

Returns
-------
last_measurement_date_debilt : pd.TimeStamp
last date with measurements at station de Bilt
"""

start = dt.datetime.now() - pd.Timedelta(LOOK_BACK_DAYS, unit="D")
end = dt.datetime.now() + pd.Timedelta(10, unit="D")
if start is None:
start = dt.datetime.now() - pd.Timedelta(365, unit="D")
if end is None:
end = dt.datetime.now() + pd.Timedelta(10, unit="D")
if meteo_var == "RD":
if use_api:
try:
Expand All @@ -1427,16 +1440,18 @@ def _check_latest_measurement_date_RD_debilt(meteo_var, use_api=True):
knmi_df, _, _ = get_knmi_daily_meteo_url(260, meteo_var, start, end)

knmi_df = knmi_df.dropna()
end_str = end.strftime("%Y-%m-%d")
if knmi_df.empty:
start_str = start.strftime("%Y-%m-%d")
raise ValueError(
"knmi station de Bilt has no measurements "
f"in the past {LOOK_BACK_DAYS} days for variable {meteo_var}."
"knmi station de Bilt has no measurements between "
f"{start_str} and {end_str} for variable {meteo_var}."
)

last_measurement_date_debilt = knmi_df.index[-1]

logger.debug(
f"last {meteo_var} measurement available at the Bilt is from"
f"last {meteo_var} measurement available at the Bilt until {end_str} is from"
f' {last_measurement_date_debilt.strftime("%Y-%m-%d")}'
)
logger.debug(
Expand Down Expand Up @@ -1700,6 +1715,9 @@ class of the observations, can be PrecipitationObs or

settings = _get_default_settings(kwargs)

if isinstance(meteo_vars, str):
meteo_vars = [meteo_vars]

if starts is None:
starts = [None] * len(meteo_vars)
elif isinstance(starts, (str, dt.datetime)):
Expand Down Expand Up @@ -1738,7 +1756,8 @@ class of the observations, can be PrecipitationObs or
)
else:
raise ValueError(
"stns, location and x are all None" "please specify one of these"
"stns, location and xy are all None. "
"Please specify one of these arguments."
)
_stns = np.unique(_stns)

Expand Down
6 changes: 3 additions & 3 deletions hydropandas/io/pastas.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ def _get_metadata_from_obs(o):
elif isinstance(v, numbers.Number):
meta[k] = float(v)
else:
logger.info(
logger.debug(
f"did not add {k} to metadata because datatype is {type(v)}"
)
else:
logger.info(
logger.debug(
f"did not add {attr_key} to metadata because datatype is {type(val)}"
)

Expand Down Expand Up @@ -95,7 +95,7 @@ def create_pastastore(
pstore = pst.PastaStore(name=pstore_name, connector=conn)

for o in oc.obs.values:
logger.info("add to pastastore -> {}".format(o.name))
logger.debug("add to pastastore -> {}".format(o.name))

if add_metadata:
meta = _get_metadata_from_obs(o)
Expand Down
32 changes: 12 additions & 20 deletions hydropandas/obs_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1905,7 +1905,7 @@ def to_pi_xml(self, fname, timezone="", version="1.24"):

fews.write_pi_xml(self, fname, timezone=timezone, version=version)

def to_gdf(self, xcol="x", ycol="y"):
def to_gdf(self, xcol="x", ycol="y", crs=28992, drop_obs=True):
"""convert ObsCollection to GeoDataFrame.

Parameters
Expand All @@ -1914,31 +1914,23 @@ def to_gdf(self, xcol="x", ycol="y"):
column name with x values
ycol : str
column name with y values
crs : int, optional
coordinate reference system, by default 28992 (RD new).
drop_obs : bool, optional
drop the column with observations. Useful for basic geodataframe
manipulations that require JSON serializable columns. The default
is True.

Returns
-------
gdf : geopandas.GeoDataFrame
"""
return util.df2gdf(self, xcol, ycol)

def to_report_table(
self,
columns=(
"monitoring_well",
"tube_nr",
"startdate",
"enddate",
"# measurements",
),
):
if "startdate" in columns:
self["startdate"] = self.obs.apply(lambda x: x.index[0])
if "enddate" in columns:
self["enddate"] = self.obs.apply(lambda x: x.index[-1])
if "# measurements" in columns:
self["# measurements"] = self.obs.apply(lambda x: x.shape[0])

return self[columns]
gdf = util.df2gdf(self, xcol=xcol, ycol=ycol, crs=crs)
if drop_obs:
return gdf.drop(columns="obs")
else:
return gdf

def to_pastastore(
self,
Expand Down
Loading