Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Release 0.12.6: bugfix release #254

Merged
merged 29 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
2a9f7d8
allow get_stations with meteo_var == slice(None)
dbrakenhoff Sep 27, 2024
a3cbdf4
only filter stations if start/end is provided
dbrakenhoff Sep 27, 2024
c256914
ensure current station is in dataframe so nearest station can be dete…
dbrakenhoff Sep 27, 2024
17de5ab
fix for knmi precipitation data station attribute is sometimes a seri…
OnnoEbbens Sep 27, 2024
a17947b
reshuffle a bit
OnnoEbbens Sep 27, 2024
2765ece
ruff
OnnoEbbens Sep 27, 2024
2003523
Merge pull request #243 from ArtesiaWater/knmi_fixes
dbrakenhoff Sep 27, 2024
4e124b6
up version for minor release
dbrakenhoff Sep 27, 2024
d8b46a4
version bump
OnnoEbbens Sep 27, 2024
cd81586
fix bug in fill_missing_obs
OnnoEbbens Oct 8, 2024
a75f0a0
version bump
OnnoEbbens Oct 8, 2024
fb5b542
Merge branch 'master' into dev
OnnoEbbens Oct 8, 2024
211c958
ruff
OnnoEbbens Oct 8, 2024
658ebbb
Merge branch 'dev' of github.com:ArtesiaWater/hydropandas into dev
OnnoEbbens Oct 8, 2024
e27f10a
ruff
OnnoEbbens Oct 8, 2024
af8b367
Merge branch 'master' into dev
OnnoEbbens Oct 9, 2024
755759a
version bump
OnnoEbbens Oct 9, 2024
c99c518
make exception for nan values in merge_metadata
OnnoEbbens Oct 15, 2024
990cc01
add leading zeros to KNMI precipitation station name (#248)
martinvonk Oct 16, 2024
27f2048
add example fews notebook (#249)
OnnoEbbens Oct 16, 2024
dc6b694
add 'check_consistency' boolean option to the 'to_excel' method (#250)
Florisklooster Dec 2, 2024
20060ee
install full in devcontainer
OnnoEbbens Dec 16, 2024
5826055
Merge branch 'dev' of github.com:ArtesiaWater/hydropandas into dev
OnnoEbbens Dec 16, 2024
a4ab28e
fix #251:
dbrakenhoff Jan 10, 2025
41746a8
up version for new release
dbrakenhoff Jan 10, 2025
b7b572a
ruff + typos
dbrakenhoff Jan 10, 2025
18f78c8
ah, new ruff
dbrakenhoff Jan 10, 2025
2e23f05
just the ruff changes
dbrakenhoff Jan 10, 2025
e276256
Merge pull request #253 from ArtesiaWater/fix_251_rd_data_1d_ahead
dbrakenhoff Jan 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .devcontainer/setup.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash

# Create model environments
pip install -e .
pip install -e .[full]
3 changes: 3 additions & 0 deletions docs/examples/07_fews.nblink
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"path": "../../examples/07_fews.ipynb"
}
518 changes: 518 additions & 0 deletions examples/07_fews.ipynb

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions hydropandas/extensions/geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,7 @@ def _get_nearest_geometry(
)
else:
raise ValueError(
"invalid value for multiple_geometries ->"
f"{multiple_geometries}"
f"invalid value for multiple_geometries ->{multiple_geometries}"
)
else:
gdf_obs.loc[i, f"nearest {geometry_type}"] = gdf.iloc[
Expand Down
14 changes: 6 additions & 8 deletions hydropandas/extensions/gwobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,28 +179,26 @@ def get_modellayer_from_screen_depth(ftop, fbot, zvec, left=-999, right=999):

else:
if lay_fbot == left and lay_ftop == right:
logger.debug("- tube screen spans all layers. " "return nan")
logger.debug("- tube screen spans all layers. return nan")
return np.nan
elif lay_ftop == right:
logger.debug(
"- tube screen top higher than top layer. " f"selected layer {lay_fbot}"
f"- tube screen top higher than top layer. selected layer {lay_fbot}"
)
return lay_fbot

elif lay_fbot == left:
logger.debug(
"- tube screen bot lower than bottom layer. "
f"selected layer {lay_ftop}"
f"- tube screen bot lower than bottom layer. selected layer {lay_ftop}"
)
return lay_ftop

logger.debug(
"- tube screen crosses layer boundary:\n"
f" - layers: {lay_ftop}, {lay_fbot}"
f"- tube screen crosses layer boundary:\n - layers: {lay_ftop}, {lay_fbot}"
)

logger.debug(
f"- tube screen spans {lay_fbot - lay_ftop +1} layers."
f"- tube screen spans {lay_fbot - lay_ftop + 1} layers."
" checking length per layer\n"
" - length per layer:"
)
Expand All @@ -215,7 +213,7 @@ def get_modellayer_from_screen_depth(ftop, fbot, zvec, left=-999, right=999):
else:
length_layers[i] = zvec[lay_ftop + i] - zvec[lay_ftop + 1 + i]

logger.debug(f" - lay {lay_ftop+i}: {length_layers[i]:.2f}")
logger.debug(f" - lay {lay_ftop + i}: {length_layers[i]:.2f}")

# choose layer with biggest length
rel_layer = np.argmax(length_layers)
Expand Down
2 changes: 1 addition & 1 deletion hydropandas/io/bro.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ def _get_gmw_from_bro_id(bro_id, retries=0):
valid = req.text[(val_ind + 9) : (val_ind + 14)]
if valid == "false" and retries < max_retries:
logger.debug(
f"got invalid response for {bro_id}, trying again {retries+1}/{max_retries}"
f"got invalid response for {bro_id}, trying again {retries + 1}/{max_retries}"
)
return _get_gmw_from_bro_id(bro_id, retries=retries + 1)
elif valid == "false":
Expand Down
27 changes: 12 additions & 15 deletions hydropandas/io/bronhouderportaal_bro.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def get_tube_nrs_from_xml(tree, ns):
# get numbers of individual filters from XML file
all_tube_nrs = []
tubes = tree.findall(
"isgmw:sourceDocument//" "isgmw:GMW_Construction//" "isgmw:monitoringTube", ns
"isgmw:sourceDocument//isgmw:GMW_Construction//isgmw:monitoringTube", ns
)
for tube in tubes:
all_tube_nrs.append(int(tube.find("isgmw:tubeNumber", ns).text))
Expand Down Expand Up @@ -200,7 +200,7 @@ def get_metadata_from_gmw(path_xml, tube_nr, full_meta=False):
meta["deliveryContext"] = GMW_c.find("isgmw:deliveryContext", ns).text
if GMW_c.find("isgmw:constructionStandard", ns) is not None:
meta["constructionStandard"] = GMW_c.find(
"isgmw:construction" "Standard", ns
"isgmw:constructionStandard", ns
).text
if GMW_c.find("isgmw:initialFunction", ns) is not None:
meta["initialFunction"] = GMW_c.find("isgmw:initialFunction", ns).text
Expand All @@ -220,12 +220,12 @@ def get_metadata_from_gmw(path_xml, tube_nr, full_meta=False):
if full_meta:
if (
GMW_c.find(
"isgmw:deliveredLocation//" "gmwcommon:horizontalPositioningMethod", ns
"isgmw:deliveredLocation//gmwcommon:horizontalPositioningMethod", ns
)
is not None
):
meta["horizontalPositioningMethod"] = GMW_c.find(
"isgmw:deliveredLocation//" "gmwcommon:horizontalPositioningMethod", ns
"isgmw:deliveredLocation//gmwcommon:horizontalPositioningMethod", ns
).text
if (
GMW_c.find(
Expand All @@ -241,13 +241,11 @@ def get_metadata_from_gmw(path_xml, tube_nr, full_meta=False):
ns,
).text
if (
GMW_c.find("isgmw:deliveredVerticalPosition//" "gmwcommon:offset", ns)
GMW_c.find("isgmw:deliveredVerticalPosition//gmwcommon:offset", ns)
is not None
):
meta["deliveredVerticalPosition_offset"] = float(
GMW_c.find(
"isgmw:deliveredVerticalPosition//" "gmwcommon:offset", ns
).text
GMW_c.find("isgmw:deliveredVerticalPosition//gmwcommon:offset", ns).text
)
if (
GMW_c.find(
Expand All @@ -265,15 +263,15 @@ def get_metadata_from_gmw(path_xml, tube_nr, full_meta=False):

# ground_level
glp_xml = GMW_c.find(
"isgmw:deliveredVerticalPosition//" "gmwcommon:groundLevelPosition", ns
"isgmw:deliveredVerticalPosition//gmwcommon:groundLevelPosition", ns
)
vert_datum = GMW_c.find(
"isgmw:deliveredVerticalPosition//" "gmwcommon:verticalDatum", ns
"isgmw:deliveredVerticalPosition//gmwcommon:verticalDatum", ns
).text
meta["unit"] = glp_xml.attrib["uom"] + " " + vert_datum
if glp_xml.attrib["uom"].lower() != "m":
logger.info(
f'groundlevel unit is unexpected {glp_xml.attrib["uom"]}, ' "m is expected"
f"groundlevel unit is unexpected {glp_xml.attrib['uom']}, m is expected"
)
if vert_datum.lower() != "nap":
logger.info(f"datum has unexpected value {vert_datum}, NAP is expected")
Expand Down Expand Up @@ -315,7 +313,7 @@ def get_metadata_from_gmw(path_xml, tube_nr, full_meta=False):
"isgmw:tubeTopPositioningMethod", ns
).text
if (
tube.find("isgmw:materialUsed//" "gmwcommon:tubePackingMaterial", ns)
tube.find("isgmw:materialUsed//gmwcommon:tubePackingMaterial", ns)
is not None
):
meta["tubePackingMaterial"] = tube.find(
Expand Down Expand Up @@ -347,8 +345,7 @@ def get_metadata_from_gmw(path_xml, tube_nr, full_meta=False):
screenLength_unit = screenLength_xml.attrib["uom"]
if screenLength_unit != "m":
logger.info(
f'screenLength unit is unexpected {screenLength.attrib["uom"]},'
"m expected"
f"screenLength unit is unexpected {screenLength.attrib['uom']},m expected"
)

plainTubePartLength_xml = tube.find(
Expand All @@ -366,7 +363,7 @@ def get_metadata_from_gmw(path_xml, tube_nr, full_meta=False):

if tube.find("isgmw:sedimentSumpPresent", ns).text.lower() in ["ja", "yes"]:
sedimentSumpLength_xml = tube.find(
"isgmw:sedimentSump//" "gmwcommon:sedimentSumpLength", ns
"isgmw:sedimentSump//gmwcommon:sedimentSumpLength", ns
)
sedimentSumpLength = float(sedimentSumpLength_xml.text)
sedimentSumpLength_unit = sedimentSumpLength_xml.attrib["uom"]
Expand Down
2 changes: 1 addition & 1 deletion hydropandas/io/dino.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def _read_dino_groundwater_metadata(f, line):
meta_ts.pop(key)

obs_att = meta_tsi.copy()
obs_att["name"] = f'{obs_att["monitoring_well"]}-{int(obs_att["tube_nr"]):03d}'
obs_att["name"] = f"{obs_att['monitoring_well']}-{int(obs_att['tube_nr']):03d}"
obs_att["metadata_available"] = True
else:
# no metadata
Expand Down
2 changes: 1 addition & 1 deletion hydropandas/io/fews.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ class of the observations, e.g. GroundwaterObs or WaterlvlObs
nfiles = len(fnames)
for j, ixml in enumerate(fnames):
# print message
logger.info(f"{j+1}/{nfiles} read {ixml}")
logger.info(f"{j + 1}/{nfiles} read {ixml}")

# join directory to filename if provided
if directory is None:
Expand Down
34 changes: 21 additions & 13 deletions hydropandas/io/knmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def get_knmi_timeseries_fname(
elif meteo_var is None or meteo_var == "RD":
# neerslagstation
meteo_var = "RD"
add_day = True
add_day = False
elif settings["interval"] == "daily":
# meteo station
add_day = True
Expand All @@ -175,7 +175,7 @@ def get_knmi_timeseries_fname(
)
if df.empty:
logger.warning(
f"No data for {meteo_var=} in {fname=} between" f"{start=} and {end=}."
f"No data for {meteo_var=} in {fname=} between{start=} and {end=}."
)
else:
ts, meta = interpret_knmi_file(
Expand All @@ -185,6 +185,7 @@ def get_knmi_timeseries_fname(
start=start,
end=end,
add_day=add_day,
add_hour=True,
)

stn = meta["station"]
Expand Down Expand Up @@ -314,8 +315,7 @@ def get_knmi_timeseries_stn(
and settings["use_api"]
):
message = (
"No hourly evaporation data available through the api, "
"set use_api=False."
"No hourly evaporation data available through the api, set use_api=False."
)
raise ValueError(message)
elif settings["fill_missing_obs"]:
Expand Down Expand Up @@ -346,7 +346,7 @@ def get_knmi_timeseries_stn(
)
if knmi_df.empty:
logger.warning(
f"No data for {meteo_var=} at {stn=} between" f"{start=} and {end=}."
f"No data for {meteo_var=} at {stn=} between{start=} and {end=}."
)
if str(stn) in station_meta.index:
meta = station_meta.loc[f"{stn}"].to_dict()
Expand Down Expand Up @@ -596,7 +596,7 @@ def fill_missing_measurements(
)
if new_end < end:
end = new_end
logger.info(f'changing end_date to {end.strftime("%Y-%m-%d")}')
logger.info(f"changing end_date to {end.strftime('%Y-%m-%d')}")

# find missing values
knmi_df = _add_missing_indices(knmi_df, stn, start, end)
Expand Down Expand Up @@ -743,7 +743,7 @@ def download_knmi_data(
df, meta = get_knmi_daily_rainfall_api(
stn=stn, start=start, end=end
)
add_day = True
add_day = False
else:
# daily data from meteorological stations
df, meta = get_knmi_daily_meteo_api(
Expand All @@ -758,6 +758,7 @@ def download_knmi_data(
start=start,
end=end,
add_day=add_day,
add_hour=True,
)

except (RuntimeError, requests.ConnectionError) as e:
Expand All @@ -777,17 +778,20 @@ def download_knmi_data(
elif meteo_var == "RD":
# daily data from rainfall-stations
df, meta = get_knmi_daily_rainfall_url(stn, stn_name)
add_day = True
else:
# daily data from meteorological stations
df, meta = get_knmi_daily_meteo_url(stn=stn)
add_day = True
if not df.empty:
knmi_df, variables = interpret_knmi_file(
df=df,
meta=meta,
meteo_var=meteo_var,
start=start,
end=end,
add_day=True,
add_day=add_day,
add_hour=True,
)
except (ValueError, KeyError, pd.errors.EmptyDataError) as e:
logger.error(f"{e} {msg}")
Expand Down Expand Up @@ -843,7 +847,8 @@ def get_knmi_daily_rainfall_api(

@lru_cache()
def get_knmi_daily_rainfall_url(
stn: int, stn_name: str
stn: int,
stn_name: str,
) -> Tuple[pd.DataFrame, Dict[str, Any]]:
"""download and read knmi daily rainfall.

Expand All @@ -867,6 +872,7 @@ def get_knmi_daily_rainfall_url(
additional information about the variables
"""

stn = f"{stn:03d}" # make sure there are leading zeros
url = (
"https://cdn.knmi.nl/knmi/map/page/klimatologie/"
f"gegevens/monv_reeksen/neerslaggeg_{stn_name}_{stn}.zip"
Expand Down Expand Up @@ -1195,7 +1201,7 @@ def interpret_knmi_file(
meteo_var: str,
start: Union[pd.Timestamp, None] = None,
end: Union[pd.Timestamp, None] = None,
add_day: bool = True,
add_day: bool = False,
add_hour: bool = True,
) -> Tuple[pd.DataFrame, Dict[str, Any]]:
"""interpret data from knmi by selecting meteo_var data and meta
Expand All @@ -1214,9 +1220,11 @@ def interpret_knmi_file(
end : pd.TimeStamp or None
end time of observations.
add_day : boolean, optional
add 1 day so that the timestamp is at the end of the period the data describes
add 1 day so that the timestamp is at the end of the period the data describes,
default is False, and has to be set per type of file.
add_hour : boolean, optional
add 1 hour to convert from UT to UT+1 (standard-time in the Netherlands)
add 1 hour to convert from UT to UT+1 (standard-time in the Netherlands),
default is True as this is usually the case.


Returns
Expand Down Expand Up @@ -1409,7 +1417,7 @@ def _check_latest_measurement_date_de_bilt(

logger.debug(
f"last {meteo_var} measurement available at the Bilt until {end_str} is from"
f' {last_measurement_date_debilt.strftime("%Y-%m-%d")}'
f" {last_measurement_date_debilt.strftime('%Y-%m-%d')}"
)
logger.debug(
f"assuming no {meteo_var} measurements are available at "
Expand Down
4 changes: 2 additions & 2 deletions hydropandas/io/modflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def read_imod_results(

if ml.modelgrid.xoffset == 0 or ml.modelgrid.yoffset == 0:
warnings.warn(
"you probably want to set the xll and/or yll " "attributes of ml.modelgrid"
"you probably want to set the xll and/or yll attributes of ml.modelgrid"
)

if nlay is None:
Expand Down Expand Up @@ -130,7 +130,7 @@ def read_modflow_results(
if ml.modelgrid.grid_type == "structured":
if ml.modelgrid.xoffset == 0 or ml.modelgrid.yoffset == 0:
warnings.warn(
"you probably want to set the xll " "and/or yll attributes in DIS!"
"you probably want to set the xll and/or yll attributes in DIS!"
)

if isinstance(hds_arr, xr.DataArray):
Expand Down
4 changes: 1 addition & 3 deletions hydropandas/io/waterinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,7 @@ def read_waterinfo_file(
elif "WAARNEMINGTIJD" in df.columns:
index_cols += ["WAARNEMINGTIJD"]
else:
raise KeyError(
"expected column with WAARNEMINGSTIJD but could not find one"
)
raise KeyError("expected column with WAARNEMINGTIJD but could not find one")

df.index = pd.to_datetime(
df[index_cols[0]] + " " + df[index_cols[1]], dayfirst=True
Expand Down
10 changes: 7 additions & 3 deletions hydropandas/obs_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -2098,7 +2098,7 @@ def get_obs(self, name=None, **kwargs):
f"multiple observations for given conditions {selected_obs.index}"
)

def to_excel(self, path, meta_sheet_name="metadata"):
def to_excel(self, path, meta_sheet_name="metadata", check_consistency=True):
"""Write an ObsCollection to an excel, the first sheet in the excel contains the
metadata, the other tabs are the timeseries of each observation.

Expand All @@ -2110,6 +2110,9 @@ def to_excel(self, path, meta_sheet_name="metadata"):
full path of xlsx file.
meta_sheet_name : str, optional
sheetname with metadata. The default is "metadata".
check_consistency : bool, optional
If True the consistency of the collection is checked. If set to False the excel file may be unreadable by hydropandas. The
default is True.

Raises
------
Expand All @@ -2129,8 +2132,9 @@ def to_excel(self, path, meta_sheet_name="metadata"):
If you don't want this consider using the `to_pickle` method.
"""

if not self._is_consistent():
raise RuntimeError("inconsistent observation collection")
if check_consistency:
if not self._is_consistent():
raise RuntimeError("inconsistent observation collection")

oc = self.copy(deep=True)

Expand Down
Loading
Loading