Add Nudging decay (including reading LastObs files) (#310)
* added last obs df including discharge and ids

* lastobs nc file folder

* formatting and cleanup

* added both discharge and model discharge

* added prediction delta timesteps

* da decay with exp decay incorporated, unblackened for readability

* added check between obs file results and our fvd output

* removed pdbs and set to run on lastobs

* added verbose statements

* restructuring of lastobs df to simplify process

* generalized last timestep index call to automatically determine from various input sizes

* github requested fixes to if statements and cleanup

* updated variable

* working NCAR da decay prototype; need to bring in real lastobs data inside the equation

* getting da values in order through classic da assim technique

* pushing ids and values to mc reach

* fixed gage id matching; cython is broken and can't compile correctly, need to print values to check whether da is working properly

* saving changes, trying to fix old DA function

* restructuring da timeslice file read to use datetime; it was not generalized before

* added last obs df including discharge and ids

* lastobs nc file folder

* formatting and cleanup

* added both discharge and model discharge

* added prediction delta timesteps

* da decay with exp decay incorporated, unblackened for readability

* added check between obs file results and our fvd output

* removed pdbs and set to run on lastobs

* added verbose statements

* restructuring of lastobs df to simplify process

* generalized last timestep index call to automatically determine from various input sizes

* github requested fixes to if statements and cleanup

* updated variable

* generalized da naming convention and date timeframe with improved interpolation

* removed extra comments

* remove dependence on not-yet-created flowveldepth

* name "last_obs_file"

* include data_assimilation_parameters to yaml

* include empty dict for data_assimilation_parameters in yaml

* black

* added paths to shared drive locations and blackened files

* quick merge changes

* files working

* sync for merge

* sync for merge

* sync merge to upstream

* move last obs function next to usgs_da function

* add TODOs

* add lastobs to other parallel modes

* move last_obs above usgs_df

* cimports and cdefs for last_obs

* fixed broken usgs_df; names were changed to folder in some areas and filter in others

* moved da functions into single wrapper in nhd_network_utilities

also harmonized inputs a bit.

* add da yaml file (DATA NOT YET ADDED)

* drop non-5-minute entries from time_slices

Also harmonizing inputs for merge.

* add function for finding the tailwater for a given segment

* add masks

* use pandas date_range

* cleanup

* add comment showing possible handling of extended DA

* Revert "add comment showing possible handling of extended DA"

This reverts commit 0455466.

* temporarily disable last_obs

* Update example yaml with inputs that work

* temporarily disable last_obs

* update comment

* adjust DA for perfect match

* removed filter list

* use efficient shape call for usgs_positions_list length

* add gage_maxtime and pseudocode for lastobs

* Identified major hard-code issue in structured- and structured-obj

* update lastobs comments and pseudocode

* update da test yaml file with additional gage options

* use "reindex" to fill/eliminate columns for usgs_df

* functions in place for decay, but last obs file is behaving incorrectly and changing the parity check even when all uses are turned off

* added decay timestep count

* Use new fields in flowveldepth to simplify initial condition handling

To facilitate this, added a constant qvd_ts_w (flowveldepth timestep width)
to define the standard column width.

* add two additional segments for parity checking

* reconfigure reach splitting to consider gages

* update diffusive call signature

* yaml updates for test

* black

Co-authored-by: James Halgren <james.halgren@noaa.gov>
jhrehanoaa and jameshalgren authored May 28, 2021
1 parent fa93df6 commit e24d032
Showing 8 changed files with 430 additions and 73 deletions.
132 changes: 112 additions & 20 deletions src/python_framework_v02/troute/nhd_io.py
@@ -8,6 +8,9 @@
import numpy as np
from toolz import compose
import dask.array as da
import sys
import math
from datetime import datetime, timedelta


def read_netcdf(geo_file_path):
@@ -186,7 +189,6 @@ def get_ql_from_wrf_hydro_mf(qlat_files, index_col="feature_id", value_col="q_la
2018-01-01 13:00:00 4186117 41.233807 -75.413895 0.006496
```
"""
filter_list = None

with xr.open_mfdataset(
qlat_files,
@@ -339,6 +341,92 @@ def preprocess_time_station_index(xd):
)


def build_last_obs_df(lastobsfile, routelink, wrf_last_obs_flag):
# open routelink_file and extract discharges

ds1 = xr.open_dataset(routelink)
df = ds1.to_dataframe()
df2 = df.loc[df["gages"] != b" "]
df2["gages"] = df2["gages"].astype("int")
df2 = df2[["gages", "to"]]
df2 = df2.reset_index()
df2 = df2.set_index("gages")

with xr.open_dataset(lastobsfile) as ds:
df_model_discharges = ds["model_discharge"].to_dataframe()
df_discharges = ds["discharge"].to_dataframe()
last_ts = df_model_discharges.index.get_level_values("timeInd")[-1]
model_discharge_last_ts = df_model_discharges[
df_model_discharges.index.get_level_values("timeInd") == last_ts
]
discharge_last_ts = df_discharges[
df_discharges.index.get_level_values("timeInd") == last_ts
]
df1 = ds["stationId"].to_dataframe()
df1 = df1.astype(int)
model_discharge_last_ts = model_discharge_last_ts.join(df1)
model_discharge_last_ts = model_discharge_last_ts.join(discharge_last_ts)
model_discharge_last_ts = model_discharge_last_ts.loc[
model_discharge_last_ts["model_discharge"] != -9999.0
]
model_discharge_last_ts = model_discharge_last_ts.reset_index().set_index(
"stationId"
)
model_discharge_last_ts = model_discharge_last_ts.drop(
["stationIdInd", "timeInd"], axis=1
)
model_discharge_last_ts["discharge"] = model_discharge_last_ts[
"discharge"
].to_frame()
# If predict from last_obs file use last obs file results
# if last_obs_file == "error-based":
# elif last_obs_file == "obs-based": # the wrf-hydro default
if wrf_last_obs_flag:
model_discharge_last_ts["last_nudge"] = (
model_discharge_last_ts["discharge"]
- model_discharge_last_ts["model_discharge"]
)
final_df = df2.join(model_discharge_last_ts["discharge"])
final_df = final_df.reset_index()
final_df = final_df.set_index("to")
final_df = final_df.drop(["feature_id", "gages"], axis=1)
final_df = final_df.dropna()

# Else predict from the model outputs from t-route if index doesn't match interrupt computation as the results won't be valid
# else:
# fvd_df = fvd_df
# if len(model_discharge_last_ts.index) == len(fvd_df.index):
# model_discharge_last_ts["last_nudge"] = (
# model_discharge_last_ts["discharge"] - fvd_df[fvd_df.columns[0]]
# )
# else:
# print("THE NUDGING FILE IDS DO NOT MATCH THE FLOWVELDEPTH IDS")
# sys.exit()
# # Predictions created with continuously decreasing deltas until near 0 difference
# a = 120
# prediction_df = pd.DataFrame(index=model_discharge_last_ts.index)

# for time in range(0, 720, 5):
# weight = math.exp(time / -a)
# delta = pd.DataFrame(
# model_discharge_last_ts["last_nudge"] / weight)

# if time == 0:
# prediction_df[str(time)] = model_discharge_last_ts["last_nudge"]
# weight_diff = prediction_df[str(time)] - prediction_df[str(time)]
# else:
# if weight > 0.1:
# prediction_df[str(time)] = (
# delta["last_nudge"] + model_discharge_last_ts["model_discharge"]
# )
# elif weight < -0.1:
# prediction_df[str(time)] = (
# delta["last_nudge"] + model_discharge_last_ts["model_discharge"]
# )
# prediction_df["0"] = model_discharge_last_ts["model_discharge"]
return final_df
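
For reference, a minimal runnable sketch of the decay behavior the commented pseudocode above is aiming for. Note the pseudocode divides the nudge by the weight, whereas a decaying nudge multiplies by exp(-t/a); the sketch below multiplies. The function name `decay_nudge` and the Series-based signature are hypothetical; a = 120 and the 5-minute step over a 720-minute horizon come from the commented values.

```python
import math

import pandas as pd


def decay_nudge(last_nudge, model_discharge, a=120.0, horizon=720, dt=5):
    # Hypothetical sketch: last_nudge and model_discharge are Series
    # indexed by station id. The nudge shrinks by exp(-t / a), so the
    # prediction relaxes from the last observation back toward the model.
    prediction = pd.DataFrame(index=last_nudge.index)
    for t in range(0, horizon, dt):
        weight = math.exp(-t / a)  # 1.0 at t=0, ~0.0025 at t=720
        prediction[str(t)] = model_discharge + weight * last_nudge
    return prediction
```

At t = 0 the prediction equals the last observation; after a few multiples of `a` it is effectively the raw model discharge.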


def get_usgs_from_time_slices_csv(routelink_subset_file, usgs_csv):

df2 = pd.read_csv(usgs_csv, index_col=0)
@@ -421,32 +509,36 @@ def get_usgs_from_time_slices_folder(
usgs_df = usgs_df.drop(["gages", "ascendingIndex", "to"], axis=1)
columns_list = usgs_df.columns

for i in range(0, (len(columns_list) * 3) - 12, 12):
original_string = usgs_df.columns[i]
original_string_shortened = original_string[:-5]
temp_name1 = original_string_shortened + str("05:00")
temp_name2 = original_string_shortened + str("10:00")
temp_name3 = original_string_shortened + str("20:00")
temp_name4 = original_string_shortened + str("25:00")
temp_name5 = original_string_shortened + str("35:00")
temp_name6 = original_string_shortened + str("40:00")
temp_name7 = original_string_shortened + str("50:00")
temp_name8 = original_string_shortened + str("55:00")
usgs_df.insert(i + 1, temp_name1, np.nan)
usgs_df.insert(i + 2, temp_name2, np.nan)
usgs_df.insert(i + 4, temp_name3, np.nan)
usgs_df.insert(i + 5, temp_name4, np.nan)
usgs_df.insert(i + 7, temp_name5, np.nan)
usgs_df.insert(i + 8, temp_name6, np.nan)
usgs_df.insert(i + 10, temp_name7, np.nan)
usgs_df.insert(i + 11, temp_name8, np.nan)
original_string_first = usgs_df.columns[0]
date_time_str = original_string_first[:10] + " " + original_string_first[11:]
date_time_obj_start = datetime.strptime(date_time_str, "%Y-%m-%d %H:%M:%S")

original_string_last = usgs_df.columns[-1]
date_time_str = original_string_last[:10] + " " + original_string_last[11:]
date_time_obj_end = datetime.strptime(date_time_str, "%Y-%m-%d %H:%M:%S")

dates = []
# for j in pd.date_range(date_time_obj_start, date_time_obj_end + timedelta(1), freq="5min"):
for j in pd.date_range(date_time_obj_start, date_time_obj_end, freq="5min"):
dates.append(j.strftime("%Y-%m-%d_%H:%M:00"))

"""
# dates_to_drop = ~usgs_df.columns.isin(dates)
OR
# dates_to_drop = usgs_df.columns.difference(dates)
# dates_to_add = pd.Index(dates).difference(usgs_df.columns)
"""

usgs_df = usgs_df.reindex(columns=dates)

usgs_df = usgs_df.interpolate(method="linear", axis=1)
usgs_df = usgs_df.interpolate(method="linear", axis=1, limit_direction="backward")
usgs_df.drop(usgs_df[usgs_df.iloc[:, 0] == -999999.000000].index, inplace=True)

return usgs_df
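
As a self-contained illustration of the date_range/reindex/interpolate pattern above (station ids and values invented):

```python
import pandas as pd

# Two 15-minute observation columns; reindex onto a 5-minute grid,
# then fill the inserted NaN columns by linear interpolation.
obs = pd.DataFrame(
    [[1.0, 4.0], [10.0, 40.0]],
    index=[4186117, 4186169],
    columns=["2021-05-28_00:00:00", "2021-05-28_00:15:00"],
)
grid = pd.date_range("2021-05-28 00:00", "2021-05-28 00:15", freq="5min")
obs = obs.reindex(columns=[t.strftime("%Y-%m-%d_%H:%M:00") for t in grid])
obs = obs.interpolate(method="linear", axis=1)
# the 00:05 and 00:10 columns now hold 2.0/3.0 and 20.0/30.0
```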


# TODO: Move channel restart above usgs to keep order with execution script
def get_channel_restart_from_csv(
channel_initial_states_file,
index_col=0,
25 changes: 25 additions & 0 deletions src/python_framework_v02/troute/nhd_network.py
@@ -88,6 +88,31 @@ def reverse_network(N):
return rg


def find_tw_for_node(reaches_bytw, node):
# TODO: extend this function (or write a new one) to handle a list of nodes.
# Such functionality might be useful for finding networks corresponding to a
# list of gages, for instance.
"""
reaches_bytw is a dictionary of lists of the form
tw 1:
[ [ seg1, seg2, seg3, ... segn ], # reach 1
[ sega, segb, segc, ... segz ], # reach 2
.
.
.
[ ... ] ] reach n
tw 2:
etc.
"""
for tw, rs in reaches_bytw.items():
for r in rs:
if node in r:
return tw

return None # Node not in reach set.
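
A quick usage sketch for `find_tw_for_node` (segment and tailwater ids invented):

```python
reaches_bytw = {
    10: [[1, 2, 3], [4, 5]],  # two reaches draining to tailwater 10
    20: [[6, 7]],
}
assert find_tw_for_node(reaches_bytw, 5) == 10
assert find_tw_for_node(reaches_bytw, 99) is None  # node in no reach
```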


def junctions(N):
c = Counter(chain.from_iterable(N.values()))
return {k for k, v in c.items() if v > 1}
57 changes: 50 additions & 7 deletions src/python_framework_v02/troute/nhd_network_utilities_v02.py
@@ -419,10 +419,31 @@ def build_connections(supernetwork_parameters, dt):

param_df["dt"] = dt
param_df = param_df.rename(columns=reverse_dict(cols))

wbodies = {}
if "waterbody" in cols:
wbodies = build_waterbodies(
param_df[["waterbody"]], supernetwork_parameters, "waterbody"
)
param_df = param_df.drop("waterbody", axis=1)

gages = {}
if "gages" in cols:
gages = build_gages(param_df[["gages"]])
param_df = param_df.drop("gages", axis=1)

param_df = param_df.astype("float32")

# datasub = data[['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']]
return connections, param_df
return connections, param_df, wbodies, gages
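
With this change, callers unpack four values instead of two; a hedged sketch of the updated call site (the surrounding variables are assumed to exist as before):

```python
# build_connections now also returns waterbody and gage mappings;
# supernetwork_parameters and dt are assumed to be defined by the caller.
connections, param_df, wbodies, gages = build_connections(
    supernetwork_parameters, dt
)
```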


def build_gages(segment_gage_df,):
gage_list = list(map(bytes.strip, segment_gage_df.gages.values))
gage_mask = list(map(bytes.isdigit, gage_list))
gages = segment_gage_df.loc[gage_mask, "gages"].to_dict()

return gages
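
To see what the digit mask in `build_gages` keeps and drops, a small invented example:

```python
import pandas as pd

# Gage ids in the routelink are space-padded bytes; blank entries strip
# to b"" and fail bytes.isdigit, so only segments with real gages survive.
segment_gage_df = pd.DataFrame(
    {"gages": [b"01234567       ", b"               ", b"06879650       "]},
    index=[101, 102, 103],
)
gages = build_gages(segment_gage_df)
# {101: b'01234567       ', 103: b'06879650       '}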


def build_waterbodies(
@@ -569,20 +590,42 @@ def build_data_assimilation(data_assimilation_parameters):
data_assimilation_csv = data_assimilation_parameters.get(
"data_assimilation_csv", None
)
data_assimilation_filter = data_assimilation_parameters.get(
"data_assimilation_filter", None
data_assimilation_folder = data_assimilation_parameters.get(
"data_assimilation_timeslices_folder", None
)
# TODO: Fix the Logic here according to the following.

# If there are any observations for data assimilation, there
# needs to be a complete set in the first time set or else
# there must be a "LastObs". If there is a complete set in
# the first time step, the LastObs is optional. If there are
# no observations for assimilation, there can be a LastObs
# with an empty usgs dataframe.

last_obs_file = data_assimilation_parameters.get("wrf_hydro_last_obs_file", None)
last_obs_type = data_assimilation_parameters.get("wrf_last_obs_type", "error-based")
last_obs_crosswalk_file = data_assimilation_parameters.get(
"wrf_hydro_da_channel_ID_crosswalk_file", None
)

last_obs_df = pd.DataFrame()

if last_obs_file:
last_obs_df = nhd_io.build_last_obs_df(
last_obs_file, last_obs_crosswalk_file, last_obs_type,
)

if data_assimilation_csv:
usgs_df = build_data_assimilation_csv(data_assimilation_parameters)
elif data_assimilation_filter:
elif data_assimilation_folder:
usgs_df = build_data_assimilation_folder(data_assimilation_parameters)
return usgs_df
return usgs_df, last_obs_df
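
Based only on the keys read above, a hypothetical parameter dict for exercising the wrapper; every path and filename below is a placeholder, not project data, so the call only succeeds with real files in place:

```python
data_assimilation_parameters = {
    "data_assimilation_timeslices_folder": "usgs_timeslices",  # placeholder
    "data_assimilation_filter": "*.usgsTimeSlice.ncdf",        # placeholder
    "wrf_hydro_da_channel_ID_crosswalk_file": "RouteLink.nc",  # placeholder
    "wrf_hydro_last_obs_file": "nudgingLastObs.nc",            # placeholder
    "wrf_last_obs_type": "obs-based",
}
usgs_df, last_obs_df = build_data_assimilation(data_assimilation_parameters)
```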


def build_data_assimilation_csv(data_assimilation_parameters):

usgs_df = nhd_io.get_usgs_from_time_slices_csv(
data_assimilation_parameters["data_assimilation_parameters_file"],
data_assimilation_parameters["wrf_hydro_da_channel_ID_crosswalk_file"],
data_assimilation_parameters["data_assimilation_csv"],
)

@@ -597,7 +640,7 @@ def build_data_assimilation_folder(data_assimilation_parameters):
).resolve()

usgs_df = nhd_io.get_usgs_from_time_slices_folder(
data_assimilation_parameters["data_assimilation_parameters_file"],
data_assimilation_parameters["wrf_hydro_da_channel_ID_crosswalk_file"],
usgs_timeslices_folder,
data_assimilation_parameters["data_assimilation_filter"],
)