Commit

various improvements

swd-turner committed Mar 13, 2021
1 parent 18090f1 commit de6e37f
Showing 8 changed files with 146 additions and 28 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
@@ -14,6 +14,7 @@ importFrom(crayon,blue)
importFrom(crayon,green)
importFrom(crayon,red)
importFrom(dplyr,arrange)
importFrom(dplyr,count)
importFrom(dplyr,filter)
importFrom(dplyr,first)
importFrom(dplyr,group_by)
22 changes: 22 additions & 0 deletions R/constants.R
@@ -17,8 +17,30 @@ min_allowable_days_of_storage <- 1095
# minimum allowable data points to use release and inflow without any back-calculating
min_r_i_datapoints <- 260 # 5 years

# tolerance for r-squared value of release residual model.
# Models with lower r-squared value than r_sq_tol are discarded.
r_sq_tol <- 0.2

# release constraint quantile
r_st_min_quantile <- 0.005
r_st_max_quantile <- 0.995


# unit conversion
m3_to_Mm3 <- 1e-6
seconds_per_day <- 86400
weeks_per_year <- 365.25 / 7


HUC_replacements <-
tibble::tribble(
~STATE, ~HUC4_replacement,
"Indiana", "05XX",
"Michigan", "07XX",
"Minnesota", "09XX",
"New York", "02XX",
"Vermont", "02XX",
"Ohio", "05XX",
"Wisconsin", "07XX"
)
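
A minimal sketch of how the conversion constants above combine, using a hypothetical inflow value (the 150 m3/s figure and variable names are illustrative only):

# convert a hypothetical mean inflow of 150 cumecs (m3/s) to a weekly volume in MCM
i_cumecs <- 150
i_MCM_per_day  <- i_cumecs * seconds_per_day * m3_to_Mm3  # 12.96 MCM per day
i_MCM_per_week <- i_MCM_per_day * 7                       # ~90.7 MCM per week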

25 changes: 21 additions & 4 deletions R/extrapolate_targets_to_GRanD.R
@@ -3,12 +3,14 @@
#' @description use a set of inferred storage targets to extrapolate storage targets to all dams in GRanD
#' @param USRDATS_path path to USRDATS data
#' @param targets_path path to fitted targets (generated using fit_targets())
#' @param include_all T/F (if T, returns results for dams in the trained set)
#' @param HUC_04_correction T/F (if T, replaces Great Lakes (HUC2 04) basin codes with those of neighboring HUC2 regions, to deal with missing data across the Great Lakes)
#' @importFrom dplyr select group_by ungroup filter summarise pull mutate arrange if_else first last left_join
#' @importFrom purrr map_dfr
#' @return tibble of dam ids to copy
#' @export
#'
extrapolate_targets_to_GRanD <- function(USRDATS_path, targets_path){
extrapolate_targets_to_GRanD <- function(USRDATS_path, targets_path, include_all = FALSE, HUC_04_correction = TRUE){

# read GRanD data and join HUCs
read_reservoir_attributes(USRDATS_path) %>%
@@ -30,9 +32,16 @@ extrapolate_targets_to_GRanD <- function(USRDATS_path, targets_path){
fitted_dams

# get list of dams that lack targets and need to be extrapolated
dam_attributes_and_HUCs[["GRAND_ID"]] %>%
.[which(!. %in% fitted_dams)] ->
unfitted_dams

if(include_all == FALSE){
dam_attributes_and_HUCs[["GRAND_ID"]] %>%
.[which(!. %in% fitted_dams)] ->
unfitted_dams
}else{
dam_attributes_and_HUCs[["GRAND_ID"]] ->
unfitted_dams
}


# cycle through unfitted dams to find best candidate dam for copy

@@ -42,6 +51,14 @@ extrapolate_targets_to_GRanD <- function(USRDATS_path, targets_path){
dam_attributes_and_HUCs %>%
filter(GRAND_ID == dam) -> dam_attr

# GREAT LAKES CORRECTION
if(HUC_04_correction == TRUE & substr(dam_attr[["HUC4"]], 1, 2) == "04"){
dam_attr %>%
left_join(HUC_replacements, by = "STATE") %>%
mutate(HUC4 = HUC4_replacement) %>%
select(-HUC4_replacement) -> dam_attr
}

dam_attributes_and_HUCs %>%
filter(HUC4 %in% dam_attr[["HUC4"]],
GRAND_ID != dam,
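
A hedged usage sketch of the updated signature; the paths below are placeholders and the defaults mirror the new arguments:

# sketch only: "path/to/..." are placeholder paths
extrapolate_targets_to_GRanD(
  USRDATS_path = "path/to/USRDATS",
  targets_path = "path/to/fitted_targets",
  include_all = FALSE,       # TRUE also returns dams already in the trained set
  HUC_04_correction = TRUE   # replace Great Lakes (HUC2 04) codes via HUC_replacements
)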
95 changes: 80 additions & 15 deletions R/fit_release_function.R
@@ -5,7 +5,7 @@
#' @param dam_id integer id of dam; same as GRanD ID
#' @param targets_path path to fitted targets. If NULL, fit_targets() will be run.
#' @importFrom lubridate year epiweek
#' @importFrom dplyr select group_by ungroup filter summarise pull mutate arrange if_else first last left_join lead
#' @importFrom dplyr select group_by ungroup filter summarise pull mutate arrange if_else first last left_join lead count
#' @importFrom readr read_csv cols
#' @return list of fitted storage targets and release function parameters
#' @export
@@ -20,12 +20,12 @@ fit_release_function <- function(USRDATS_path, dam_id, targets_path){

if(missing(targets_path)){

info("targets_path not supplied; fitting storage targets...")
#info("targets_path not supplied; fitting storage targets...")

fit_targets(USRDATS_path, dam_id) -> fitted_targets

tibble(pf = fitted_targets[["flood target parameters"]],
pm = fitted_targets[["conservation target parameters"]]) ->
tibble(pf = fitted_targets[["NSR upper bound"]],
pm = fitted_targets[["NSR lower bound"]]) ->
storage_target_parameters

}else{
@@ -56,11 +56,17 @@ fit_release_function <- function(USRDATS_path, dam_id, targets_path){
r = r_cumecs * m3_to_Mm3 * seconds_per_day) %>%
select(date, s = s_MCM, i, r) %>%
mutate(year = year(date), epiweek = epiweek(date)) %>%
filter(year >= cutoff_year) %>%
filter(year >= cutoff_year) -> daily_ops

daily_ops %>% filter(s + i < storage_capacity_MCM) ->
daily_ops_non_spill_periods

daily_ops %>%
aggregate_to_epiweeks() %>%
back_calc_missing_flows() %>%
filter(!is.na(i) & !is.na(r),
i >= 0, r >= 0) -> weekly_ops_NA_removed
i >= 0, r >= 0) ->
weekly_ops_NA_removed


# RETURN BLANK IF INSUFFICIENT RELEASE/INFLOW DATA
@@ -70,26 +76,55 @@ fit_release_function <- function(USRDATS_path, dam_id, targets_path){
fitted_targets[["mean inflow from obs. (MCM / wk)"]] <- NA_real_
fitted_targets[["release harmonic parameters"]] <- rep(NA_real_, 4)
fitted_targets[["release residual model coefficients"]] <- rep(NA_real_, 3)
fitted_targets[["release constraints"]] <- c(NA_real_, NA_real_)
return(
fitted_targets
)
}

# get most representative mean flow value available
# either from daily or weekly (back-calculated) data

daily_ops %>% filter(!is.na(i)) %>% .[["i"]] -> i_daily

if(length(i_daily) > min_r_i_datapoints * 7){
i_mean <- mean(i_daily) * 7
}else{
i_mean <- mean(weekly_ops_NA_removed[["i"]])
}
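# Note (illustrative): i in daily_ops appears to be a daily volume in MCM, so
# multiplying its mean by 7 puts i_mean on the same weekly basis as the
# epiweek-aggregated inflow used in the else branch.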

weekly_ops_NA_removed %>%
left_join(convert_parameters_to_targets(storage_target_parameters[["pf"]],
"upper"), by = "epiweek") %>%
left_join(convert_parameters_to_targets(storage_target_parameters[["pm"]],
"lower"), by = "epiweek") %>%
mutate(avail_pct = 100 * ((s_start) / storage_capacity_MCM)) %>%
mutate(availability_status = (avail_pct - lower) / (upper - lower)) %>%
filter(availability_status <= 1,
availability_status > 0) %>%
mutate(
i_st = (i / mean(i)) - 1,
r_st = (r / mean(i)) - 1
) ->
training_data
i_st = (i / i_mean) - 1,
r_st = (r / i_mean) - 1
) ->
training_data_unfiltered

# define max and min release constraints
daily_ops_non_spill_periods %>% filter(!is.na(r)) %>% .[["r"]] -> r_daily

# use daily release data to define max release (if possible)
if(length(r_daily) > min_r_i_datapoints * 7){
r_st_max <- ((quantile(r_daily, r_st_max_quantile) %>% unname() %>% round(4) * 7) / i_mean) - 1
r_st_min <- ((quantile(r_daily, r_st_min_quantile) %>% unname() %>% round(4) * 7) / i_mean) - 1
}else{
training_data_unfiltered %>%
filter(s_start + i < storage_capacity_MCM) %>% .[["r_st"]] -> r_st_vector
quantile(r_st_vector, r_st_min_quantile) %>% unname() %>% round(4) -> r_st_min
quantile(r_st_vector, r_st_max_quantile) %>% unname() %>% round(4) -> r_st_max
}
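# Illustrative example with hypothetical numbers: if i_mean = 90 MCM/week and the
# 99.5th-percentile daily release is 20 MCM/day, the upper constraint becomes
#   r_st_max = (20 * 7 / 90) - 1 = 0.56
# i.e. weekly release is capped at roughly 1.56 times the mean weekly inflow.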

# create final training data for normal operating period
training_data_unfiltered %>%
filter(availability_status <= 1,
availability_status > 0) ->
training_data

### harmonic regression (two harmonics) for standardized release
lm(
@@ -125,8 +160,37 @@ fit_release_function <- function(USRDATS_path, dam_id, targets_path){
round(3) ->
st_r_residual_model_coef

if(summary(st_r_residual_model) %>% .[["adj.r.squared"]] < 0.1){
info("Release residual model has low r-squared; (release will be based harmonic function only)")
# deal with any negative coefficients by setting to zero and re-fitting
if(st_r_residual_model_coef[2] < 0 & st_r_residual_model_coef[3] >= 0){
lm(
data = data_for_linear_model_of_release_residuals,
r_st_resid ~ i_st
) -> st_r_residual_model

c(st_r_residual_model[["coefficients"]][[1]],
0,
st_r_residual_model[["coefficients"]][[2]]) %>%
round(3) ->
st_r_residual_model_coef
}

if(st_r_residual_model_coef[3] < 0 & st_r_residual_model_coef[2] >= 0){
lm(
data = data_for_linear_model_of_release_residuals,
r_st_resid ~ availability_status
) -> st_r_residual_model

c(st_r_residual_model[["coefficients"]][[1]],
st_r_residual_model[["coefficients"]][[2]],
0) %>%
round(3) ->
st_r_residual_model_coef
}

if(summary(st_r_residual_model) %>% .[["adj.r.squared"]] < r_sq_tol |
st_r_residual_model_coef[2] < 0 |
st_r_residual_model_coef[3] < 0) {
info("Release residual model will be discarded; release will be based on the harmonic function only")
st_r_residual_model_coef <- c(0, 0, 0)
}
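
A hedged sketch of how the three stored coefficients appear to be applied, inferred from the re-fitting branches above (ordering assumed to be intercept, availability_status, i_st); this is an illustration, not code from the package:

# assumed ordering: c(intercept, b_availability, b_inflow)
predict_release_residual <- function(coef, availability_status, i_st) {
  coef[1] + coef[2] * availability_status + coef[3] * i_st
}
# a discarded model has coef = c(0, 0, 0), so the residual term vanishes and the
# standardized release falls back to the harmonic component alone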

@@ -141,9 +205,10 @@ fit_release_function <- function(USRDATS_path, dam_id, targets_path){
# NULL

fitted_targets[["mean inflow from GRAND. (MCM / wk)"]] <- reservoir_attributes[["i_MAF_MCM"]] / weeks_per_year
fitted_targets[["mean inflow from obs. (MCM / wk)"]] <- training_data[["i"]] %>% mean()
fitted_targets[["mean inflow from obs. (MCM / wk)"]] <- i_mean
fitted_targets[["release harmonic parameters"]] <- st_r_harmonic
fitted_targets[["release residual model coefficients"]] <- st_r_residual_model_coef
fitted_targets[["release constraints"]] <- c(r_st_min, r_st_max)

return(fitted_targets)

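A hedged usage sketch of the enlarged return value; the path and dam id are placeholders, and the element names follow the assignments above:

# sketch only: placeholder path and dam id
fit <- fit_release_function("path/to/USRDATS", dam_id = 310, targets_path = "path/to/fitted_targets")
fit[["release harmonic parameters"]]          # 4 harmonic coefficients
fit[["release residual model coefficients"]]  # 3 coefficients, or c(0, 0, 0) if discarded
fit[["release constraints"]]                  # c(r_st_min, r_st_max)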
12 changes: 6 additions & 6 deletions R/fit_targets.R
@@ -29,8 +29,8 @@ fit_targets <- function(USRDATS_path, dam_id){
list(
"id" = dam_id,
"weekly storage" = tibble(),
"flood target parameters" = rep(NA_real_, 5),
"conservation target parameters" = rep(NA_real_, 5)
"NSR upper bound" = rep(NA_real_, 5),
"NSR lower bound" = rep(NA_real_, 5)
)
)
}
@@ -101,12 +101,12 @@ fit_targets <- function(USRDATS_path, dam_id){

# fit the flood harmonic
fit_constrained_harmonic(data_for_flood_harmonic) %>%
.[["solution"]] ->
.[["solution"]] %>% round(3) ->
p_flood_harmonic

# fit the conservation harmonic
fit_constrained_harmonic(data_for_conservation_harmonic) %>%
.[["solution"]] ->
.[["solution"]] %>% round(3) ->
p_conservation_harmonic

# evaluate targets to remove any superfluous constraints
@@ -129,8 +129,8 @@ fit_targets <- function(USRDATS_path, dam_id){
list(
"id" = dam_id,
"weekly storage" = storage_weekly,
"flood target parameters" = p_flood_harmonic,
"conservation target parameters" = p_conservation_harmonic
"NSR upper bound" = p_flood_harmonic,
"NSR lower bound" = p_conservation_harmonic
)
)

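A hedged sketch of reading the renamed target elements; the path and dam id are placeholders, and convert_parameters_to_targets is used here as it appears in fit_release_function above:

# sketch only: placeholder path and dam id
targets <- fit_targets("path/to/USRDATS", dam_id = 310)
targets[["NSR upper bound"]]   # 5 rounded parameters of the flood (upper) harmonic
targets[["NSR lower bound"]]   # 5 rounded parameters of the conservation (lower) harmonic
# expand a parameter vector to weekly targets keyed by epiweek
upper_targets <- convert_parameters_to_targets(targets[["NSR upper bound"]], "upper")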
4 changes: 4 additions & 0 deletions R/functions.R
@@ -112,6 +112,10 @@ find_closest_dam <- function(dam_attr, other_dams){
dam_attr[["supply"]] -> su
dam_attr[["irr"]] -> ir

if(is.na(fl)) fl <- TRUE
if(is.na(hy)) hy <- TRUE
if(is.na(su)) su <- TRUE
if(is.na(ir)) ir <- TRUE

# determine best matches
other_dams %>%
4 changes: 2 additions & 2 deletions R/inputs.R
@@ -10,7 +10,7 @@
#'
read_reservoir_data <- function(USRDATS_path, dam_id){

read_csv(paste0(USRDATS_path, "/TimeSeriesv2/",
read_csv(paste0(USRDATS_path, "/TimeSeries_all/ResOpsUSA_",
dam_id, ".csv"),
col_types = cols(date = "D",
storage = "d",
@@ -65,7 +65,7 @@ read_reservoir_attributes <- function(USRDATS_path, dam_id = NULL){
read_GRanD_HUC8 <- function(){

read_csv(
paste0(system.file("extdata/", package = "rulecurve"),
paste0(system.file("extdata/", package = "starfit"),
"GRAND_HUC8.csv"),
comment = "#", col_types = cols())

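A hedged usage sketch reflecting the new file layout; the base path and dam id are placeholders:

# sketch only: expects "<USRDATS_path>/TimeSeries_all/ResOpsUSA_<dam_id>.csv"
obs <- read_reservoir_data("path/to/USRDATS", dam_id = 310)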
11 changes: 10 additions & 1 deletion man/extrapolate_targets_to_GRanD.Rd

Some generated files are not rendered by default.