Commit

various improvements

swd-turner committed Mar 13, 2021
1 parent 18090f1 commit de6e37f
Showing 8 changed files with 146 additions and 28 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
@@ -14,6 +14,7 @@ importFrom(crayon,blue)
importFrom(crayon,green)
importFrom(crayon,red)
importFrom(dplyr,arrange)
importFrom(dplyr,count)
importFrom(dplyr,filter)
importFrom(dplyr,first)
importFrom(dplyr,group_by)
22 changes: 22 additions & 0 deletions R/constants.R
@@ -17,8 +17,30 @@ min_allowable_days_of_storage <- 1095
# minimum allowable data points to use release and inflow without any back-calculating
min_r_i_datapoints <- 260 # 5 years

# tolerance for r-squared value of release residual model.
# Models with lower r-squared value than r_sq_tol are discarded.
r_sq_tol <- 0.2

# release constraint quantile
r_st_min_quantile <- 0.005
r_st_max_quantile <- 0.995


# unit conversion
m3_to_Mm3 <- 1e-6
seconds_per_day <- 86400
weeks_per_year <- 365.25 / 7


HUC_replacements <-
tibble::tribble(
~STATE, ~HUC4_replacement,
"Indiana", "05XX",
"Michigan", "07XX",
"Minnesota", "09XX",
"New York", "02XX",
"Vermont", "02XX",
"Ohio", "05XX",
"Wisconsin", "07XX"
)
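
A minimal sketch of how the conversion constants above combine, using a hypothetical inflow value (the 150 m3/s figure and variable names are illustrative only):

# convert a hypothetical mean inflow of 150 cumecs (m3/s) to a weekly volume in MCM
i_cumecs <- 150
i_MCM_per_day  <- i_cumecs * seconds_per_day * m3_to_Mm3  # 12.96 MCM per day
i_MCM_per_week <- i_MCM_per_day * 7                       # ~90.7 MCM per week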

25 changes: 21 additions & 4 deletions R/extrapolate_targets_to_GRanD.R
@@ -3,12 +3,14 @@
#' @description use a set of inferred storage targets to extrapolate storage targets to all dams in GRanD
#' @param USRDATS_path path to USRDATS data
#' @param targets_path path to fitted targets (generated using fit_targets())
#' @param include_all T/F (if T, returns results for dams in the trained set)
#' @param HUC_04_correction T/F (if T, replaces Great Lakes (HUC2 04) basin codes with those of neighboring HUC2 regions, to deal with missing data across the Great Lakes)
#' @importFrom dplyr select group_by ungroup filter summarise pull mutate arrange if_else first last left_join
#' @importFrom purrr map_dfr
#' @return tibble of dam ids to copy
#' @export
#'
extrapolate_targets_to_GRanD <- function(USRDATS_path, targets_path){
extrapolate_targets_to_GRanD <- function(USRDATS_path, targets_path, include_all = FALSE, HUC_04_correction = TRUE){

# read GRanD data and join HUCs
read_reservoir_attributes(USRDATS_path) %>%
@@ -30,9 +32,16 @@ extrapolate_targets_to_GRanD <- function(USRDATS_path, targets_path){
fitted_dams

# get list of dams that lack targets and need to be extrapolated
dam_attributes_and_HUCs[["GRAND_ID"]] %>%
.[which(!. %in% fitted_dams)] ->
unfitted_dams

if(include_all == FALSE){
dam_attributes_and_HUCs[["GRAND_ID"]] %>%
.[which(!. %in% fitted_dams)] ->
unfitted_dams
}else{
dam_attributes_and_HUCs[["GRAND_ID"]] ->
unfitted_dams
}


# cycle through unfitted dams to find best candidate dam for copy

@@ -42,6 +51,14 @@ extrapolate_targets_to_GRanD <- function(USRDATS_path, targets_path){
dam_attributes_and_HUCs %>%
filter(GRAND_ID == dam) -> dam_attr

# GREAT LAKES CORRECTION
if(HUC_04_correction == TRUE & substr(dam_attr[["HUC4"]], 1, 2) == "04"){
dam_attr %>%
left_join(HUC_replacements, by = "STATE") %>%
mutate(HUC4 = HUC4_replacement) %>%
select(-HUC4_replacement) -> dam_attr
}

dam_attributes_and_HUCs %>%
filter(HUC4 %in% dam_attr[["HUC4"]],
GRAND_ID != dam,
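
A hedged usage sketch of the updated signature; the paths below are placeholders and the defaults mirror the new arguments:

# sketch only: "path/to/..." are placeholder paths
extrapolate_targets_to_GRanD(
  USRDATS_path = "path/to/USRDATS",
  targets_path = "path/to/fitted_targets",
  include_all = FALSE,       # TRUE also returns dams already in the trained set
  HUC_04_correction = TRUE   # replace Great Lakes (HUC2 04) codes via HUC_replacements
)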
95 changes: 80 additions & 15 deletions R/fit_release_function.R
@@ -5,7 +5,7 @@
#' @param dam_id integer id of dam; same as GRanD ID
#' @param targets_path path to fitted targets. If NULL, fit_targets() will be run.
#' @importFrom lubridate year epiweek
#' @importFrom dplyr select group_by ungroup filter summarise pull mutate arrange if_else first last left_join lead
#' @importFrom dplyr select group_by ungroup filter summarise pull mutate arrange if_else first last left_join lead count
#' @importFrom readr read_csv cols
#' @return list of fitted storage targets and release function parameters
#' @export
@@ -20,12 +20,12 @@ fit_release_function <- function(USRDATS_path, dam_id, targets_path){

if(missing(targets_path)){

info("targets_path not supplied; fitting storage targets...")
#info("targets_path not supplied; fitting storage targets...")

fit_targets(USRDATS_path, dam_id) -> fitted_targets

tibble(pf = fitted_targets[["flood target parameters"]],
pm = fitted_targets[["conservation target parameters"]]) ->
tibble(pf = fitted_targets[["NSR upper bound"]],
pm = fitted_targets[["NSR lower bound"]]) ->
storage_target_parameters

}else{
@@ -56,11 +56,17 @@ fit_release_function <- function(USRDATS_path, dam_id, targets_path){
r = r_cumecs * m3_to_Mm3 * seconds_per_day) %>%
select(date, s = s_MCM, i, r) %>%
mutate(year = year(date), epiweek = epiweek(date)) %>%
filter(year >= cutoff_year) %>%
filter(year >= cutoff_year) -> daily_ops

daily_ops %>% filter(s + i < storage_capacity_MCM) ->
daily_ops_non_spill_periods

daily_ops %>%
aggregate_to_epiweeks() %>%
back_calc_missing_flows() %>%
filter(!is.na(i) & !is.na(r),
i >= 0, r >= 0) -> weekly_ops_NA_removed
i >= 0, r >= 0) ->
weekly_ops_NA_removed


# RETURN BLANK IF INSUFFICIENT RELEASE/INFLOW DATA
@@ -70,26 +76,55 @@ fit_release_function <- function(USRDATS_path, dam_id, targets_path){
fitted_targets[["mean inflow from obs. (MCM / wk)"]] <- NA_real_
fitted_targets[["release harmonic parameters"]] <- rep(NA_real_, 4)
fitted_targets[["release residual model coefficients"]] <- rep(NA_real_, 3)
fitted_targets[["release constraints"]] <- c(NA_real_, NA_real_)
return(
fitted_targets
)
}

# get most representative mean flow value available
# either from daily or weekly (back-calculated) data

daily_ops %>% filter(!is.na(i)) %>% .[["i"]] -> i_daily

if(length(i_daily) > min_r_i_datapoints * 7){
i_mean <- mean(i_daily) * 7
}else{
i_mean <- mean(weekly_ops_NA_removed[["i"]])
}
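# Note (illustrative): i in daily_ops appears to be a daily volume in MCM, so
# multiplying its mean by 7 puts i_mean on the same weekly basis as the
# epiweek-aggregated inflow used in the else branch.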

weekly_ops_NA_removed %>%
left_join(convert_parameters_to_targets(storage_target_parameters[["pf"]],
"upper"), by = "epiweek") %>%
left_join(convert_parameters_to_targets(storage_target_parameters[["pm"]],
"lower"), by = "epiweek") %>%
mutate(avail_pct = 100 * ((s_start) / storage_capacity_MCM)) %>%
mutate(availability_status = (avail_pct - lower) / (upper - lower)) %>%
filter(availability_status <= 1,
availability_status > 0) %>%
mutate(
i_st = (i / mean(i)) - 1,
r_st = (r / mean(i)) - 1
) ->
training_data
i_st = (i / i_mean) - 1,
r_st = (r / i_mean) - 1
) ->
training_data_unfiltered

# define max and min release constraints
daily_ops_non_spill_periods %>% filter(!is.na(r)) %>% .[["r"]] -> r_daily

# use daily release data to define max release (if possible)
if(length(r_daily) > min_r_i_datapoints * 7){
r_st_max <- ((quantile(r_daily, r_st_max_quantile) %>% unname() %>% round(4) * 7) / i_mean) - 1
r_st_min <- ((quantile(r_daily, r_st_min_quantile) %>% unname() %>% round(4) * 7) / i_mean) - 1
}else{
training_data_unfiltered %>%
filter(s_start + i < storage_capacity_MCM) %>% .[["r_st"]] -> r_st_vector
quantile(r_st_vector, r_st_min_quantile) %>% unname() %>% round(4) -> r_st_min
quantile(r_st_vector, r_st_max_quantile) %>% unname() %>% round(4) -> r_st_max
}
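# Illustrative example with hypothetical numbers: if i_mean = 90 MCM/week and the
# 99.5th-percentile daily release is 20 MCM/day, the upper constraint becomes
#   r_st_max = (20 * 7 / 90) - 1 = 0.56
# i.e. weekly release is capped at roughly 1.56 times the mean weekly inflow.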

# create final training data for normal operating period
training_data_unfiltered %>%
filter(availability_status <= 1,
availability_status > 0) ->
training_data

### harmonic regression (two harmonics) for standardized release
lm(
@@ -125,8 +160,37 @@ fit_release_function <- function(USRDATS_path, dam_id, targets_path){
round(3) ->
st_r_residual_model_coef

if(summary(st_r_residual_model) %>% .[["adj.r.squared"]] < 0.1){
info("Release residual model has low r-squared; (release will be based harmonic function only)")
# deal with any negative coefficients by setting to zero and re-fitting
if(st_r_residual_model_coef[2] < 0 & st_r_residual_model_coef[3] >= 0){
lm(
data = data_for_linear_model_of_release_residuals,
r_st_resid ~ i_st
) -> st_r_residual_model

c(st_r_residual_model[["coefficients"]][[1]],
0,
st_r_residual_model[["coefficients"]][[2]]) %>%
round(3) ->
st_r_residual_model_coef
}

if(st_r_residual_model_coef[3] < 0 & st_r_residual_model_coef[2] >= 0){
lm(
data = data_for_linear_model_of_release_residuals,
r_st_resid ~ availability_status
) -> st_r_residual_model

c(st_r_residual_model[["coefficients"]][[1]],
st_r_residual_model[["coefficients"]][[2]],
0) %>%
round(3) ->
st_r_residual_model_coef
}

if(summary(st_r_residual_model) %>% .[["adj.r.squared"]] < r_sq_tol |
st_r_residual_model_coef[2] < 0 |
st_r_residual_model_coef[3] < 0) {
info("Release residual model will be discarded; release will be based on the harmonic function only")
st_r_residual_model_coef <- c(0, 0, 0)
}
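
A hedged sketch of how the three stored coefficients appear to be applied, inferred from the re-fitting branches above (ordering assumed to be intercept, availability_status, i_st); this is an illustration, not code from the package:

# assumed ordering: c(intercept, b_availability, b_inflow)
predict_release_residual <- function(coef, availability_status, i_st) {
  coef[1] + coef[2] * availability_status + coef[3] * i_st
}
# a discarded model has coef = c(0, 0, 0), so the residual term vanishes and the
# standardized release falls back to the harmonic component alone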

@@ -141,9 +205,10 @@ fit_release_function <- function(USRDATS_path, dam_id, targets_path){
# NULL

fitted_targets[["mean inflow from GRAND. (MCM / wk)"]] <- reservoir_attributes[["i_MAF_MCM"]] / weeks_per_year
fitted_targets[["mean inflow from obs. (MCM / wk)"]] <- training_data[["i"]] %>% mean()
fitted_targets[["mean inflow from obs. (MCM / wk)"]] <- i_mean
fitted_targets[["release harmonic parameters"]] <- st_r_harmonic
fitted_targets[["release residual model coefficients"]] <- st_r_residual_model_coef
fitted_targets[["release constraints"]] <- c(r_st_min, r_st_max)

return(fitted_targets)

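A hedged usage sketch of the enlarged return value; the path and dam id are placeholders, and the element names follow the assignments above:

# sketch only: placeholder path and dam id
fit <- fit_release_function("path/to/USRDATS", dam_id = 310, targets_path = "path/to/fitted_targets")
fit[["release harmonic parameters"]]          # 4 harmonic coefficients
fit[["release residual model coefficients"]]  # 3 coefficients, or c(0, 0, 0) if discarded
fit[["release constraints"]]                  # c(r_st_min, r_st_max)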
12 changes: 6 additions & 6 deletions R/fit_targets.R
@@ -29,8 +29,8 @@ fit_targets <- function(USRDATS_path, dam_id){
list(
"id" = dam_id,
"weekly storage" = tibble(),
"flood target parameters" = rep(NA_real_, 5),
"conservation target parameters" = rep(NA_real_, 5)
"NSR upper bound" = rep(NA_real_, 5),
"NSR lower bound" = rep(NA_real_, 5)
)
)
}
@@ -101,12 +101,12 @@ fit_targets <- function(USRDATS_path, dam_id){

# fit the flood harmonic
fit_constrained_harmonic(data_for_flood_harmonic) %>%
.[["solution"]] ->
.[["solution"]] %>% round(3) ->
p_flood_harmonic

# fit the conservation harmonic
fit_constrained_harmonic(data_for_conservation_harmonic) %>%
.[["solution"]] ->
.[["solution"]] %>% round(3) ->
p_conservation_harmonic

# evaluate targets to remove any superfluous constraints
@@ -129,8 +129,8 @@ fit_targets <- function(USRDATS_path, dam_id){
list(
"id" = dam_id,
"weekly storage" = storage_weekly,
"flood target parameters" = p_flood_harmonic,
"conservation target parameters" = p_conservation_harmonic
"NSR upper bound" = p_flood_harmonic,
"NSR lower bound" = p_conservation_harmonic
)
)

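A hedged sketch of reading the renamed target elements; the path and dam id are placeholders, and convert_parameters_to_targets is used here as it appears in fit_release_function above:

# sketch only: placeholder path and dam id
targets <- fit_targets("path/to/USRDATS", dam_id = 310)
targets[["NSR upper bound"]]   # 5 rounded parameters of the flood (upper) harmonic
targets[["NSR lower bound"]]   # 5 rounded parameters of the conservation (lower) harmonic
# expand a parameter vector to weekly targets keyed by epiweek
upper_targets <- convert_parameters_to_targets(targets[["NSR upper bound"]], "upper")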
4 changes: 4 additions & 0 deletions R/functions.R
@@ -112,6 +112,10 @@ find_closest_dam <- function(dam_attr, other_dams){
dam_attr[["supply"]] -> su
dam_attr[["irr"]] -> ir

if(is.na(fl)) fl <- TRUE
if(is.na(hy)) hy <- TRUE
if(is.na(su)) su <- TRUE
if(is.na(ir)) ir <- TRUE

# determine best matches
other_dams %>%
4 changes: 2 additions & 2 deletions R/inputs.R
@@ -10,7 +10,7 @@
#'
read_reservoir_data <- function(USRDATS_path, dam_id){

read_csv(paste0(USRDATS_path, "/TimeSeriesv2/",
read_csv(paste0(USRDATS_path, "/TimeSeries_all/ResOpsUSA_",
dam_id, ".csv"),
col_types = cols(date = "D",
storage = "d",
@@ -65,7 +65,7 @@ read_reservoir_attributes <- function(USRDATS_path, dam_id = NULL){
read_GRanD_HUC8 <- function(){

read_csv(
paste0(system.file("extdata/", package = "rulecurve"),
paste0(system.file("extdata/", package = "starfit"),
"GRAND_HUC8.csv"),
comment = "#", col_types = cols())

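A hedged usage sketch reflecting the new file layout; the base path and dam id are placeholders:

# sketch only: expects "<USRDATS_path>/TimeSeries_all/ResOpsUSA_<dam_id>.csv"
obs <- read_reservoir_data("path/to/USRDATS", dam_id = 310)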
11 changes: 10 additions & 1 deletion man/extrapolate_targets_to_GRanD.Rd

Some generated files are not rendered by default.