diff --git a/NAMESPACE b/NAMESPACE index 1bc96aa..f264c08 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -27,6 +27,7 @@ export(get_phac_d) export(get_pt) export(load_datasets) export(load_ds) +export(max_date) export(plot_datasets) export(pushover) export(read_d) diff --git a/R/assemble_final_datasets.R b/R/assemble_final_datasets.R index c316bda..2f8a978 100644 --- a/R/assemble_final_datasets.R +++ b/R/assemble_final_datasets.R @@ -41,6 +41,8 @@ assemble_final_datasets <- function() { ab8 <- dplyr::bind_rows(ab6, ab7) cases_ab <- append_daily_d(cases_ab, ab8) rm(ab1, ab2, ab3, ab4, ab5, ab6, ab6_pt, ab7, ab8) # clean up + # trim to max date + cases_ab <- max_date(cases_ab, "2023-12-30") ## bc cases_bc <- read_d("raw_data/reports/bc/bc_monthly_report_cumulative.csv") |> @@ -80,6 +82,8 @@ assemble_final_datasets <- function() { cases_nb <- append_daily_d(cases_nb, nb1) cases_nb <- append_daily_d(cases_nb, nb2) rm(nb1, nb2) # cleanup + # trim to max date + cases_nb <- max_date(cases_nb, "2023-12-30") ## nl cases_nl <- dplyr::bind_rows( @@ -104,6 +108,8 @@ assemble_final_datasets <- function() { add_hr_col("Unknown") |> convert_hr_names() ) + # trim to max date + cases_nl <- max_date(cases_nl, "2023-12-30") ## ns ns1 <- read_d("raw_data/static/ns/ns_cases_hr_ts_1.csv") @@ -168,6 +174,8 @@ assemble_final_datasets <- function() { add_hr_col("Prince Edward Island") %>% dplyr::filter(.data$date >= as.Date("2022-06-11")) ) + # trim to max date + cases_pe <- max_date(cases_pe, "2023-12-30") ## qc tryCatch( @@ -203,6 +211,8 @@ assemble_final_datasets <- function() { sub_region_1 = ifelse(.data$sub_region_1 == "Not Assigned", "Unknown", .data$sub_region_1)) cases_sk <- append_daily_d(cases_sk, sk3) rm(sk1, sk2, sk3) # cleanup + # trim to max date + cases_sk <- max_date(cases_sk, "2023-12-30") }, error = function(e) { print(e) @@ -255,6 +265,8 @@ assemble_final_datasets <- function() { .data$name, .data$region, .data$sub_region_1, .data$date, value = .data$value + .data$value2) deaths_ab <- dplyr::bind_rows(deaths_ab, ab4) rm(ab1, ab2, ab2_max, ab3, ab_max, ab4) # clean up + # trim to max date + deaths_ab <- max_date(deaths_ab, "2023-12-30") ## bc deaths_bc <- read_d("raw_data/reports/bc/bc_monthly_report_cumulative.csv") |> @@ -412,6 +424,8 @@ assemble_final_datasets <- function() { convert_hr_names() deaths_on <- append_daily_d(on1, on2) rm(on1, on2) # clean up + # trim to max date + deaths_on <- max_date(deaths_on, "2023-12-30") ## pe deaths_pe <- dplyr::bind_rows( @@ -421,6 +435,8 @@ assemble_final_datasets <- function() { add_hr_col("Prince Edward Island") %>% dplyr::filter(.data$date >= as.Date("2022-06-11")) ) + # trim to max date + deaths_pe <- max_date(deaths_pe, "2023-12-30") ## qc tryCatch( @@ -470,6 +486,8 @@ assemble_final_datasets <- function() { cat("Error in processing pipeline", fill = TRUE) } ) + # trim to max date + deaths_sk <- max_date(deaths_sk, "2023-12-30") ## yt deaths_yt <- dplyr::bind_rows( @@ -509,6 +527,8 @@ assemble_final_datasets <- function() { # remove seemingly erroneous data (unexplained spikes) hospitalizations_bc <- hospitalizations_bc[ !hospitalizations_bc$date %in% as.Date(c("2021-04-01", "2021-05-05", "2022-04-28", "2022-08-18")), ] + # trim to max date + hospitalizations_bc <- max_date(hospitalizations_bc, "2023-12-21") ## mb hospitalizations_mb <- dplyr::bind_rows( @@ -613,6 +633,8 @@ assemble_final_datasets <- function() { # remove seemingly erroneous data (unexplained spikes) icu_bc <- icu_bc[ !icu_bc$date %in% as.Date(c("2021-04-01", "2021-05-05", "2022-04-28", "2022-08-18")), ] + # trim to max date + icu_bc <- max_date(icu_bc, "2023-12-21") ## mb icu_mb <- dplyr::bind_rows( @@ -711,6 +733,8 @@ assemble_final_datasets <- function() { .data$region, .data$date, value = cumsum(.data$value_daily)) + # trim to max date + hosp_admissions_ab <- max_date(hosp_admissions_ab, "2023-12-30") ## bc hosp_admissions_bc <- read_d("raw_data/reports/bc/bc_monthly_report_cumulative.csv") |> @@ -772,6 +796,8 @@ assemble_final_datasets <- function() { .data$region, .data$date, value = cumsum(.data$value_daily)) + # trim to max date + hosp_admissions_on <- max_date(hosp_admissions_on, "2023-12-30") ## pe hosp_admissions_pe <- dplyr::bind_rows( @@ -797,6 +823,8 @@ assemble_final_datasets <- function() { report_pluck("hosp_admissions", "new_hospitalizations", "value_daily", "pt") |> report_recent() ) + # trim to max date + hosp_admissions_sk <- max_date(hosp_admissions_sk, "2023-12-30") ## qc hosp_admissions_qc <- read_d("raw_data/static/qc/qc_hosp_admissions_pt_ts.csv") |> @@ -830,6 +858,8 @@ assemble_final_datasets <- function() { .data$region, .data$date, value = cumsum(.data$value_daily)) + # trim to max date + icu_admissions_ab <- max_date(icu_admissions_ab, "2023-12-30") ## bc icu_admissions_bc <- read_d("raw_data/reports/bc/bc_monthly_report_cumulative.csv") |> @@ -891,6 +921,8 @@ assemble_final_datasets <- function() { report_pluck("icu_admissions", "new_icu", "value_daily", "pt") |> report_recent() ) + # trim to max date + icu_admissions_sk <- max_date(icu_admissions_sk, "2023-12-30") ## collate and process final dataset suppressWarnings(rm(icu_admissions_pt)) # if re-running manually @@ -990,6 +1022,8 @@ assemble_final_datasets <- function() { # add ON back to main dataset tests_completed_pt <- dplyr::bind_rows(tests_completed_pt, on3) rm(on1, on2, on3) # clean up + # trim to max date + tests_completed_pt <- max_date(tests_completed_pt, "2023-12-30") ## add PE data tests_completed_pt <- dplyr::bind_rows( diff --git a/R/process_funs.R b/R/process_funs.R index 53bc69d..d44b65d 100644 --- a/R/process_funs.R +++ b/R/process_funs.R @@ -19,6 +19,7 @@ #' @param geo The geographic level of the data. One of "pt", "hr", "sub-hr". #' @param d1 Dataset to append to. A cumulative value dataset. #' @param d2 Dataset being appended. A daily value dataset. +#' @param max_date The maximum date to trim the output dataset to. #' #' @name process_funs NULL @@ -214,6 +215,24 @@ report_recent <- function(d) { ) } +#' Filter a dataset to a maximum date +#' +#' @rdname process_funs +#' +#' @export +max_date <- function(d, max_date) { + tryCatch( + { + d %>% + dplyr::filter(.data$date <= as.Date(max_date)) + }, + error = function(e) { + print(e) + cat("Error in max_date", fill = TRUE) + } + ) +} + #' Append a daily value dataset to a cumulative value dataset #' #' @rdname process_funs @@ -400,6 +419,8 @@ agg2can <- function(d) { agg2can_completeness <- function(d) { tryCatch( { + # limit to maximum date (2023-12-31) + d <- d[d$date <= as.Date("2023-12-31"), ] # get region values for each date out_pt <- split(d$region, d$date) # count number of regions for each date diff --git a/R/utils.R b/R/utils.R index 65e4a2d..8aa6fa9 100644 --- a/R/utils.R +++ b/R/utils.R @@ -193,6 +193,10 @@ get_phac_d <- function(val, region, exclude_repatriated = TRUE, keep_up_to_date warning("keep_up_to_date = TRUE is not supported with this value, ignoring...") } } + # filter weekly data to max date (2023-12-30) + if (val %in% c("cases", "deaths", "tests_completed", "tests_completed_rvdss")) { + d <- d[d$date <= as.Date("2023-12-30"), ] + } # return data d }, diff --git a/man/process_funs.Rd b/man/process_funs.Rd index 2a39f5c..8984319 100644 --- a/man/process_funs.Rd +++ b/man/process_funs.Rd @@ -9,6 +9,7 @@ \alias{add_as_of_date} \alias{report_pluck} \alias{report_recent} +\alias{max_date} \alias{append_daily_d} \alias{collate_datasets} \alias{add_hr_col} @@ -32,6 +33,8 @@ report_pluck(d, name, val, out_col, geo) report_recent(d) +max_date(d, max_date) + append_daily_d(d1, d2) collate_datasets(val) @@ -63,6 +66,8 @@ dataset_format(d, geo = c("pt", "hr", "sub-hr"), digits) \item{geo}{The geographic level of the data. One of "pt", "hr", "sub-hr".} +\item{max_date}{The maximum date to trim the output dataset to.} + \item{d1}{Dataset to append to. A cumulative value dataset.} \item{d2}{Dataset being appended. A daily value dataset.}