Skip to content

COVID-19 Data

Latest
Compare
Choose a tag to compare
@jfree-man jfree-man released this 19 Aug 19:39
· 317 commits to main since this release
9adba35

Data release

Data was sourced and prepped using the following script.

library(dplyr)
library(tidyr)

## Data Sources
# Remote locations of the raw inputs.
covid_canada_url <- "https://wzmli.github.io/COVID19-Canada/git_push/clean.Rout.csv"
google_url <- "https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv"
apple_url <- "https://raw.githubusercontent.com/ActiveConclusion/COVID19_mobility/master/apple_reports/applemobilitytrends.csv"

# Download the three CSV inputs.
covid_ca <- read.csv(covid_canada_url)
# check.names = FALSE keeps the headers exactly as written in the file; the
# prep code below refers to the "sub-region" column by that literal name and
# parses the remaining column names with as.Date().
apple <- read.csv(apple_url, check.names = FALSE)
google <- read.csv(google_url)

# Read the serialized macpan 1.5 object (URL pinned to a specific commit).
# Only its `obs` element is used directly by this script.
# readRDS() opens a url connection but does not close it, so the connection is
# created inside a function and closed explicitly on exit; otherwise it stays
# open until garbage collection ("closing unused connection" warning).
macpan1.5_calibration <- (function() {
  rds_con <- url("https://github.com/canmod/macpan2/raw/a06e9195d93dc4fa22a9d747607eecb003546144/misc/experiments/wastewater/macpan1-5_comparison_info.RDS")
  on.exit(close(rds_con), add = TRUE)
  readRDS(rds_con)
})()

## Data Prep
# Ontario-only observations in long format: one row per (province, date, var)
# with the measurement in `value`. All column names end up lower-case.
covid_on <- covid_ca %>%
  filter(Province == "ON") %>%
  select(
    Province, Date, Hospitalization, ICU, Ventilator,
    deceased, newConfirmations, newTests
  ) %>%
  mutate(
    # row-over-row change in `deceased`; the first row has no predecessor, so
    # it is NA (presumably `deceased` is a cumulative count and rows are in
    # date order -- TODO confirm against the source file)
    newDeaths = c(NA, diff(deceased)),
    ## ON hosp includes ICU, macpan_base model uses only acute care
    Hospitalization = Hospitalization - ICU
  ) %>%
  select(-deceased) %>%
  # every column except the two identifiers becomes a (var, value) pair
  pivot_longer(cols = -c(Date, Province), names_to = "var") %>%
  rename_with(tolower) %>%
  # shorten two of the variable labels; all others are kept as they are
  mutate(
    var = case_when(
      var == "newConfirmations" ~ "report",
      var == "newDeaths" ~ "death",
      TRUE ~ var
    )
  )

# Ontario mobility index: one row per date with `mobility_ind`, the mean of the
# smoothed, rescaled Apple driving series and the Google retail/recreation and
# workplaces series.
mobility <- (
  apple %>%
    filter(alternative_name == "ON", transportation_type == "driving") %>%
    # the non-identifier columns are named by date; stack them into rows
    pivot_longer(
      cols = -c("geo_type", "region", "transportation_type",
                "alternative_name", "sub-region", "country"),
      names_to = "date",
      names_transform = as.Date
    ) %>%
    # create relative percent change (to match google data)
    mutate(value = value - 100) %>%
    select(date, value)
) %>%
  full_join(
    google %>%
      filter(iso_3166_2_code == "CA-ON") %>%
      mutate(date = as.Date(date)) %>%
      select(
        date,
        starts_with("retail_and_recreation"),
        starts_with("workplaces")
      ),
    # join key stated explicitly instead of relying on the implicit natural
    # join over whatever columns the two tables happen to share
    by = "date"
  ) %>%
  arrange(date) %>%
  # compute 7 day moving average (centred window over 7 consecutive rows).
  # stats::filter() returns a `ts` object; as.numeric() drops that class so
  # the data frame only holds plain numeric columns.
  mutate(across(
    where(is.numeric),
    function(x) as.numeric(stats::filter(x, filter = rep(1 / 7, 7), sides = 2))
  )) %>%
  # scale to have pre-pandemic value of 1
  mutate(across(where(is.numeric), function(x) 1 + (x / 100))) %>%
  # compute average of all mobility values
  group_by(date) %>%
  summarize(mobility_ind = mean(c_across(where(is.numeric)), na.rm = TRUE)) %>%
  ungroup() %>%
  # dates where every series is missing give NaN above; drop those rows
  na.omit()

# combine covid data from all sources
covid_on <- bind_rows(
  # existing Ontario series, with `date` converted to Date class
  mutate(covid_on, date = as.Date(date)),
  # add wastewater data (reported incidence is already included in covid_on)
  filter(macpan1.5_calibration$obs, var == "W"),
  # add mobility data, reshaped to the same (date, value, var) layout
  mutate(rename(mobility, value = mobility_ind), var = "mobility_index")
)

# Copy of the macpan 1.5 RDS object without its `obs` element: the wastewater
# rows of `obs` were appended to covid_on above, and the remaining elements
# come from the macpan 1.5 calibration.
# within() evaluates rm(obs) against the list's elements and returns the
# modified list; macpan1.5_calibration itself is left unchanged.
covid_on_macpan1.5_calibration = within(macpan1.5_calibration, rm(obs))

## Final Datasets: 
# covid_on
# covid_on_macpan1.5_calibration (originally listed here as covid_ww_macpan1.5_calibration, a name this script never creates)