From ee311a39468b5c147402f9c29687efa6af75f104 Mon Sep 17 00:00:00 2001 From: Michelle Kendall Date: Mon, 17 Aug 2020 18:14:17 +0100 Subject: [PATCH] Updating with new folder structure --- TT_analysis_1.Rmd | 336 ++++++++++++++++++++++++++++++++++ TT_analysis_1.md | 446 ++++++++++++++++++++++++++++++++++++++++++++++ TT_analysis_2.Rmd | 6 +- TT_analysis_2.md | 6 +- 4 files changed, 788 insertions(+), 6 deletions(-) create mode 100644 TT_analysis_1.Rmd create mode 100644 TT_analysis_1.md diff --git a/TT_analysis_1.Rmd b/TT_analysis_1.Rmd new file mode 100644 index 0000000..f7e8c49 --- /dev/null +++ b/TT_analysis_1.Rmd @@ -0,0 +1,336 @@ +# Maximum-Likelihood estimation of R +### Michelle Kendall and Christophe Fraser + +```{r setup, include=FALSE} +knitr::opts_chunk$set(fig.width=10, fig.height=7, fig.cap="", cache=FALSE, echo = TRUE) + +``` + +This code performs a Maximum-Likelihood estimation of R over fixed time periods before and after the Test and Trace programme, as reported in our [preprint](https://www.medrxiv.org/content/10.1101/2020.07.12.20151753v1.article-info). + +## Setup + +### Load packages + +```{r load.packages, message=FALSE} +library(EpiEstim) +library(incidence) +library(ggplot2) +library(stats4) +library(tidyverse) +library(RColorBrewer) +``` + +## Setup + +### Load data + +From the working directory, fetch the daily case data: + +```{r load.data, message=FALSE} +dat.UK.p1 <- read_csv("data/pillar1_case_data.csv") # local area pillar 1 data +dat.UK.p2 <- read.csv("data/pillar2_case_data.csv") # national pillars data including pillar 2 +dat.IoW.p2 <- read.csv("data/pillar2_case_data_IoW.csv") # Isle of Wight pillar 2 data +dat.UK.both <- read.csv("data/combined_pillars_case_data.csv") # local combined pillars data +``` + +and the population size data: + +```{r load.pop.data} +population.data <- read.csv("data/population_by_region.csv", stringsAsFactors = FALSE) +``` + +### Cleaning data + +Extract data for England and Isle of Wight (Pillar 1) + +```{r cleaning} +# Extract relevant data: +dat.Eng.p1 <- dat.UK.p1 %>% filter(Country == "England") # England only +dat.IoW.p1 <- dat.Eng.p1 %>% filter(Area == "Isle of Wight") # IoW only + +dat.Eng.both <- dat.UK.both %>% filter(`Area.type` == "Upper tier local authority") # UTLAs only +dat.Eng.both.clean <- dat.Eng.both %>% select("Date"=`Specimen.date`, "Area"=`Area.name`, "Cases"=`Daily.lab.confirmed.cases`) # only the necessary columns +dat.Eng.both.clean$Date <- as.Date(dat.Eng.both.clean$Date) # restore "date" property + +``` + + +## Analysis + +Calculate incidence for each area and overall: + +```{r analysis.both.1, message=FALSE, warning=FALSE} +areas.alphabetical <- sort(unique(dat.Eng.both.clean$Area)) # get a list of utlas in alphabetical order +IoW.index <- which(areas.alphabetical == "Isle of Wight") + +earliest.date <- min(as.Date(dat.Eng.both.clean$Date)) +last.date <- max(as.Date(dat.Eng.both.clean$Date)) +all.dates <- seq(earliest.date, last.date, by=1) + +# Get incidence by area, filling in any missing dates with 0 cases +incidence.each.area <- sapply(areas.alphabetical, function(area) { + dat.area <- dat.Eng.both.clean %>% filter(Area == area) + + dat.area <- rbind.data.frame(dat.area, + cbind.data.frame("Date" = as.Date(setdiff(all.dates, dat.area$Date), origin = "1970-01-01"), + "Area" = area, + "Cases" = 0) + ) + + dat.area$Cases[order(dat.area$Date)] + +}) + +# extract the incidence table for the Isle of Wight +IoW.incidence <- cbind.data.frame( + "Date" = all.dates, + "Cases" = incidence.each.area[,IoW.index] +) + +# and overall for England: +total.incidence <- cbind.data.frame( + "Date" = all.dates, + "Cases" = sapply(1:nrow(incidence.each.area), function(d) sum(incidence.each.area[d,])) +) + +# these can be visualised for example like this: +TotalIncidencePlot <- ggplot(total.incidence, aes(x=Date, y=Cases)) + + geom_col(fill= brewer.pal(6,"Set2")[[3]]) + + scale_x_date(date_breaks = "2 weeks" , date_labels = "%b %d", limits=c(as.Date("2020-04-01"), last.date - 7)) + + ylab("Daily confirmed COVID-19\ncases by swab date") + + theme_bw(base_size = 16) + +TotalIncidencePlot +``` + +For England and for the Isle of Wight create their incidence line lists and `incidence` objects using data from 5 days after the TT launch and ending 5 days before the end (to allow for missing data as cases where there are delays in reporting cases.) + +```{r analysis.both.2, warning=FALSE, message=FALSE} +start.date.IoW.both <- as.Date("2020-05-10") +start.date.Eng.both <- as.Date("2020-05-23") +end.date.both <- last.date - 5 + +IoW.both.linelist <- IoW.incidence$Date[1] +for(row in 1:nrow(IoW.incidence)){ + if(IoW.incidence$Cases[row]>0){ + for(case in 1:IoW.incidence$Cases[row]){ + IoW.both.linelist <- c(IoW.both.linelist, IoW.incidence$Date[row]) + } + } +} +IoW.both.linelist <- IoW.both.linelist[-1] + +total.incidence$Cases[which(is.na(total.incidence$Cases))] <- 0 +Eng.both.linelist <- total.incidence$Date[1] +for(row in 1:nrow(total.incidence)){ + if(total.incidence$Cases[row]>0){ + Eng.both.linelist <- c(Eng.both.linelist, rep(total.incidence$Date[row], total.incidence$Cases[row])) + } +} +Eng.both.linelist <- Eng.both.linelist[-1] + +IoW.both.incidence <- incidence(IoW.both.linelist, + first_date = start.date.IoW.both, + last_date = end.date.both, + standard = FALSE) + +Eng.both.incidence <- incidence(Eng.both.linelist, + first_date = start.date.Eng.both, + last_date = end.date.both, + standard = FALSE) + + +``` + +Prepare log-likelihood function: +```{r analysis.ll} +minusloglik <- function(i0, r, start.date, incidence.dates, incidence.counts) { + ll <- 0 + for(row in 1:length(incidence.dates)){ + model <- i0*exp(-r*as.numeric(difftime(start.date,incidence.dates[row],units=c("days")))) + ll <- ll + model - incidence.counts[row]*log(model) + } + return(ll) +} +``` + +Get Maximum-Likelihood estimates: + +```{r analysis.both.mle, warning=FALSE} +fit.mle.IoW.both <- mle(minusloglik, + start=list(i0=10, r=-0.01), + fixed=list(start.date=start.date.IoW.both, + incidence.dates=IoW.both.incidence$dates, + incidence.counts=IoW.both.incidence$counts) +) +summary(fit.mle.IoW.both) + +fit.mle.Eng.both <- mle(minusloglik, + start=list(i0=1420, r=-0.02), + fixed=list(start.date=start.date.Eng.both, + incidence.dates=Eng.both.incidence$dates, + incidence.counts=Eng.both.incidence$counts)) +summary(fit.mle.Eng.both) + +``` + + +Similarly for Pillar 1 alone: +```{r analysis.p1.1, message=FALSE, warning=FALSE} +dat.IoW.p1$Incidence <- rep(0,nrow(dat.IoW.p1)) +dat.IoW.p1$Incidence[1] <- dat.IoW.p1$TotalCases[1] +for(row in 2:nrow(dat.IoW.p1)) { + dat.IoW.p1$Incidence[row] <- dat.IoW.p1$TotalCases[row] - dat.IoW.p1$TotalCases[row-1] +} + + +dat.Eng.p1 <- dat.UK.p1 %>% filter(Country == "England") + +incidence.all.areas.p1 <- sapply(areas.alphabetical, function(area) { + dat.area <- dat.Eng.p1 %>% filter(Area == area) + + new.dat.area <- data.frame("Date" = unique(dat.Eng.p1$Date)) + new.dat.area$TotalCases <- sapply(new.dat.area$Date, function(d) (dat.area %>% filter(Date == d))$TotalCases) + # fill in the blanks + if (length(new.dat.area$TotalCases[[1]])==0) new.dat.area$TotalCases[[1]] <- 0 + for (i in 1:nrow(new.dat.area)) { + if (length(new.dat.area$TotalCases[[i]])==0) new.dat.area$TotalCases[[i]] <- new.dat.area$TotalCases[[i-1]] + } + + new.dat.area$Incidence <- rep(0,nrow(new.dat.area)) + new.dat.area$Incidence[[1]] <- new.dat.area$TotalCases[[1]] + for(row in 2:nrow(new.dat.area)) new.dat.area$Incidence[[row]] <- new.dat.area$TotalCases[[row]] - new.dat.area$TotalCases[[row-1]] + + new.dat.area$Incidence +}) + +Eng.Pillar1 <- cbind.data.frame( + "Date" = unique(dat.Eng.p1$Date), + "Incidence" = sapply(1:nrow(incidence.all.areas.p1), function(row) { + sum(incidence.all.areas.p1[row,], na.rm=TRUE) + }) +) + + +# linelists +IoW.p1.linelist <- dat.IoW.p1$Date[1] +for(row in 1:nrow(dat.IoW.p1)){ + if(dat.IoW.p1$Incidence[row]>0){ + for(case in 1:dat.IoW.p1$Incidence[row]){ + IoW.p1.linelist <- c(IoW.p1.linelist, dat.IoW.p1$Date[row]) + } + } +} +IoW.p1.linelist <- IoW.p1.linelist[-1] + +Eng.p1.linelist <- Eng.Pillar1$Date[1] +for(row in 1:nrow(Eng.Pillar1)){ + if ((!is.na(Eng.Pillar1$Incidence[row]))&&(Eng.Pillar1$Incidence[row]>0)){ + Eng.p1.linelist <- c(Eng.p1.linelist, rep(Eng.Pillar1$Date[row], Eng.Pillar1$Incidence[row])) + } +} +Eng.p1.linelist <- Eng.p1.linelist[-1] + +``` + +And Pillar 2: +```{r analysis.p2.1, message=FALSE, warning=FALSE} +dat.IoW.p2$specimen_date <- as.Date(as.character(dat.IoW.p2$specimen_date), "%d-%b-%y") +dat.IoW.p2$Pillar2 <- sapply(dat.IoW.p2$Pillar2, function(x)(if(!is.na(x)) return(x) else return(0))) + +dat.UK.p2$Date.of.activity <- as.Date(dat.UK.p2$Date.of.activity, "%d/%m/%y") +UKPillar2 <- dat.UK.p2 %>% filter(Pillar == "Pillar 2 (excluding Wales)") +stopifnot(unique(UKPillar2$Nation) == "UK") # check we've just got the UK entries +UKPillar2$Daily.number.of.positive.cases <- sapply(UKPillar2$Daily.number.of.positive.cases, function(x)(if(!is.na(x)) return(x) else return(0))) + +IoW.p2.linelist <- dat.IoW.p2$specimen_date[1] +for(row in 1:nrow(dat.IoW.p2)){ + if(dat.IoW.p2$Pillar2[row]>0){ + for(case in 1:dat.IoW.p2$Pillar2[row]){ + IoW.p2.linelist <- c(IoW.p2.linelist, dat.IoW.p2$specimen_date[row]) + } + } +} +IoW.p2.linelist <- IoW.p2.linelist[-1] + +UK.p2.linelist <- UKPillar2$Date.of.activity[1] +for(row in 1:nrow(UKPillar2)){ + if(UKPillar2$Daily.number.of.positive.cases[row]>0){ + # this is faster: + UK.p2.linelist <- c(UK.p2.linelist, rep(UKPillar2$Date.of.activity[row], UKPillar2$Daily.number.of.positive.cases[row])) + # for(case in 1:UKPillar2$Daily.number.of.positive.cases[row]){ + # UK.linelist <- c(UK.linelist, UKPillar2$Date.of.activity[row]) + # } + } +} +UK.p2.linelist <- UK.p2.linelist[-1] +``` + + +Get `incidence` objects and Maximum-Likelihood estimates for the individual pillars data, starting 10 days after TT launch for Pillar 1 and 5 days after for Pillar 2 (to reflect the different typical times between symptom onset and swab). + +```{r incidence.p1.and.p2, message=FALSE, warning=FALSE} +start.date.IoW.p1 <- as.Date("2020-05-15") +start.date.IoW.p2 <- as.Date("2020-05-10") +start.date.Eng.p1 <- as.Date("2020-05-28") +start.date.UK.p2 <- as.Date("2020-05-23") + +end.date.IoW.p1 <- max(Eng.Pillar1$Date) - 5 +end.date.Eng.p1 <- max(Eng.Pillar1$Date) - 5 +end.date.UK.p2 <- max(UKPillar2$Date.of.activity, na.rm = TRUE) - 5 +end.date.IoW.p2 <- end.date.UK.p2 # there were no more cases after the 13th + +IoW.p1.incidence <- incidence(IoW.p1.linelist, + first_date = start.date.IoW.p1, + last_date = end.date.IoW.p1, + standard = FALSE) + +Eng.p1.incidence <- incidence(Eng.p1.linelist, + first_date = start.date.Eng.p1, + last_date = end.date.Eng.p1, + standard = FALSE) + +IoW.p2.incidence <- incidence(IoW.p2.linelist, + first_date = start.date.IoW.p2, + last_date = end.date.IoW.p2, + standard = FALSE) + +UK.p2.incidence <- incidence(UK.p2.linelist, + first_date = start.date.UK.p2, + last_date = end.date.UK.p2, + standard = FALSE) +``` + +```{r mle.p1.and.p2, warning=FALSE} +fit.mle.IoW.p1 <- mle(minusloglik, + start=list(i0=10, r=-0.01), + fixed=list(start.date=start.date.IoW.p1, + incidence.dates=IoW.p1.incidence$dates, + incidence.counts=IoW.p1.incidence$counts) +) +summary(fit.mle.IoW.p1) + +fit.mle.Eng.p1 <- mle(minusloglik, + start=list(i0=10, r=-0.03), + fixed=list(start.date=start.date.Eng.p1, + incidence.dates=Eng.p1.incidence$dates, + incidence.counts=Eng.p1.incidence$counts)) +summary(fit.mle.Eng.p1) + +fit.mle.IoW.p2 <- mle(minusloglik, + start=list(i0=10, r=-0.01), + fixed=list(start.date=start.date.IoW.p2, + incidence.dates=IoW.p2.incidence$dates, + incidence.counts=IoW.p2.incidence$counts) + ) +summary(fit.mle.IoW.p2) + +fit.mle.UK.p2 <- mle(minusloglik, + start=list(i0=10, r=-0.02), + fixed=list(start.date=start.date.UK.p2, + incidence.dates=UK.p2.incidence$dates, + incidence.counts=UK.p2.incidence$counts)) +summary(fit.mle.UK.p2) +``` + diff --git a/TT_analysis_1.md b/TT_analysis_1.md new file mode 100644 index 0000000..89258df --- /dev/null +++ b/TT_analysis_1.md @@ -0,0 +1,446 @@ +# Maximum-Likelihood estimation of R +### Michelle Kendall and Christophe Fraser + + + +This code performs a Maximum-Likelihood estimation of R over fixed time periods before and after the Test and Trace programme, as reported in our [preprint](https://www.medrxiv.org/content/10.1101/2020.07.12.20151753v1.article-info). + +## Setup + +### Load packages + + +```r +library(EpiEstim) +library(incidence) +library(ggplot2) +library(stats4) +library(tidyverse) +library(RColorBrewer) +``` + +## Setup + +### Load data + +From the working directory, fetch the daily case data: + + +```r +dat.UK.p1 <- read_csv("data/pillar1_case_data.csv") # local area pillar 1 data +dat.UK.p2 <- read.csv("data/pillar2_case_data.csv") # national pillars data including pillar 2 +dat.IoW.p2 <- read.csv("data/pillar2_case_data_IoW.csv") # Isle of Wight pillar 2 data +dat.UK.both <- read.csv("data/combined_pillars_case_data.csv") # local combined pillars data +``` + +and the population size data: + + +```r +population.data <- read.csv("data/population_by_region.csv", stringsAsFactors = FALSE) +``` + +### Cleaning data + +Extract data for England and Isle of Wight (Pillar 1) + + +```r +# Extract relevant data: +dat.Eng.p1 <- dat.UK.p1 %>% filter(Country == "England") # England only +dat.IoW.p1 <- dat.Eng.p1 %>% filter(Area == "Isle of Wight") # IoW only + +dat.Eng.both <- dat.UK.both %>% filter(`Area.type` == "Upper tier local authority") # UTLAs only +dat.Eng.both.clean <- dat.Eng.both %>% select("Date"=`Specimen.date`, "Area"=`Area.name`, "Cases"=`Daily.lab.confirmed.cases`) # only the necessary columns +dat.Eng.both.clean$Date <- as.Date(dat.Eng.both.clean$Date) # restore "date" property +``` + + +## Analysis + +Calculate incidence for each area and overall: + + +```r +areas.alphabetical <- sort(unique(dat.Eng.both.clean$Area)) # get a list of utlas in alphabetical order +IoW.index <- which(areas.alphabetical == "Isle of Wight") + +earliest.date <- min(as.Date(dat.Eng.both.clean$Date)) +last.date <- max(as.Date(dat.Eng.both.clean$Date)) +all.dates <- seq(earliest.date, last.date, by=1) + +# Get incidence by area, filling in any missing dates with 0 cases +incidence.each.area <- sapply(areas.alphabetical, function(area) { + dat.area <- dat.Eng.both.clean %>% filter(Area == area) + + dat.area <- rbind.data.frame(dat.area, + cbind.data.frame("Date" = as.Date(setdiff(all.dates, dat.area$Date), origin = "1970-01-01"), + "Area" = area, + "Cases" = 0) + ) + + dat.area$Cases[order(dat.area$Date)] + +}) + +# extract the incidence table for the Isle of Wight +IoW.incidence <- cbind.data.frame( + "Date" = all.dates, + "Cases" = incidence.each.area[,IoW.index] +) + +# and overall for England: +total.incidence <- cbind.data.frame( + "Date" = all.dates, + "Cases" = sapply(1:nrow(incidence.each.area), function(d) sum(incidence.each.area[d,])) +) + +# these can be visualised for example like this: +TotalIncidencePlot <- ggplot(total.incidence, aes(x=Date, y=Cases)) + + geom_col(fill= brewer.pal(6,"Set2")[[3]]) + + scale_x_date(date_breaks = "2 weeks" , date_labels = "%b %d", limits=c(as.Date("2020-04-01"), last.date - 7)) + + ylab("Daily confirmed COVID-19\ncases by swab date") + + theme_bw(base_size = 16) + +TotalIncidencePlot +``` + +![](figure/analysis.both.1-1.png) + +For England and for the Isle of Wight create their incidence line lists and `incidence` objects using data from 5 days after the TT launch and ending 5 days before the end (to allow for missing data as cases where there are delays in reporting cases.) + + +```r +start.date.IoW.both <- as.Date("2020-05-10") +start.date.Eng.both <- as.Date("2020-05-23") +end.date.both <- last.date - 5 + +IoW.both.linelist <- IoW.incidence$Date[1] +for(row in 1:nrow(IoW.incidence)){ + if(IoW.incidence$Cases[row]>0){ + for(case in 1:IoW.incidence$Cases[row]){ + IoW.both.linelist <- c(IoW.both.linelist, IoW.incidence$Date[row]) + } + } +} +IoW.both.linelist <- IoW.both.linelist[-1] + +total.incidence$Cases[which(is.na(total.incidence$Cases))] <- 0 +Eng.both.linelist <- total.incidence$Date[1] +for(row in 1:nrow(total.incidence)){ + if(total.incidence$Cases[row]>0){ + Eng.both.linelist <- c(Eng.both.linelist, rep(total.incidence$Date[row], total.incidence$Cases[row])) + } +} +Eng.both.linelist <- Eng.both.linelist[-1] + +IoW.both.incidence <- incidence(IoW.both.linelist, + first_date = start.date.IoW.both, + last_date = end.date.both, + standard = FALSE) + +Eng.both.incidence <- incidence(Eng.both.linelist, + first_date = start.date.Eng.both, + last_date = end.date.both, + standard = FALSE) +``` + +Prepare log-likelihood function: + +```r +minusloglik <- function(i0, r, start.date, incidence.dates, incidence.counts) { + ll <- 0 + for(row in 1:length(incidence.dates)){ + model <- i0*exp(-r*as.numeric(difftime(start.date,incidence.dates[row],units=c("days")))) + ll <- ll + model - incidence.counts[row]*log(model) + } + return(ll) +} +``` + +Get Maximum-Likelihood estimates: + + +```r +fit.mle.IoW.both <- mle(minusloglik, + start=list(i0=10, r=-0.01), + fixed=list(start.date=start.date.IoW.both, + incidence.dates=IoW.both.incidence$dates, + incidence.counts=IoW.both.incidence$counts) +) +summary(fit.mle.IoW.both) +``` + +``` +## Maximum likelihood estimation +## +## Call: +## mle(minuslogl = minusloglik, start = list(i0 = 10, r = -0.01), +## fixed = list(start.date = start.date.IoW.both, incidence.dates = IoW.both.incidence$dates, +## incidence.counts = IoW.both.incidence$counts)) +## +## Coefficients: +## Estimate Std. Error +## i0 9.62302144 1.236653064 +## r -0.07209213 0.008048002 +## +## -2 log L: -110.3964 +``` + +```r +fit.mle.Eng.both <- mle(minusloglik, + start=list(i0=1420, r=-0.02), + fixed=list(start.date=start.date.Eng.both, + incidence.dates=Eng.both.incidence$dates, + incidence.counts=Eng.both.incidence$counts)) +summary(fit.mle.Eng.both) +``` + +``` +## Maximum likelihood estimation +## +## Call: +## mle(minuslogl = minusloglik, start = list(i0 = 1420, r = -0.02), +## fixed = list(start.date = start.date.Eng.both, incidence.dates = Eng.both.incidence$dates, +## incidence.counts = Eng.both.incidence$counts)) +## +## Coefficients: +## Estimate Std. Error +## i0 1420.15949911 1.35301e+01 +## r -0.02283999 5.38094e-04 +## +## -2 log L: -410782.2 +``` + + +Similarly for Pillar 1 alone: + +```r +dat.IoW.p1$Incidence <- rep(0,nrow(dat.IoW.p1)) +dat.IoW.p1$Incidence[1] <- dat.IoW.p1$TotalCases[1] +for(row in 2:nrow(dat.IoW.p1)) { + dat.IoW.p1$Incidence[row] <- dat.IoW.p1$TotalCases[row] - dat.IoW.p1$TotalCases[row-1] +} + + +dat.Eng.p1 <- dat.UK.p1 %>% filter(Country == "England") + +incidence.all.areas.p1 <- sapply(areas.alphabetical, function(area) { + dat.area <- dat.Eng.p1 %>% filter(Area == area) + + new.dat.area <- data.frame("Date" = unique(dat.Eng.p1$Date)) + new.dat.area$TotalCases <- sapply(new.dat.area$Date, function(d) (dat.area %>% filter(Date == d))$TotalCases) + # fill in the blanks + if (length(new.dat.area$TotalCases[[1]])==0) new.dat.area$TotalCases[[1]] <- 0 + for (i in 1:nrow(new.dat.area)) { + if (length(new.dat.area$TotalCases[[i]])==0) new.dat.area$TotalCases[[i]] <- new.dat.area$TotalCases[[i-1]] + } + + new.dat.area$Incidence <- rep(0,nrow(new.dat.area)) + new.dat.area$Incidence[[1]] <- new.dat.area$TotalCases[[1]] + for(row in 2:nrow(new.dat.area)) new.dat.area$Incidence[[row]] <- new.dat.area$TotalCases[[row]] - new.dat.area$TotalCases[[row-1]] + + new.dat.area$Incidence +}) + +Eng.Pillar1 <- cbind.data.frame( + "Date" = unique(dat.Eng.p1$Date), + "Incidence" = sapply(1:nrow(incidence.all.areas.p1), function(row) { + sum(incidence.all.areas.p1[row,], na.rm=TRUE) + }) +) + + +# linelists +IoW.p1.linelist <- dat.IoW.p1$Date[1] +for(row in 1:nrow(dat.IoW.p1)){ + if(dat.IoW.p1$Incidence[row]>0){ + for(case in 1:dat.IoW.p1$Incidence[row]){ + IoW.p1.linelist <- c(IoW.p1.linelist, dat.IoW.p1$Date[row]) + } + } +} +IoW.p1.linelist <- IoW.p1.linelist[-1] + +Eng.p1.linelist <- Eng.Pillar1$Date[1] +for(row in 1:nrow(Eng.Pillar1)){ + if ((!is.na(Eng.Pillar1$Incidence[row]))&&(Eng.Pillar1$Incidence[row]>0)){ + Eng.p1.linelist <- c(Eng.p1.linelist, rep(Eng.Pillar1$Date[row], Eng.Pillar1$Incidence[row])) + } +} +Eng.p1.linelist <- Eng.p1.linelist[-1] +``` + +And Pillar 2: + +```r +dat.IoW.p2$specimen_date <- as.Date(as.character(dat.IoW.p2$specimen_date), "%d-%b-%y") +dat.IoW.p2$Pillar2 <- sapply(dat.IoW.p2$Pillar2, function(x)(if(!is.na(x)) return(x) else return(0))) + +dat.UK.p2$Date.of.activity <- as.Date(dat.UK.p2$Date.of.activity, "%d/%m/%y") +UKPillar2 <- dat.UK.p2 %>% filter(Pillar == "Pillar 2 (excluding Wales)") +stopifnot(unique(UKPillar2$Nation) == "UK") # check we've just got the UK entries +UKPillar2$Daily.number.of.positive.cases <- sapply(UKPillar2$Daily.number.of.positive.cases, function(x)(if(!is.na(x)) return(x) else return(0))) + +IoW.p2.linelist <- dat.IoW.p2$specimen_date[1] +for(row in 1:nrow(dat.IoW.p2)){ + if(dat.IoW.p2$Pillar2[row]>0){ + for(case in 1:dat.IoW.p2$Pillar2[row]){ + IoW.p2.linelist <- c(IoW.p2.linelist, dat.IoW.p2$specimen_date[row]) + } + } +} +IoW.p2.linelist <- IoW.p2.linelist[-1] + +UK.p2.linelist <- UKPillar2$Date.of.activity[1] +for(row in 1:nrow(UKPillar2)){ + if(UKPillar2$Daily.number.of.positive.cases[row]>0){ + # this is faster: + UK.p2.linelist <- c(UK.p2.linelist, rep(UKPillar2$Date.of.activity[row], UKPillar2$Daily.number.of.positive.cases[row])) + # for(case in 1:UKPillar2$Daily.number.of.positive.cases[row]){ + # UK.linelist <- c(UK.linelist, UKPillar2$Date.of.activity[row]) + # } + } +} +UK.p2.linelist <- UK.p2.linelist[-1] +``` + + +Get `incidence` objects and Maximum-Likelihood estimates for the individual pillars data, starting 10 days after TT launch for Pillar 1 and 5 days after for Pillar 2 (to reflect the different typical times between symptom onset and swab). + + +```r +start.date.IoW.p1 <- as.Date("2020-05-15") +start.date.IoW.p2 <- as.Date("2020-05-10") +start.date.Eng.p1 <- as.Date("2020-05-28") +start.date.UK.p2 <- as.Date("2020-05-23") + +end.date.IoW.p1 <- max(Eng.Pillar1$Date) - 5 +end.date.Eng.p1 <- max(Eng.Pillar1$Date) - 5 +end.date.UK.p2 <- max(UKPillar2$Date.of.activity, na.rm = TRUE) - 5 +end.date.IoW.p2 <- end.date.UK.p2 # there were no more cases after the 13th + +IoW.p1.incidence <- incidence(IoW.p1.linelist, + first_date = start.date.IoW.p1, + last_date = end.date.IoW.p1, + standard = FALSE) + +Eng.p1.incidence <- incidence(Eng.p1.linelist, + first_date = start.date.Eng.p1, + last_date = end.date.Eng.p1, + standard = FALSE) + +IoW.p2.incidence <- incidence(IoW.p2.linelist, + first_date = start.date.IoW.p2, + last_date = end.date.IoW.p2, + standard = FALSE) + +UK.p2.incidence <- incidence(UK.p2.linelist, + first_date = start.date.UK.p2, + last_date = end.date.UK.p2, + standard = FALSE) +``` + + +```r +fit.mle.IoW.p1 <- mle(minusloglik, + start=list(i0=10, r=-0.01), + fixed=list(start.date=start.date.IoW.p1, + incidence.dates=IoW.p1.incidence$dates, + incidence.counts=IoW.p1.incidence$counts) +) +summary(fit.mle.IoW.p1) +``` + +``` +## Maximum likelihood estimation +## +## Call: +## mle(minuslogl = minusloglik, start = list(i0 = 10, r = -0.01), +## fixed = list(start.date = start.date.IoW.p1, incidence.dates = IoW.p1.incidence$dates, +## incidence.counts = IoW.p1.incidence$counts)) +## +## Coefficients: +## Estimate Std. Error +## i0 3.6521234 0.92358594 +## r -0.1245396 0.02442024 +## +## -2 log L: 37.98268 +``` + +```r +fit.mle.Eng.p1 <- mle(minusloglik, + start=list(i0=10, r=-0.03), + fixed=list(start.date=start.date.Eng.p1, + incidence.dates=Eng.p1.incidence$dates, + incidence.counts=Eng.p1.incidence$counts)) +summary(fit.mle.Eng.p1) +``` + +``` +## Maximum likelihood estimation +## +## Call: +## mle(minuslogl = minusloglik, start = list(i0 = 10, r = -0.03), +## fixed = list(start.date = start.date.Eng.p1, incidence.dates = Eng.p1.incidence$dates, +## incidence.counts = Eng.p1.incidence$counts)) +## +## Coefficients: +## Estimate Std. Error +## i0 417.30047597 8.265965851 +## r -0.03182554 0.001423115 +## +## -2 log L: -73446.41 +``` + +```r +fit.mle.IoW.p2 <- mle(minusloglik, + start=list(i0=10, r=-0.01), + fixed=list(start.date=start.date.IoW.p2, + incidence.dates=IoW.p2.incidence$dates, + incidence.counts=IoW.p2.incidence$counts) + ) +summary(fit.mle.IoW.p2) +``` + +``` +## Maximum likelihood estimation +## +## Call: +## mle(minuslogl = minusloglik, start = list(i0 = 10, r = -0.01), +## fixed = list(start.date = start.date.IoW.p2, incidence.dates = IoW.p2.incidence$dates, +## incidence.counts = IoW.p2.incidence$counts)) +## +## Coefficients: +## Estimate Std. Error +## i0 4.90650182 0.85101721 +## r -0.05320915 0.01030546 +## +## -2 log L: 17.73932 +``` + +```r +fit.mle.UK.p2 <- mle(minusloglik, + start=list(i0=10, r=-0.02), + fixed=list(start.date=start.date.UK.p2, + incidence.dates=UK.p2.incidence$dates, + incidence.counts=UK.p2.incidence$counts)) +summary(fit.mle.UK.p2) +``` + +``` +## Maximum likelihood estimation +## +## Call: +## mle(minuslogl = minusloglik, start = list(i0 = 10, r = -0.02), +## fixed = list(start.date = start.date.UK.p2, incidence.dates = UK.p2.incidence$dates, +## incidence.counts = UK.p2.incidence$counts)) +## +## Coefficients: +## Estimate Std. Error +## i0 1393.11365688 1.447956e+01 +## r -0.01870897 7.094076e-04 +## +## -2 log L: -368702.5 +``` + diff --git a/TT_analysis_2.Rmd b/TT_analysis_2.Rmd index f61254e..17d7ec8 100644 --- a/TT_analysis_2.Rmd +++ b/TT_analysis_2.Rmd @@ -24,13 +24,13 @@ library(ggplot2) From the working directory, fetch the Pillar 1 daily case data: ```{r load.data, message=FALSE} -dat.UK.p1 <- read_csv("pillar1_case_data.csv") +dat.UK.p1 <- read_csv("data/pillar1_case_data.csv") ``` and the population data: ```{r load.pop.data} -population.data <- read.csv("population_by_region.csv", stringsAsFactors = FALSE) +population.data <- read.csv("data/population_by_region.csv", stringsAsFactors = FALSE) ``` @@ -269,5 +269,5 @@ nowcast.plot For ease of viewing on GitHub we include a static version here: ```{r static.plot, echo=FALSE, fig.retina = NULL} -knitr::include_graphics("Nowcast_IoW.png", dpi=72) +knitr::include_graphics("figure/Nowcast_IoW.png", dpi=72) ``` diff --git a/TT_analysis_2.md b/TT_analysis_2.md index aeb9576..0ee7587 100644 --- a/TT_analysis_2.md +++ b/TT_analysis_2.md @@ -24,14 +24,14 @@ From the working directory, fetch the Pillar 1 daily case data: ```r -dat.UK.p1 <- read_csv("pillar1_case_data.csv") +dat.UK.p1 <- read_csv("data/pillar1_case_data.csv") ``` and the population data: ```r -population.data <- read.csv("population_by_region.csv", stringsAsFactors = FALSE) +population.data <- read.csv("data/population_by_region.csv", stringsAsFactors = FALSE) ``` @@ -276,4 +276,4 @@ nowcast.plot ``` For ease of viewing on GitHub we include a static version here: -![plot of chunk static.plot](Nowcast_IoW.png) +![](figure/Nowcast_IoW.png)