Skip to content

Commit

Permalink
Updating with new folder structure
Browse files Browse the repository at this point in the history
  • Loading branch information
Michelle Kendall committed Aug 17, 2020
1 parent f8433b2 commit ee311a3
Show file tree
Hide file tree
Showing 4 changed files with 788 additions and 6 deletions.
336 changes: 336 additions & 0 deletions TT_analysis_1.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,336 @@
# Maximum-Likelihood estimation of R
### Michelle Kendall and Christophe Fraser

```{r setup, include=FALSE}
knitr::opts_chunk$set(fig.width=10, fig.height=7, fig.cap="", cache=FALSE, echo = TRUE)
```

This code performs a Maximum-Likelihood estimation of R over fixed time periods before and after the Test and Trace programme, as reported in our [preprint](https://www.medrxiv.org/content/10.1101/2020.07.12.20151753v1.article-info).

## Setup

### Load packages

```{r load.packages, message=FALSE}
library(EpiEstim)
library(incidence)
library(ggplot2)
library(stats4)
library(tidyverse)
library(RColorBrewer)
```

## Setup

### Load data

From the working directory, fetch the daily case data:

```{r load.data, message=FALSE}
dat.UK.p1 <- read_csv("data/pillar1_case_data.csv") # local area pillar 1 data
dat.UK.p2 <- read.csv("data/pillar2_case_data.csv") # national pillars data including pillar 2
dat.IoW.p2 <- read.csv("data/pillar2_case_data_IoW.csv") # Isle of Wight pillar 2 data
dat.UK.both <- read.csv("data/combined_pillars_case_data.csv") # local combined pillars data
```

and the population size data:

```{r load.pop.data}
population.data <- read.csv("data/population_by_region.csv", stringsAsFactors = FALSE)
```

### Cleaning data

Extract data for England and Isle of Wight (Pillar 1)

```{r cleaning}
# Extract relevant data:
dat.Eng.p1 <- dat.UK.p1 %>% filter(Country == "England") # England only
dat.IoW.p1 <- dat.Eng.p1 %>% filter(Area == "Isle of Wight") # IoW only
dat.Eng.both <- dat.UK.both %>% filter(`Area.type` == "Upper tier local authority") # UTLAs only
dat.Eng.both.clean <- dat.Eng.both %>% select("Date"=`Specimen.date`, "Area"=`Area.name`, "Cases"=`Daily.lab.confirmed.cases`) # only the necessary columns
dat.Eng.both.clean$Date <- as.Date(dat.Eng.both.clean$Date) # restore "date" property
```


## Analysis

Calculate incidence for each area and overall:

```{r analysis.both.1, message=FALSE, warning=FALSE}
areas.alphabetical <- sort(unique(dat.Eng.both.clean$Area)) # get a list of utlas in alphabetical order
IoW.index <- which(areas.alphabetical == "Isle of Wight")
earliest.date <- min(as.Date(dat.Eng.both.clean$Date))
last.date <- max(as.Date(dat.Eng.both.clean$Date))
all.dates <- seq(earliest.date, last.date, by=1)
# Get incidence by area, filling in any missing dates with 0 cases
incidence.each.area <- sapply(areas.alphabetical, function(area) {
dat.area <- dat.Eng.both.clean %>% filter(Area == area)
dat.area <- rbind.data.frame(dat.area,
cbind.data.frame("Date" = as.Date(setdiff(all.dates, dat.area$Date), origin = "1970-01-01"),
"Area" = area,
"Cases" = 0)
)
dat.area$Cases[order(dat.area$Date)]
})
# extract the incidence table for the Isle of Wight
IoW.incidence <- cbind.data.frame(
"Date" = all.dates,
"Cases" = incidence.each.area[,IoW.index]
)
# and overall for England:
total.incidence <- cbind.data.frame(
"Date" = all.dates,
"Cases" = sapply(1:nrow(incidence.each.area), function(d) sum(incidence.each.area[d,]))
)
# these can be visualised for example like this:
TotalIncidencePlot <- ggplot(total.incidence, aes(x=Date, y=Cases)) +
geom_col(fill= brewer.pal(6,"Set2")[[3]]) +
scale_x_date(date_breaks = "2 weeks" , date_labels = "%b %d", limits=c(as.Date("2020-04-01"), last.date - 7)) +
ylab("Daily confirmed COVID-19\ncases by swab date") +
theme_bw(base_size = 16)
TotalIncidencePlot
```

For England and for the Isle of Wight create their incidence line lists and `incidence` objects using data from 5 days after the TT launch and ending 5 days before the end (to allow for missing data as cases where there are delays in reporting cases.)

```{r analysis.both.2, warning=FALSE, message=FALSE}
start.date.IoW.both <- as.Date("2020-05-10")
start.date.Eng.both <- as.Date("2020-05-23")
end.date.both <- last.date - 5
IoW.both.linelist <- IoW.incidence$Date[1]
for(row in 1:nrow(IoW.incidence)){
if(IoW.incidence$Cases[row]>0){
for(case in 1:IoW.incidence$Cases[row]){
IoW.both.linelist <- c(IoW.both.linelist, IoW.incidence$Date[row])
}
}
}
IoW.both.linelist <- IoW.both.linelist[-1]
total.incidence$Cases[which(is.na(total.incidence$Cases))] <- 0
Eng.both.linelist <- total.incidence$Date[1]
for(row in 1:nrow(total.incidence)){
if(total.incidence$Cases[row]>0){
Eng.both.linelist <- c(Eng.both.linelist, rep(total.incidence$Date[row], total.incidence$Cases[row]))
}
}
Eng.both.linelist <- Eng.both.linelist[-1]
IoW.both.incidence <- incidence(IoW.both.linelist,
first_date = start.date.IoW.both,
last_date = end.date.both,
standard = FALSE)
Eng.both.incidence <- incidence(Eng.both.linelist,
first_date = start.date.Eng.both,
last_date = end.date.both,
standard = FALSE)
```

Prepare log-likelihood function:
```{r analysis.ll}
minusloglik <- function(i0, r, start.date, incidence.dates, incidence.counts) {
ll <- 0
for(row in 1:length(incidence.dates)){
model <- i0*exp(-r*as.numeric(difftime(start.date,incidence.dates[row],units=c("days"))))
ll <- ll + model - incidence.counts[row]*log(model)
}
return(ll)
}
```

Get Maximum-Likelihood estimates:

```{r analysis.both.mle, warning=FALSE}
fit.mle.IoW.both <- mle(minusloglik,
start=list(i0=10, r=-0.01),
fixed=list(start.date=start.date.IoW.both,
incidence.dates=IoW.both.incidence$dates,
incidence.counts=IoW.both.incidence$counts)
)
summary(fit.mle.IoW.both)
fit.mle.Eng.both <- mle(minusloglik,
start=list(i0=1420, r=-0.02),
fixed=list(start.date=start.date.Eng.both,
incidence.dates=Eng.both.incidence$dates,
incidence.counts=Eng.both.incidence$counts))
summary(fit.mle.Eng.both)
```


Similarly for Pillar 1 alone:
```{r analysis.p1.1, message=FALSE, warning=FALSE}
dat.IoW.p1$Incidence <- rep(0,nrow(dat.IoW.p1))
dat.IoW.p1$Incidence[1] <- dat.IoW.p1$TotalCases[1]
for(row in 2:nrow(dat.IoW.p1)) {
dat.IoW.p1$Incidence[row] <- dat.IoW.p1$TotalCases[row] - dat.IoW.p1$TotalCases[row-1]
}
dat.Eng.p1 <- dat.UK.p1 %>% filter(Country == "England")
incidence.all.areas.p1 <- sapply(areas.alphabetical, function(area) {
dat.area <- dat.Eng.p1 %>% filter(Area == area)
new.dat.area <- data.frame("Date" = unique(dat.Eng.p1$Date))
new.dat.area$TotalCases <- sapply(new.dat.area$Date, function(d) (dat.area %>% filter(Date == d))$TotalCases)
# fill in the blanks
if (length(new.dat.area$TotalCases[[1]])==0) new.dat.area$TotalCases[[1]] <- 0
for (i in 1:nrow(new.dat.area)) {
if (length(new.dat.area$TotalCases[[i]])==0) new.dat.area$TotalCases[[i]] <- new.dat.area$TotalCases[[i-1]]
}
new.dat.area$Incidence <- rep(0,nrow(new.dat.area))
new.dat.area$Incidence[[1]] <- new.dat.area$TotalCases[[1]]
for(row in 2:nrow(new.dat.area)) new.dat.area$Incidence[[row]] <- new.dat.area$TotalCases[[row]] - new.dat.area$TotalCases[[row-1]]
new.dat.area$Incidence
})
Eng.Pillar1 <- cbind.data.frame(
"Date" = unique(dat.Eng.p1$Date),
"Incidence" = sapply(1:nrow(incidence.all.areas.p1), function(row) {
sum(incidence.all.areas.p1[row,], na.rm=TRUE)
})
)
# linelists
IoW.p1.linelist <- dat.IoW.p1$Date[1]
for(row in 1:nrow(dat.IoW.p1)){
if(dat.IoW.p1$Incidence[row]>0){
for(case in 1:dat.IoW.p1$Incidence[row]){
IoW.p1.linelist <- c(IoW.p1.linelist, dat.IoW.p1$Date[row])
}
}
}
IoW.p1.linelist <- IoW.p1.linelist[-1]
Eng.p1.linelist <- Eng.Pillar1$Date[1]
for(row in 1:nrow(Eng.Pillar1)){
if ((!is.na(Eng.Pillar1$Incidence[row]))&&(Eng.Pillar1$Incidence[row]>0)){
Eng.p1.linelist <- c(Eng.p1.linelist, rep(Eng.Pillar1$Date[row], Eng.Pillar1$Incidence[row]))
}
}
Eng.p1.linelist <- Eng.p1.linelist[-1]
```

And Pillar 2:
```{r analysis.p2.1, message=FALSE, warning=FALSE}
dat.IoW.p2$specimen_date <- as.Date(as.character(dat.IoW.p2$specimen_date), "%d-%b-%y")
dat.IoW.p2$Pillar2 <- sapply(dat.IoW.p2$Pillar2, function(x)(if(!is.na(x)) return(x) else return(0)))
dat.UK.p2$Date.of.activity <- as.Date(dat.UK.p2$Date.of.activity, "%d/%m/%y")
UKPillar2 <- dat.UK.p2 %>% filter(Pillar == "Pillar 2 (excluding Wales)")
stopifnot(unique(UKPillar2$Nation) == "UK") # check we've just got the UK entries
UKPillar2$Daily.number.of.positive.cases <- sapply(UKPillar2$Daily.number.of.positive.cases, function(x)(if(!is.na(x)) return(x) else return(0)))
IoW.p2.linelist <- dat.IoW.p2$specimen_date[1]
for(row in 1:nrow(dat.IoW.p2)){
if(dat.IoW.p2$Pillar2[row]>0){
for(case in 1:dat.IoW.p2$Pillar2[row]){
IoW.p2.linelist <- c(IoW.p2.linelist, dat.IoW.p2$specimen_date[row])
}
}
}
IoW.p2.linelist <- IoW.p2.linelist[-1]
UK.p2.linelist <- UKPillar2$Date.of.activity[1]
for(row in 1:nrow(UKPillar2)){
if(UKPillar2$Daily.number.of.positive.cases[row]>0){
# this is faster:
UK.p2.linelist <- c(UK.p2.linelist, rep(UKPillar2$Date.of.activity[row], UKPillar2$Daily.number.of.positive.cases[row]))
# for(case in 1:UKPillar2$Daily.number.of.positive.cases[row]){
# UK.linelist <- c(UK.linelist, UKPillar2$Date.of.activity[row])
# }
}
}
UK.p2.linelist <- UK.p2.linelist[-1]
```


Get `incidence` objects and Maximum-Likelihood estimates for the individual pillars data, starting 10 days after TT launch for Pillar 1 and 5 days after for Pillar 2 (to reflect the different typical times between symptom onset and swab).

```{r incidence.p1.and.p2, message=FALSE, warning=FALSE}
start.date.IoW.p1 <- as.Date("2020-05-15")
start.date.IoW.p2 <- as.Date("2020-05-10")
start.date.Eng.p1 <- as.Date("2020-05-28")
start.date.UK.p2 <- as.Date("2020-05-23")
end.date.IoW.p1 <- max(Eng.Pillar1$Date) - 5
end.date.Eng.p1 <- max(Eng.Pillar1$Date) - 5
end.date.UK.p2 <- max(UKPillar2$Date.of.activity, na.rm = TRUE) - 5
end.date.IoW.p2 <- end.date.UK.p2 # there were no more cases after the 13th
IoW.p1.incidence <- incidence(IoW.p1.linelist,
first_date = start.date.IoW.p1,
last_date = end.date.IoW.p1,
standard = FALSE)
Eng.p1.incidence <- incidence(Eng.p1.linelist,
first_date = start.date.Eng.p1,
last_date = end.date.Eng.p1,
standard = FALSE)
IoW.p2.incidence <- incidence(IoW.p2.linelist,
first_date = start.date.IoW.p2,
last_date = end.date.IoW.p2,
standard = FALSE)
UK.p2.incidence <- incidence(UK.p2.linelist,
first_date = start.date.UK.p2,
last_date = end.date.UK.p2,
standard = FALSE)
```

```{r mle.p1.and.p2, warning=FALSE}
fit.mle.IoW.p1 <- mle(minusloglik,
start=list(i0=10, r=-0.01),
fixed=list(start.date=start.date.IoW.p1,
incidence.dates=IoW.p1.incidence$dates,
incidence.counts=IoW.p1.incidence$counts)
)
summary(fit.mle.IoW.p1)
fit.mle.Eng.p1 <- mle(minusloglik,
start=list(i0=10, r=-0.03),
fixed=list(start.date=start.date.Eng.p1,
incidence.dates=Eng.p1.incidence$dates,
incidence.counts=Eng.p1.incidence$counts))
summary(fit.mle.Eng.p1)
fit.mle.IoW.p2 <- mle(minusloglik,
start=list(i0=10, r=-0.01),
fixed=list(start.date=start.date.IoW.p2,
incidence.dates=IoW.p2.incidence$dates,
incidence.counts=IoW.p2.incidence$counts)
)
summary(fit.mle.IoW.p2)
fit.mle.UK.p2 <- mle(minusloglik,
start=list(i0=10, r=-0.02),
fixed=list(start.date=start.date.UK.p2,
incidence.dates=UK.p2.incidence$dates,
incidence.counts=UK.p2.incidence$counts))
summary(fit.mle.UK.p2)
```

Loading

0 comments on commit ee311a3

Please sign in to comment.