-
Notifications
You must be signed in to change notification settings - Fork 2
/
ADT reads in cells vs empty drops.Rmd
429 lines (332 loc) · 17 KB
/
ADT reads in cells vs empty drops.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
---
title: "CITE-seq optimization - ADT in cell-containing vs empty drops"
author: "Terkild Brink Buus"
date: "30/3/2020"
output: github_document
---
Background signal in CITE-seq has been proposed to be primarily caused by free-floating antibodies and can be assessed by measuring reads from Non-cell-containing (empty) droplets (Mulé et al. 2020). In this vignette, we compare UMI counts from cell-containing vs. empty drops
```{r setup, include=FALSE}
knitr::opts_chunk$set(warning=FALSE, message=FALSE)
options(stringsAsFactors=FALSE)
```
## Load utilities
Including libraries, plotting and color settings and custom utility functions
```{r loadLibraries, results='hide', message=FALSE, warning=FALSE}
set.seed(114)
require("Seurat", quietly=T)
require("tidyverse", quietly=T)
library("Matrix", quietly=T)
## Load ggplot theme and defaults
source("R/ggplot_settings.R")
## Load helper functions
source("R/Utilities.R")
## Load color schemes
source("R/color.R")
outdir <- "figures"
data.drive <- "F:/"
data.abpanel <- "data/Supplementary_Table_1.xlsx"
data.markerStats <- "data/markerByClusterStats.tsv"
data.Seurat <- "data/5P-CITE-seq_Titration.rds"
show_tsne_markers <- c("CD4", "CD19", "CD86", "CD279", "TCRgd")
## Make a custom function for formatting the concentration scale
scaleFUNformat <- function(x) sprintf("%.2f", x)
```
## Load the data
The ADT UMI count data has already been loaded and filtered in the "ADT counting methods" vignette. We'll load it from there. This includes the kallisto.ADT UMI count matrix as well as a list of barcodes that have been filtered to have gene expression UMI counts above the inflection point in the rank-barcode plot (used for calling cell-containing vs. empty droplets).
```{r loadADTdata}
load("data/data.ADT.Rdata")
## ADT UMI counts
kallisto.ADT[1:5,1:5]
## Barcodes for cell-containing droplet
head(gex.aboveInf)
```
## Load antibody panel data
Antibody panel concentration data is loaded from the supplementary data excel sheet.
```{r abdata}
abpanel <- data.frame(readxl::read_excel(data.abpanel))
rownames(abpanel) <- abpanel$Marker
head(abpanel)
```
## Preprocess data for plotting
Make sums of ADT UMI counts within cell-containing and empty droplets.
```{r preprocess}
ADT.matrix <- kallisto.ADT
## Calculate total UMI count per marker
markerUMI <- apply(ADT.matrix,1,sum)
## Calculate UMI count within cell-containing and empty droplets
markerUMI.inCell <- apply(ADT.matrix[,gex.aboveInf],1,sum)
markerUMI.inCell.freq <- markerUMI.inCell/sum(markerUMI.inCell)
markerUMI.inDrop <- markerUMI-markerUMI.inCell
markerUMI.inDrop.freq <- markerUMI.inDrop/sum(markerUMI.inDrop)
## Make DF to allow combination of the data into a "long" format
df.inCell <- data.frame(count=markerUMI.inCell, freq=markerUMI.inCell.freq, subset="Cell", marker=names(markerUMI.inCell.freq))
df.inDrop <- data.frame(count=markerUMI.inDrop, freq=markerUMI.inDrop.freq, subset="EmptyDrop", marker=names(markerUMI.inDrop.freq))
plotData <- rbind(df.inCell, df.inDrop)
## Add "metadata
plotData$conc <- abpanel[plotData$marker,"conc_µg_per_mL"]
plotData$subset <- factor(plotData$subset, levels=c("EmptyDrop","Cell"))
## Order markers according to antibody concentration and UMI frequency within empty droplets (by setting levels)
plotData$marker <- factor(plotData$marker,
levels=plotData$marker[order(plotData$conc[plotData$subset=="EmptyDrop"],
plotData$freq[plotData$subset=="EmptyDrop"])])
head(plotData)
```
## Draw cell-containing to empty droplet frequency ratio plot
```{r ratio, fig.width=2, fig.height=5}
data.ratio <- data.frame(ratio=markerUMI.inCell.freq/markerUMI.inDrop.freq) %>% mutate(Marker=rownames(.), conc=abpanel[rownames(.),"conc_µg_per_mL"]) %>% arrange(conc, ratio)
data.ratio$Marker <- factor(data.ratio$Marker, levels=data.ratio$Marker)
p.ratio <- ggplot(data.ratio, aes(x=Marker, y=log2(ratio))) +
geom_rect(aes(xmin=-Inf,xmax=Inf,ymin=-1,ymax=-1.25,fill=conc), col="black") +
scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) +
ggnewscale::new_scale_fill() +
geom_bar(stat="identity", aes(fill=log2(ratio)>0), color="black", width=0.4) +
geom_hline(yintercept=0) +
scale_fill_manual(values=c(`FALSE`="lightgrey",`TRUE`="black")) +
scale_x_discrete(expand=c(0, 0.5)) +
scale_y_continuous(expand=c(0,0.05,0,0.05)) +
coord_flip() +
facet_grid(rows="conc", scales="free_y", space="free_y") +
labs(title="Cell:Empty ratio", y="log2(Cells:Empty ratio)", fill="µg/mL") +
theme(plot.title=element_text(size=7, face="bold", hjust=0.5),
panel.spacing=unit(0.5,"mm"),
axis.line=element_line(),
axis.title.y=element_blank(),
strip.placement="outside",
strip.text=element_blank(),
panel.border=element_rect(color=alpha("black",0.25)),
legend.position="none",
legend.justification=c(0,1),
legend.direction="horizontal",
legend.text.align=0,
legend.key.width=unit(0.3,"cm"),
legend.key.height=unit(0.4,"cm"),
legend.text=element_text(size=unit(5,"pt")))
p.ratio
```
## Draw barplot of UMI counts in cell-containing and empty-droplets
```{r barplot, fig.height=5, fig.width=3}
plotData$marker <- factor(as.character(plotData$marker), levels=levels(data.ratio$Marker))
p.barplot <- ggplot(plotData, aes(x=marker, y=count/10^6)) +
geom_rect(aes(xmin=-Inf,xmax=Inf,ymin=-0.050000,ymax=-0.010000,fill=conc), col="black") +
scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) +
ggnewscale::new_scale_fill() +
geom_bar(aes(fill=subset),stat="identity", position="dodge", color="black", width=0.65) +
geom_hline(yintercept=0, col="black") +
scale_fill_manual(values=c("Cell"="black","EmptyDrop"="lightgrey")) +
scale_x_discrete(expand=c(0, 0.5)) +
scale_y_continuous(expand=c(0,0,0,0.05)) +
coord_flip() +
facet_grid(rows="conc", scales="free_y", space="free_y") +
guides(fill=guide_legend(reverse=TRUE)) +
labs(title="UMI counts", y=bquote("UMI count ("~10^6~")"), fill="Compartment") +
theme(plot.title=element_text(size=7, face="bold", hjust=0.5),
panel.border=element_blank(),
panel.grid.major.y=element_blank(),
panel.spacing=unit(0.5,"mm"),
axis.line=element_line(),
axis.title.y=element_blank(),
#axis.text.y=element_blank(),
strip.placement="outside",
strip.text=element_blank(),
legend.position=c(1,1),
legend.justification=c(1,1),
legend.text.align=0,
legend.key.width=unit(0.3,"cm"),
legend.key.height=unit(0.4,"cm"),
legend.text=element_text(size=unit(5,"pt")))
p.barplot
```
# Highlight markers
Determine which markers should be highlighted due to their differences between cell-containing and empty droplets.
```{r highlight}
freq.threshold <- 0.05
plotData$highlight <- ifelse(plotData$marker %in% plotData$marker[plotData$freq >= freq.threshold],1,0)
## Determine which compartment has the highest frequency for the markers above the threshold and assign the labels accordingly
max.label <- plotData[plotData$freq >= freq.threshold,] %>% group_by(marker) %>% summarize(subset.max=subset[which.max(freq)])
plotData$label <- ifelse((paste(plotData$marker,plotData$subset) %in%
paste(max.label$marker,max.label$subset.max))==FALSE |
plotData$freq < freq.threshold,
NA,as.character(plotData$marker))
```
## Make alluvial "river" plot of markers in each compartment
To allow labelling the markers, we need to calculate the cummulativeFrequency.
```{r alluvial, fig.height=5, fig.width=1.3}
## Order the dataframe
plotData$marker.conc <- factor(as.character(plotData$marker), levels=unique(plotData$marker[order(-plotData$conc, plotData$marker, decreasing=TRUE)]))
plotData <- plotData[order(plotData$marker.conc, decreasing=TRUE),]
plotData$cummulativeFreq <- 0
plotData$cummulativeFreq[plotData$subset=="EmptyDrop"] <- cumsum(plotData$freq[plotData$subset=="EmptyDrop"])
plotData$cummulativeFreq[plotData$subset=="Cell"] <- cumsum(plotData$freq[plotData$subset=="Cell"])
## A bit of a hack to get the columns in order
#plotData$subset.rev <- factor(as.character(plotData$subset), levels=c("Cell","EmptyDrop"))
p.alluvial <- ggplot(plotData, aes(y=freq, x=subset, fill=conc, stratum = marker.conc, alluvium = marker.conc)) +
ggalluvial::geom_flow(width = 1/2, color=alpha("black",0.25), alpha=0.75) +
ggalluvial::geom_stratum(width = 1/2) +
geom_text(aes(y=cummulativeFreq-(freq/2),label=label), na.rm=TRUE, vjust=0.5, hjust=0.5, angle=30, size=1.5, fontface="bold") +
scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) +
scale_y_continuous(expand=c(0,0)) +
scale_x_discrete(expand=c(0,0), limits=rev(levels(plotData$subset))) +
labs(title="Frequency", y="UMI frequency", fill="DF1 µg/mL") +
theme(plot.title=element_text(size=7, face="bold", hjust=0.5),
legend.position="none",
axis.title.x=element_blank(),
panel.grid=element_blank())
p.alluvial
```
# Specific signals despite background
Despite high background (as assayed by high number of reads in empty droplets), most markers provide specific signal. However, the number of UMIs neede to achieve this signal is much lower in the markers with high signal-to-noise.
```{r}
object <- readRDS(file=data.Seurat)
## Show number of cells from each sample
table(object$group)
object <- subset(object, subset=volume == "50µl" & dilution == "DF1")
object
DefaultAssay(object) <- "ADT.kallisto"
```
## Show "positive" cutoff according to concentration
Another way to show this is to show the number of UMIs required to get above the background threshold (defined in Supplementary Figure S1)
```{r, fig.height=5, fig.width=3}
markerStats <- read.table(data.markerStats)
rownames(markerStats) <- paste(markerStats$marker,markerStats$tissue,sep="_")
## Determine which tissue has highest percentage positive cells and use this to set cutoff.
markerStats.max <- markerStats %>% group_by(marker) %>% filter(pct==max(pct))
data.UMI <- GetAssayData(object, assay="ADT.kallisto", slot="counts")
data.meta <- FetchData(object, vars=c("tissue"))
marker.data <- as.data.frame(data.UMI) %>%
mutate(marker=rownames(.)) %>%
pivot_longer(-marker) %>%
group_by(marker, tissue=data.meta[name,"tissue"]) %>%
summarize(pos.cutoff=quantile(value, probs=(1-min(0.95,(markerStats[paste(marker[1],tissue[1],sep="_"),"pct"]+20)/100)))) %>% left_join(markerStats)
marker.data$marker <- factor(as.character(marker.data$marker), levels=levels(data.ratio$Marker))
p.UMIcutoff <- ggplot(marker.data, aes(x=marker, y=pos.cutoff, group=tissue, fill=tissue)) +
geom_bar(position="dodge", stat="identity", color="black", width=0.65) +
scale_fill_manual(values=color.tissue) +
scale_x_discrete(expand=c(0, 0.5)) +
scale_y_continuous(expand=c(0,0.05,0,0.05)) +
coord_flip() +
facet_grid(rows="conc_µg_per_mL", scales="free_y", space="free_y") +
labs(title="UMI cutoff", y="Above-background cutoff (UMI)", fill="Tissue") +
theme(plot.title=element_text(size=7, face="bold", hjust=0.5),
panel.border=element_blank(),
panel.grid.major.y=element_blank(),
panel.spacing=unit(0.5,"mm"),
axis.line=element_line(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
strip.placement="outside",
strip.text=element_blank(),
legend.position=c(1,1),
legend.justification=c(1,1),
legend.text.align=0,
legend.key.width=unit(0.3,"cm"),
legend.key.height=unit(0.4,"cm"),
legend.text=element_text(size=unit(5,"pt")))
p.UMIcutoff
```
Make tSNE plots with raw UMI counts. Use rainbow color scheme to show dynamic range in expression levels.
```{r, fig.height=1.6, fig.width=7}
f.tsne.format <- function(x){
x +
scale_color_gradientn(colours = c("#000033","#3333FF","#3377FF","#33AAFF","#33CC33","orange","red"),
limits=c(0,NA)) +
scale_y_continuous(expand=c(0.15,0,0.05,0)) +
theme_get() +
theme(plot.title=element_text(size=7, face="bold", hjust=0.5),
plot.background=element_blank(),
panel.background=element_blank(),
axis.title=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
legend.key.width=unit(3,"mm"),
legend.key.height=unit(2,"mm"),
legend.position=c(1,-0.03),
legend.justification=c(1,0),
legend.background=element_blank(),
legend.direction="horizontal")
}
p.tsne <- lapply(FeaturePlot(object, reduction="tsne", sort=TRUE, combine=FALSE,
features=show_tsne_markers,
slot="counts",
max.cutoff='q90',
pt.size=0.1),
FUN=f.tsne.format)
## Get common y-axis label
p.tsne[[1]] <- p.tsne[[1]] + theme(axis.title.y=element_text())
# a bit of a hack to get a common x-axis label
p.tsne[[3]] <- p.tsne[[3]] + theme(axis.title.x=element_text(hjust=0.5))
p.UMI.tsne <- cowplot::plot_grid(plotlist=p.tsne,
align="h",
axis="tb",
nrow=1,
rel_widths=c(1.07,1,1,1,1),
labels=c("E","","F","","G"),
label_size=panel.label_size,
vjust=panel.label_vjust,
hjust=panel.label_hjust)
p.UMI.tsne
```
Make similar plots for all markers
```{r}
markers <- sort(rownames(object[["ADT.kallisto"]]))
p.tsne.all <- lapply(FeaturePlot(object, reduction="tsne", sort=TRUE, combine=FALSE,
features=markers,
slot="counts",
max.cutoff='q90',
pt.size=0.1),
FUN=f.tsne.format)
names(p.tsne.all) <- markers
p.tsne.all <- lapply(markers, function(x) p.tsne.all[[x]] + ggtitle(paste0(x," (",markerStats[paste0(x,"_PBMC"),"conc_µg_per_mL"]," µg/mL)")))
plot.columns <- 5
plot.num <- length(p.tsne.all)
plot.rows <- ceiling(plot.num/plot.columns)
plot.rowSplit <- 6
## Reduce margins
p.tsne.all <- lapply(p.tsne.all, function(x) x +
theme(plot.margin=unit(c(0.1,0.1,0.3,0.1),"mm")))
## Get common y-axis label
p.tsne.all[(c(0:(plot.rows-1))*plot.columns+1)] <- lapply(p.tsne.all[(c(0:(plot.rows-1))*plot.columns+1)], function(x) x + theme(axis.title.y=element_text()))
## Show axis label for the center plot of the last row
p.tsne.all[[(plot.columns*plot.rowSplit-floor(plot.columns/2))]] <- p.tsne.all[[(plot.columns*plot.rowSplit-floor(plot.columns/2))]] + theme(axis.title.x=element_text(hjust=0.5))
# a bit of a hack to get a common x-axis label on the last row (hardcoded)
p.tsne.all[[52]] <- p.tsne.all[[52]] + theme(axis.title.x=element_text(hjust=2))
p.UMI.tsne.all.1 <- cowplot::plot_grid(plotlist=p.tsne.all[1:(plot.rowSplit*plot.columns)], align="h", axis="tb", ncol=plot.columns, rel_widths=c(1.1,1,1,1,1))
p.UMI.tsne.all.2 <- cowplot::plot_grid(plotlist=p.tsne.all[(plot.rowSplit*plot.columns+1):52], align="h", axis="tb", ncol=plot.columns, rel_widths=c(1.1,1,1,1,1))
png(file=file.path(outdir,paste0("Supplementary Figure S7A.png")),
units=figure.unit,
res=figure.resolution,
width=figure.width.full,
height=(figure.width.full/plot.columns*plot.rowSplit)*1.1,
antialias=figure.antialias)
p.UMI.tsne.all.1
dev.off()
png(file=file.path(outdir,paste0("Supplementary Figure S7B.png")),
units=figure.unit,
res=figure.resolution,
width=figure.width.full,
height=(figure.width.full/plot.columns*(plot.rows-plot.rowSplit))*1.1,
antialias=figure.antialias)
p.UMI.tsne.all.2
dev.off()
```
## Combine figure
```{r figure, fig.height=5.9, fig.width=figure.width.full}
p.row1 <- cowplot::plot_grid(p.barplot + theme(plot.margin=unit(c(0.02,0,0,0),"npc")),
p.alluvial,
p.ratio + theme(plot.margin=unit(c(0,0,0,0.05),"npc")),
p.UMIcutoff + theme(plot.margin=unit(c(0,0,0,-0.007),"npc")),
nrow=1,
rel_widths=c(1.75,0.75,1.2,1.3),
align="h",
axis="tb",
labels=c("A", "B", "C", "D"),
label_size=panel.label_size,
vjust=panel.label_vjust,
hjust=panel.label_hjust)
p.final <- cowplot::plot_grid(p.row1, p.UMI.tsne,
ncol=1,
rel_heights=c(3,1.05))
p.final
png(file=file.path(outdir,"Figure 5.png"), width=figure.width.full, height=5.9, units=figure.unit, res=figure.resolution, antialias=figure.antialias)
p.final
dev.off()
```