Skip to content

Latest commit

 

History

History
483 lines (379 loc) · 19.9 KB

Volume-and-cell-number-titration.md

File metadata and controls

483 lines (379 loc) · 19.9 KB

CITE-seq optimization - Staining volume and cell number titration

Terkild Brink Buus 30/3/2020

Load utilities

Including libraries, plotting and color settings and custom utility functions

set.seed(114)
require("Seurat", quietly=T)
require("tidyverse", quietly=T)
library("Matrix", quietly=T)
library("patchwork", quietly=T)

## Load ggplot theme and defaults
source("R/ggplot_settings.R")

## Load helper functions
source("R/Utilities.R")

## Load predefined color schemes
source("R/color.R")

## Load feature_rankplot functions
source("R/feature_rankplot.R")
source("R/feature_rankplot_hist.R")
source("R/feature_rankplot_hist_custom.R")

outdir <- "figures"
data.Seurat <- "data/5P-CITE-seq_Titration.rds"
data.abpanel <- "data/Supplementary_Table_1.xlsx"
data.markerStats <- "data/markerByClusterStats.tsv"

## Make a custom function for formatting the concentration scale
scaleFUNformat <- function(x) sprintf("%.2f", x)

Load Seurat object

Subset to only focus on conditions with 200k or 1 mio cells and dilution factor 4 (thus comparing 50µl to 25µl staining volume with 1 mio or 200k PBMCs at staining).

object <- readRDS(file=data.Seurat)

## Show number of cells from each sample
table(object$group)
## 
## PBMC_50ul_1_1000k PBMC_50ul_4_1000k PBMC_25ul_4_1000k  PBMC_25ul_4_200k 
##              1777              1777              1777              1777 
##  Lung_50ul_1_500k  Lung_50ul_4_500k           Doublet          Negative 
##              1681              1681                 0                 0
object <- subset(object, subset=dilution == "DF4" & cellsAtStaining %in% c("200k","1000k"))
object
## An object of class Seurat 
## 33572 features across 5331 samples within 3 assays 
## Active assay: RNA.kallisto (33514 features)
##  2 other assays present: HTO.kallisto, ADT.kallisto
##  3 dimensional reductions calculated: pca, tsne, umap
color.volnum <- c("50µl_1000k"="#0082c8","25µl_1000k"="#f58231","25µl_200k"="#911eb4")
shape.volnum <- c("50µl_1000k"=21,"25µl_1000k"=22,"25µl_200k"=23)
object$volnum <- factor(paste(object$volume,object$cellsAtStaining,sep="_"),levels=names(color.volnum))

Load Ab panel annotation and concentrations

Marker stats is reused in other comparisons and was calculated in the end of the preprocessing vignette.

abpanel <- data.frame(readxl::read_excel(data.abpanel))
rownames(abpanel) <- abpanel$Marker

## As we are only working with dilution factor 4 samples here, we want to show labels accordingly
# a bit of a hack...
abpanel$conc_µg_per_mL <- abpanel$conc_µg_per_mL/4

markerStats <- read.table(data.markerStats)
markerStats.PBMC <- markerStats[markerStats$tissue == "PBMC",]
rownames(markerStats) <- paste(markerStats$marker,markerStats$tissue,sep="_")

## Make a ordering vector ordering markers per concentration and total UMI count
marker.order <- markerStats.PBMC$marker[order(markerStats.PBMC$conc_µg_per_mL, markerStats.PBMC$UMItotal, decreasing=TRUE)]

head(abpanel)
##        Marker Category     Alias   Clone Isotype_Mouse Corresponding_gene
## CD103   CD103        B      <NA> BerACT8          IgG1              ITGAE
## CD107a CD107a        B     LAMP1    H4A3          IgG1              LAMP1
## CD117   CD117        E     C-kit   104D2          IgG1                KIT
## CD11b   CD11b        B      <NA>  ICRF44          IgG1              ITGAM
## CD123   CD123        E      <NA>     6H6          IgG1              IL3RA
## CD127   CD127        E IL7Ralpha  A019D5          IgG1               IL7R
##        TotalSeqC_Tag BioLegend_Cat Stock_conc_µg_per_mL conc_µg_per_mL
## CD103           0145        350233                  500        0.31250
## CD107a          0155        328649                  500        0.62500
## CD117           0061        313243                  500        0.62500
## CD11b           0161        301359                  500        0.15625
## CD123           0064        306045                  500        0.12500
## CD127           0390        351356                  500        0.31250
##        dilution_1x
## CD103          400
## CD107a         200
## CD117          200
## CD11b          800
## CD123         1000
## CD127          400
head(markerStats)
##             marker tissue fineCluster nCells UMIsum   nth median   f90 UMItotal
## CD103_PBMC   CD103   PBMC           1    638   1740   5.0      2   5.0     5082
## CD103_Lung   CD103   Lung          16    132   7084 187.0      5 187.0    60252
## CD107a_PBMC CD107a   PBMC           1    638   7757  26.3      8  26.3    13396
## CD107a_Lung CD107a   Lung          12    260  11674  99.2     15  99.2    23273
## CD117_PBMC   CD117   PBMC           1    638   1318   4.0      2   4.0     3316
## CD117_Lung   CD117   Lung          21     32   1695  41.0     41 130.4     5878
##             Category Alias   Clone Isotype_Mouse Corresponding_gene
## CD103_PBMC         B  <NA> BerACT8          IgG1              ITGAE
## CD103_Lung         B  <NA> BerACT8          IgG1              ITGAE
## CD107a_PBMC        B LAMP1    H4A3          IgG1              LAMP1
## CD107a_Lung        B LAMP1    H4A3          IgG1              LAMP1
## CD117_PBMC         E C-kit   104D2          IgG1                KIT
## CD117_Lung         E C-kit   104D2          IgG1                KIT
##             TotalSeqC_Tag BioLegend_Cat Stock_conc_µg_per_mL conc_µg_per_mL
## CD103_PBMC            145        350233                  500           1.25
## CD103_Lung            145        350233                  500           1.25
## CD107a_PBMC           155        328649                  500           2.50
## CD107a_Lung           155        328649                  500           2.50
## CD117_PBMC             61        313243                  500           2.50
## CD117_Lung             61        313243                  500           2.50
##             dilution_1x marker.y DSB.cutoff positive count   pct
## CD103_PBMC          400    CD103          7       14  1777  0.79
## CD103_Lung          400    CD103          7      501  1681 29.80
## CD107a_PBMC         200   CD107a          7      122  1777  6.87
## CD107a_Lung         200   CD107a          7      150  1681  8.92
## CD117_PBMC          200    CD117          7        3  1777  0.17
## CD117_Lung          200    CD117          7       32  1681  1.90

Cell type and tissue overview

Make tSNE plots colored by cell type, cluster and tissue of origin.

p.tsne.volume <- DimPlot(object, group.by="volnum", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap(~"Volume") + scale_color_manual(values=color.volnum)

p.tsne.cluster <- DimPlot(object, group.by="supercluster", reduction="tsne", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + scale_color_manual(values=color.supercluster) + facet_wrap(~"Cell types")

p.tsne.finecluster <- DimPlot(object, label=TRUE, label.size=3, reduction="tsne", group.by="fineCluster", pt.size=0.1, combine=FALSE)[[1]] + theme_get() + facet_wrap(  ~"Clusters") + guides(col=F)

p.tsne.cluster + p.tsne.finecluster + p.tsne.volume

Overall ADT counts

Extract UMI data and calculate UMI sum per marker within each condition.

## Get the data
ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
ADT.matrix$marker <- rownames(ADT.matrix)
ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))

## Get cell annotations
cell.annotation <- FetchData(object, vars=c("volnum"))

## Calculate marker sum from each dilution within both tissues
ADT.matrix.agg <- ADT.matrix %>% group_by(volume=cell.annotation[name,"volnum"], marker, conc) %>% summarise(sum=sum(value))

## Order markers by concentration
ADT.matrix.agg$marker.byConc <- factor(ADT.matrix.agg$marker, levels=marker.order)

## Extract marker annotation
ann.markerConc <- abpanel[marker.order,]
ann.markerConc$Marker <- factor(marker.order, levels=marker.order)

ADT.matrix.agg.total <- ADT.matrix.agg

Plot overall ADT counts by conditions

Samples stained with diluted Ab panel have reduced ADT counts.

p.UMIcountsPerCondition <- ggplot(ADT.matrix.agg.total[order(-ADT.matrix.agg$conc, -ADT.matrix.agg$sum),], aes(x=volume, y=sum/10^6, fill=conc)) + 
  geom_bar(stat="identity", col=alpha(col="black",alpha=0.05)) + 
  scale_fill_viridis_c(trans="log2", labels=scaleFUNformat, breaks=c(0.0375,0.15,0.625,2.5,10)) + 
  scale_y_continuous(expand=c(0,0,0,0.05)) + 
  labs(fill="DF4\nµg/mL", y=bquote("ADT UMI counts ("~10^6~")")) + 
  guides(fill=guide_colourbar(reverse=T)) + 
  theme(panel.grid.major=element_blank(), axis.title.x=element_blank(), panel.border=element_blank(), axis.line = element_line(), legend.position="right")

p.UMIcountsPerCondition

Compare total UMI counts per marker

Plot total UMI counts for each marker at the investigated dilution factors (DF1 vs. DF4). To ease readability, we place dashed lines between each concentration.

## Calculate "breaks" where concentration change.
lines <- length(marker.order)-cumsum(sapply(split(ann.markerConc$Marker,ann.markerConc$conc_µg_per_mL),length))+0.5
lines <- data.frame(breaks=lines[-length(lines)])

## Make a marker by concentration "heatmap"
p.markerByConc <- ggplot(ann.markerConc, aes(x=1, y=Marker, fill=conc_µg_per_mL)) + 
  geom_tile(col=alpha(col="black",alpha=0.2)) + 
  geom_hline(data=lines,aes(yintercept=breaks), linetype="dashed", alpha=0.5) + 
  scale_fill_viridis_c(trans="log2") + 
  labs(fill="µg/mL") + 
  theme_get() + 
  theme(axis.ticks.x=element_blank(), axis.title = element_blank(), axis.text.x=element_blank(), panel.grid=element_blank(), legend.position="right", plot.margin=unit(c(0.1,0.1,0.1,0.1),"mm")) + scale_x_continuous(expand=c(0,0))
  
## Make UMI counts per Marker plot
p.UMIcountsPerMarker <- ggplot(ADT.matrix.agg, aes(x=marker.byConc,y=log2(sum))) + 
  geom_line(aes(group=marker), size=1, color="#666666", alpha=0.5) + 
  ggbeeswarm::geom_quasirandom(aes(group=volume, fill=volume, pch=volume), size=1, dodge.width=-0.75) + 
  geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) + 
  scale_fill_manual(values=color.volnum) + 
  scale_y_continuous(breaks=c(9:17)) + 
  scale_shape_manual(values=shape.volnum) + 
  ylab("log2(UMI sum)") + 
  guides(fill=guide_legend(override.aes=list(size=1.5), ncol=1, reverse=FALSE)) + 
  theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="bottom", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm"), legend.title=element_blank()) + 
  coord_flip()

## Combine plot with markerByConc annotation heatmap
plotUMIcountsPerMarker <- p.markerByConc + guides(fill=F) + p.UMIcountsPerMarker + guides(fill=F, shape=F) + plot_spacer() + guide_area() + plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')

plotUMIcountsPerMarker

Compare change in UMI/cell within expressing cluster

Using a specific percentile may be prone to outliers in small clusters (i.e. the 90th percentile of a cluster of 30 will be the #3 higest cell making it prone to outliers). We thus set a threshold of the value to only be the 90th percentile if cluster contains more than 100 cells. For smaller clusters, the median is used. Expressing cluster is identified in the “preprocessing” vignette.

## Get the data
ADT.matrix <- data.frame(GetAssayData(object, assay="ADT.kallisto", slot="counts"))
ADT.matrix$marker <- rownames(ADT.matrix)
ADT.matrix$conc <- abpanel[ADT.matrix$marker,"conc_µg_per_mL"]
ADT.matrix <- ADT.matrix %>% pivot_longer(c(-marker,-conc))

## Get cell annotations
cell.annotation <- FetchData(object, vars=c("volnum", "fineCluster"))

## Calculate marker statistics from each dilution within each cluster
ADT.matrix.agg <- ADT.matrix %>% group_by(volume=cell.annotation[name,"volnum"], fineCluster=cell.annotation[name,"fineCluster"], marker, conc) %>% summarise(sum=sum(value), median=quantile(value, probs=c(0.9)), nth=nth(value))
ADT.matrix.agg$tissue == "PBMC"
## logical(0)
## Use data for the previously determined expressing cluster.
Cluster.max <- markerStats[markerStats$tissue == "PBMC",c("marker","fineCluster")]
Cluster.max$fineCluster <- factor(Cluster.max$fineCluster)

ADT.matrix.aggByClusterMax <- Cluster.max %>% left_join(ADT.matrix.agg)
ADT.matrix.aggByClusterMax$marker.byConc <- factor(ADT.matrix.aggByClusterMax$marker, levels=marker.order)

p.UMIinExpressingCells <- ggplot(ADT.matrix.aggByClusterMax, aes(x=marker.byConc, y=log2(nth))) + 
  geom_line(aes(group=marker), size=1, alpha=0.5, color="#666666") + 
  ggbeeswarm::geom_quasirandom(aes(group=volume, fill=volume, pch=volume), size=1, show.legend=FALSE, dodge.width=-0.75) + 
  geom_vline(data=lines,aes(xintercept=breaks), linetype="dashed", alpha=0.5) + 
  geom_text(aes(label=paste0(fineCluster," ")), y=Inf, adj=1, size=1.5) + 
  scale_fill_manual(values=color.volnum) + 
  scale_shape_manual(values=shape.volnum) + 
  scale_y_continuous(breaks=c(0:11), labels=2^c(0:11), expand=c(0.05,0.5)) + 
  ylab("90th percentile UMI of expressing cluster") + 
  theme(axis.title.y=element_blank(), axis.text.y=element_blank(), legend.position="right", legend.justification="left", legend.title.align=0, legend.key.width=unit(0.2,"cm")) + 
  coord_flip()

## Combine plot with markerByConc annotation heatmap
UMIinExpressingCells <- p.markerByConc + theme(legend.position="none") + p.UMIinExpressingCells + theme(legend.position="none") + plot_spacer() + plot_layout(ncol=4, widths=c(1,30,0.1), guides='collect')

UMIinExpressingCells

Titration examples

Most markers are largely unaffected by reducing staining volume. However, some antibodies used at low concentrations and targeting abundant epitopes are affected, an example of such is CD31:

## Make helper function for plotting titration plots
titrationPlot <- function(marker, gate.PBMC=NULL, gate.Lung=NULL, y.axis=FALSE, show.gate=TRUE, legend=FALSE){
  curMarker.name <- marker
  
  ## Get antibody concentration for legends
  curMarker.DF1conc <- abpanel[curMarker.name, "conc_µg_per_mL"]
  if(show.gate==TRUE){
    ## Load gating percentages from manually set DSB thresholds
    gate <- data.frame(gate=markerStats[markerStats$marker == curMarker.name & markerStats$tissue== "PBMC",c("pct")])
    gate$gate <- 1-(gate$gate/100)
    rownames(gate) <- gate$wrap
    ## Allow manual gating
    if(!is.null(gate.PBMC)) gate <- gate.PBMC
  } else {
    gate <- NULL
  }

  p <- feature_rankplot_hist_custom(data=object, 
                                    marker=paste0("adt_",curMarker.name),      
                                    group="volnum",
                                    barcodeGroup="supercluster",
                                    conc=curMarker.DF1conc, 
                                    legend=legend, 
                                    yaxis.text=y.axis, 
                                    gates=gate,
                                    histogram.colors=color.volnum, 
                                    title=curMarker.name)
  
  return(p)
}

p.CD31 <- titrationPlot("CD31", legend=TRUE)

p.CD31

Final plot

A <- p.UMIcountsPerCondition + theme(legend.key.width=unit(0.3,"cm"), 
                                     legend.key.height=unit(0.4,"cm"), 
                                     legend.text=element_text(size=unit(5,"pt")),
                                     plot.margin=unit(c(0.3,0,0,0),"cm"))

B1 <- p.markerByConc + theme(text = element_text(size=10), 
                             plot.margin=unit(c(0.3,0,1,0),"cm"),
                             legend.position="none")
B2 <- p.UMIcountsPerMarker + theme(legend.position="none")
C <- p.UMIinExpressingCells + theme(legend.position="none")

BC.legend <- cowplot::get_legend(p.UMIcountsPerMarker + 
                                   theme(legend.position="bottom", 
                                         legend.direction="horizontal", 
                                         legend.background=element_blank(), 
                                         legend.box.background=element_blank(), 
                                         legend.key=element_blank(),
                                         legend.key.height=unit(2,"mm")))

D <- p.CD31 + theme(plot.margin=unit(c(0.5,0,0,0),"cm"))

AD <- cowplot::plot_grid(A,D,NULL, 
                         ncol=1, 
                         rel_heights = c(14,16,1.5),
                         labels=c("A","D",""), 
                         label_size=panel.label_size, 
                         vjust=panel.label_vjust, 
                         hjust=panel.label_hjust)

BC <- cowplot::plot_grid(B1, B2, C, 
                         nrow=1, 
                         rel_widths=c(2,10,10), 
                         align="h", 
                         axis="tb", 
                         labels=c("B", "", "C"), 
                         label_size=panel.label_size, 
                         vjust=panel.label_vjust, 
                         hjust=panel.label_hjust)

p.final <- cowplot::ggdraw(plot_grid(AD, BC, nrow=1, rel_widths=c(1.2,4), align="v", axis="l")) + 
    cowplot::draw_plot(BC.legend,0.27,0.023,0.2,0.00001)

png(file=file.path(outdir,"Supplementary Figure S5.png"), 
    width=figure.width.full, 
    height=4.7, 
    units = figure.unit, 
    res=figure.resolution, 
    antialias=figure.antialias)

  p.final
  
dev.off()
## png 
##   2
p.final

Individual titration plots

For supplementary information.

plots.columns = 6
rows.max <- 5

markers <- abpanel[rownames(object[["ADT.kallisto"]]),]
markers <- markers[order(markers$Category, markers$Marker),]

plots <- list()

## Make individual plots for each marker
for(i in 1:nrow(markers)){
  curMarker <- markers[i,]
  curMarker.name <- curMarker$Marker
  y.axis <- ifelse((i-1) %in% c(0,6,12,18,24,30,36,42,48),TRUE,FALSE)
  plots[[curMarker.name]] <- titrationPlot(curMarker.name, y.axis=y.axis)
}

# a bit of a hack to make celltype legend
p.legend <- cowplot::get_legend(ggplot(data.frame(supercluster=object$supercluster), 
                                           aes(color=supercluster,x=1,y=1)) + 
  geom_point(shape=15, size=1.5) + 
  scale_color_manual(values=color.supercluster) + 
  theme(legend.title=element_blank(), 
        legend.margin=margin(0,0,0,0), 
        legend.key.size = unit(0.15,"cm"),
        legend.position = c(0.98,1.1), 
        legend.justification=c(1,1), 
        legend.direction="horizontal"))

plots.num <- length(plots)
plots.perPage <- plots.columns*rows.max
plots.pages <- ceiling(plots.num/plots.perPage)

## Make a supplementary figure split into pages
for(i in 1:plots.pages){
  start <- (i-1)*plots.perPage+1
  end <- i*plots.perPage
  end <- min(end,plots.num)
  curPlots <- c(start:end)
  plots.rows <- ceiling(length(curPlots)/plots.columns)
  
  curPlots <- cowplot::plot_grid(plotlist=plots[curPlots],ncol=plots.columns, rel_widths=c(1.1,1,1,1,1,1), align="h", axis="tb")
  curPlots.layout <- cowplot::plot_grid(NULL, p.legend, curPlots, vjust=-0.5, hjust=panel.label_hjust, label_size=panel.label_size, ncol=1, rel_heights= c(0.5, 1.3, 70/5*plots.rows))
  
  png(file=file.path(outdir,paste0("Supplementary Figure X",LETTERS[i],".png")), 
      units=figure.unit, 
      res=figure.resolution, 
      width=figure.width.full, 
      height=(2*plots.rows),
      antialias=figure.antialias)

  print(curPlots.layout)
  
  dev.off()
  
  print(curPlots.layout)
}