Skip to content

Commit

Permalink
Update normalizeRecount3PerTissueAndDoPca.R
Browse files Browse the repository at this point in the history
  • Loading branch information
PatrickDeelen committed Nov 13, 2022
1 parent a2819ac commit bf2f099
Showing 1 changed file with 51 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@



remoter::client("localhost", port = 55508)
remoter::client("localhost", port = 55506)

library(DESeq2)
library(parallel)
Expand Down Expand Up @@ -65,6 +65,50 @@ perTissueExp <- lapply(tissueClasses, function(tissue){
#names(perTissueExp) <- tissueClasses
#save(perTissueExp, file = "perTissueNormalization/selectedSamplesRawExpressionPerTissue.RData")
tissue = "Kidney"
load(file = "Metadata/combinedMeta_2022_09_15.RData", verbose = T)

covariatesToCorrectFor <- read.delim("CovariateNames.txt", header = F)$V1

str(sum(is.na(combinedMeta[,"sra.sample_spots"])))

#TCGA samples don't have sra.sample_spots instead use recount_qc.bc_frag.count
missingSampleSpots <- is.na(combinedMeta[,"sra.sample_spots"])
combinedMeta[missingSampleSpots,"sra.sample_spots"] <- combinedMeta[missingSampleSpots,"recount_qc.bc_frag.count"]

#TCGA and Gtex don't report layout but is all paired
missingLayout <- is.na(combinedMeta[,"sra.library_layout"])
combinedMeta[missingLayout,"sra.library_layout"] <- "paired"

combinedMetaSelection <- combinedMeta[,covariatesToCorrectFor]
combinedMetaSelection$sra.library_layout <- as.factor(combinedMetaSelection$sra.library_layout)
table(combinedMeta[,"sra.library_layout"],useNA = "a")

cl <- makeCluster(20)

tissue <- "Salivary Gland-Minor Salivary Gland"

sink <- lapply(tissueClasses, function(tissue){
cat(tissue, "\n")


load(file = paste0("perTissueNormalization/vstExp/",make.names(tissue),".RData"))

combinedMetaTissue <- combinedMetaSelection[colnames(vstExp),]
if(min(table(combinedMetaTissue$sra.library_layout)) < 10){
combinedMetaTissue <- combinedMetaTissue[,-match("sra.library_layout",colnames(combinedMetaTissue))]
}

vstExpCovCor <- parApply(cl, vstExp, 1 ,function(geneExp, combinedMetaTissue){
return(residuals(lm(geneExp ~ . ,data = combinedMetaTissue)))
}, combinedMetaTissue = combinedMetaTissue)
vstExpCovCor <- t(vstExpCovCor)


save(vstExpCovCor, file = paste0("perTissueNormalization/vstExpCovCor/",make.names(tissue),".RData"))

})

stopCluster(cl)


sink <- lapply(tissueClasses, function(tissue){
Expand All @@ -74,7 +118,7 @@ sink <- lapply(tissueClasses, function(tissue){
load(file = paste0("perTissueNormalization/vstExp/",make.names(tissue),".RData"))

#https://stackoverflow.com/questions/18964837/fast-correlation-in-r-using-c-and-parallelization/18965892#18965892
expScale = vstExp - rowMeans(vstExp);
expScale = vstExpCovCor - rowMeans(vstExpCovCor);
# Standardize each variable
expScale = expScale / sqrt(rowSums(expScale^2));
#expCov = tcrossprod(expScale);#equevelent to correlation due to center scale
Expand Down Expand Up @@ -121,7 +165,6 @@ eigenVec <- tissueVstPca$eigenVectors
rownames(vstExp) <- (gsub("\\..+", "", rownames(eigenVec)))
write.table(eigenVec, file = gzfile(paste0("/groups/umcg-fg/tmp01/projects/genenetwork/recount3/perTissueNormalization/vstPca/",make.names(tissue),"_eigenVec.txt.gz")), sep = "\t", col.names = NA, quote = F)

load(file = "Metadata/combinedMeta_2022_09_15.RData", verbose = T)

metaNumeric <- as.matrix(combinedMeta[,sapply(combinedMeta, is.numeric)])

Expand All @@ -134,7 +177,7 @@ marCorZPerTissue <- lapply(tissueClasses, function(tissue){
pcs <- tissueVstPca$expPcs
metaTest <- metaNumeric[rownames(pcs),]

metaVPcsZ <- apply(expPcs, 2, function(x){
metaVPcsZ <- apply(pcs, 2, function(x){
apply(metaTest, 2, function(y, x){
z <- !is.na(x) & !is.na(y)
if(sum(z) < 10){
Expand All @@ -157,7 +200,7 @@ str(marCorZPerTissue)
marCorZPerTissue2 <- do.call(cbind, marCorZPerTissue)
str(marCorZPerTissue2)

sort(apply(marCorZPerTissue2,1,max))
covariateZscores <- apply(marCorZPerTissue2,1,mean)

marCorZPerTissue2["recount_qc.star.%_of_reads_mapped_to_multiple_loci",]
library(vioplot)
Expand All @@ -174,6 +217,8 @@ cor.test(pcs[,1], metaTest[,"recount_qc.star.%_of_reads_mapped_to_multiple_loci"
plot(pcs[,1], metaTest[,"recount_qc.star.%_of_reads_mapped_to_multiple_loci"], pch = 16, col=adjustcolor("grey", alpha.f = 0.5))
dev.off()

pcs <- expPcs


tissueSamplesInfo <- samplesWithPredictionNoOutliers[rownames(pcs),]
studies <- length(unique(tissueSamplesInfo$study))
Expand All @@ -184,7 +229,7 @@ rpng(width = 1500, height = 1500)
palette(adjustcolor(viridis(studies, option = "H"), alpha.f = 0.5))
pchMap <- rep(c(15,16,17), length.out = studies)
plot(pcs[,1],metaTest[,"recount_qc.star.%_of_reads_mapped_to_multiple_loci"], col = as.factor(tissueSamplesInfo$study), pch = pchMap[as.factor(tissueSamplesInfo$study)], cex = 2, main = paste0("Studies (", studies,")"), xlab = paste0("Comp 1 (", round(explainedVariance[1],2) ,"%)"), ylab = "Percentage read map multiple loci", bty = "n")

plot(pcs[,1],pcs[,2], col = as.factor(tissueSamplesInfo$study), pch = pchMap[as.factor(tissueSamplesInfo$study)], cex = 2, main = paste0("Studies (", studies,")"), xlab = paste0("Comp 1 (", round(explainedVariance[1],2) ,"%)"), ylab = "Percentage read map multiple loci", bty = "n")
dev.off()


Expand All @@ -202,10 +247,6 @@ dev.off()







#OLD


Expand Down

0 comments on commit bf2f099

Please sign in to comment.