Report adjusted

fischuu · Mar 11, 2021 · 02257d0 · 02257d0
1 parent cc21492
commit 02257d0
Show file tree

Hide file tree

Showing 3 changed files with 15 additions and 14 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -4,6 +4,7 @@
 #####                                                                      #####
 ################################################################################
 
+0.5.15: * Report adjusted
 0.5.14: * Bugfix: Rule FinalMockVsRef_alignment didn't have resource allocations
 0.5.13: * Statistics on final mock reference added
 0.5.12: * Coverage for finalMock alignments are now also calculated

diff --git a/GBS-pipeline.smk b/GBS-pipeline.smk
@@ -10,8 +10,8 @@ import os
 ##### Natural Resources Institute Finland (Luke)
 ##### This pipeline is build upon the the GBS-SNP-CROP pipeline:
 ##### https://github.com/halelab/GBS-SNP-CROP
-##### Version: 0.5.14
-version = "0.5.14"
+##### Version: 0.5.15
+version = "0.5.15"
 
 ##### set minimum snakemake version #####
 min_version("6.0")

diff --git a/scripts/workflow-report.Rmd b/scripts/workflow-report.Rmd
@@ -1183,7 +1183,7 @@ ADD HERE STILL A COMPARISON, HOW THE READS FROM THE MOCK DISTRIBUTE ACROSS THE R
 
 Import the mapping statistics to the Mock reference
 
-```{r import flagstats}
+```{r import final flagstats}
 flagstatFiles <- list.files(file.path(projFolder,"BAM", "alignments_finalMock"), pattern="*.flagstat")
 flagstats <- list()
 for(i in 1:length(flagstatFiles)){
@@ -1193,7 +1193,7 @@ for(i in 1:length(flagstatFiles)){
 
 Visualization of the alignments, red stars indicate the mock reference samples
 
-```{r vis mapping stats}
+```{r vis mapping final stats}
 par(oma=c(6,3,0,0))
 mapStats <- matrix(0,ncol=length(flagstatFiles), nrow=2)
 
@@ -1216,15 +1216,15 @@ mockPos <- which(is.element(flagstatFiles, mockFiles))
 points(p[mockPos], rep(200000, length(mockPos)), pch="*", col="red", cex=4)
 ```
 
-```{r mapping percentage}
+```{r mapping final percentage}
 barplot(mapStats[1,] / (apply(mapStats,2,sum)), ylim=c(0,1), ylab="Mapping in Percent", col=viridis(20)[8], las=2)
 ```
 
 
 ### Coverage
 
 Data mapped against the clusters and then the reads per cluster visualized
-```{r importClusterCoverage, warning=FALSE}
+```{r importFinalClusterCoverage, warning=FALSE}
 clusterFiles <- list.files(file.path(projFolder, "BAM", "alignments_finalMock"), pattern="*.coverage")
 clusterCoverage <- read.table(file.path(projFolder, "BAM", "alignments_finalMock", clusterFiles[1]))
 names(clusterCoverage)[1:2] <- c("cluster", clusterFiles[1])
@@ -1245,7 +1245,7 @@ clusterCoverage.std <- t(t(clusterCoverage)/apply(clusterCoverage,2,sum))*100000
 ### Lorenz curve
 Concentration measure of reads against clusters
 
-```{r lorenz}
+```{r final lorenz}
 plot(cumsum(sort(clusterCoverage[,2] / sum(clusterCoverage[,2]))), type="l", xlab="Cluster", ylab="Concentration")
 for(i in 3:ncol(clusterCoverage)){
   lines(cumsum(sort(clusterCoverage[,i] / sum(clusterCoverage[,i]))))
@@ -1257,14 +1257,14 @@ for(i in 3:ncol(clusterCoverage)){
 
 These are the amounts of clusters with different samples. 
 
-```{r cluster coverage stats}
+```{r final  cluster coverage stats}
 coveredClusters <- apply(clusterCoverage[,-1]>0,1,sum)
 barplot(table(coveredClusters), xlab="No. of different samples aligned to cluster", col=viridis(20)[8])
 ```
 
 Now we have here the number of reads per coverage class. That means, instead of having it binary as in the previous plot, we now count all the reads per coverage group.
 
-```{r reads per coverage group}
+```{r final reads per coverage group}
 readsPerCoverageGroup <- c()
 
 for(i in 1:max(coveredClusters)){
@@ -1277,13 +1277,13 @@ barplot(readsPerCoverageGroup, xlab="Coverage group", ylab="Reads on cluster gro
 ```
 
 
-```{r reads per coverage group in percent}
+```{r final reads per coverage group in percent}
 barplot(readsPerCoverageGroup/sum(readsPerCoverageGroup)*100, xlab="Coverage group", ylab="Reads on cluster group (in %)", ylim=c(0,100), col=viridis(20)[8])
 ```
 
 Then still the number of clusters without coverage per sample
 
-```{r clusters without coverage}
+```{r final clusters without coverage}
 nonHittedClusters <- apply(clusterCoverage[,-1]==0,2,sum)
 names(nonHittedClusters) <- gsub(".coverage", "", names(nonHittedClusters))
 barplot(nonHittedClusters, col=viridis(20)[8], las=2)
@@ -1292,7 +1292,7 @@ lines(c(0,100000), c(nrow(clusterCoverage), nrow(clusterCoverage)), lty="dotted"
 
 And then this still as percentage
 
-```{r clusterhits percentage}
+```{r final clusterhits percentage}
 barplot(nonHittedClusters/nrow(clusterCoverage), ylim=c(0,1), las=2, col=viridis(20)[8])
 lines(c(0,10000),c(0.2,0.2), lty="dotted")
 lines(c(0,10000),c(0.4,0.4), lty="dotted")
@@ -1303,7 +1303,7 @@ lines(c(0,10000),c(0.8,0.8), lty="dotted")
 
 ### Smoothed log-coverage per cluster
 
-```{r vizCusterCoverage}
+```{r final vizCusterCoverage}
 plot(smooth.spline(clusterCoverage[,1], log(clusterCoverage[,2]+1), all.knots=FALSE), type="l", xlab="Cluster", ylab="Log-coverage", ylim=c(0, max(log(clusterCoverage))/2))
 
 for(i in 3:ncol(clusterCoverage)){
@@ -1315,7 +1315,7 @@ for(i in 3:ncol(clusterCoverage)){
 
 Now the coverages are divided by the total amount of reads per sample and then multiplied by 10^5.
 
-```{r vizStdCusterCoverage}
+```{r final vizStdCusterCoverage}
 # I use here the first column of the other matrix to keep the same coordinates. It does not affect the plot.
 plot(smooth.spline(clusterCoverage[,1], log(clusterCoverage.std[,2]+1), all.knots=FALSE), type="l", xlab="Cluster", ylab="", ylim=c(0, max(log(clusterCoverage.std))/4))