Skip to content

Commit

Permalink
Lazy-load R packages
Browse files Browse the repository at this point in the history
This significantly speeds up conda installation, since R packages no longer
add conda dependencies. It also resolves some issues with R packages being
located in the global conda pkg folder.
  • Loading branch information
Marius Wöste committed Nov 15, 2019
1 parent 550e661 commit 1a89a51
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 11 deletions.
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@ To run `wg-blimp` you need a UNIX environment that contains a [Bioconda](http://
It is advised to install `wg-blimp` through Bioconda. It is also recommended to install `wg-blimp` in a fresh environment, as it has many dependencies that may conflict with other packages. To do so, you can use:

```
conda create -n wg-blimp wg-blimp python=3.6
conda create -n wg-blimp wg-blimp
```
The requirement `python=3.6` is currently necessary because of otherwise slow conda dependency solving.

### From source
You can also install `wg-blimp` from source using
Expand Down
3 changes: 1 addition & 2 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ channels:
dependencies:
- bedtools
- bwa
- bwameth=0.2.0
- bwameth 0.2.0
- fastqc
- git
- h5py
Expand All @@ -14,7 +14,6 @@ dependencies:
- mosdepth
- multiqc
- picard
- python >=3.5
- pysam
- qualimap
- r-base
Expand Down
35 changes: 28 additions & 7 deletions snakemake_wrapper/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,18 @@ rule all:
mbias_ob_reports +
mbias_ot_reports


### R package installation

# Runs scripts/packageManagement.R once and records the resulting R session
# info in the log directory. Rules that execute R scripts list
# rules.install_r_packages.output among their inputs, so this rule is
# scheduled before them.
rule install_r_packages:
output:
r_session_info = log_dir + '/r_session_info.log'
log:
log_dir + '/r_install.log'
script:
'scripts/packageManagement.R'


### ALIGNMENT

rule find_fqs:
Expand Down Expand Up @@ -229,7 +241,8 @@ rule multiqc:

rule methylation_metrics:
input:
bed_graphs = expand(methylation_dir + '/{sample}_CpG.bedGraph', sample = config['samples'])
bed_graphs = expand(methylation_dir + '/{sample}_CpG.bedGraph', sample = config['samples']),
r_installation = rules.install_r_packages.output
output:
methylation_metrics = qc_dir + '/methylation_metrics.csv'
log:
Expand Down Expand Up @@ -260,7 +273,8 @@ rule methyl_dackel:

rule bedgraph_to_methylation_ratio:
input:
bedGraph = methylation_dir + '/{sample}_CpG.bedGraph'
bedGraph = methylation_dir + '/{sample}_CpG.bedGraph',
r_installation = rules.install_r_packages.output
output:
bedGraph = temp(methylation_dir + '/{sample}_CpG_ratio.bedGraph')
log:
Expand Down Expand Up @@ -348,7 +362,8 @@ rule camel_dmr:

rule bsseq:
input:
meth = expand(methylation_dir + '/{sample}_CpG.bedGraph', sample = config['samples'])
meth = expand(methylation_dir + '/{sample}_CpG.bedGraph', sample = config['samples']),
r_installation = rules.install_r_packages.output
output:
rdata = bsseq_dir + '/bsseq.Rdata',
csv = bsseq_dir + '/dmrs.csv',
Expand All @@ -371,7 +386,8 @@ def expected_tool_files():

rule dmr_combination:
input:
**expected_tool_files()
**expected_tool_files(),
r_installation = rules.install_r_packages.output
output:
csv = dmr_dir + '/combined-dmrs.csv',
bed = dmr_dir + '/dmr-coverage/combined-dmrs.bed'
Expand Down Expand Up @@ -414,7 +430,8 @@ rule dmr_annotation:
repeat_masker_annotation_file = repeat_masker_annotation_file,
transcript_start_site_file = transcript_start_site_file,
coverages = expand(dmr_dir + '/dmr-coverage/{sample}.regions.bed.gz', sample = config['samples']),
combined_dmrs = dmr_dir + '/combined-dmrs.csv'
combined_dmrs = dmr_dir + '/combined-dmrs.csv',
r_installation = rules.install_r_packages.output
output:
annotated_dmrs = dmr_dir + '/annotated-dmrs.csv'
params:
Expand All @@ -436,7 +453,8 @@ rule methylseekr:
cgi_annotation_file = cgi_annotation_file,
gene_annotation_file = gene_annotation_file,
repeat_masker_annotation_file = repeat_masker_annotation_file,
transcript_start_site_file = transcript_start_site_file
transcript_start_site_file = transcript_start_site_file,
r_installation = rules.install_r_packages.output
output:
pmd_all = segmentation_dir + '/pmd-all.csv',
umr_lmr_all = segmentation_dir + '/umr-lmr-all.csv',
Expand Down Expand Up @@ -467,10 +485,13 @@ rule methylseekr:

rule benchmark_plot:
input:
target_files = target_files
target_files = target_files,
r_installation = rules.install_r_packages.output
output:
runtime_report = runtime_report
params:
benchmark_dir = benchmark_dir
log:
log_dir + '/benchmark.log'
script:
'scripts/benchmarkPlot.R'
7 changes: 7 additions & 0 deletions snakemake_wrapper/scripts/benchmarkPlot.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# When executed through Snakemake (which provides the `snakemake` object),
# send both regular output and messages to the rule's first log file.
if (exists("snakemake")) {
logFile <- file(snakemake@log[[1]])

# NOTE(review): the order of the two calls matters. R's documentation for
# sink() states that the message stream needs an already open connection;
# presumably the first call opens `logFile` -- confirm before reordering.
sink(logFile, append = TRUE)
sink(logFile, append = TRUE, type = "message")
}

library(ggplot2)
library(stringr)
library(data.table)
Expand Down
69 changes: 69 additions & 0 deletions snakemake_wrapper/scripts/packageManagement.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# When executed through Snakemake (which provides the `snakemake` object),
# send both regular output and messages to the rule's first log file.
if (exists("snakemake")) {
logFile <- file(snakemake@log[[1]])

# NOTE(review): the order of the two calls matters. R's documentation for
# sink() states that the message stream needs an already open connection;
# presumably the first call opens `logFile` -- confirm before reordering.
sink(logFile, append = TRUE)
sink(logFile, append = TRUE, type = "message")
}


# Fix the CRAN mirror so install.packages() never has to ask for one.
options(repos = c(CRAN = "https://cran.rstudio.com"))

# Ensure that a package is installed, installing it on demand.
#
# Arguments:
#   packageName  Name of the package (a single character string).
#   type         Where to install from when the package is missing:
#                "CRAN" (default) uses install.packages(); "bioc" uses
#                BiocManager::install(), installing BiocManager from CRAN
#                first if it is not available.
#
# Returns NULL invisibly.
#
# Raises an error if the package is missing and `type` is not one of the
# supported sources, or if the package is still missing after the install
# attempt.
ensurePackageInstallation <- function (packageName, type = "CRAN") {

  # system.file() returns "" for a package that cannot be found. It is the
  # lookup R's documentation recommends for a single package, and it is
  # cheaper than listing every installed package.
  isPackageInstalled <- function () {
    nzchar(system.file(package = packageName))
  }

  if (isPackageInstalled()) {
    return(invisible(NULL))
  }

  if (type == "CRAN") {

    install.packages(packageName, quiet = TRUE)

  } else if (type == "bioc") {

    if (!requireNamespace("BiocManager", quietly = TRUE)) {
      install.packages("BiocManager")
    }
    BiocManager::install(packageName)

  } else {

    # A mistyped source used to skip the installation without any notice.
    stop(
      "Cannot install '", packageName, "': unknown package type '", type,
      "' (expected \"CRAN\" or \"bioc\")",
      call. = FALSE
    )

  }

  # install.packages() only warns when an installation fails, so verify the
  # result here instead of failing later with a less helpful library() error.
  if (!isPackageInstalled()) {
    stop(
      "Installation of package '", packageName, "' (type '", type,
      "') failed",
      call. = FALSE
    )
  }

  invisible(NULL)
}

# Attach a package to the current session, installing it first if needed.
#
# Arguments:
#   packageName  Name of the package (a single character string).
#   type         Installation source, passed on unchanged to
#                ensurePackageInstallation().
loadOrInstall <- function (packageName, type = "CRAN") {
  ensurePackageInstallation(packageName = packageName, type = type)

  # character.only = TRUE: packageName holds the name as a string.
  library(package = packageName, character.only = TRUE)
}

# install dependencies
#
# Every package listed below is installed if missing and then attached, so a
# package that cannot be loaded makes this script fail.

# Packages installed from CRAN.
cranPackages <- c(
  "data.table",
  "KernSmooth",
  "parallel",
  "ggplot2",
  "stringr"
)

# Packages installed through BiocManager.
biocPackages <- c(
  "BSgenome",
  "bsseq",
  "GenomicRanges",
  "MethylSeekR",
  "rtracklayer"
)

for (package in cranPackages) {
  loadOrInstall(package)
}

for (package in biocPackages) {
  loadOrInstall(package, type = "bioc")
}

# prepare shiny app (shiny only has to be installed, it is not attached here)
ensurePackageInstallation("shiny")

# Record the session info. The `snakemake` object only exists when the script
# is run through Snakemake (see the guard around the log redirection at the
# top of this script), so fall back to standard output otherwise instead of
# failing on the very last line.
sessionInfoLines <- capture.output(sessionInfo())

if (exists("snakemake")) {
  writeLines(sessionInfoLines, snakemake@output$r_session_info)
} else {
  writeLines(sessionInfoLines)
}

0 comments on commit 1a89a51

Please sign in to comment.