diff --git a/containers/rmats4/Dockerfile b/containers/rmats4/Dockerfile index 6f4d780a..8e6e58eb 100755 --- a/containers/rmats4/Dockerfile +++ b/containers/rmats4/Dockerfile @@ -1,7 +1,15 @@ -FROM nfcore/base:1.9 +# continuumio/miniconda3:4.8.2 +FROM continuumio/miniconda3@sha256:456e3196bf3ffb13fee7c9216db4b18b5e6f4d37090b31df3e0309926e98cfe2 + LABEL authors="phil@lifebit.ai laura.urbanski@jax.org" \ - description="Docker image containing rMATS v4.1.0" + description="Docker image containing rMATS v4.1.1" COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a +RUN apt-get update && apt-get install -y procps && \ + conda env create -f /environment.yml && conda clean -a + +# Make RUN commands use the new environment: +RUN echo "conda activate rmats4" >> ~/.bashrc +SHELL ["/bin/bash", "--login", "-c"] + ENV PATH /opt/conda/envs/rmats4/bin:$PATH diff --git a/containers/rmats4/environment.yml b/containers/rmats4/environment.yml index 171d8329..cd567b6c 100755 --- a/containers/rmats4/environment.yml +++ b/containers/rmats4/environment.yml @@ -4,6 +4,6 @@ channels: - bioconda - defaults dependencies: - - rmats=4.1.0 + - rmats=4.1.1 - bioconductor-bsgenome.hsapiens.ucsc.hg38=1.4.1 - bioconductor-bsgenome.mmusculus.ucsc.mm10=1.4.0 diff --git a/docs/containers.md b/docs/containers.md index 73c03ff6..90a42685 100755 --- a/docs/containers.md +++ b/docs/containers.md @@ -42,3 +42,10 @@ docker push /: gcloud auth login docker push gcr.io//: ``` + + +## Troubleshooting Singularity Images on Sumner +On Sumner, you may need to remove old Singularity images from the cache dir in order to pick up an updated/new image: +- The cache dir is a Nextflow feature: images are saved there so that they do not have to be pulled on every execution, which would be really slow. +- You should only need to clear the cache when the containers are updated. 
+- We ended up setting cacheDir = "/projects/anczukow-lab/.singularity_cache/" diff --git a/docs/usage.md b/docs/usage.md index c2bb1241..d5ea7279 100755 --- a/docs/usage.md +++ b/docs/usage.md @@ -112,15 +112,15 @@ Main arguments: (default: false) --stranded Specifies that the input is stranded ('first-strand', 'second-strand', false (aka unstranded)) (default: 'first-strand') - -profile Configuration profile to use. Can use multiple (comma separated, string) - Available: base, docker, sumner, test and more. --readlength Read length - Note that all reads will be cropped to this length(int) (default: no read length specified) - + -profile Configuration profile to use. Can use multiple (comma separated, string) + Available: base, docker, sumner, test and more. + Trimmomatic: --minlen Drop the read if it is below a specified length (int) Default parameters turn on --variable-readlength - To crop all reads and turn off, set minlen = readlength + To crop all reads and turn off --variable-readlength, set minlen = readlength (default: 20) --slidingwindow Perform a sliding window trimming approach (bool) (default: true) @@ -138,6 +138,11 @@ Star: (default: 0.66) --sjdbOverhangMin Controls --alignSJDBoverhangMin (int) (default: 3) + --soft_clipping Enables soft clipping (bool) + If true, the STAR parameter will be --alignEndsType Local and the rMATS parameter --allow-clipping will be added. + If false, the STAR parameter will be --alignEndsType EndToEnd and no rMATS parameter is added. + NOTE: Soft clipping will cause read lengths to be variable, so turn soft_clipping off if reads need to be the same length. The variable read length parameter is turned on in rMATS when minlen does not equal readlength. + (default: true) --star_memory Max memory to be used by STAR to sort BAM files. 
(default: Available task memory) @@ -169,11 +174,14 @@ Other: (default: false) --outdir The output directory where the results will be saved (string) (default: directory where you submit the job) - --gc_disk_size Only specific to google-cloud executor. Adds disk-space for few aggregative processes. - (default: "200 GB" based on 100 samples. Simply add 2 x Number of Samples) --mega_time Sets time limit for processes withLabel 'mega_memory' in the main.nf using the base.config (time unit) Make sure '#SBATCH -t' in 'main.pbs' is appropriately set if you are changing this parameter. (default: 20.h) + --gc_disk_size Only specific to google-cloud executor. Adds disk-space for few aggregative processes. + (default: "200 GB" based on 100 samples. Simply add 2 x Number of Samples) + --debug This option will enable echo of script execution into STDOUT with some additional + resource information (such as machine type, memory, cpu and disk space) + (default: false) ``` diff --git a/main.nf b/main.nf index 70ce6ac0..75559072 100755 --- a/main.nf +++ b/main.nf @@ -74,8 +74,10 @@ def helpMessage() { (default: readlength - 1) --filterScore Controls --outFilterScoreMinOverLread and outFilterMatchNminOverLread (default: 0.66) - --sjdbOverhangMin Controls --alignSJDBoverhangMin (int) + --sjdbOverhangMin Controls --alignSJDBoverhangMin (int) (default: 3) + --soft_clipping Enables soft clipping (bool) + (default: true) --star_memory Max memory to be used by STAR to sort BAM files. (default: Available task memory) @@ -169,6 +171,7 @@ log.info "Single-end : ${download_from('tcga') ? 'Will be check log.info "GTF : ${params.gtf}" log.info "STAR index : ${star_index}" log.info "Stranded : ${params.stranded}" +log.info "Soft_clipping : ${params.soft_clipping}" log.info "rMATS pairs file : ${params.rmats_pairs ? params.rmats_pairs : 'Not provided'}" log.info "Adapter : ${download_from('tcga') ? 
'Will be set for each sample based based on whether the sample is paired or single-end' : adapter_file}" log.info "Read Length : ${params.readlength}" @@ -592,6 +595,7 @@ if (!params.bams){ out_filter_intron_motifs = params.stranded ? '' : '--outFilterIntronMotifs RemoveNoncanonicalUnannotated' out_sam_strand_field = params.stranded ? '' : '--outSAMstrandField intronMotif' xs_tag_cmd = params.stranded ? "samtools view -h ${name}.Aligned.sortedByCoord.out.bam | gawk -v strType=2 -f /usr/local/bin/tagXSstrandedData.awk | samtools view -bS - > Aligned.XS.bam && mv Aligned.XS.bam ${name}.Aligned.sortedByCoord.out.bam" : '' + endsType = params.soft_clipping ? 'Local' : 'EndToEnd' // Set maximum available memory to be used by STAR to sort BAM files star_mem = params.star_memory ? params.star_memory : task.memory avail_mem_bam_sort = star_mem ? "--limitBAMsortRAM ${star_mem.toBytes() - 2000000000}" : '' @@ -624,7 +628,7 @@ if (!params.bams){ --outBAMsortingThreadN $task.cpus \ --outFilterType BySJout \ --twopassMode Basic \ - --alignEndsType EndToEnd \ + --alignEndsType $endsType \ --alignIntronMax 1000000 \ --outReadsUnmapped Fastx \ --quantMode GeneCounts \ @@ -783,6 +787,7 @@ if (!params.test) { statoff = params.statoff ? '--statoff' : '' paired_stats = params.paired_stats ? '--paired-stats' : '' novelSS = params.novelSS ? '--novelSS' : '' + allow_clipping = params.soft_clipping ? 
'--allow-clipping' : '' if (b1_only) { b1_bams = bams.join(",") b2_cmd = '' @@ -808,7 +813,7 @@ if (!params.test) { --nthread $task.cpus \ --readLength ${params.readlength} \ --mil ${params.mil} \ - --mel ${params.mel} $variable_read_length_flag $statoff $paired_stats $novelSS + --mel ${params.mel} $variable_read_length_flag $statoff $paired_stats $novelSS $allow_clipping rmats_config="config_for_rmats_and_postprocessing.txt" echo b1 b1.txt > \$rmats_config $b2_config_cmd diff --git a/nextflow.config b/nextflow.config index fdda6d00..68f62580 100755 --- a/nextflow.config +++ b/nextflow.config @@ -33,6 +33,7 @@ params { filterScore = 0.66 sjdbOverhangMin = 3 star_memory = false + soft_clipping = true // rMATS statoff = false @@ -78,16 +79,16 @@ process { container = 'lifebitai/download_reads:latest' } withName: 'rmats' { - container = 'gcr.io/nextflow-250616/rmats:4.1.0' + container = 'lifebitai/splicing-rmats:4.1.1' } withName: 'paired_rmats' { - container = 'gcr.io/nextflow-250616/rmats:4.1.0' + container = 'lifebitai/splicing-rmats:4.1.1' } withName: 'collect_tool_versions_env1' { container = 'gcr.io/nextflow-250616/splicing-pipelines-nf:gawk' } withName: 'collect_tool_versions_env2' { - container = 'gcr.io/nextflow-250616/rmats:4.1.0' + container = 'lifebitai/splicing-rmats:4.1.1' } }