From 86fde10ebfaf003cdf26ec002fd0eef2b65d9c76 Mon Sep 17 00:00:00 2001 From: tdayris Date: Sun, 14 Jul 2024 07:13:18 +0200 Subject: [PATCH] Memory --- CHANGELOG.md | 8 +++ workflow/rules/bcftools.smk | 46 ++++++++---- .../rules/fair_bowtie2_mapping_pipeline.smk | 2 +- .../rules/fair_genome_indexer_pipeline.smk | 2 +- workflow/rules/gatk_mutect2_calling_meta.smk | 71 ++++++++++++------- workflow/rules/snpeff.smk | 2 +- 6 files changed, 89 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d270d06..3989552 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# 1.5.1 + +## Features: + +* fair_bowtie2_mapping up to 3.5.2 +* fair_genome_indexer up to 3.8.1 +* Memory / time reservations + # 1.5.0 ## Features: diff --git a/workflow/rules/bcftools.smk b/workflow/rules/bcftools.smk index 639c3a6..d7a62e4 100644 --- a/workflow/rules/bcftools.smk +++ b/workflow/rules/bcftools.smk @@ -1,7 +1,11 @@ """ -Reported on Flamingo on 150 datasets -* time 37s ± 31s -* mem 423mb ± 27mb +## Memory +Requires a job with at most 440.72 Mb, + on average 328.9 ± 175.33 Mb, +on Gustave Roussy's HPC Flamingo, on a 91.0 Mb dataset. +## Time +A job took 0:01:46 to proceed, +on average 0:00:39 ± 0:00:38 """ @@ -18,7 +22,7 @@ rule fair_gatk_mutect2_bcftools_norm_split_multiallelic: ), threads: 1 resources: - mem_mb=lambda wildcards, attempt: (attempt * 200) + 500, + mem_mb=lambda wildcards, attempt: attempt * 500, runtime=lambda wildcards, attempt: attempt * 15, tmpdir=tmp, log: @@ -35,9 +39,13 @@ rule fair_gatk_mutect2_bcftools_norm_split_multiallelic: """ -Reported on Flamingo on 150 datasets -* time -* mem +## Memory +Requires a job with at most 426.09 Mb, + on average 316.03 ± 182.31 Mb, +on Gustave Roussy's HPC Flamingo, on a 35.0 Mb dataset. 
+## Time +A job took 0:00:36 to proceed, +on average 0:00:16 ± 0:00:13 """ @@ -50,7 +58,7 @@ rule fair_gatk_mutect2_bcftools_filter_pass: ), threads: 1 resources: - mem_mb=lambda wildcards, attempt: attempt * 200 + 500, + mem_mb=lambda wildcards, attempt: attempt * 500, runtime=lambda wildcards, attempt: attempt * 15, tmpdir=tmp, log: @@ -64,9 +72,13 @@ rule fair_gatk_mutect2_bcftools_filter_pass: """ -Reported on Flamingo on 150 datasets -* time 13s ± 10s -* time 415mb ± 38mb +## Memory +Requires a job with at most 426.09 Mb, + on average 316.03 ± 182.31 Mb, +on Gustave Roussy's HPC Flamingo, on a 35.0 Mb dataset. +## Time +A job took 0:00:36 to proceed, +on average 0:00:16 ± 0:00:13 """ @@ -83,7 +95,7 @@ rule fair_gatk_mutect2_bcftools_view: threads: 1 threads: 1 resources: - mem_mb=lambda wildcards, attempt: (attempt * 200) + 500, + mem_mb=lambda wildcards, attempt: attempt * 500, runtime=lambda wildcards, attempt: attempt * 15, tmpdir=tmp, log: @@ -100,9 +112,13 @@ rule fair_gatk_mutect2_bcftools_view: """ -Reported on Flamingo on 150 datasets -* time 20s ± 2s -* mem 540mb ± 70mb +## Memory +Requires a job with at most 563.22 Mb, + on average 401.76 ± 235.39 Mb, +on Gustave Roussy's HPC Flamingo, on a 35.0 Mb dataset. 
+## Time +A job took 0:00:35 to proceed, +on average 0:00:11 ± 0:00:12 """ diff --git a/workflow/rules/fair_bowtie2_mapping_pipeline.smk b/workflow/rules/fair_bowtie2_mapping_pipeline.smk index ce33130..82323f8 100644 --- a/workflow/rules/fair_bowtie2_mapping_pipeline.smk +++ b/workflow/rules/fair_bowtie2_mapping_pipeline.smk @@ -1,6 +1,6 @@ module fair_bowtie2_mapping: snakefile: - github("tdayris/fair_bowtie2_mapping", path="workflow/Snakefile", tag="3.5.1") + github("tdayris/fair_bowtie2_mapping", path="workflow/Snakefile", tag="3.5.2") config: { **config, diff --git a/workflow/rules/fair_genome_indexer_pipeline.smk b/workflow/rules/fair_genome_indexer_pipeline.smk index 0713fe8..407bcad 100644 --- a/workflow/rules/fair_genome_indexer_pipeline.smk +++ b/workflow/rules/fair_genome_indexer_pipeline.smk @@ -1,6 +1,6 @@ module fair_genome_indexer: snakefile: - github("tdayris/fair_genome_indexer", path="workflow/Snakefile", tag="3.8.0") + github("tdayris/fair_genome_indexer", path="workflow/Snakefile", tag="3.8.1") config: config diff --git a/workflow/rules/gatk_mutect2_calling_meta.smk b/workflow/rules/gatk_mutect2_calling_meta.smk index 2cad0ff..f5e9961 100644 --- a/workflow/rules/gatk_mutect2_calling_meta.smk +++ b/workflow/rules/gatk_mutect2_calling_meta.smk @@ -6,13 +6,17 @@ module gatk_mutect2_calling: """ -Reported on Flamingo on 150 datasets -* time 95min ± 49min -* mem 6.5Go ± 3Go +## Memory +Requires a job with at most 16386.32 Mb, + on average 8120.21 ± 5260.7 Mb, +on Gustave Roussy's HPC Flamingo, on a 93.0 Mb dataset. 
+## Time +A job took 0:23:24 to proceed, +on average 0:06:13 ± 0:07:33 """ -use rule picard_replace_read_groups from gatk_mutect2_calling as fair_gatk_mutect2_picard_reaplace_read_groups with: +use rule picard_replace_read_groups from gatk_mutect2_calling as fair_gatk_mutect2_picard_replace_read_groups with: input: "results/{species}.{build}.{release}.{datatype}/Mapping/{sample}.bam", output: @@ -21,8 +25,8 @@ use rule picard_replace_read_groups from gatk_mutect2_calling as fair_gatk_mutec ), threads: 1 resources: - mem_mb=lambda wildcards, attempt: attempt * 4_000 + 6_000, - runtime=lambda wildcards, attempt: attempt * 45 + 105, + mem_mb=lambda wildcards, attempt: attempt * 4_000 + 13_000, + runtime=lambda wildcards, attempt: attempt * 45 + 85, tmpdir=tmp, log: "logs/fair_gatk_mutect2_picard_reaplace_read_groups/{species}.{build}.{release}.{datatype}/{sample}.log", @@ -33,9 +37,13 @@ use rule picard_replace_read_groups from gatk_mutect2_calling as fair_gatk_mutec """ -Reported on Flamingo on 150 datastets -* time 18min ± 11min (peak at 91min) -* mem 462mb ± 21mb +## Memory +Requires a job with at most 468.36 Mb, + on average 348.96 ± 183.95 Mb, +on Gustave Roussy's HPC Flamingo, on a 93.0 Mb dataset. +## Time +A job took 0:05:42 to proceed, +on average 0:01:33 ± 0:01:47 """ @@ -58,9 +66,13 @@ use rule sambamba_index_picard_bam from gatk_mutect2_calling as fair_gatk_mutect """ -Reported on Flamingo on 150 datastets -* mem 21Go ± 10Go -* time 5h ± 2h40 +## Memory +Requires a job with at most 78070.07 Mb, + on average 25200.52 ± 23083.55 Mb, +on Gustave Roussy's HPC Flamingo, on a 92.0 Mb dataset. 
+## Time +A job took 7:31:03 to proceed, +on average 2:06:11 ± 2:26:14 """ @@ -79,8 +91,8 @@ use rule mutect2_call from gatk_mutect2_calling as fair_gatk_mutect2_gatk_mutect ), threads: 20 resources: - mem_mb=lambda wildcards, attempt: attempt * 22_000, - runtime=lambda wildcards, attempt: attempt * 60 * 5, + mem_mb=lambda wildcards, attempt: attempt * 40_000, + runtime=lambda wildcards, attempt: attempt * 60 * 15, tmpdir=tmp, log: "logs/fair_gatk_mutect2_gatk_mutect2_call/{species}.{build}.{release}.{datatype}/{sample}.log", @@ -99,6 +111,8 @@ use rule mutect2_call from gatk_mutect2_calling as fair_gatk_mutect2_gatk_mutect ), + + use rule gatk_get_pileup_summaries from gatk_mutect2_calling as fair_gatk_mutect2_gatk_get_pileup_summaries with: input: unpack(get_gatk_get_pileup_summaries_input), @@ -146,9 +160,14 @@ use rule gatk_calculate_contamination from gatk_mutect2_calling as fair_gatk_mut """ -Reported on Flamingo on 150 datastets -* mem 22Go ± 10Go -* time 1:20 ± 1min +## Memory +Requires a job with at most 69707.14 Mb, + on average 22480.43 ± 22444.62 Mb, +on Gustave Roussy's HPC Flamingo, on a 92.0 Mb dataset. 
+ +## Time +A job took 0:01:32 to proceed, +on average 0:00:29 ± 0:00:30 """ @@ -161,8 +180,8 @@ use rule gatk_learn_read_orientation_model from gatk_mutect2_calling as fair_gat ), threads: 1 resources: - mem_mb=lambda wildcards, attempt: attempt * 8_000 + 22_000, - runtime=lambda wildcards, attempt: attempt * 30, + mem_mb=lambda wildcards, attempt: attempt * 30_000, + runtime=lambda wildcards, attempt: attempt * 60, tmpdir=tmp, log: "logs/fair_gatk_mutect2_gatk_learn_read_orientation_model/{species}.{build}.{release}.{datatype}/{sample}.log", @@ -176,9 +195,13 @@ use rule gatk_learn_read_orientation_model from gatk_mutect2_calling as fair_gat """ -Reported on Flamingo on ~150 datasets -* time 2h ± 30min -* mem 24Go ± 10Go +## Memory +Requires a job with at most 56514.84 Mb, + on average 24296.87 ± 17262.98 Mb, +on Gustave Roussy's HPC Flamingo, on a 91.0 Mb dataset. +## Time +A job took 0:30:32 to proceed, +on average 0:09:10 ± 0:09:40 """ @@ -194,8 +217,8 @@ use rule filter_mutect_calls from gatk_mutect2_calling as fair_gatk_mutect2_filt ), threads: 1 resources: - mem_mb=lambda wildcards, attempt: attempt * 10_000 + 24_000, - runtime=lambda wildcards, attempt: attempt * 60 + 75, + mem_mb=lambda wildcards, attempt: attempt * 30_000, + runtime=lambda wildcards, attempt: attempt * 60, tmpdir=tmp, log: "logs/fair_gatk_mutect2_filtermutectcalls/{species}.{build}.{release}.{datatype}/{sample}.log", diff --git a/workflow/rules/snpeff.smk b/workflow/rules/snpeff.smk index 4b72c4f..ccb7dc6 100644 --- a/workflow/rules/snpeff.smk +++ b/workflow/rules/snpeff.smk @@ -60,6 +60,6 @@ rule fair_gatk_mutect2_snpeff_annotate: benchmark: "benchmark/fair_gatk_mutect2_snpeff_annotate/{species}.{build}.{release}.{datatype}/{sample}.tsv" params: - extra=lookup_config(dpath="params/fair_gatk_mutect2_snpeff", default=""), + extra=lookup_config(dpath="params/fair_gatk_mutect2_snpeff", default="-nodownload"), wrapper: f"{snakemake_wrappers_prefix}/bio/snpeff/annotate"