From 625187bd8c3088ece4c8228523dabb1d3f30893a Mon Sep 17 00:00:00 2001 From: ericblanc20 Date: Fri, 6 Dec 2024 10:40:50 +0100 Subject: [PATCH] feat: enabled mbcs meta-tool in the ngs_mapping step model (#545) Co-authored-by: Till Hartmann --- snappy_pipeline/workflows/hla_typing/model.py | 4 +-- .../workflows/ngs_data_qc/__init__.py | 2 ++ .../workflows/ngs_mapping/model.py | 21 +++++++++-- snappy_wrappers/__init__.py | 2 +- snappy_wrappers/wrappers/mbcs/wrapper.py | 2 +- .../workflows/test_workflows_ngs_mapping.py | 35 ++++++++++++++----- 6 files changed, 51 insertions(+), 15 deletions(-) diff --git a/snappy_pipeline/workflows/hla_typing/model.py b/snappy_pipeline/workflows/hla_typing/model.py index f4aa3b63d..2f666112b 100644 --- a/snappy_pipeline/workflows/hla_typing/model.py +++ b/snappy_pipeline/workflows/hla_typing/model.py @@ -28,6 +28,6 @@ class HlaTyping(SnappyStepModel, validators.ToolsMixin, validators.NgsMappingMix tools: Annotated[list[Tool], EnumField(Tool, [Tool.optitype], min_length=1)] - optitype: Optitype | None = None + optitype: Optitype = Optitype() - arcashla: ArcasHla | None = None + arcashla: ArcasHla = ArcasHla() diff --git a/snappy_pipeline/workflows/ngs_data_qc/__init__.py b/snappy_pipeline/workflows/ngs_data_qc/__init__.py index e6184987d..9adc2dc50 100644 --- a/snappy_pipeline/workflows/ngs_data_qc/__init__.py +++ b/snappy_pipeline/workflows/ngs_data_qc/__init__.py @@ -168,6 +168,8 @@ def _get_input_files_metrics(self, wildcards): @dictify def get_output_files(self, action): + if self.name not in self.config.tools: + return {} if action == "prepare": yield "baits", "work/static_data/picard/out/baits.interval_list" yield "targets", "work/static_data/picard/out/targets.interval_list" diff --git a/snappy_pipeline/workflows/ngs_mapping/model.py b/snappy_pipeline/workflows/ngs_mapping/model.py index 30968a324..d390b0528 100644 --- a/snappy_pipeline/workflows/ngs_mapping/model.py +++ b/snappy_pipeline/workflows/ngs_mapping/model.py @@ -1,4 
+1,5 @@ import enum +import itertools import os from enum import Enum from typing import Annotated @@ -21,8 +22,23 @@ class RnaMapper(Enum): STAR = "star" +class MetaTool(Enum): + MBCS = "mbcs" + + +CombinedDnaTool = Enum( + "CombinedDnaTool", + { + name: member.value + for name, member in itertools.chain( + DnaMapper.__members__.items(), MetaTool.__members__.items() + ) + }, +) + + class Tools(SnappyModel): - dna: Annotated[list[DnaMapper], EnumField(DnaMapper, [])] + dna: Annotated[list[CombinedDnaTool], EnumField(CombinedDnaTool, [])] """Required if DNA analysis; otherwise, leave empty.""" rna: Annotated[list[RnaMapper], EnumField(RnaMapper, [])] @@ -261,6 +277,7 @@ class Minimap2(SnappyModel): class Mbcs(SnappyModel): mapping_tool: DnaMapper + barcode_tool: BarcodeTool use_barcodes: bool recalibrate: bool @@ -287,7 +304,7 @@ class NgsMapping(SnappyStepModel): bwa_mem2: BwaMem2 | None = None """Configuration for BWA-MEM2""" - somatic: Somatic | None = None + mbcs: Mbcs | None = None """ Configuration for somatic ngs_calling (separate read groups, molecular barcodes & base quality recalibration) diff --git a/snappy_wrappers/__init__.py b/snappy_wrappers/__init__.py index 1b25644c8..1c66e6539 100644 --- a/snappy_wrappers/__init__.py +++ b/snappy_wrappers/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from snappy_pipeline.version import __version__ +# from snappy_pipeline.version import __version__ __author__ = """Manuel Holtgrewe""" __email__ = "manuel.holtgrewe@bih-charite.de" diff --git a/snappy_wrappers/wrappers/mbcs/wrapper.py b/snappy_wrappers/wrappers/mbcs/wrapper.py index 025c0fa00..f63a1fa6d 100644 --- a/snappy_wrappers/wrappers/mbcs/wrapper.py +++ b/snappy_wrappers/wrappers/mbcs/wrapper.py @@ -53,7 +53,7 @@ def pair_fastq_files(input_left, input_right): input_left = snakemake.params.args["input"]["reads_left"] input_right = snakemake.params.args["input"].get("reads_right", "") -config = snakemake.config["step_config"]["ngs_mapping"]["somatic"] 
+config = snakemake.config["step_config"]["ngs_mapping"]["mbcs"] mapper = config["mapping_tool"] mapper_config = snakemake.config["step_config"]["ngs_mapping"][mapper] if mapper == "bwa_mem2": diff --git a/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py b/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py index cddbf9adf..51585a5ab 100644 --- a/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py +++ b/tests/snappy_pipeline/workflows/test_workflows_ngs_mapping.py @@ -33,16 +33,26 @@ def minimal_config(): step_config: ngs_mapping: tools: - dna: ['bwa'] + dna: ['mbcs'] target_coverage_report: path_target_interval_list_mapping: - pattern: "Agilent SureSelect Human All Exon V6.*" name: Agilent_SureSelect_Human_All_Exon_V6 path: path/to/SureSelect_Human_All_Exon_V6_r2.bed + ngs_chew_fingerprint: + enabled: true bwa: path_index: /path/to/bwa/index.fasta.amb bwa_mem2: path_index: /path/to/bwa_mem2/index.fasta.amb + trim_adapters: false + mask_duplicates: true + num_threads_align: 16 + num_threads_trimming: 8 + num_threads_bam_view: 4 + num_threads_bam_sort: 4 + memory_bam_sort: 4G + split_as_secondary: false # -M flag minimap2: mapping_threads: 16 star: @@ -51,7 +61,8 @@ def minimal_config(): out_filter_intron_motifs: "" out_sam_strand_field: "" mbcs: - mapping_tool: bwa + mapping_tool: bwa_mem2 + barcode_tool: agent use_barcodes: True recalibrate: True bqsr: @@ -60,10 +71,16 @@ def minimal_config(): prepare: path: /path/to/trimmer lib_prep_type: v2 + extra_args: + - "-polyG 8" + - "-minFractionRead 50" mark_duplicates: path: /path/to/creak path_baits: /path/to/baits consensus_mode: HYBRID + extra_args: [] + input_filter_args: [] + consensus_filter_args: [] bam_collect_doc: enabled: true @@ -712,18 +729,18 @@ def test_ngs_mapping_workflow_steps(ngs_mapping_workflow): def test_ngs_mapping_workflow_files(ngs_mapping_workflow): """Tests simple functionality of the workflow: checks if file structure is created according - to the expected 
results from the tools, namely: bwa, external, link_in, link_out, + to the expected results from the tools, namely: mbcs, external, link_in, link_out, link_out_bam, minimap2, star, target_coverage_report. """ # Check result file construction expected = [ - "output/bwa.P00{i}-N1-DNA1-WGS1/out/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext) + "output/mbcs.P00{i}-N1-DNA1-WGS1/out/mbcs.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext) for i in range(1, 7) for ext in ("bam", "bam.bai", "bam.md5", "bam.bai.md5") ] for infix in ("bam_collect_doc", "mapping", "target_cov_report", "ngs_chew_fingerprint"): expected += [ - "output/bwa.P00{i}-N1-DNA1-WGS1/log/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext) + "output/mbcs.P00{i}-N1-DNA1-WGS1/log/mbcs.P00{i}-N1-DNA1-WGS1.{ext}".format(i=i, ext=ext) for i in range(1, 7) for ext in ( f"{infix}.log", @@ -739,7 +756,7 @@ def test_ngs_mapping_workflow_files(ngs_mapping_workflow): ) ] bam_stats_text_out = ( - "output/bwa.P00{i}-N1-DNA1-WGS1/report/bam_qc/bwa.P00{i}-N1-DNA1-WGS1.bam.{stats}.{ext}" + "output/mbcs.P00{i}-N1-DNA1-WGS1/report/bam_qc/mbcs.P00{i}-N1-DNA1-WGS1.bam.{stats}.{ext}" ) expected += [ bam_stats_text_out.format(i=i, stats=stats, ext=ext) @@ -748,7 +765,7 @@ def test_ngs_mapping_workflow_files(ngs_mapping_workflow): for stats in ("bamstats", "flagstats", "idxstats") ] expected += [ - "output/bwa.P00{i}-N1-DNA1-WGS1/report/cov/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format( + "output/mbcs.P00{i}-N1-DNA1-WGS1/report/cov/mbcs.P00{i}-N1-DNA1-WGS1.{ext}".format( i=i, ext=ext ) for ext in ( @@ -764,14 +781,14 @@ def test_ngs_mapping_workflow_files(ngs_mapping_workflow): for i in range(1, 7) ] expected += [ - "output/bwa.P00{i}-N1-DNA1-WGS1/report/fingerprint/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format( + "output/mbcs.P00{i}-N1-DNA1-WGS1/report/fingerprint/mbcs.P00{i}-N1-DNA1-WGS1.{ext}".format( i=i, ext=ext ) for ext in ("npz", "npz.md5") for i in range(1, 7) ] expected += [ - 
"output/bwa.P00{i}-N1-DNA1-WGS1/report/alfred_qc/bwa.P00{i}-N1-DNA1-WGS1.{ext}".format( + "output/mbcs.P00{i}-N1-DNA1-WGS1/report/alfred_qc/mbcs.P00{i}-N1-DNA1-WGS1.{ext}".format( i=i, ext=ext ) for ext in ("alfred.json.gz", "alfred.json.gz.md5")