Skip to content

Commit

Permalink
fix: numerous smaller bug fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
ericblanc20 committed Dec 19, 2024
1 parent fd8a152 commit fbf8aaf
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ rule somatic_purity_ploidy_estimate_link_out_run:

# ASCAT and Supporting Steps --------------------------------------------------


# Get B-allele frequencies from bam - steps are the same for WES and WGS ------
rule somatic_purity_ploidy_estimate_ascat_build_baf:
input:
Expand Down Expand Up @@ -103,6 +102,8 @@ rule somatic_purity_ploidy_estimate_ascat_run:
unpack(wf.get_input_files("ascat", "run")),
output:
**wf.get_output_files("ascat", "run"),
params:
**{"args": wf.get_args("ascat", "run")},
threads: wf.get_resource("ascat", "run", "threads")
resources:
time=wf.get_resource("ascat", "run", "time"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def _get_gender(self, library_name: str, wildcards: Wildcards | None = None) ->
case SexOrigin.AUTOMATIC:
sex = self._read_sex(self._get_guess_sex_file(wildcards))
case SexOrigin.SAMPLESHEET:
sex = self.parent.table[library_name][self.cfg.sex.column_name]
sex = self.parent.table.loc[library_name][self.cfg.sex.column_name]
case SexOrigin.CONFIG:
sex = self.cfg.sex.cohort
if sex == SexValue.MALE:
Expand Down Expand Up @@ -149,7 +149,6 @@ def get_output_files(self, action: str):
case "run":
output_files = {
"RData": tpl + "RData",
"circos": tpl + "circos.txt",
"goodness_of_fit": tpl + "goodness_of_fit.txt",
"na": tpl + "na.txt",
"nb": tpl + "nb.txt",
Expand Down Expand Up @@ -285,11 +284,14 @@ def _get_args_prepare_hts(self, wildcards: Wildcards, input: InputFiles) -> dict

def _get_input_files_run(self, wildcards: Wildcards) -> dict[str, str]:
"""Return input files for actually running ASCAT."""
input_files = {"GCcontent": self.cfg.path_gc_content, "reptiming": self.cfg.path_reptiming}

tpl = "work/{mapper}.ascat.{library_name}/out/{mapper}.ascat.{library_name}.".format(
**wildcards
)

input_files = {"tumor_logr": tpl + "Tumor_LogR.txt", "tumor_baf": tpl + "Tumor_BAF.txt"}
input_files["tumor_logr"] = tpl + "Tumor_LogR.txt"
input_files["tumor_baf"] = tpl + "Tumor_BAF.txt"
if self.tumor_ngs_library_to_sample_pair.get(wildcards["library_name"], None) is not None:
input_files["normal_logr"] = tpl + "Germline_LogR.txt"
input_files["normal_baf"] = tpl + "Germline_BAF.txt"
Expand All @@ -300,10 +302,11 @@ def _get_input_files_run(self, wildcards: Wildcards) -> dict[str, str]:
return input_files

def _get_args_run(self, wildcards: Wildcards, input: InputFiles) -> dict[str, Any]:
return {
args = {
"genomeVersion": self.cfg.genomeVersion,
"seed": self.cfg.seed,
"gender": self._get_gender(wildcards["library_name"], wildcards),
"y_limit": self.cfg.y_limit,
"penalty": self.cfg.advanced.penalty,
"gamma": self.cfg.advanced.gamma,
"min_purity": self.cfg.advanced.min_purity,
Expand All @@ -312,7 +315,11 @@ def _get_args_run(self, wildcards: Wildcards, input: InputFiles) -> dict[str, An
"max_ploidy": self.cfg.advanced.max_ploidy,
"rho_manual": self.cfg.advanced.rho_manual,
"psi_manual": self.cfg.advanced.psi_manual,
"chrom_names": AscatStepPart.chromosome_names,
}
if self.tumor_ngs_library_to_sample_pair.get(wildcards["library_name"], None) is None:
args["platform"] = self.cfg.platform
return args

def _read_sex(self, filename: str) -> SexValue:
sex: SexValue | None = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,23 +140,23 @@ class Ascat(SnappyModel):
seed: int = 1234567

path_gc_content: Annotated[
str | None,
str,
Field(
examples=[
"/data/cephfs-1/work/groups/cubi/projects/biotools/ASCAT/GC_G1000_WGS_hg38/GC_G1000_hg38.txt",
]
),
] = None
]
"""Path to the GC correction file"""

path_reptiming: Annotated[
str | None,
str,
Field(
examples=[
"/data/cephfs-1/work/groups/cubi/projects/biotools/ASCAT/RT_G1000_WGS_hg38/RT_G1000_hg38.txt",
]
),
] = None
]
"""Path to the correction file for DNA replication timing"""

platform: AscatPlatform = AscatPlatform.WGS_hg_50X
Expand Down
9 changes: 9 additions & 0 deletions snappy_wrappers/wrappers/ascat/build_baf/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,15 @@

__author__ = "Eric Blanc <eric.blanc@bih-charite.de>"

import os
import sys

# The following is required for being able to import snappy_wrappers modules
# inside wrappers. When the wrappers have their own python environment, messing
# with the path is necessary.
base_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
sys.path.insert(0, base_dir)

from snappy_wrappers.snappy_wrapper import ShellWrapper

args = getattr(snakemake.params, "args", {})
Expand Down
35 changes: 20 additions & 15 deletions snappy_wrappers/wrappers/ascat/prepareHTS/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,15 @@

__author__ = "Eric Blanc <eric.blanc@bih-charite.de>"

import os
import re
import sys

# The following is required for being able to import snappy_wrappers modules
# inside wrappers. When the wrappers have their own python environment, messing
# with the path is necessary.
base_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
sys.path.insert(0, base_dir)

from snappy_wrappers.snappy_wrapper import RWrapper

Expand All @@ -35,48 +43,45 @@ def find_prefix(snakemake_input):
assert prefix is not None, f"Can't find allele counts from {snakemake_input}"
return prefix

allele_prefix = find_prefix(snakemake.input.allele_counts)
tumorAlleleCounts_prefix = find_prefix(snakemake.tumorAlleleCounts)
allele_prefix = find_prefix(snakemake.input.alleles)
tumorAlleleCounts_prefix = find_prefix(snakemake.input.tumorAlleleCounts)
if has_normal:
normalAlleleCounts_prefix = find_prefix(snakemake.normalAlleleCounts)
normalAlleleCounts_prefix = '"' + find_prefix(snakemake.input.normalAlleleCounts) + '"'
else:
normalAlleleCounts_prefix = "NA"

cmd=r"""
library(ASCAT)
# Move to directory where alleles were counted
setwd(dirname("{snakemake.input.allele_counts[0]}"))
# Takes the part of ``ascat.prepareHTS`` which doesn't invoke ``allelecounter``.
# That is: ``ascat.getBAFsAndLogRs`` & ``ascat.synchroniseFiles``
ascat.getBafAndLogRs(
ascat.getBAFsAndLogRs(
samplename="{args[tumorname]}",
tumourAlleleCountsFile.prefix="{tumorAlleleCounts_prefix}",
normalAlleleCountsFile.prefix={normalAlleleCounts_prefix},
tumourLogR_file="{snakemake.output.tumor_logr}",
tumourBAF_file="{snkemake.output.tumor_baf}",
tumourBAF_file="{snakemake.output.tumor_baf}",
normalLogR_file={normal_logr},
normalBAF_file={normal_baf},
alleles.prefix="{allele_prefix}",
gender="args[gender]",
genomeVersion="args[genomeVersion]",
gender="{args[gender]}",
genomeVersion="{args[genomeVersion]}",
chrom_names={chrom_names},
minCounts=args[minCounts],
minCounts={args[minCounts]},
BED_file={bed_file},
probloci_file={probloci_file},
tumour_only_mode={tumor_only},
loci_binsize=1,
seed=args[seed]
seed={args[seed]}
)
ascat.synchroniseFiles(
samplename="{args[tumorname]}",
tumourLogR_file="{snakemake.output.tumor_logr}",
tumourBAF_file="{snkemake.output.tumor_baf}",
tumourBAF_file="{snakemake.output.tumor_baf}",
normalLogR_file={normal_logr},
normalBAF_file={normal_baf},
normalBAF_file={normal_baf}
)
""".format(
snakemake=snakemake,
Expand All @@ -89,7 +94,7 @@ def find_prefix(snakemake_input):
bed_file=f'"{snakemake.input.bed_file}"' if getattr(snakemake.input, "bed_file", None) is not None else "NA",
probloci_file=f'"{snakemake.input.probloci_file}"' if getattr(snakemake.input, "probloci_file", None) is not None else "NA",
chrom_names='c("{}")'.format('", "'.join(args["chrom_names"])),
tumor_only="FALSE" if has_normal else "TRUE,"
tumor_only="FALSE" if has_normal else "TRUE",
)

RWrapper(snakemake).run(cmd)
61 changes: 37 additions & 24 deletions snappy_wrappers/wrappers/ascat/run_ascat/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,71 +5,85 @@
Mandatory snakemake.input: GCcontent, replictiming, tumor_baf, tumor_logr
Optional snakemake.input: normal_baf, normal_logr
Mandatory snakemake.params.args: gamma, gender, genomeVersion, max_ploidy, max_purity,
min_ploidy, min_purity, psi_manual, penalty, rho_manual, seed, y_limit
Mandatory snakemake.params.args: chrom_names, gamma, gender, genomeVersion, max_ploidy,
max_purity, min_ploidy, min_purity, psi_manual, penalty, rho_manual, seed, y_limit
Optional snakemake.params.args: platform (mandatory when no normal)
Mandatory snakemake.output: aberrantcellfraction, circos, goodness_of_fit, na, nb,
ploidy, purity, RData, segments, segments_raw
Mandatory snakemake.output: circos, goodness_of_fit, na, nb, ploidy,
purity, RData, segments, segments_raw
Optional snakemake.output:
"""

__author__ = "Eric Blanc <eric.blanc@bih-charite.de>"

import os
import sys

# The following is required for being able to import snappy_wrappers modules
# inside wrappers. When the wrappers have their own python environment, messing
# with the path is necessary.
base_dir = os.path.normpath(os.path.join(os.path.dirname(__file__), "..", "..", "..", ".."))
sys.path.insert(0, base_dir)

from snappy_wrappers.snappy_wrapper import RWrapper

args = getattr(snakemake.params, "args", {})

has_normal = getattr(snakemake.input, "germline_logr", None) is not None and getattr(snakemake.input, "germline_baf", None) is not None
has_normal = getattr(snakemake.input, "normal_logr", None) is not None and getattr(snakemake.input, "normal_baf", None) is not None
if has_normal:
germline_genotypes = "NULL"
else:
germline_genotypes = 'ascat.predictGermlineGenotypes(ascat.bc, platform="{}", img.dir=".", img.prefix=".")'.format(args["platform"])
germline_genotypes = 'ascat.predictGermlineGenotypes(ascat.bc, platform="{}", img.dir=plot_dir, img.prefix="PredictGG_")'.format(args["platform"])

# TODO:
cmd=r"""
library(ASCAT)
library(rlang)
plot_dir <- file.path(dirname(dirname("{snakemake.output.segments}")), "plots")
dir.create(plot_dir, recursive=TRUE, mode="0750")
ascat.bc <- ascat.loadData(
Tumor_LogR_file="{snakemake.input.tumor_logr}",
Tumor_BAF_file="{snakemake.input.tumor_baf}",
Germline_LogR_file={normal_logr},
Germline_BAF_file={normal_baf},
chrs={chrom_names},
gender=args[gender],
genomeVersion=args[genomeVersion],
chrs={chrs},
gender="{args[gender]}",
genomeVersion="{args[genomeVersion]}",
isTargetedSeq=FALSE
)
ascat.plotRawData(ascat.bc, img.dir="work", img.prefix="Before_correction_")
ascat.plotRawData(ascat.bc, img.dir=plot_dir, img.prefix="Before_correction_")
ascat.bc <- ascat.correctLogR(
ascat.bc,
GCcontentfile="{snakemake.input.GCcontent}",
replictimingfile="{snakemake.input.reptiming}"
)
ascat.plotRawData(ascat.bc, img.dir="work", img.prefix="After_correction_")
ascat.plotRawData(ascat.bc, img.dir=plot_dir, img.prefix="After_correction_")
ascat.bc <- ascat.aspcf(
ASCATobj=ascat.bc,
ascat.gg={germline_genotypes},
penalty=args[penalty],
penalty={args[penalty]},
out.dir=".",
out.prefix="",
seed=args[seed]
seed={args[seed]}
)
ascat.plotSegmentedData(ascat.bc, img.dir="work", img.prefix="")
ascat.plotSegmentedData(ascat.bc, img.dir=plot_dir, img.prefix="Segmented_")
ascat.output <- ascat.runAscat(
ASCATobj=ascat.bc,
gamma=args[gamma],
y_limit=args[y_limit],
circos="{snakemake.output.circos}",
min_ploidy=args[min_ploidy], max_ploidy=args[max_ploidy],
min_purity=args[min_purity], max_purity=args[max_purity],
rho_manual=args[rho_manual], psi_manual=args[psi_manual],
img.dir=".", img.prefix="",
gamma={args[gamma]},
y_limit={args[y_limit]},
circos=file.path(plot_dir, "circos"),
min_ploidy={args[min_ploidy]}, max_ploidy={args[max_ploidy]},
min_purity={args[min_purity]}, max_purity={args[max_purity]},
rho_manual={args[rho_manual]}, psi_manual={args[psi_manual]},
img.dir=plot_dir, img.prefix="ASCAT_",
write_segments=TRUE
)
Expand All @@ -82,15 +96,14 @@
write.table(ascat.output$nB, "{snakemake.output.nb}", sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
write.table(ascat.output$goodnessOfFit, "{snakemake.output.goodness_of_fit}", sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
write.table(ascat.output$ploidy, "{snakemake.output.ploidy}", sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
write.table(ascat.output$psi, "{snakemake.output.psi}", sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
write.table(ascat.output$segments, "{snakemake.output.segments}", sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
write.table(ascat.output$segments_raw, "{snakemake.output.segments_raw}", sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
write.table(ascat.output$purity, "{snakemake.output.purity}", sep="\t", col.names=TRUE, row.names=FALSE, quote=FALSE)
""".format(
snakemake=snakemake,
args=args,
germline_logr='"{}"'.format(snakemake.input.germline_logr) if has_normal else "NULL",
germline_baf='"{}"'.format(snakemake.input.germline_baf) if has_normal else "NULL",
normal_logr='"{}"'.format(snakemake.input.normal_logr) if has_normal else "NULL",
normal_baf='"{}"'.format(snakemake.input.normal_baf) if has_normal else "NULL",
chrs='c("{}")'.format('", "'.join(args["chrom_names"])),
germline_genotypes=germline_genotypes,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,8 @@ def test_ascat_step_part_get_input_files_run(somatic_purity_ploidy_estimate_work
"tumor_baf": "work/bwa.ascat.P001-T1-DNA1-WGS1/out/bwa.ascat.P001-T1-DNA1-WGS1.Tumor_BAF.txt",
"normal_logr": "work/bwa.ascat.P001-T1-DNA1-WGS1/out/bwa.ascat.P001-T1-DNA1-WGS1.Germline_LogR.txt",
"normal_baf": "work/bwa.ascat.P001-T1-DNA1-WGS1/out/bwa.ascat.P001-T1-DNA1-WGS1.Germline_BAF.txt",
"GCcontent": "/path/to/gc_content.txt",
"reptiming": "/path/to/reptiming.txt",
# "sex": "work/bwa.ascat.P001-T1-DNA1-WGS1/out/bwa.ascat.P001-T1-DNA1-WGS1.sex.txt",
}
actual = somatic_purity_ploidy_estimate_workflow.get_input_files("ascat", "run")(
Expand Down Expand Up @@ -193,7 +195,6 @@ def test_ascat_step_part_get_output_files_run(somatic_purity_ploidy_estimate_wor
tpl = "work/{mapper}.ascat.{library_name}/out/{mapper}.ascat.{library_name}."
expected = {
"RData": tpl + "RData",
"circos": tpl + "circos.txt",
"goodness_of_fit": tpl + "goodness_of_fit.txt",
"na": tpl + "na.txt",
"nb": tpl + "nb.txt",
Expand All @@ -202,7 +203,6 @@ def test_ascat_step_part_get_output_files_run(somatic_purity_ploidy_estimate_wor
"segments": tpl + "segments.txt",
"segments_raw": tpl + "segments_raw.txt",
"RData_md5": tpl + "RData.md5",
"circos_md5": tpl + "circos.txt.md5",
"goodness_of_fit_md5": tpl + "goodness_of_fit.txt.md5",
"na_md5": tpl + "na.txt.md5",
"nb_md5": tpl + "nb.txt.md5",
Expand Down Expand Up @@ -270,6 +270,7 @@ def test_ascat_step_part_get_args_prepare_hts(somatic_purity_ploidy_estimate_wor

def test_ascat_step_part_get_args_run(somatic_purity_ploidy_estimate_workflow):
"""Tests AscatStepPart._get_args_run()"""
chromosome_names = list(map(str, range(1, 23))) + ["X"]
wildcards = Wildcards(fromdict={"library_name": "P001-T1-DNA1-WGS1", "mapper": "bwa"})
expected = {
"gamma": 1.0,
Expand All @@ -283,6 +284,8 @@ def test_ascat_step_part_get_args_run(somatic_purity_ploidy_estimate_workflow):
"rho_manual": "NA",
"psi_manual": "NA",
"gender": "XX",
"chrom_names": chromosome_names,
"y_limit": 5,
}
actual = somatic_purity_ploidy_estimate_workflow.get_args("ascat", "run")(wildcards, [])
assert actual == expected
Expand Down

0 comments on commit fbf8aaf

Please sign in to comment.