Skip to content

Commit

Permalink
Merge branch 'main' into 24-add-server-feature
Browse files Browse the repository at this point in the history
  • Loading branch information
holtgrewe authored Jan 5, 2024
2 parents c9d6173 + c5497aa commit 11e33c4
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 37 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

### [0.8.1](https://www.github.com/bihealth/ngs-chew/compare/v0.8.0...v0.8.1) (2023-07-18)


### Bug Fixes

* sites for GRCh38 ([#24](https://www.github.com/bihealth/ngs-chew/issues/24)) ([#39](https://www.github.com/bihealth/ngs-chew/issues/39)) ([79773ef](https://www.github.com/bihealth/ngs-chew/commit/79773ef0e829184e763aee61373ae46f6ea83ed5))

## [0.8.0](https://www.github.com/bihealth/ngs-chew/compare/v0.7.1...v0.8.0) (2023-01-16)


Expand Down
10 changes: 5 additions & 5 deletions chew/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def cli(ctx: click.Context, verbose: bool):
ctx.obj["verbose"] = verbose


@cli.command("fingerprint", help="Compute fingerprint to numpy .npz files.")
@cli.command("fingerprint", help="Compute fingerprint to numpy .npz files.") # type: ignore[attr-defined]
@click.option("--min-coverage", type=int, default=5, help="Minimal required coverage.")
@click.option("--reference", required=True, help="Path to reference FASTA file.")
@click.option(
Expand Down Expand Up @@ -95,7 +95,7 @@ def cli_fingerprint(
fingerprint.run(config)


@cli.command("compare", help="Perform fingeprint comparison.")
@cli.command("compare", help="Perform fingeprint comparison.") # type: ignore[attr-defined]
@click.option(
"--output-prefix", type=str, default="chew-comparison", help="Path to comparison file."
)
Expand All @@ -120,7 +120,7 @@ def cli_compare(
compare.run(config)


@cli.command("stats", help="Compute statistics from fingerprint .npz files.")
@cli.command("stats", help="Compute statistics from fingerprint .npz files.") # type: ignore[attr-defined]
@click.option("--output", default="chew-stats.txt", help="Path to output file.")
@click.argument("fingerprints", nargs=-1)
@click.pass_context
Expand All @@ -137,7 +137,7 @@ def cli_stats(
stats.run(config)


@cli.command("plot-compare", help="Plot result of 'ngs-chew compare'.")
@cli.command("plot-compare", help="Plot result of 'ngs-chew compare'.") # type: ignore[attr-defined]
@click.option(
"--title", default="NGS Chew Comparison Plot", help="title to use for the output HTML file."
)
Expand All @@ -159,7 +159,7 @@ def cli_plot_compare(
plot_compare.run(config)


@cli.command("plot-var-het", help="Plot var(het) metric from .npz files.")
@cli.command("plot-var-het", help="Plot var(het) metric from .npz files.") # type: ignore[attr-defined]
@click.option(
"--title", default="NGS Chew var(het) Plot", help="title to use for the output HTML file."
)
Expand Down
37 changes: 37 additions & 0 deletions chew/common.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
"""Commonly used code"""

import gzip
import os
import typing

import attrs
from logzero import logger

#: Chromosome lengths in GRCh37.
import enum
import typing
Expand Down Expand Up @@ -112,3 +119,33 @@ def pedigree_member_from_tsv(arr: typing.List[str]) -> PedigreeMember:
sex=PED_SEX_MAP.get(arr[4], Sex.UNKNOWN),
disease_state=PED_DISEASE_MAP.get(arr[5], DiseaseState.UNKNOWN),
)


@attrs.frozen
class Site:
chrom: str
pos: int
ref: str
alt: str


def load_sites(genome_release: str) -> typing.List[Site]:
logger.info("Loading sites .bed.gz for %s", genome_release)
path_gz = os.path.join(os.path.dirname(__file__), "data", f"{genome_release}_sites.bed.gz")
result = []
with gzip.open(path_gz, "rt") as inputf:
for line in inputf:
arr = line.split("\t")
# We construct sites with arbitrary REF/ALT as bcftools roh does not
# interpret them but only GT.
result.append(
Site(
chrom=arr[0],
pos=int(arr[1]) + 1,
ref="N",
alt="A",
)
)
logger.info(" finished reading %d sites", len(result))
logger.info(" first = %s, last = %s", result[0], result[-1])
return result
Binary file modified chew/data/GRCh38_sites.bed.gz
Binary file not shown.
Binary file modified chew/data/GRCh38_sitesX.bed.gz
Binary file not shown.
17 changes: 10 additions & 7 deletions chew/fingerprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,12 @@

import chew
from chew import roh
from chew.common import CHROM_LENS_GRCH37, CHROM_LENS_GRCH38, CHROM_LENS_HG19
from chew.common import (
CHROM_LENS_GRCH37,
CHROM_LENS_GRCH38,
CHROM_LENS_HG19,
load_sites,
)

#: Key to use for GRCh37 release.
RELEASE_37 = "GRCh37"
Expand Down Expand Up @@ -155,12 +160,10 @@ def snps_step_call(
):
bed_file = f"{genome_release}_{sites_suffix}.bed.gz"
logger.info("Reading sites BED (%s)...", bed_file)
path_gz = os.path.join(os.path.dirname(__file__), "data", bed_file)
with gzip.open(path_gz, "rt") as inputf:
sites = {}
for line in inputf:
arr = line.strip().split("\t")
sites["%s%s:%s" % (chr_prefix, arr[0], int(arr[1]) + 1)] = (0, 0, float("nan"))
sites = {
"%s%s:%s" % (chr_prefix, site.chrom, site.pos): (0, 0, float("nan"))
for site in load_sites(genome_release)
}
logger.info("Converting VCF to fingerprint...")
with vcfpy.Reader.from_path(path_calls) as vcf_reader:
if prefix_fingerprint:
Expand Down
25 changes: 1 addition & 24 deletions chew/roh.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
import gzip
import os
import shlex
import subprocess
import typing

import attrs
from logzero import logger
import numpy.typing as npt
import vcfpy

from chew.common import CHROM_LENS_GRCH37, CHROM_LENS_GRCH38
from chew.common import CHROM_LENS_GRCH37, CHROM_LENS_GRCH38, load_sites

#: Use this value for genotype likelyhood (FORMAT/PL)
BCFTOOLS_ROH_PL = 30
Expand Down Expand Up @@ -43,27 +41,6 @@ class Site:
alt: str


def load_sites(genome_release: str) -> typing.List[Site]:
logger.info("Loading sites .bed.gz")
path_gz = os.path.join(os.path.dirname(__file__), "data", f"{genome_release}_sites.bed.gz")
result = []
with gzip.open(path_gz, "rt") as inputf:
for lineno, line in enumerate(inputf):
arr = line.split("\t")
# We construct sites with arbitrary REF/ALT as bcftools roh does not
# interpret them but only GT.
result.append(
Site(
chrom=arr[0],
pos=int(arr[1]) + 1,
ref="N",
alt="A",
)
)
logger.info(" finished reading %d sites", len(result))
return result


def create_vcf_header(sample: str, release: str) -> vcfpy.Header:
if release == "GRCh37":
chrom_lens = CHROM_LENS_GRCH37
Expand Down
2 changes: 1 addition & 1 deletion requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ pytest-cov
pytest-click
pyfakefs >=3.7.1

mypy ==0.990
mypy ==1.4.1

0 comments on commit 11e33c4

Please sign in to comment.