diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0745703..39db619 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -154,6 +154,14 @@ jobs: mkdir reads echo "Downloading ERR6359501 from EBI ENA" curl -SLk --silent ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR635/001/ERR6359501/ERR6359501.fastq.gz > reads/ERR6359501.fastq.gz + - name: Fetch IBV test seq + run: | + curl -SLk --silent https://github.com/CFIA-NCFAD/nf-test-datasets/raw/nf-flu/nanopore/fastq/SRR24826962.sampled.fastq.gz > reads/SRR24826962.fastq.gz + - name: Check IBV data + run: | + file reads/SRR24826962.fastq.gz + md5sum reads/SRR24826962.fastq.gz + sha256sum reads/SRR24826962.fastq.gz - name: Prepare samplesheet.csv run: | echo "Subsample reads from ERR6359501.fastq.gz with seqtk to mock different runs and ways of specifying input" @@ -168,6 +176,7 @@ jobs: echo "ERR6359501-10k,$(realpath reads/ERR6359501-10k.fastq)" | tee -a samplesheet.csv echo "ERR6359501,$(realpath run1)" | tee -a samplesheet.csv echo "ERR6359501,$(realpath run2)" | tee -a samplesheet.csv + echo "SRR24826962,$(realpath reads/SRR24826962.fastq.gz)" | tee -a samplesheet.csv - name: Cache subsampled influenza.fna uses: actions/cache@v3 id: cache-influenza-fna diff --git a/CHANGELOG.md b/CHANGELOG.md index 17e7961..b5f938e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[3.3.2](https://github.com/CFIA-NCFAD/nf-flu/releases/tag/3.3.2)] - 2023-08-03 + +This patch release fixes an IBV subtype/genotype parsing issue when generating the subtyping report using the new metadata format introduced in 3.3.0 ([#32](https://github.com/CFIA-NCFAD/nf-flu/issues/32)). 
+ ## [[3.3.1](https://github.com/CFIA-NCFAD/nf-flu/releases/tag/3.3.1)] - 2023-08-02 ### Fixes diff --git a/bin/parse_influenza_blast_results.py b/bin/parse_influenza_blast_results.py index 18c4b0b..389e4b1 100755 --- a/bin/parse_influenza_blast_results.py +++ b/bin/parse_influenza_blast_results.py @@ -60,6 +60,7 @@ ("sample_segment", "Sample Genome Segment Number"), ("#Accession", "Reference NCBI Accession"), ("Genotype", "Reference Subtype"), + ("Genus", "Genus"), ("pident", "BLASTN Percent Identity"), ("length", "BLASTN Alignment Length"), ("mismatch", "BLASTN Mismatches"), @@ -245,7 +246,18 @@ def parse_blast_result( df_top_seg_matches = df_top_seg_matches.select(pl.col(cols)) subtype_results_summary = {"sample": sample_name} if not get_top_ref: - is_iav = not df_top_seg_matches.select(pl.col("Genotype").is_null().all())[0, 0] + df_genotype_genus = df_top_seg_matches.select(pl.col(["Genotype", "Genus"])) + # where the genus is not IAV, set the genotype to "Not IAV" + df_genotype_genus = df_genotype_genus.with_columns( + pl.when(pl.col("Genus") == "Alphainfluenzavirus") + .then(pl.col("Genotype")) + .otherwise(pl.lit("Not IAV")) + .alias("Genotype") + ) + genotypes = df_genotype_genus["Genotype"] + genotype_counts = genotypes.value_counts(sort=True) + # if the top genotype is "Not IAV", then the sample is not IAV + is_iav = genotype_counts['Genotype'][0] != "Not IAV" H_results = None N_results = None if "4" in segments: @@ -290,8 +302,7 @@ def find_h_or_n_type(df_merge, seg, is_iav): "4", "6", ], "Can only determine H or N type from segments 4 or 6, respectively!" 
- type_name = "H_type" if seg == "4" else "N_type" - h_or_n = type_name[0] + h_or_n, type_name = ("H", "H_type") if seg == "4" else ("N", "N_type") df_segment = df_merge.filter(pl.col("sample_segment") == seg) if is_iav: type_counts = df_segment["Genotype"].value_counts(sort=True) diff --git a/nextflow.config b/nextflow.config index a0f26f0..7e3027e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -151,7 +151,7 @@ manifest { description = 'Influenza A virus genome assembly pipeline' homePage = 'https://github.com/CFIA-NCFAD/nf-flu' author = 'Peter Kruczkiewicz, Hai Nguyen' - version = '3.3.1' + version = '3.3.2' nextflowVersion = '!>=22.10.1' mainScript = 'main.nf' doi = '10.5281/zenodo.7011213'