Skip to content

Commit

Permalink
Merge pull request #467 from nextstrain/surface-nextclade-versions
Browse files Browse the repository at this point in the history
Surface Nextclade versions
  • Loading branch information
joverlee521 committed Jul 31, 2024
2 parents f9bca07 + ccc9fa7 commit f38bf5d
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 0 deletions.
30 changes: 30 additions & 0 deletions bin/generate-nextclade-version-json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/bin/bash
#
# Print a single-line JSON object describing how a Nextclade TSV was produced.
#
# Usage:
#   generate-nextclade-version-json <nextclade-executable> <dataset-zip> <nextclade-tsv>
#
# The object written to stdout has these keys:
#   schema_version             always "v1"
#   nextclade_version          stdout of `<nextclade-executable> --version`
#   nextclade_dataset_name     `.attributes.name` from pathogen.json in the dataset zip
#   nextclade_dataset_version  `.version.tag` from pathogen.json in the dataset zip
#   nextclade_tsv_sha256sum    stdout of the vendored sha256sum helper fed the TSV on stdin
#
# Relies on `unzip` and `jq` being on PATH and on ../vendored/sha256sum
# relative to this script's location.

set -euo pipefail

# Location of the vendored helper scripts, resolved relative to this script.
vendored_dir="$(dirname "$0")"/../vendored

# Positional arguments. `${N:?message}` aborts with the message when the
# argument is missing or empty.
nextclade_bin="${1:?A path to the Nextclade executable is required as the first argument}"
dataset_zip="${2:?A path to the Nextclade dataset is required as the second argument}"
tsv_path="${3:?A path to the Nextclade TSV is required as the third argument}"

# Each value is captured in its own assignment so that a failing command
# stops the script under `set -e` (a substitution nested inside another
# command's arguments would not).
tool_version="$("$nextclade_bin" --version)"
pathogen_json="$(unzip -p "$dataset_zip" pathogen.json)"
dataset_name="$(jq -r '.attributes.name' <<<"$pathogen_json")"
dataset_tag="$(jq -r '.version.tag' <<<"$pathogen_json")"
tsv_digest="$("$vendored_dir/sha256sum" < "$tsv_path")"

# Build the JSON with jq so all values are escaped correctly.
jq --null-input --compact-output \
    --arg tool_version "$tool_version" \
    --arg dataset_name "$dataset_name" \
    --arg dataset_tag "$dataset_tag" \
    --arg tsv_digest "$tsv_digest" \
    '{
        schema_version: "v1",
        nextclade_version: $tool_version,
        nextclade_dataset_name: $dataset_name,
        nextclade_dataset_version: $dataset_tag,
        nextclade_tsv_sha256sum: $tsv_digest
    }'
44 changes: 44 additions & 0 deletions workflow/snakemake_rules/nextclade.smk
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,26 @@ rule nextclade_info:
"""


rule nextclade_version_json:
    """
    Writes the version JSON that accompanies a Nextclade TSV.

    Calls ./bin/generate-nextclade-version-json with the Nextclade executable,
    the dataset zip selected by the `reference` wildcard, and the Nextclade
    TSV, then stores the script's stdout as the output file.
    """
    input:
        nextclade_path="data/nextclade",
        # The dataset zip name uses "-" wherever the `reference` wildcard has "_".
        nextclade_dataset=lambda wildcards: "data/nextclade_data/sars-cov-2{}.zip".format(
            wildcards.reference.replace("_", "-")
        ),
        nextclade_tsv=f"data/{database}/nextclade{{reference}}.tsv",
    output:
        nextclade_version_json=f"data/{database}/nextclade{{reference}}_version.json",
    shell:
        """
        ./bin/generate-nextclade-version-json \
            {input.nextclade_path} {input.nextclade_dataset} {input.nextclade_tsv} \
            > {output.nextclade_version_json}
        """


rule combine_alignments:
"""
Generating full alignment by combining newly aligned sequences with previous (cached) alignment
Expand Down Expand Up @@ -365,3 +385,27 @@ rule generate_metadata:
--clade-legacy-mapping {input.clade_legacy_mapping} \
-o {output.metadata}
"""


rule metadata_version_json:
    """
    Builds the metadata version JSON: the Nextclade version JSON with one
    extra key, `metadata_tsv_sha256sum`, holding the sha256sum of the
    metadata TSV.

    TODO: Merge the 21L Nextclade version JSON to track data provenance for
    specific columns
    """
    input:
        metadata=f"data/{database}/metadata.tsv",
        nextclade_version_json=f"data/{database}/nextclade_version.json",
    output:
        metadata_version_json=f"data/{database}/metadata_version.json",
    shell:
        """
        # Keep the digest in its own assignment so a sha256sum failure aborts
        # the command instead of being masked inside jq's argument list.
        digest="$(./vendored/sha256sum < {input.metadata})"
        jq --compact-output --arg digest "$digest" \
            '.metadata_tsv_sha256sum = $digest' \
            {input.nextclade_version_json} \
            > {output.metadata_version_json}
        """
3 changes: 3 additions & 0 deletions workflow/snakemake_rules/upload.smk
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ def compute_files_to_upload():
"aligned.fasta.zst": f"data/{database}/aligned.fasta",
"nextclade_21L.tsv.zst": f"data/{database}/nextclade_21L.tsv",

"nextclade_version.json": f"data/{database}/nextclade_version.json",
"nextclade_21L_version.json": f"data/{database}/nextclade_21L_version.json",
"metadata_version.json": f"data/{database}/metadata_version.json",
}
files_to_upload = files_to_upload | {
f"translation_{gene}.fasta.zst" : f"data/{database}/translation_{gene}.fasta"
Expand Down

0 comments on commit f38bf5d

Please sign in to comment.