Skip to content

Commit

Permalink
CW-2249-new-schemes
Browse files Browse the repository at this point in the history
  • Loading branch information
mattdmem committed Jul 31, 2023
1 parent 98641c5 commit 7a7279d
Show file tree
Hide file tree
Showing 12 changed files with 1,302 additions and 76 deletions.
88 changes: 32 additions & 56 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,60 +6,36 @@ variables:
NF_WORKFLOW_OPTS: "--fastq test_data/fastq --samples test_data/sample_sheet.csv"
NF_IGNORE_PROCESSES: "checkSampleSheet,combineGenotypeSummaries,genotypeSummary,report_no_data"
CI_FLAVOUR: "new"
# IMAGE_TAG: "NO_UPDATE"

# check-versions:
# stage: pre-custom-builds
# image: conda/miniconda3:latest
# needs: []
# script:
# - apt-get update && apt-get -y install gcc
# - pip install cachetools==4.1.0 docker==4.2.0 PyGithub==1.54 requests==2.20.0 semver==2.13.0 packaging
# - python3 bin/check_aux_versions.py --docker_registry 'ontresearch/pangolin' --github_repository 'cov-lineages/pangolin' --token ${LABS_BOT_GH_TOKEN} --tool pangolin -p v > ./pangolin.txt
# - python3 bin/check_aux_versions.py --docker_registry 'ontresearch/nextclade' --github_repository 'nextstrain/nextclade' --token ${LABS_BOT_GH_TOKEN} --tool nextclade > ./nextclade.txt
# artifacts:
# paths:
# - pangolin.txt
# - nextclade.txt
# expire_in: 1 day
# rules:
# - if: '$CI_PIPELINE_SOURCE == "schedule"'
#
# .release:
# stage: custom-builds
# needs: ['check-versions']
# before_script:
# - export IMAGE_TAG=`cat ${FILE_NAME} | grep ACTION | cut -f2 -d":"`
# - if [ "${IMAGE_TAG}" == "NO_UPDATE" ]; then echo "${IMAGE_NAME} already at latest version"; exit 0; fi;
# - IMAGE_DOCKERFILE="${IMAGE_DOCKERFILE:-Dockerfile}"
# - if [ -z "${IMAGE_NAME}" ]; then "IMAGE_NAME was not set"; exit 1; fi;
# - if [ -z "${IMAGE_TAG}" ]; then "IMAGE_TAG was not set"; exit 1; fi;
# - echo "Building auxiliary container with:"
# - echo " - Dockerfile:'${IMAGE_DOCKERFILE}'"
# - echo " - Image name:'${IMAGE_NAME}'"
# script:
# - !reference [.install, glibc-alpine]
# - !reference [.install, awscli]
# - echo ${DOCKERHUB_TOKEN} | docker login --username epi2melabs --password-stdin
# - LATEST="${DOCKERHUB_NAMESPACE}/${IMAGE_NAME}:${IMAGE_TAG}"
# - BUILD_CMD="docker build --no-cache -t ${LATEST} -f ${IMAGE_DOCKERFILE} . --build-arg IMAGE_TAG=${IMAGE_TAG}"
# - echo "${BUILD_CMD}"
# - ${BUILD_CMD}
# - docker push ${LATEST}
# rules:
# - if: '$CI_PIPELINE_SOURCE == "schedule"'
# when: on_success
#
# release-pangolin:
# extends: .release
# variables:
# IMAGE_NAME: "pangolin"
# FILE_NAME: "pangolin.txt"
# IMAGE_DOCKERFILE: "Dockerfile_pangolin"
#
# release-nextclade:
# extends: .release
# variables:
# IMAGE_NAME: "nextclade"
# FILE_NAME: "nextclade.txt"
# IMAGE_DOCKERFILE: "Dockerfile_nextclade"

# Runs the workflow once per primer scheme: each MATRIX_NAME value below
# becomes its own parallel job.
docker-run:
artifacts:
# Keep artifacts whatever the job outcome, leaving out the workspace directory.
when: always
exclude:
- ${CI_PROJECT_NAME}/workspace/**/*
parallel:
matrix:
- MATRIX_NAME: ["Midnight-ONT/V1", "Midnight-ONT/V2", "ARTIC/V4.1", "ARTIC/V5.3.2", "Midnight-IDT/V2", "NEB-VarSkip/v2"]
rules:
# Never run when no branch is set or on the dev-template branch.
- if: ($CI_COMMIT_BRANCH == null || $CI_COMMIT_BRANCH == "dev-template")
when: never
# One rule per matrix entry: each overrides NF_WORKFLOW_OPTS so that
# --scheme_version equals the job's MATRIX_NAME (scheme name is always SARS-CoV-2).
- if: $MATRIX_NAME == "Midnight-ONT/V1"
variables:
NF_WORKFLOW_OPTS: " --fastq test_data/fastq --samples test_data/sample_sheet.csv --scheme_name SARS-CoV-2 --scheme_version Midnight-ONT/V1"
- if: $MATRIX_NAME == "Midnight-ONT/V2"
variables:
NF_WORKFLOW_OPTS: " --fastq test_data/fastq --samples test_data/sample_sheet.csv --scheme_name SARS-CoV-2 --scheme_version Midnight-ONT/V2"
- if: $MATRIX_NAME == "ARTIC/V4.1"
variables:
NF_WORKFLOW_OPTS: " --fastq test_data/fastq --samples test_data/sample_sheet.csv --scheme_name SARS-CoV-2 --scheme_version ARTIC/V4.1"
- if: $MATRIX_NAME == "ARTIC/V5.3.2"
variables:
NF_WORKFLOW_OPTS: " --fastq test_data/fastq --samples test_data/sample_sheet.csv --scheme_name SARS-CoV-2 --scheme_version ARTIC/V5.3.2"
- if: $MATRIX_NAME == "Midnight-IDT/V2"
variables:
NF_WORKFLOW_OPTS: " --fastq test_data/fastq --samples test_data/sample_sheet.csv --scheme_name SARS-CoV-2 --scheme_version Midnight-IDT/V2"
- if: $MATRIX_NAME == "NEB-VarSkip/v2"
variables:
NF_WORKFLOW_OPTS: " --fastq test_data/fastq --samples test_data/sample_sheet.csv --scheme_name SARS-CoV-2 --scheme_version NEB-VarSkip/v2"


7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [v0.3.30]
### Added
- New primer schemes: ARTIC V5.3.2 and Midnight IDT V2
### Updated
- Nextclade & pangolin
- For alt primers we take the largest region (Updated ARTIC code merges a canonical primer site with an alt site, producing an interval that encompasses both)

## [v0.3.29]
### Changed
- Example command to use demo data.
Expand Down
12 changes: 11 additions & 1 deletion bin/workflow_glue/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,23 @@

def get_components():
"""Find a list of workflow command scripts."""
logger = get_main_logger(_package_name)
path = os.path.dirname(os.path.abspath(__file__))
components = list()
for fname in glob.glob(os.path.join(path, "*.py")):
name = os.path.splitext(os.path.basename(fname))[0]
if name in ("__init__", "util"):
continue
mod = importlib.import_module(f"{_package_name}.{name}")

# leniently attempt to import module
try:
mod = importlib.import_module(f"{_package_name}.{name}")
except ModuleNotFoundError as e:
# if imports cannot be satisfied, refuse to add the component
# rather than exploding
logger.warn(f"Could not load {name} due to missing module {e.name}")
continue

# if there's a main() and an argparser(), that's good enough for us.
try:
req = "main", "argparser"
Expand Down
34 changes: 32 additions & 2 deletions bin/workflow_glue/check_sample_sheet.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,35 @@
"""Check if a sample sheet is valid."""
import codecs
import csv
import os
import sys

from .util import get_named_logger, wf_parser # noqa: ABS101


# Some Excel users save their CSV as UTF-8 (and occasionally for a reason beyond my
# comprehension, UTF-16); Excel then adds a byte order mark (unnecessarily for UTF-8
# I should add). If we do not handle this with the correct encoding, the mark will
# appear in the parsed data, causing the header to be malformed.
# See CW-2310
def determine_codec(f):
    """Peek at a file and return an appropriate reading codec.

    The first bytes of the file are compared against the known Unicode byte
    order marks (BOMs).

    :param f: path of the file to inspect.
    :returns: the name of a codec that consumes the mark while decoding
        ("utf-8-sig", "utf-16" or "utf-32"), or ``None`` when no mark is
        present so that the caller opens the file with the default encoding.
    :raises OSError: if the file cannot be opened for reading.
    """
    with open(f, 'rb') as f_bytes:
        # Could use chardet here if we need to expand codec support
        initial_bytes = f_bytes.read(8)

    # The UTF-32 marks must be tested before the UTF-16 ones: BOM_UTF32_LE
    # (FF FE 00 00) starts with BOM_UTF16_LE (FF FE), so checking UTF-16 first
    # would report a UTF-32-LE file as UTF-16.
    for bom, encoding_name in (
        (codecs.BOM_UTF8, "utf-8-sig"),  # use the -sig codec to drop the mark
        (codecs.BOM_UTF32_BE, "utf-32"),  # don't specify LE or BE to drop mark
        (codecs.BOM_UTF32_LE, "utf-32"),
        (codecs.BOM_UTF16_BE, "utf-16"),  # again skip LE or BE to drop mark
        (codecs.BOM_UTF16_LE, "utf-16"),
    ):
        if initial_bytes.startswith(bom):
            return encoding_name
    return None  # will cause file to be opened with default encoding


def main(args):
"""Run the entry point."""
logger = get_named_logger("checkSheet")
Expand All @@ -14,10 +39,15 @@ def main(args):
sample_types = []
allowed_sample_types = [
"test_sample", "positive_control", "negative_control", "no_template_control"
]
]

if not os.path.exists(args.sample_sheet) or not os.path.isfile(args.sample_sheet):
sys.stdout.write(f"Could not open sample sheet '{args.sample_sheet}'.")
sys.exit()

try:
with open(args.sample_sheet, "r") as f:
encoding = determine_codec(args.sample_sheet)
with open(args.sample_sheet, "r", encoding=encoding) as f:
csv_reader = csv.DictReader(f)
n_row = 0
for row in csv_reader:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ MN908947.3 4636 4658 nCoV-2019_16_LEFT 2 +
MN908947.3 4995 5017 nCoV-2019_16_RIGHT 2 -
MN908947.3 4939 4966 nCoV-2019_17_LEFT 1 +
MN908947.3 5296 5321 nCoV-2019_17_RIGHT 1 -
MN908947.3 5257 5287 nCoV-2019_18_LEFT_alt2 2 +
MN908947.3 5257 5287 nCoV-2019_18_LEFT 2 +
MN908947.3 5620 5644 nCoV-2019_18_RIGHT 2 -
MN908947.3 5563 5586 nCoV-2019_19_LEFT 1 +
MN908947.3 5932 5957 nCoV-2019_19_RIGHT 1 -
Expand Down
Loading

0 comments on commit 7a7279d

Please sign in to comment.