From 7b0ee0d0e59b525c50bee4f3c467c87f1ca62332 Mon Sep 17 00:00:00 2001
From: Alex Lubbock
Date: Wed, 11 Oct 2023 16:38:17 +0100
Subject: [PATCH] Add well validation in seq data upload

Validate uploaded sequencing results against the (ELISA plate, well)
pairs expected for the submission plate rather than well names alone.
The plate number is parsed from an "_EP<n>_" token in each .seq
filename. If any filename lacks a plate number, fall back to matching
on well name only, and reject the upload when the expected wells
contain duplicate well names.

load_sequences() now also accepts file-like objects (anything with a
"read" attribute), so uploads that expose no temporary_file_path() can
be processed via their underlying file object.
---
 backend/antigenapi/bioinformatics.py |  2 +-
 backend/antigenapi/views.py          | 52 ++++++++++++++++++++++++----
 2 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/backend/antigenapi/bioinformatics.py b/backend/antigenapi/bioinformatics.py
index ed6b086..b1c77c4 100644
--- a/backend/antigenapi/bioinformatics.py
+++ b/backend/antigenapi/bioinformatics.py
@@ -51,7 +51,7 @@ def _load_sequences_zip(zip_file):
 
 def load_sequences(directory_or_zip):
     """Load a set of sequences from .seq files from a dictionary or .zip file."""
-    if os.path.isfile(directory_or_zip):
+    if hasattr(directory_or_zip, "read") or os.path.isfile(directory_or_zip):
         return _load_sequences_zip(directory_or_zip)
 
     seq_data = {}
diff --git a/backend/antigenapi/views.py b/backend/antigenapi/views.py
index 9975487..0805d70 100644
--- a/backend/antigenapi/views.py
+++ b/backend/antigenapi/views.py
@@ -1,6 +1,7 @@
 import collections.abc
 import io
 import os
+import re
 import urllib.error
 import urllib.parse
 from tempfile import NamedTemporaryFile
@@ -48,6 +49,8 @@
 
 from .parsers import parse_elisa_file
 
+_ELISA_PLATE_MATCHER = re.compile(r"_EP([0-9]+)_")
+
 
 # Audit logs #
 class AuditLogSerializer(ModelSerializer):
@@ -501,6 +504,15 @@ class Meta:  # noqa: D106
         read_only_fields = ["added_by", "added_date"]
 
 
+def _extract_plate_and_well(well):
+    """Return (ELISA plate number or None, well name) parsed from a .seq name."""
+    well_name = _remove_zero_pad_well_name(well[-3:])
+    plate = _ELISA_PLATE_MATCHER.search(well)
+    if not plate:
+        return (None, well_name)
+    return (int(plate.group(1)), well_name)
+
+
 class SequencingRunViewSet(AuditLogMixin, DeleteProtectionMixin, ModelViewSet):
     """A view set for sequencing runs."""
 
@@ -604,7 +616,15 @@ def upload_sequencing_run_results(self, request, pk, submission_idx):
                 f"Sequencing run {pk} does not exist " "to attach results"
             )
 
-        wells = [w["location"] for w in sr.wells if w["plate"] == int(submission_idx)]
+        # Store (elisa_plate, elisa_well) tuples expected in this results file
+        wells = [
+            (
+                w["elisa_well"]["plate"],
+                PlateLocations.labels[w["elisa_well"]["location"] - 1],
+            )
+            for w in sr.wells
+            if w["plate"] == int(submission_idx)
+        ]
         if not wells:
             raise ValidationError(
                 f"Plate index {submission_idx} not found in " "sequencing run {pk}"
@@ -612,7 +632,11 @@ def upload_sequencing_run_results(self, request, pk, submission_idx):
 
         # Run bioinformatics using .zip file
         # print("Extracting zip file...")
-        seq_data = load_sequences(results_file.temporary_file_path())
+        try:
+            seq_data_fh = results_file.temporary_file_path()
+        except AttributeError:
+            seq_data_fh = results_file.file
+        seq_data = load_sequences(seq_data_fh)
         if len(seq_data) != len(wells):
             raise ValidationError(
                 {
@@ -622,9 +646,7 @@ def upload_sequencing_run_results(self, request, pk, submission_idx):
             )
         # Validate wells expected vs wells supplied
         try:
-            wells_supplied = set(
-                [_remove_zero_pad_well_name(w[-3:]) for w in seq_data.keys()]
-            )
+            wells_supplied = [_extract_plate_and_well(w) for w in seq_data.keys()]
         except IndexError:
             raise ValidationError(
                 {
@@ -632,7 +654,25 @@ def upload_sequencing_run_results(self, request, pk, submission_idx):
                     "Ensure all .seq filenames end with a well."
                 }
             )
-        wells_expected = set([PlateLocations.labels[loc - 1] for loc in wells])
+        # Plateless matching for legacy datasets
+        plateless_matching = any(w[0] is None for w in wells_supplied)
+        if plateless_matching:
+            # Check for duplicates in the expected well names, and error if so
+            wells_expected_list = [w[1] for w in wells]
+            wells_expected = set(wells_expected_list)
+            if len(wells_expected_list) != len(wells_expected):
+                raise ValidationError(
+                    {
+                        "file": "Using legacy match (no plate numbers) but duplicate "
+                        "wells found. Please re-upload with plate numbers in .seq "
+                        "filenames."
+                    }
+                )
+            wells_supplied = {w[1] for w in wells_supplied}
+        else:
+            wells_expected = set(wells)
+            wells_supplied = set(wells_supplied)
+
         if wells_expected - wells_supplied:
             raise ValidationError(
                 {