Skip to content

Commit

Permalink
Add well validation in seq data upload
Browse files Browse the repository at this point in the history
  • Loading branch information
alubbock committed Oct 11, 2023
1 parent 08dec02 commit 7b0ee0d
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 7 deletions.
2 changes: 1 addition & 1 deletion backend/antigenapi/bioinformatics.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def _load_sequences_zip(zip_file):

def load_sequences(directory_or_zip):
"""Load a set of sequences from .seq files from a dictionary or .zip file."""
if os.path.isfile(directory_or_zip):
if hasattr(directory_or_zip, "read") or os.path.isfile(directory_or_zip):
return _load_sequences_zip(directory_or_zip)

seq_data = {}
Expand Down
51 changes: 45 additions & 6 deletions backend/antigenapi/views.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import collections.abc
import io
import os
import re
import urllib.error
import urllib.parse
from tempfile import NamedTemporaryFile
Expand Down Expand Up @@ -48,6 +49,8 @@

from .parsers import parse_elisa_file

# Finds the ELISA plate token (e.g. "_EP12_") inside a sample name; group 1
# captures the plate number as a string of ASCII digits.
_ELISA_PLATE_MATCHER = re.compile(r"_EP([0-9]+)_")


# Audit logs #
class AuditLogSerializer(ModelSerializer):
Expand Down Expand Up @@ -501,6 +504,14 @@ class Meta: # noqa: D106
read_only_fields = ["added_by", "added_date"]


def _extract_plate_and_well(well):
    """Split a sequence sample name into its ELISA plate number and well name.

    The well name is the last three characters of ``well`` after being passed
    through ``_remove_zero_pad_well_name``. The plate number is the integer
    captured by ``_ELISA_PLATE_MATCHER`` (an ``_EP<digits>_`` token anywhere
    in the name).

    Returns:
        A ``(plate, well_name)`` tuple. ``plate`` is ``None`` when the name
        contains no ``_EP<digits>_`` token.
    """
    well_name = _remove_zero_pad_well_name(well[-3:])
    plate_match = _ELISA_PLATE_MATCHER.search(well)
    if plate_match is None:
        return (None, well_name)
    # group(1) is the captured digit run. Match.groups() takes a default
    # value for unmatched groups, not a group index, so it is not used here.
    return (int(plate_match.group(1)), well_name)


class SequencingRunViewSet(AuditLogMixin, DeleteProtectionMixin, ModelViewSet):
"""A view set for sequencing runs."""

Expand Down Expand Up @@ -604,15 +615,27 @@ def upload_sequencing_run_results(self, request, pk, submission_idx):
f"Sequencing run {pk} does not exist " "to attach results"
)

wells = [w["location"] for w in sr.wells if w["plate"] == int(submission_idx)]
# Store the (elisa_plate, elisa_well) tuples expected in this results file
wells = [
(
w["elisa_well"]["plate"],
PlateLocations.labels[w["elisa_well"]["location"] - 1],
)
for w in sr.wells
if w["plate"] == int(submission_idx)
]
if not wells:
raise ValidationError(
f"Plate index {submission_idx} not found in " "sequencing run {pk}"
)

# Run bioinformatics using .zip file
# print("Extracting zip file...")
seq_data = load_sequences(results_file.temporary_file_path())
try:
seq_data_fh = results_file.temporary_file_path()
except AttributeError:
seq_data_fh = results_file.file
seq_data = load_sequences(seq_data_fh)
if len(seq_data) != len(wells):
raise ValidationError(
{
Expand All @@ -622,17 +645,33 @@ def upload_sequencing_run_results(self, request, pk, submission_idx):
)
# Validate wells expected vs wells supplied
try:
wells_supplied = set(
[_remove_zero_pad_well_name(w[-3:]) for w in seq_data.keys()]
)
wells_supplied = [_extract_plate_and_well(w) for w in seq_data.keys()]
except IndexError:
raise ValidationError(
{
"file": "Unable to parse well names. "
"Ensure all .seq filenames end with a well."
}
)
wells_expected = set([PlateLocations.labels[loc - 1] for loc in wells])
# Plateless matching for legacy datasets
plateless_matching = any([w[0] is None for w in wells_supplied])
if plateless_matching:
# Check for duplicates in the expected well names, and error if so
wells_expected_list = [w[1] for w in wells]
wells_expected = set(wells_expected_list)
if len(wells_expected_list) != len(wells_expected):
raise ValidationError(
{
"file": "Using legacy match (no plate numbers) but duplicate "
"wells found. Please re-upload with plate numbers in .seq "
"filenames."
}
)
wells_supplied = set([w[1] for w in wells_supplied])
else:
wells_expected = set(wells)
wells_supplied = set(wells_supplied)

if wells_expected - wells_supplied:
raise ValidationError(
{
Expand Down

0 comments on commit 7b0ee0d

Please sign in to comment.