-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
WIP: Upload and display sequencing results
- Loading branch information
Showing
10 changed files
with
1,590 additions
and
511 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
import itertools | ||
import os | ||
import sys | ||
import zipfile | ||
|
||
import vquest.config | ||
import vquest.vq | ||
|
||
START_CODON = "ATG" | ||
|
||
|
||
def file_name_to_sequence_name(fn):
    """Derive the sequence name from a file name by dropping a trailing ".seq".

    File names that do not end in ".seq" are returned unchanged.
    """
    # The whole file name (minus extension) is the sequence name, as requested by Lauren
    suffix = ".seq"
    if not fn.endswith(suffix):
        return fn
    return fn[: -len(suffix)]
|
||
|
||
def trim_sequence(seq):
    """Return everything in ``seq`` after the first start codon.

    The start codon itself is not included in the result. An empty string is
    returned when ``seq`` contains no start codon.
    """
    _, codon, remainder = seq.partition(START_CODON)
    # partition() yields an empty separator when START_CODON was not found
    return remainder if codon else ""
|
||
|
||
def _load_sequences_zip(zip_file):
    """Load trimmed sequences from the .seq members of a zip archive.

    Returns a dict mapping each member's base name (without ".seq") to its
    trimmed sequence; members with no start codon are left out.
    """
    sequences = {}
    with zipfile.ZipFile(zip_file, "r") as archive:
        for member in archive.namelist():
            # Only .seq members carry sequence data
            if not member.endswith(".seq"):
                continue

            # Read and decode the member's contents
            with archive.open(member, "r") as handle:
                raw = handle.read().decode("utf-8")

            # Keep only what follows the start codon; empty means none was found
            trimmed = trim_sequence(raw)
            if not trimmed:
                continue

            # Key by the short identifier: drop the archive path and extension
            key = os.path.basename(file_name_to_sequence_name(member))
            sequences[key] = trimmed

    return sequences
|
||
|
||
def load_sequences(directory_or_zip):
    """Load a set of sequences from .seq files in a directory or .zip file.

    Args:
        directory_or_zip: Path to a directory containing .seq files, or to a
            .zip archive of them. Any path that is a regular file is handed
            to the zip loader.

    Returns:
        A dict mapping sequence name (the file name without ".seq") to the
        trimmed sequence. Files whose contents have no start codon are
        omitted, because trimming yields an empty string for them.
    """
    if os.path.isfile(directory_or_zip):
        return _load_sequences_zip(directory_or_zip)

    seq_data = {}
    for fn in os.listdir(directory_or_zip):
        if not fn.endswith(".seq"):
            continue

        # Convert the file name to a short sequence identifier
        seq_name = file_name_to_sequence_name(fn)

        # Read the .seq file. Decode explicitly as UTF-8 so that a directory
        # is read the same way as a zip archive, rather than depending on the
        # platform's default locale encoding.
        with open(os.path.join(directory_or_zip, fn), "r", encoding="utf-8") as f:
            seq = trim_sequence(f.read())

        # Add to dictionary of sequences, if a start codon was present
        if seq:
            seq_data[seq_name] = seq

    return seq_data
|
||
|
||
def _chunks(data, size=None): | ||
"""Split a dict into multiple dicts of specified max size (iterator).""" | ||
if size is None: | ||
size = sys.maxsize | ||
it = iter(data) | ||
for _ in range(0, len(data), size): | ||
yield {k: data[k] for k in itertools.islice(it, size)} | ||
|
||
|
||
def as_fasta_files(seq_data, max_file_size=50):
    """Render a dict of sequence names and data as FASTA-format strings.

    max_file_size is the maximum number of sequences placed in each string
    (For IMGT, this is 50). Returns a list with one string per batch.
    """

    def _render(batch):
        # One header line and one sequence line per record
        records = []
        for name, seq in batch.items():
            records.append(f"> {name}\n{seq}")
        return "\n".join(records)

    return [_render(batch) for batch in _chunks(seq_data, max_file_size)]
|
||
|
||
def run_vquest(fasta_data, species="alpaca", receptor="IG"):
    """Submit inline FASTA data to vquest and return its result.

    A copy of the vquest default configuration is filled in with the given
    species, receptor/locus type and sequences before the call.
    """
    import logging

    from vquest import LOGGER

    # Set vquest logging to DEBUG
    LOGGER.setLevel(logging.DEBUG)

    # Work on a copy so the library defaults are left untouched
    settings = vquest.config.DEFAULTS.copy()
    settings["inputType"] = "inline"
    settings["species"] = species
    settings["receptorOrLocusType"] = receptor
    settings["sequences"] = fasta_data

    return vquest.vq.vquest(settings)
61 changes: 61 additions & 0 deletions
61
backend/antigenapi/migrations/0005_sequencingrunresults_and_more.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
# Generated by Django 4.2.5 on 2023-10-03 20:40 | ||
|
||
import django.db.models.deletion | ||
from django.conf import settings | ||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Create the SequencingRunResults model and its uniqueness constraint."""

    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ("antigenapi", "0004_remove_sequencingrun_results_date_and_more"),
    ]

    operations = [
        migrations.CreateModel(
            name="SequencingRunResults",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("seq", models.PositiveSmallIntegerField()),
                # The three uploaded files all share one upload directory
                (
                    "seqres_file",
                    models.FileField(upload_to="uploads/sequencingresults/"),
                ),
                (
                    "parameters_file",
                    models.FileField(upload_to="uploads/sequencingresults/"),
                ),
                (
                    "airr_file",
                    models.FileField(upload_to="uploads/sequencingresults/"),
                ),
                ("added_date", models.DateTimeField(auto_now_add=True)),
                # Both foreign keys use PROTECT on delete
                (
                    "added_by",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.PROTECT,
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
                (
                    "sequencing_run",
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.PROTECT,
                        to="antigenapi.sequencingrun",
                    ),
                ),
            ],
        ),
        # A given (sequencing_run, seq) pair may appear only once
        migrations.AddConstraint(
            model_name="sequencingrunresults",
            constraint=models.UniqueConstraint(
                fields=("sequencing_run", "seq"),
                name="unique_seqrun_seq",
            ),
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.