Skip to content

Commit

Permalink
Check various lengths
Browse files Browse the repository at this point in the history
  • Loading branch information
veghp committed Apr 18, 2024
1 parent c76f8c0 commit c9e5b89
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 5 deletions.
31 changes: 30 additions & 1 deletion seqreport/SeqCollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,23 @@ class SeqCollection:
> Fix overhead cost for each sequence part (cloning, delivery costs etc).
**currency_symbol**
> The currency symbol to display in the report.
> The currency symbol (or string) to display in the report (`str`).
**projectname**
> The name of the project (`str`).
**comments**
> Any comments to be included in the report (`str`).
**min_length**
> Check that all sequences are at least this long (`int`).
**max_length**
> Check that all sequences are at most this long (`int`).
**name_length**
> Check which sequence IDs are longer than this cutoff (`int`). GenBank limits the
number of characters in a record name.
"""

def __init__(
Expand All @@ -36,25 +46,44 @@ def __init__(
currency_symbol="£",
projectname="",
comments="",
min_length=0,
max_length=0,
name_length=15, # max seq record name length. Genbank character limit.
):
self.sequences = records
self.cost_per_base = cost_per_base
self.cost_per_seq = cost_per_seq
self.currency_symbol = currency_symbol
self.projectname = projectname
self.comments = comments
self.min_length = min_length
self.max_length = max_length
self.name_length = name_length
self.calculate_values()

def calculate_values(self):
    """Recompute the derived statistics of the collection.

    Reads ``self.sequences`` together with the cost and length settings and
    (re)sets the following attributes:

    - ``n_seq``: number of records.
    - ``n_bp``: summed length of ``record.seq`` over all records.
    - ``cost``: ``n_seq * cost_per_seq + n_bp * cost_per_base``.
    - ``too_short``: IDs of records shorter than ``min_length``.
    - ``too_long``: IDs of records longer than ``max_length``; the check is
      skipped when ``max_length`` is 0 (the default).
    - ``long_names``: IDs with more than ``name_length`` characters.

    The cost and threshold settings are normalised in place (costs to
    ``float``, thresholds to ``int``), so the method can be called again
    after the attributes have been overwritten with strings.
    """
    self.cost_per_base = float(self.cost_per_base)  # could be str after update
    self.cost_per_seq = float(self.cost_per_seq)
    # The thresholds are compared against integer lengths below; coerce them
    # like the costs so that a string value does not raise a TypeError.
    # NOTE(review): assumes thresholds are updated the same way as the costs.
    self.min_length = int(self.min_length)
    self.max_length = int(self.max_length)
    self.name_length = int(self.name_length)

    self.n_seq = len(self.sequences)
    self.n_bp = sum(len(part.seq) for part in self.sequences)
    self.cost = self.n_seq * self.cost_per_seq + self.n_bp * self.cost_per_base

    self.too_short = []
    self.too_long = []
    self.long_names = []
    check_max = self.max_length > 0  # 0 means "no upper limit"
    for record in self.sequences:
        record_length = len(record)
        if record_length < self.min_length:
            self.too_short.append(record.id)
        if check_max and record_length > self.max_length:
            self.too_long.append(record.id)
        if len(record.id) > self.name_length:
            self.long_names.append(record.id)


def read_fasta(fasta):
"""Read a FASTA sequence file into a list of records.
Expand Down
21 changes: 21 additions & 0 deletions seqreport/report_assets/seq_report.pug
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,27 @@ p.
p.
Estimated cost: <b>{{ seqcollection.currency_symbol }} {{ seqcollection.cost }}</b>

if seqcollection.too_short
p.
Sequences shorter than <b>{{ seqcollection.min_length }}</b> bp: {{ seqcollection.too_short }}
else
p.
None of the sequences are shorter than <b>{{ seqcollection.min_length }}</b> bp.

if seqcollection.too_long
p.
Sequences longer than <b>{{ seqcollection.max_length }}</b> bp: {{ seqcollection.too_long }}
else
p.
None of the sequences are longer than <b>{{ seqcollection.max_length }}</b> bp.

if seqcollection.long_names
p.
Sequence IDs longer than <b>{{ seqcollection.name_length }}</b> characters: {{ seqcollection.long_names }}
else
p.
None of the IDs are longer than <b>{{ seqcollection.name_length }}</b> characters.

if seqcollection.comments
p.
Comments: <i>{{ seqcollection.comments }}</i>
Expand Down
4 changes: 2 additions & 2 deletions tests/data/test.fa
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
>seq1
ACGGCCAATTCCTCGTAGTTGTAGT
ACGGCCAATT
>seq2
CTCAATCAGATCGATTGCAATGCACCATCGGG
>seq3
>sequence3
TGATGATGAAAGACAGCCGATGACATGGCGTACTACAGACGC
10 changes: 8 additions & 2 deletions tests/test_SeqCollection.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,17 @@ def test_SeqCollection():
records=seq_records,
projectname="EGF24",
comments="This is a test sequence set.",
min_length=20,
max_length=40,
name_length=5,
)
assert seq_coll.n_seq == 3
assert seq_coll.n_bp == 99
assert seq_coll.n_bp == 84
assert seq_coll.projectname == "EGF24"
assert seq_coll != ""
assert seq_coll.comments != ""
assert len(seq_coll.too_short) == 1
assert len(seq_coll.too_long) == 1
assert len(seq_coll.long_names) == 1


def test_read_fasta():
Expand Down

0 comments on commit c9e5b89

Please sign in to comment.