Skip to content

Commit

Permalink
Merge pull request #59 from Edinburgh-Genome-Foundry/dev
Browse files Browse the repository at this point in the history
v3.2.7
  • Loading branch information
veghp authored Jun 16, 2021
2 parents 9c72428 + 0678f74 commit db0b606
Show file tree
Hide file tree
Showing 13 changed files with 139 additions and 78 deletions.
14 changes: 8 additions & 6 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
DNA Chisel - a versatile sequence optimizer
===========================================

.. image:: https://travis-ci.org/Edinburgh-Genome-Foundry/DnaChisel.svg?branch=master
:target: https://travis-ci.org/Edinburgh-Genome-Foundry/DnaChisel
.. image:: https://travis-ci.com/Edinburgh-Genome-Foundry/DnaChisel.svg?branch=master
:target: https://travis-ci.com/Edinburgh-Genome-Foundry/DnaChisel
:alt: Travis CI build status

.. image:: https://coveralls.io/repos/github/Edinburgh-Genome-Foundry/DnaChisel/badge.svg?branch=master
Expand Down Expand Up @@ -161,18 +161,20 @@ Installation
DNA Chisel requires Python 3, and can be installed via a pip command:

.. code::
sudo pip install dnachisel # <= minimal install without reports support
sudo pip install dnachisel[reports] # <= full install with all dependencies
pip install dnachisel # <= minimal install without reports support
pip install dnachisel[reports] # <= full install with all dependencies
The full installation using ``dnachisel[reports]`` downloads heavier libraries
(Matplotlib, PDF reports, sequenticon) for report generation, but is highly
recommended to use DNA Chisel interactively via Python scripts.
recommended to use DNA Chisel interactively via Python scripts. Also install
`Geneblocks <https://edinburgh-genome-foundry.github.io/Geneblocks>`_ and its
dependencies if you wish to include a plot of sequence edits in the report.

Alternatively, you can unzip the sources in a folder and type

.. code::
sudo python setup.py install
python setup.py install
Optionally, also install Bowtie to be able to use ``AvoidMatches`` (which
removes short homologies with existing genomes). On Ubuntu:
Expand Down
38 changes: 16 additions & 22 deletions dnachisel/Location.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,13 @@ class Location:
Parameters
----------
start
Lowest position index of the segment
Lowest position index of the segment.
end
Highest position index of the segment
Highest position index of the segment.
strand
Either 1 or -1 for sense or anti-sense orientation.
"""

__slots__ = ["strand", "start", "end"]
Expand All @@ -54,12 +53,7 @@ def overlap_region(self, other_location):
return None

def extended(
self,
extension_length,
lower_limit=0,
upper_limit=None,
left=True,
right=True,
self, extension_length, lower_limit=0, upper_limit=None, left=True, right=True,
):
"""Extend the location by a few basepairs on each side."""

Expand Down Expand Up @@ -103,27 +97,27 @@ def __lt__(self, other):
return self.to_tuple() < other.to_tuple()

def __add__(self, number):
"""Return the location shifted by the number"""
"""Return the location shifted by the number."""
return Location(self.start + number, self.end + number, self.strand)

def __sub__(self, number):
"""Return the location shifted by the number"""
"""Return the location shifted by the number."""
return Location(self.start - number, self.end - number, self.strand)

def __repr__(self):
"""Represent"""
"""Represent."""
result = "%d-%d" % (self.start, self.end)
if self.strand is not None:
result += {1: "(+)", -1: "(-)", 0: ""}[self.strand]
return result

def __len__(self):
"""Size of the location"""
"""Size of the location."""
return self.end - self.start

@staticmethod
def merge_overlapping_locations(locations):
"""Return a list of locations obtained by mergin all overlapping."""
"""Return a list of locations obtained by merging all overlapping."""
if len(locations) == 0:
return locations
locations = sorted(locations)
Expand Down Expand Up @@ -159,17 +153,20 @@ def from_data(location_data):
This method is used in particular in every built-in specification to
quickly standardize the input location.
``location_data`` can be a tuple (start, end) or (start, end, strand),
or a Biopython FeatureLocation, or a Location instance. In any case,
a new Location object will be returned.
a new Location object will be returned.
"""
if location_data is None:
return None
if isinstance(location_data, (tuple, list)):
return Location.from_tuple(location_data)
if isinstance(location_data, FeatureLocation):
return Location.from_biopython_location(location_data)
feature_location = Location.from_biopython_location(location_data)
if feature_location.strand is None:
feature_location.strand = 0
return feature_location
if isinstance(location_data, Location):
return Location(
location_data.start, location_data.end, location_data.strand
Expand All @@ -178,16 +175,13 @@ def from_data(location_data):
def to_biopython_location(self):
"""Return a Biopython FeatureLocation equivalent to the location."""
start, end, strand = [
None if e is None else int(e)
for e in [self.start, self.end, self.strand]
None if e is None else int(e) for e in [self.start, self.end, self.strand]
]
return FeatureLocation(start, end, strand)

def to_biopython_feature(self, feature_type="misc_feature", **qualifiers):
"""Return a Biopython SeqFeature with same location and custom
qualifiers."""
return SeqFeature(
self.to_biopython_location(),
type=feature_type,
qualifiers=qualifiers,
self.to_biopython_location(), type=feature_type, qualifiers=qualifiers,
)
24 changes: 10 additions & 14 deletions dnachisel/builtin_specifications/EnforcePatternOccurence.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
"""Implement AvoidPattern"""
"""Implement EnforcePatternOccurence"""

from ..MutationSpace import MutationSpace
from ..SequencePattern import SequencePattern, DnaNotationPattern
from ..Location import Location
from ..biotools import reverse_complement
from ..DnaOptimizationProblem.DnaOptimizationProblem import (
DnaOptimizationProblem,
)
from ..DnaOptimizationProblem.DnaOptimizationProblem import DnaOptimizationProblem
from ..DnaOptimizationProblem.NoSolutionError import NoSolutionError
from ..Specification import Specification, SpecEvaluation

from .EnforceSequence import EnforceSequence


class EnforcePatternOccurence(Specification):
"""Enforce a number of occurences of the given pattern in the sequence.
"""Enforce a number of occurrences of the given pattern in the sequence.
Shorthand for annotations: "insert" (although this specification can be
used to both insert new occurrences of a pattern, or destroy supernumerary
Expand All @@ -27,14 +25,14 @@ class EnforcePatternOccurence(Specification):
"BsmBI_site", etc.
occurences
Desired number of occurences of the pattern.
Desired number of occurrences of the pattern.
location
Location of the DNA segment on which to enforce the pattern e.g.
``Location(10, 45, 1)``
``Location(10, 45, 1)``.
center
If true, new inserted patterns will prioritize locations at the center
If True, new inserted patterns will prioritize locations at the center
of the specification's location. Else the insertion will happen at
the beginning of the location.
Expand All @@ -44,7 +42,7 @@ class EnforcePatternOccurence(Specification):
the pattern could be on both strands (otherwise, only the
feature's strand will be considered).
(2) if you want to create a specification without preset location, but
with a set strand: ``EnforcePatternOccurence('BsmBI_site', strand=1)``
with a set strand: ``EnforcePatternOccurence('BsmBI_site', strand=1)``.
"""

best_possible_score = 0
Expand Down Expand Up @@ -115,8 +113,8 @@ def localized(self, location, problem=None):
def insert_pattern_in_problem(self, problem, reverse=False):
"""Insert the pattern in the problem's sequence by successive tries.
This heuristic is attempted to get the number of occurences in the
pattern from 0 to some number
This heuristic is attempted to bring the number of occurrences of the
pattern from 0 to some number.
"""
sequence_to_insert = self.pattern.sequence
if reverse:
Expand Down Expand Up @@ -169,9 +167,7 @@ def resolution_heuristic(self, problem):
return
n_matches = len(evaluation.data["matches"])
if n_matches < self.occurences:
other_constraints = [
c for c in problem.constraints if c is not self
]
other_constraints = [c for c in problem.constraints if c is not self]
new_problem = problem
for i in range(self.occurences - n_matches):
new_occurence_cst = self.copy_with_changes(
Expand Down
50 changes: 30 additions & 20 deletions dnachisel/builtin_specifications/EnforceSequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@

from ..Specification import Specification, SpecEvaluation
from ..Location import Location
from ..biotools import (group_nearby_indices,
reverse_complement,
IUPAC_NOTATION)
from ..biotools import group_nearby_indices, reverse_complement, IUPAC_NOTATION


class EnforceSequence(Specification):
Expand All @@ -27,10 +25,11 @@ class EnforceSequence(Specification):
``Location(10, 45, 1)`` or simply ``(10, 45, 1)``
"""

localization_interval_length = 6 # used when optimizing
best_possible_score = 0
enforced_by_nucleotide_restrictions = True
shorthand_name = 'sequence'
shorthand_name = "sequence"

def __init__(self, sequence=None, location=None, boost=1.0):
"""Initialize."""
Expand All @@ -56,11 +55,13 @@ def evaluate(self, problem):
in nucleotides equal to ``localization_interval_length``.
"""
sequence = self.location.extract_sequence(problem.sequence)
discrepancies = np.array([
i
for i, nuc in enumerate(sequence)
if nuc not in IUPAC_NOTATION[self.sequence[i]]
])
discrepancies = np.array(
[
i
for i, nuc in enumerate(sequence)
if nuc not in IUPAC_NOTATION[self.sequence[i]]
]
)

if self.location.strand == -1:
discrepancies = self.location.end - discrepancies
Expand All @@ -69,13 +70,14 @@ def evaluate(self, problem):
intervals = [
(r[0], r[-1] + 1)
for r in group_nearby_indices(
discrepancies,
max_group_spread=self.localization_interval_length)
discrepancies, max_group_spread=self.localization_interval_length
)
]
locations = [Location(start, end, 1) for start, end in intervals]

return SpecEvaluation(self, problem, score=-len(discrepancies),
locations=locations)
return SpecEvaluation(
self, problem, score=-len(discrepancies), locations=locations
)

def localized(self, location, problem=None):
"""Localize the spec to the overlap of its location and the new."""
Expand All @@ -92,8 +94,7 @@ def localized(self, location, problem=None):
end = new_location.end - self.location.start
new_sequence = self.sequence[start:end]

return self.copy_with_changes(location=new_location,
sequence=new_sequence)
return self.copy_with_changes(location=new_location, sequence=new_sequence)

def restrict_nucleotides(self, sequence, location=None):
"""When localizing, forbid any nucleotide but the one already there."""
Expand All @@ -106,13 +107,22 @@ def restrict_nucleotides(self, sequence, location=None):
start, end = new_location.start, new_location.end
if self.location.strand == -1:
lend = self.location.end
return [(i, set(reverse_complement(n) for n in
IUPAC_NOTATION[self.sequence[lend - i]]))
for i in range(start, end)]
return [
(
i,
set(
reverse_complement(n)
for n in IUPAC_NOTATION[self.sequence[lend - i - 1]]
),
)
for i in range(start, end)
]
else:
lstart = self.location.start
return [(i, IUPAC_NOTATION[self.sequence[i - lstart]])
for i in range(start, end)]
return [
(i, IUPAC_NOTATION[self.sequence[i - lstart]])
for i in range(start, end)
]

def __repr__(self):
"""Represent."""
Expand Down
20 changes: 10 additions & 10 deletions dnachisel/builtin_specifications/codon_optimization/HarmonizeRCA.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class HarmonizeRCA(BaseCodonOptimizationClass):
codon_usage_table
Optional - can be provided instead of ``species``. A dict of the form
``{'*': {"TGA": 0.112, "TAA": 0.68}, 'K': ...}`` giving the RSCU table
(relative usage of each codon).
(relative usage of each codon).
original_species
Name or TaxID of the species the original sequence was taken from. This
Expand All @@ -65,9 +65,8 @@ class HarmonizeRCA(BaseCodonOptimizationClass):
Claassens et al., Improving heterologous membrane protein
production in Escherichia coli by combining transcriptional tuning and
codon usage algorithms. PLOS One, 2017
"""

shorthand_name = "harmonize_rca"

def __init__(
Expand All @@ -79,8 +78,8 @@ def __init__(
location=None,
boost=1,
):
if isinstance(species, str) and "=>" in species:
species, original_species = species.split('=>')
if isinstance(species, str) and "->" in species:
original_species, species = species.split("->")
species = species.strip()
original_species = original_species.strip()
BaseCodonOptimizationClass.__init__(
Expand Down Expand Up @@ -140,8 +139,7 @@ def evaluate(self, problem):
for original_codon in self.original_codons
]
rca_in_target_species = [
self.codon_usage_table["RCA"][codon]
for codon in codons
self.codon_usage_table["RCA"][codon] for codon in codons
]
discrepancies = abs(
np.array(rca_in_original_species) - np.array(rca_in_target_species)
Expand All @@ -155,12 +153,14 @@ def evaluate(self, problem):
problem,
score=score,
locations=locations,
message="Codon harmonization on %s scored %.02E"
% (self.location, score),
message="Codon harmonization on %s scored %.02E" % (self.location, score),
)

def label_parameters(self):
return ["(custom table)" if self.species is None else self.species]
if self.species is None:
return ["(custom table)"]
else:
return [self.original_species + " -> " + self.species]

def short_label(self):
result = "best-codon-optimize"
Expand Down
3 changes: 1 addition & 2 deletions dnachisel/reports/assets/optimization_report.pug
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ if constraints_evaluations.all_evaluations_pass()
img#diff-figure(src="{{ diffs_figure_data }}")
else
p(style='font-size: 8px').
Note: install Geneblocks to see a friendly plot of sequence edits here.
(pip install geneblocks)
Note: install Geneblocks (https://edinburgh-genome-foundry.github.io/Geneblocks) to see a plot of sequence edits here.
p The optimization created {{edits}} edits. See attached Genbank file for the details.

h2.section Constraints
Expand Down
2 changes: 1 addition & 1 deletion dnachisel/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "3.2.6"
__version__ = "3.2.7"
2 changes: 1 addition & 1 deletion docs/_static/images/genbank_annotations/examples.csv
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ change_objective,~change,8-15,CAATGCACACATCGATTCATCACTCAT,8-15
codon_optimize,~CodonOptimize(e_coli),2-23+,CAATGCACACATCGATTCATCACTCAT,none
use_best_codon,~use_best_codon(e_coli),2-23+,CAATGCACACATCGATTCATCACTCAT,none
match_codon_usage,~match_codon_usage,2-23+,CAATGCACACATCGATTCATCACTCAT,none
harmonize_rca,~harmonize_rca(e_coli => h_sapiens),2-23+,CAATGCACACATCGATTCATCACTCAT,none
harmonize_rca,~harmonize_rca(e_coli -> h_sapiens),2-23+,CAATGCACACATCGATTCATCACTCAT,none
allow_primer,"@primer(tmin=50, tmax=70)",2-24,CAATGCACACATCGATTCATCACTCAT,none
all_unique_kmers,@all_unique_kmers(k=15),2-24,CAATGCACACATCGATTCATGCACATT,"2-8, 18-24"
all_unique_kmers_here,"@all_unique_kmers(k=15, here)",2-24,CAATGCACACATCGATTCATGCACATT,"2-8, 18-24"
Expand Down
Binary file modified docs/_static/images/genbank_annotations/harmonize_rca.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit db0b606

Please sign in to comment.