Merge pull request #59 from Edinburgh-Genome-Foundry/dev

v3.2.7
Edinburgh-Genome-Foundry · Jun 16, 2021 · db0b606 · db0b606
2 parents 9c72428 + 0678f74
commit db0b606
Show file tree

Hide file tree

Showing 13 changed files with 139 additions and 78 deletions.
diff --git a/README.rst b/README.rst
@@ -8,8 +8,8 @@
 DNA Chisel - a versatile sequence optimizer
 ===========================================
 
-.. image:: https://travis-ci.org/Edinburgh-Genome-Foundry/DnaChisel.svg?branch=master
-   :target: https://travis-ci.org/Edinburgh-Genome-Foundry/DnaChisel
+.. image:: https://travis-ci.com/Edinburgh-Genome-Foundry/DnaChisel.svg?branch=master
+   :target: https://travis-ci.com/Edinburgh-Genome-Foundry/DnaChisel
    :alt: Travis CI build status
 
 .. image:: https://coveralls.io/repos/github/Edinburgh-Genome-Foundry/DnaChisel/badge.svg?branch=master
@@ -161,18 +161,20 @@ Installation
 DNA Chisel requires Python 3, and can be installed via a pip command:
 
 .. code::
-    sudo pip install dnachisel     # <= minimal install without reports support
-    sudo pip install dnachisel[reports] # <= full install with all dependencies
+    pip install dnachisel     # <= minimal install without reports support
+    pip install dnachisel[reports] # <= full install with all dependencies
 
 The full installation using ``dnachisel[reports]`` downloads heavier libraries
 (Matplotlib, PDF reports, sequenticon) for report generation, but is highly
-recommended to use DNA Chisel interactively via Python scripts.
+recommended to use DNA Chisel interactively via Python scripts. Also install
+`Geneblocks <https://edinburgh-genome-foundry.github.io/Geneblocks>`_ and its
+dependencies if you wish to include a plot of sequence edits in the report.
 
 Alternatively, you can unzip the sources in a folder and type
 
 .. code::
 
-    sudo python setup.py install
+    python setup.py install
 
 Optionally, also install Bowtie to be able to use ``AvoidMatches`` (which
 removes short homologies with existing genomes). On Ubuntu:

diff --git a/dnachisel/Location.py b/dnachisel/Location.py
@@ -21,14 +21,13 @@ class Location:
     Parameters
     ----------
     start
-      Lowest position index of the segment
+      Lowest position index of the segment.
 
     end
-      Highest position index of the segment
+      Highest position index of the segment.
 
     strand
       Either 1 or -1 for sense or anti-sense orientation.
-
     """
 
     __slots__ = ["strand", "start", "end"]
@@ -54,12 +53,7 @@ def overlap_region(self, other_location):
             return None
 
     def extended(
-        self,
-        extension_length,
-        lower_limit=0,
-        upper_limit=None,
-        left=True,
-        right=True,
+        self, extension_length, lower_limit=0, upper_limit=None, left=True, right=True,
     ):
         """Extend the location by a few basepairs on each side."""
 
@@ -103,27 +97,27 @@ def __lt__(self, other):
         return self.to_tuple() < other.to_tuple()
 
     def __add__(self, number):
-        """Return the location shifted by the number"""
+        """Return the location shifted by the number."""
         return Location(self.start + number, self.end + number, self.strand)
 
     def __sub__(self, number):
-        """Return the location shifted by the number"""
+        """Return the location shifted by the number."""
         return Location(self.start - number, self.end - number, self.strand)
 
     def __repr__(self):
-        """Represent"""
+        """Represent."""
         result = "%d-%d" % (self.start, self.end)
         if self.strand is not None:
             result += {1: "(+)", -1: "(-)", 0: ""}[self.strand]
         return result
 
     def __len__(self):
-        """Size of the location"""
+        """Size of the location."""
         return self.end - self.start
 
     @staticmethod
     def merge_overlapping_locations(locations):
-        """Return a list of locations obtained by mergin all overlapping."""
+        """Return a list of locations obtained by merging all overlapping."""
         if len(locations) == 0:
             return locations
         locations = sorted(locations)
@@ -159,17 +153,20 @@ def from_data(location_data):
 
         This method is used in particular in every built-in specification to
         quickly standardize the input location.
-        
+
         ``location_data`` can be a tuple (start, end) or (start, end, strand),
         or a Biopython FeatureLocation, or a Location instance. In any case,
-        a new Location object will be returned. 
+        a new Location object will be returned.
         """
         if location_data is None:
             return None
         if isinstance(location_data, (tuple, list)):
             return Location.from_tuple(location_data)
         if isinstance(location_data, FeatureLocation):
-            return Location.from_biopython_location(location_data)
+            feature_location = Location.from_biopython_location(location_data)
+            if feature_location.strand is None:
+                feature_location.strand = 0
+            return feature_location
         if isinstance(location_data, Location):
             return Location(
                 location_data.start, location_data.end, location_data.strand
@@ -178,16 +175,13 @@ def from_data(location_data):
     def to_biopython_location(self):
         """Return a Biopython FeatureLocation equivalent to the location."""
         start, end, strand = [
-            None if e is None else int(e)
-            for e in [self.start, self.end, self.strand]
+            None if e is None else int(e) for e in [self.start, self.end, self.strand]
         ]
         return FeatureLocation(start, end, strand)
 
     def to_biopython_feature(self, feature_type="misc_feature", **qualifiers):
         """Return a Biopython SeqFeature with same location and custom
         qualifiers."""
         return SeqFeature(
-            self.to_biopython_location(),
-            type=feature_type,
-            qualifiers=qualifiers,
+            self.to_biopython_location(), type=feature_type, qualifiers=qualifiers,
         )
diff --git a/dnachisel/builtin_specifications/EnforcePatternOccurence.py b/dnachisel/builtin_specifications/EnforcePatternOccurence.py
@@ -1,20 +1,18 @@
-"""Implement AvoidPattern"""
+"""Implement EnforcePatternOccurence"""
 
 from ..MutationSpace import MutationSpace
 from ..SequencePattern import SequencePattern, DnaNotationPattern
 from ..Location import Location
 from ..biotools import reverse_complement
-from ..DnaOptimizationProblem.DnaOptimizationProblem import (
-    DnaOptimizationProblem,
-)
+from ..DnaOptimizationProblem.DnaOptimizationProblem import DnaOptimizationProblem
 from ..DnaOptimizationProblem.NoSolutionError import NoSolutionError
 from ..Specification import Specification, SpecEvaluation
 
 from .EnforceSequence import EnforceSequence
 
 
 class EnforcePatternOccurence(Specification):
-    """Enforce a number of occurences of the given pattern in the sequence.
+    """Enforce a number of occurrences of the given pattern in the sequence.
 
     Shorthand for annotations: "insert" (although this specification can be
     used to both insert new occurrences of a pattern, or destroy supernumerary
@@ -27,14 +25,14 @@ class EnforcePatternOccurence(Specification):
       "BsmBI_site", etc.
 
     occurences
-      Desired number of occurences of the pattern.
+      Desired number of occurrences of the pattern.
 
     location
       Location of the DNA segment on which to enforce the pattern e.g.
-      ``Location(10, 45, 1)``
+      ``Location(10, 45, 1)``.
 
     center
-      If true, new inserted patterns will prioritize locations at the center
+      If True, new inserted patterns will prioritize locations at the center
       of the specification's location. Else the insertion will happen at
       the beginning of the location.
 
@@ -44,7 +42,7 @@ class EnforcePatternOccurence(Specification):
       the pattern could be on both strands (otherwise, only the
       feature's strand will be considered).
       (2) if you want to create a specification without preset location, but
-      with a set strand: ``EnforcePatternOccurence('BsmBI_site', strand=1)``
+      with a set strand: ``EnforcePatternOccurence('BsmBI_site', strand=1)``.
     """
 
     best_possible_score = 0
@@ -115,8 +113,8 @@ def localized(self, location, problem=None):
     def insert_pattern_in_problem(self, problem, reverse=False):
         """Insert the pattern in the problem's sequence by successive tries.
 
-        This heuristic is attempted to get the number of occurences in the
-        pattern from 0 to some number
+        This heuristic is attempted to get the number of occurrences in the
+        pattern from 0 to some number.
         """
         sequence_to_insert = self.pattern.sequence
         if reverse:
@@ -169,9 +167,7 @@ def resolution_heuristic(self, problem):
                 return
             n_matches = len(evaluation.data["matches"])
             if n_matches < self.occurences:
-                other_constraints = [
-                    c for c in problem.constraints if c is not self
-                ]
+                other_constraints = [c for c in problem.constraints if c is not self]
                 new_problem = problem
                 for i in range(self.occurences - n_matches):
                     new_occurence_cst = self.copy_with_changes(

diff --git a/dnachisel/builtin_specifications/EnforceSequence.py b/dnachisel/builtin_specifications/EnforceSequence.py
@@ -6,9 +6,7 @@
 
 from ..Specification import Specification, SpecEvaluation
 from ..Location import Location
-from ..biotools import (group_nearby_indices,
-                                reverse_complement,
-                                IUPAC_NOTATION)
+from ..biotools import group_nearby_indices, reverse_complement, IUPAC_NOTATION
 
 
 class EnforceSequence(Specification):
@@ -27,10 +25,11 @@ class EnforceSequence(Specification):
       ``Location(10, 45, 1)`` or simply ``(10, 45, 1)``
 
     """
+
     localization_interval_length = 6  # used when optimizing
     best_possible_score = 0
     enforced_by_nucleotide_restrictions = True
-    shorthand_name = 'sequence'
+    shorthand_name = "sequence"
 
     def __init__(self, sequence=None, location=None, boost=1.0):
         """Initialize."""
@@ -56,11 +55,13 @@ def evaluate(self, problem):
         in nucleotides equal to ``localization_interval_length``.
         """
         sequence = self.location.extract_sequence(problem.sequence)
-        discrepancies = np.array([
-            i
-            for i, nuc in enumerate(sequence)
-            if nuc not in IUPAC_NOTATION[self.sequence[i]]
-        ])
+        discrepancies = np.array(
+            [
+                i
+                for i, nuc in enumerate(sequence)
+                if nuc not in IUPAC_NOTATION[self.sequence[i]]
+            ]
+        )
 
         if self.location.strand == -1:
             discrepancies = self.location.end - discrepancies
@@ -69,13 +70,14 @@ def evaluate(self, problem):
         intervals = [
             (r[0], r[-1] + 1)
             for r in group_nearby_indices(
-                discrepancies,
-                max_group_spread=self.localization_interval_length)
+                discrepancies, max_group_spread=self.localization_interval_length
+            )
         ]
         locations = [Location(start, end, 1) for start, end in intervals]
 
-        return SpecEvaluation(self, problem, score=-len(discrepancies),
-                              locations=locations)
+        return SpecEvaluation(
+            self, problem, score=-len(discrepancies), locations=locations
+        )
 
     def localized(self, location, problem=None):
         """Localize the spec to the overlap of its location and the new."""
@@ -92,8 +94,7 @@ def localized(self, location, problem=None):
                 end = new_location.end - self.location.start
             new_sequence = self.sequence[start:end]
 
-            return self.copy_with_changes(location=new_location,
-                                          sequence=new_sequence)
+            return self.copy_with_changes(location=new_location, sequence=new_sequence)
 
     def restrict_nucleotides(self, sequence, location=None):
         """When localizing, forbid any nucleotide but the one already there."""
@@ -106,13 +107,22 @@ def restrict_nucleotides(self, sequence, location=None):
         start, end = new_location.start, new_location.end
         if self.location.strand == -1:
             lend = self.location.end
-            return [(i, set(reverse_complement(n) for n in
-                            IUPAC_NOTATION[self.sequence[lend - i]]))
-                    for i in range(start, end)]
+            return [
+                (
+                    i,
+                    set(
+                        reverse_complement(n)
+                        for n in IUPAC_NOTATION[self.sequence[lend - i - 1]]
+                    ),
+                )
+                for i in range(start, end)
+            ]
         else:
             lstart = self.location.start
-            return [(i, IUPAC_NOTATION[self.sequence[i - lstart]])
-                    for i in range(start, end)]
+            return [
+                (i, IUPAC_NOTATION[self.sequence[i - lstart]])
+                for i in range(start, end)
+            ]
 
     def __repr__(self):
         """Represent."""

diff --git a/dnachisel/builtin_specifications/codon_optimization/HarmonizeRCA.py b/dnachisel/builtin_specifications/codon_optimization/HarmonizeRCA.py
@@ -40,7 +40,7 @@ class HarmonizeRCA(BaseCodonOptimizationClass):
     codon_usage_table
       Optional - can be provided instead of ``species``. A dict of the form
       ``{'*': {"TGA": 0.112, "TAA": 0.68}, 'K': ...}`` giving the RSCU table
-      (relative usage of each codon). 
+      (relative usage of each codon).
 
     original_species
       Name or TaxID of the species the original sequence was taken from. This
@@ -65,9 +65,8 @@ class HarmonizeRCA(BaseCodonOptimizationClass):
     Claassens et al., Improving heterologous membrane protein
     production in Escherichia coli by combining transcriptional tuning and
     codon usage algorithms. PLOS One, 2017
-
     """
-    
+
     shorthand_name = "harmonize_rca"
 
     def __init__(
@@ -79,8 +78,8 @@ def __init__(
         location=None,
         boost=1,
     ):
-        if isinstance(species, str) and "=>" in species:
-            species, original_species = species.split('=>')
+        if isinstance(species, str) and "->" in species:
+            original_species, species = species.split("->")
             species = species.strip()
             original_species = original_species.strip()
         BaseCodonOptimizationClass.__init__(
@@ -140,8 +139,7 @@ def evaluate(self, problem):
             for original_codon in self.original_codons
         ]
         rca_in_target_species = [
-            self.codon_usage_table["RCA"][codon]
-            for codon in codons
+            self.codon_usage_table["RCA"][codon] for codon in codons
         ]
         discrepancies = abs(
             np.array(rca_in_original_species) - np.array(rca_in_target_species)
@@ -155,12 +153,14 @@ def evaluate(self, problem):
             problem,
             score=score,
             locations=locations,
-            message="Codon harmonization on %s scored %.02E"
-            % (self.location, score),
+            message="Codon harmonization on %s scored %.02E" % (self.location, score),
         )
 
     def label_parameters(self):
-        return ["(custom table)" if self.species is None else self.species]
+        if self.species is None:
+            return ["(custom table)"]
+        else:
+            return [self.original_species + " -> " + self.species]
 
     def short_label(self):
         result = "best-codon-optimize"

diff --git a/dnachisel/reports/assets/optimization_report.pug b/dnachisel/reports/assets/optimization_report.pug
@@ -45,8 +45,7 @@ if constraints_evaluations.all_evaluations_pass()
     img#diff-figure(src="{{ diffs_figure_data }}")
   else
     p(style='font-size: 8px').
-      Note: install Geneblocks to see a friendly plot of sequence edits here.
-      (pip install geneblocks)
+      Note: install Geneblocks (https://edinburgh-genome-foundry.github.io/Geneblocks) to see a plot of sequence edits here.
 p The optimization created {{edits}} edits. See attached Genbank file for the details.
 
 h2.section Constraints

diff --git a/dnachisel/version.py b/dnachisel/version.py
@@ -1 +1 @@
-__version__ = "3.2.6"
+__version__ = "3.2.7"
diff --git a/docs/_static/images/genbank_annotations/examples.csv b/docs/_static/images/genbank_annotations/examples.csv
@@ -17,7 +17,7 @@ change_objective,~change,8-15,CAATGCACACATCGATTCATCACTCAT,8-15
 codon_optimize,~CodonOptimize(e_coli),2-23+,CAATGCACACATCGATTCATCACTCAT,none
 use_best_codon,~use_best_codon(e_coli),2-23+,CAATGCACACATCGATTCATCACTCAT,none
 match_codon_usage,~match_codon_usage,2-23+,CAATGCACACATCGATTCATCACTCAT,none
-harmonize_rca,~harmonize_rca(e_coli => h_sapiens),2-23+,CAATGCACACATCGATTCATCACTCAT,none
+harmonize_rca,~harmonize_rca(e_coli -> h_sapiens),2-23+,CAATGCACACATCGATTCATCACTCAT,none
 allow_primer,"@primer(tmin=50, tmax=70)",2-24,CAATGCACACATCGATTCATCACTCAT,none
 all_unique_kmers,@all_unique_kmers(k=15),2-24,CAATGCACACATCGATTCATGCACATT,"2-8, 18-24"
 all_unique_kmers_here,"@all_unique_kmers(k=15, here)",2-24,CAATGCACACATCGATTCATGCACATT,"2-8, 18-24"

diff --git a/docs/_static/images/genbank_annotations/harmonize_rca.png b/docs/_static/images/genbank_annotations/harmonize_rca.png