diff --git a/CHANGES.md b/CHANGES.md index 7536e14..36100d6 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,23 +1,33 @@ # Changes since last version + +## v1.8.0 + +* Added support for selecting sub-alignments. See option `-sa` + + ## v1.7.2 * Ed Davies fixed a bug: sorting using option -so was broken + ## v1.7.1 * What?? + ## v1.7.0 * Added the option --alignment-index to support files with multiple MSAs. * Added two functions, view and glimpse, to enable use of alv in notebook environments. + ## v1.6.1 * Fixed accession abbreviation so that short accessions are left as they are. * Requiring python 3.5 or later, because Colorama has droppen support for 3.4. + ## v1.6.0 * New options: --only-variable and --only-variable-excluding-indels, contributed by nikostr. Constrains coloring @@ -25,18 +35,21 @@ * Fixed the --dotted option, which only worked with the first block for DNA sequences. Also improved the coloring which was too ugly in dotted mode (due to laziness). + ## v1.5.0 * New option: `-d` or `--dotted`: the first sequence in the output alignment is used as a template and for positions in subsequent sequences that are identical, a period ('.') is output instead of a symbol. * Adjustment: replacing blue with cyan in the DNA coloring scheme. + ## v1.4.0 * New option: `-r k` or `--random-accessions k` for only showing a sample of _k_ sequences. * New option: `-g` or `--glimpse`: display an informative cut-out of the input MSA, if it does not fit without scrolling or line-breaking. + ## v1.3.4 * For some reason, setup.py was/is not putting proper Markdown to PyPi. Solved it halfway. Weird issue. diff --git a/README.md b/README.md index a424eda..b4fdeb7 100644 --- a/README.md +++ b/README.md @@ -95,6 +95,14 @@ alv -k msa.fa | less -R The `-k` option ensures that `alv` keeps coloring the alignment (by default, piping and redirection removes colors), and the `-R` option instructs `less` to interpret color codes. 
+ +Choose to view a sub-alignment: +``` +alv -sa 30 60 msa.fa +``` +This selects and views columns 30 to 59 of msa.fa, keeping track of the "original" column indexes in the output. + + ## For developers Run `python setup.py develop test` for development install and to execute tests. diff --git a/THANKS.md b/THANKS.md index fbd4ddc..876c571 100644 --- a/THANKS.md +++ b/THANKS.md @@ -10,4 +10,5 @@ completely without "codons". * Michael Milton suggested the --alignment-index option. * Marina Herrera Sarrias suggested the notebook functionality. -* Ed Davies fixed a bug: sorting using option -so was broken \ No newline at end of file +* Ed Davies fixed a bug: sorting using option -so was broken. +* Artem Kushner pointed out the missing support for sub-alignments. \ No newline at end of file diff --git a/alv/alignment.py b/alv/alignment.py index 52c83da..ef3b946 100644 --- a/alv/alignment.py +++ b/alv/alignment.py @@ -11,10 +11,14 @@ class BaseAlignment: to access all alignments, and implementations for the subclasses with single-letter column widths. 
''' - def __init__(self, alignment): + def __init__(self, alignment, start=0, end=-1): self.al = alignment # Holder of the BioPython alignment object - self.al_length = alignment.get_alignment_length() self.type = None + self._start = start + self._end = alignment.get_alignment_length() + if end != -1: + self._end = min(end, self._end) + self.al_length = self._end - self._start self.column_width = 1 self._update_seq_index() self.columns = self._summarize_columns() @@ -121,13 +125,12 @@ def _compute_block_width(self, terminal_width, al_width, left_margin): return terminal_width - left_margin - sacrifice def blocks(self, block_width): - al_width = self.al_length - if al_width == 0: + if self.al_length == 0: raise AlvEmptyAlignment() else: - for start in range(0, al_width, block_width): - end = min(al_width, start + block_width) - yield AlignmentBlock(start, end) + for block_start in range(self._start, self._end, block_width): + block_end = min(self._end, block_start + block_width) + yield AlignmentBlock(block_start, block_end) def apply_painter(self, acc, block, painter): @@ -167,9 +170,9 @@ def _summarize_columns(self): ''' Count the different elements in each column. 
''' - columns = [] - for col_no in range(self.al_length): - columns.append(Counter(self.al[:, col_no])) + columns = {} + for col_no in range(self._start, self._end): + columns[col_no] = Counter(self.al[:, col_no]) return columns def get_basic_info(self): @@ -188,7 +191,7 @@ def get_column_conservation(self): ''' result = [] # For the return value column_summaries = self._summarize_columns() - for c in column_summaries: + for idx, c in column_summaries.items(): for indel in self.indels: del c[indel] if len(c) == 0: @@ -222,24 +225,24 @@ def get_conserved_block(self, n_columns): class AminoAcidAlignment(BaseAlignment): - def __init__(self, alignment): - super().__init__(alignment) + def __init__(self, alignment, start=0, end=-1): + super().__init__(alignment, start, end) self.type = 'aa' class DnaAlignment(BaseAlignment): - def __init__(self, alignment): - super().__init__(alignment) + def __init__(self, alignment, start=0, end=-1): + super().__init__(alignment, start, end) self.type = 'dna' class CodonAlignment(BaseAlignment): ''' Alignment of coding DNA. A column has a width of three nucleotides. 
''' - def __init__(self, alignment): + def __init__(self, alignment, start=0, end=-1): self.type = 'codon' self.column_width = 3 self.genetic_code = 1 # The standard code - super().__init__(alignment) + super().__init__(alignment, start, end) self.basic_info['Genetic code'] = self.genetic_code def block_width(self, terminal_width, args): @@ -261,9 +264,9 @@ def apply_painter(self, acc, block, painter): seq = str(seq_record.seq) colored_seq = '' - for codon_col, pos in enumerate(range(0, len(seq), 3)): - c = seq[pos:pos+3] - colored_seq += painter.colorizer(c, self.columns[block.start // 3 + codon_col]) + for codon_col, pos in enumerate(range(block.start, block.end, 3)): + c = seq[codon_col:codon_col+3] + colored_seq += painter.colorizer(c, self.columns[pos]) return painter.sol() + colored_seq + painter.eol() def apply_dotter(self, acc, block, painter, template_acc): @@ -281,12 +284,12 @@ def apply_dotter(self, acc, block, painter, template_acc): template_seq = template_record.seq colored_seq = '' - for codon_col_no, pos in enumerate(range(0, len(seq), 3)): + for codon_col, pos in enumerate(range(block.start, block.end, 3)): c = seq[pos:pos+3] if c == template_seq[pos:pos+3]: colored_seq += '...' else: - colored_seq += painter.colorizer(c, self.columns[block.start // 3 + codon_col_no]) + colored_seq += painter.colorizer(c, self.columns[pos]) return painter.sol() + colored_seq + painter.eol() @@ -296,11 +299,11 @@ def _summarize_columns(self): Specialization of base method for codon columns. Do not focus on the amino acids, but look at amino acid columns. 
''' - columns = [] - for pos in range(0, self.al_length, 3): + columns = {} + for pos in range(self._start, self._end, 3): codon_column = map(lambda r: str(r.seq), self.al[:, pos:pos+3]) aa_column = map(lambda codon: self._translate(codon), codon_column) - columns.append(Counter(aa_column)) + columns[pos] = Counter(aa_column) return columns @@ -310,7 +313,7 @@ def get_conserved_block(self, n_columns): Used for alignment glimpses. Specialised for codon alignments. ''' - conservation = self.get_column_conservation() # A list of conservation scores. Want a maximised "window" + conservation = self.get_column_conservation() # A dict of conservation scores. Want a maximised "window" accumulated_conservation = [] acc = 0 for c in conservation: @@ -321,7 +324,7 @@ def get_conserved_block(self, n_columns): codon_al_width = math.floor(self.al_width() / 3) if codon_al_width <= n_columns: - return AlignmentBlock(0, codon_al_width) + return AlignmentBlock(0, self._end) else: best_start = max(range(0, codon_al_width - n_columns), key=lambda i: accumulated_conservation[i+n_columns] - accumulated_conservation[i]) diff --git a/alv/alignmentterminal.py b/alv/alignmentterminal.py index 1916dbc..e9d24fd 100644 --- a/alv/alignmentterminal.py +++ b/alv/alignmentterminal.py @@ -4,7 +4,6 @@ import shutil - class AlignmentTerminal: ''' This class encapsulates knowledge about the terminal and how to draw on it. diff --git a/alv/io.py b/alv/io.py index 251a345..ab9ab35 100644 --- a/alv/io.py +++ b/alv/io.py @@ -37,10 +37,12 @@ def guess_format(filename): return 'phylip' -def read_alignment(file, seqtype, input_format, color_scheme, genetic_code, alignment_no=0): +def read_alignment(file, seqtype, input_format, color_scheme, genetic_code, al_start=0, al_end=-1, alignment_no=0): ''' Factory function. Read the alignment with BioPython's support, and return an appropriate alv alignment. 
+ + al_start and al_end: restrict the alignment to a subalignment defined by these indices ''' if file == '-': file = sys.stdin # Start reading from stdin if "magic filename" @@ -50,11 +52,11 @@ def read_alignment(file, seqtype, input_format, color_scheme, genetic_code, alig n_msas = len(alignments) if alignment_no >= n_msas: raise IOError(f'Alignment index too large, only {n_msas} alignment(s) in the file.') - return get_alv_objects(alignments[alignment_no], seqtype, color_scheme, genetic_code) + return get_alv_objects(alignments[alignment_no], seqtype, color_scheme, genetic_code, al_start, al_end) else: raise ValueError('No alignment in input') -def get_alv_objects(alignment, seqtype, color_scheme, genetic_code): +def get_alv_objects(alignment, seqtype, color_scheme, genetic_code, al_start, al_end): ''' Take the alignment object and return a suitable Alv alignment object, with respect to sequence type, and colorization object ("painter"). @@ -70,13 +72,13 @@ def get_alv_objects(alignment, seqtype, color_scheme, genetic_code): painter = AminoAcidPainter() if seqtype == 'aa': - return AminoAcidAlignment(alignment), painter + return AminoAcidAlignment(alignment, al_start, al_end), painter if seqtype == 'dna' or seqtype == 'rna': - return DnaAlignment(alignment), DnaPainter() + return DnaAlignment(alignment, al_start, al_end), DnaPainter() elif seqtype == 'codon': al = CodonAlignment(alignment) al.set_genetic_code(genetic_code) - return CodonAlignment(alignment), CodonPainter(painter) + return CodonAlignment(alignment, al_start, al_end), CodonPainter(painter) else: raise IOError(f'Unknown sequence type: "{seqtype}"') diff --git a/alv/version.py b/alv/version.py index 8adfee4..b280975 100644 --- a/alv/version.py +++ b/alv/version.py @@ -1 +1 @@ -__version__ = '1.7.2' +__version__ = '1.8.0' diff --git a/bin/alv b/bin/alv index 153c19d..4aa2a56 100755 --- a/bin/alv +++ b/bin/alv @@ -4,7 +4,7 @@ import alv import argparse import os import sys -from traceback import 
print_last +from traceback import print_exc from alv.version import __version__ from alv.alignmentterminal import AlignmentShellTerminal @@ -110,6 +110,8 @@ def setup_argument_parsing(): help='Comma-separated list of accessions. Sequences will be presented in this order. Also note that one can choose which sequences to present with this opion. Overrides -s and -si.') ordering_args.add_argument('-sm', '--select-matching', metavar='ACCESSION_PATTERN', type=str, help='Only show sequences with accessions containing ACCESSION_PATTERN.') + ordering_args.add_argument('-sa', '--sub-alignment', nargs=2, metavar='INT', type=int, + help='Only show alignment columns given by FROM and UPTO indices.') # Options for limiting colorization restriction_args = ap.add_argument_group('Restricting colorization') @@ -147,7 +149,11 @@ def input_and_option_adaption(args): format = 'fasta' # Hard guess, because a bit complicated when reading from pipe (sys.stdin) else: format = args.format - alignment, painter = io.read_alignment(args.infile, args.type, format, args.color_scheme, args.code, args.alignment_index) + if args.sub_alignment: # Are we restricting the alignment? + start_col, end_col = args.sub_alignment + else: + start_col, end_col = 0, -1 + alignment, painter = io.read_alignment(args.infile, args.type, format, args.color_scheme, args.code, start_col, end_col, args.alignment_index) return alignment, painter except KeyboardInterrupt: @@ -253,6 +259,7 @@ def main(): print('alv:', e, file=sys.stderr) ap.exit(3) except Exception as e: + print_exc() print('Alv bug! 
Please report!', file=sys.stderr) print(e, file=sys.stderr) ap.exit(2) diff --git a/setup.py b/setup.py index 17b7c74..e960f77 100644 --- a/setup.py +++ b/setup.py @@ -55,10 +55,10 @@ def read_version_string(filename): 'biopython>=1.70', 'colorama>=0.3.8', ], - classifiers=( + classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Topic :: Scientific/Engineering :: Bio-Informatics", - ), + ], )