Skip to content

Commit

Permalink
nofilter option added to micca.api.msa.nast() and to msa command; ver…
Browse files Browse the repository at this point in the history
…sion updated to 1.4.0; documentation improved
  • Loading branch information
davidealbanese committed May 4, 2016
1 parent 6c589c0 commit fd42231
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 26 deletions.
6 changes: 6 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
CHANGES
=======

Version 1.4.0
-------------
* nofilter option added to micca.api.msa.nast() (do not remove positions which
are gaps in every sequence) and to the msa command (--nast-nofilter option);
* Documentation improved.

Version 1.3.0
-------------
* Swarm clustering algorithm added to micca otu;
Expand Down
10 changes: 4 additions & 6 deletions doc/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,18 @@
.. include:: ../../README.rst



.. toctree::
:maxdepth: 1
:caption: Getting Started

install
run
databases

.. toctree::
:maxdepth: 1
:caption: Tutorials

singleend
pairedend
phyloseq
Expand All @@ -43,8 +42,7 @@

.. Indices and tables
.. ==================
..
..
.. * :ref:`genindex`
.. * :ref:`modindex`
.. * :ref:`search`
12 changes: 6 additions & 6 deletions doc/source/otu.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ The :doc:`commands/otu` command returns in a single directory 5 files:
otus.fasta
FASTA containing the representative sequences (OTUs)::

>DENOVO1
>DENOVO1
GACGAACGCTGGCGGCGTGCCTAACACATGCAAGTCGAACGGGG...
>DENOVO2
GATGAACGCTAGCTACAGGCTTAACACATGCAAGTCGAGGGGCA...
Expand All @@ -40,8 +40,8 @@ The :doc:`commands/otu` command returns in a single directory 5 files:
ids::

DENOVO1 IS0AYJS04JQKIS;sample=Mw_01
DENOVO2 IS0AYJS04JL6RS;sample=Mw_01
DENOVO3 IS0AYJS04H4XNN;sample=Mw_01
DENOVO2 IS0AYJS04JL6RS;sample=Mw_01
DENOVO3 IS0AYJS04H4XNN;sample=Mw_01
...

hits.txt
Expand All @@ -50,12 +50,12 @@ The :doc:`commands/otu` command returns in a single directory 5 files:
identity (if available), see :ref:`otu-definition_identity`::

IS0AYJS04JE658;sample=Mw_01; IS0AYJS04I4XYN;sample=Mw_01 99.4
IS0AYJS04JPH34;sample=Mw_01; IS0AYJS04JVUBC;sample=Mw_01 98.0
IS0AYJS04I67XN;sample=Mw_01; IS0AYJS04JVUBC;sample=Mw_01 99.7
IS0AYJS04JPH34;sample=Mw_01; IS0AYJS04JVUBC;sample=Mw_01 98.0
IS0AYJS04I67XN;sample=Mw_01; IS0AYJS04JVUBC;sample=Mw_01 99.7
...

otuschim.fasta
(only for 'denovo_greedy' and 'open_ref' methods, when
(only for 'denovo_greedy', 'denovo_swarm' and 'open_ref' methods, when
``-c/--rmchim`` is specified) FASTA file containing the chimeric
otus.

Expand Down
1 change: 1 addition & 0 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ The RDP Classifier is included.
## Available Tags/Versions

- latest: GitHub snapshot (master)
- 1.4.0: micca 1.4.0 (RDP Classifier release 2.11 included)
- 1.3.0: micca 1.3.0 (RDP Classifier release 2.11 included)
- 1.2.2: micca 1.2.2 (RDP Classifier release 2.11 included)

Expand Down
2 changes: 1 addition & 1 deletion micca/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os

__version__ = "1.3.0"
__version__ = "1.4.0"

THIRDPARTY_BIN_PATH = os.path.join(os.path.dirname(__file__), "thirdparty_bin")
28 changes: 17 additions & 11 deletions micca/api/msa.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def _aln_to_seqs(aln, query, target):


def nast(input_fn, template_fn, output_fn, notaligned_fn=None, hits_fn=None,
ident=0.75, threads=1, mincov=0.75, strand="both"):
ident=0.75, threads=1, mincov=0.75, strand="both", nofilter=False):

output_dir = os.path.dirname(output_fn)

Expand Down Expand Up @@ -269,24 +269,30 @@ def nast(input_fn, template_fn, output_fn, notaligned_fn=None, hits_fn=None,
hits_temp_handle.close()
output_temp_handle.close()
hits_out_handle.close()
os.remove(hits_temp_fn)
os.remove(template_wg_temp_fn)

# remove columns which are gaps in every sequence
output_temp_handle = open(output_temp_fn, "rb")
output_handle = open(output_fn, "wb")
for title, seq in SimpleFastaParser(output_temp_handle):
seqout = "".join(np.array(list(seq))[msa_cov > 0])
output_handle.write(">{}\n{}\n".format(title, seqout))
output_temp_handle.close()
output_handle.close()
if nofilter:
os.rename(output_temp_fn, output_fn)
else:
output_temp_handle = open(output_temp_fn, "rb")
output_handle = open(output_fn, "wb")
for title, seq in SimpleFastaParser(output_temp_handle):
seqout = "".join(np.array(list(seq))[msa_cov > 0])
output_handle.write(">{}\n{}\n".format(title, seqout))
output_temp_handle.close()
output_handle.close()
os.remove(output_temp_fn)

if hits_fn is None:
os.remove(hits_out_fn)
else:
os.rename(hits_out_fn, hits_fn)

os.remove(template_wg_temp_fn)
os.remove(hits_temp_fn)
os.remove(output_temp_fn)





def muscle(input_fn, output_fn, maxiters=16):
Expand Down
7 changes: 6 additions & 1 deletion micca/cmds/msa.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ def main(argv):
help="write hits on a TAB delimited file with the "
"query sequence id, the template sequence id and "
"the identity.")
group_nast.add_argument('--nast-nofilter', default=False, action="store_true",
help="do not remove positions which are gaps in "
"every sequenceces (useful if you want to apply "
"a Lane mask filter before the tree inference).")
args = parser.parse_args(argv)


Expand All @@ -129,7 +133,8 @@ def main(argv):
ident=args.nast_id,
threads=args.nast_threads,
mincov=args.nast_mincov,
strand=args.nast_strand)
strand=args.nast_strand,
nofilter=args.nast_nofilter)
else:
micca.api.msa.muscle(
input_fn=args.input,
Expand Down
3 changes: 2 additions & 1 deletion micca/cmds/otu.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ def main(argv):
"open_ref"],
help="clustering method (default %(default)s)")
group.add_argument('-d', '--id', default=0.97, type=float,
help="sequence identity threshold (0.0 to 1.0, "
help="sequence identity threshold (for 'denovo_greedy', "
"'closed_ref' and 'open_ref', 0.0 to 1.0, "
"default %(default)s).")
group.add_argument('-n', '--mincov', default=0.75, type=float,
help="reject sequence if the fraction of alignment "
Expand Down

0 comments on commit fd42231

Please sign in to comment.