Skip to content

Commit

Permalink
added fast phylip parser - issue #135
Browse files Browse the repository at this point in the history
  • Loading branch information
xflouris committed May 12, 2017
1 parent 31b25f6 commit 5991008
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 375 deletions.
13 changes: 9 additions & 4 deletions examples/newick-phylip-unrooted/newick-phylip-unrooted.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ int main(int argc, char * argv[])
{
unsigned int i;
unsigned int tip_nodes_count, inner_nodes_count, nodes_count, branch_count;
unsigned int sequence_count;
unsigned int matrix_count, ops_count;
unsigned int * matrix_indices;
double * branch_lengths;
Expand All @@ -104,7 +103,7 @@ int main(int argc, char * argv[])
/* we accept only two arguments - the newick tree (unrooted binary) and the
alignment in PHYLIP format */
if (argc != 3)
fatal(" syntax: %s [newick] [fasta]", argv[0]);
fatal(" syntax: %s [newick] [phylip]", argv[0]);

/* parse the unrooted binary tree in newick format, and store the number
of tip nodes in tip_nodes_count */
Expand Down Expand Up @@ -162,12 +161,18 @@ int main(int argc, char * argv[])
}

/* read PHYLIP alignment */
pll_msa_t * msa = pll_phylip_parse_msa(argv[2], &sequence_count);
pll_phylip_t * fd = pll_phylip_open(argv[2], pll_map_phylip);
if (!fd)
fatal(pll_errmsg);

pll_msa_t * msa = pll_phylip_parse_interleaved(fd);
if (!msa)
fatal(pll_errmsg);

pll_phylip_close(fd);

/* compress site patterns */
if (sequence_count != tip_nodes_count)
if ((unsigned int)(msa->count) != tip_nodes_count)
fatal("Number of sequences does not match number of leaves in tree");

#ifdef COMPRESS
Expand Down
11 changes: 8 additions & 3 deletions examples/parsimony/npr-pars.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ int main(int argc, char * argv[])
{
unsigned int i,j,n;
unsigned int tip_nodes_count, inner_nodes_count, nodes_count, branch_count;
unsigned int sequence_count;
unsigned int ops_count;
pll_parsimony_t * pars;
pll_pars_buildop_t * operations;
Expand Down Expand Up @@ -129,11 +128,17 @@ int main(int argc, char * argv[])
}

/* read PHYLIP alignment */
pll_msa_t * msa = pll_phylip_parse_msa(argv[2], &sequence_count);
pll_phylip_t * fd = pll_phylip_open(argv[2], pll_map_phylip);
if (!fd)
fatal(pll_errmsg);

pll_msa_t * msa = pll_phylip_parse_interleaved(fd);
if (!msa)
fatal(pll_errmsg);

if (sequence_count != tip_nodes_count)
pll_phylip_close(fd);

if ((unsigned int)(msa->count) != tip_nodes_count)
fatal("Number of sequences does not match number of leaves in tree");

/* create the PLL parsimony instance
Expand Down
5 changes: 2 additions & 3 deletions src/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,12 @@ core_pmatrix.c \
core_likelihood.c \
parse_utree.y \
parse_rtree.y \
parse_phylip.y \
lex_utree.l \
lex_rtree.l \
lex_phylip.l \
fast_parsimony.c \
stepwise.c \
random.c
random.c \
phylip.c

libpll_la_CFLAGS = $(AM_CFLAGS)

Expand Down
96 changes: 0 additions & 96 deletions src/lex_phylip.l

This file was deleted.

31 changes: 30 additions & 1 deletion src/maps.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,16 +101,45 @@ const unsigned int pll_map_aa[256] =


/*
map for fasta parsing
maps for fasta and phylip parsing
legal symbols: ?*- 0123456789 abcdefghijklmnopqrstuvwxyz (also upper case)
fatal symbols: period (.), ascii 0-8, ascii 14-31
silently stripped: tab (9), newline (10 and 13), vt (11), formfeed (12)
stripped: !"#$&'()+,/:;<=>@^_`æøåÆØŧ¨´
includes both amino acid and nucleotide sequences, adapt to nt only
TODO: Would be more suitable to create separate maps for parsing nt data,
aa data, binary, etc
*/

const unsigned int pll_map_phylip[256] =
{
/*
0=stripped, 1=legal, 2=fatal, 3=silently stripped
@ A B C D E F G H I J K L M N O
P Q R S T U V W X Y Z [ \ ] ^ _
*/

/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 2, 2, /* 0 */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 1 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, /* 2 */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, /* 3 */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 4 */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 5 */
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, /* 6 */
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, /* 7 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 8 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 9 */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /* F */
};
const unsigned int pll_map_fasta[256] =
{
/*
Expand Down
Loading

0 comments on commit 5991008

Please sign in to comment.