From 61b78f7d6e599788769cd7bb878bccada0e1f283 Mon Sep 17 00:00:00 2001 From: flouris Date: Thu, 30 Mar 2017 21:53:09 +0200 Subject: [PATCH] added wrapper tree structure, updated examples, minor fixes in unit tests and examples - issue #126 --- examples/lg4/lg4.c | 140 ++++++----- examples/load-utree/load-utree.c | 20 +- .../newick-fasta-unrooted.c | 118 ++++----- .../newick-phylip-unrooted.c | 105 ++++---- examples/newton/newton.c | 2 + examples/parsimony/npr-pars.c | 2 +- examples/partial-traversal/partial.c | 105 ++++---- examples/protein-list/protein-list.c | 109 +++++---- examples/stepwise/stepwise.c | 57 ++++- src/parse_utree.y | 230 ++++++++++++++---- src/pll.h | 88 ++++--- src/stepwise.c | 68 +++--- src/utree.c | 216 ++++++++++------ src/utree_moves.c | 28 +-- src/utree_svg.c | 138 +++++------ test/src/asc-bias.c | 64 ++--- test/src/common.c | 18 +- test/src/common.h | 4 +- test/src/partial-traversal.c | 74 +++--- 19 files changed, 944 insertions(+), 642 deletions(-) diff --git a/examples/lg4/lg4.c b/examples/lg4/lg4.c index 0ef97cb..d78962c 100644 --- a/examples/lg4/lg4.c +++ b/examples/lg4/lg4.c @@ -29,42 +29,49 @@ static void fatal(const char * format, ...) __attribute__ ((noreturn)); +static void * xmalloc(size_t size) +{ + void * t; + t = malloc(size); + if (!t) + fatal("Unable to allocate enough memory."); + + return t; +} + +static char * xstrdup(const char * s) +{ + size_t len = strlen(s); + char * p = (char *)xmalloc(len+1); + return strcpy(p,s); +} + /* a callback function for performing a full traversal */ -static int cb_full_traversal(pll_utree_t * node) +static int cb_full_traversal(pll_unode_t * node) { return 1; } -static void set_missing_branch_length_recursive(pll_utree_t * tree, - double length) +/* branch lengths not present in the newick file get a value of 0.000001 */ +static void set_missing_branch_length(pll_utree_t * tree, double length) { - if (tree) - { - /* set branch length to default if not set */ - if (!tree->length) - tree->length = length; - - if (tree->next) - { - if (!tree->next->length) - tree->next->length = length; + unsigned int i; - if (!tree->next->next->length) - tree->next->next->length = length; + for (i = 0; i < tree->tip_count; ++i) + if (!tree->nodes[i]->length) + tree->nodes[i]->length = length; - set_missing_branch_length_recursive(tree->next->back, length); - set_missing_branch_length_recursive(tree->next->next->back, length); - } + for (i = tree->tip_count; i < tree->tip_count + tree->inner_count; ++i) + { + if (!tree->nodes[i]->length) + tree->nodes[i]->length = length; + if (!tree->nodes[i]->next->length) + tree->nodes[i]->next->length = length; + if (!tree->nodes[i]->next->next->length) + tree->nodes[i]->next->next->length = length; } } -/* branch lengths not present in the newick file get a value of 0.000001 */ -static void set_missing_branch_length(pll_utree_t * tree, double length) -{ - set_missing_branch_length_recursive(tree, length); - set_missing_branch_length_recursive(tree->back, length); -} - static void fatal(const char * format, ...) { va_list argptr; @@ -84,7 +91,7 @@ int main(int argc, char * argv[]) double * branch_lengths; pll_partition_t * partition; pll_operation_t * operations; - pll_utree_t ** travbuffer; + pll_unode_t ** travbuffer; /* we accept only two arguments - the newick tree (unrooted binary) and the alignment in the form of FASTA reads */ @@ -93,13 +100,16 @@ int main(int argc, char * argv[]) /* parse the unrooted binary tree in newick format, and store the number of tip nodes in tip_nodes_count */ - pll_utree_t * tree = pll_utree_parse_newick(argv[1], &tip_nodes_count); + pll_utree_t * tree = pll_utree_parse_newick(argv[1]); + if (!tree) + fatal("%s", pll_errmsg); + + tip_nodes_count = tree->tip_count; /* fix all missing branch lengths (i.e. those that did not appear in the newick) to 0.000001 */ set_missing_branch_length(tree, 0.000001); - /* compute and show node count information */ inner_nodes_count = tip_nodes_count - 2; nodes_count = inner_nodes_count + tip_nodes_count; @@ -117,24 +127,23 @@ int main(int argc, char * argv[]) char * newick = pll_utree_export_newick(tree); printf("%s\n", newick); free(newick); - // */ - - /* obtain an array of pointers to tip nodes */ - pll_utree_t ** tipnodes = (pll_utree_t **)calloc(tip_nodes_count, - sizeof(pll_utree_t *)); - pll_utree_query_tipnodes(tree, tipnodes); + */ /* create a libc hash table of size tip_nodes_count */ hcreate(tip_nodes_count); /* populate a libc hash table with tree tip labels */ - unsigned int * data = (unsigned int *)malloc(tip_nodes_count * - sizeof(unsigned int)); + unsigned int * data = (unsigned int *)xmalloc(tip_nodes_count * + sizeof(unsigned int)); for (i = 0; i < tip_nodes_count; ++i) { - data[i] = tipnodes[i]->clv_index; + data[i] = tree->nodes[i]->clv_index; ENTRY entry; - entry.key = tipnodes[i]->label; +#ifdef __APPLE__ + entry.key = xstrdup(tree->nodes[i]->label); +#else + entry.key = tree->nodes[i]->label; +#endif entry.data = (void *)(data+i); hsearch(entry, ENTER); } @@ -229,7 +238,6 @@ int main(int argc, char * argv[]) /* we no longer need these two arrays (keys and values of hash table... */ free(data); - free(tipnodes); /* ...neither the sequences and the headers as they are already present in the form of probabilities in the tip CLVs */ @@ -243,17 +251,21 @@ int main(int argc, char * argv[]) /* allocate a buffer for storing pointers to nodes of the tree in postorder traversal */ - travbuffer = (pll_utree_t **)malloc(nodes_count * sizeof(pll_utree_t *)); + travbuffer = (pll_unode_t **)xmalloc(nodes_count * sizeof(pll_unode_t *)); - branch_lengths = (double *)malloc(branch_count * sizeof(double)); - matrix_indices = (unsigned int *)malloc(branch_count * sizeof(unsigned int)); - operations = (pll_operation_t *)malloc(inner_nodes_count * - sizeof(pll_operation_t)); + branch_lengths = (double *)xmalloc(branch_count * sizeof(double)); + matrix_indices = (unsigned int *)xmalloc(branch_count * sizeof(unsigned int)); + operations = (pll_operation_t *)xmalloc(inner_nodes_count * + sizeof(pll_operation_t)); - /* compute a partial traversal starting from the randomly selected - inner node */ + /* compute a partial postorder traversal starting from the inner node that + was the the root of the parsed 'unrooted' binary tree */ + + pll_unode_t * root = tree->nodes[tip_nodes_count+inner_nodes_count-1]; unsigned int traversal_size; - if (!pll_utree_traverse(tree, + + if (!pll_utree_traverse(root, + PLL_TREE_TRAVERSE_POSTORDER, cb_full_traversal, travbuffer, &traversal_size)) @@ -323,11 +335,11 @@ int main(int argc, char * argv[]) frequency vector is to be used */ double logl = pll_compute_edge_loglikelihood(partition, - tree->clv_index, - tree->scaler_index, - tree->back->clv_index, - tree->back->scaler_index, - tree->pmatrix_index, + root->clv_index, + root->scaler_index, + root->back->clv_index, + root->back->scaler_index, + root->pmatrix_index, params_indices, NULL); @@ -351,8 +363,8 @@ int main(int argc, char * argv[]) double weights[4] = { 0.209224645, 0.224707726, 0.277599198, 0.288468431 }; double rates[4] = { 0.498991136, 0.563680734, 0.808264032, 1.887769458 }; - pll_set_category_rates (partition, rates); - pll_set_category_weights (partition, weights); + pll_set_category_rates(partition, rates); + pll_set_category_weights(partition, weights); /* update transition probability matrices */ pll_update_prob_matrices(partition, @@ -374,11 +386,11 @@ int main(int argc, char * argv[]) pll_update_partials(partition, operations, ops_count); logl = pll_compute_edge_loglikelihood(partition, - tree->clv_index, - tree->scaler_index, - tree->back->clv_index, - tree->back->scaler_index, - tree->pmatrix_index, + root->clv_index, + root->scaler_index, + root->back->clv_index, + root->back->scaler_index, + root->pmatrix_index, params_indices, NULL); @@ -391,11 +403,11 @@ int main(int argc, char * argv[]) printf("\nLog-L (LG4X): %f\n", logl); logl = pll_compute_edge_loglikelihood(partition, - tree->back->clv_index, - tree->back->scaler_index, - tree->clv_index, - tree->scaler_index, - tree->pmatrix_index, + root->back->clv_index, + root->back->scaler_index, + root->clv_index, + root->scaler_index, + root->pmatrix_index, params_indices, NULL); diff --git a/examples/load-utree/load-utree.c b/examples/load-utree/load-utree.c index 2254731..b774c82 100644 --- a/examples/load-utree/load-utree.c +++ b/examples/load-utree/load-utree.c @@ -42,18 +42,26 @@ pll_utree_t * load_tree_unrooted(const char * filename, if (!(rtree = pll_rtree_parse_newick(filename, tip_count))) { - if (!(utree = pll_utree_parse_newick(filename, tip_count))) + if (!(utree = pll_utree_parse_newick(filename))) { fprintf(stderr, "%s\n", pll_errmsg); return NULL; } + else + { + *tip_count = utree->tip_count; + } } else { utree = pll_rtree_unroot(rtree); + pll_unode_t * root = utree->nodes[utree->tip_count+utree->inner_count-1]; + /* optional step if using default PLL clv/pmatrix index assignments */ - pll_utree_reset_template_indices(utree, *tip_count); + pll_utree_reset_template_indices(root, *tip_count); + + pll_rtree_destroy(rtree,NULL); } return utree; @@ -70,7 +78,13 @@ int main(int argc, char * argv[]) if (!utree) fatal("Tree must be a rooted or unrooted binary."); - char * newick = pll_utree_export_newick(utree); + /* select a random inner node */ + long int r = random() % utree->inner_count; + pll_unode_t * root = utree->nodes[utree->tip_count + r]; + + /* export the tree to newick format with the selected inner node as the root + of the unrooted binary tree */ + char * newick = pll_utree_export_newick(root); printf("%s\n", newick); diff --git a/examples/newick-fasta-unrooted/newick-fasta-unrooted.c b/examples/newick-fasta-unrooted/newick-fasta-unrooted.c index f0a915e..492a822 100644 --- a/examples/newick-fasta-unrooted/newick-fasta-unrooted.c +++ b/examples/newick-fasta-unrooted/newick-fasta-unrooted.c @@ -34,40 +34,47 @@ typedef struct int clv_valid; } node_info_t; +static void * xmalloc(size_t size) +{ + void * t; + t = malloc(size); + if (!t) + fatal("Unable to allocate enough memory."); + + return t; +} + +static char * xstrdup(const char * s) +{ + size_t len = strlen(s); + char * p = (char *)xmalloc(len+1); + return strcpy(p,s); +} + /* a callback function for performing a full traversal */ -static int cb_full_traversal(pll_utree_t * node) +static int cb_full_traversal(pll_unode_t * node) { return 1; } -static void set_missing_branch_length_recursive(pll_utree_t * tree, - double length) -{ - if (tree) - { - /* set branch length to default if not set */ - if (!tree->length) - tree->length = length; - - if (tree->next) - { - if (!tree->next->length) - tree->next->length = length; - - if (!tree->next->next->length) - tree->next->next->length = length; - - set_missing_branch_length_recursive(tree->next->back, length); - set_missing_branch_length_recursive(tree->next->next->back, length); - } - } -} - /* branch lengths not present in the newick file get a value of 0.000001 */ static void set_missing_branch_length(pll_utree_t * tree, double length) { - set_missing_branch_length_recursive(tree, length); - set_missing_branch_length_recursive(tree->back, length); + unsigned int i; + + for (i = 0; i < tree->tip_count; ++i) + if (!tree->nodes[i]->length) + tree->nodes[i]->length = length; + + for (i = tree->tip_count; i < tree->tip_count + tree->inner_count; ++i) + { + if (!tree->nodes[i]->length) + tree->nodes[i]->length = length; + if (!tree->nodes[i]->next->length) + tree->nodes[i]->next->length = length; + if (!tree->nodes[i]->next->next->length) + tree->nodes[i]->next->next->length = length; + } } static void fatal(const char * format, ...) @@ -89,7 +96,7 @@ int main(int argc, char * argv[]) double * branch_lengths; pll_partition_t * partition; pll_operation_t * operations; - pll_utree_t ** travbuffer; + pll_unode_t ** travbuffer; unsigned int params_indices[RATE_CATS] = {0,0,0,0}; /* we accept only two arguments - the newick tree (unrooted binary) and the @@ -99,10 +106,12 @@ int main(int argc, char * argv[]) /* parse the unrooted binary tree in newick format, and store the number of tip nodes in tip_nodes_count */ - pll_utree_t * tree = pll_utree_parse_newick(argv[1], &tip_nodes_count); + pll_utree_t * tree = pll_utree_parse_newick(argv[1]); if (!tree) fatal("Tree must be an unrooted binary tree"); + tip_nodes_count = tree->tip_count; + /* fix all missing branch lengths (i.e. those that did not appear in the newick) to 0.000001 */ set_missing_branch_length(tree, 0.000001); @@ -131,22 +140,21 @@ int main(int argc, char * argv[]) */ - /* obtain an array of pointers to tip nodes */ - pll_utree_t ** tipnodes = (pll_utree_t **)calloc(tip_nodes_count, - sizeof(pll_utree_t *)); - pll_utree_query_tipnodes(tree, tipnodes); - /* create a libc hash table of size tip_nodes_count */ hcreate(tip_nodes_count); /* populate a libc hash table with tree tip labels */ - unsigned int * data = (unsigned int *)malloc(tip_nodes_count * - sizeof(unsigned int)); + unsigned int * data = (unsigned int *)xmalloc(tip_nodes_count * + sizeof(unsigned int)); for (i = 0; i < tip_nodes_count; ++i) { - data[i] = tipnodes[i]->clv_index; + data[i] = tree->nodes[i]->clv_index; ENTRY entry; - entry.key = tipnodes[i]->label; +#ifdef __APPLE__ + entry.key = xstrdup(tree->nodes[i]->label); +#else + entry.key = tree->nodes[i]->label; +#endif entry.data = (void *)(data+i); hsearch(entry, ENTER); } @@ -264,7 +272,6 @@ int main(int argc, char * argv[]) /* we no longer need these two arrays (keys and values of hash table... */ free(data); - free(tipnodes); /* ...neither the sequences and the headers as they are already present in the form of probabilities in the tip CLVs */ @@ -279,17 +286,22 @@ int main(int argc, char * argv[]) /* allocate a buffer for storing pointers to nodes of the tree in postorder traversal */ - travbuffer = (pll_utree_t **)malloc(nodes_count * sizeof(pll_utree_t *)); - + travbuffer = (pll_unode_t **)xmalloc(nodes_count * sizeof(pll_unode_t *)); - branch_lengths = (double *)malloc(branch_count * sizeof(double)); - matrix_indices = (unsigned int *)malloc(branch_count * sizeof(unsigned int)); - operations = (pll_operation_t *)malloc(inner_nodes_count * - sizeof(pll_operation_t)); + branch_lengths = (double *)xmalloc(branch_count * sizeof(double)); + matrix_indices = (unsigned int *)xmalloc(branch_count * sizeof(unsigned int)); + operations = (pll_operation_t *)xmalloc(inner_nodes_count * + sizeof(pll_operation_t)); /* perform a postorder traversal of the unrooted tree */ + /* perform a postorder traversal starting from the inner node that + was the the root of the parsed 'unrooted' binary tree */ + + pll_unode_t * root = tree->nodes[tip_nodes_count+inner_nodes_count-1]; unsigned int traversal_size; - if (!pll_utree_traverse(tree, + + if (!pll_utree_traverse(root, + PLL_TREE_TRAVERSE_POSTORDER, cb_full_traversal, travbuffer, &traversal_size)) @@ -353,21 +365,15 @@ int main(int argc, char * argv[]) frequency vector is to be used */ double logl = pll_compute_edge_loglikelihood(partition, - tree->clv_index, - tree->scaler_index, - tree->back->clv_index, - tree->back->scaler_index, - tree->pmatrix_index, + root->clv_index, + root->scaler_index, + root->back->clv_index, + root->back->scaler_index, + root->pmatrix_index, params_indices, NULL); printf("Log-L: %f\n", logl); - /* output svg */ - pll_svg_attrib_t * attr = pll_svg_attrib_create(); - attr->width = 500; - pll_utree_export_svg(tree,tip_nodes_count,attr,"testing.svg"); - pll_svg_attrib_destroy(attr); - /* destroy all structures allocated for the concrete PLL partition instance */ pll_partition_destroy(partition); diff --git a/examples/newick-phylip-unrooted/newick-phylip-unrooted.c b/examples/newick-phylip-unrooted/newick-phylip-unrooted.c index 3493a54..826dfc2 100644 --- a/examples/newick-phylip-unrooted/newick-phylip-unrooted.c +++ b/examples/newick-phylip-unrooted/newick-phylip-unrooted.c @@ -27,6 +27,8 @@ #define STATES 4 #define RATE_CATS 4 +#define COMPRESS + static void fatal(const char * format, ...) __attribute__ ((noreturn)); typedef struct @@ -34,40 +36,47 @@ typedef struct int clv_valid; } node_info_t; +static void * xmalloc(size_t size) +{ + void * t; + t = malloc(size); + if (!t) + fatal("Unable to allocate enough memory."); + + return t; +} + +static char * xstrdup(const char * s) +{ + size_t len = strlen(s); + char * p = (char *)xmalloc(len+1); + return strcpy(p,s); +} + /* a callback function for performing a full traversal */ -static int cb_full_traversal(pll_utree_t * node) +static int cb_full_traversal(pll_unode_t * node) { return 1; } -static void set_missing_branch_length_recursive(pll_utree_t * tree, - double length) -{ - if (tree) - { - /* set branch length to default if not set */ - if (!tree->length) - tree->length = length; - - if (tree->next) - { - if (!tree->next->length) - tree->next->length = length; - - if (!tree->next->next->length) - tree->next->next->length = length; - - set_missing_branch_length_recursive(tree->next->back, length); - set_missing_branch_length_recursive(tree->next->next->back, length); - } - } -} - /* branch lengths not present in the newick file get a value of 0.000001 */ static void set_missing_branch_length(pll_utree_t * tree, double length) { - set_missing_branch_length_recursive(tree, length); - set_missing_branch_length_recursive(tree->back, length); + unsigned int i; + + for (i = 0; i < tree->tip_count; ++i) + if (!tree->nodes[i]->length) + tree->nodes[i]->length = length; + + for (i = tree->tip_count; i < tree->tip_count + tree->inner_count; ++i) + { + if (!tree->nodes[i]->length) + tree->nodes[i]->length = length; + if (!tree->nodes[i]->next->length) + tree->nodes[i]->next->length = length; + if (!tree->nodes[i]->next->next->length) + tree->nodes[i]->next->next->length = length; + } } static void fatal(const char * format, ...) @@ -90,7 +99,7 @@ int main(int argc, char * argv[]) double * branch_lengths; pll_partition_t * partition; pll_operation_t * operations; - pll_utree_t ** travbuffer; + pll_unode_t ** travbuffer; /* we accept only two arguments - the newick tree (unrooted binary) and the alignment in the form of FASTA reads */ @@ -99,10 +108,12 @@ int main(int argc, char * argv[]) /* parse the unrooted binary tree in newick format, and store the number of tip nodes in tip_nodes_count */ - pll_utree_t * tree = pll_utree_parse_newick(argv[1], &tip_nodes_count); + pll_utree_t * tree = pll_utree_parse_newick(argv[1]); if (!tree) fatal("Tree must be an unrooted binary tree"); + tip_nodes_count = tree->tip_count; + /* fix all missing branch lengths (i.e. those that did not appear in the newick) to 0.000001 */ set_missing_branch_length(tree, 0.000001); @@ -131,11 +142,6 @@ int main(int argc, char * argv[]) */ - /* obtain an array of pointers to tip nodes */ - pll_utree_t ** tipnodes = (pll_utree_t **)calloc(tip_nodes_count, - sizeof(pll_utree_t *)); - pll_utree_query_tipnodes(tree, tipnodes); - /* create a libc hash table of size tip_nodes_count */ hcreate(tip_nodes_count); @@ -144,9 +150,13 @@ int main(int argc, char * argv[]) sizeof(unsigned int)); for (i = 0; i < tip_nodes_count; ++i) { - data[i] = tipnodes[i]->clv_index; + data[i] = tree->nodes[i]->clv_index; ENTRY entry; - entry.key = tipnodes[i]->label; +#ifdef __APPLE__ + entry.key = xstrdup(tree->nodes[i]->label); +#else + entry.key = tree->nodes[i]->label; +#endif entry.data = (void *)(data+i); hsearch(entry, ENTER); } @@ -160,13 +170,14 @@ int main(int argc, char * argv[]) if (sequence_count != tip_nodes_count) fatal("Number of sequences does not match number of leaves in tree"); +#ifdef COMPRESS printf("Original sequence (alignment) length : %d\n", msa->length); unsigned int * weight = pll_compress_site_patterns(msa->sequence, pll_map_nt, tip_nodes_count, &(msa->length)); printf("Number of unique site patterns: %d\n", msa->length); - +#endif /* create the PLL partition instance @@ -215,9 +226,11 @@ int main(int argc, char * argv[]) /* set rate categories */ pll_set_category_rates(partition, rate_cats); +#ifdef COMPRESS /* set pattern weights and free the weights array */ pll_set_pattern_weights(partition, weight); free(weight); +#endif /* find sequences in hash table and link them with the corresponding taxa */ for (i = 0; i < tip_nodes_count; ++i) @@ -244,11 +257,10 @@ int main(int argc, char * argv[]) /* we no longer need these two arrays (keys and values of hash table... */ free(data); - free(tipnodes); /* allocate a buffer for storing pointers to nodes of the tree in postorder traversal */ - travbuffer = (pll_utree_t **)malloc(nodes_count * sizeof(pll_utree_t *)); + travbuffer = (pll_unode_t **)malloc(nodes_count * sizeof(pll_unode_t *)); branch_lengths = (double *)malloc(branch_count * sizeof(double)); @@ -257,8 +269,11 @@ int main(int argc, char * argv[]) sizeof(pll_operation_t)); /* perform a postorder traversal of the unrooted tree */ + pll_unode_t * root = tree->nodes[tip_nodes_count+inner_nodes_count-1]; unsigned int traversal_size; - if (!pll_utree_traverse(tree, + + if (!pll_utree_traverse(root, + PLL_TREE_TRAVERSE_POSTORDER, cb_full_traversal, travbuffer, &traversal_size)) @@ -325,15 +340,15 @@ int main(int argc, char * argv[]) whose frequency vector is to be used for each rate category */ double logl = pll_compute_edge_loglikelihood(partition, - tree->clv_index, - tree->scaler_index, - tree->back->clv_index, - tree->back->scaler_index, - tree->pmatrix_index, + root->clv_index, + root->scaler_index, + root->back->clv_index, + root->back->scaler_index, + root->pmatrix_index, params_indices, NULL); - printf("Log-L: %f\n", logl); + printf("Log-L: %.15f\n", logl); /* destroy all structures allocated for the concrete PLL partition instance */ pll_partition_destroy(partition); diff --git a/examples/newton/newton.c b/examples/newton/newton.c index 7c8d3d8..e2bf5f5 100644 --- a/examples/newton/newton.c +++ b/examples/newton/newton.c @@ -55,6 +55,8 @@ static double newton(pll_partition_t * partition, pll_update_sumtable(partition, parent_clv_index, child_clv_index, + parent_scaler_index, + child_scaler_index, params_indices, sumtable); diff --git a/examples/parsimony/npr-pars.c b/examples/parsimony/npr-pars.c index 3292dcd..110be33 100644 --- a/examples/parsimony/npr-pars.c +++ b/examples/parsimony/npr-pars.c @@ -265,7 +265,7 @@ int main(int argc, char * argv[]) free(recops); /* we will no longer need the tree structure */ - pll_rtree_destroy(tree); + pll_rtree_destroy(tree,NULL); /* destroy the parsimony structure */ pll_parsimony_destroy(pars); diff --git a/examples/partial-traversal/partial.c b/examples/partial-traversal/partial.c index 2c0e385..db0ea51 100644 --- a/examples/partial-traversal/partial.c +++ b/examples/partial-traversal/partial.c @@ -35,6 +35,22 @@ typedef struct int clv_valid; } node_info_t; +static void * xmalloc(size_t size) +{ + void * t; + t = malloc(size); + if (!t) + fatal("Unable to allocate enough memory."); + + return t; +} + +static char * xstrdup(const char * s) +{ + size_t len = strlen(s); + char * p = (char *)xmalloc(len+1); + return strcpy(p,s); +} /* call-back function to destroy the data element of each node */ static void cb_data_destroy(void * data) { @@ -42,7 +58,7 @@ static void cb_data_destroy(void * data) } /* a callback function for performing a partial traversal */ -static int cb_partial_traversal(pll_utree_t * node) +static int cb_partial_traversal(pll_unode_t * node) { node_info_t * node_info; @@ -87,34 +103,24 @@ static int cb_partial_traversal(pll_utree_t * node) return 1; } -static void set_missing_branch_length_recursive(pll_utree_t * tree, - double length) -{ - if (tree) - { - /* set branch length to default if not set */ - if (!tree->length) - tree->length = length; - - if (tree->next) - { - if (!tree->next->length) - tree->next->length = length; - - if (!tree->next->next->length) - tree->next->next->length = length; - - set_missing_branch_length_recursive(tree->next->back, length); - set_missing_branch_length_recursive(tree->next->next->back, length); - } - } -} - /* branch lengths not present in the newick file get a value of 0.000001 */ static void set_missing_branch_length(pll_utree_t * tree, double length) { - set_missing_branch_length_recursive(tree, length); - set_missing_branch_length_recursive(tree->back, length); + unsigned int i; + + for (i = 0; i < tree->tip_count; ++i) + if (!tree->nodes[i]->length) + tree->nodes[i]->length = length; + + for (i = tree->tip_count; i < tree->tip_count + tree->inner_count; ++i) + { + if (!tree->nodes[i]->length) + tree->nodes[i]->length = length; + if (!tree->nodes[i]->next->length) + tree->nodes[i]->next->length = length; + if (!tree->nodes[i]->next->next->length) + tree->nodes[i]->next->next->length = length; + } } static void fatal(const char * format, ...) @@ -136,8 +142,8 @@ int main(int argc, char * argv[]) double * branch_lengths; pll_partition_t * partition; pll_operation_t * operations; - pll_utree_t ** travbuffer; - pll_utree_t ** inner_nodes_list; + pll_unode_t ** travbuffer; + pll_unode_t ** inner_nodes_list; /* we accept only two arguments - the newick tree (unrooted binary) and the alignment in the form of FASTA reads */ @@ -146,10 +152,12 @@ int main(int argc, char * argv[]) /* parse the unrooted binary tree in newick format, and store the number of tip nodes in tip_nodes_count */ - pll_utree_t * tree = pll_utree_parse_newick(argv[1], &tip_nodes_count); + pll_utree_t * tree = pll_utree_parse_newick(argv[1]); if (!tree) fatal("Tree must be an unrooted binary tree"); + tip_nodes_count = tree->tip_count; + /* fix all missing branch lengths (i.e. those that did not appear in the newick) to 0.000001 */ set_missing_branch_length(tree, 0.000001); @@ -179,22 +187,21 @@ int main(int argc, char * argv[]) */ - /* obtain an array of pointers to tip nodes */ - pll_utree_t ** tipnodes = (pll_utree_t **)calloc(tip_nodes_count, - sizeof(pll_utree_t *)); - pll_utree_query_tipnodes(tree, tipnodes); - /* create a libc hash table of size tip_nodes_count */ hcreate(tip_nodes_count); /* populate a libc hash table with tree tip labels */ - unsigned int * data = (unsigned int *)malloc(tip_nodes_count * - sizeof(unsigned int)); + unsigned int * data = (unsigned int *)xmalloc(tip_nodes_count * + sizeof(unsigned int)); for (i = 0; i < tip_nodes_count; ++i) { - data[i] = tipnodes[i]->clv_index; + data[i] = tree->nodes[i]->clv_index; ENTRY entry; - entry.key = tipnodes[i]->label; +#ifdef __APPLE__ + entry.key = xstrdup(tree->nodes[i]->label); +#else + entry.key = tree->nodes[i]->label; +#endif entry.data = (void *)(data+i); hsearch(entry, ENTER); } @@ -312,7 +319,6 @@ int main(int argc, char * argv[]) /* we no longer need these two arrays (keys and values of hash table... */ free(data); - free(tipnodes); /* ...neither the sequences and the headers as they are already present in the form of probabilities in the tip CLVs */ @@ -327,18 +333,20 @@ int main(int argc, char * argv[]) /* allocate a buffer for storing pointers to nodes of the tree in postorder traversal */ - travbuffer = (pll_utree_t **)malloc(nodes_count * sizeof(pll_utree_t *)); + travbuffer = (pll_unode_t **)xmalloc(nodes_count * sizeof(pll_unode_t *)); - branch_lengths = (double *)malloc(branch_count * sizeof(double)); - matrix_indices = (unsigned int *)malloc(branch_count * sizeof(unsigned int)); - operations = (pll_operation_t *)malloc(inner_nodes_count * - sizeof(pll_operation_t)); + branch_lengths = (double *)xmalloc(branch_count * sizeof(double)); + matrix_indices = (unsigned int *)xmalloc(branch_count * sizeof(unsigned int)); + operations = (pll_operation_t *)xmalloc(inner_nodes_count * + sizeof(pll_operation_t)); /* get inner nodes */ - inner_nodes_list = (pll_utree_t **)malloc(inner_nodes_count * - sizeof(pll_utree_t *)); - pll_utree_query_innernodes(tree, inner_nodes_list); + inner_nodes_list = (pll_unode_t **)xmalloc(inner_nodes_count * + sizeof(pll_unode_t *)); + memcpy(inner_nodes_list, + tree->nodes+tip_nodes_count, + inner_nodes_count*sizeof(pll_unode_t *)); /* get random directions for each inner node */ @@ -357,12 +365,13 @@ int main(int argc, char * argv[]) { /* randomly select an inner node */ r = (unsigned int)rand() % inner_nodes_count; - pll_utree_t * node = inner_nodes_list[r]; + pll_unode_t * node = inner_nodes_list[r]; /* compute a partial traversal starting from the randomly selected inner node */ unsigned int traversal_size; if (!pll_utree_traverse(node, + PLL_TREE_TRAVERSE_POSTORDER, cb_partial_traversal, travbuffer, &traversal_size)) diff --git a/examples/protein-list/protein-list.c b/examples/protein-list/protein-list.c index aeb1038..fceadce 100644 --- a/examples/protein-list/protein-list.c +++ b/examples/protein-list/protein-list.c @@ -30,6 +30,23 @@ static void fatal(const char * format, ...) __attribute__ ((noreturn)); +static void * xmalloc(size_t size) +{ + void * t; + t = malloc(size); + if (!t) + fatal("Unable to allocate enough memory."); + + return t; +} + +static char * xstrdup(const char * s) +{ + size_t len = strlen(s); + char * p = (char *)xmalloc(len+1); + return strcpy(p,s); +} + static const double * protein_models_rates_list[PROT_MODELS_COUNT] = { pll_aa_rates_dayhoff, @@ -100,39 +117,29 @@ static const char * protein_models_names_list[PROT_MODELS_COUNT] = }; /* a callback function for performing a full traversal */ -static int cb_full_traversal(pll_utree_t * node) +static int cb_full_traversal(pll_unode_t * node) { return 1; } -static void set_missing_branch_length_recursive(pll_utree_t * tree, - double length) -{ - if (tree) - { - /* set branch length to default if not set */ - if (!tree->length) - tree->length = length; - - if (tree->next) - { - if (!tree->next->length) - tree->next->length = length; - - if (!tree->next->next->length) - tree->next->next->length = length; - - set_missing_branch_length_recursive(tree->next->back, length); - set_missing_branch_length_recursive(tree->next->next->back, length); - } - } -} - /* branch lengths not present in the newick file get a value of 0.000001 */ static void set_missing_branch_length(pll_utree_t * tree, double length) { - set_missing_branch_length_recursive(tree, length); - set_missing_branch_length_recursive(tree->back, length); + unsigned int i; + + for (i = 0; i < tree->tip_count; ++i) + if (!tree->nodes[i]->length) + tree->nodes[i]->length = length; + + for (i = tree->tip_count; i < tree->tip_count + tree->inner_count; ++i) + { + if (!tree->nodes[i]->length) + tree->nodes[i]->length = length; + if (!tree->nodes[i]->next->length) + tree->nodes[i]->next->length = length; + if (!tree->nodes[i]->next->next->length) + tree->nodes[i]->next->next->length = length; + } } static void fatal(const char * format, ...) @@ -154,7 +161,7 @@ int main(int argc, char * argv[]) double * branch_lengths; pll_partition_t * partition; pll_operation_t * operations; - pll_utree_t ** travbuffer; + pll_unode_t ** travbuffer; /* we accept only two arguments - the newick tree (unrooted binary) and the alignment in the form of FASTA reads */ @@ -163,7 +170,11 @@ int main(int argc, char * argv[]) /* parse the unrooted binary tree in newick format, and store the number of tip nodes in tip_nodes_count */ - pll_utree_t * tree = pll_utree_parse_newick(argv[1], &tip_nodes_count); + pll_utree_t * tree = pll_utree_parse_newick(argv[1]); + if (!tree) + fatal("Tree must be an unrooted binary tree"); + + tip_nodes_count = tree->tip_count; /* fix all missing branch lengths (i.e. those that did not appear in the newick) to 0.000001 */ @@ -194,22 +205,21 @@ int main(int argc, char * argv[]) */ - /* obtain an array of pointers to tip nodes */ - pll_utree_t ** tipnodes = (pll_utree_t **)calloc(tip_nodes_count, - sizeof(pll_utree_t *)); - pll_utree_query_tipnodes(tree, tipnodes); - /* create a libc hash table of size tip_nodes_count */ hcreate(tip_nodes_count); /* populate a libc hash table with tree tip labels */ - unsigned int * data = (unsigned int *)malloc(tip_nodes_count * + unsigned int * data = (unsigned int *)xmalloc(tip_nodes_count * sizeof(unsigned int)); for (i = 0; i < tip_nodes_count; ++i) { - data[i] = tipnodes[i]->clv_index; + data[i] = tree->nodes[i]->clv_index; ENTRY entry; - entry.key = tipnodes[i]->label; +#ifdef __APPLE__ + entry.key = xstrdup(tree->nodes[i]->label); +#else + entry.key = tree->nodes[i]->label; +#endif entry.data = (void *)(data+i); hsearch(entry, ENTER); } @@ -304,7 +314,6 @@ int main(int argc, char * argv[]) /* we no longer need these two arrays (keys and values of hash table... */ free(data); - free(tipnodes); /* ...neither the sequences and the headers as they are already present in the form of probabilities in the tip CLVs */ @@ -318,18 +327,20 @@ int main(int argc, char * argv[]) /* allocate a buffer for storing pointers to nodes of the tree in postorder traversal */ - travbuffer = (pll_utree_t **)malloc(nodes_count * sizeof(pll_utree_t *)); + travbuffer = (pll_unode_t **)xmalloc(nodes_count * sizeof(pll_unode_t *)); - - branch_lengths = (double *)malloc(branch_count * sizeof(double)); - matrix_indices = (unsigned int *)malloc(branch_count * sizeof(unsigned int)); - operations = (pll_operation_t *)malloc(inner_nodes_count * - sizeof(pll_operation_t)); + branch_lengths = (double *)xmalloc(branch_count * sizeof(double)); + matrix_indices = (unsigned int *)xmalloc(branch_count * sizeof(unsigned int)); + operations = (pll_operation_t *)xmalloc(inner_nodes_count * + sizeof(pll_operation_t)); /* compute a partial traversal starting from the randomly selected inner node */ + pll_unode_t * root = tree->nodes[tip_nodes_count+inner_nodes_count-1]; unsigned int traversal_size; - if (!pll_utree_traverse(tree, + + if (!pll_utree_traverse(root, + PLL_TREE_TRAVERSE_POSTORDER, cb_full_traversal, travbuffer, &traversal_size)) @@ -412,11 +423,11 @@ int main(int argc, char * argv[]) whose frequency vector is to be used for each rate category */ double logl = pll_compute_edge_loglikelihood(partition, - tree->clv_index, - tree->scaler_index, - tree->back->clv_index, - tree->back->scaler_index, - tree->pmatrix_index, + root->clv_index, + root->scaler_index, + root->back->clv_index, + root->back->scaler_index, + root->pmatrix_index, params_indices, NULL); diff --git a/examples/stepwise/stepwise.c b/examples/stepwise/stepwise.c index d1698ac..08c2bfd 100644 --- a/examples/stepwise/stepwise.c +++ b/examples/stepwise/stepwise.c @@ -24,7 +24,6 @@ #include #include -#define STATES 4 #define RATE_CATS 4 static void fatal(const char * format, ...) __attribute__ ((noreturn)); @@ -44,17 +43,52 @@ int main(int argc, char * argv[]) unsigned int i; unsigned int tip_nodes_count, inner_nodes_count, nodes_count, branch_count; pll_partition_t * partition; + unsigned int arch = PLL_ATTRIB_ARCH_CPU; /* we accept only two arguments - the newick tree (unrooted binary) and the alignment in the form of FASTA reads */ - if (argc != 3) - fatal(" syntax: %s [fasta] [seed]", argv[0]); + if (argc != 5) + fatal(" syntax: %s [fasta] [seed] [attrib] [states]", argv[0]); /* open FASTA file */ pll_fasta_t * fp = pll_fasta_open(argv[1], pll_map_fasta); if (!fp) fatal("Error opening file %s", argv[1]); + if (!strcasecmp(argv[3], "avx")) + arch = PLL_ATTRIB_ARCH_AVX; + else if (!strcasecmp(argv[3], "avx2")) + arch = PLL_ATTRIB_ARCH_AVX2; + else if (!strcasecmp(argv[3], "sse")) + arch = PLL_ATTRIB_ARCH_SSE; + else if (!strcasecmp(argv[3], "tpavx")) + arch = PLL_ATTRIB_ARCH_AVX | PLL_ATTRIB_PATTERN_TIP; + else if (!strcasecmp(argv[3], "tpavx2")) + arch = PLL_ATTRIB_ARCH_AVX2 | PLL_ATTRIB_PATTERN_TIP; + else if (!strcasecmp(argv[3], "tpsse")) + arch = PLL_ATTRIB_ARCH_SSE | PLL_ATTRIB_PATTERN_TIP; + else if (!strcasecmp(argv[3], "tpcpu")) + arch = PLL_ATTRIB_ARCH_CPU| PLL_ATTRIB_PATTERN_TIP; + + printf("Setting flags:\n"); + if (arch & PLL_ATTRIB_ARCH_CPU) + printf("\tPLL_ATTRIB_ARCH_CPU\n"); + if (arch & PLL_ATTRIB_ARCH_SSE) + printf("\tPLL_ATTRIB_ARCH_SSE\n"); + if (arch & PLL_ATTRIB_ARCH_AVX) + printf("\tPLL_ATTRIB_ARCH_AVX\n"); + if (arch & PLL_ATTRIB_ARCH_AVX2) + printf("\tPLL_ATTRIB_ARCH_AVX2\n"); + if (arch & PLL_ATTRIB_PATTERN_TIP) + printf("\tPLL_ATTRIB_PATTERN_TIP\n"); + + unsigned int states = atoi(argv[4]); + unsigned const int * map = pll_map_nt; + if (states == 20) + { + map = pll_map_aa; + } + char * seq = NULL; char * hdr = NULL; long seqlen; @@ -121,7 +155,7 @@ int main(int argc, char * argv[]) printf("Number of sites: %d\n", sites); unsigned int * weight = pll_compress_site_patterns(seqdata, - pll_map_nt, + map, tip_nodes_count, &sites); printf("Number of unique sites: %d\n", sites); @@ -143,13 +177,13 @@ int main(int argc, char * argv[]) partition = pll_partition_create(tip_nodes_count, inner_nodes_count, - STATES, + states, (unsigned int)sites, 1, branch_count, RATE_CATS, inner_nodes_count, - PLL_ATTRIB_ARCH_CPU); + arch); /* set pattern weights and free the weights array */ @@ -158,7 +192,7 @@ int main(int argc, char * argv[]) /* find sequences in hash table and link them with the corresponding taxa */ for (i = 0; i < tip_nodes_count; ++i) - pll_set_tip_states(partition, i, pll_map_nt, seqdata[i]); + pll_set_tip_states(partition, i, map, seqdata[i]); unsigned int score; @@ -167,7 +201,14 @@ int main(int argc, char * argv[]) pll_utree_t * tree = pll_fastparsimony_stepwise(&parsimony, headers, &score,1,atoi(argv[2])); printf("Score: %u\n", score); - char * newick = pll_utree_export_newick(tree); + + /* select a random inner node */ + long int r = random() % tree->inner_count; + pll_unode_t * root = tree->nodes[tree->tip_count + r]; + + /* export the tree to newick format with the selected inner node as the root + of the unrooted binary tree */ + char * newick = pll_utree_export_newick(root); printf("%s\n", newick); free(newick); diff --git a/src/parse_utree.y b/src/parse_utree.y index a0416de..3dae6f3 100644 --- a/src/parse_utree.y +++ b/src/parse_utree.y @@ -34,7 +34,7 @@ extern void pll_utree__delete_buffer(struct pll_utree_buffer_state * buffer); static unsigned int tip_cnt = 0; -static void dealloc_data(pll_utree_t * node, void (*cb_destroy)(void *)) +static void dealloc_data(pll_unode_t * node, void (*cb_destroy)(void *)) { if (node->data) { @@ -43,7 +43,7 @@ static void dealloc_data(pll_utree_t * node, void (*cb_destroy)(void *)) } } -static void dealloc_tree_recursive(pll_utree_t * node, +static void dealloc_graph_recursive(pll_unode_t * node, void (*cb_destroy)(void *)) { if (!node->next) @@ -53,8 +53,8 @@ static void dealloc_tree_recursive(pll_utree_t * node, return; } - dealloc_tree_recursive(node->next->back, cb_destroy); - dealloc_tree_recursive(node->next->next->back, cb_destroy); + dealloc_graph_recursive(node->next->back, cb_destroy); + dealloc_graph_recursive(node->next->next->back, cb_destroy); /* deallocate any extra data */ dealloc_data(node,cb_destroy); @@ -67,8 +67,8 @@ static void dealloc_tree_recursive(pll_utree_t * node, free(node); } -PLL_EXPORT void pll_utree_destroy(pll_utree_t * root, - void (*cb_destroy)(void *)) +PLL_EXPORT void pll_utree_graph_destroy(pll_unode_t * root, + void (*cb_destroy)(void *)) { if (!root) return; if (!(root->next)) @@ -79,11 +79,11 @@ PLL_EXPORT void pll_utree_destroy(pll_utree_t * root, } if (root->next) - dealloc_tree_recursive(root->next->back,cb_destroy); + dealloc_graph_recursive(root->next->back,cb_destroy); if (root->next->next) - dealloc_tree_recursive(root->next->next->back,cb_destroy); + dealloc_graph_recursive(root->next->next->back,cb_destroy); if (root->back) - dealloc_tree_recursive(root->back,cb_destroy); + dealloc_graph_recursive(root->back,cb_destroy); /* deallocate any extra data */ dealloc_data(root,cb_destroy); @@ -97,7 +97,43 @@ PLL_EXPORT void pll_utree_destroy(pll_utree_t * root, free(root); } -static void pll_utree_error(pll_utree_t * tree, const char * s) +PLL_EXPORT void pll_utree_destroy(pll_utree_t * tree, + void (*cb_destroy)(void *)) +{ + unsigned int i; + pll_unode_t * node; + + /* deallocate tip nodes */ + for (i = 0; i < tree->tip_count; ++i) + { + if (tree->nodes[i]->label) + free(tree->nodes[i]->label); + free(tree->nodes[i]); + } + + /* deallocate inner nodes */ + for (i = tree->tip_count; i < tree->tip_count + tree->inner_count; ++i) + { + node = tree->nodes[i]; + + dealloc_data(node, cb_destroy); + dealloc_data(node->next, cb_destroy); + dealloc_data(node->next->next, cb_destroy); + + if (node->label) + free(node->label); + + free(node->next->next); + free(node->next); + free(node); + } + + /* deallocate tree structure */ + free(tree->nodes); + free(tree); +} + +static void pll_utree_error(pll_unode_t * node, const char * s) { pll_errno = PLL_ERROR_NEWICK_SYNTAX; if (pll_utree_colstart == pll_utree_colend) @@ -114,12 +150,12 @@ static void pll_utree_error(pll_utree_t * tree, const char * s) { char * s; char * d; - struct pll_utree * tree; + struct pll_unode_s * tree; } %error-verbose -%parse-param {struct pll_utree * tree} -%destructor { pll_utree_destroy($$,NULL); } subtree +%parse-param {struct pll_unode_s * tree} +%destructor { pll_utree_graph_destroy($$,NULL); } subtree %destructor { free($$); } STRING %destructor { free($$); } NUMBER %destructor { free($$); } label @@ -139,12 +175,11 @@ static void pll_utree_error(pll_utree_t * tree, const char * s) input: OPAR subtree COMMA subtree COMMA subtree CPAR optional_label optional_length SEMICOLON { - tree->next = (pll_utree_t *)calloc(1, sizeof(pll_utree_t)); + tree->next = (pll_unode_t *)calloc(1, sizeof(pll_unode_t)); - tree->next->next = (pll_utree_t *)calloc(1, sizeof(pll_utree_t)); + tree->next->next = (pll_unode_t *)calloc(1, sizeof(pll_unode_t)); tree->next->next->next = tree; - tree->back = $2; tree->next->back = $4; tree->next->next->back = $6; @@ -166,11 +201,11 @@ input: OPAR subtree COMMA subtree COMMA subtree CPAR optional_label optional_len subtree: OPAR subtree COMMA subtree CPAR optional_label optional_length { - $$ = (pll_utree_t *)calloc(1, sizeof(pll_utree_t)); + $$ = (pll_unode_t *)calloc(1, sizeof(pll_unode_t)); - $$->next = (pll_utree_t *)calloc(1, sizeof(pll_utree_t)); + $$->next = (pll_unode_t *)calloc(1, sizeof(pll_unode_t)); - $$->next->next = (pll_utree_t *)calloc(1, sizeof(pll_utree_t)); + $$->next->next = (pll_unode_t *)calloc(1, sizeof(pll_unode_t)); $$->next->next->next = $$; @@ -192,7 +227,7 @@ subtree: OPAR subtree COMMA subtree CPAR optional_label optional_length } | label optional_length { - $$ = (pll_utree_t *)calloc(1, sizeof(pll_utree_t)); + $$ = (pll_unode_t *)calloc(1, sizeof(pll_unode_t)); $$->label = $1; $$->length = $2 ? atof($2) : 0; @@ -209,7 +244,7 @@ number: NUMBER { $$=$1;}; %% -static void recursive_assign_indices(pll_utree_t * node, +static void recursive_assign_indices(pll_unode_t * node, unsigned int * tip_clv_index, unsigned int * inner_clv_index, int * inner_scaler_index, @@ -258,7 +293,7 @@ static void recursive_assign_indices(pll_utree_t * node, *inner_node_index = *inner_node_index + 3; } -PLL_EXPORT void pll_utree_reset_template_indices(pll_utree_t * node, +PLL_EXPORT void pll_utree_reset_template_indices(pll_unode_t * node, unsigned int tip_count) { unsigned int tip_clv_index = 0; @@ -301,28 +336,133 @@ PLL_EXPORT void pll_utree_reset_template_indices(pll_utree_t * node, node->next->next->pmatrix_index = node->next->next->back->pmatrix_index; } -PLL_EXPORT pll_utree_t * pll_utree_parse_newick(const char * filename, - unsigned int * tip_count) +static void fill_nodes_recursive(pll_unode_t * node, + pll_unode_t ** array, + unsigned int * tip_index, + unsigned int * inner_index) +{ + if (!node->next) + { + array[*tip_index] = node; + *tip_index = *tip_index + 1; + return; + } + + fill_nodes_recursive(node->next->back, array, tip_index, inner_index); + fill_nodes_recursive(node->next->next->back, array, tip_index, inner_index); + + array[*inner_index] = node; + *inner_index = *inner_index + 1; +} + +static unsigned int utree_count_tips_recursive(pll_unode_t * node) +{ + unsigned int count = 0; + + if (!node->next) + return 1; + + count += utree_count_tips_recursive(node->next->back); + count += utree_count_tips_recursive(node->next->next->back); + + return count; +} + +static unsigned int utree_count_tips(pll_unode_t * root) +{ + unsigned int count = 0; + + if (!root->next && !root->back->next) + return 0; + + if (!root->next) + root = root->back; + + count += utree_count_tips_recursive(root->back); + count += utree_count_tips_recursive(root->next->back); + count += utree_count_tips_recursive(root->next->next->back); + + return count; +} + +/* wraps/encalupsates the unrooted tree graph into a tree structure + that contains a list of nodes, number of tips and number of inner + nodes. If 0 is passed as tip_count, then an additional recrursion + of the tree structure is done to detect the number of tips */ +PLL_EXPORT pll_utree_t * pll_utree_wraptree(pll_unode_t * root, + unsigned int tip_count) { - struct pll_utree * tree; + pll_utree_t * tree = (pll_utree_t *)malloc(sizeof(pll_utree_t)); + if (!tree) + { + snprintf(pll_errmsg, 200, "Unable to allocate enough memory."); + pll_errno = PLL_ERROR_MEM_ALLOC; + return PLL_FAILURE; + } + + if (tip_count < 3 && tip_count != 0) + { + snprintf(pll_errmsg, 200, "Invalid tip_count value (%u).", tip_count); + pll_errno = PLL_ERROR_PARAM_INVALID; + return PLL_FAILURE; + } + + if (tip_count == 0) + { + tip_count = utree_count_tips(root); + if (!tip_count) + { + snprintf(pll_errmsg, 200, "Input tree contains no inner nodes."); + pll_errno = PLL_ERROR_PARAM_INVALID; + return PLL_FAILURE; + } + } + + tree->nodes = (pll_unode_t **)malloc((2*tip_count-2)*sizeof(pll_unode_t *)); + if (!tree->nodes) + { + snprintf(pll_errmsg, 200, "Unable to allocate enough memory."); + pll_errno = PLL_ERROR_MEM_ALLOC; + return PLL_FAILURE; + } + + unsigned int tip_index = 0; + unsigned int inner_index = tip_count; + + fill_nodes_recursive(root->back, tree->nodes, &tip_index, &inner_index); + fill_nodes_recursive(root->next->back, tree->nodes, &tip_index, &inner_index); + fill_nodes_recursive(root->next->next->back, tree->nodes, &tip_index, &inner_index); + + tree->nodes[inner_index] = root; + tree->tip_count = tip_count; + tree->edge_count = 2*tip_count-3; + tree->inner_count = tip_count-2; + + return tree; +} + +PLL_EXPORT pll_utree_t * pll_utree_parse_newick(const char * filename) +{ + pll_utree_t * tree; + + struct pll_unode_s * root; /* reset tip count */ tip_cnt = 0; - tree = (pll_utree_t *)calloc(1, sizeof(pll_utree_t)); - pll_utree_in = fopen(filename, "r"); if (!pll_utree_in) { - pll_utree_destroy(tree,NULL); pll_errno = PLL_ERROR_FILE_OPEN; snprintf(pll_errmsg, 200, "Unable to open file (%s)", filename); return PLL_FAILURE; } - else if (pll_utree_parse(tree)) + + root = (pll_unode_t *)calloc(1, sizeof(pll_unode_t)); + if (pll_utree_parse(root)) { - pll_utree_destroy(tree,NULL); - tree = NULL; + pll_utree_graph_destroy(root,NULL); + root = NULL; fclose(pll_utree_in); pll_utree_lex_destroy(); return PLL_FAILURE; @@ -332,24 +472,25 @@ PLL_EXPORT pll_utree_t * pll_utree_parse_newick(const char * filename, pll_utree_lex_destroy(); - *tip_count = tip_cnt; + /* initialize clv and scaler indices to the default template */ + pll_utree_reset_template_indices(root, tip_cnt); - /* initialize clv and scaler indices */ - pll_utree_reset_template_indices(tree, tip_cnt); + /* wrap tree */ + tree = pll_utree_wraptree(root,tip_cnt); return tree; } -PLL_EXPORT pll_utree_t * pll_utree_parse_newick_string(const char * s, - unsigned int * tip_count) +PLL_EXPORT pll_utree_t * pll_utree_parse_newick_string(const char * s) { int rc; - struct pll_utree * tree; + struct pll_unode_s * root; + pll_utree_t * tree = NULL; /* reset tip count */ tip_cnt = 0; - if (!(tree = (pll_utree_t *)calloc(1, sizeof(pll_utree_t)))) + if (!(root = (pll_unode_t *)calloc(1, sizeof(pll_unode_t)))) { pll_errno = PLL_ERROR_MEM_ALLOC; snprintf(pll_errmsg, 200, "Unable to allocate enough memory."); @@ -357,21 +498,20 @@ PLL_EXPORT pll_utree_t * pll_utree_parse_newick_string(const char * s, } struct pll_utree_buffer_state * buffer = pll_utree__scan_string(s); - rc = pll_utree_parse(tree); + rc = pll_utree_parse(root); pll_utree__delete_buffer(buffer); pll_utree_lex_destroy(); if (!rc) { - *tip_count = tip_cnt; - /* initialize clv and scaler indices */ - pll_utree_reset_template_indices(tree, tip_cnt); + pll_utree_reset_template_indices(root, tip_cnt); - return tree; + tree = pll_utree_wraptree(root,tip_cnt); } - free(tree); - return NULL; + free(root); + + return tree; } diff --git a/src/pll.h b/src/pll.h index dc052dc..dd19471 100644 --- a/src/pll.h +++ b/src/pll.h @@ -101,6 +101,9 @@ #define PLL_UTREE_MOVE_NNI_LEFT 1 #define PLL_UTREE_MOVE_NNI_RIGHT 2 +#define PLL_TREE_TRAVERSE_POSTORDER 1 +#define PLL_TREE_TRAVERSE_PREORDER 2 + /* error codes */ #define PLL_ERROR_FILE_OPEN 100 @@ -236,7 +239,7 @@ typedef struct pll_fasta /* Simple unrooted and rooted tree structure for parsing newick */ -typedef struct pll_utree +typedef struct pll_unode_s { char * label; double length; @@ -244,10 +247,20 @@ typedef struct pll_utree unsigned int clv_index; int scaler_index; unsigned int pmatrix_index; - struct pll_utree * next; - struct pll_utree * back; + struct pll_unode_s * next; + struct pll_unode_s * back; void * data; +} pll_unode_t; + +typedef struct pll_utree_s +{ + unsigned int tip_count; + unsigned int inner_count; + unsigned int edge_count; + + pll_unode_t ** nodes; + } pll_utree_t; typedef struct pll_rtree @@ -274,18 +287,18 @@ typedef struct pll_utree_rb_s { struct { - pll_utree_t * p; - pll_utree_t * r; - pll_utree_t * rb; - pll_utree_t * pnb; - pll_utree_t * pnnb; + pll_unode_t * p; + pll_unode_t * r; + pll_unode_t * rb; + pll_unode_t * pnb; + pll_unode_t * pnnb; double r_len; double pnb_len; double pnnb_len; } spr; struct { - pll_utree_t * p; + pll_unode_t * p; int nni_type; } nni; }; @@ -594,36 +607,43 @@ PLL_EXPORT void pll_rtree_destroy(pll_rtree_t * root, /* functions in parse_utree.y */ -PLL_EXPORT pll_utree_t * pll_utree_parse_newick(const char * filename, - unsigned int * tip_count); +PLL_EXPORT pll_utree_t * pll_utree_parse_newick(const char * filename); -PLL_EXPORT pll_utree_t * pll_utree_parse_newick_string(const char * s, - unsigned int * tip_count); +PLL_EXPORT pll_utree_t * pll_utree_parse_newick_string(const char * s); -PLL_EXPORT void pll_utree_destroy(pll_utree_t * root, +PLL_EXPORT void pll_utree_destroy(pll_utree_t * tree, void (*cb_destroy)(void *)); -PLL_EXPORT void pll_utree_reset_template_indices(pll_utree_t * node, +PLL_EXPORT void pll_utree_reset_template_indices(pll_unode_t * node, unsigned int tip_count); +PLL_EXPORT void pll_utree_graph_destroy(pll_unode_t * root, + void (*cb_destroy)(void *)); + +PLL_EXPORT pll_utree_t * pll_utree_wraptree(pll_unode_t * root, + unsigned int tip_count); + /* functions in utree.c */ -PLL_EXPORT void pll_utree_show_ascii(const pll_utree_t * tree, int options); +PLL_EXPORT void pll_utree_show_ascii(const pll_unode_t * tree, int options); -PLL_EXPORT char * pll_utree_export_newick(const pll_utree_t * root); +PLL_EXPORT char * pll_utree_export_newick(const pll_unode_t * root); -PLL_EXPORT int pll_utree_traverse(pll_utree_t * root, - int (*cbtrav)(pll_utree_t *), - pll_utree_t ** outbuffer, +PLL_EXPORT int pll_utree_traverse(pll_unode_t * root, + int traversal, + int (*cbtrav)(pll_unode_t *), + pll_unode_t ** outbuffer, unsigned int * trav_size); +#if 0 PLL_EXPORT unsigned int pll_utree_query_tipnodes(pll_utree_t * root, pll_utree_t ** node_list); PLL_EXPORT unsigned int pll_utree_query_innernodes(pll_utree_t * root, pll_utree_t ** node_list); +#endif -PLL_EXPORT void pll_utree_create_operations(pll_utree_t ** trav_buffer, +PLL_EXPORT void pll_utree_create_operations(pll_unode_t ** trav_buffer, unsigned int trav_buffer_size, double * branches, unsigned int * pmatrix_indices, @@ -633,14 +653,20 @@ PLL_EXPORT void pll_utree_create_operations(pll_utree_t ** trav_buffer, PLL_EXPORT int pll_utree_check_integrity(pll_utree_t * root); +PLL_EXPORT pll_unode_t * pll_utree_graph_clone(pll_unode_t * root); + PLL_EXPORT pll_utree_t * pll_utree_clone(pll_utree_t * root); + PLL_EXPORT pll_utree_t * pll_rtree_unroot(pll_rtree_t * root); -PLL_EXPORT int pll_utree_every(pll_utree_t * node, - int (*cb)(pll_utree_t *)); -PLL_EXPORT void pll_utree_create_pars_buildops(pll_utree_t ** trav_buffer, + +PLL_EXPORT int pll_utree_every(pll_utree_t * tree, + int (*cb)(pll_unode_t *)); + +PLL_EXPORT void pll_utree_create_pars_buildops(pll_unode_t ** trav_buffer, unsigned int trav_buffer_size, pll_pars_buildop_t * ops, unsigned int * ops_count); + /* functions in parse_phylip.y */ PLL_EXPORT pll_msa_t * pll_phylip_parse_msa(const char * filename, @@ -1579,19 +1605,19 @@ PLL_EXPORT unsigned int * pll_compress_site_patterns(char ** sequence, /* functions in utree_moves.c */ -PLL_EXPORT int pll_utree_spr(pll_utree_t * p, - pll_utree_t * r, +PLL_EXPORT int pll_utree_spr(pll_unode_t * p, + pll_unode_t * r, pll_utree_rb_t * rb, double * branch_lengths, unsigned int * matrix_indices); -PLL_EXPORT int pll_utree_spr_safe(pll_utree_t * p, - pll_utree_t * r, +PLL_EXPORT int pll_utree_spr_safe(pll_unode_t * p, + pll_unode_t * r, pll_utree_rb_t * rb, double * branch_lengths, unsigned int * matrix_indices); -PLL_EXPORT int pll_utree_nni(pll_utree_t * p, +PLL_EXPORT int pll_utree_nni(pll_unode_t * p, int type, pll_utree_rb_t * rb); @@ -1627,14 +1653,14 @@ PLL_EXPORT double pll_parsimony_score(pll_parsimony_t * pars, PLL_EXPORT void pll_parsimony_destroy(pll_parsimony_t * pars); -/* functions in svg.c */ +/* functions in utree_svg.c */ PLL_EXPORT pll_svg_attrib_t * pll_svg_attrib_create(void); PLL_EXPORT void pll_svg_attrib_destroy(pll_svg_attrib_t * attrib); PLL_EXPORT int pll_utree_export_svg(pll_utree_t * tree, - unsigned int tip_count, + pll_unode_t * root, const pll_svg_attrib_t * attribs, const char * filename); diff --git a/src/stepwise.c b/src/stepwise.c index 67798fa..b8574ee 100644 --- a/src/stepwise.c +++ b/src/stepwise.c @@ -29,7 +29,7 @@ typedef struct int clv_valid; } node_info_t; -static pll_utree_t ** travbuffer; +static pll_unode_t ** travbuffer; static pll_pars_buildop_t * parsops; static char * xstrdup(const char * s) @@ -98,7 +98,7 @@ static unsigned int * create_shuffled(unsigned int n, unsigned int seed) return x; } -static void dealloc_data_onenode(pll_utree_t * node) +static void dealloc_data_onenode(pll_unode_t * node) { if (node->data) { @@ -107,7 +107,7 @@ static void dealloc_data_onenode(pll_utree_t * node) } } -static void dealloc_data(pll_utree_t * node) +static void dealloc_data(pll_unode_t * node) { dealloc_data_onenode(node); dealloc_data_onenode(node->next); @@ -115,7 +115,7 @@ static void dealloc_data(pll_utree_t * node) } /* a callback function for performing a partial traversal */ -static int cb_partial_traversal(pll_utree_t * node) +static int cb_partial_traversal(pll_unode_t * node) { node_info_t * node_info; @@ -160,19 +160,19 @@ static int cb_partial_traversal(pll_utree_t * node) return 1; } -static pll_utree_t * utree_inner_create(unsigned int i) +static pll_unode_t * utree_inner_create(unsigned int i) { - pll_utree_t * node = (pll_utree_t *)calloc(1,sizeof(pll_utree_t)); + pll_unode_t * node = (pll_unode_t *)calloc(1,sizeof(pll_unode_t)); if (!node) return NULL; - node->next = (pll_utree_t *)calloc(1,sizeof(pll_utree_t)); + node->next = (pll_unode_t *)calloc(1,sizeof(pll_unode_t)); if (!node->next) { free(node); return NULL; } - node->next->next = (pll_utree_t *)calloc(1,sizeof(pll_utree_t)); + node->next->next = (pll_unode_t *)calloc(1,sizeof(pll_unode_t)); if (!node->next->next) { free(node->next); @@ -189,16 +189,16 @@ static pll_utree_t * utree_inner_create(unsigned int i) return node; } -static pll_utree_t * utree_tip_create(unsigned int i) +static pll_unode_t * utree_tip_create(unsigned int i) { - pll_utree_t * node = (pll_utree_t *)calloc(1,sizeof(pll_utree_t)); + pll_unode_t * node = (pll_unode_t *)calloc(1,sizeof(pll_unode_t)); node->next = NULL; node->clv_index = i; return node; } -static void utree_link(pll_utree_t * a, pll_utree_t * b) +static void utree_link(pll_unode_t * a, pll_unode_t * b) { /* @@ -214,7 +214,7 @@ static void utree_link(pll_utree_t * a, pll_utree_t * b) b->back = a; } -static void utree_edgesplit(pll_utree_t * a, pll_utree_t * b, pll_utree_t * c) +static void utree_edgesplit(pll_unode_t * a, pll_unode_t * b, pll_unode_t * c) { /* * * @@ -239,9 +239,9 @@ static void utree_edgesplit(pll_utree_t * a, pll_utree_t * b, pll_utree_t * c) } static unsigned int utree_iterate(pll_parsimony_t ** list, - pll_utree_t ** edge_list, - pll_utree_t * inner_node, - pll_utree_t * tip_node, + pll_unode_t ** edge_list, + pll_unode_t * inner_node, + pll_unode_t * tip_node, unsigned int edge_count, unsigned int partition_count) { @@ -256,12 +256,12 @@ static unsigned int utree_iterate(pll_parsimony_t ** list, min_cost = ~0u; /* find first empty slot in edge_list */ - pll_utree_t ** empty_slot = edge_list + edge_count; + pll_unode_t ** empty_slot = edge_list + edge_count; for (i = 0; i < edge_count; ++i) { /* make the split */ - pll_utree_t * d = edge_list[i]->back; + pll_unode_t * d = edge_list[i]->back; utree_edgesplit(edge_list[i], inner_node, inner_node->next); utree_link(inner_node->next->next, tip_node); @@ -271,6 +271,7 @@ static unsigned int utree_iterate(pll_parsimony_t ** list, /* make a partial traversal */ if (!pll_utree_traverse(tip_node->back, + PLL_TREE_TRAVERSE_POSTORDER, cb_partial_traversal, travbuffer, &traversal_size)) @@ -321,7 +322,7 @@ static unsigned int utree_iterate(pll_parsimony_t ** list, return min_cost; } -static void invalidate_node(pll_utree_t * node) +static void invalidate_node(pll_unode_t * node) { node_info_t * info; @@ -364,7 +365,7 @@ PLL_EXPORT pll_utree_t * pll_fastparsimony_stepwise(pll_parsimony_t ** list, *cost = ~0u; - pll_utree_t * root; + pll_unode_t * root; /* check that all parsimony structures have the same number of tips and inner nodes */ @@ -385,7 +386,7 @@ PLL_EXPORT pll_utree_t * pll_fastparsimony_stepwise(pll_parsimony_t ** list, /* 1. Make all allocations at the beginning and check everything was allocated, otherwise return an error */ - travbuffer = (pll_utree_t **)malloc((2*tips_count-2) * sizeof(pll_utree_t *)); + travbuffer = (pll_unode_t **)malloc((2*tips_count-2) * sizeof(pll_unode_t *)); root = utree_inner_create(2*tips_count-3); @@ -394,17 +395,17 @@ PLL_EXPORT pll_utree_t * pll_fastparsimony_stepwise(pll_parsimony_t ** list, sizeof(pll_pars_buildop_t)); /* create tip node list with a terminating NULL element */ - pll_utree_t ** tip_node_list = (pll_utree_t **)calloc(tips_count+1, - sizeof(pll_utree_t *)); + pll_unode_t ** tip_node_list = (pll_unode_t **)calloc(tips_count+1, + sizeof(pll_unode_t *)); /* create inner node list for (tips_count - 3) inner nodes (root was already created, and leave the last slot NULL for termination */ - pll_utree_t ** inner_node_list = (pll_utree_t **)calloc(tips_count - 2, - sizeof(pll_utree_t *)); + pll_unode_t ** inner_node_list = (pll_unode_t **)calloc(tips_count - 2, + sizeof(pll_unode_t *)); if (!inner_node_list || !parsops || !tip_node_list || !root || !travbuffer) { - pll_utree_destroy(root,NULL); + pll_utree_graph_destroy(root,NULL); free(parsops); free(inner_node_list); free(tip_node_list); @@ -421,12 +422,12 @@ PLL_EXPORT pll_utree_t * pll_fastparsimony_stepwise(pll_parsimony_t ** list, inner_node_list[i] = utree_inner_create(i+tips_count); if (!inner_node_list[i]) { - pll_utree_destroy(root,NULL); + pll_utree_graph_destroy(root,NULL); free(parsops); free(tip_node_list); free(travbuffer); for (j = 0; j < i; ++j) - pll_utree_destroy(inner_node_list[j],NULL); + pll_utree_graph_destroy(inner_node_list[j],NULL); free(inner_node_list); pll_errno = PLL_ERROR_MEM_ALLOC; @@ -451,12 +452,12 @@ PLL_EXPORT pll_utree_t * pll_fastparsimony_stepwise(pll_parsimony_t ** list, { free(tip_node_list[i]); - pll_utree_destroy(root,NULL); + pll_utree_graph_destroy(root,NULL); free(parsops); free(inner_node_list); free(travbuffer); for (j = 0; j < i; ++j) - pll_utree_destroy(tip_node_list[j],NULL); + pll_utree_graph_destroy(tip_node_list[j],NULL); free(tip_node_list); pll_errno = PLL_ERROR_MEM_ALLOC; @@ -483,8 +484,8 @@ PLL_EXPORT pll_utree_t * pll_fastparsimony_stepwise(pll_parsimony_t ** list, utree_link(root->next->next, tip_node_list[2]); /* available placements */ - pll_utree_t ** edge_list = (pll_utree_t **)calloc(2*tips_count-3, - sizeof(pll_utree_t *)); + pll_unode_t ** edge_list = (pll_unode_t **)calloc(2*tips_count-3, + sizeof(pll_unode_t *)); edge_list[0] = root; edge_list[1] = root->next; edge_list[2] = root->next->next; @@ -538,5 +539,8 @@ PLL_EXPORT pll_utree_t * pll_fastparsimony_stepwise(pll_parsimony_t ** list, free(travbuffer); free(parsops); - return root; + /* wrap tree */ + pll_utree_t * tree = pll_utree_wraptree(root,tips_count); + + return tree; } diff --git a/src/utree.c b/src/utree.c index 8db9a7f..5c5a1af 100644 --- a/src/utree.c +++ b/src/utree.c @@ -23,18 +23,18 @@ static int indent_space = 4; -static void print_node_info(const pll_utree_t * tree, int options) +static void print_node_info(const pll_unode_t * node, int options) { if (options & PLL_UTREE_SHOW_LABEL) - printf (" %s", tree->label); + printf (" %s", node->label); if (options & PLL_UTREE_SHOW_BRANCH_LENGTH) - printf (" %f", tree->length); + printf (" %f", node->length); if (options & PLL_UTREE_SHOW_CLV_INDEX) - printf (" %d", tree->clv_index); + printf (" %d", node->clv_index); if (options & PLL_UTREE_SHOW_SCALER_INDEX) - printf (" %d", tree->scaler_index); + printf (" %d", node->scaler_index); if (options & PLL_UTREE_SHOW_PMATRIX_INDEX) - printf (" %d", tree->pmatrix_index); + printf (" %d", node->pmatrix_index); printf("\n"); } @@ -51,14 +51,14 @@ static char * xstrdup(const char * s) return strcpy(p,s); } -static void print_tree_recurse(pll_utree_t * tree, +static void print_tree_recurse(pll_unode_t * node, int indent_level, int * active_node_order, int options) { int i,j; - if (!tree) return; + if (!node) return; for (i = 0; i < indent_level; ++i) { @@ -86,22 +86,22 @@ static void print_tree_recurse(pll_utree_t * tree, printf("+"); for (j = 0; j < indent_space-1; ++j) printf ("-"); - if (tree->next) printf("+"); + if (node->next) printf("+"); - print_node_info(tree, options); + print_node_info(node, options); if (active_node_order[indent_level-1] == 2) active_node_order[indent_level-1] = 0; - if (tree->next) + if (node->next) { active_node_order[indent_level] = 1; - print_tree_recurse(tree->next->back, + print_tree_recurse(node->next->back, indent_level+1, active_node_order, options); active_node_order[indent_level] = 2; - print_tree_recurse(tree->next->next->back, + print_tree_recurse(node->next->next->back, indent_level+1, active_node_order, options); @@ -109,24 +109,24 @@ static void print_tree_recurse(pll_utree_t * tree, } -static unsigned int tree_indent_level(const pll_utree_t * tree, unsigned int indent) +static unsigned int tree_indent_level(const pll_unode_t * node, unsigned int indent) { - if (!tree->next) return indent+1; + if (!node->next) return indent+1; - unsigned int a = tree_indent_level(tree->next->back, indent+1); - unsigned int b = tree_indent_level(tree->next->next->back, indent+1); + unsigned int a = tree_indent_level(node->next->back, indent+1); + unsigned int b = tree_indent_level(node->next->next->back, indent+1); return (a > b ? a : b); } -PLL_EXPORT void pll_utree_show_ascii(const pll_utree_t * tree, int options) +PLL_EXPORT void pll_utree_show_ascii(const pll_unode_t * root, int options) { unsigned int a, b; - if (!tree->next) tree=tree->back; + if (!root->next) root=root->back; - a = tree_indent_level(tree->back,1); - b = tree_indent_level(tree,0); + a = tree_indent_level(root->back,1); + b = tree_indent_level(root,0); unsigned int max_indent_level = (a > b ? a : b); int * active_node_order = (int *)malloc((max_indent_level+1) * sizeof(int)); @@ -139,14 +139,14 @@ PLL_EXPORT void pll_utree_show_ascii(const pll_utree_t * tree, int options) active_node_order[0] = 1; active_node_order[1] = 1; - print_tree_recurse(tree->back, 1, active_node_order, options); - print_tree_recurse(tree->next->back, 1, active_node_order, options); + print_tree_recurse(root->back, 1, active_node_order, options); + print_tree_recurse(root->next->back, 1, active_node_order, options); active_node_order[0] = 2; - print_tree_recurse(tree->next->next->back, 1, active_node_order, options); + print_tree_recurse(root->next->next->back, 1, active_node_order, options); free(active_node_order); } -static char * newick_utree_recurse(const pll_utree_t * root) +static char * newick_utree_recurse(const pll_unode_t * root) { char * newick; int size_alloced; @@ -189,7 +189,7 @@ static char * newick_utree_recurse(const pll_utree_t * root) return newick; } -PLL_EXPORT char * pll_utree_export_newick(const pll_utree_t * root) +PLL_EXPORT char * pll_utree_export_newick(const pll_unode_t * root) { char * newick; int size_alloced; @@ -241,7 +241,7 @@ PLL_EXPORT char * pll_utree_export_newick(const pll_utree_t * root) return (newick); } -PLL_EXPORT void pll_utree_create_operations(pll_utree_t ** trav_buffer, +PLL_EXPORT void pll_utree_create_operations(pll_unode_t ** trav_buffer, unsigned int trav_buffer_size, double * branches, unsigned int * pmatrix_indices, @@ -249,7 +249,7 @@ PLL_EXPORT void pll_utree_create_operations(pll_utree_t ** trav_buffer, unsigned int * matrix_count, unsigned int * ops_count) { - pll_utree_t * node; + pll_unode_t * node; unsigned int i; *ops_count = 0; @@ -288,7 +288,8 @@ PLL_EXPORT void pll_utree_create_operations(pll_utree_t ** trav_buffer, } } -static int utree_every_recursive(pll_utree_t * node, +#if 0 +static int utree_every_recursive(pll_utree_t * tree, int (*cb)(pll_utree_t *)) { if (!node->next) @@ -300,18 +301,51 @@ static int utree_every_recursive(pll_utree_t * node, return (utree_every_recursive(node->next->back,cb) && utree_every_recursive(node->next->next->back,cb)); } +#endif + +PLL_EXPORT int pll_utree_every(pll_utree_t * tree, + int (*cb)(pll_unode_t *)) +{ + unsigned int i; + int rc = 1; + + for (i = 0; i < tree->tip_count + tree->inner_count; ++i) + rc &= cb(tree->nodes[i]); + + return rc; + +// return (utree_every_recursive(node,cb) && +// utree_every_recursive(node->back,cb)); +} -PLL_EXPORT int pll_utree_every(pll_utree_t * node, - int (*cb)(pll_utree_t *)) +static void utree_traverse_preorder(pll_unode_t * node, + int (*cbtrav)(pll_unode_t *), + unsigned int * index, + pll_unode_t ** outbuffer) { - return (utree_every_recursive(node,cb) && - utree_every_recursive(node->back,cb)); + if (!node->next) + { + if (cbtrav(node)) + { + outbuffer[*index] = node; + *index = *index + 1; + } + return; + } + if (!cbtrav(node)) + return; + + outbuffer[*index] = node; + *index = *index + 1; + + utree_traverse_preorder(node->next->back, cbtrav, index, outbuffer); + utree_traverse_preorder(node->next->next->back, cbtrav, index, outbuffer); } -static void utree_traverse(pll_utree_t * node, - int (*cbtrav)(pll_utree_t *), - unsigned int * index, - pll_utree_t ** outbuffer) +static void utree_traverse_postorder(pll_unode_t * node, + int (*cbtrav)(pll_unode_t *), + unsigned int * index, + pll_unode_t ** outbuffer) { if (!node->next) { @@ -324,39 +358,56 @@ static void utree_traverse(pll_utree_t * node, } if (!cbtrav(node)) return; - utree_traverse(node->next->back, cbtrav, index, outbuffer); - utree_traverse(node->next->next->back, cbtrav, index, outbuffer); + utree_traverse_postorder(node->next->back, cbtrav, index, outbuffer); + utree_traverse_postorder(node->next->next->back, cbtrav, index, outbuffer); outbuffer[*index] = node; *index = *index + 1; } -PLL_EXPORT int pll_utree_traverse(pll_utree_t * root, - int (*cbtrav)(pll_utree_t *), - pll_utree_t ** outbuffer, +PLL_EXPORT int pll_utree_traverse(pll_unode_t * root, + int traversal, + int (*cbtrav)(pll_unode_t *), + pll_unode_t ** outbuffer, unsigned int * trav_size) { *trav_size = 0; if (!root->next) return PLL_FAILURE; - /* we will traverse an unrooted tree in the following way + if (traversal == PLL_TREE_TRAVERSE_POSTORDER) + { - 2 - / - 1 --* - \ - 3 + /* we will traverse an unrooted tree in the following way - at each node the callback function is called to decide whether we - are going to traversing the subtree rooted at the specific node */ + 2 + / + 1 --* + \ + 3 - utree_traverse(root->back, cbtrav, trav_size, outbuffer); - utree_traverse(root, cbtrav, trav_size, outbuffer); + at each node the callback function is called to decide whether we + are going to traversing the subtree rooted at the specific node */ + + utree_traverse_postorder(root->back, cbtrav, trav_size, outbuffer); + utree_traverse_postorder(root, cbtrav, trav_size, outbuffer); + } + else if (traversal == PLL_TREE_TRAVERSE_PREORDER) + { + utree_traverse_preorder(root->back, cbtrav, trav_size, outbuffer); + utree_traverse_preorder(root, cbtrav, trav_size, outbuffer); + } + else + { + snprintf(pll_errmsg, 200, "Invalid traversal value."); + pll_errno = PLL_ERROR_PARAM_INVALID; + return PLL_FAILURE; + } return PLL_SUCCESS; } +#if 0 static void utree_query_tipnodes_recursive(pll_utree_t * node, pll_utree_t ** node_list, unsigned int * index) @@ -422,9 +473,10 @@ PLL_EXPORT unsigned int pll_utree_query_innernodes(pll_utree_t * root, return index; } +#endif /* a callback function for checking tree integrity */ -static int cb_check_integrity(pll_utree_t * node) +static int cb_check_integrity(pll_unode_t * node) { unsigned int clv_index = node->clv_index; int scaler_index = node->scaler_index; @@ -452,18 +504,16 @@ static int cb_check_integrity(pll_utree_t * node) return 1; } -PLL_EXPORT int pll_utree_check_integrity(pll_utree_t * root) +PLL_EXPORT int pll_utree_check_integrity(pll_utree_t * tree) { - pll_utree_t * start_node = root->next?root:root->back; - - return pll_utree_every(start_node->back, cb_check_integrity); + return pll_utree_every(tree, cb_check_integrity); } /* TODO: Memory allocation checks were not implemented in this function!!! */ -static pll_utree_t * clone_node(pll_utree_t * node) +static pll_unode_t * clone_node(pll_unode_t * node) { - pll_utree_t * new = (pll_utree_t *)malloc(sizeof(pll_utree_t)); - memcpy(new, node, sizeof(pll_utree_t)); + pll_unode_t * new = (pll_unode_t *)malloc(sizeof(pll_unode_t)); + memcpy(new, node, sizeof(pll_unode_t)); if (node->label) { @@ -473,11 +523,11 @@ static pll_utree_t * clone_node(pll_utree_t * node) if (node->next) { - new->next = (pll_utree_t *)malloc(sizeof(pll_utree_t)); - memcpy(new->next, node->next, sizeof(pll_utree_t)); + new->next = (pll_unode_t *)malloc(sizeof(pll_unode_t)); + memcpy(new->next, node->next, sizeof(pll_unode_t)); - new->next->next = (pll_utree_t *)malloc(sizeof(pll_utree_t)); - memcpy(new->next->next, node->next->next, sizeof(pll_utree_t)); + new->next->next = (pll_unode_t *)malloc(sizeof(pll_unode_t)); + memcpy(new->next->next, node->next->next, sizeof(pll_unode_t)); new->next->next->next = new; new->next->label = new->next->next->label = new->label; @@ -485,7 +535,7 @@ static pll_utree_t * clone_node(pll_utree_t * node) return new; } -static void utree_recurse_clone(pll_utree_t * new, pll_utree_t * root) +static void utree_recurse_clone(pll_unode_t * new, pll_unode_t * root) { if (root->back) { @@ -500,9 +550,9 @@ static void utree_recurse_clone(pll_utree_t * new, pll_utree_t * root) } } -PLL_EXPORT pll_utree_t * pll_utree_clone(pll_utree_t * root) +PLL_EXPORT pll_unode_t * pll_utree_graph_clone(pll_unode_t * root) { - pll_utree_t * new = clone_node(root); + pll_unode_t * new = clone_node(root); utree_recurse_clone(new, root); if (root->next) @@ -514,9 +564,21 @@ PLL_EXPORT pll_utree_t * pll_utree_clone(pll_utree_t * root) return new; } -static pll_utree_t * rtree_unroot(pll_rtree_t * root, pll_utree_t * back) +PLL_EXPORT pll_utree_t * pll_utree_clone(pll_utree_t * tree) { - pll_utree_t * uroot = (void *)calloc(1,sizeof(pll_utree_t)); + unsigned int root_index = tree->inner_count + tree->tip_count - 1; + + /* choose the last inner node as the starting point of the clone. It does not + really matter which node to choose, but since the newick parser places the + root node at the end of the list, we use the same notation here */ + pll_unode_t * root = pll_utree_graph_clone(tree->nodes[root_index]); + + return pll_utree_wraptree(root, tree->tip_count); +} + +static pll_unode_t * rtree_unroot(pll_rtree_t * root, pll_unode_t * back) +{ + pll_unode_t * uroot = (void *)calloc(1,sizeof(pll_unode_t)); if (!uroot) { pll_errno = PLL_ERROR_MEM_ALLOC; @@ -534,7 +596,7 @@ static pll_utree_t * rtree_unroot(pll_rtree_t * root, pll_utree_t * back) return uroot; } - uroot->next = (void *)calloc(1,sizeof(pll_utree_t)); + uroot->next = (void *)calloc(1,sizeof(pll_unode_t)); if (!uroot->next) { free(uroot); @@ -543,7 +605,7 @@ static pll_utree_t * rtree_unroot(pll_rtree_t * root, pll_utree_t * back) return NULL; } - uroot->next->next = (void *)calloc(1,sizeof(pll_utree_t)); + uroot->next->next = (void *)calloc(1,sizeof(pll_unode_t)); if (!uroot->next->next) { free(uroot->next); @@ -576,7 +638,7 @@ PLL_EXPORT pll_utree_t * pll_rtree_unroot(pll_rtree_t * root) pll_rtree_t * new_root; - pll_utree_t * uroot = (void *)calloc(1,sizeof(pll_utree_t)); + pll_unode_t * uroot = (void *)calloc(1,sizeof(pll_unode_t)); if (!uroot) { pll_errno = PLL_ERROR_MEM_ALLOC; @@ -584,7 +646,7 @@ PLL_EXPORT pll_utree_t * pll_rtree_unroot(pll_rtree_t * root) return NULL; } - uroot->next = (void *)calloc(1,sizeof(pll_utree_t)); + uroot->next = (void *)calloc(1,sizeof(pll_unode_t)); if (!uroot->next) { free(uroot); @@ -593,7 +655,7 @@ PLL_EXPORT pll_utree_t * pll_rtree_unroot(pll_rtree_t * root) return NULL; } - uroot->next->next = (void *)calloc(1,sizeof(pll_utree_t)); + uroot->next->next = (void *)calloc(1,sizeof(pll_unode_t)); if (!uroot->next->next) { free(uroot->next); @@ -636,15 +698,15 @@ PLL_EXPORT pll_utree_t * pll_rtree_unroot(pll_rtree_t * root) /* TODO: Need to clean uroot in case of error*/ if (!uroot->next->next->back) return NULL; - return uroot; + return pll_utree_wraptree(uroot,0); } -PLL_EXPORT void pll_utree_create_pars_buildops(pll_utree_t ** trav_buffer, +PLL_EXPORT void pll_utree_create_pars_buildops(pll_unode_t ** trav_buffer, unsigned int trav_buffer_size, pll_pars_buildop_t * ops, unsigned int * ops_count) { - pll_utree_t * node; + pll_unode_t * node; unsigned int i; *ops_count = 0; diff --git a/src/utree_moves.c b/src/utree_moves.c index 9c946a9..33c5beb 100644 --- a/src/utree_moves.c +++ b/src/utree_moves.c @@ -21,7 +21,7 @@ #include "pll.h" -static int utree_find(pll_utree_t * start, pll_utree_t * target) +static int utree_find(pll_unode_t * start, pll_unode_t * target) { /* checks whether the subtree rooted at 'start' (in the direction of start->next and start->next->next) contains the node 'target' */ @@ -44,8 +44,8 @@ static int utree_find(pll_utree_t * start, pll_utree_t * target) return 0; } -static void utree_link(pll_utree_t * a, - pll_utree_t * b, +static void utree_link(pll_unode_t * a, + pll_unode_t * b, double length, unsigned int pmatrix_index) { @@ -57,24 +57,24 @@ static void utree_link(pll_utree_t * a, a->pmatrix_index = b->pmatrix_index = pmatrix_index; } -static void utree_swap(pll_utree_t * t1, pll_utree_t * t2) +static void utree_swap(pll_unode_t * t1, pll_unode_t * t2) { /* swaps the positions of trees t1 and t2. The two trees retain the branch lengths from their root to their respective parent nodes, and retain their pmatrix indices (i.e. no updating of pmatrices is required) */ - pll_utree_t * temp = t1->back; + pll_unode_t * temp = t1->back; utree_link(t1, t2->back, t2->back->length, t2->back->pmatrix_index); utree_link(t2, temp, temp->length, temp->pmatrix_index); } -PLL_EXPORT int pll_utree_nni(pll_utree_t * p, +PLL_EXPORT int pll_utree_nni(pll_unode_t * p, int type, pll_utree_rb_t * rb) { - pll_utree_t * subtree1; - pll_utree_t * subtree2; + pll_unode_t * subtree1; + pll_unode_t * subtree2; if ((type != PLL_UTREE_MOVE_NNI_LEFT) && (type != PLL_UTREE_MOVE_NNI_RIGHT)) { @@ -116,8 +116,8 @@ static int utree_nni_rollback(pll_utree_rb_t * rb) NULL); } -PLL_EXPORT int pll_utree_spr(pll_utree_t * p, - pll_utree_t * r, +PLL_EXPORT int pll_utree_spr(pll_unode_t * p, + pll_unode_t * r, pll_utree_rb_t * rb, double * branch_lengths, unsigned int * matrix_indices) @@ -202,8 +202,8 @@ PLL_EXPORT int pll_utree_spr(pll_utree_t * p, } /* (b) connect u and v */ - pll_utree_t * u = p->next->back; - pll_utree_t * v = p->next->next->back; + pll_unode_t * u = p->next->back; + pll_unode_t * v = p->next->next->back; utree_link(u, v, u->length + v->length, @@ -304,8 +304,8 @@ static int utree_spr_rollback(pll_utree_rb_t * rb, /* this is a safer (but slower) function for performing an spr move, than pll_utree_spr(). See the last paragraph in the comments section of the pll_utree_spr() function for more details */ -PLL_EXPORT int pll_utree_spr_safe(pll_utree_t * p, - pll_utree_t * r, +PLL_EXPORT int pll_utree_spr_safe(pll_unode_t * p, + pll_unode_t * r, pll_utree_rb_t * rb, double * branch_lengths, unsigned int * matrix_indices) diff --git a/src/utree_svg.c b/src/utree_svg.c index 48fb80e..4637421 100644 --- a/src/utree_svg.c +++ b/src/utree_svg.c @@ -38,45 +38,6 @@ typedef struct pll_svg_aux_s double max_tree_len; } pll_svg_aux_t; -static void data_destroy_recursive(pll_utree_t * node) -{ - if (!node->next) - { - if (node->data) - { - free(node->data); - node->data = NULL; - } - return; - } - - data_destroy_recursive(node->next->back); - data_destroy_recursive(node->next->next->back); - - if (node->data) - { - free(node->data); - node->data = NULL; - } -} - -static void data_destroy(pll_utree_t * root) -{ - if (!root) return; - if (!root->next) - { - if (root->data) - { - free(root->data); - root->data = NULL; - } - return; - } - - data_destroy_recursive(root->back); - data_destroy_recursive(root); -} - static pll_svg_data_t * create_data(int height, double x, double y) { pll_svg_data_t * data = (pll_svg_data_t *)malloc(sizeof(pll_svg_data_t)); @@ -89,7 +50,7 @@ static pll_svg_data_t * create_data(int height, double x, double y) return data; } -static int utree_height_recursive(pll_utree_t * node) +static int utree_height_recursive(pll_unode_t * node) { if (!node->next) { @@ -116,7 +77,7 @@ static int utree_height_recursive(pll_utree_t * node) return 1; } -static int utree_set_height(pll_utree_t * root) +static int utree_set_height(pll_unode_t * root) { if (!root->next) return PLL_FAILURE; @@ -152,11 +113,11 @@ static void draw_circle(FILE * fp, double cx, double cy, double r) cx, cy, r); } -static void utree_set_offset(pll_utree_t * node, +static void utree_set_offset(pll_unode_t * node, const pll_svg_attrib_t * attr, const pll_svg_aux_t * aux) { - pll_utree_t * parent = NULL; + pll_unode_t * parent = NULL; /* scale node's branch length (edge towards parent) */ pll_svg_data_t * data = (pll_svg_data_t *)(node->data); @@ -188,13 +149,13 @@ static void utree_set_offset(pll_utree_t * node, } static void utree_plot(FILE * fp, - pll_utree_t * node, + pll_unode_t * node, const pll_svg_attrib_t * attr, pll_svg_aux_t * aux) { double y; // static int tip_occ = 0; - pll_utree_t * parent = NULL; + pll_unode_t * parent = NULL; pll_svg_data_t * data = (pll_svg_data_t *)(node->data); pll_svg_data_t * parent_data = (pll_svg_data_t *)(node->back->data); @@ -276,23 +237,17 @@ static void utree_plot(FILE * fp, static void utree_scaler_init(const pll_svg_attrib_t * attr, pll_svg_aux_t * aux, - pll_utree_t * root, - unsigned int tip_count) + pll_utree_t * tree) { unsigned int i; double len = 0; double label_len; - pll_utree_t ** node_list = (pll_utree_t **)malloc((size_t)tip_count * - sizeof(pll_utree_t *)); - - pll_utree_query_tipnodes(root, node_list); - /* compute the length of all tip-to-root paths and store the longest one in max_tree_len */ - for (i = 0; i < tip_count; ++i) + for (i = 0; i < tree->tip_count; ++i) { - pll_utree_t * node = node_list[i]; + pll_unode_t * node = tree->nodes[i]; len = node->length; node = node->back; @@ -316,7 +271,7 @@ static void utree_scaler_init(const pll_svg_attrib_t * attr, aux->max_tree_len = len; label_len = (attr->font_size / 1.5) * - (node_list[i]->label ? strlen(node_list[i]->label) : 0); + (tree->nodes[i]->label ? strlen(tree->nodes[i]->label) : 0); len = (aux->canvas_width - label_len) / len; if (i == 0) @@ -331,14 +286,12 @@ static void utree_scaler_init(const pll_svg_attrib_t * attr, aux->max_font_len = label_len; } } - free(node_list); } static void print_header(FILE * fp, - pll_utree_t * root, + pll_utree_t * tree, const pll_svg_attrib_t * attr, - pll_svg_aux_t * aux, - unsigned int tip_count) + pll_svg_aux_t * aux) { long svg_height; @@ -346,10 +299,10 @@ static void print_header(FILE * fp, /* initialize pixel scaler (scaler) and compute max tree length (max_tree_len) */ - utree_scaler_init(attr, aux, root, tip_count); + utree_scaler_init(attr, aux, tree); svg_height = attr->margin_top + attr->legend_spacing + attr->margin_bottom + - attr->tip_spacing * tip_count; + attr->tip_spacing * tree->tip_count; /* print svg header tag with dimensions and grey border */ fprintf(fp, "next)) return PLL_FAILURE; - } /* open output file for writing */ FILE * fp = fopen(filename, "w"); if (!fp) { - pll_utree_destroy(cloned,NULL); return PLL_FAILURE; } - /* create the svg */ - svg_make(fp, cloned, tip_count, attribs); - /* destroy data element from nodes in the cloned tree, and deallocate - the tree */ - data_destroy(cloned); - pll_utree_destroy(cloned,NULL); + /* backup data */ + void ** data_old = (void **)malloc((tree->tip_count+tree->inner_count) * + sizeof(void *)); + if (!data_old) + { + return PLL_FAILURE; + } + + /* copy old data */ + for (i = 0; i < tree->tip_count+tree->inner_count; ++i) + { + data_old[i] = tree->nodes[i]->data; + tree->nodes[i]->data = NULL; + } + + + /* treat unrooted tree as rooted binary with a ternary root + and compute the height of each node */ + //if (!utree_set_height(cloned)) + if (!utree_set_height(root)) + rc = PLL_FAILURE; + else + svg_make(fp, tree, root, attribs); fclose(fp); - return PLL_SUCCESS; + /* restore old data */ + for (i = 0; i < tree->tip_count+tree->inner_count; ++i) + { + if (tree->nodes[i]->data) + free(tree->nodes[i]->data); + tree->nodes[i]->data = data_old[i]; + } + free(data_old); + + + return rc; } diff --git a/test/src/asc-bias.c b/test/src/asc-bias.c index 5f0813a..2d20988 100644 --- a/test/src/asc-bias.c +++ b/test/src/asc-bias.c @@ -46,12 +46,12 @@ static double test_branch_lengths[NUM_BRANCH_LENGTHS] = {0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0}; static unsigned int traversal_size, matrix_count, ops_count; -static pll_utree_t ** travbuffer; +static pll_unode_t ** travbuffer; static unsigned int * matrix_indices; static double * branch_lengths; static pll_operation_t * operations; -void print_travbuffer(pll_utree_t ** travbuffer, unsigned int len) +void print_travbuffer(pll_unode_t ** travbuffer, unsigned int len) { unsigned int i; for (i=0; iclv_index, - tree->scaler_index, - tree->back->clv_index, - tree->back->scaler_index, - tree->pmatrix_index, + node->clv_index, + node->scaler_index, + node->back->clv_index, + node->back->scaler_index, + node->pmatrix_index, params_indices, NULL); @@ -114,10 +114,10 @@ static double eval(pll_partition_t * partition, sizeof(double), partition->alignment); pll_update_sumtable(partition, - tree->clv_index, - tree->back->clv_index, - tree->scaler_index, - tree->back->scaler_index, + node->clv_index, + node->back->clv_index, + node->scaler_index, + node->back->scaler_index, params_indices, sumtable); @@ -127,8 +127,8 @@ static double eval(pll_partition_t * partition, { double branch_length = test_branch_lengths[i]; if (!pll_compute_likelihood_derivatives(partition, - tree->scaler_index, - tree->back->scaler_index, + node->scaler_index, + node->back->scaler_index, branch_length, params_indices, sumtable, @@ -142,15 +142,15 @@ static double eval(pll_partition_t * partition, /* update logLikelihood */ pll_update_prob_matrices(partition, params_indices, - &(tree->pmatrix_index), + &(node->pmatrix_index), &branch_length, 1); upbl_logl = pll_compute_edge_loglikelihood(partition, - tree->clv_index, - tree->scaler_index, - tree->back->clv_index, - tree->back->scaler_index, - tree->pmatrix_index, + node->clv_index, + node->scaler_index, + node->back->clv_index, + node->back->scaler_index, + node->pmatrix_index, params_indices, NULL); @@ -172,6 +172,7 @@ int main(int argc, char * argv[]) unsigned int attributes; pll_partition_t * partition; pll_utree_t * tree; + pll_unode_t * root; unsigned int taxa_count, nodes_count, inner_nodes_count, branch_count; double alpha = 0.5; unsigned int rate_cats = 4; @@ -182,31 +183,36 @@ int main(int argc, char * argv[]) attributes = get_attributes(argc, argv); attributes |= PLL_ATTRIB_AB_FLAG; - tree = pll_utree_parse_newick(TRE_FILENAME, &taxa_count); + tree = pll_utree_parse_newick(TRE_FILENAME); + + taxa_count = tree->tip_count; + printf("Read %s: %u taxa\n", TRE_FILENAME, taxa_count); + root = tree->nodes[tree->tip_count+tree->inner_count-1]; inner_nodes_count = taxa_count - 2; nodes_count = taxa_count + inner_nodes_count; branch_count = 2*taxa_count - 3; /* build fixed structures */ - travbuffer = (pll_utree_t **)malloc(nodes_count * sizeof(pll_utree_t *)); + travbuffer = (pll_unode_t **)malloc(nodes_count * sizeof(pll_unode_t *)); branch_lengths = (double *)malloc(branch_count * sizeof(double)); matrix_indices = (unsigned int *)malloc(branch_count * sizeof(unsigned int)); operations = (pll_operation_t *)malloc(inner_nodes_count * sizeof(pll_operation_t)); - partition = parse_msa(MSA_FILENAME, taxa_count, STATES, rate_cats, 1, + partition = parse_msa(MSA_FILENAME, STATES, rate_cats, 1, tree, attributes); printf("Read %s: %u sites\n", MSA_FILENAME, partition->sites); for (i=0;i<3;++i) { - tree = tree->next; + root = root->next; pll_set_asc_bias_type(partition, 0); - pll_utree_traverse(tree, + pll_utree_traverse(root, + PLL_TREE_TRAVERSE_POSTORDER, cb_full_traversal, travbuffer, &traversal_size); @@ -222,14 +228,14 @@ int main(int argc, char * argv[]) /* test 1: no ascertainment bias correction */ printf("\nTEST 1: NO ASC BIAS\n"); - lnl_test[0] = eval(partition, tree, alpha, lnl_test[0]); + lnl_test[0] = eval(partition, root, alpha, lnl_test[0]); /* test 2: ascertainment bias correction */ printf("\nTEST 2: ASC BIAS LEWIS\n"); pll_set_asc_bias_type(partition, PLL_ATTRIB_AB_LEWIS); - lnl_test[1] = eval(partition, tree, alpha, lnl_test[1]); + lnl_test[1] = eval(partition, root, alpha, lnl_test[1]); /* attempt to update invariant sites proportion. This should fail */ if (pll_update_invariant_sites_proportion(partition, 0, 0.5)) @@ -243,14 +249,14 @@ int main(int argc, char * argv[]) pll_set_asc_bias_type(partition, PLL_ATTRIB_AB_FELSENSTEIN); pll_set_asc_state_weights(partition, invar_weights); - lnl_test[2] = eval(partition, tree, alpha, lnl_test[2]); + lnl_test[2] = eval(partition, root, alpha, lnl_test[2]); /* test 2: ascertainment bias correction */ printf("\nTEST 2: ASC BIAS STAMATAKIS\n"); pll_set_asc_bias_type(partition, PLL_ATTRIB_AB_STAMATAKIS); pll_set_asc_state_weights(partition, invar_weights); - lnl_test[3] = eval(partition, tree, alpha, lnl_test[3]); + lnl_test[3] = eval(partition, root, alpha, lnl_test[3]); } /* clean */ diff --git a/test/src/common.c b/test/src/common.c index aba7c14..90d25ed 100644 --- a/test/src/common.c +++ b/test/src/common.c @@ -51,7 +51,6 @@ void skip_test () It is intended to use with the test datasets that were validated in advance. */ pll_partition_t * parse_msa(const char * filename, - unsigned int taxa_count, unsigned int states, unsigned int rate_cats, unsigned int rate_matrices, @@ -59,7 +58,6 @@ pll_partition_t * parse_msa(const char * filename, unsigned int attributes) { return parse_msa_reduced(filename, - taxa_count, states, rate_cats, rate_matrices, @@ -69,7 +67,6 @@ pll_partition_t * parse_msa(const char * filename, } pll_partition_t * parse_msa_reduced(const char * filename, - unsigned int taxa_count, unsigned int states, unsigned int rate_cats, unsigned int rate_matrices, @@ -78,6 +75,7 @@ pll_partition_t * parse_msa_reduced(const char * filename, unsigned int max_sites) { unsigned int i; + unsigned int taxa_count = tree->tip_count; pll_partition_t * partition; long hdrlen, seqlen, seqno; char * seq = NULL, @@ -133,10 +131,6 @@ pll_partition_t * parse_msa_reduced(const char * filename, taxa_count - 2, /* scale buffers */ attributes); - pll_utree_t ** tipnodes = (pll_utree_t **)calloc(taxa_count, - sizeof(pll_utree_t *)); - pll_utree_query_tipnodes(tree, tipnodes); - /* create a libc hash table of size tip_nodes_count */ hcreate(taxa_count); @@ -145,20 +139,18 @@ pll_partition_t * parse_msa_reduced(const char * filename, sizeof(unsigned int)); for (i = 0; i < taxa_count; ++i) { - data[i] = tipnodes[i]->clv_index; + data[i] = tree->nodes[i]->clv_index; ENTRY entry; #ifdef __APPLE__ - entry.key = xstrdup(tipnodes[i]->label); + entry.key = xstrdup(tree->nodes[i]->label); #else - entry.key = tipnodes[i]->label; + entry.key = tree->nodes[i]->label; #endif entry.data = (void *)(data+i); hsearch(entry, ENTER); } - free(tipnodes); - for (i = 0; i < taxa_count; ++i) { ENTRY query; @@ -191,7 +183,7 @@ pll_partition_t * parse_msa_reduced(const char * filename, return partition; } -int cb_full_traversal(pll_utree_t * node) +int cb_full_traversal(pll_unode_t * node) { return 1; } diff --git a/test/src/common.h b/test/src/common.h index 4863596..5e14049 100644 --- a/test/src/common.h +++ b/test/src/common.h @@ -30,7 +30,6 @@ unsigned int get_attributes(int argc, char **argv); void skip_test(); pll_partition_t * parse_msa(const char * filename, - unsigned int taxa_count, unsigned int states, unsigned int rate_cats, unsigned int rate_matrices, @@ -38,14 +37,13 @@ pll_partition_t * parse_msa(const char * filename, unsigned int attributes); pll_partition_t * parse_msa_reduced(const char * filename, - unsigned int taxa_count, unsigned int states, unsigned int rate_cats, unsigned int rate_matrices, pll_utree_t * tree, unsigned int attributes, unsigned int max_sites); -int cb_full_traversal(pll_utree_t * node); +int cb_full_traversal(pll_unode_t * node); int cb_rfull_traversal(pll_rtree_t * node); /* print error and exit */ diff --git a/test/src/partial-traversal.c b/test/src/partial-traversal.c index 45f50f9..e079c87 100644 --- a/test/src/partial-traversal.c +++ b/test/src/partial-traversal.c @@ -15,7 +15,7 @@ typedef struct } node_info_t; /* a callback function for performing a partial traversal */ -static int cb_partial_traversal(pll_utree_t * node) +static int cb_partial_traversal(pll_unode_t * node) { node_info_t * node_info; @@ -60,36 +60,26 @@ static int cb_partial_traversal(pll_utree_t * node) return 1; } -static void set_missing_branch_length_recursive(pll_utree_t * tree, - double length) +/* branch lengths not present in the newick file get a value of 0.000001 */ +static void set_missing_branch_length(pll_utree_t * tree, double length) { - if (tree) - { - /* set branch length to default if not set */ - if (!tree->length) - tree->length = length; + unsigned int i; - if (tree->next) - { - if (!tree->next->length) - tree->next->length = length; + for (i = 0; i < tree->tip_count; ++i) + if (!tree->nodes[i]->length) + tree->nodes[i]->length = length; - if (!tree->next->next->length) - tree->next->next->length = length; - - set_missing_branch_length_recursive(tree->next->back, length); - set_missing_branch_length_recursive(tree->next->next->back, length); - } + for (i = tree->tip_count; i < tree->tip_count + tree->inner_count; ++i) + { + if (!tree->nodes[i]->length) + tree->nodes[i]->length = length; + if (!tree->nodes[i]->next->length) + tree->nodes[i]->next->length = length; + if (!tree->nodes[i]->next->next->length) + tree->nodes[i]->next->next->length = length; } } -/* branch lengths not present in the newick file get a value of 0.000001 */ -static void set_missing_branch_length(pll_utree_t * tree, double length) -{ - set_missing_branch_length_recursive(tree, length); - set_missing_branch_length_recursive(tree->back, length); -} - int main(int argc, char * argv[]) { unsigned int i,j,r; @@ -99,13 +89,15 @@ int main(int argc, char * argv[]) double * branch_lengths; pll_partition_t * partition; pll_operation_t * operations; - pll_utree_t ** travbuffer; - pll_utree_t ** inner_nodes_list; + pll_unode_t ** travbuffer; + pll_unode_t ** inner_nodes_list; unsigned int params_indices[N_RATE_CATS] = {0,0,0,0}; /* parse the unrooted binary tree in newick format, and store the number of tip nodes in tip_nodes_count */ - pll_utree_t * tree = pll_utree_parse_newick(TREEFILE, &tip_nodes_count); + pll_utree_t * tree = pll_utree_parse_newick(TREEFILE); + + tip_nodes_count = tree->tip_count; unsigned int attributes = get_attributes(argc, argv); @@ -124,11 +116,6 @@ int main(int argc, char * argv[]) printf("Total number of nodes in tree: %d\n", nodes_count); printf("Number of branches in tree: %d\n", branch_count); - /* obtain an array of pointers to tip nodes */ - pll_utree_t ** tipnodes = (pll_utree_t **)calloc(tip_nodes_count, - sizeof(pll_utree_t *)); - pll_utree_query_tipnodes(tree, tipnodes); - /* create a libc hash table of size tip_nodes_count */ hcreate(tip_nodes_count); @@ -137,12 +124,13 @@ int main(int argc, char * argv[]) sizeof(unsigned int)); for (i = 0; i < tip_nodes_count; ++i) { - data[i] = i; + //data[i] = i; + data[i] = tree->nodes[i]->clv_index; ENTRY entry; #ifdef __APPLE__ - entry.key = xstrdup(tipnodes[i]->label); + entry.key = xstrdup(tree->nodes[i]->label); #else - entry.key = tipnodes[i]->label; + entry.key = tree->nodes[i]->label; #endif entry.data = (void *)(data+i); hsearch(entry, ENTER); @@ -249,7 +237,6 @@ int main(int argc, char * argv[]) /* we no longer need these two arrays (keys and values of hash table... */ free(data); - free(tipnodes); /* ...neither the sequences and the headers as they are already present in the form of probabilities in the tip CLVs */ @@ -264,7 +251,7 @@ int main(int argc, char * argv[]) /* allocate a buffer for storing pointers to nodes of the tree in postorder traversal */ - travbuffer = (pll_utree_t **)malloc(nodes_count * sizeof(pll_utree_t *)); + travbuffer = (pll_unode_t **)malloc(nodes_count * sizeof(pll_unode_t *)); branch_lengths = (double *)malloc(branch_count * sizeof(double)); @@ -273,9 +260,11 @@ int main(int argc, char * argv[]) sizeof(pll_operation_t)); /* get inner nodes */ - inner_nodes_list = (pll_utree_t **)malloc(inner_nodes_count * - sizeof(pll_utree_t *)); - pll_utree_query_innernodes(tree, inner_nodes_list); + inner_nodes_list = (pll_unode_t **)malloc(inner_nodes_count * + sizeof(pll_unode_t *)); + memcpy(inner_nodes_list, + tree->nodes+tip_nodes_count, + inner_nodes_count*sizeof(pll_unode_t *)); /* get random directions for each inner node */ for (i = 0; i < inner_nodes_count; ++i) @@ -292,12 +281,13 @@ int main(int argc, char * argv[]) /* randomly select an inner node */ r = RAND % inner_nodes_count; - pll_utree_t * node = inner_nodes_list[r]; + pll_unode_t * node = inner_nodes_list[r]; /* compute a partial traversal starting from the randomly selected inner node */ if (!pll_utree_traverse(node, + PLL_TREE_TRAVERSE_POSTORDER, cb_partial_traversal, travbuffer, &traversal_size))