diff --git a/.gitmodules b/.gitmodules index c0cbe2a9..c4a6c272 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "libs/pll-modules"] path = libs/pll-modules - url = https://github.com/ddarriba/pll-modules.git + url = https://github.com/lutteropp/pll-modules.git [submodule "libs/terraphast"] path = libs/terraphast url = https://github.com/amkozlov/terraphast-one diff --git a/CMakeLists.txt b/CMakeLists.txt index 9854d9c6..a22bcccf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,10 +39,10 @@ endif() #set(CMAKE_CXX_EXTENSIONS OFF) # set these flags globally for all subprojects (libpll etc.) -set (CMAKE_CXX_FLAGS_DEBUG "-O3 -g" CACHE INTERNAL "") -set (CMAKE_CXX_FLAGS_RELEASE "-O3" CACHE INTERNAL "") -set (CMAKE_C_FLAGS_DEBUG "-O3 -g" CACHE INTERNAL "") -set (CMAKE_C_FLAGS_RELEASE "-O3" CACHE INTERNAL "") +set (CMAKE_CXX_FLAGS_DEBUG "-O3 -g -fopenmp" CACHE INTERNAL "") +set (CMAKE_CXX_FLAGS_RELEASE "-O3 -fopenmp" CACHE INTERNAL "") +set (CMAKE_C_FLAGS_DEBUG "-O3 -g -fopenmp" CACHE INTERNAL "") +set (CMAKE_C_FLAGS_RELEASE "-O3 -fopenmp" CACHE INTERNAL "") project (raxml-ng C CXX) diff --git a/README.md b/README.md index 60e416d2..8c09e001 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,18 @@ -# RAxML Next Generation - -[![Build Status](https://www.travis-ci.org/amkozlov/raxml-ng.svg?branch=master)](https://www.travis-ci.org/amkozlov/raxml-ng) [![DOI](https://zenodo.org/badge/75947982.svg)](https://zenodo.org/badge/latestdoi/75947982) [![License](https://img.shields.io/badge/license-AGPL-blue.svg)](http://www.gnu.org/licenses/agpl-3.0.en.html) +# RAxML Next Generation with parallel computation of Transfer Bootstrap Expectation (TBE) scores and computation of TBE extra information ## Introduction -RAxML-NG is a phylogenetic tree inference tool which uses maximum-likelihood (ML) optimality criterion. Its search heuristic is based on iteratively performing a series of Subtree Pruning and Regrafting (SPR) moves, which allows to quickly navigate to the best-known ML tree. RAxML-NG is a successor of RAxML (Stamatakis 2014) and leverages the highly optimized likelihood computation implemented in [*libpll*](https://github.com/xflouris/libpll) (Flouri et al. 2014). +Greetings! If you are not here for parallel computation of TBE support or computation of the TBE extra information, we strongly advise you to use the most up-to-date version of RAxML-NG instead. It can be found here: https://github.com/amkozlov/raxml-ng -RAxML-NG offers improvements in speed, flexibility and user-friendliness over the previous RAxML versions. It also implements some of the features previously available in ExaML (Kozlov et al. 2015), including checkpointing and efficient load balancing for partitioned alignments (Kobert et al. 2014). +## Computing TBE support with extra information -RAxML-NG is currently under active development, and the mid-term goal is to have most functionality of RAxML 8.x covered. -You can see some of the planned features [here](https://github.com/amkozlov/raxml-ng/issues). +Here is one example call which computes TBE support scores as well as the extra table and the extra array. It uses the cutoff value 0.3 for that. It uses 10 threads: +``` +./raxml-ng --support --tree REF.nw --bs-trees BS.nw --bs-metric TBE --extra tbe_extra_table,tbe_extra_array,tbe-cutoff{0.3} --threads 10 +``` -Documentation: [github wiki](https://github.com/amkozlov/raxml-ng/wiki) +If you don't understand what this is, please read our paper at https://www.biorxiv.org/content/10.1101/734848v2 + ## Installation instructions @@ -65,77 +66,4 @@ cmake -DSTATIC_BUILD=ON -DENABLE_RAXML_SIMD=OFF -DENABLE_PLLMOD_SIMD=OFF .. make ``` -## Documentation and Support - -Documentation can be found in the [github wiki](https://github.com/amkozlov/raxml-ng/wiki). -For a quick start, please check out the [hands-on tutorial](https://github.com/amkozlov/raxml-ng/wiki/Tutorial). - -Also please check the online help with `raxml-ng -h`. - -If still in doubt, please feel free to post to the [RAxML google group](https://groups.google.com/forum/#!forum/raxml). - -## Usage examples - - 1. Perform single tree inference on DNA alignment - (random starting tree, general time-reversible model, ML estimate of substitution rates and - nucleotide frequencies, discrete GAMMA model of rate heterogeneity with 4 categories): - - `./raxml-ng --msa testDNA.fa --model GTR+G` - - 2. Perform an all-in-one analysis (ML tree search + non-parametric bootstrap) - (10 randomized parsimony starting trees, fixed empirical substitution matrix (LG), - empirical aminoacid frequencies from alignment, 8 discrete GAMMA categories, - 200 bootstrap replicates): - - `./raxml-ng --all --msa testAA.fa --model LG+G8+F --tree pars{10} --bs-trees 200` - - - 3. Optimize branch lengths and free model parameters on a fixed topology - (using multiple partitions with proportional branch lengths) - - `./raxml-ng --evaluate --msa testAA.fa --model partitions.txt --tree test.tree --brlen scaled` - - 4. Map support values from existing set of replicate trees: - - `./raxml-ng --support --tree bestML.tree --bs-trees bootstraps.tree` - -## License and citation - -The code is currently licensed under the GNU Affero General Public License version 3. - -When using RAxML-NG, please cite [this preprint](https://www.biorxiv.org/content/early/2018/10/18/447110): - -Alexey M. Kozlov, Diego Darriba, Tomáš Flouri, Benoit Morel, and Alexandros Stamatakis (2018) -**RAxML-NG: A fast, scalable, and user-friendly tool for maximum likelihood phylogenetic inference.** -*bioRxiv.* -doi:[10.1101/447110](https://doi.org/10.1101/447110) - -## The team - -* Alexey Kozlov -* Alexandros Stamatakis -* Diego Darriba -* Tomáš Flouri -* Benoit Morel - -## References - -* Stamatakis A. (2014) -**RAxML version 8: a tool for phylogenetic analysis and post-analysis of large phylogenies.** -*Bioinformatics*, 30(9): 1312-1313. -doi:[10.1093/bioinformatics/btu033](http://dx.doi.org/10.1093/bioinformatics/btu033) - -* Flouri T., Izquierdo-Carrasco F., Darriba D., Aberer AJ, Nguyen LT, Minh BQ, von Haeseler A., Stamatakis A. (2014) -**The Phylogenetic Likelihood Library.** -*Systematic Biology*, 64(2): 356-362. -doi:[10.1093/sysbio/syu084](http://dx.doi.org/10.1093/sysbio/syu084) - -* Kozlov A.M., Aberer A.J., Stamatakis A. (2015) -**ExaML version 3: a tool for phylogenomic analyses on supercomputers.** -*Bioinformatics (2015) 31 (15): 2577-2579.* -doi:[10.1093/bioinformatics/btv184](https://doi.org/10.1093/bioinformatics/btv184) - -* Kobert K., Flouri T., Aberer A., Stamatakis A. (2014) -**The divisible load balance problem and its application to phylogenetic inference.** -*Brown D., Morgenstern B., editors. (eds.) Algorithms in Bioinformatics, Vol. 8701 of Lecture Notes in Computer Science. Springer, Berlin, pp. 204–216* diff --git a/libs/pll-modules b/libs/pll-modules index 840a1952..6e812ef8 160000 --- a/libs/pll-modules +++ b/libs/pll-modules @@ -1 +1 @@ -Subproject commit 840a19525ed133ad243afb23124911b849d4685d +Subproject commit 6e812ef87e9254c02142a939516890166471e7d6 diff --git a/src/CommandLineParser.cpp b/src/CommandLineParser.cpp index 798249c0..e4a52ef6 100644 --- a/src/CommandLineParser.cpp +++ b/src/CommandLineParser.cpp @@ -6,6 +6,10 @@ #include #endif +#if defined(_OPENMP) +#include +#endif + using namespace std; static struct option long_options[] = @@ -737,6 +741,14 @@ void CommandLineParser::parse_options(int argc, char** argv, Options &opts) opts.tbe_naive = true; else if (eopt == "tbe-nature") opts.tbe_naive = false; + else if (sscanf(eopt.c_str(), "tbe-cutoff{%lf}", &opts.tbe_extra_cutoff) == 1) + void(); + else if (eopt == "tbe_extra_table") + opts.tbe_extra_table = true; + else if (eopt == "tbe_extra_array") + opts.tbe_extra_array = true; + else if (eopt == "tbe_extra_tree") + opts.tbe_extra_tree = true; else throw InvalidOptionValueException("Unknown extra option: " + string(optarg)); } @@ -839,6 +851,10 @@ void CommandLineParser::parse_options(int argc, char** argv, Options &opts) } } +#if defined(_OPENMP) + omp_set_num_threads(opts.num_threads); +#endif + if (c != -1) exit(EXIT_FAILURE); diff --git a/src/Options.cpp b/src/Options.cpp index fc7f40d9..c31e14bd 100644 --- a/src/Options.cpp +++ b/src/Options.cpp @@ -30,6 +30,9 @@ void Options::set_default_outfiles() set_default_outfile(outfile_names.support_tree, "support"); set_default_outfile(outfile_names.fbp_support_tree, "supportFBP"); set_default_outfile(outfile_names.tbe_support_tree, "supportTBE"); + set_default_outfile(outfile_names.tbe_extra_table, "tbeExtraTable"); + set_default_outfile(outfile_names.tbe_extra_array, "tbeExtraArray"); + set_default_outfile(outfile_names.tbe_extra_tree, "tbeExtraTree"); set_default_outfile(outfile_names.terrace, "terrace"); set_default_outfile(outfile_names.binary_msa, "rba"); set_default_outfile(outfile_names.bootstrap_msa, "bootstrapMSA"); diff --git a/src/Options.hpp b/src/Options.hpp index 4f4f93bc..91040935 100644 --- a/src/Options.hpp +++ b/src/Options.hpp @@ -17,6 +17,9 @@ struct OutputFileNames std::string bootstrap_trees; std::string support_tree; std::string tbe_support_tree; + std::string tbe_extra_table; + std::string tbe_extra_array; + std::string tbe_extra_tree; std::string fbp_support_tree; std::string terrace; std::string binary_msa; @@ -40,6 +43,7 @@ class Options spr_cutoff(1.0), brlen_linkage(PLLMOD_COMMON_BRLEN_SCALED), brlen_opt_method(PLLMOD_OPT_BLO_NEWTON_FAST), brlen_min(RAXML_BRLEN_MIN), brlen_max(RAXML_BRLEN_MAX), + tbe_extra_cutoff(0.3), tbe_extra_table(false), tbe_extra_array(false), tbe_extra_tree(false), num_searches(1), terrace_maxsize(100), num_bootstraps(1000), bootstop_criterion(BootstopCriterion::none), bootstop_cutoff(0.03), bootstop_interval(RAXML_BOOTSTOP_INTERVAL), bootstop_permutations(RAXML_BOOTSTOP_PERMUTES), @@ -82,6 +86,10 @@ class Options int brlen_opt_method; double brlen_min; double brlen_max; + double tbe_extra_cutoff; + bool tbe_extra_table; + bool tbe_extra_array; + bool tbe_extra_tree; unsigned int num_searches; unsigned long long terrace_maxsize; @@ -135,6 +143,9 @@ class Options std::string bootstrap_partition_file() const; const std::string rfdist_file() const { return outfile_names.rfdist; } const std::string cons_tree_file() const { return outfile_names.cons_tree + consense_type_name(); } + const std::string& tbe_extra_table_file() const { return outfile_names.tbe_extra_table; } + const std::string& tbe_extra_array_file() const { return outfile_names.tbe_extra_array; } + const std::string& tbe_extra_tree_file() const { return outfile_names.tbe_extra_tree; } const std::string asr_tree_file() const { return outfile_names.asr_tree; } const std::string asr_probs_file() const { return outfile_names.asr_probs; } diff --git a/src/ParallelContext.hpp b/src/ParallelContext.hpp index 9ee917c3..4ec6cd5f 100644 --- a/src/ParallelContext.hpp +++ b/src/ParallelContext.hpp @@ -51,6 +51,17 @@ class ParallelContext static void mpi_gather_custom(std::function prepare_send_cb, std::function process_recv_cb); + // WARNING: This function is like a private parking lot. Only the transfer bootstrap computation is allowed to use it. Wrongdoers will be punished. PLEASE REFACTOR ME. + static void pll_lock(bool b) + { + static std::mutex mtx; + if (b) { + mtx.lock(); + } else { + mtx.unlock(); + } + } + static bool master() { return proc_id() == 0; } static bool master_rank() { return _rank_id == 0; } static bool master_thread() { return _thread_id == 0; } diff --git a/src/bootstrap/BootstrapTree.cpp b/src/bootstrap/BootstrapTree.cpp index 48ae2883..6c2aae1b 100644 --- a/src/bootstrap/BootstrapTree.cpp +++ b/src/bootstrap/BootstrapTree.cpp @@ -5,7 +5,7 @@ BootstrapTree::BootstrapTree (const Tree& tree) : SupportTree(tree) { assert(num_splits() > 0); - _node_split_map.resize(num_splits()); + _split_node_map.resize(num_splits()); /* extract reference tree splits and add them into hashtable */ add_tree(pll_utree_root()); @@ -18,7 +18,7 @@ BootstrapTree::~BootstrapTree () void BootstrapTree::add_tree(const pll_unode_t& root) { bool ref_tree = (_num_bs_trees == 0); - pll_unode_t ** node_split_map = ref_tree ? _node_split_map.data() : nullptr; + pll_unode_t ** node_split_map = ref_tree ? _split_node_map.data() : nullptr; int update_only = ref_tree ? 0 : 1; doubleVector support(num_splits(), ref_tree ? 0. : 1.); diff --git a/src/bootstrap/ConsensusTree.cpp b/src/bootstrap/ConsensusTree.cpp index 3327897b..dcb62829 100644 --- a/src/bootstrap/ConsensusTree.cpp +++ b/src/bootstrap/ConsensusTree.cpp @@ -70,13 +70,13 @@ bool ConsensusTree::compute_support() pll_utree(_num_tips, *cons_tree->tree); /* map pll_unodes to splits */ - _node_split_map.resize(_pll_utree->inner_count); + _split_node_map.resize(_pll_utree->inner_count); _support.resize(_pll_utree->inner_count); for (unsigned int i = 0; i < _pll_utree->inner_count; ++i) { auto node = _pll_utree->nodes[_pll_utree->tip_count + i]; assert(node->data); - _node_split_map[i] = node; + _split_node_map[i] = node; _support[i] = ((pll_consensus_data_t *) node->data)->support; } diff --git a/src/bootstrap/SupportTree.cpp b/src/bootstrap/SupportTree.cpp index 388744ae..962fcc8a 100644 --- a/src/bootstrap/SupportTree.cpp +++ b/src/bootstrap/SupportTree.cpp @@ -121,9 +121,13 @@ void SupportTree::draw_support(bool support_in_pct) // printf("node_id %d, split_id %d\n", _node_split_map[i]->node_index, i); // printf("\n\n"); - pll_unode_t ** node_map = _node_split_map.empty() ? nullptr : _node_split_map.data(); + pll_unode_t ** node_map = _split_node_map.empty() ? nullptr : _split_node_map.data(); pllmod_utree_draw_support(_pll_utree.get(), _support.data(), node_map, support_in_pct ? support_fmt_pct : support_fmt_prop); LOG_DEBUG_TS << "Done!" << endl << endl; } + +const doubleVector& SupportTree::get_support() const { + return _support; +} diff --git a/src/bootstrap/SupportTree.hpp b/src/bootstrap/SupportTree.hpp index 0bb67435..7306898a 100644 --- a/src/bootstrap/SupportTree.hpp +++ b/src/bootstrap/SupportTree.hpp @@ -18,6 +18,8 @@ class SupportTree : public Tree void draw_support(bool support_in_pct = true); + const doubleVector& get_support() const; + protected: PllSplitSharedPtr extract_splits_from_tree(const pll_unode_t& root, pll_unode_t ** node_split_map); @@ -34,7 +36,7 @@ class SupportTree : public Tree protected: size_t _num_bs_trees; bitv_hashtable_t* _pll_splits_hash; - std::vector _node_split_map; + std::vector _split_node_map; doubleVector _support; }; diff --git a/src/bootstrap/TransferBootstrapTree.cpp b/src/bootstrap/TransferBootstrapTree.cpp index d89f5680..769d935c 100644 --- a/src/bootstrap/TransferBootstrapTree.cpp +++ b/src/bootstrap/TransferBootstrapTree.cpp @@ -1,10 +1,21 @@ #include "TransferBootstrapTree.hpp" -TransferBootstrapTree::TransferBootstrapTree(const Tree& tree, bool naive) : - SupportTree (tree), _split_info(nullptr), _naive_method(naive) +/*typedef unsigned int TBEFlags; + +const unsigned int TBE_DO_TABLE = 1; +const unsigned int TBE_DO_ARRAY = 2; +const unsigned int TBE_DO_OTHER = 4; + +TransferBootstrapTree bstree(tree, true, 1., TBE_DO_TABLE | TBE_DO_OTHER); + +TransferBootstrapTree bstree(tree, true, 1., false, true, false); +*/ + +TransferBootstrapTree::TransferBootstrapTree(const Tree& tree, bool naive, double tbe_cutoff, bool doTable, bool doArray, bool doTree) : + SupportTree (tree), _split_info(nullptr), _extra_info(nullptr), _naive_method(naive) { assert(num_splits() > 0); - _node_split_map.resize(num_splits()); + _split_node_map.resize(num_splits()); /* extract reference tree splits and add them into hashtable */ add_tree(pll_utree_root()); @@ -12,14 +23,38 @@ TransferBootstrapTree::TransferBootstrapTree(const Tree& tree, bool naive) : if (!_naive_method) { _split_info = pllmod_utree_tbe_nature_init((pll_unode_t*) &pll_utree_root(), _num_tips, - (const pll_unode_t**) _node_split_map.data()); + (const pll_unode_t**) _split_node_map.data()); + if (doTable || doArray || doTree) { + _extra_info = pllmod_tbe_extra_info_create(num_splits(), _num_tips, tbe_cutoff, doTable, doArray, doTree); + } } } +const std::vector TransferBootstrapTree::get_split_node_map() const { + return _split_node_map; +} + +pllmod_tbe_extra_info_t* TransferBootstrapTree::get_extra_info() const { + return _extra_info; +} + +pllmod_tbe_split_info_t* TransferBootstrapTree::get_split_info() const { + return _split_info; +} + +/* +void TransferBootstrapTree::collect_support() { + SupportTree::collect_support(); + // do the postprocessing of extra info +} +*/ + TransferBootstrapTree::~TransferBootstrapTree() { if (_split_info) free(_split_info); + if (_extra_info) + pllmod_tbe_extra_info_destroy(_extra_info, num_splits()); } void TransferBootstrapTree::add_tree(const pll_unode_t& root) @@ -29,7 +64,7 @@ void TransferBootstrapTree::add_tree(const pll_unode_t& root) if (ref_tree) { - _ref_splits = extract_splits_from_tree(root, _node_split_map.data()); + _ref_splits = extract_splits_from_tree(root, _split_node_map.data()); add_splits_to_hashtable(_ref_splits, support, 0); } @@ -44,8 +79,8 @@ void TransferBootstrapTree::add_tree(const pll_unode_t& root) pllmod_utree_tbe_naive(_ref_splits.get(), splits.get(), _num_tips, support.data()); else { - pllmod_utree_tbe_nature(_ref_splits.get(), splits.get(), (pll_unode_t*) &root, - _num_tips, support.data(), _split_info); + pllmod_utree_tbe_nature_extra(_ref_splits.get(), splits.get(), (pll_unode_t*) &root, + _num_tips, support.data(), _split_info, _extra_info); } add_splits_to_hashtable(_ref_splits, support, 1); diff --git a/src/bootstrap/TransferBootstrapTree.hpp b/src/bootstrap/TransferBootstrapTree.hpp index fa44aefe..eeba2c08 100644 --- a/src/bootstrap/TransferBootstrapTree.hpp +++ b/src/bootstrap/TransferBootstrapTree.hpp @@ -6,9 +6,14 @@ class TransferBootstrapTree : public SupportTree { public: - TransferBootstrapTree(const Tree& tree, bool naive = false); + TransferBootstrapTree(const Tree& tree, bool naive = false, double tbe_cutoff = 0.3, bool doTable = false, bool doArray = false, bool doTree = false); virtual ~TransferBootstrapTree(); + void postprocess_extra(); //... + pllmod_tbe_extra_info_t* get_extra_info() const; + const std::vector get_split_node_map() const; + pllmod_tbe_split_info_t* get_split_info() const; + protected: virtual void add_tree(const pll_unode_t& root); @@ -17,6 +22,7 @@ class TransferBootstrapTree : public SupportTree private: pllmod_tbe_split_info_t * _split_info; + pllmod_tbe_extra_info_t * _extra_info; bool _naive_method; }; diff --git a/src/io/TBEExtraArrayStream.cpp b/src/io/TBEExtraArrayStream.cpp new file mode 100644 index 00000000..544918f3 --- /dev/null +++ b/src/io/TBEExtraArrayStream.cpp @@ -0,0 +1,17 @@ +#include "file_io.hpp" + +using namespace std; + + +TBEExtraArrayStream& operator<<(TBEExtraArrayStream& stream, const TransferBootstrapTree& tree) +{ + pllmod_tbe_extra_info_t* extra_info = tree.get_extra_info(); + stream << "Taxon\ttIndex\n"; + auto tip_labels_list = tree.tip_labels_list(); + for (size_t i = 0; i < tree.num_tips(); ++i) { + stream << tip_labels_list[i] << "\t" << (double) (extra_info->extra_taxa_array[i] * 100) / (double) extra_info->num_bs_trees << "\n"; + } + return stream; +} + + diff --git a/src/io/TBEExtraTableStream.cpp b/src/io/TBEExtraTableStream.cpp new file mode 100644 index 00000000..8821e1a4 --- /dev/null +++ b/src/io/TBEExtraTableStream.cpp @@ -0,0 +1,51 @@ +#include "file_io.hpp" + +using namespace std; + + +TBEExtraTableStream& operator<<(TBEExtraTableStream& stream, const TransferBootstrapTree& tree) +{ + pllmod_tbe_extra_info_t* extra_info = tree.get_extra_info(); + if (!extra_info) { + throw std::runtime_error("Extra info is null"); + } + if (!extra_info->extra_taxa_table) { + throw std::runtime_error("Extra taxa table is null"); + } + double divideBy = extra_info->num_bs_trees * tree.num_splits(); + stream << "Edge\tSupport\t"; + auto tip_labels_list = tree.tip_labels_list(); + for (size_t i = 0; i < tree.num_tips(); ++i) { + stream << tip_labels_list[i]; + if (i < tree.num_tips() - 1) { + stream << "\t"; + } else { + stream << "\n"; + } + } + + auto bufsize = tree.num_tips() * 10; + char * buf = new char[bufsize]; + auto support = tree.get_support(); + for (size_t i = 0; i < tree.num_splits(); ++i) { + stream << tree.get_split_node_map()[i]->clv_index << "\t" << fixed << std::setprecision(6) << support[i]; + + char * bufptr = buf; + for (size_t j = 0; j < tree.num_tips(); ++j) + { + if (extra_info->extra_taxa_table[i][j] == 0) + { + bufptr += sprintf(bufptr, "\t0.000000"); + } + else + { + double d = (double) (extra_info->extra_taxa_table[i][j]) / divideBy; + bufptr += sprintf(bufptr, "\t%f", d); + } + } + *bufptr = 0; + stream << buf << "\n"; +} + delete[] buf; + return stream; +} diff --git a/src/io/TBEExtraTreeStream.cpp b/src/io/TBEExtraTreeStream.cpp new file mode 100644 index 00000000..b8bd917f --- /dev/null +++ b/src/io/TBEExtraTreeStream.cpp @@ -0,0 +1,168 @@ +#include "file_io.hpp" + +using namespace std; + +char * newick_utree_recurse(const pll_unode_t * root, int level, + pllmod_tbe_extra_info_t* extra_info, + pllmod_tbe_split_info_t* split_info, + const std::vector& node_split_map + ) +{ + char * newick; + int size_alloced = 0; + assert(root != NULL); + if (!root->next) // leaf node + { + size_alloced = asprintf(&newick, "%s:%f", root->label, root->length); + } + else + { + const pll_unode_t * start = root->next; + const pll_unode_t * snode = start; + char * cur_newick; + do + { + char * subtree = newick_utree_recurse(snode->back, level+1, extra_info, split_info, node_split_map); + if (subtree == NULL) + { + pll_errno = PLL_ERROR_MEM_ALLOC; + snprintf(pll_errmsg, 200, "Unable to allocate enough memory."); + return NULL; + } + + if (snode == start) + { + cur_newick = subtree; + } + else + { + char * temp = cur_newick; + size_alloced = asprintf(&cur_newick, + "%s,%s", + temp, + subtree); + free(temp); + free(subtree); + } + snode = snode->next; + } + while(snode != root); + + if (level > 0) + { + unsigned int id = node_split_map[root->clv_index]; + double avg_dist = extra_info->extra_avg_dist_array[id] / (double) extra_info->num_bs_trees; + unsigned int depth = split_info[id].p; + + size_alloced = asprintf(&newick, + "(%s)%d|%f|%d:%f", + cur_newick, + id, + avg_dist, + depth, + root->length); + free(cur_newick); + } + else + newick = cur_newick; + } + + if (size_alloced < 0) + { + pll_errno = PLL_ERROR_MEM_ALLOC; + snprintf(pll_errmsg, 200, "memory allocation during newick export failed"); + return NULL; + } + + return newick; +} + +char * utree_export_newick(const pll_unode_t * root, + int export_rooted, + double root_brlen, + pllmod_tbe_extra_info_t* extra_info, + pllmod_tbe_split_info_t* split_info, + const std::vector& node_split_map + ) +{ + char * newick; + char * subtree1; + char * subtree2; + int size_alloced; + + if (!root) return NULL; + + if (!root->next) root = root->back; + + unsigned int id = node_split_map[root->clv_index]; + double avg_dist = extra_info->extra_avg_dist_array[id] / (double) extra_info->num_bs_trees; + unsigned int depth = split_info[id].p; + + if (export_rooted) + { + subtree1 = newick_utree_recurse(root->back, 1, extra_info, split_info, node_split_map); + subtree2 = newick_utree_recurse(root, 0, extra_info, split_info, node_split_map); + + size_alloced = asprintf(&newick, + "(%s,(%s)%d|%f|%d:%f):0.0;", + subtree1, + subtree2, + id, + avg_dist, + depth, + root_brlen); + } + else + { + subtree1 = newick_utree_recurse(root->back, 1, extra_info, split_info, node_split_map); + subtree2 = newick_utree_recurse(root, 0, extra_info, split_info, node_split_map); + + size_alloced = asprintf(&newick, + "(%s,%s)%d|%f|%d:0.0;", + subtree1, + subtree2, + id, + avg_dist, + depth); + } + + free(subtree1); + free(subtree2); + + if (size_alloced < 0) + { + pll_errno = PLL_ERROR_MEM_ALLOC; + snprintf(pll_errmsg, 200, "memory allocation during newick export failed"); + return NULL; + } + +// printf("newick: %s\n", newick); + + return (newick); +} + +TBEExtraTreeStream& operator<<(TBEExtraTreeStream& stream, const TransferBootstrapTree& tree) +{ + pllmod_tbe_extra_info_t* extra_info = tree.get_extra_info(); + const std::vector split_node_map = tree.get_split_node_map(); + pllmod_tbe_split_info_t* split_info = tree.get_split_info(); + std::vector node_split_map(tree.num_nodes()); + for (size_t i = 0; i < split_node_map.size(); ++i) { + node_split_map[split_node_map[i]->clv_index] = i; + } + + char * newick_str = utree_export_newick(&tree.pll_utree_root(), 0, 0, extra_info, split_info, node_split_map); + if (newick_str) + { + stream << newick_str << std::endl; + free(newick_str); + } + else + { + assert(pll_errno); + libpll_check_error("Failed to generate Newick"); + } + return stream; +} + + diff --git a/src/io/file_io.hpp b/src/io/file_io.hpp index 5e26187b..19e92a7d 100644 --- a/src/io/file_io.hpp +++ b/src/io/file_io.hpp @@ -6,6 +6,7 @@ #include "../Tree.hpp" #include "../AncestralStates.hpp" #include "../bootstrap/BootstrapTree.hpp" +#include "../bootstrap/TransferBootstrapTree.hpp" #include "../bootstrap/BootstrapGenerator.hpp" #include "../PartitionedMSAView.hpp" @@ -23,6 +24,30 @@ class NewickStream : public std::fstream bool _brlens; }; +class TBEExtraTableStream : public std::fstream +{ +public: + TBEExtraTableStream(const std::string& fname) : std::fstream(fname, std::ios::out) {}; + TBEExtraTableStream(const std::string& fname, std::ios_base::openmode mode) : + std::fstream(fname, mode) {}; +}; + +class TBEExtraArrayStream : public std::fstream +{ +public: + TBEExtraArrayStream(const std::string& fname) : std::fstream(fname, std::ios::out) {}; + TBEExtraArrayStream(const std::string& fname, std::ios_base::openmode mode) : + std::fstream(fname, mode) {}; +}; + +class TBEExtraTreeStream : public std::fstream +{ +public: + TBEExtraTreeStream(const std::string& fname) : std::fstream(fname, std::ios::out) {}; + TBEExtraTreeStream(const std::string& fname, std::ios_base::openmode mode) : + std::fstream(fname, mode) {}; +}; + class MSAFileStream { public: @@ -162,6 +187,10 @@ RaxmlPartitionStream& operator<<(RaxmlPartitionStream& stream, const Partitioned AncestralProbStream& operator<<(AncestralProbStream& stream, const AncestralStates& ancestral); AncestralStateStream& operator<<(AncestralStateStream& stream, const AncestralStates& ancestral); +TBEExtraTableStream& operator<<(TBEExtraTableStream& stream, const TransferBootstrapTree& tree); +TBEExtraArrayStream& operator<<(TBEExtraArrayStream& stream, const TransferBootstrapTree& tree); +TBEExtraTreeStream& operator<<(TBEExtraTreeStream& stream, const TransferBootstrapTree& tree); + std::string to_newick_string_rooted(const Tree& tree, double root_brlen = 0.0); #endif /* RAXML_FILE_IO_HPP_ */ diff --git a/src/main.cpp b/src/main.cpp index b0cc222c..7094c607 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1350,7 +1350,7 @@ void draw_bootstrap_support(RaxmlInstance& instance, Tree& ref_tree, const TreeC } else if (metric == BranchSupportMetric::tbe) { - sup_tree = make_shared(ref_tree, instance.opts.tbe_naive); + sup_tree = make_shared(ref_tree, instance.opts.tbe_naive, instance.opts.tbe_extra_cutoff, instance.opts.tbe_extra_table, instance.opts.tbe_extra_array, instance.opts.tbe_extra_tree); support_in_pct = false; } else @@ -1364,6 +1364,7 @@ void draw_bootstrap_support(RaxmlInstance& instance, Tree& ref_tree, const TreeC } sup_tree->draw_support(support_in_pct); + instance.support_trees[metric] = sup_tree; } } @@ -1804,6 +1805,34 @@ void print_final_output(const RaxmlInstance& instance, const Checkpoint& checkp) LOG_INFO << "Best ML tree with " << metric_name << " support values saved to: " << sysutil_realpath(sup_file) << endl; + + if (it.first == BranchSupportMetric::tbe) + { + if (opts.tbe_extra_array) { + auto extra_array_file = opts.tbe_extra_array_file(); + TBEExtraArrayStream tbeExtraArray(extra_array_file); + tbeExtraArray << *dynamic_cast(it.second.get()); + + LOG_INFO << "TBE extra array saved to: " << + sysutil_realpath(extra_array_file) << endl; + } + if (opts.tbe_extra_table) { + auto extra_table_file = opts.tbe_extra_table_file(); + TBEExtraTableStream tbeExtraTable(extra_table_file); + tbeExtraTable << *dynamic_cast(it.second.get()); + + LOG_INFO << "TBE extra table saved to: " << + sysutil_realpath(extra_table_file) << endl; + } + if (opts.tbe_extra_tree) { + auto extra_tree_file = opts.tbe_extra_tree_file(); + TBEExtraTreeStream tbeExtraTree(extra_tree_file); + tbeExtraTree << *dynamic_cast(it.second.get()); + + LOG_INFO << "TBE extra tree saved to: " << + sysutil_realpath(extra_tree_file) << endl; + } + } } } }