From 5005eb034c1bffc42ae4eaab777d78944e706107 Mon Sep 17 00:00:00 2001 From: computations Date: Sat, 18 Jul 2020 00:42:25 +0200 Subject: [PATCH] Implements an optimization when using partitions --- src/model.cpp | 32 +++++++++++++++++++++----------- src/model.hpp | 11 +++++------ src/tree.hpp | 8 +++++++- test/src/model.cpp | 8 +++++--- 4 files changed, 38 insertions(+), 21 deletions(-) diff --git a/src/model.cpp b/src/model.cpp index 03a8947..9ec8b2e 100644 --- a/src/model.cpp +++ b/src/model.cpp @@ -347,17 +347,21 @@ void model_t::set_freqs_all_free(size_t p_index, model_params_t freqs) { set_freqs(p_index, freqs); } -void model_t::update_pmatrices(const std::vector &pmatrix_indices, - const std::vector &branch_lengths) { +std::vector +model_t::update_pmatrices(const std::vector &pmatrix_indices, + const std::vector &branch_lengths) { /* Update the eigen decompositions first */ + std::vector updated(_partitions.size(), false); for (size_t part_index = 0; part_index < _partitions.size(); ++part_index) { auto part = _partitions[part_index]; for (size_t i = 0; i < part->rate_cats; ++i) { if (!part->eigen_decomp_valid[i]) { pll_update_eigen(part, _param_indicies[part_index][i]); + updated[part_index] = true; } } } + for (size_t part_index = 0; part_index < _partitions.size(); ++part_index) { auto part = _partitions[part_index]; #pragma omp parallel for collapse(1) schedule(static) @@ -369,17 +373,19 @@ void model_t::update_pmatrices(const std::vector &pmatrix_indices, &branch_length, 1); } } + return updated; } double model_t::compute_lh(const root_location_t &root_location) { std::vector ops; std::vector pmatrix_indices; std::vector branch_lengths; + bool new_root = root_location != _tree.root_location(); GENERATE_AND_UNPACK_OPS(_tree, root_location, ops, pmatrix_indices, branch_lengths); - update_pmatrices(pmatrix_indices, branch_lengths); + auto updated_partitions = update_pmatrices(pmatrix_indices, branch_lengths); double lh = 0.0; @@ -387,8 +393,10 @@ double model_t::compute_lh(const root_location_t &root_location) { for (size_t i = 0; i < _partitions.size(); ++i) { auto &partition = _partitions[i]; - pll_update_partials(partition, ops.data(), - static_cast(ops.size())); + if (new_root || updated_partitions[i]) { + pll_update_partials(partition, ops.data(), + static_cast(ops.size())); + } lh += pll_compute_root_loglikelihood(partition, _tree.root_clv_index(), _tree.root_scaler_index(), @@ -410,14 +418,14 @@ double model_t::compute_lh_root(const root_location_t &root) { branch_lengths = std::move(std::get<2>(result)); } - unsigned int params[] = {0, 0, 0, 0}; double lh = 0.0; #pragma omp parallel for reduction(+ : lh) for (size_t i = 0; i < _partitions.size(); ++i) { auto &partition = _partitions[i]; int result = pll_update_prob_matrices( - partition, params, pmatrix_indices.data(), branch_lengths.data(), + partition, _param_indicies[i].data(), pmatrix_indices.data(), + branch_lengths.data(), static_cast(pmatrix_indices.size())); if (result == PLL_FAILURE) { @@ -425,8 +433,8 @@ double model_t::compute_lh_root(const root_location_t &root) { } pll_update_partials(partition, &op, 1); lh += pll_compute_root_loglikelihood(partition, _tree.root_clv_index(), - _tree.root_scaler_index(), params, - nullptr) * + _tree.root_scaler_index(), + _param_indicies[i].data(), nullptr) * _partition_weights[i]; } if (std::isnan(lh)) { @@ -1312,8 +1320,7 @@ bfgs_params(model_params_t &initial_params, size_t partition_index, gradient.data(), &factor, &pgtol, wa.data(), iwa.data(), &task, &iprint, &csave, lsave, isave, dsave); - debug_print(EMIT_LEVEL_MPROGRESS, "BFGS Iter: %lu Score: %.5f", iters, - -score); + debug_print(EMIT_LEVEL_INFO, "BFGS Iter: %lu Score: %.5f", iters, -score); set_func(partition_index, parameters); score = compute_lh(); @@ -1663,6 +1670,9 @@ void model_t::optimize_params(std::vector ¶ms, const root_location_t &rl, double pgtol, double factor, bool optimize_gamma) { for (size_t i = 0; i < _partitions.size(); ++i) { + debug_print(EMIT_LEVEL_MPROGRESS, + "Optimizing parameters for partition: %lu/%lu", i + 1, + _partitions.size()); set_subst_rates(i, params[i].subst_rates); set_freqs_all_free(i, params[i].freqs); set_gamma_rates(i, params[i].gamma_alpha); diff --git a/src/model.hpp b/src/model.hpp index 0bcdad5..553f9cf 100644 --- a/src/model.hpp +++ b/src/model.hpp @@ -97,9 +97,7 @@ class model_t { checkpoint_t &); void assign_indicies_by_rank_exhaustive(size_t, size_t, checkpoint_t &); - std::vector assigned_indicies() const{ - return _assigned_idx; - } + std::vector assigned_indicies() const { return _assigned_idx; } private: std::pair @@ -125,10 +123,11 @@ class model_t { void set_empirical_freqs(size_t); void set_empirical_freqs(); void set_freqs_all_free(size_t, model_params_t); - void set_model_params(const std::vector&); + void set_model_params(const std::vector &); void move_root(const root_location_t &new_root); - void update_pmatrices(const std::vector &pmatrix_indices, - const std::vector &branch_lengths); + std::vector + update_pmatrices(const std::vector &pmatrix_indices, + const std::vector &branch_lengths); double bfgs_rates(model_params_t &initial_rates, const root_location_t &rl, size_t partition_index, double pgtol, double factor); double bfgs_freqs(model_params_t &initial_rates, const root_location_t &rl, diff --git a/src/tree.hpp b/src/tree.hpp index 7c0104c..e5fb7bf 100644 --- a/src/tree.hpp +++ b/src/tree.hpp @@ -32,6 +32,12 @@ struct root_location_t { std::string label() const { return edge->label != nullptr ? edge->label : "(null)"; } + bool operator==(const root_location_t &other) const { + return edge == other.edge && brlen_ratio == other.brlen_ratio; + } + bool operator!=(const root_location_t &other) const { + return !(*this == other); + } }; pll_utree_t *parse_tree_file(const std::string &tree_filename); @@ -120,7 +126,7 @@ class rooted_tree_t { private: void sort_root_locations(); void generate_root_locations(); - void copy_root_locations(const rooted_tree_t&); + void copy_root_locations(const rooted_tree_t &); void add_root_space(); std::vector full_traverse() const; std::vector edge_traverse() const; diff --git a/test/src/model.cpp b/test/src/model.cpp index 0230abf..26259b0 100644 --- a/test/src/model.cpp +++ b/test/src/model.cpp @@ -298,9 +298,11 @@ TEST_CASE("model_t liklihood computation", "[model_t]") { continue; } auto rl = tree.root_location(i); - model.compute_lh(rl); - CHECK(std::fabs(model.compute_lh(rl) - model.compute_lh_root(rl)) == - Approx(0.0)); + double lh1 = model.compute_lh(rl); + double lh2 = model.compute_lh(rl); + double lh3 = model.compute_lh_root(rl); + CHECK(std::fabs(lh1 - lh2) == Approx(0.0)); + CHECK(std::fabs(lh3 - lh2) == Approx(0.0)); } }