From 16a2e8d83e6482704fe22f25f93597a41e924544 Mon Sep 17 00:00:00 2001 From: computations Date: Thu, 9 Jul 2020 18:00:18 +0200 Subject: [PATCH] Adds a bunch of tests --- src/checkpoint.cpp | 11 +++- src/checkpoint.hpp | 8 ++- src/model.cpp | 11 ++-- src/model.hpp | 4 ++ src/util.hpp | 40 +++++++------- test/src/CMakeLists.txt | 3 ++ test/src/checkpoint.cpp | 115 ++++++++++++++++++++++++++++++++++++++++ test/src/model.cpp | 80 +++++++++++++++++++++------- test/src/test_util.cpp | 17 ++++++ test/src/test_util.hpp | 12 +++++ test/src/util.cpp | 21 ++++++++ 11 files changed, 275 insertions(+), 47 deletions(-) create mode 100644 test/src/checkpoint.cpp create mode 100644 test/src/test_util.cpp create mode 100644 test/src/test_util.hpp create mode 100644 test/src/util.cpp diff --git a/src/checkpoint.cpp b/src/checkpoint.cpp index 3dac194..74680c9 100644 --- a/src/checkpoint.cpp +++ b/src/checkpoint.cpp @@ -173,14 +173,22 @@ void checkpoint_t::clean() { void checkpoint_t::write(const rd_result_t &result) { debug_print(EMIT_LEVEL_MPI_DEBUG, "Writing result with root id: %lu", result.root_id); - auto lock = write_lock(); write_with_checksum(_file_descriptor, result); } + void checkpoint_t::write( const std::vector ¶meters) { write_with_checksum(_file_descriptor, parameters); } +void checkpoint_t::write( + const rd_result_t &result, + const std::vector ¶meters) { + auto lock = write_lock(); + write(result); + write(parameters); +} + void checkpoint_t::save_options(const cli_options_t &options) { if (!_existing_results) { write_with_success(_file_descriptor, options); @@ -193,6 +201,7 @@ void checkpoint_t::load_options(cli_options_t &options) { "Loading options from the checkpoint file"); int read_fd = open(_checkpoint_filename.c_str(), O_RDONLY); read_with_success(read_fd, options); + close(read_fd); } } diff --git a/src/checkpoint.hpp b/src/checkpoint.hpp index 423152d..4967e13 100644 --- a/src/checkpoint.hpp +++ b/src/checkpoint.hpp @@ -277,8 +277,7 @@ class checkpoint_t { auto lock = write_lock(); write_with_success(_file_descriptor, val); } - void write(const rd_result_t &); - void write(const std::vector &); + void write(const rd_result_t &, const std::vector &); void save_options(const cli_options_t &); void load_options(cli_options_t &); void reload(); @@ -291,12 +290,17 @@ class checkpoint_t { std::vector completed_indicies(); + std::string get_filename() const { return _checkpoint_filename; } + private: template fcntl_lock_t write_lock() { return fcntl_lock_t(_file_descriptor, F_WRLCK); } + void write(const rd_result_t &); + void write(const std::vector &); + std::string _checkpoint_filename; int _file_descriptor; bool _existing_results; diff --git a/src/model.cpp b/src/model.cpp index f771f61..03a8947 100644 --- a/src/model.cpp +++ b/src/model.cpp @@ -979,8 +979,8 @@ model_t::search(size_t min_roots, double root_ratio, double atol, double pgtol, rl = cur_best_rl; } - checkpoint.write({cur_best_rl.id, cur_best_lh, cur_best_rl.brlen_ratio}); - checkpoint.write(params); + checkpoint.write({cur_best_rl.id, cur_best_lh, cur_best_rl.brlen_ratio}, + params); debug_print(EMIT_LEVEL_DEBUG, "finished optimize_all root, cur_best_lh: %f", cur_best_lh); @@ -1078,8 +1078,8 @@ model_t::exhaustive_search(double atol, double pgtol, double brtol, rl = cur_rl; } - checkpoint.write({cur_best_rl.id, cur_best_lh, cur_best_rl.brlen_ratio}); - checkpoint.write(params); + checkpoint.write({cur_best_rl.id, cur_best_lh, cur_best_rl.brlen_ratio}, + params); root_index++; debug_print(EMIT_LEVEL_PROGRESS, "Step %lu / %lu, ETC: %0.2fh", root_index, @@ -1666,9 +1666,6 @@ void model_t::optimize_params(std::vector ¶ms, set_subst_rates(i, params[i].subst_rates); set_freqs_all_free(i, params[i].freqs); set_gamma_rates(i, params[i].gamma_alpha); - if (_rate_category_types[i] == rate_category::FREE) { - set_gamma_weights(i, params[i].gamma_weights); - } if (_rate_category_types[i] == rate_category::FREE) { set_gamma_weights(i, params[i].gamma_weights); diff --git a/src/model.hpp b/src/model.hpp index 19ed10e..0bcdad5 100644 --- a/src/model.hpp +++ b/src/model.hpp @@ -97,6 +97,10 @@ class model_t { checkpoint_t &); void assign_indicies_by_rank_exhaustive(size_t, size_t, checkpoint_t &); + std::vector assigned_indicies() const{ + return _assigned_idx; + } + private: std::pair bisect(const root_location_t &beg, dlh_t d_beg, const root_location_t &end, diff --git a/src/util.hpp b/src/util.hpp index b5c4e7f..6742eac 100644 --- a/src/util.hpp +++ b/src/util.hpp @@ -89,25 +89,6 @@ struct partition_parameters_t { model_params_t freqs; model_params_t gamma_alpha; model_params_t gamma_weights; - -#if 0 - partition_parameters_t &operator=(const partition_parameters_t &other) { - /* seems dumb, but I benchmarked this to be faster */ - for (size_t i = 0; i < subst_rates.size(); ++i) { - subst_rates[i] = other.subst_rates[i]; - } - for (size_t i = 0; i < freqs.size(); ++i) { - freqs[i] = other.freqs[i]; - } - for (size_t i = 0; i < gamma_alpha.size(); ++i) { - gamma_alpha[i] = other.gamma_alpha[i]; - } - for (size_t i = 0; i < gamma_weights.size(); ++i) { - gamma_weights[i] = other.gamma_weights[i]; - } - return *this; - } -#endif }; struct rd_result_t { @@ -176,6 +157,27 @@ struct cli_options_t { bool echo = false; bool invariant_sites = false; initialized_flag_t early_stop; + + bool operator==(const cli_options_t &other) const { + return msa_filename == other.msa_filename && + tree_filename == other.tree_filename && prefix == other.prefix && + model_filename == other.model_filename && + freqs_filename == other.freqs_filename && + partition_filename == other.partition_filename && + data_type == other.data_type && model_string == other.model_string && + rate_cats == other.rate_cats && + rate_category_types == other.rate_category_types && + seed == other.seed && threads == other.threads && + root_ratio == other.root_ratio && + abs_tolerance == other.abs_tolerance && factor == other.factor && + br_tolerance == other.br_tolerance && bfgs_tol == other.bfgs_tol && + states == other.states && exhaustive == other.exhaustive && + echo == other.echo && invariant_sites == other.invariant_sites && + early_stop == other.early_stop; + } + bool operator!=(const cli_options_t &other) const { + return !(*this == other); + } }; #endif diff --git a/test/src/CMakeLists.txt b/test/src/CMakeLists.txt index 644c980..57fa5a6 100644 --- a/test/src/CMakeLists.txt +++ b/test/src/CMakeLists.txt @@ -5,6 +5,9 @@ add_executable(rd_test msa.cpp model.cpp tree.cpp + checkpoint.cpp + util.cpp + test_util.cpp ${RD_SOURCES} ) diff --git a/test/src/checkpoint.cpp b/test/src/checkpoint.cpp new file mode 100644 index 0000000..0a7251b --- /dev/null +++ b/test/src/checkpoint.cpp @@ -0,0 +1,115 @@ +#include "data.hpp" +#include "test_util.hpp" +#include +#include +#include +#include +#include +#include +#include + +std::string make_checkpoint_filename() { + std::random_device rd; + uint64_t nonce = + (static_cast(rd()) << 32) | static_cast(rd()); + return std::string("/tmp/checkpoint_test_") + base_58_encode(nonce); +} + +checkpoint_t make_and_init_checkpoint() { + std::string checkpoint_filename = make_checkpoint_filename(); + checkpoint_t ckp(checkpoint_filename); + cli_options_t cli_options; + ckp.save_options(cli_options); + return ckp; +} + +TEST_CASE("checkpoint_t constructor", "[checkpoint_t]") { + std::string checkpoint_filename = make_checkpoint_filename(); + checkpoint_t ckp(checkpoint_filename); + REQUIRE(access(ckp.get_filename().c_str(), F_OK) != -1); +} + +TEST_CASE("checkpoint_t multiple checkpoints", "[checkpoint_t]") { + std::string checkpoint_filename = make_checkpoint_filename(); + checkpoint_t ckp1(checkpoint_filename); + REQUIRE(access(ckp1.get_filename().c_str(), F_OK) != -1); + checkpoint_t ckp2(checkpoint_filename); + CHECK(ckp2.existing_checkpoint()); +} + +TEST_CASE("checkpoint_t writing and reading cli_options") { + std::string checkpoint_filename = make_checkpoint_filename(); + checkpoint_t ckp1(checkpoint_filename); + REQUIRE(access(ckp1.get_filename().c_str(), F_OK) != -1); + SECTION("default options") { + cli_options_t cli_options; + ckp1.save_options(cli_options); + + checkpoint_t ckp2(checkpoint_filename); + cli_options_t written_options; + ckp2.load_options(written_options); + CHECK(written_options == cli_options); + } + SECTION("non-default options") { + cli_options_t cli_options; + cli_options.msa_filename = "red roses really like to smell good"; + cli_options.rate_cats = {1, 1, 3}; + ckp1.save_options(cli_options); + + checkpoint_t ckp2(checkpoint_filename); + cli_options_t written_options; + ckp2.load_options(written_options); + CHECK(written_options == cli_options); + } + SECTION("changed options options") { + cli_options_t cli_options; + cli_options.msa_filename = "red roses really like to smell good"; + cli_options.rate_cats = {1, 1, 3}; + ckp1.save_options(cli_options); + + cli_options.msa_filename = "this is not the original string"; + + checkpoint_t ckp2(checkpoint_filename); + cli_options_t written_options; + ckp2.load_options(written_options); + CHECK(written_options != cli_options); + } +} + +TEST_CASE("checkpoint_t writing and reading results", "[checkpoint_t]") { + checkpoint_t ckp = make_and_init_checkpoint(); + SECTION("one result") { + ckp.write(rd_result_t{}, std::vector{}); + auto results = ckp.read_results(); + CHECK(results.size() == 1); + } + SECTION("many results") { + for (size_t i = 0; i < 1000; ++i) { + ckp.write(rd_result_t{}, std::vector{}); + } + auto results = ckp.read_results(); + CHECK(results.size() == 1000); + } +} + +TEST_CASE("checkpoint_t checking indicies", "[checkpoint_t]") { + checkpoint_t ckp = make_and_init_checkpoint(); + SECTION("one index") { + ckp.write(rd_result_t{0, 0.0, 0.0}, std::vector{}); + auto idx = ckp.completed_indicies(); + REQUIRE(idx.size() == 1); + CHECK(idx[0] == 0); + } + SECTION("generator section") { + auto total_idx = GENERATE(1lu, 2lu, 4lu, 5lu, 6lu, 7lu, 8lu, 9lu, 10lu); + for (size_t i = 0; i < total_idx; ++i) { + ckp.write(rd_result_t{i, 0.0, 0.0}, + std::vector{}); + } + auto load_idx = ckp.completed_indicies(); + CHECK(load_idx.size() == total_idx); + for (size_t i = 0; i < total_idx; ++i) { + CHECK(std::find(load_idx.begin(), load_idx.end(), i) != load_idx.end()); + } + } +} diff --git a/test/src/model.cpp b/test/src/model.cpp index 298f34c..0230abf 100644 --- a/test/src/model.cpp +++ b/test/src/model.cpp @@ -1,9 +1,12 @@ #include "data.hpp" +#include "test_util.hpp" +#include #include #include #include #include #include +#include model_params_t params[] = { {1, 2.5, 1, 1, 1, 2.5, 2.5, 1, 1, 1, 2.5, 1}, @@ -16,24 +19,6 @@ model_params_t freqs[] = { {.25, .25, .25, .25}, }; -constexpr char base_58_chars[] = - "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; - -inline size_t compute_digit_with_base(size_t i, size_t n, size_t base) { - return (n % static_cast(std::pow(base, i + 1))) / std::pow(base, i); -} - -std::string base_58_encode(uint32_t n) { - size_t alphabet_size = sizeof(base_58_chars); - size_t len = std::ceil(std::log(n) / std::log(alphabet_size)); - std::string enc; - enc.resize(len); - for (size_t i = 0; i < len; ++i) { - enc[i] = base_58_chars[compute_digit_with_base(i, n, alphabet_size)]; - } - return enc; -} - checkpoint_t make_dummy_checkpoint(const std::string &dataset_name) { std::random_device rd; uint64_t nonce = @@ -478,3 +463,62 @@ TEST_CASE("model_t test no invariant sites", "[model_t]") { CHECK(model.compute_lh(final_rl) == Approx(final_lh)); } } + +TEST_CASE("assign indicies test", "[model_t]") { + auto ckp = make_dummy_checkpoint("10.fasta"); + // Since 10.fasta has 10 taxa, that makes it have 2n-3 == 17 possible + // rootings. So, we want to test that if we write a number of "dummy" results + // to the file, we get the right result. + auto dummy_results_count = GENERATE(0lu, 1lu, 2lu, 4lu, 8lu); + std::random_device rd; + std::mt19937 gen(rd()); + + std::vector possible_idx(17); + std::iota(possible_idx.begin(), possible_idx.end(), 0); + std::shuffle(possible_idx.begin(), possible_idx.end(), gen); + for (size_t i = 0; i < dummy_results_count; ++i) { + ckp.write(rd_result_t{possible_idx[i], 0.0, 0.0}, + std::vector{}); + } + + auto ds = data_files_dna["10.fasta"]; + std::vector msa; + msa.emplace_back(ds.first); + uint64_t seed = std::rand(); + rooted_tree_t tree{ds.second}; + model_t model{tree, msa, {1}, false, seed, false}; + model.initialize_partitions_uniform_freqs(msa); + + SECTION("search") { + auto root_assignment = GENERATE(1lu, 2lu, 3lu, 4lu, 5lu); + int expected_size = std::max(static_cast(root_assignment) - + static_cast(dummy_results_count), + 0); + if (static_cast(root_assignment) - + static_cast(dummy_results_count) >= + 0) { + REQUIRE_NOTHROW(model.assign_indicies_by_rank_search(root_assignment, 0.0, + 0, 1, ckp)); + auto assigned_idx = model.assigned_indicies(); + REQUIRE(assigned_idx.size() == expected_size); + for (size_t j = 0; j < assigned_idx.size(); ++j) { + for (size_t i = 0; i < dummy_results_count; ++i) { + CHECK(assigned_idx[j] != possible_idx[i]); + } + } + } else { + REQUIRE_THROWS(model.assign_indicies_by_rank_search(1, 0.0, 0, 1, ckp)); + } + } + SECTION("exhaustive") { + int expected_size = std::max(17 - static_cast(dummy_results_count), 0); + REQUIRE_NOTHROW(model.assign_indicies_by_rank_exhaustive(0, 1, ckp)); + auto assigned_idx = model.assigned_indicies(); + REQUIRE(assigned_idx.size() == expected_size); + for (size_t j = 0; j < assigned_idx.size(); ++j) { + for (size_t i = 0; i < dummy_results_count; ++i) { + CHECK(assigned_idx[j] != possible_idx[i]); + } + } + } +} diff --git a/test/src/test_util.cpp b/test/src/test_util.cpp new file mode 100644 index 0000000..233030b --- /dev/null +++ b/test/src/test_util.cpp @@ -0,0 +1,17 @@ +#include "test_util.hpp" +#include + +inline size_t compute_digit_with_base(size_t i, size_t n, size_t base) { + return (n % static_cast(std::pow(base, i + 1))) / std::pow(base, i); +} + +std::string base_58_encode(uint32_t n) { + size_t alphabet_size = sizeof(base_58_chars)-1; + size_t len = std::ceil(std::log(n) / std::log(alphabet_size)); + std::string enc; + enc.resize(len); + for (size_t i = 0; i < len; ++i) { + enc[i] = base_58_chars[compute_digit_with_base(i, n, alphabet_size)]; + } + return enc; +} diff --git a/test/src/test_util.hpp b/test/src/test_util.hpp new file mode 100644 index 0000000..7b18414 --- /dev/null +++ b/test/src/test_util.hpp @@ -0,0 +1,12 @@ +#ifndef RD_TEST_UTIL_HPP +#define RD_TEST_UTIL_HPP +#include + +constexpr char base_58_chars[] = + "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"; + +inline size_t compute_digit_with_base(size_t i, size_t n, size_t base); + +std::string base_58_encode(uint32_t n); + +#endif diff --git a/test/src/util.cpp b/test/src/util.cpp new file mode 100644 index 0000000..730fe85 --- /dev/null +++ b/test/src/util.cpp @@ -0,0 +1,21 @@ +#include "data.hpp" +#include "test_util.hpp" +#include +#include +#include + +TEST_CASE("cli_options_t comparison operators", "[cli_options_t]"){ + cli_options_t cli1; + cli1.msa_filename = "what in the world is a kangaroo doing in the room"; + cli1.seed = 12312; + cli_options_t cli2; + cli2.msa_filename = "what in the world is a kangaroo doing in the room"; + cli2.seed = 12312; + SECTION("operator=="){ + CHECK(cli1 == cli2); + } + SECTION("operator!="){ + cli1.msa_filename = "this is now a different string than it was origionally"; + CHECK(cli1 != cli2); + } +}