Skip to content

Commit

Permalink
Adds a bunch of tests
Browse files Browse the repository at this point in the history
  • Loading branch information
computations committed Jul 9, 2020
1 parent 4af3de8 commit 16a2e8d
Show file tree
Hide file tree
Showing 11 changed files with 275 additions and 47 deletions.
11 changes: 10 additions & 1 deletion src/checkpoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,22 @@ void checkpoint_t::clean() {
// Writes a single root result to the checkpoint file.
//
// Logs the result's root id at EMIT_LEVEL_MPI_DEBUG, requests a write lock
// with fcntl_lock_behavior::block, and then emits the result through
// write_with_checksum on this checkpoint's file descriptor.
//
// NOTE(review): the two-argument write(result, parameters) overload in this
// file requests the same write lock before calling this function, so on that
// path the lock is requested twice. Presumably the inner guard's release also
// drops the outer lock before the parameters are written — confirm against
// fcntl_lock_t.
void checkpoint_t::write(const rd_result_t &result) {
debug_print(EMIT_LEVEL_MPI_DEBUG, "Writing result with root id: %lu",
result.root_id);
auto lock = write_lock<fcntl_lock_behavior::block>();
write_with_checksum(_file_descriptor, result);
}

// Writes the per-partition parameter vector to the checkpoint file through
// write_with_checksum on this checkpoint's file descriptor.
//
// No lock is requested here; the two-argument write(result, parameters)
// overload requests the write lock before calling this function.
void checkpoint_t::write(
const std::vector<partition_parameters_t> &parameters) {
write_with_checksum(_file_descriptor, parameters);
}

void checkpoint_t::write(
const rd_result_t &result,
const std::vector<partition_parameters_t> &parameters) {
auto lock = write_lock<fcntl_lock_behavior::block>();
write(result);
write(parameters);
}

void checkpoint_t::save_options(const cli_options_t &options) {
if (!_existing_results) {
write_with_success(_file_descriptor, options);
Expand All @@ -193,6 +201,7 @@ void checkpoint_t::load_options(cli_options_t &options) {
"Loading options from the checkpoint file");
int read_fd = open(_checkpoint_filename.c_str(), O_RDONLY);
read_with_success(read_fd, options);
close(read_fd);
}
}

Expand Down
8 changes: 6 additions & 2 deletions src/checkpoint.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -277,8 +277,7 @@ class checkpoint_t {
auto lock = write_lock<fcntl_lock_behavior::block>();
write_with_success(_file_descriptor, val);
}
void write(const rd_result_t &);
void write(const std::vector<partition_parameters_t> &);
void write(const rd_result_t &, const std::vector<partition_parameters_t> &);
void save_options(const cli_options_t &);
void load_options(cli_options_t &);
void reload();
Expand All @@ -291,12 +290,17 @@ class checkpoint_t {

std::vector<size_t> completed_indicies();

// Returns a copy of the stored checkpoint filename (_checkpoint_filename).
std::string get_filename() const { return _checkpoint_filename; }

private:
// Builds an fcntl_lock_t<W> over this checkpoint's file descriptor, asking for
// a write lock (F_WRLCK). The template argument W is forwarded to fcntl_lock_t
// unchanged; callers in this project pass fcntl_lock_behavior::block.
// NOTE(review): presumably the lock is held for the lifetime of the returned
// object — confirm against fcntl_lock_t.
template <fcntl_lock_behavior::fcntl_lock_block_t W>
fcntl_lock_t<W> write_lock() {
return fcntl_lock_t<W>(_file_descriptor, F_WRLCK);
}

void write(const rd_result_t &);
void write(const std::vector<partition_parameters_t> &);

std::string _checkpoint_filename;
int _file_descriptor;
bool _existing_results;
Expand Down
11 changes: 4 additions & 7 deletions src/model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -979,8 +979,8 @@ model_t::search(size_t min_roots, double root_ratio, double atol, double pgtol,
rl = cur_best_rl;
}

checkpoint.write({cur_best_rl.id, cur_best_lh, cur_best_rl.brlen_ratio});
checkpoint.write(params);
checkpoint.write({cur_best_rl.id, cur_best_lh, cur_best_rl.brlen_ratio},
params);

debug_print(EMIT_LEVEL_DEBUG, "finished optimize_all root, cur_best_lh: %f",
cur_best_lh);
Expand Down Expand Up @@ -1078,8 +1078,8 @@ model_t::exhaustive_search(double atol, double pgtol, double brtol,
rl = cur_rl;
}

checkpoint.write({cur_best_rl.id, cur_best_lh, cur_best_rl.brlen_ratio});
checkpoint.write(params);
checkpoint.write({cur_best_rl.id, cur_best_lh, cur_best_rl.brlen_ratio},
params);
root_index++;

debug_print(EMIT_LEVEL_PROGRESS, "Step %lu / %lu, ETC: %0.2fh", root_index,
Expand Down Expand Up @@ -1666,9 +1666,6 @@ void model_t::optimize_params(std::vector<partition_parameters_t> &params,
set_subst_rates(i, params[i].subst_rates);
set_freqs_all_free(i, params[i].freqs);
set_gamma_rates(i, params[i].gamma_alpha);
if (_rate_category_types[i] == rate_category::FREE) {
set_gamma_weights(i, params[i].gamma_weights);
}

if (_rate_category_types[i] == rate_category::FREE) {
set_gamma_weights(i, params[i].gamma_weights);
Expand Down
4 changes: 4 additions & 0 deletions src/model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,10 @@ class model_t {
checkpoint_t &);
void assign_indicies_by_rank_exhaustive(size_t, size_t, checkpoint_t &);

// Returns a copy of _assigned_idx, the indices currently assigned to this
// model. The unit tests call this after assign_indicies_by_rank_search /
// assign_indicies_by_rank_exhaustive to inspect the assignment.
std::vector<size_t> assigned_indicies() const{
return _assigned_idx;
}

private:
std::pair<root_location_t, double>
bisect(const root_location_t &beg, dlh_t d_beg, const root_location_t &end,
Expand Down
40 changes: 21 additions & 19 deletions src/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,25 +89,6 @@ struct partition_parameters_t {
model_params_t freqs;
model_params_t gamma_alpha;
model_params_t gamma_weights;

#if 0
// Disabled (compiled out by the surrounding #if 0) element-wise copy
// assignment for partition_parameters_t. Each of the four parameter members
// is copied index by index from `other` into this object.
// NOTE(review): every loop is bounded by THIS object's container size while
// indexing `other`'s container, so it assumes both sides already have the
// same sizes; nothing is resized.
partition_parameters_t &operator=(const partition_parameters_t &other) {
/* seems dumb, but I benchmarked this to be faster */
for (size_t i = 0; i < subst_rates.size(); ++i) {
subst_rates[i] = other.subst_rates[i];
}
for (size_t i = 0; i < freqs.size(); ++i) {
freqs[i] = other.freqs[i];
}
for (size_t i = 0; i < gamma_alpha.size(); ++i) {
gamma_alpha[i] = other.gamma_alpha[i];
}
for (size_t i = 0; i < gamma_weights.size(); ++i) {
gamma_weights[i] = other.gamma_weights[i];
}
return *this;
}
#endif
};

struct rd_result_t {
Expand Down Expand Up @@ -176,6 +157,27 @@ struct cli_options_t {
bool echo = false;
bool invariant_sites = false;
initialized_flag_t early_stop;

// Member-wise equality between two option sets.
//
// Members are compared one at a time with their own operator==, in the order
// listed below, and the first mismatch ends the comparison with `false`.
// Floating-point members are compared exactly.
bool operator==(const cli_options_t &other) const {
  // Input / output locations.
  if (!(msa_filename == other.msa_filename)) return false;
  if (!(tree_filename == other.tree_filename)) return false;
  if (!(prefix == other.prefix)) return false;
  if (!(model_filename == other.model_filename)) return false;
  if (!(freqs_filename == other.freqs_filename)) return false;
  if (!(partition_filename == other.partition_filename)) return false;

  // Model description.
  if (!(data_type == other.data_type)) return false;
  if (!(model_string == other.model_string)) return false;
  if (!(rate_cats == other.rate_cats)) return false;
  if (!(rate_category_types == other.rate_category_types)) return false;

  // Run configuration and numeric tolerances.
  if (!(seed == other.seed)) return false;
  if (!(threads == other.threads)) return false;
  if (!(root_ratio == other.root_ratio)) return false;
  if (!(abs_tolerance == other.abs_tolerance)) return false;
  if (!(factor == other.factor)) return false;
  if (!(br_tolerance == other.br_tolerance)) return false;
  if (!(bfgs_tol == other.bfgs_tol)) return false;
  if (!(states == other.states)) return false;

  // Flags.
  if (!(exhaustive == other.exhaustive)) return false;
  if (!(echo == other.echo)) return false;
  if (!(invariant_sites == other.invariant_sites)) return false;

  return early_stop == other.early_stop;
}
// Logical negation of operator== for the same pair of option sets.
bool operator!=(const cli_options_t &other) const {
  const bool same = operator==(other);
  return !same;
}
};

#endif
3 changes: 3 additions & 0 deletions test/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ add_executable(rd_test
msa.cpp
model.cpp
tree.cpp
checkpoint.cpp
util.cpp
test_util.cpp
${RD_SOURCES}
)

Expand Down
115 changes: 115 additions & 0 deletions test/src/checkpoint.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#include "data.hpp"
#include "test_util.hpp"
#include <algorithm>
#include <catch2/catch.hpp>
#include <checkpoint.hpp>
#include <debug.h>
#include <random>
#include <unistd.h>
#include <vector>

// Produces a path of the form "/tmp/checkpoint_test_<suffix>", where the
// suffix is base_58_encode() applied to a 64-bit value assembled from two
// std::random_device draws. base_58_encode is declared outside this file.
std::string make_checkpoint_filename() {
  std::random_device entropy;
  const uint64_t high_bits = static_cast<uint64_t>(entropy()) << 32;
  const uint64_t low_bits = static_cast<uint64_t>(entropy());

  std::string path("/tmp/checkpoint_test_");
  path += base_58_encode(high_bits | low_bits);
  return path;
}

// Creates a checkpoint_t on a freshly generated filename, saves a
// default-constructed cli_options_t into it, and returns the checkpoint.
checkpoint_t make_and_init_checkpoint() {
  std::string path = make_checkpoint_filename();
  checkpoint_t fresh_checkpoint(path);

  const cli_options_t default_options;
  fresh_checkpoint.save_options(default_options);

  return fresh_checkpoint;
}

// Constructing a checkpoint_t on a fresh filename is expected to leave a file
// on disk: access(..., F_OK) on the checkpoint's filename must not return -1.
TEST_CASE("checkpoint_t constructor", "[checkpoint_t]") {
std::string checkpoint_filename = make_checkpoint_filename();
checkpoint_t ckp(checkpoint_filename);
REQUIRE(access(ckp.get_filename().c_str(), F_OK) != -1);
}

// Opens two checkpoint_t objects on the same filename. The first must produce
// a file on disk; the second is then expected to report
// existing_checkpoint().
TEST_CASE("checkpoint_t multiple checkpoints", "[checkpoint_t]") {
std::string checkpoint_filename = make_checkpoint_filename();
checkpoint_t ckp1(checkpoint_filename);
REQUIRE(access(ckp1.get_filename().c_str(), F_OK) != -1);
checkpoint_t ckp2(checkpoint_filename);
CHECK(ckp2.existing_checkpoint());
}

// Round-trips cli_options_t through a checkpoint file.
//
// Each section saves options through one checkpoint_t (ckp1), opens a second
// checkpoint_t (ckp2) on the same filename, loads the options back and
// compares them with cli_options_t::operator== / operator!=.
//
// The "[checkpoint_t]" tag matches the other checkpoint tests in this file so
// that a tag-filtered run (e.g. `rd_test "[checkpoint_t]"`) includes this
// case as well.
TEST_CASE("checkpoint_t writing and reading cli_options", "[checkpoint_t]") {
  std::string checkpoint_filename = make_checkpoint_filename();
  checkpoint_t ckp1(checkpoint_filename);
  REQUIRE(access(ckp1.get_filename().c_str(), F_OK) != -1);

  // Default-constructed options must come back unchanged.
  SECTION("default options") {
    cli_options_t cli_options;
    ckp1.save_options(cli_options);

    checkpoint_t ckp2(checkpoint_filename);
    cli_options_t written_options;
    ckp2.load_options(written_options);
    CHECK(written_options == cli_options);
  }

  // A string member and a vector member set to non-default values must
  // survive the round trip.
  SECTION("non-default options") {
    cli_options_t cli_options;
    cli_options.msa_filename = "red roses really like to smell good";
    cli_options.rate_cats = {1, 1, 3};
    ckp1.save_options(cli_options);

    checkpoint_t ckp2(checkpoint_filename);
    cli_options_t written_options;
    ckp2.load_options(written_options);
    CHECK(written_options == cli_options);
  }

  // Mutating the in-memory options AFTER saving must not affect what is
  // loaded back, so the loaded copy has to differ from the mutated one.
  SECTION("changed options options") {
    cli_options_t cli_options;
    cli_options.msa_filename = "red roses really like to smell good";
    cli_options.rate_cats = {1, 1, 3};
    ckp1.save_options(cli_options);

    cli_options.msa_filename = "this is not the original string";

    checkpoint_t ckp2(checkpoint_filename);
    cli_options_t written_options;
    ckp2.load_options(written_options);
    CHECK(written_options != cli_options);
  }
}

// Writes default-constructed results (each paired with an empty parameter
// vector) through the two-argument checkpoint_t::write and expects
// read_results() to return exactly as many entries as were written.
TEST_CASE("checkpoint_t writing and reading results", "[checkpoint_t]") {
checkpoint_t ckp = make_and_init_checkpoint();
// A single write must be read back as a single result.
SECTION("one result") {
ckp.write(rd_result_t{}, std::vector<partition_parameters_t>{});
auto results = ckp.read_results();
CHECK(results.size() == 1);
}
// 1000 consecutive writes must be read back as 1000 results.
SECTION("many results") {
for (size_t i = 0; i < 1000; ++i) {
ckp.write(rd_result_t{}, std::vector<partition_parameters_t>{});
}
auto results = ckp.read_results();
CHECK(results.size() == 1000);
}
}

// Writes results whose first field is a known index and expects
// completed_indicies() to report exactly those indices.
TEST_CASE("checkpoint_t checking indicies", "[checkpoint_t]") {
checkpoint_t ckp = make_and_init_checkpoint();
// One result with index 0 must yield a single completed index equal to 0.
SECTION("one index") {
ckp.write(rd_result_t{0, 0.0, 0.0}, std::vector<partition_parameters_t>{});
auto idx = ckp.completed_indicies();
REQUIRE(idx.size() == 1);
CHECK(idx[0] == 0);
}
// Repeated for each generated count: writes indices 0..total_idx-1 and
// checks that every one of them is present in the loaded list (order is not
// asserted, only membership and total size).
// NOTE(review): 3lu is absent from the GENERATE list — confirm whether that
// is intentional.
SECTION("generator section") {
auto total_idx = GENERATE(1lu, 2lu, 4lu, 5lu, 6lu, 7lu, 8lu, 9lu, 10lu);
for (size_t i = 0; i < total_idx; ++i) {
ckp.write(rd_result_t{i, 0.0, 0.0},
std::vector<partition_parameters_t>{});
}
auto load_idx = ckp.completed_indicies();
CHECK(load_idx.size() == total_idx);
for (size_t i = 0; i < total_idx; ++i) {
CHECK(std::find(load_idx.begin(), load_idx.end(), i) != load_idx.end());
}
}
}
80 changes: 62 additions & 18 deletions test/src/model.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
#include "data.hpp"
#include "test_util.hpp"
#include <algorithm>
#include <catch2/catch.hpp>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <debug.h>
#include <model.hpp>
#include <numeric>
#include <random>
#include <string>
#include <unordered_set>

model_params_t params[] = {
{1, 2.5, 1, 1, 1, 2.5, 2.5, 1, 1, 1, 2.5, 1},
Expand All @@ -16,24 +19,6 @@ model_params_t freqs[] = {
{.25, .25, .25, .25},
};

// Base-58 alphabet: digits and letters with the look-alike characters
// '0', 'O', 'I' and 'l' left out. The array also holds the string literal's
// trailing NUL, so it has 59 elements but only 58 usable symbols.
constexpr char base_58_chars[] =
    "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz";

// Returns digit number `i` (0 = least significant) of `n` written in radix
// `base`. `base` must be non-zero.
//
// Uses integer arithmetic only: dividing `n` by `base` `i` times and taking
// the remainder is equivalent to (n % base^(i+1)) / base^i, but cannot lose
// precision or overflow the way a floating-point power can.
inline size_t compute_digit_with_base(size_t i, size_t n, size_t base) {
  for (size_t shift = 0; shift < i && n != 0; ++shift) {
    n /= base;
  }
  return n % base;
}

// Encodes `n` with the base-58 alphabet above and returns the digits
// least-significant first (the digit order this function has always used).
//
// The parameter is 64 bits wide so that the 64-bit nonces callers pass are
// not truncated. The result is never empty (0 encodes as "1") and never
// contains a NUL character: the radix is the number of symbols, which
// excludes the terminator counted by sizeof.
std::string base_58_encode(uint64_t n) {
  constexpr size_t alphabet_size = sizeof(base_58_chars) - 1;

  std::string enc;
  do {
    enc.push_back(base_58_chars[n % alphabet_size]);
    n /= alphabet_size;
  } while (n != 0);
  return enc;
}

checkpoint_t make_dummy_checkpoint(const std::string &dataset_name) {
std::random_device rd;
uint64_t nonce =
Expand Down Expand Up @@ -478,3 +463,62 @@ TEST_CASE("model_t test no invariant sites", "[model_t]") {
CHECK(model.compute_lh(final_rl) == Approx(final_lh));
}
}

// Checks that root indices already recorded in a checkpoint are not handed
// out again by model_t::assign_indicies_by_rank_search /
// assign_indicies_by_rank_exhaustive.
//
// For each generated dummy_results_count, that many distinct indices (taken
// from a shuffled 0..16 list) are written to the checkpoint as dummy results
// before the model is asked to assign indices.
TEST_CASE("assign indicies test", "[model_t]") {
auto ckp = make_dummy_checkpoint("10.fasta");
// Since 10.fasta has 10 taxa, that makes it have 2n-3 == 17 possible
// rootings. So, we want to test that if we write a number of "dummy" results
// to the file, we get the right result.
auto dummy_results_count = GENERATE(0lu, 1lu, 2lu, 4lu, 8lu);
std::random_device rd;
std::mt19937 gen(rd());

// Shuffle all 17 candidate indices; the first dummy_results_count of them
// are the ones marked as already completed.
std::vector<size_t> possible_idx(17);
std::iota(possible_idx.begin(), possible_idx.end(), 0);
std::shuffle(possible_idx.begin(), possible_idx.end(), gen);
for (size_t i = 0; i < dummy_results_count; ++i) {
ckp.write(rd_result_t{possible_idx[i], 0.0, 0.0},
std::vector<partition_parameters_t>{});
}

auto ds = data_files_dna["10.fasta"];
std::vector<msa_t> msa;
msa.emplace_back(ds.first);
// NOTE(review): std::rand() is not seeded anywhere in this test, so the
// model seed is presumably the same on every run — confirm that is intended.
uint64_t seed = std::rand();
rooted_tree_t tree{ds.second};
model_t model{tree, msa, {1}, false, seed, false};
model.initialize_partitions_uniform_freqs(msa);

// Search mode: root_assignment is passed as the first argument of
// assign_indicies_by_rank_search. When it is at least dummy_results_count the
// call must not throw and must assign (root_assignment - dummy_results_count)
// indices, none of which was already completed; otherwise a throw is
// expected.
SECTION("search") {
auto root_assignment = GENERATE(1lu, 2lu, 3lu, 4lu, 5lu);
int expected_size = std::max(static_cast<int>(root_assignment) -
static_cast<int>(dummy_results_count),
0);
if (static_cast<int>(root_assignment) -
static_cast<int>(dummy_results_count) >=
0) {
REQUIRE_NOTHROW(model.assign_indicies_by_rank_search(root_assignment, 0.0,
0, 1, ckp));
auto assigned_idx = model.assigned_indicies();
REQUIRE(assigned_idx.size() == expected_size);
for (size_t j = 0; j < assigned_idx.size(); ++j) {
for (size_t i = 0; i < dummy_results_count; ++i) {
CHECK(assigned_idx[j] != possible_idx[i]);
}
}
} else {
// NOTE(review): this passes the literal 1 rather than root_assignment, so
// the generated value is not exercised on the throwing path — confirm
// whether root_assignment was intended here.
REQUIRE_THROWS(model.assign_indicies_by_rank_search(1, 0.0, 0, 1, ckp));
}
}
// Exhaustive mode: all 17 indices minus the already-completed ones are
// expected to be assigned, and none of the completed ones may reappear.
SECTION("exhaustive") {
int expected_size = std::max(17 - static_cast<int>(dummy_results_count), 0);
REQUIRE_NOTHROW(model.assign_indicies_by_rank_exhaustive(0, 1, ckp));
auto assigned_idx = model.assigned_indicies();
REQUIRE(assigned_idx.size() == expected_size);
for (size_t j = 0; j < assigned_idx.size(); ++j) {
for (size_t i = 0; i < dummy_results_count; ++i) {
CHECK(assigned_idx[j] != possible_idx[i]);
}
}
}
}
Loading

0 comments on commit 16a2e8d

Please sign in to comment.