diff --git a/CMakeLists.txt b/CMakeLists.txt index 95243966..18c1755d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,12 +8,11 @@ if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") endif() set(CMAKE_INTERPROCEDURAL_OPTIMIZATION yes) - set(CMAKE_EXPORT_COMPILE_COMMANDS yes) include(FetchContent) -if (USE_USHER) +if(USE_USHER) FetchContent_Declare(oneTBB-src GIT_REPOSITORY https://github.com/oneapi-src/oneTBB GIT_TAG 2019_U9 @@ -89,21 +88,6 @@ endif() find_package(ZLIB) -#ExternalProject_Add(isa-l -# GIT_REPOSITORY https://github.com/intel/isa-l.git -# GIT_TAG v2.30.0 -# GIT_SHALLOW true -# GIT_PROGRESS true -# UPDATE_DISCONNECTED true -# PREFIX isa-l -# SOURCE_DIR isa-l/source -# BINARY_DIR isa-l/source -# INSTALL_DIR isa-l/install -# CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env SED=/bin/sed GREP=/bin/grep ./autogen.sh && ${CMAKE_COMMAND} -E env AS=yasm ./configure --prefix=${CMAKE_BINARY_DIR}/isa-l/install --libdir=${CMAKE_BINARY_DIR}/isa-l/install -# BUILD_COMMAND make && make other -# INSTALL_COMMAND make install -#) -#set(isal_lib ${CMAKE_BINARY_DIR}/isa-l/install/libisal.a) set(STRICT_WARNINGS -Werror -fno-common -Wno-unknown-warning-option -Wno-pragmas -Wall -Wextra -pedantic -Wold-style-cast -Wshadow -Wconversion -Wcast-align -Wcast-qual -Wlogical-op -Wlogical-not-parentheses -Wredundant-decls -Wunreachable-code -Wparentheses -Wno-dangling-reference -Wno-ignored-optimization-argument) if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 8.0) @@ -112,7 +96,8 @@ endif() function(larch_compile_opts PRODUCT) target_compile_options(${PRODUCT} PUBLIC -march=native -std=c++17 -pipe) - if (CMAKE_BUILD_TYPE EQUAL "DEBUG") + + if(CMAKE_BUILD_TYPE EQUAL "DEBUG") target_compile_options(${PRODUCT} PUBLIC -fno-omit-frame-pointer -fno-stack-protector) endif() @@ -142,26 +127,26 @@ endfunction() FIND_PACKAGE(Boost COMPONENTS program_options iostreams filesystem date_time REQUIRED) if(${USE_USHER}) -include_directories(deps/usher/) -file(GLOB MATOPTIMIZE_SRC - "deps/usher/src/matOptimize/apply_move/*.cpp" - "deps/usher/src/matOptimize/Profitable_Moves_Enumerators/*.cpp" - "deps/usher/src/matOptimize/Fitch_Sankoff.cpp" - "deps/usher/src/matOptimize/mutation_annotated_tree.cpp" - "deps/usher/src/matOptimize/mutation_annotated_tree_nuc_util.cpp" - "deps/usher/src/matOptimize/mutation_annotated_tree_node.cpp" - "deps/usher/src/matOptimize/mutation_annotated_tree_load_store.cpp" - "deps/usher/src/matOptimize/mutation_annotated_tree_nuc_utils.cpp" - "deps/usher/src/matOptimize/optimize_inner_loop.cpp" - "deps/usher/src/matOptimize/Mutation_Collection.cpp" - "deps/usher/src/matOptimize/condense.cpp" - "deps/usher/src/matOptimize/reassign_states.cpp" - "deps/usher/src/matOptimize/detailed_mutations_load.cpp" - "deps/usher/src/matOptimize/detailed_mutations_store.cpp" - "deps/usher/src/matOptimize/optimize_tree.cpp" - "deps/usher/src/matOptimize/priority_conflict_resolver.cpp" - "deps/usher/src/matOptimize/check_samples.cpp" -) + include_directories(deps/usher/) + file(GLOB MATOPTIMIZE_SRC + "deps/usher/src/matOptimize/apply_move/*.cpp" + "deps/usher/src/matOptimize/Profitable_Moves_Enumerators/*.cpp" + "deps/usher/src/matOptimize/Fitch_Sankoff.cpp" + "deps/usher/src/matOptimize/mutation_annotated_tree.cpp" + "deps/usher/src/matOptimize/mutation_annotated_tree_nuc_util.cpp" + "deps/usher/src/matOptimize/mutation_annotated_tree_node.cpp" + "deps/usher/src/matOptimize/mutation_annotated_tree_load_store.cpp" + "deps/usher/src/matOptimize/mutation_annotated_tree_nuc_utils.cpp" + "deps/usher/src/matOptimize/optimize_inner_loop.cpp" + "deps/usher/src/matOptimize/Mutation_Collection.cpp" + "deps/usher/src/matOptimize/condense.cpp" + "deps/usher/src/matOptimize/reassign_states.cpp" + "deps/usher/src/matOptimize/detailed_mutations_load.cpp" + "deps/usher/src/matOptimize/detailed_mutations_store.cpp" + "deps/usher/src/matOptimize/optimize_tree.cpp" + "deps/usher/src/matOptimize/priority_conflict_resolver.cpp" + "deps/usher/src/matOptimize/check_samples.cpp" + ) endif() function(larch_link_opts PRODUCT) @@ -177,6 +162,7 @@ function(larch_link_opts PRODUCT) target_link_libraries(${PRODUCT} PUBLIC nlohmann_json::nlohmann_json) target_link_libraries(${PRODUCT} PUBLIC ${Protobuf_LIBRARIES}) target_link_libraries(${PRODUCT} PUBLIC ${Boost_LIBRARIES}) + if(${USE_USHER}) target_link_libraries(${PRODUCT} PUBLIC ${TBB_IMPORTED_TARGETS} -lstdc++fs) else() @@ -211,49 +197,46 @@ endif() larch_link_opts(larch) if(${USE_USHER}) -add_library(usher - ${MATOPTIMIZE_SRC} - src/usher_globals.cpp) -target_compile_options(usher PRIVATE -march=native -std=c++17 -w -fno-omit-frame-pointer -DUSE_USHER) + add_library(usher + ${MATOPTIMIZE_SRC} + src/usher_globals.cpp) + target_compile_options(usher PRIVATE -march=native -std=c++17 -w -fno-omit-frame-pointer -DUSE_USHER) -if(${USE_ASAN}) - target_compile_options(usher PUBLIC -O0 -g3 -fsanitize=address,undefined -fno-sanitize-recover) -elseif(${USE_TSAN}) - target_compile_options(usher PUBLIC -O0 -g3 -fsanitize=thread) -endif() + if(${USE_ASAN}) + target_compile_options(usher PUBLIC -O0 -g3 -fsanitize=address,undefined -fno-sanitize-recover) + elseif(${USE_TSAN}) + target_compile_options(usher PUBLIC -O0 -g3 -fsanitize=thread) + endif() -target_include_directories(usher PUBLIC include) -target_include_directories(usher PUBLIC ${Protobuf_INCLUDE_DIR}) -target_include_directories(usher PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/range-v3/install/include) + target_include_directories(usher PUBLIC include) + target_include_directories(usher PUBLIC ${Protobuf_INCLUDE_DIR}) + target_include_directories(usher PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/range-v3/install/include) -#target_include_directories(usher PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/isa-l/install/include) -add_dependencies(usher range-v3) + add_dependencies(usher range-v3) -#add_dependencies(usher isa-l) -target_include_directories(usher PUBLIC ${PROTO_OUT_DIR}/deps/usher) -larch_link_opts(usher) -target_link_libraries(usher PRIVATE ${Boost_LIBRARIES} ZLIB::ZLIB ${MPI_CXX_LIBRARIES} ${MPI_CXX_LINK_FLAGS}) + target_include_directories(usher PUBLIC ${PROTO_OUT_DIR}/deps/usher) + larch_link_opts(usher) + target_link_libraries(usher PRIVATE ${Boost_LIBRARIES} ZLIB::ZLIB ${MPI_CXX_LIBRARIES} ${MPI_CXX_LINK_FLAGS}) -#target_link_libraries(usher PUBLIC ${CMAKE_BINARY_DIR}/isa-l/install/libisal.a) -set_target_properties(usher - PROPERTIES CXX_CLANG_TIDY "" -) + set_target_properties(usher + PROPERTIES CXX_CLANG_TIDY "" + ) -add_library(larch-usher-glue - src/usher_glue.cpp) -larch_compile_opts(larch-usher-glue) -target_compile_options(larch-usher-glue PRIVATE ${STRICT_WARNINGS}) -larch_link_opts(larch-usher-glue) -target_link_libraries(larch-usher-glue PUBLIC usher) -add_dependencies(larch-usher-glue larch usher) -set_target_properties(larch-usher-glue - PROPERTIES CXX_CLANG_TIDY "" -) + add_library(larch-usher-glue + src/usher_glue.cpp) + larch_compile_opts(larch-usher-glue) + target_compile_options(larch-usher-glue PRIVATE ${STRICT_WARNINGS}) + larch_link_opts(larch-usher-glue) + target_link_libraries(larch-usher-glue PUBLIC usher) + add_dependencies(larch-usher-glue larch usher) + set_target_properties(larch-usher-glue + PROPERTIES CXX_CLANG_TIDY "" + ) endif() larch_executable(larch-test test/main.cpp - #test/test_ambiguous_vcf.cpp + test/test_ambiguous_vcf.cpp test/test_ambiguities.cpp test/test_compact_genome.cpp test/test_count_trees.cpp @@ -278,12 +261,13 @@ larch_executable(larch-test test/test_write_parsimony.cpp ) target_compile_options(larch-test PRIVATE ${STRICT_WARNINGS}) + if(${USE_USHER}) -target_link_libraries(larch-test PUBLIC larch-usher-glue usher) -add_dependencies(larch-test larch-usher-glue) -set_target_properties(larch-test - PROPERTIES CXX_CLANG_TIDY "" -) + target_link_libraries(larch-test PUBLIC larch-usher-glue usher) + add_dependencies(larch-test larch-usher-glue) + set_target_properties(larch-test + PROPERTIES CXX_CLANG_TIDY "" + ) endif() larch_executable(merge @@ -293,10 +277,10 @@ larch_executable(dag2dot tools/dag2dot.cpp) if(${USE_USHER}) -larch_executable(larch-usher - tools/larch-usher.cpp -) -target_compile_options(larch-usher PRIVATE ${STRICT_WARNINGS}) -target_link_libraries(larch-usher PRIVATE larch-usher-glue) -add_dependencies(larch-usher larch-usher-glue) + larch_executable(larch-usher + tools/larch-usher.cpp + ) + target_compile_options(larch-usher PRIVATE ${STRICT_WARNINGS}) + target_link_libraries(larch-usher PRIVATE larch-usher-glue) + add_dependencies(larch-usher larch-usher-glue) endif() diff --git a/README.md b/README.md index aea49431..84a314b3 100644 --- a/README.md +++ b/README.md @@ -14,26 +14,42 @@ To get a recent cmake, download from `https://cmake.org/download/`, for example: `wget https://github.com/Kitware/CMake/releases/download/v3.23.1/cmake-3.23.1-linux-x86_64.tar.gz` -To setup a conda environment capable of building Larch, use the environment +Build Environments +------------------ + +* singularity 3.5.3 +* conda 22.9.0 + +Larch can be built utilizing a Singularity container or a Conda environment. + +To build Singularity image, use the definition provided: + +`singularity build larch-singularity.sif larch-singularity.def` +`singularity shell larch-singularity.sif --net` + +To setup a conda environment capable of building Larch, use: + +`conda create -n larch` +`conda activate larch` +`conda install --channel "conda-forge" --update-deps --override-channels cmake make cxx-compiler openmpi openmpi-mpicc openmpi-mpicxx boost-cpp automake autoconf libtool yasm ucx zlib` + +To setup a conda environment capable of building Larch including development tools, create `larch-dev` using the environment file provided: `conda env create -f environment.yml` - Building -------- +`git submodule update --init --recursive` `mkdir build` - `cd build` - `cmake -DCMAKE_BUILD_TYPE=Debug ..` - `make -j16` -Optionally add -DCMAKE_CXX_CLANG_TIDY="clang-tidy" to enable clang-tidy. - -Optionally add -DUSE_ASAN=yes to enable asan and ubsan. +Cmake build options: + - add `-DCMAKE_CXX_CLANG_TIDY="clang-tidy"` to enable clang-tidy. + - add `-DUSE_ASAN=yes` to enable asan and ubsan. Running ------- @@ -41,13 +57,18 @@ Running From the build directory: `ln -s ../data` - `./larch-test` Passing *nocatch* to the tests executable will allow exceptions to escape, which is useful for debugging. A gdb session can be started with `gdb --args build/larch-test nocatch`. -*--list* produces a list of all available tests, along with an ID number. -*--range* runs tests by ID in [begin, end] range arguments. +larch-test options: + +- `nocatch` allows test exceptions to escape, which is useful for debugging. A gdb session can be started with `gdb --args build/larch-test nocatch`. +- `--list` produces a list of all available tests, along with an ID number. +- `--range` runs tests by ID with a string of comma-separated range or single ID arguments [e.g. 1-5,7,9,12-13]. +- `-tag` excludes tests with a given tag. +- `+tag` includes tests with a given tag. +- For example, the `-tag "slow"` removes tests which require an long runtime to complete. Third-party ----------- diff --git a/build.sh b/build.sh deleted file mode 100755 index e2ef8d9d..00000000 --- a/build.sh +++ /dev/null @@ -1,12 +0,0 @@ -LARCHBUILDPATH="$(pwd)/build/" -rm -rf $LARCHBUILDPATH -mkdir $LARCHBUILDPATH -export LD_LIBRARY_PATH="$LARCHBUILDPATH/tbb_cmake_build/tbb_cmake_build_subdir_debug" - -cd $LARCHBUILDPATH - -cmake .. -# /matsen/cmake-3.27.3-linux-x86_64/bin/cmake -DCMAKE_BUILD_TYPE=Debug .. -make -j16 larch-test -make -j16 larch-usher -make -j16 merge diff --git a/environment.yml b/environment.yml index d7150389..0f334a93 100644 --- a/environment.yml +++ b/environment.yml @@ -4,22 +4,27 @@ channels: - bioconda - defaults dependencies: - - git - - isa-l + - cmake=3.19.6 + - make - cxx-compiler - - mafft - - boost-cpp - openmpi + - openmpi-mpicc + - openmpi-mpicxx + - boost-cpp + - automake + - autoconf + - libtool + - yasm + - ucx + - zlib + + - git + - mafft - python - rsync - wget - - cmake=3.19.6 - - autoconf - - protobuf=3.12.3 - - make - openssh - gdb - bzip2 - backports.lzma - - libtool - - automake + - protobuf=3.12.3 diff --git a/include/larch/impl/subtree/subtree_weight_impl.hpp b/include/larch/impl/subtree/subtree_weight_impl.hpp index 38030969..bfad00fb 100644 --- a/include/larch/impl/subtree/subtree_weight_impl.hpp +++ b/include/larch/impl/subtree/subtree_weight_impl.hpp @@ -78,18 +78,16 @@ template typename SubtreeWeight::Storage SubtreeWeight::TrimToMinWeight(const WeightOps& weight_ops) { Storage result = Storage::EmptyDefault(); - std::unordered_map visited_nodes; - std::unordered_map visited_edges; + std::unordered_map mapped_node; result.View().SetReferenceSequence(dag_.GetReferenceSequence()); + ExtractSubset( dag_.GetRoot(), result.View().AppendNode(), weight_ops, [this](Node node, CladeIdx clade_idx) { return cached_min_weight_edges_.at(node.GetId().value).at(clade_idx.value); }, - visited_edges, - visited_nodes, + mapped_node, result.View()); - result.View().BuildConnections(); return result; } @@ -285,8 +283,7 @@ void SubtreeWeight::ExtractTree(NodeType input_node, CladeIdx clade_idx{0}; for (auto clade : input_node.GetClades()) { Assert(not clade.empty()); - - Edge input_edge = edge_selector(input_node, clade_idx); + auto input_edge = edge_selector(input_node, clade_idx); ++clade_idx.value; NodeId result_child_id = result.AppendNode(); @@ -301,67 +298,49 @@ void SubtreeWeight::ExtractTree(NodeType input_node, } template -template +template void SubtreeWeight::ExtractSubset(NodeType input_node, NodeId result_node_id, const WeightOps& weight_ops, - const EdgeSelector& edge_selector, - std::unordered_map visited_edge, - std::unordered_map visited_node, + const EdgesSelector& edge_selector, + std::unordered_map& mapped_id, MutableDAGType result) { + ComputeWeightBelow(input_node, weight_ops); + auto result_node = result.Get(result_node_id); + if constexpr (decltype(result_node)::template contains_feature) { + result_node.SetOriginalId(input_node.GetId()); + } + result_node = input_node.GetCompactGenome().Copy(); + result_node = SampleId{input_node.GetSampleId()}; - visited_node.insert({input_node.GetId(), NodeId{NoId}}); - if (visited_node[input_node.GetId()] == NodeId{NoId}) { - ComputeWeightBelow(input_node, weight_ops); - auto result_node = result.Get(result_node_id); - if constexpr (decltype(result_node)::template contains_feature) { - result_node.SetOriginalId(input_node.GetId()); - } - result_node = input_node.GetCompactGenome().Copy(); - if (input_node.IsLeaf()) { - result_node = SampleId{input_node.GetSampleId()}; - } - visited_node.insert_or_assign(input_node.GetId(), result_node_id); - - CladeIdx clade_idx{0}; - for (auto clade : input_node.GetClades()) { - Assert(not clade.empty()); - - auto input_edges = edge_selector(input_node, clade_idx); - - for (auto input_edge_id: input_edges) { - - auto input_edge = input_node.GetDAG().Get(input_edge_id); - auto input_child_id = input_edge.GetChild().GetId(); - - if (not visited_edge[input_edge_id]) { - visited_edge.insert({input_edge_id, true}); - - visited_node.insert({input_child_id, {NoId}}); - if (visited_node[input_child_id] != NodeId{NoId}) { - - NodeId result_child_id = visited_node[input_child_id]; - auto result_edge = - result.AppendEdge(result_node_id, result_child_id, input_edge.GetClade()); - - result_edge.SetEdgeMutations(input_edge.GetEdgeMutations().Copy()); - } else { - - NodeId result_child_id = result.AppendNode(); - - auto result_edge = - result.AppendEdge(result_node_id, result_child_id, input_edge.GetClade()); + mapped_id.insert({input_node.GetId(), result_node}); - result_edge.SetEdgeMutations(input_edge.GetEdgeMutations().Copy()); + CladeIdx clade_idx{0}; + for (auto clade : input_node.GetClades()) { + Assert(not clade.empty()); - ExtractSubset(input_edge.GetChild(), result_child_id, weight_ops, edge_selector, - visited_edge, - visited_node, - result); - } - } + std::ignore = clade_idx; + auto input_edges = edge_selector(input_node, clade_idx); + + for (auto edge_id: input_edges) { + auto input_edge = input_node.GetDAG().Get(edge_id); + auto input_node_child = input_edge.GetChild(); + + if (mapped_id.find(input_node_child.GetId()) != mapped_id.end()) { + NodeId result_child_id = mapped_id[input_node_child.GetId()]; + auto result_edge = + result.AppendEdge(result_node_id, result_child_id, clade_idx); + result_edge.SetEdgeMutations(input_edge.GetEdgeMutations().Copy()); + } else { + NodeId result_child_id = result.AppendNode(); + auto result_edge = + result.AppendEdge(result_node_id, result_child_id, clade_idx); + result_edge.SetEdgeMutations(input_edge.GetEdgeMutations().Copy()); + ExtractSubset(input_node_child, result_child_id, weight_ops, edge_selector, + mapped_id, result); + mapped_id.insert({input_node_child.GetId(), result_child_id}); } - ++clade_idx.value; } + ++clade_idx.value; } } diff --git a/include/larch/subtree/subtree_weight.hpp b/include/larch/subtree/subtree_weight.hpp index 29d303c7..2397fe5a 100644 --- a/include/larch/subtree/subtree_weight.hpp +++ b/include/larch/subtree/subtree_weight.hpp @@ -94,11 +94,10 @@ class SubtreeWeight { const WeightOps& weight_ops, const EdgeSelector& edge_selector, MutableDAGType result); - template + template void ExtractSubset(NodeType input_node, NodeId result_node_id, - const WeightOps& weight_ops, const EdgeSelector& edge_selector, - std::unordered_map visited_edge, - std::unordered_map visited_node, + const WeightOps& weight_ops, const EdgesSelector& edge_selector, + std::unordered_map& mapped_id, MutableDAGType result); DAG dag_; diff --git a/test/main.cpp b/test/main.cpp index 9402835a..c5f394a0 100644 --- a/test/main.cpp +++ b/test/main.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #ifdef USE_USHER #include @@ -9,6 +11,21 @@ #include "test_common.hpp" +static void get_usage() { + std::cout << "Usage:\n"; + std::cout << "larch-test \n"; + std::cout << " Includes all tests with names matching " + "expression.\n"; + std::cout << " --range Includes all tests with listed IDs " + "[e.g. 1,5-10,12,15].\n"; + std::cout << " -tag Excludes all tests with given tag.\n"; + std::cout << " +tag Includes all tests with given tag.\n"; + std::cout << " --list Prints information about all selected tests " + "(IDs, tags). They are not executed.\n"; + std::cout << " nocatch Allow exceptions to escape for debugging.\n"; + std::exit(EXIT_SUCCESS); +} + static std::vector& get_all_tests() { static std::vector all_tests{}; return all_tests; @@ -19,6 +36,34 @@ bool add_test(const Test& test) noexcept { return true; } +std::set parse_range(const std::string& str) { + std::ignore = str; + std::set numbers; + std::istringstream iss(str); + // Tokenize the string based on commas and dashes. + std::string token; + while (std::getline(iss, token, ',')) { + std::istringstream tokenStream(token); + std::string numberToken; + std::vector range; + while (std::getline(tokenStream, numberToken, '-')) { + int number = std::stoi(numberToken); + range.push_back(number); + } + if (range.size() == 1) { + numbers.insert(range[0]); + } else if (range.size() == 2) { + for (int i = range[0]; i < range[1] + 1; i++) { + numbers.insert(i); + } + } else { + Fail("Invalid --range argument."); + } + } + + return numbers; +} + int main(int argc, char* argv[]) { #ifdef USE_USHER int ignored{}; @@ -28,19 +73,20 @@ int main(int argc, char* argv[]) { bool opt_list_names = false; bool opt_test_range = false; - std::pair range; + std::set range; std::regex regex{".*"}; std::set include_tags; std::set exclude_tags; for (int i = 1; i < argc; ++i) { - if (std::string("nocatch") == argv[i]) { + if (std::string("-h") == argv[i]) { + get_usage(); + } else if (std::string("nocatch") == argv[i]) { no_catch = true; } else if (std::string("--list") == argv[i]) { opt_list_names = true; } else if (std::string("--range") == argv[i]) { opt_test_range = true; - range.first = atoi(argv[++i]); - range.second = atoi(argv[++i]); + range = parse_range(argv[++i]); } else if (std::string("+tag") == argv[i]) { include_tags.insert(argv[++i]); } else if (std::string("-tag") == argv[i]) { @@ -71,8 +117,7 @@ int main(int argc, char* argv[]) { } std::smatch match; if (std::regex_match(test.name, match, regex)) { - if ((!opt_test_range) || - ((test_id >= range.first) && (test_id <= range.second))) { + if ((!opt_test_range) || (range.find(test_id) != range.end())) { tests.push_back({test_id, test}); std::cout << " [" << test_id << "] " << test.name; if (not test.tags.empty()) { @@ -125,8 +170,8 @@ int main(int argc, char* argv[]) { } else { try { test.entry(); - std::cout << test_header_str << " TEST RESULT: " << test_name_str << " Passed." - << std::endl; + std::cout << test_header_str << " TEST RESULT: " << test_name_str + << " Passed." << std::endl; test_results.push_back(true); } catch (const std::exception& e) { failed.push_back({test_id, test}); @@ -137,7 +182,8 @@ int main(int argc, char* argv[]) { } auto diff_time = std::chrono::steady_clock::now() - start_time; - auto run_time = double(std::chrono::duration (diff_time).count()) / 1000.0; + auto run_time = + double(std::chrono::duration(diff_time).count()) / 1000.0; test_runtimes.push_back(run_time); } std::cout << std::endl; @@ -153,8 +199,8 @@ int main(int argc, char* argv[]) { std::string test_result_str = (test_results[i] ? "PASS" : "FAIL"); std::string test_runtime_str = std::to_string(test_runtimes[i]) + "s"; - std::cout << " " << test_id_str << " " << test_result_str - << " | " << test_runtime_str << " | " << test_name_str << std::endl; + std::cout << " " << test_id_str << " " << test_result_str << " | " + << test_runtime_str << " | " << test_name_str << std::endl; } std::cout << std::endl; diff --git a/test/sample_dag.hpp b/test/sample_dag.hpp index e0499e93..277f9abf 100644 --- a/test/sample_dag.hpp +++ b/test/sample_dag.hpp @@ -130,6 +130,7 @@ auto dag = dag_storage.View(); dag.SetCompactGenomesFromNodeSequenceMap(seq_map); dag.RecomputeEdgeMutations(); + dag.RecomputeCompactGenomes(false); dag.SampleIdsFromCG(); return dag_storage; } @@ -140,6 +141,7 @@ auto dag = dag_storage.View(); dag.SetCompactGenomesFromNodeSequenceMap(seq_map); dag.RecomputeEdgeMutations(); + dag.RecomputeCompactGenomes(false); dag.SampleIdsFromCG(); return dag_storage; } @@ -150,6 +152,7 @@ auto dag = dag_storage.View(); dag.SetCompactGenomesFromNodeSequenceMap(seq_map); dag.RecomputeEdgeMutations(); + dag.RecomputeCompactGenomes(false); dag.SampleIdsFromCG(); return dag_storage; } @@ -160,6 +163,7 @@ auto dag = dag_storage.View(); dag.SetCompactGenomesFromNodeSequenceMap(seq_map); dag.RecomputeEdgeMutations(); + dag.RecomputeCompactGenomes(false); dag.SampleIdsFromCG(); return dag_storage; } @@ -296,6 +300,7 @@ auto dag = dag_storage.View(); dag.SetCompactGenomesFromNodeSequenceMap(seq_map); dag.RecomputeEdgeMutations(); + dag.RecomputeCompactGenomes(false); dag.SampleIdsFromCG(); return dag_storage; } diff --git a/test/test_ambiguities.cpp b/test/test_ambiguities.cpp index d2259535..b87772b8 100644 --- a/test/test_ambiguities.cpp +++ b/test/test_ambiguities.cpp @@ -2,8 +2,8 @@ #include "sample_dag.hpp" #include "larch/dag_loader.hpp" -[[maybe_unused]] static auto BuildNodeSequenceMap(MADAGStorage<> &dag_storage, - bool include_nonleaf_nodes = false) { +[[maybe_unused]] static auto build_node_sequence_map( + MADAGStorage<> &dag_storage, bool include_nonleaf_nodes = false) { NodeSeqMap node_seq_map; auto dag = dag_storage.View(); auto ref_seq = dag.GetReferenceSequence(); @@ -15,7 +15,8 @@ return node_seq_map; } -[[maybe_unused]] static auto DAGMutationsAreValid(MADAGStorage<> &dag_storage) { +[[maybe_unused]] static auto verify_dag_mutations_are_valid( + MADAGStorage<> &dag_storage) { using Edge = MutableMADAG::EdgeView; auto dag = dag_storage.View(); @@ -36,20 +37,26 @@ return true; } -[[maybe_unused]] static auto VerifyCompactGenomesCompatibleWithLeaves( - MADAGStorage<> &dag_storage, NodeSeqMap &truth_leaf_seq_map) { +[[maybe_unused]] static auto verify_compact_genomes_compatible_with_leaves( + MADAGStorage<> &dag_storage, NodeSeqMap &truth_leaf_seq_map, + bool do_print_failure = true) { auto dag = dag_storage.View(); - auto dag_leaf_seq_map = BuildNodeSequenceMap(dag_storage, false); + auto dag_leaf_seq_map = build_node_sequence_map(dag_storage, false); for (auto node : dag.GetLeafs()) { if (dag_leaf_seq_map[node.GetId()] != truth_leaf_seq_map[node.GetId()]) { + if (do_print_failure) { + std::cout << "Failed_at [" << node.GetId() + << "]: TEST:" << truth_leaf_seq_map[node.GetId()] + << " vs TRUTH:" << dag_leaf_seq_map[node.GetId()] << std::endl; + } return false; } } return true; } -[[maybe_unused]] static void WriteDAGToFile(MADAGStorage<> &dag_storage, - const std::string &output_filename) { +[[maybe_unused]] static void write_dag_to_file(MADAGStorage<> &dag_storage, + const std::string &output_filename) { std::ofstream os; auto dag = dag_storage.View(); os.open(output_filename); @@ -68,27 +75,33 @@ bool write_files = false; if (write_files) { - WriteDAGToFile(amb_dag_storage, "_ignore/amb_dag.dot"); - WriteDAGToFile(unamb_dag_storage, "_ignore/unamb_dag.dot"); + write_dag_to_file(amb_dag_storage, "_ignore/amb_dag.dot"); + write_dag_to_file(unamb_dag_storage, "_ignore/unamb_dag.dot"); StoreDAGToProtobuf(amb_dag, "_ignore/amb_dag.pb"); StoreDAGToProtobuf(unamb_dag, "_ignore/unamb_dag.pb"); } - // (0) Test checks that all edges mutations are compatible with adjacent compact + // (0) Test spot checks that edges mutations are compatible with adjacent compact // genomes. + // Initial edge mutation: T->A. + // Set to T->G. amb_dag.Get(EdgeId{1}).GetMutableEdgeMutations()[{1}] = {'T', 'G'}; - TestAssert(not(DAGMutationsAreValid(amb_dag_storage)) && + TestAssert(not(verify_dag_mutations_are_valid(amb_dag_storage)) && "Test_0a: DAG EdgeMutations incorrectly found to be compatible with " "Compact Genomes."); + // Set to G->A. amb_dag.Get(EdgeId{1}).GetMutableEdgeMutations()[{1}] = {'G', 'A'}; - TestAssert(not(DAGMutationsAreValid(amb_dag_storage)) && + TestAssert(not(verify_dag_mutations_are_valid(amb_dag_storage)) && "Test_0b: DAG EdgeMutations incorrectly found to be compatible with " "Compact Genomes."); + // Reset back to T->A. amb_dag.Get(EdgeId{1}).GetMutableEdgeMutations()[{1}] = {'T', 'A'}; - TestAssert((DAGMutationsAreValid(amb_dag_storage)) && - "Test_0c: DAG EdgeMutations are not compatible with Compact Genomes."); - TestAssert((DAGMutationsAreValid(unamb_dag_storage)) && - "Test_0d: DAG EdgeMutations are not compatible with Compact Genomes."); + TestAssert((verify_dag_mutations_are_valid(amb_dag_storage)) && + "Test_0c: DAG EdgeMutations are not compatible with ambiguous Compact " + "Genomes."); + TestAssert((verify_dag_mutations_are_valid(unamb_dag_storage)) && + "Test_0d: DAG EdgeMutations are not compatible with unambiguous Compact " + "Genomes."); // (1) Test checks for ambiguity. for (auto base : {'A', 'C', 'G', 'T'}) { @@ -123,6 +136,7 @@ "Test_2g: GetCommonBases does not return correct value."); // (3) Test that ambiguous leaves are compatible with unambiguous leaves. + for (auto node : amb_dag.GetLeafs()) { auto &cg_1 = amb_dag.Get(node.GetId()).GetCompactGenome(); auto &cg_2 = unamb_dag.Get(node.GetId()).GetCompactGenome(); @@ -174,17 +188,22 @@ // (6) Test that recomputing edge mutations and compact genomes does not alter // leaves. + TestAssert( + (verify_compact_genomes_compatible_with_leaves(amb_dag_storage, amb_seq_map)) && + "Test_6a: Leaf CGs altered before running RecomputeCompactGenomes."); amb_dag.RecomputeCompactGenomes(false); - TestAssert((VerifyCompactGenomesCompatibleWithLeaves(amb_dag_storage, amb_seq_map)) && - "Test_6a: RecomputeCompactGenomes incorrectly altered leaf CGs."); - unamb_dag.RecomputeCompactGenomes(false); + verify_compact_genomes_compatible_with_leaves(amb_dag_storage, amb_seq_map); TestAssert( - (VerifyCompactGenomesCompatibleWithLeaves(unamb_dag_storage, unamb_seq_map)) && + (verify_compact_genomes_compatible_with_leaves(amb_dag_storage, amb_seq_map)) && "Test_6b: RecomputeCompactGenomes incorrectly altered leaf CGs."); + unamb_dag.RecomputeCompactGenomes(false); + TestAssert((verify_compact_genomes_compatible_with_leaves(unamb_dag_storage, + unamb_seq_map)) && + "Test_6c: RecomputeCompactGenomes incorrectly altered leaf CGs."); amb_dag.RecomputeCompactGenomes(true); - TestAssert( - not(VerifyCompactGenomesCompatibleWithLeaves(amb_dag_storage, amb_seq_map)) && - "Test_6c: RecomputeCompactGenomes incorrectly left leaf CGs unaltered ."); + TestAssert(not(verify_compact_genomes_compatible_with_leaves(amb_dag_storage, + amb_seq_map, false)) && + "Test_6d: RecomputeCompactGenomes incorrectly left leaf CGs unaltered ."); } [[maybe_unused]] static const auto test_added0 = diff --git a/test/test_dag_trimming.cpp b/test/test_dag_trimming.cpp index f83e06f1..6e8d40e4 100644 --- a/test/test_dag_trimming.cpp +++ b/test/test_dag_trimming.cpp @@ -23,7 +23,7 @@ static void test_dag_trimming(std::string_view path, size_t expected_edges) { } [[maybe_unused]] static const auto test_added0 = - add_test({[] { test_dag_trimming("data/testcase/full_dag.pb.gz", 57); }, + add_test({[] { test_dag_trimming("data/testcase/full_dag.pb.gz", 530); }, "DAG trimming: testcase"}); [[maybe_unused]] static const auto test_added1 = diff --git a/test/test_rf_distance.cpp b/test/test_rf_distance.cpp index b3011030..124e7bb5 100644 --- a/test/test_rf_distance.cpp +++ b/test/test_rf_distance.cpp @@ -137,6 +137,7 @@ static void test_rf_distance_hand_computed_example() { } static void test_rf_distance_different_weight_ops() { + auto dag0_storage = make_base_sample_dag(); auto dag1_storage = make_sample_dag(); auto dag2_storage = make_nonintersecting_sample_dag(); auto dag3_storage = make_sample_dag_with_one_unique_node(); @@ -178,6 +179,7 @@ static void test_rf_distance_different_weight_ops() { for (auto ref_id : ref_ids) { subvec.push_back(true_dist_map[{compute_id, ref_id}]); } + if (rf_dist_type == RFDistanceType::Min) { vec.push_back(*std::min_element(subvec.begin(), subvec.end())); } else if (rf_dist_type == RFDistanceType::Max) { diff --git a/tools/larch-usher.cpp b/tools/larch-usher.cpp index 223bf886..b786ceb1 100644 --- a/tools/larch-usher.cpp +++ b/tools/larch-usher.cpp @@ -533,6 +533,7 @@ int main(int argc, char** argv) { // NOLINT(bugprone-exception-escape) std::string vcf_path; std::string callback_config = "best-moves"; bool write_intermediate_pb = true; + enum class SampleMethod { Random, UniformRandom, @@ -689,7 +690,8 @@ int main(int argc, char** argv) { // NOLINT(bugprone-exception-escape) std::ofstream logfile; logfile.open(logfile_name); logfile << "Iteration\tNTrees\tNNodes\tNEdges\tMaxParsimony\tNTreesMaxParsimony\tWors" - "tParsimony\tMinSumRFDistance\tMaxSumRFDistance\tMinSumRFCount\tMaxSumRFCount\tSecondsElapsed"; + "tParsimony\tMinSumRFDistance\tMaxSumRFDistance\tMinSumRFCount\tMaxSumRFCo" + "unt\tSecondsElapsed"; // tbb::global_control c(tbb::global_control::max_allowed_parallelism, 1); MADAGStorage<> input_dag = @@ -718,7 +720,8 @@ int main(int argc, char** argv) { // NOLINT(bugprone-exception-escape) return std::chrono::duration_cast(now - start_time).count(); }; - auto logger = [&merge, &logfile, &time_elapsed, &write_intermediate_pb](size_t iteration) { + auto logger = [&merge, &logfile, &time_elapsed, + &write_intermediate_pb](size_t iteration) { SubtreeWeight parsimonyscorer{merge.GetResult()}; SubtreeWeight maxparsimonyscorer{ merge.GetResult()}; @@ -738,12 +741,17 @@ int main(int argc, char** argv) { // NOLINT(bugprone-exception-escape) MaxSumRFDistance this_max_rf_weight_ops{merge, merge}; auto shiftsum = this_min_rf_weight_ops.GetOps().GetShiftSum(); - auto min_rf_distance = this_min_sum_rf_dist.ComputeWeightBelow(merge.GetResult().GetRoot(), this_min_rf_weight_ops) + shiftsum; - auto min_rf_count = this_min_sum_rf_dist.MinWeightCount(merge.GetResult().GetRoot(), this_min_rf_weight_ops); - - auto max_rf_distance = this_max_sum_rf_dist.ComputeWeightBelow(merge.GetResult().GetRoot(), this_max_rf_weight_ops) + shiftsum; - auto max_rf_count = this_max_sum_rf_dist.MinWeightCount(merge.GetResult().GetRoot(), this_max_rf_weight_ops); + auto min_rf_distance = this_min_sum_rf_dist.ComputeWeightBelow( + merge.GetResult().GetRoot(), this_min_rf_weight_ops) + + shiftsum; + auto min_rf_count = this_min_sum_rf_dist.MinWeightCount(merge.GetResult().GetRoot(), + this_min_rf_weight_ops); + auto max_rf_distance = this_max_sum_rf_dist.ComputeWeightBelow( + merge.GetResult().GetRoot(), this_max_rf_weight_ops) + + shiftsum; + auto max_rf_count = this_max_sum_rf_dist.MinWeightCount(merge.GetResult().GetRoot(), + this_max_rf_weight_ops); std::cout << "Best parsimony score in DAG: " << minparsimony << "\n"; std::cout << "Worst parsimony score in DAG: " << maxparsimony << "\n"; @@ -754,12 +762,9 @@ int main(int argc, char** argv) { // NOLINT(bugprone-exception-escape) logfile << '\n' << iteration << '\t' << ntrees << '\t' << merge.GetResult().GetNodesCount() << '\t' << merge.GetResult().GetEdgesCount() << '\t' << minparsimony << '\t' - << minparsimonytrees << '\t' << maxparsimony << '\t' - << min_rf_distance << '\t' - << max_rf_distance << '\t' - << min_rf_count << '\t' - << max_rf_count << '\t' - << time_elapsed() << std::flush; + << minparsimonytrees << '\t' << maxparsimony << '\t' << min_rf_distance + << '\t' << max_rf_distance << '\t' << min_rf_count << '\t' << max_rf_count + << '\t' << time_elapsed() << std::flush; if (write_intermediate_pb) { std::string intermediate_dag_path = "intermediate_MADAG_untrimmed.pb"; StoreDAGToProtobuf(merge.GetResult(), intermediate_dag_path);