From 08eaea67d845982bc8d5cef924b46ea9ed1048c6 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Fri, 11 Oct 2019 23:04:26 -0400 Subject: [PATCH 001/128] [paf writer] add cigar string to paf output --- cudamapper/include/claragenomics/cudamapper/types.hpp | 2 ++ cudamapper/src/overlapper.cpp | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cudamapper/include/claragenomics/cudamapper/types.hpp b/cudamapper/include/claragenomics/cudamapper/types.hpp index 602c9780a..78972561c 100644 --- a/cudamapper/include/claragenomics/cudamapper/types.hpp +++ b/cudamapper/include/claragenomics/cudamapper/types.hpp @@ -102,6 +102,8 @@ typedef struct Overlap std::uint32_t target_length_ = 0; /// Whether the overlap is considered valid by the generating overlapper bool overlap_complete = false; + /// CIGAR string for alignment of mapped section. + std::string cigar_ = ""; } Overlap; } // namespace cudamapper diff --git a/cudamapper/src/overlapper.cpp b/cudamapper/src/overlapper.cpp index c64e6d69d..08151c5ef 100644 --- a/cudamapper/src/overlapper.cpp +++ b/cudamapper/src/overlapper.cpp @@ -33,7 +33,8 @@ void Overlapper::print_paf(const std::vector& overlaps) for (const auto& overlap : filtered_overlaps) { - std::printf("%s\t%i\t%i\t%i\t%c\t%s\t%i\t%i\t%i\t%i\t%i\t%i\n", + // Add basic overlap information. + std::printf("%s\t%i\t%i\t%i\t%c\t%s\t%i\t%i\t%i\t%i\t%i\t%i", overlap.query_read_name_.c_str(), overlap.query_length_, overlap.query_start_position_in_read_, @@ -46,6 +47,13 @@ void Overlapper::print_paf(const std::vector& overlaps) overlap.num_residues_, 0, 255); + // If CIGAR string is generated, output in PAF. + if (overlap.cigar_ != "") + { + std::printf("\tcg:Z:%s", overlap.cigar_.c_str()); + } + // Add new line to demarcate new entry. + std::printf("\n"); } } } // namespace cudamapper From b1469bacdd7cd9fb9d6a8d47b24c94d50240060f Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Tue, 22 Oct 2019 17:22:03 +0000 Subject: [PATCH 002/128] [cmake] remove fatal error when packaging not possible --- cmake/Packaging.cmake | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/cmake/Packaging.cmake b/cmake/Packaging.cmake index f5450a7c7..602319919 100644 --- a/cmake/Packaging.cmake +++ b/cmake/Packaging.cmake @@ -8,6 +8,8 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. # +set(CGA_ENABLE_PACKAGING TRUE) + # Find Linux Distribution EXECUTE_PROCESS( COMMAND "awk" "-F=" "/^NAME/{print $2}" "/etc/os-release" @@ -21,11 +23,14 @@ elseif(${LINUX_OS_NAME} MATCHES "CentOS") MESSAGE(STATUS "Package generator - RPM") SET(CPACK_GENERATOR "RPM") else() - MESSAGE(FATAL_ERROR "Unrecognized Linux distribution - ${LINUX_OS_NAME}") + MESSAGE(STATUS "Unrecognized Linux distribution - ${LINUX_OS_NAME}. 
Disabling packaging.") + set(CGA_ENABLE_PACKAGING FALSE) endif() -SET(CPACK_DEBIAN_PACKAGE_MAINTAINER "NVIDIA Corporation") -SET(CPACK_PACKAGE_VERSION "${CGA_VERSION}") -SET(CPACK_PACKAGING_INSTALL_PREFIX "/usr/local/${CGA_PROJECT_NAME}-${CGA_VERSION}") +if (CGA_ENABLE_PACKAGING) + SET(CPACK_DEBIAN_PACKAGE_MAINTAINER "NVIDIA Corporation") + SET(CPACK_PACKAGE_VERSION "${CGA_VERSION}") + SET(CPACK_PACKAGING_INSTALL_PREFIX "/usr/local/${CGA_PROJECT_NAME}-${CGA_VERSION}") -include(CPack) + include(CPack) +endif() From a4e0b66b7d53b43da7888e018c19db7f9c760f6e Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Thu, 24 Oct 2019 04:08:27 +0200 Subject: [PATCH 003/128] [cudamapper] Added NVTX ranges for profiling --- CMakeLists.txt | 1 + .../include/claragenomics/io/fasta_parser.hpp | 2 +- common/utils/CMakeLists.txt | 5 ++ .../include/claragenomics/utils/cudautils.hpp | 30 +++++++++++ cudamapper/src/index.cu | 2 + cudamapper/src/index_gpu.cuh | 51 ++++++++++--------- cudamapper/src/main.cpp | 3 ++ cudamapper/src/matcher.cu | 2 +- cudamapper/src/overlapper_triggered.cu | 1 + 9 files changed, 71 insertions(+), 26 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0359f7c9f..a0042af1f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,6 +27,7 @@ option(cga_device_synchronize_kernels "Run cudaDeviceSynchronize() in CGA_CU_CHE # overall consensus generated, and hence makes it harder to validate and debug. option(spoa_accurate "Run cudapoa code in mode that matches spoa" OFF) option(cga_enable_cudapoa_nw_print "Enable verbose prints within cudapoa NW kernel" OFF) +option(cga_profiling "Compile a binary for profiling with NVTX markers." OFF) if (cga_enable_tests) message(STATUS "Enabling ClaraGenomicsAnalysis unit tests") diff --git a/common/io/include/claragenomics/io/fasta_parser.hpp b/common/io/include/claragenomics/io/fasta_parser.hpp index 19f51fe64..3465949aa 100644 --- a/common/io/include/claragenomics/io/fasta_parser.hpp +++ b/common/io/include/claragenomics/io/fasta_parser.hpp @@ -62,4 +62,4 @@ class FastaParser std::unique_ptr create_fasta_parser(const std::string& fasta_file); } // namespace io -} // namespace claragenomicsi +} // namespace claragenomics diff --git a/common/utils/CMakeLists.txt b/common/utils/CMakeLists.txt index 42c11a654..71e7f5754 100644 --- a/common/utils/CMakeLists.txt +++ b/common/utils/CMakeLists.txt @@ -12,6 +12,11 @@ project(utils) add_library(utils INTERFACE) target_link_libraries(utils INTERFACE logging) +if (cga_profiling) + find_library(NVTX_LIBRARY nvToolsExt HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64) + target_compile_definitions(utils INTERFACE -DCGA_PROFILING) + target_link_libraries(utils INTERFACE ${NVTX_LIBRARY}) +endif() add_doxygen_source_dir(${CMAKE_CURRENT_SOURCE_DIR}/include) diff --git a/common/utils/include/claragenomics/utils/cudautils.hpp b/common/utils/include/claragenomics/utils/cudautils.hpp index 1804b14ca..3ed2b8a66 100644 --- a/common/utils/include/claragenomics/utils/cudautils.hpp +++ b/common/utils/include/claragenomics/utils/cudautils.hpp @@ -18,6 +18,10 @@ #include #include +#ifdef CGA_PROFILING +#include +#endif // CGA_PROFILING + /// \ingroup cudautils /// \{ @@ -85,6 +89,32 @@ __host__ __device__ __forceinline__ return (value + boundary) & ~(boundary - 1); } +#ifdef CGA_PROFILING +/// \ingroup cudautils +/// \def CGA_NVTX_RANGE +/// \brief starts an NVTX range for profiling which stops automatically at the end of the scope +/// \param varname an arbitrary variable name for the nvtx_range object, which doesn't conflict with 
other variables in the scope +/// \param label the label/name of the NVTX range +#define CGA_NVTX_RANGE(varname, label) ::claragenomics::cudautils::nvtx_range varname(label) +/// nvtx_range +/// implementation of CGA_NVTX_RANGE +class nvtx_range +{ +public: + explicit nvtx_range(char const* name) + { + nvtxRangePush(name); + } + + ~nvtx_range() + { + nvtxRangePop(); + } +}; +#else +#define CGA_NVTX_RANGE(name) +#endif // CGA_PROFILING + } // namespace cudautils /// \brief A class to switch the CUDA device for the current scope using RAII diff --git a/cudamapper/src/index.cu b/cudamapper/src/index.cu index 32b5f5e6d..c55d7898c 100644 --- a/cudamapper/src/index.cu +++ b/cudamapper/src/index.cu @@ -9,6 +9,7 @@ */ #include "claragenomics/cudamapper/index.hpp" +#include #include "index_gpu.cuh" #include "minimizer.hpp" @@ -21,6 +22,7 @@ std::unique_ptr Index::create_index(const std::vector& const std::uint64_t window_size, const std::vector>& read_ranges) { + CGA_NVTX_RANGE(profiler, "create_index"); return std::make_unique>(parsers, kmer_size, window_size, read_ranges); } diff --git a/cudamapper/src/index_gpu.cuh b/cudamapper/src/index_gpu.cuh index 8871a7fc8..1d3e3cc35 100644 --- a/cudamapper/src/index_gpu.cuh +++ b/cudamapper/src/index_gpu.cuh @@ -573,35 +573,38 @@ void IndexGPU::generate_index(const std::vector(parser->get_num_seqences())); - - for (auto read_id = first_read_; read_id < last_read_; read_id++) + CGA_NVTX_RANGE(profile_reads, "reading fasta"); + for (auto range : read_ranges) { - fasta_sequences.emplace_back(parser->get_sequence_by_id(read_id)); - const std::string& seq = fasta_sequences.back().seq; - const std::string& name = fasta_sequences.back().name; - if (seq.length() >= window_size_ + kmer_size_ - 1) - { - read_id_to_basepairs_section_h.emplace_back(ArrayBlock{total_basepairs, static_cast(seq.length())}); - total_basepairs += seq.length(); - read_id_to_read_name_.push_back(name); - read_id_to_read_length_.push_back(seq.length()); - fasta_sequence_indices.push_back(global_read_id); - } - else + io::FastaParser* parser = parsers[count]; + auto first_read_ = range.first; + auto last_read_ = std::min(range.second, static_cast(parser->get_num_seqences())); + + for (auto read_id = first_read_; read_id < last_read_; read_id++) { - CGA_LOG_INFO("Skipping read {}. It has {} basepairs, one window covers {} basepairs", - name, - seq.length(), window_size_ + kmer_size_ - 1); + fasta_sequences.emplace_back(parser->get_sequence_by_id(read_id)); + const std::string& seq = fasta_sequences.back().seq; + const std::string& name = fasta_sequences.back().name; + if (seq.length() >= window_size_ + kmer_size_ - 1) + { + read_id_to_basepairs_section_h.emplace_back(ArrayBlock{total_basepairs, static_cast(seq.length())}); + total_basepairs += seq.length(); + read_id_to_read_name_.push_back(name); + read_id_to_read_length_.push_back(seq.length()); + fasta_sequence_indices.push_back(global_read_id); + } + else + { + CGA_LOG_INFO("Skipping read {}. 
It has {} basepairs, one window covers {} basepairs", + name, + seq.length(), window_size_ + kmer_size_ - 1); + } + global_read_id++; } - global_read_id++; - } - count++; + count++; + } } auto number_of_reads_to_add = read_id_to_basepairs_section_h.size(); // This is the number of reads in this specific iteration diff --git a/cudamapper/src/main.cpp b/cudamapper/src/main.cpp index d147434c5..4ac478f2f 100644 --- a/cudamapper/src/main.cpp +++ b/cudamapper/src/main.cpp @@ -20,6 +20,7 @@ #include #include +#include #include "claragenomics/cudamapper/index.hpp" #include "claragenomics/cudamapper/overlapper.hpp" @@ -110,6 +111,7 @@ int main(int argc, char* argv[]) auto add_overlaps_to_write_queue = [&overlaps_to_write, &overlaps_writer_mtx](claragenomics::cudamapper::Overlapper& overlapper, std::vector& anchors, const claragenomics::cudamapper::Index& index) { + CGA_NVTX_RANGE(profiler, "add_overlaps_to_write_queue"); overlaps_writer_mtx.lock(); overlaps_to_write.push_back(std::vector()); overlapper.get_overlaps(overlaps_to_write.back(), anchors, index); @@ -128,6 +130,7 @@ int main(int argc, char* argv[]) overlaps_writer_mtx.lock(); if (!overlaps_to_write.empty()) { + CGA_NVTX_RANGE(profile, "overlaps_writer"); std::vector& overlaps = overlaps_to_write.front(); // An empty overlap vector indicates end of processing. if (overlaps.size() > 0) diff --git a/cudamapper/src/matcher.cu b/cudamapper/src/matcher.cu index 0353a8a49..8edce6248 100644 --- a/cudamapper/src/matcher.cu +++ b/cudamapper/src/matcher.cu @@ -109,7 +109,7 @@ __global__ void generate_anchors(const position_in_read_t* const positions_in_re Matcher::Matcher(const Index& index, uint32_t query_target_division_idx) { - + CGA_NVTX_RANGE(profile, "matcher"); if (0 == index.number_of_reads()) { return; diff --git a/cudamapper/src/overlapper_triggered.cu b/cudamapper/src/overlapper_triggered.cu index 9ee63ebb4..add4e17b4 100644 --- a/cudamapper/src/overlapper_triggered.cu +++ b/cudamapper/src/overlapper_triggered.cu @@ -31,6 +31,7 @@ namespace cudamapper { void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, std::vector& anchors, const Index& index) { + CGA_NVTX_RANGE(profiler, "OverlapperTriggered::get_overlaps"); const auto& read_names = index.read_id_to_read_name(); const auto& read_lengths = index.read_id_to_read_length(); size_t total_anchors = anchors.size(); From 0c64c3f8e5d3ec38863c4b4427dba71411b28eee Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Tue, 29 Oct 2019 12:12:20 +0200 Subject: [PATCH 004/128] fix #144 - constructor member initializer formatting --- .clang-format | 2 +- .../include/claragenomics/utils/device_buffer.cuh | 3 ++- cudaaligner/src/aligner_global_hirschberg_myers.cpp | 6 +++++- cudaaligner/src/aligner_global_myers.cpp | 8 ++++++-- cudaaligner/src/aligner_global_ukkonen.cpp | 4 +++- cudaaligner/src/batched_device_matrices.cuh | 8 ++++++-- cudaaligner/src/matrix_cpu.hpp | 4 +++- cudamapper/src/bioparser_sequence.cpp | 3 ++- cudamapper/src/index_gpu.cuh | 13 ++++++++++--- cudamapper/src/minimizer.cu | 5 ++++- cudapoa/tests/Test_CudapoaAddAlignment.cpp | 5 ++++- cudapoa/tests/Test_CudapoaGenerateConsensus.cpp | 4 +++- cudapoa/tests/Test_CudapoaNW.cpp | 3 ++- cudapoa/tests/basic_graph.hpp | 6 +++++- cudapoa/tests/sorted_graph.hpp | 6 ++++-- 15 files changed, 60 insertions(+), 20 deletions(-) diff --git a/.clang-format b/.clang-format index 10a40ca5f..26de2ad62 100644 --- a/.clang-format +++ b/.clang-format @@ -49,7 +49,7 @@ BreakAfterJavaFieldAnnotations: false BreakStringLiterals: true 
ColumnLimit: 0 CommentPragmas: '^ IWYU pragma:' -ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true diff --git a/common/utils/include/claragenomics/utils/device_buffer.cuh b/common/utils/include/claragenomics/utils/device_buffer.cuh index 07725c19d..28fa0580d 100644 --- a/common/utils/include/claragenomics/utils/device_buffer.cuh +++ b/common/utils/include/claragenomics/utils/device_buffer.cuh @@ -65,7 +65,8 @@ public: device_buffer& operator=(device_buffer const&) = delete; device_buffer(device_buffer&& r) - : data_(std::exchange(r.data_, nullptr)), size_(std::exchange(r.size_, 0)) + : data_(std::exchange(r.data_, nullptr)) + , size_(std::exchange(r.size_, 0)) { } diff --git a/cudaaligner/src/aligner_global_hirschberg_myers.cpp b/cudaaligner/src/aligner_global_hirschberg_myers.cpp index a615f27d8..814aed478 100644 --- a/cudaaligner/src/aligner_global_hirschberg_myers.cpp +++ b/cudaaligner/src/aligner_global_hirschberg_myers.cpp @@ -27,7 +27,11 @@ static constexpr int32_t hirschberg_myers_switch_to_myers_size = 63; // ideally struct AlignerGlobalHirschbergMyers::Workspace { Workspace(int32_t max_alignments, int32_t max_n_words, int32_t max_target_length, int32_t warps_per_alignment, int32_t switch_to_myers_size, cudaStream_t stream) - : stackbuffer(max_alignments * hirschberg_myers_stackbuffer_size), pvs(max_alignments * warps_per_alignment, max_n_words * (switch_to_myers_size + 1), stream), mvs(max_alignments * warps_per_alignment, max_n_words * (switch_to_myers_size + 1), stream), scores(max_alignments * warps_per_alignment, std::max(max_n_words * (switch_to_myers_size + 1), (max_target_length + 1) * 2), stream), query_patterns(max_alignments, max_n_words * 8, stream) + : stackbuffer(max_alignments * hirschberg_myers_stackbuffer_size) + , pvs(max_alignments * warps_per_alignment, max_n_words * (switch_to_myers_size + 1), stream) + , mvs(max_alignments * warps_per_alignment, max_n_words * (switch_to_myers_size + 1), stream) + , scores(max_alignments * warps_per_alignment, std::max(max_n_words * (switch_to_myers_size + 1), (max_target_length + 1) * 2), stream) + , query_patterns(max_alignments, max_n_words * 8, stream) { assert(switch_to_myers_size >= 1); } diff --git a/cudaaligner/src/aligner_global_myers.cpp b/cudaaligner/src/aligner_global_myers.cpp index 475fa437c..d25386cc2 100644 --- a/cudaaligner/src/aligner_global_myers.cpp +++ b/cudaaligner/src/aligner_global_myers.cpp @@ -23,7 +23,10 @@ namespace cudaaligner struct AlignerGlobalMyers::Workspace { Workspace(int32_t max_alignments, int32_t max_n_words, int32_t max_target_length, cudaStream_t stream) - : pvs(max_alignments, max_n_words * (max_target_length + 1), stream), mvs(max_alignments, max_n_words * (max_target_length + 1), stream), scores(max_alignments, max_n_words * (max_target_length + 1), stream), query_patterns(max_alignments, max_n_words * 4, stream) + : pvs(max_alignments, max_n_words * (max_target_length + 1), stream) + , mvs(max_alignments, max_n_words * (max_target_length + 1), stream) + , scores(max_alignments, max_n_words * (max_target_length + 1), stream) + , query_patterns(max_alignments, max_n_words * 4, stream) { } batched_device_matrices pvs; @@ -33,7 +36,8 @@ struct AlignerGlobalMyers::Workspace }; AlignerGlobalMyers::AlignerGlobalMyers(int32_t max_query_length, int32_t max_target_length, int32_t max_alignments, cudaStream_t stream, int32_t device_id) - : 
AlignerGlobal(max_query_length, max_target_length, max_alignments, stream, device_id), workspace_() + : AlignerGlobal(max_query_length, max_target_length, max_alignments, stream, device_id) + , workspace_() { scoped_device_switch dev(device_id); workspace_ = std::make_unique(max_alignments, ceiling_divide(max_query_length, sizeof(myers::WordType)), max_target_length, stream); diff --git a/cudaaligner/src/aligner_global_ukkonen.cpp b/cudaaligner/src/aligner_global_ukkonen.cpp index b1b49cd66..683989aa2 100644 --- a/cudaaligner/src/aligner_global_ukkonen.cpp +++ b/cudaaligner/src/aligner_global_ukkonen.cpp @@ -21,7 +21,9 @@ namespace cudaaligner static constexpr float max_target_query_length_difference = 0.1; // query has to be >=90% of target length AlignerGlobalUkkonen::AlignerGlobalUkkonen(int32_t max_query_length, int32_t max_target_length, int32_t max_alignments, cudaStream_t stream, int32_t device_id) - : AlignerGlobal(max_query_length, max_target_length, max_alignments, stream, device_id), score_matrices_(), ukkonen_p_(100) + : AlignerGlobal(max_query_length, max_target_length, max_alignments, stream, device_id) + , score_matrices_() + , ukkonen_p_(100) { scoped_device_switch dev(device_id); int32_t const allocated_max_length_difference = this->get_max_target_length() * max_target_query_length_difference; diff --git a/cudaaligner/src/batched_device_matrices.cuh b/cudaaligner/src/batched_device_matrices.cuh index 894a1c821..f769ed61f 100644 --- a/cudaaligner/src/batched_device_matrices.cuh +++ b/cudaaligner/src/batched_device_matrices.cuh @@ -34,7 +34,9 @@ class device_matrix_view { public: __device__ device_matrix_view(T* storage, int32_t n_rows, int32_t n_cols) - : data_(storage), n_rows_(n_rows), n_cols_(n_cols) + : data_(storage) + , n_rows_(n_rows) + , n_cols_(n_cols) { } @@ -77,7 +79,9 @@ public: { public: device_interface(T* storage, int32_t n_matrices, int32_t max_elements_per_matrix) - : storage_(storage), max_elements_per_matrix_(max_elements_per_matrix), n_matrices_(n_matrices) + : storage_(storage) + , max_elements_per_matrix_(max_elements_per_matrix) + , n_matrices_(n_matrices) { } __device__ device_matrix_view get_matrix_view(int32_t id, int32_t n_rows, int32_t n_cols) diff --git a/cudaaligner/src/matrix_cpu.hpp b/cudaaligner/src/matrix_cpu.hpp index 1d3cfdd8f..3ddf63c0a 100644 --- a/cudaaligner/src/matrix_cpu.hpp +++ b/cudaaligner/src/matrix_cpu.hpp @@ -39,7 +39,9 @@ class matrix ~matrix() = default; matrix(int n, int m, T value = 0) - : data_(n * m, value), n(n), m(m) + : data_(n * m, value) + , n(n) + , m(m) { } diff --git a/cudamapper/src/bioparser_sequence.cpp b/cudamapper/src/bioparser_sequence.cpp index d2ddf645a..c7e532355 100644 --- a/cudamapper/src/bioparser_sequence.cpp +++ b/cudamapper/src/bioparser_sequence.cpp @@ -18,7 +18,8 @@ namespace cudamapper { BioParserSequence::BioParserSequence(const char* name, uint32_t name_length, const char* data, uint32_t data_length) - : name_(name, name_length), data_() + : name_(name, name_length) + , data_() { data_.reserve(data_length); diff --git a/cudamapper/src/index_gpu.cuh b/cudamapper/src/index_gpu.cuh index 8871a7fc8..53f08a227 100644 --- a/cudamapper/src/index_gpu.cuh +++ b/cudamapper/src/index_gpu.cuh @@ -133,7 +133,9 @@ class approximate_sketch_elements_per_bucket_too_small : public std::exception { public: approximate_sketch_elements_per_bucket_too_small(const std::string& message) - : message_(message) {} + : message_(message) + { + } 
approximate_sketch_elements_per_bucket_too_small(approximate_sketch_elements_per_bucket_too_small const&) = default; approximate_sketch_elements_per_bucket_too_small& operator=(approximate_sketch_elements_per_bucket_too_small const&) = default; virtual ~approximate_sketch_elements_per_bucket_too_small() = default; @@ -491,14 +493,19 @@ void build_index(const std::uint64_t number_of_reads, template IndexGPU::IndexGPU(const std::vector& parsers, const std::uint64_t kmer_size, const std::uint64_t window_size, const std::vector>& read_ranges) - : kmer_size_(kmer_size), window_size_(window_size), number_of_reads_(0), reached_end_of_input_(false) + : kmer_size_(kmer_size) + , window_size_(window_size) + , number_of_reads_(0) + , reached_end_of_input_(false) { generate_index(parsers, read_ranges); } template IndexGPU::IndexGPU() - : kmer_size_(0), window_size_(0), number_of_reads_(0) + : kmer_size_(0) + , window_size_(0) + , number_of_reads_(0) { } diff --git a/cudamapper/src/minimizer.cu b/cudamapper/src/minimizer.cu index a7d9f9a8d..e3fe71784 100644 --- a/cudamapper/src/minimizer.cu +++ b/cudamapper/src/minimizer.cu @@ -17,7 +17,10 @@ namespace cudamapper { Minimizer::Minimizer(representation_t representation, position_in_read_t position_in_read, DirectionOfRepresentation direction, read_id_t read_id) - : representation_(representation), position_in_read_(position_in_read), direction_(direction), read_id_(read_id) + : representation_(representation) + , position_in_read_(position_in_read) + , direction_(direction) + , read_id_(read_id) { } diff --git a/cudapoa/tests/Test_CudapoaAddAlignment.cpp b/cudapoa/tests/Test_CudapoaAddAlignment.cpp index 1c1455420..c898e1419 100644 --- a/cudapoa/tests/Test_CudapoaAddAlignment.cpp +++ b/cudapoa/tests/Test_CudapoaAddAlignment.cpp @@ -29,7 +29,10 @@ class BasicAlignment BasicAlignment(std::vector nodes, Uint16Vec2D outgoing_edges, Uint16Vec2D node_alignments, std::vector node_coverage_counts, std::vector read, std::vector base_weights, std::vector alignment_graph, std::vector alignment_read) - : graph(nodes, outgoing_edges, node_alignments, node_coverage_counts), read_(read), alignment_graph_(alignment_graph), alignment_read_(alignment_read) + : graph(nodes, outgoing_edges, node_alignments, node_coverage_counts) + , read_(read) + , alignment_graph_(alignment_graph) + , alignment_read_(alignment_read) { //do nothing for now } diff --git a/cudapoa/tests/Test_CudapoaGenerateConsensus.cpp b/cudapoa/tests/Test_CudapoaGenerateConsensus.cpp index 1fa614786..cf8e52363 100644 --- a/cudapoa/tests/Test_CudapoaGenerateConsensus.cpp +++ b/cudapoa/tests/Test_CudapoaGenerateConsensus.cpp @@ -28,7 +28,9 @@ class BasicGenerateConsensus public: BasicGenerateConsensus(std::vector nodes, std::vector sorted_graph, Uint16Vec2D node_alignments, Uint16Vec2D outgoing_edges, std::vector node_coverage_counts, Uint16Vec2D outgoing_edge_w) - : graph_(nodes, sorted_graph, node_alignments, node_coverage_counts, outgoing_edges), outgoing_edge_w_(outgoing_edge_w), outgoing_edges_(outgoing_edges) + : graph_(nodes, sorted_graph, node_alignments, node_coverage_counts, outgoing_edges) + , outgoing_edge_w_(outgoing_edge_w) + , outgoing_edges_(outgoing_edges) { } diff --git a/cudapoa/tests/Test_CudapoaNW.cpp b/cudapoa/tests/Test_CudapoaNW.cpp index f3b0cd0df..79fef1387 100644 --- a/cudapoa/tests/Test_CudapoaNW.cpp +++ b/cudapoa/tests/Test_CudapoaNW.cpp @@ -34,7 +34,8 @@ class BasicNW public: BasicNW(std::vector nodes, std::vector sorted_graph, Uint16Vec2D outgoing_edges, std::vector read) - : 
graph_(nodes, sorted_graph, outgoing_edges), read_(read) + : graph_(nodes, sorted_graph, outgoing_edges) + , read_(read) { // do nothing } diff --git a/cudapoa/tests/basic_graph.hpp b/cudapoa/tests/basic_graph.hpp index 7c22a942b..694955de1 100644 --- a/cudapoa/tests/basic_graph.hpp +++ b/cudapoa/tests/basic_graph.hpp @@ -30,7 +30,11 @@ class BasicGraph { public: BasicGraph(std::vector nodes, Uint16Vec2D outgoing_edges, Uint16Vec2D node_alignments, std::vector node_coverage_counts, Uint16Vec3D outgoing_edges_coverage = {}) - : nodes_(nodes), outgoing_edges_(outgoing_edges), node_alignments_(node_alignments), node_coverage_counts_(node_coverage_counts), outgoing_edges_coverage_(outgoing_edges_coverage) + : nodes_(nodes) + , outgoing_edges_(outgoing_edges) + , node_alignments_(node_alignments) + , node_coverage_counts_(node_coverage_counts) + , outgoing_edges_coverage_(outgoing_edges_coverage) { graph_complete_ = true; node_count_ = get_size(nodes_); diff --git a/cudapoa/tests/sorted_graph.hpp b/cudapoa/tests/sorted_graph.hpp index b7d360ccf..cc63a6424 100644 --- a/cudapoa/tests/sorted_graph.hpp +++ b/cudapoa/tests/sorted_graph.hpp @@ -28,7 +28,8 @@ class SortedGraph : public BasicGraph public: SortedGraph(std::vector nodes, std::vector sorted_graph, Uint16Vec2D outgoing_edges) - : BasicGraph(nodes, outgoing_edges), sorted_graph_(sorted_graph) + : BasicGraph(nodes, outgoing_edges) + , sorted_graph_(sorted_graph) { // do nothing for now } @@ -36,7 +37,8 @@ class SortedGraph : public BasicGraph SortedGraph(std::vector nodes, std::vector sorted_graph, Uint16Vec2D node_alignments, std::vector node_coverage_counts, Uint16Vec2D outgoing_edges, Uint16Vec3D outgoing_edges_coverage = {}) - : BasicGraph(nodes, outgoing_edges, node_alignments, node_coverage_counts, outgoing_edges_coverage), sorted_graph_(sorted_graph) + : BasicGraph(nodes, outgoing_edges, node_alignments, node_coverage_counts, outgoing_edges_coverage) + , sorted_graph_(sorted_graph) { // do nothing for now } From f615467b5460a7e4c402e72e0717001b34c389ba Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Tue, 29 Oct 2019 16:13:47 -0400 Subject: [PATCH 005/128] [cudapoa] returning error when consensus/msa larger than max size --- .../include/claragenomics/cudapoa/batch.hpp | 4 ++ cudapoa/src/cudapoa_batch.cpp | 10 ++++- cudapoa/src/cudapoa_generate_consensus.cu | 21 +++++++++- cudapoa/src/cudapoa_generate_msa.cu | 9 +++++ cudapoa/tests/Test_CudapoaGenerateMSA2.cpp | 40 +++++++++++++++++-- 5 files changed, 78 insertions(+), 6 deletions(-) diff --git a/cudapoa/include/claragenomics/cudapoa/batch.hpp b/cudapoa/include/claragenomics/cudapoa/batch.hpp index f2f1b64a4..6b5842bc5 100644 --- a/cudapoa/include/claragenomics/cudapoa/batch.hpp +++ b/cudapoa/include/claragenomics/cudapoa/batch.hpp @@ -83,6 +83,8 @@ class Batch /// base in each consensus string is returned /// \param output_status Reference to vector where the errors /// during kernel execution is captured + /// + /// \return Status indicating whether consensus generation is available for this batch. virtual StatusType get_consensus(std::vector& consensus, std::vector>& coverage, std::vector& output_status) = 0; @@ -93,6 +95,8 @@ class Batch /// poa is returned /// \param output_status Reference to vector where the errors /// during kernel execution is captured + /// + /// \return Status indicating whether MSA generation is available for this batch. 
virtual StatusType get_msa(std::vector>& msa, std::vector& output_status) = 0; diff --git a/cudapoa/src/cudapoa_batch.cpp b/cudapoa/src/cudapoa_batch.cpp index f1eb0f122..9cd5a3810 100644 --- a/cudapoa/src/cudapoa_batch.cpp +++ b/cudapoa/src/cudapoa_batch.cpp @@ -183,14 +183,20 @@ void CudapoaBatch::decode_cudapoa_kernel_error(claragenomics::cudapoa::StatusTyp output_status.emplace_back(error_type); break; case claragenomics::cudapoa::StatusType::seq_len_exceeded_maximum_nodes_per_window: - CGA_LOG_WARN("Kernel Error::Sequence length exceeded maximum nodes per window in batch {}\n", bid_); + CGA_LOG_WARN("Kernel Error:: Sequence length exceeded maximum nodes per window in batch {}\n", bid_); output_status.emplace_back(error_type); break; case claragenomics::cudapoa::StatusType::loop_count_exceeded_upper_bound: - CGA_LOG_WARN("Kernel Error::Loop count exceeded upper bound in nw algorithm in batch {}\n", bid_); + CGA_LOG_WARN("Kernel Error:: Loop count exceeded upper bound in nw algorithm in batch {}\n", bid_); + output_status.emplace_back(error_type); + break; + case claragenomics::cudapoa::StatusType::exceeded_maximum_sequence_size: + CGA_LOG_WARN("Kernel Error:: Consensus/MSA sequence size exceeded max sequence size in batch {}\n", bid_); output_status.emplace_back(error_type); break; default: + CGA_LOG_WARN("Kernel Error:: Unknown error in batch {}\n", bid_); + output_status.emplace_back(error_type); break; } } diff --git a/cudapoa/src/cudapoa_generate_consensus.cu b/cudapoa/src/cudapoa_generate_consensus.cu index 67424ed85..aaef3b78c 100644 --- a/cudapoa/src/cudapoa_generate_consensus.cu +++ b/cudapoa/src/cudapoa_generate_consensus.cu @@ -224,7 +224,13 @@ __device__ void generateConsensus(uint8_t* nodes, return; } + // Use consensus_pos to track which position to put new element in. Clip this to the maximum + // size of consensus so as not to overwrite other good data. uint16_t consensus_pos = 0; + // Use consensus_count to track how many elements are in consensus. If more than the maximum + // size, then consensus cannot be properly represented. So throw error. + uint16_t consensus_count = 0; + while (predecessors[max_score_id] != -1) { consensus[consensus_pos] = nodes[max_score_id]; @@ -235,7 +241,8 @@ __device__ void generateConsensus(uint8_t* nodes, } coverage[consensus_pos] = cov; max_score_id = predecessors[max_score_id]; - consensus_pos++; + consensus_pos = min(consensus_pos + 1, CUDAPOA_MAX_CONSENSUS_SIZE - 1); + consensus_count++; } consensus[consensus_pos] = nodes[max_score_id]; uint16_t cov = node_coverage_counts[max_score_id]; @@ -244,7 +251,19 @@ __device__ void generateConsensus(uint8_t* nodes, cov += node_coverage_counts[node_alignments[max_score_id * CUDAPOA_MAX_NODE_ALIGNMENTS + a]]; } coverage[consensus_pos] = cov; + + // Check consensus count against maximum size. + if (consensus_count >= (CUDAPOA_MAX_CONSENSUS_SIZE - 1)) + { + consensus[0] = CUDAPOA_KERNEL_ERROR_ENCOUNTERED; + consensus[1] = static_cast(StatusType::exceeded_maximum_sequence_size); + return; + } + + // Now we can increment consensus_pos without checking for upper bound because the max length + // test above guarantees that consensus_pos <= (CUDAPOA_MAX_CONSENSUS_SIZE - 2). consensus_pos++; + // Add EOL character at the end of the string. 
consensus[consensus_pos] = '\0'; } diff --git a/cudapoa/src/cudapoa_generate_msa.cu b/cudapoa/src/cudapoa_generate_msa.cu index 066b7600f..a6441a56d 100644 --- a/cudapoa/src/cudapoa_generate_msa.cu +++ b/cudapoa/src/cudapoa_generate_msa.cu @@ -186,10 +186,19 @@ __global__ void generateMSAKernel(uint8_t* nodes_d, sorted_poa, node_id_to_msa_pos, node_alignment_counts); + + if (msa_length >= CUDAPOA_MAX_CONSENSUS_SIZE) + { + consensus[0] = CUDAPOA_KERNEL_ERROR_ENCOUNTERED; + consensus[1] = static_cast(StatusType::exceeded_maximum_sequence_size); + } } __syncthreads(); + if (consensus[0] == CUDAPOA_KERNEL_ERROR_ENCOUNTERED) + return; + generateMSADevice(nodes, num_sequences, outgoing_edge_count, diff --git a/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp b/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp index 1a18491b5..7a0d2bd93 100644 --- a/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp +++ b/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp @@ -8,6 +8,8 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. */ +#include "../src/cudapoa_kernels.cuh" + #include #include @@ -90,7 +92,7 @@ TEST_F(MSATest, CudapoaMSA) e.length = seq.length(); poa_group.push_back(e); } - EXPECT_EQ(cudapoa_batch->add_poa_group(status, poa_group), StatusType::success); + ASSERT_EQ(cudapoa_batch->add_poa_group(status, poa_group), StatusType::success); std::vector> cudapoa_msa; std::vector output_status; @@ -99,6 +101,8 @@ TEST_F(MSATest, CudapoaMSA) cudapoa_batch->get_msa(cudapoa_msa, output_status); + ASSERT_EQ(output_status[0], StatusType::success); + auto spoa_msa = spoa_generate_multiple_sequence_alignments(sequences); #ifndef SPOA_ACCURATE @@ -107,13 +111,43 @@ TEST_F(MSATest, CudapoaMSA) std::string msa = cudapoa_msa[0][i]; msa.erase(std::remove(msa.begin(), msa.end(), '-'), msa.end()); - EXPECT_EQ(msa, sequences[i]); + ASSERT_EQ(msa, sequences[i]); } #else - EXPECT_EQ(spoa_msa, cudapoa_msa[0]); + ASSERT_EQ(spoa_msa, cudapoa_msa[0]); #endif } +TEST_F(MSATest, CudapoaMSAFailure) +{ + std::minstd_rand rng(1); + int num_sequences = 10; + std::string backbone = claragenomics::genomeutils::generate_random_genome(CUDAPOA_MAX_CONSENSUS_SIZE - 1, rng); + auto sequences = claragenomics::genomeutils::generate_random_sequences(backbone, num_sequences, rng, 10, 5, 10); + + initialize(num_sequences); + Group poa_group; + std::vector status; + for (const auto& seq : sequences) + { + Entry e{}; + e.seq = seq.c_str(); + e.weights = nullptr; + e.length = seq.length(); + poa_group.push_back(e); + } + ASSERT_EQ(cudapoa_batch->add_poa_group(status, poa_group), StatusType::success); + + std::vector> cudapoa_msa; + std::vector output_status; + + cudapoa_batch->generate_poa(); + + cudapoa_batch->get_msa(cudapoa_msa, output_status); + + ASSERT_EQ(output_status[0], StatusType::exceeded_maximum_sequence_size); +} + } // namespace cudapoa } // namespace claragenomics From 7db59ac6ea0a76bce028b831f2be30752cb5caf5 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Tue, 29 Oct 2019 14:37:27 -0400 Subject: [PATCH 006/128] [cudapoa] fix max sequences per poa check --- cudapoa/src/cudapoa_batch.cpp | 2 +- cudapoa/tests/Test_CudapoaBatch.cpp | 11 ++++++----- cudapoa/tests/Test_CudapoaGenerateMSA2.cpp | 6 ++++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/cudapoa/src/cudapoa_batch.cpp b/cudapoa/src/cudapoa_batch.cpp index f1eb0f122..80955ce39 100644 --- a/cudapoa/src/cudapoa_batch.cpp +++ b/cudapoa/src/cudapoa_batch.cpp @@ -399,7 +399,7 @@ StatusType CudapoaBatch::add_seq_to_poa(const char* seq, const int8_t* weights, 
window_details->scores_width = scores_width_; } - if (static_cast(window_details->num_seqs) + 1 >= max_sequences_per_poa_) + if (static_cast(window_details->num_seqs) >= max_sequences_per_poa_) { return StatusType::exceeded_maximum_sequences_per_poa; } diff --git a/cudapoa/tests/Test_CudapoaBatch.cpp b/cudapoa/tests/Test_CudapoaBatch.cpp index e6d9a8c4b..46fb8ad99 100644 --- a/cudapoa/tests/Test_CudapoaBatch.cpp +++ b/cudapoa/tests/Test_CudapoaBatch.cpp @@ -92,16 +92,17 @@ TEST_F(TestCudapoaBatch, AddPOATest) TEST_F(TestCudapoaBatch, MaxSeqPerPOATest) { - const int32_t device_id = 0; - size_t free = get_free_device_mem(device_id); - initialize(10, 0.9 * free, device_id); + const int32_t device_id = 0; + const int32_t max_sequences_per_poa = 10; + size_t free = get_free_device_mem(device_id); + initialize(max_sequences_per_poa, 0.9 * free, device_id); Group poa_group; std::vector status; int32_t seq_length = 20; std::string seq(seq_length, 'A'); std::vector weights(seq_length, 1); - for (uint16_t i = 0; i < 10; ++i) + for (uint16_t i = 0; i < (max_sequences_per_poa + 1); ++i) { Entry e{}; e.seq = seq.c_str(); @@ -111,7 +112,7 @@ TEST_F(TestCudapoaBatch, MaxSeqPerPOATest) } EXPECT_EQ(cudapoa_batch->add_poa_group(status, poa_group), StatusType::success); EXPECT_EQ(cudapoa_batch->get_total_poas(), 1); - EXPECT_EQ(status.at(9), StatusType::exceeded_maximum_sequences_per_poa); + EXPECT_EQ(status.at(max_sequences_per_poa), StatusType::exceeded_maximum_sequences_per_poa); } TEST_F(TestCudapoaBatch, MaxSeqSizeTest) diff --git a/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp b/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp index 1a18491b5..f96f185dd 100644 --- a/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp +++ b/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp @@ -75,11 +75,11 @@ class MSATest : public ::testing::Test TEST_F(MSATest, CudapoaMSA) { std::minstd_rand rng(1); - int num_sequences = 499; + int num_sequences = 500; std::string backbone = claragenomics::genomeutils::generate_random_genome(50, rng); auto sequences = claragenomics::genomeutils::generate_random_sequences(backbone, num_sequences, rng, 10, 5, 10); - initialize(num_sequences + 1); // + initialize(num_sequences); Group poa_group; std::vector status; for (const auto& seq : sequences) @@ -99,6 +99,8 @@ TEST_F(MSATest, CudapoaMSA) cudapoa_batch->get_msa(cudapoa_msa, output_status); + EXPECT_EQ(poa_group.size(), cudapoa_msa[0].size()); + auto spoa_msa = spoa_generate_multiple_sequence_alignments(sequences); #ifndef SPOA_ACCURATE From 0b61627047fe012b0f3ea4bc3e9da68cbb39bab2 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Wed, 30 Oct 2019 19:49:45 +0200 Subject: [PATCH 007/128] \#147 [cudapoa] combine benchmark_cudapoa_singlebatch & benchmark_cudapoa_multibatch into one binary --- cudapoa/benchmarks/CMakeLists.txt | 3 +- cudapoa/benchmarks/README.md | 4 +- .../{multi-batch => run-batch}/CMakeLists.txt | 2 +- .../{multi-batch => run-batch}/main.cpp | 29 ++++++++++--- .../multi_batch.hpp | 0 .../single_batch.hpp | 0 .../benchmarks/single-batch/CMakeLists.txt | 33 --------------- cudapoa/benchmarks/single-batch/main.cpp | 42 ------------------- cudapoa/tests/Test_CudapoaBatchEnd2End.cpp | 2 +- 9 files changed, 28 insertions(+), 87 deletions(-) rename cudapoa/benchmarks/{multi-batch => run-batch}/CMakeLists.txt (96%) rename cudapoa/benchmarks/{multi-batch => run-batch}/main.cpp (73%) rename cudapoa/benchmarks/{multi-batch => run-batch}/multi_batch.hpp (100%) rename cudapoa/benchmarks/{single-batch => run-batch}/single_batch.hpp (100%) delete mode 100644 
cudapoa/benchmarks/single-batch/CMakeLists.txt delete mode 100644 cudapoa/benchmarks/single-batch/main.cpp diff --git a/cudapoa/benchmarks/CMakeLists.txt b/cudapoa/benchmarks/CMakeLists.txt index c97101320..36343efd3 100644 --- a/cudapoa/benchmarks/CMakeLists.txt +++ b/cudapoa/benchmarks/CMakeLists.txt @@ -9,8 +9,7 @@ # # Add benchmarks -add_subdirectory(single-batch) -add_subdirectory(multi-batch) +add_subdirectory(run-batch) install(FILES README.md DESTINATION benchmarks/cudapoa) diff --git a/cudapoa/benchmarks/README.md b/cudapoa/benchmarks/README.md index 043598eed..acc2c0c2f 100644 --- a/cudapoa/benchmarks/README.md +++ b/cudapoa/benchmarks/README.md @@ -7,7 +7,7 @@ of a single batch of POA. To run the benchmark, execute ``` -./benchmarks/cudapoa/benchmark_cudapoa_singlebatch +./benchmarks/cudapoa/benchmark_cudapoa_run_batch --benchmark_filter="BM_SingleBatchTest" ``` ## Multi Batch @@ -17,5 +17,5 @@ of several batched CUDA POA stream that fill up the GPU. To the the benchmark, execute ``` -./benchmarks/cudapoa/benchmark_cudapoa_multibatch +./benchmarks/cudapoa/benchmark_cudapoa_run_batch --benchmark_filter="BM_MultiBatchTest" ``` diff --git a/cudapoa/benchmarks/multi-batch/CMakeLists.txt b/cudapoa/benchmarks/run-batch/CMakeLists.txt similarity index 96% rename from cudapoa/benchmarks/multi-batch/CMakeLists.txt rename to cudapoa/benchmarks/run-batch/CMakeLists.txt index b8fd3919e..54ad33de1 100644 --- a/cudapoa/benchmarks/multi-batch/CMakeLists.txt +++ b/cudapoa/benchmarks/run-batch/CMakeLists.txt @@ -18,7 +18,7 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. # -project(benchmark_cudapoa_multibatch) +project(benchmark_cudapoa_run_batch) set(SOURCES main.cpp diff --git a/cudapoa/benchmarks/multi-batch/main.cpp b/cudapoa/benchmarks/run-batch/main.cpp similarity index 73% rename from cudapoa/benchmarks/multi-batch/main.cpp rename to cudapoa/benchmarks/run-batch/main.cpp index b0713a17d..508475b83 100644 --- a/cudapoa/benchmarks/multi-batch/main.cpp +++ b/cudapoa/benchmarks/run-batch/main.cpp @@ -10,6 +10,7 @@ #include "../common/utils.hpp" #include "multi_batch.hpp" +#include "single_batch.hpp" #include "file_location.hpp" #include @@ -23,14 +24,15 @@ namespace claragenomics namespace cudapoa { -static void BM_MultiBatchTest(benchmark::State& state) +static void BM_SingleBatchTest(benchmark::State& state) { - int32_t batches = state.range(0); - const int32_t total_windows = 5500; - MultiBatch mb(batches, std::string(CUDAPOA_BENCHMARK_DATA_DIR) + "/sample-windows.txt", total_windows); + SingleBatch sb(state.range(0), std::string(CUDAPOA_BENCHMARK_DATA_DIR) + "/sample-windows.txt", state.range(0)); for (auto _ : state) { - mb.process_batches(); + state.PauseTiming(); + sb.add_windows(); + state.ResumeTiming(); + sb.process_consensus(); } } @@ -44,7 +46,22 @@ static void CustomArguments(benchmark::internal::Benchmark* b) } } -// Register the function as a benchmark +static void BM_MultiBatchTest(benchmark::State& state) +{ + int32_t batches = state.range(0); + const int32_t total_windows = 5500; + MultiBatch mb(batches, std::string(CUDAPOA_BENCHMARK_DATA_DIR) + "/sample-windows.txt", total_windows); + for (auto _ : state) + { + mb.process_batches(); + } +} + +// Register the functions as a benchmark +BENCHMARK(BM_SingleBatchTest) + ->Unit(benchmark::kMillisecond) + ->RangeMultiplier(4) + ->Range(1, 1 << 10); BENCHMARK(BM_MultiBatchTest) ->Unit(benchmark::kMillisecond) ->Apply(CustomArguments); diff --git a/cudapoa/benchmarks/multi-batch/multi_batch.hpp 
b/cudapoa/benchmarks/run-batch/multi_batch.hpp similarity index 100% rename from cudapoa/benchmarks/multi-batch/multi_batch.hpp rename to cudapoa/benchmarks/run-batch/multi_batch.hpp diff --git a/cudapoa/benchmarks/single-batch/single_batch.hpp b/cudapoa/benchmarks/run-batch/single_batch.hpp similarity index 100% rename from cudapoa/benchmarks/single-batch/single_batch.hpp rename to cudapoa/benchmarks/run-batch/single_batch.hpp diff --git a/cudapoa/benchmarks/single-batch/CMakeLists.txt b/cudapoa/benchmarks/single-batch/CMakeLists.txt deleted file mode 100644 index aa99eba3e..000000000 --- a/cudapoa/benchmarks/single-batch/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. -# - -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. -# - -project(benchmark_cudapoa_singlebatch) - -set(SOURCES - main.cpp - ) - -get_property(cudapoa_data_include_dir GLOBAL PROPERTY cudapoa_data_include_dir) -include_directories(${cudapoa_data_include_dir}) - -set(LIBS - cudapoa) - -cga_add_benchmarks(${PROJECT_NAME} "cudapoa" "${SOURCES}" "${LIBS}") diff --git a/cudapoa/benchmarks/single-batch/main.cpp b/cudapoa/benchmarks/single-batch/main.cpp deleted file mode 100644 index 17d5600cd..000000000 --- a/cudapoa/benchmarks/single-batch/main.cpp +++ /dev/null @@ -1,42 +0,0 @@ -/* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. -*/ - -#include "single_batch.hpp" -#include "file_location.hpp" - -#include - -namespace claragenomics -{ - -namespace cudapoa -{ - -static void BM_SingleBatchTest(benchmark::State& state) -{ - SingleBatch sb(state.range(0), std::string(CUDAPOA_BENCHMARK_DATA_DIR) + "/sample-windows.txt", state.range(0)); - for (auto _ : state) - { - state.PauseTiming(); - sb.add_windows(); - state.ResumeTiming(); - sb.process_consensus(); - } -} - -// Register the function as a benchmark -BENCHMARK(BM_SingleBatchTest) - ->Unit(benchmark::kMillisecond) - ->RangeMultiplier(4) - ->Range(1, 1 << 10); -} // namespace cudapoa -} // namespace claragenomics - -BENCHMARK_MAIN(); diff --git a/cudapoa/tests/Test_CudapoaBatchEnd2End.cpp b/cudapoa/tests/Test_CudapoaBatchEnd2End.cpp index f32fda82c..d92f142f3 100644 --- a/cudapoa/tests/Test_CudapoaBatchEnd2End.cpp +++ b/cudapoa/tests/Test_CudapoaBatchEnd2End.cpp @@ -8,7 +8,7 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. 
*/ -#include "../benchmarks/multi-batch/multi_batch.hpp" +#include "../benchmarks/run-batch/multi_batch.hpp" #include "../benchmarks/common/utils.hpp" #include "file_location.hpp" From 0ee3069c73931b094f3e37c6b63fff700d1a5db6 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Wed, 30 Oct 2019 22:21:25 +0200 Subject: [PATCH 008/128] [cuda poa] #147 adapt ci test script --- ci/common/build-test-sdk.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/common/build-test-sdk.sh b/ci/common/build-test-sdk.sh index 7b4b3baf0..eb0b6789d 100644 --- a/ci/common/build-test-sdk.sh +++ b/ci/common/build-test-sdk.sh @@ -63,7 +63,7 @@ if [ "$GPU_TEST" == '1' ]; then LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR find ${LOCAL_BUILD_DIR}/install/tests -type f -exec {} \; logger "Running ClaraGenomicsAnalysis benchmarks..." - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudapoa/benchmark_cudapoa_singlebatch + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudapoa/benchmark_cudapoa_run_batch --benchmark_filter="BM_SingleBatchTest" LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch_singlealignment fi From 45081631dd42802dd153cc05eb5d2c66bad3e13a Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Wed, 30 Oct 2019 17:25:05 -0400 Subject: [PATCH 009/128] [utils] add documentation for dummy nvtx macro --- common/utils/include/claragenomics/utils/cudautils.hpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/common/utils/include/claragenomics/utils/cudautils.hpp b/common/utils/include/claragenomics/utils/cudautils.hpp index 3ed2b8a66..8b3189a8b 100644 --- a/common/utils/include/claragenomics/utils/cudautils.hpp +++ b/common/utils/include/claragenomics/utils/cudautils.hpp @@ -112,7 +112,12 @@ class nvtx_range } }; #else -#define CGA_NVTX_RANGE(name) +/// \ingroup cudautils +/// \def CGA_NVTX_RANGE +/// \brief Dummy implementation for CGA_NVTX_RANGE macro +/// \param varname Unused variable +/// \param label Unused variable +#define CGA_NVTX_RANGE(varname, label) #endif // CGA_PROFILING } // namespace cudautils From 4dfa2bb715f6a0ffec8bdb8d91180fc8f6dc081e Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Thu, 31 Oct 2019 13:44:56 +0200 Subject: [PATCH 010/128] [cudapoa] #147 - fix joyjit's review comments --- cudapoa/benchmarks/CMakeLists.txt | 16 +++++++-- cudapoa/benchmarks/{run-batch => }/main.cpp | 2 +- .../{run-batch => }/multi_batch.hpp | 2 +- cudapoa/benchmarks/run-batch/CMakeLists.txt | 33 ------------------- .../{run-batch => }/single_batch.hpp | 2 +- cudapoa/tests/Test_CudapoaBatchEnd2End.cpp | 2 +- 6 files changed, 18 insertions(+), 39 deletions(-) rename cudapoa/benchmarks/{run-batch => }/main.cpp (98%) rename cudapoa/benchmarks/{run-batch => }/multi_batch.hpp (99%) delete mode 100644 cudapoa/benchmarks/run-batch/CMakeLists.txt rename cudapoa/benchmarks/{run-batch => }/single_batch.hpp (98%) diff --git a/cudapoa/benchmarks/CMakeLists.txt b/cudapoa/benchmarks/CMakeLists.txt index 36343efd3..eea3a29ec 100644 --- a/cudapoa/benchmarks/CMakeLists.txt +++ b/cudapoa/benchmarks/CMakeLists.txt @@ -8,8 +8,20 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. 
# -# Add benchmarks -add_subdirectory(run-batch) +## Add benchmark to cudapoa project + +project(benchmark_cudapoa) + +set(SOURCES + main.cpp + ) +get_property(cudapoa_data_include_dir GLOBAL PROPERTY cudapoa_data_include_dir) +include_directories(${cudapoa_data_include_dir}) + +set(LIBS + cudapoa) + +cga_add_benchmarks(${PROJECT_NAME} "cudapoa" "${SOURCES}" "${LIBS}") install(FILES README.md DESTINATION benchmarks/cudapoa) diff --git a/cudapoa/benchmarks/run-batch/main.cpp b/cudapoa/benchmarks/main.cpp similarity index 98% rename from cudapoa/benchmarks/run-batch/main.cpp rename to cudapoa/benchmarks/main.cpp index 508475b83..7c1c7df13 100644 --- a/cudapoa/benchmarks/run-batch/main.cpp +++ b/cudapoa/benchmarks/main.cpp @@ -8,7 +8,7 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. */ -#include "../common/utils.hpp" +#include "common/utils.hpp" #include "multi_batch.hpp" #include "single_batch.hpp" #include "file_location.hpp" diff --git a/cudapoa/benchmarks/run-batch/multi_batch.hpp b/cudapoa/benchmarks/multi_batch.hpp similarity index 99% rename from cudapoa/benchmarks/run-batch/multi_batch.hpp rename to cudapoa/benchmarks/multi_batch.hpp index 7831250ce..129d8386e 100644 --- a/cudapoa/benchmarks/run-batch/multi_batch.hpp +++ b/cudapoa/benchmarks/multi_batch.hpp @@ -8,7 +8,7 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. */ -#include "../common/utils.hpp" +#include "common/utils.hpp" #include #include diff --git a/cudapoa/benchmarks/run-batch/CMakeLists.txt b/cudapoa/benchmarks/run-batch/CMakeLists.txt deleted file mode 100644 index 54ad33de1..000000000 --- a/cudapoa/benchmarks/run-batch/CMakeLists.txt +++ /dev/null @@ -1,33 +0,0 @@ -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. -# - -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. -# - -project(benchmark_cudapoa_run_batch) - -set(SOURCES - main.cpp - ) - -get_property(cudapoa_data_include_dir GLOBAL PROPERTY cudapoa_data_include_dir) -include_directories(${cudapoa_data_include_dir}) - -set(LIBS - cudapoa) - -cga_add_benchmarks(${PROJECT_NAME} "cudapoa" "${SOURCES}" "${LIBS}") diff --git a/cudapoa/benchmarks/run-batch/single_batch.hpp b/cudapoa/benchmarks/single_batch.hpp similarity index 98% rename from cudapoa/benchmarks/run-batch/single_batch.hpp rename to cudapoa/benchmarks/single_batch.hpp index 5375213bd..24aa08ff5 100644 --- a/cudapoa/benchmarks/run-batch/single_batch.hpp +++ b/cudapoa/benchmarks/single_batch.hpp @@ -8,7 +8,7 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. 
*/ -#include "../common/utils.hpp" +#include "common/utils.hpp" #include #include diff --git a/cudapoa/tests/Test_CudapoaBatchEnd2End.cpp b/cudapoa/tests/Test_CudapoaBatchEnd2End.cpp index d92f142f3..bc5f2527a 100644 --- a/cudapoa/tests/Test_CudapoaBatchEnd2End.cpp +++ b/cudapoa/tests/Test_CudapoaBatchEnd2End.cpp @@ -8,7 +8,7 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. */ -#include "../benchmarks/run-batch/multi_batch.hpp" +#include "../benchmarks/multi_batch.hpp" #include "../benchmarks/common/utils.hpp" #include "file_location.hpp" From 817bf1eafef48c0b42b9cf2b848eb68bf1b536c1 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Thu, 31 Oct 2019 13:59:35 +0200 Subject: [PATCH 011/128] [cudapoa] fix CI benchmark script test Fixes #147 --- ci/common/build-test-sdk.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/common/build-test-sdk.sh b/ci/common/build-test-sdk.sh index eb0b6789d..d9016d378 100644 --- a/ci/common/build-test-sdk.sh +++ b/ci/common/build-test-sdk.sh @@ -63,7 +63,7 @@ if [ "$GPU_TEST" == '1' ]; then LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR find ${LOCAL_BUILD_DIR}/install/tests -type f -exec {} \; logger "Running ClaraGenomicsAnalysis benchmarks..." - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudapoa/benchmark_cudapoa_run_batch --benchmark_filter="BM_SingleBatchTest" + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudapoa/benchmark_cudapoa --benchmark_filter="BM_SingleBatchTest" LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch_singlealignment fi From 37b932c1faa5d06c3a9091b7179c64de2cfe2edf Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Thu, 31 Oct 2019 14:53:39 +0200 Subject: [PATCH 012/128] [cudapoa] Fix benchmark readme instructions Fixes #147 --- cudapoa/benchmarks/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cudapoa/benchmarks/README.md b/cudapoa/benchmarks/README.md index acc2c0c2f..2a1596dfd 100644 --- a/cudapoa/benchmarks/README.md +++ b/cudapoa/benchmarks/README.md @@ -7,7 +7,7 @@ of a single batch of POA. To run the benchmark, execute ``` -./benchmarks/cudapoa/benchmark_cudapoa_run_batch --benchmark_filter="BM_SingleBatchTest" +./benchmarks/cudapoa/benchmark_cudapoa --benchmark_filter="BM_SingleBatchTest" ``` ## Multi Batch @@ -17,5 +17,5 @@ of several batched CUDA POA stream that fill up the GPU. To the the benchmark, execute ``` -./benchmarks/cudapoa/benchmark_cudapoa_run_batch --benchmark_filter="BM_MultiBatchTest" +./benchmarks/cudapoa/benchmark_cudapoa --benchmark_filter="BM_MultiBatchTest" ``` From 4b9c98079b2b4f82f1967bc5435982e585f30160 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Thu, 31 Oct 2019 12:14:39 -0400 Subject: [PATCH 013/128] [README] update clone instructions --- README.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.md b/README.md index 23765e668..147be40f4 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,19 @@ A query fasta can be mapped to a reference as follows: To access more information about running cudamapper, run `cudamapper --help`. ## Clone Clara Genomics Analysis + +### Latest released version +This will clone the repo to the `master` branch, which contains code for latest released version +and hot-fixes. 
+ +``` +git clone --recursive -b master git@github.com:clara-genomics/ClaraGenomicsAnalysis.git +``` + +### Latest development version +This will clone the repo to the default branch, which is set to be the latest development branch. +This branch is subject to change frequently as features and bug fixes are pushed. + ```bash git clone --recursive git@github.com:clara-genomics/ClaraGenomicsAnalysis.git ``` From 06221a5eeae5b4b9a7d29f5ddef69dd1dee28289 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Fri, 1 Nov 2019 17:01:18 +0100 Subject: [PATCH 014/128] [3rdparty] turn off compiler warnings for spoa --- cmake/3rdparty.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/3rdparty.cmake b/cmake/3rdparty.cmake index c1204425d..59b055a84 100644 --- a/cmake/3rdparty.cmake +++ b/cmake/3rdparty.cmake @@ -33,4 +33,6 @@ endif() if (NOT TARGET spoa) add_subdirectory(3rdparty/spoa EXCLUDE_FROM_ALL) +# Don't show warnings when compiling the 3rd party library + target_compile_options(spoa PRIVATE -w) endif() From 120905dd15685bea232941c8614c9468f6ffebd1 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Thu, 31 Oct 2019 16:44:04 +0100 Subject: [PATCH 015/128] Initial implementation of MatcherGPU and matcher_gpu::create_new_value_mask (authored by Milos Maric ) --- cudamapper/CMakeLists.txt | 8 +- .../claragenomics/cudamapper/index.hpp | 6 +- cudamapper/src/matcher_gpu.cu | 64 ++++++++++ cudamapper/src/matcher_gpu.cuh | 51 ++++++++ cudamapper/tests/CMakeLists.txt | 4 +- cudamapper/tests/Test_CudamapperMatcherGPU.cu | 120 ++++++++++++++++++ 6 files changed, 248 insertions(+), 5 deletions(-) create mode 100644 cudamapper/src/matcher_gpu.cu create mode 100644 cudamapper/src/matcher_gpu.cuh create mode 100644 cudamapper/tests/Test_CudamapperMatcherGPU.cu diff --git a/cudamapper/CMakeLists.txt b/cudamapper/CMakeLists.txt index 4c23ff3f8..19c831244 100644 --- a/cudamapper/CMakeLists.txt +++ b/cudamapper/CMakeLists.txt @@ -35,6 +35,12 @@ target_include_directories(matcher PUBLIC include) target_link_libraries(matcher logging utils cgaio) target_compile_options(matcher PRIVATE -Werror) +cuda_add_library(matcher_gpu + src/matcher_gpu.cu) +target_include_directories(matcher_gpu PUBLIC include) +target_link_libraries(matcher_gpu logging utils cgaio) +target_compile_options(matcher_gpu PRIVATE -Werror) + add_library(cudamapper_utils src/cudamapper_utils.cpp) target_include_directories(cudamapper_utils PUBLIC include) @@ -62,7 +68,7 @@ target_include_directories(cudamapper $ ) -target_link_libraries(cudamapper utils index_gpu matcher logging overlapper_triggerred cudamapper_utils) +target_link_libraries(cudamapper utils index_gpu matcher matcher_gpu logging overlapper_triggerred cudamapper_utils) # Add tests folder add_subdirectory(tests) diff --git a/cudamapper/include/claragenomics/cudamapper/index.hpp b/cudamapper/include/claragenomics/cudamapper/index.hpp index 77810b05e..786be8466 100644 --- a/cudamapper/include/claragenomics/cudamapper/index.hpp +++ b/cudamapper/include/claragenomics/cudamapper/index.hpp @@ -13,9 +13,9 @@ #include #include #include -#include "claragenomics/cudamapper/sketch_element.hpp" -#include "claragenomics/cudamapper/types.hpp" -#include "claragenomics/io/fasta_parser.hpp" +#include +#include +#include namespace claragenomics { diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu new file mode 100644 index 000000000..e6a2b786b --- /dev/null +++ b/cudamapper/src/matcher_gpu.cu @@ -0,0 +1,64 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. 
All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. +*/ + +#include "matcher_gpu.cuh" + +namespace claragenomics +{ + +namespace cudamapper +{ + +MatcherGPU::MatcherGPU(const Index& query_index, + const Index& target_index) +{ +} + +std::vector& MatcherGPU::anchors() +{ + return anchors_h_; +} + +namespace details +{ + +namespace matcher_gpu +{ + +__global__ void create_new_value_mask(const representation_t* const representations_d, + const std::size_t number_of_elements, + std::uint8_t* const new_value_mask_d) +{ + std::uint64_t index = blockIdx.x * blockDim.x + threadIdx.x; + + if (index >= number_of_elements) + return; + + if (index == 0) + { + new_value_mask_d[0] = 1; + } + else + { + if (representations_d[index] == representations_d[index - 1]) + { + new_value_mask_d[index] = 0; + } + else + new_value_mask_d[index] = 1; + } +} + +} // namespace matcher_gpu + +} // namespace details +} // namespace cudamapper + +} // namespace claragenomics diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh new file mode 100644 index 000000000..a3e596d9d --- /dev/null +++ b/cudamapper/src/matcher_gpu.cuh @@ -0,0 +1,51 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. +*/ + +#pragma once + +#include +#include +#include + +namespace claragenomics +{ + +namespace cudamapper +{ + +class MatcherGPU +{ +public: + MatcherGPU(const Index& query_index, + const Index& target_index); + + std::vector& anchors(); + +private: + std::vector anchors_h_; +}; + +namespace details +{ + +namespace matcher_gpu +{ + +/// \brief Writes 0 to the output array if the value to the left is the same as the current value, 1 otherwise. 
First element is always 1 +__global__ void create_new_value_mask(const representation_t* const representations_d, + const std::size_t number_of_elements, + std::uint8_t* const new_value_mask_d); +} // namespace matcher_gpu + +} // namespace details + +} // namespace cudamapper + +} // namespace claragenomics diff --git a/cudamapper/tests/CMakeLists.txt b/cudamapper/tests/CMakeLists.txt index bfeb95069..044317476 100644 --- a/cudamapper/tests/CMakeLists.txt +++ b/cudamapper/tests/CMakeLists.txt @@ -15,6 +15,7 @@ set(SOURCES main.cpp Test_CudamapperIndexGPU.cu Test_CudamapperMatcher.cu + Test_CudamapperMatcherGPU.cu Test_CudamapperMinimizer.cpp Test_CudamapperOverlapperTriggered.cu ../src/bioparser_sequence.cpp) @@ -26,7 +27,8 @@ set(LIBS bioparser index_gpu matcher + matcher_gpu overlapper_triggerred cudamapper_utils) -cga_add_tests(${PROJECT_NAME} "${SOURCES}" "${LIBS}") \ No newline at end of file +cga_add_tests(${PROJECT_NAME} "${SOURCES}" "${LIBS}") diff --git a/cudamapper/tests/Test_CudamapperMatcherGPU.cu b/cudamapper/tests/Test_CudamapperMatcherGPU.cu new file mode 100644 index 000000000..171607d6e --- /dev/null +++ b/cudamapper/tests/Test_CudamapperMatcherGPU.cu @@ -0,0 +1,120 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. +*/ + +#include "gtest/gtest.h" + +#include +#include + +#include + +#include "../src/matcher_gpu.cuh" + +namespace claragenomics +{ + +namespace cudamapper +{ + +void test_create_new_value_mask(const thrust::host_vector& representations_h, + const thrust::host_vector& expected_new_value_mask_h, + std::uint32_t number_of_threads) +{ + thrust::device_vector representations_d(representations_h); + thrust::device_vector new_value_mask_d(representations_h.size()); + + std::uint32_t number_of_blocks = (representations_h.size() - 1) / number_of_threads + 1; + + details::matcher_gpu::create_new_value_mask<<>>(thrust::raw_pointer_cast(representations_d.data()), + representations_d.size(), + thrust::raw_pointer_cast(new_value_mask_d.data())); + + CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); + + thrust::host_vector new_value_mask_h(new_value_mask_d); + + ASSERT_EQ(new_value_mask_h.size(), expected_new_value_mask_h.size()); + for (std::size_t i = 0; i < expected_new_value_mask_h.size(); ++i) + { + EXPECT_EQ(new_value_mask_h[i], expected_new_value_mask_h[i]) << "index: " << i; + } +} + +TEST(TestCudamapperMatcherGPU, test_create_new_value_mask_small_example) +{ + thrust::host_vector representations_h; + thrust::host_vector expected_new_value_mask_h; + representations_h.push_back(0); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(0); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(0); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(0); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(0); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(3); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(3); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(3); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(4); + 
expected_new_value_mask_h.push_back(1); + representations_h.push_back(5); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(5); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(8); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(8); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(8); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(9); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(9); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(9); + expected_new_value_mask_h.push_back(0); + + std::uint32_t number_of_threads = 3; + + test_create_new_value_mask(representations_h, + expected_new_value_mask_h, + number_of_threads); +} + +TEST(TestCudamapperMatcherGPU, test_create_new_value_mask_small_data_large_example) +{ + std::uint64_t total_sketch_elements = 10000000; + std::uint32_t sketch_elements_with_same_representation = 1000; + + thrust::host_vector representations_h; + thrust::host_vector expected_new_value_mask_h; + for (std::size_t i = 0; i < total_sketch_elements; ++i) + { + representations_h.push_back(i / sketch_elements_with_same_representation); + if (i % sketch_elements_with_same_representation == 0) + expected_new_value_mask_h.push_back(1); + else + expected_new_value_mask_h.push_back(0); + } + + std::uint32_t number_of_threads = 256; + + test_create_new_value_mask(representations_h, + expected_new_value_mask_h, + number_of_threads); +} + +} // namespace cudamapper +} // namespace claragenomics From ace7d4ad3cf8c2ac4c101fb42ec5807b52bf2f0e Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Thu, 31 Oct 2019 17:27:41 +0100 Subject: [PATCH 016/128] Index now also saves and returns representations array (authored by Milos Maric ) --- .../claragenomics/cudamapper/index.hpp | 4 + cudamapper/src/index_gpu.cuh | 20 ++- cudamapper/tests/Test_CudamapperIndexGPU.cu | 114 +++++++++++++----- cudamapper/tests/Test_CudamapperMatcher.cu | 3 + 4 files changed, 105 insertions(+), 36 deletions(-) diff --git a/cudamapper/include/claragenomics/cudamapper/index.hpp b/cudamapper/include/claragenomics/cudamapper/index.hpp index 786be8466..b4751818c 100644 --- a/cudamapper/include/claragenomics/cudamapper/index.hpp +++ b/cudamapper/include/claragenomics/cudamapper/index.hpp @@ -43,6 +43,10 @@ class Index /// \brief Virtual destructor for Index virtual ~Index() = default; + /// \brief returns an array of representations of sketch elements + /// \return an array of representations of sketch elements + virtual const std::vector& representations() const = 0; + /// \brief returns an array of starting positions of sketch elements in their reads /// \return an array of starting positions of sketch elements in their reads virtual const std::vector& positions_in_reads() const = 0; diff --git a/cudamapper/src/index_gpu.cuh b/cudamapper/src/index_gpu.cuh index 7d4e1fd55..71b6c2259 100644 --- a/cudamapper/src/index_gpu.cuh +++ b/cudamapper/src/index_gpu.cuh @@ -62,6 +62,10 @@ public: /// \brief Constructor IndexGPU(); + /// \brief returns an array of representations of sketch elements + /// \return an array of representations of sketch elements + const std::vector& representations() const override; + /// \brief returns an array of starting positions of sketch elements in their reads /// \return an array of starting positions of sketch elements in their reads const std::vector& positions_in_reads() const override; @@ -112,6 +116,7 @@ private: std::uint64_t 
number_of_reads_; bool reached_end_of_input_; + std::vector representations_; std::vector positions_in_reads_; std::vector read_ids_; std::vector directions_of_reads_; @@ -509,6 +514,12 @@ IndexGPU::IndexGPU() { } +template +const std::vector& IndexGPU::representations() const +{ + return representations_; +}; + template const std::vector& IndexGPU::positions_in_reads() const { @@ -705,7 +716,6 @@ void IndexGPU::generate_index(const std::vector merged_representations_h; std::vector merged_rest_h; if (representations_from_all_loops_h.size() > 1) @@ -718,14 +728,14 @@ void IndexGPU::generate_index(const std::vector::generate_index(const std::vector& expected_read_id_to_read_name, const std::vector& expected_read_id_to_read_length, const std::vector>& expected_read_id_and_representation_to_sketch_elements, + const std::vector& expected_representations, const std::vector& expected_positions_in_reads, const std::vector& expected_read_ids, const std::vector& expected_directions_of_reads) @@ -76,16 +77,20 @@ void test_function(const std::string& filename, } // check arrays + const std::vector& representations = index.representations(); const std::vector& positions_in_reads = index.positions_in_reads(); const std::vector& read_ids = index.read_ids(); const std::vector& directions_of_reads = index.directions_of_reads(); + ASSERT_EQ(representations.size(), expected_representations.size()); ASSERT_EQ(positions_in_reads.size(), expected_positions_in_reads.size()); ASSERT_EQ(read_ids.size(), expected_read_ids.size()); ASSERT_EQ(directions_of_reads.size(), expected_directions_of_reads.size()); + ASSERT_EQ(representations.size(), positions_in_reads.size()); ASSERT_EQ(positions_in_reads.size(), read_ids.size()); - ASSERT_EQ(positions_in_reads.size(), directions_of_reads.size()); + ASSERT_EQ(read_ids.size(), directions_of_reads.size()); for (std::size_t i = 0; i < expected_positions_in_reads.size(); ++i) { + EXPECT_EQ(representations[i], expected_representations[i]) << "i: " << i; EXPECT_EQ(positions_in_reads[i], expected_positions_in_reads[i]) << "i: " << i; EXPECT_EQ(read_ids[i], expected_read_ids[i]) << "i: " << i; EXPECT_EQ(directions_of_reads[i], expected_directions_of_reads[i]) << "i: " << i; @@ -118,9 +123,11 @@ TEST(TestCudamapperIndexGPU, GATT_4_1) std::vector> expected_read_id_and_representation_to_sketch_elements(1); expected_read_id_and_representation_to_sketch_elements[0].push_back({0b00001101, {0, 1}, {0, 1}}); + std::vector expected_representations; std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; + expected_representations.push_back(0b1101); expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); @@ -132,6 +139,7 @@ TEST(TestCudamapperIndexGPU, GATT_4_1) expected_read_id_to_read_name, expected_read_id_to_read_length, expected_read_id_and_representation_to_sketch_elements, + expected_representations, expected_positions_in_reads, expected_read_ids, expected_directions_of_reads); @@ -187,16 +195,21 @@ TEST(TestCudamapperIndexGPU, GATT_2_3) expected_read_id_and_representation_to_sketch_elements[0].push_back({0b0011, {1, 1}, {1, 1}}); // AT expected_read_id_and_representation_to_sketch_elements[0].push_back({0b1000, {2, 1}, {2, 1}}); // GA + std::vector expected_representations; std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; - 
expected_positions_in_reads.push_back(2); // AA(2r0) + + expected_representations.push_back(0b0000); // AA(2r0) + expected_positions_in_reads.push_back(2); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_positions_in_reads.push_back(1); // AT(1f0) + expected_representations.push_back(0b0011); // AT(1f0) + expected_positions_in_reads.push_back(1); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(0); // GA(0f0) + expected_representations.push_back(0b1000); // GA(0f0) + expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); @@ -207,6 +220,7 @@ TEST(TestCudamapperIndexGPU, GATT_2_3) expected_read_id_to_read_name, expected_read_id_to_read_length, expected_read_id_and_representation_to_sketch_elements, + expected_representations, expected_positions_in_reads, expected_read_ids, expected_directions_of_reads); @@ -233,6 +247,7 @@ TEST(TestCudamapperIndexGPU, CCCATACC_2_8) std::vector> expected_read_id_and_representation_to_sketch_elements(0); + std::vector expected_representations; std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; @@ -244,6 +259,7 @@ TEST(TestCudamapperIndexGPU, CCCATACC_2_8) expected_read_id_to_read_name, expected_read_id_to_read_length, expected_read_id_and_representation_to_sketch_elements, + expected_representations, expected_positions_in_reads, expected_read_ids, expected_directions_of_reads); @@ -314,16 +330,20 @@ TEST(TestCudamapperIndexGPU, CATCAAG_AAGCTA_3_5) expected_read_id_and_representation_to_sketch_elements[0].push_back({0b001101, {1, 1}, {1, 1}}); // ATC expected_read_id_and_representation_to_sketch_elements[0].push_back({0b001110, {2, 1}, {2, 1}}); // ATG + std::vector expected_representations; std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; - expected_positions_in_reads.push_back(4); // AAG(4f0) + expected_representations.push_back(0b000010); // AAG(4f0) + expected_positions_in_reads.push_back(4); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(1); // ATC(1f0) + expected_representations.push_back(0b001101); // ATC(1f0) + expected_positions_in_reads.push_back(1); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(0); // ATG(0r0) + expected_representations.push_back(0b001110); // ATG(0r0) + expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); @@ -334,6 +354,7 @@ TEST(TestCudamapperIndexGPU, CATCAAG_AAGCTA_3_5) expected_read_id_to_read_name, expected_read_id_to_read_length, expected_read_id_and_representation_to_sketch_elements, + expected_representations, expected_positions_in_reads, expected_read_ids, expected_directions_of_reads); @@ -401,22 +422,28 @@ TEST(TestCudamapperIndexGPU, CCCATACC_3_5) expected_read_id_and_representation_to_sketch_elements[0].push_back({0b010100, {3, 1}, {3, 1}}); // CCA expected_read_id_and_representation_to_sketch_elements[0].push_back({0b010101, {4, 1}, {4, 1}}); // CCC + 
std::vector expected_representations; std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; - expected_positions_in_reads.push_back(5); // ACC(5f0) + expected_representations.push_back(0b000101); // ACC(5f0) + expected_positions_in_reads.push_back(5); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(3); // ATA(3f0) + expected_representations.push_back(0b001100); // ATA(3f0) + expected_positions_in_reads.push_back(3); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(2); // ATG(2r0) + expected_representations.push_back(0b001110); // ATG(2r0) + expected_positions_in_reads.push_back(2); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_positions_in_reads.push_back(1); // CAA(1f0) + expected_representations.push_back(0b010100); // CCA(1f0) + expected_positions_in_reads.push_back(1); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(0); // CCC(0f0) + expected_representations.push_back(0b010101); // CCC(0f0) + expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); @@ -427,6 +454,7 @@ TEST(TestCudamapperIndexGPU, CCCATACC_3_5) expected_read_id_to_read_name, expected_read_id_to_read_length, expected_read_id_and_representation_to_sketch_elements, + expected_representations, expected_positions_in_reads, expected_read_ids, expected_directions_of_reads); @@ -517,28 +545,37 @@ TEST(TestCudamapperIndexGPU, CATCAAG_AAGCTA_3_2) expected_read_id_and_representation_to_sketch_elements[1].push_back({0b001001, {2, 1}, {2, 1}}); // AGC expected_read_id_and_representation_to_sketch_elements[1].push_back({0b011100, {6, 1}, {6, 1}}); // CTA + std::vector expected_representations; std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; - expected_positions_in_reads.push_back(4); // AAG(4f0) + + expected_representations.push_back(0b000010); // AAG(4f0) + expected_positions_in_reads.push_back(4); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(0); // AAG(0f1) + expected_representations.push_back(0b000010); // AAG(0f1) + expected_positions_in_reads.push_back(0); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(2); // AGC(2r1) + expected_representations.push_back(0b001001); // AGC(2r1) + expected_positions_in_reads.push_back(2); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_positions_in_reads.push_back(1); // ATC(1f0) + expected_representations.push_back(0b001101); // ATC(1f0) + expected_positions_in_reads.push_back(1); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(0); // ATG(0r0) + expected_representations.push_back(0b001110); // ATG(0r0) + 
expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_positions_in_reads.push_back(3); // CAA(3f0) + expected_representations.push_back(0b010000); // CAA(3f0) + expected_positions_in_reads.push_back(3); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(3); // CTA(3f1) + expected_representations.push_back(0b011100); // CTA(3f1) + expected_positions_in_reads.push_back(3); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); @@ -549,6 +586,7 @@ TEST(TestCudamapperIndexGPU, CATCAAG_AAGCTA_3_2) expected_read_id_to_read_name, expected_read_id_to_read_length, expected_read_id_and_representation_to_sketch_elements, + expected_representations, expected_positions_in_reads, expected_read_ids, expected_directions_of_reads); @@ -652,43 +690,56 @@ TEST(TestCudamapperIndexGPU, AAAACTGAA_GCCAAAG_2_3) expected_read_id_and_representation_to_sketch_elements[1].push_back({0b0101, {10, 1}, {10, 1}}); // CC expected_read_id_and_representation_to_sketch_elements[1].push_back({0b1001, {11, 1}, {11, 1}}); // GC + std::vector expected_representations; std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; - expected_positions_in_reads.push_back(0); // AA(0f0) + expected_representations.push_back(0b0000); // AA(0f0) + expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(1); // AA(1f0) + expected_representations.push_back(0b0000); // AA(1f0) + expected_positions_in_reads.push_back(1); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(2); // AA(2f0) + expected_representations.push_back(0b0000); // AA(2f0) + expected_positions_in_reads.push_back(2); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(7); // AA(7f0) + expected_representations.push_back(0b0000); // AA(7f0) + expected_positions_in_reads.push_back(7); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(3); // AA(3f1) + expected_representations.push_back(0b0000); // AA(3f1) + expected_positions_in_reads.push_back(3); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(4); // AA(4f1) + expected_representations.push_back(0b0000); // AA(4f1) + expected_positions_in_reads.push_back(4); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(3); // AC(3f0) + expected_representations.push_back(0b0001); // AC(3f0) + expected_positions_in_reads.push_back(3); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(4); // AG(4r0) + expected_representations.push_back(0b0010); // AG(4r0) + 
expected_positions_in_reads.push_back(4); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_positions_in_reads.push_back(5); // AG(5f1) + expected_representations.push_back(0b0010); // AG(5f1) + expected_positions_in_reads.push_back(5); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(2); // CA(2f1) + expected_representations.push_back(0b0100); // CA(2f1) + expected_positions_in_reads.push_back(2); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(1); // CC(1f1) + expected_representations.push_back(0b0101); // CC(1f1) + expected_positions_in_reads.push_back(1); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_positions_in_reads.push_back(0); // GC(0f1) + expected_representations.push_back(0b1001); // GC(0f1) + expected_positions_in_reads.push_back(0); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); @@ -699,6 +750,7 @@ TEST(TestCudamapperIndexGPU, AAAACTGAA_GCCAAAG_2_3) expected_read_id_to_read_name, expected_read_id_to_read_length, expected_read_id_and_representation_to_sketch_elements, + expected_representations, expected_positions_in_reads, expected_read_ids, expected_directions_of_reads); diff --git a/cudamapper/tests/Test_CudamapperMatcher.cu b/cudamapper/tests/Test_CudamapperMatcher.cu index c9a31d2e8..180418fbd 100644 --- a/cudamapper/tests/Test_CudamapperMatcher.cu +++ b/cudamapper/tests/Test_CudamapperMatcher.cu @@ -90,6 +90,7 @@ class TestIndex : public Index { public: // getters + const std::vector& representations() const override { return representations_; } const std::vector& positions_in_reads() const override { return positions_in_reads_; } const std::vector& read_ids() const override { return read_ids_; } const std::vector& directions_of_reads() const override { return directions_of_reads_; } @@ -99,6 +100,7 @@ public: const std::vector>& read_id_and_representation_to_sketch_elements() const override { return read_id_and_representation_to_sketch_elements_; } // setters + void representations(const std::vector& val) { representations_ = val; } void positions_in_reads(const std::vector& val) { positions_in_reads_ = val; } void read_ids(const std::vector& val) { read_ids_ = val; } void directions_of_reads(const std::vector& val) { directions_of_reads_ = val; } @@ -110,6 +112,7 @@ public: bool reached_end_of_input() const override { return false; }; private: + std::vector representations_; std::vector positions_in_reads_; std::vector read_ids_; std::vector directions_of_reads_; From 8e7aa17f4302594f8c1379c70ffa44518886d6bb Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Thu, 31 Oct 2019 17:46:25 +0100 Subject: [PATCH 017/128] Implementation of copy_index_of_first_occurence (authored by Milos Maric ) --- cudamapper/src/matcher_gpu.cu | 25 ++++++ cudamapper/src/matcher_gpu.cuh | 28 +++++++ cudamapper/tests/Test_CudamapperMatcherGPU.cu | 84 ++++++++++++++++++- 3 files changed, 133 insertions(+), 4 deletions(-) diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index e6a2b786b..384cbbe78 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -56,6 +56,31 @@ __global__ 
void create_new_value_mask(const representation_t* const representati } } +__global__ void copy_index_of_first_occurence(const std::uint64_t* const representation_index_mask_d, + const std::size_t number_of_input_elements, + std::size_t* const starting_index_of_each_representation) +{ + std::uint64_t index = blockIdx.x * blockDim.x + threadIdx.x; + + if (index >= number_of_input_elements) + return; + + if (index == 0) + { + starting_index_of_each_representation[0] = 0; + } + else + { + if (representation_index_mask_d[index] != representation_index_mask_d[index - 1]) + { + // if new representation (= not the same as its left neighbor) + // save the index at which that representation starts + // representation_index_mask_d gives a unique index to each representation, starting from 1, thus '-1' + starting_index_of_each_representation[representation_index_mask_d[index] - 1] = index; + } + } +} + } // namespace matcher_gpu } // namespace details diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index a3e596d9d..371125d61 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -39,9 +39,37 @@ namespace matcher_gpu { /// \brief Writes 0 to the output array if the value to the left is the same as the current value, 1 otherwise. First element is always 1 +/// +/// For example: +/// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +/// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 +/// gives: +/// 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 +/// +/// \param representations_d +/// \param number_of_elements +/// \param new_value_mask_d generated array __global__ void create_new_value_mask(const representation_t* const representations_d, const std::size_t number_of_elements, std::uint8_t* const new_value_mask_d); + +/// \brief Creates an array in which each element represents the index in representation_index_mask_d at which a new representation starts +/// +/// For example: +/// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +/// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 +/// 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 +/// 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5 +/// ^ ^ ^ ^ ^ +/// gives: +/// 0 4 10 13 18 +/// +/// \param representation_index_mask_d +/// \param number_of_input_elements +/// \param starting_index_of_each_representation +__global__ void copy_index_of_first_occurence(const std::uint64_t* const representation_index_mask_d, + const std::size_t number_of_input_elements, + std::size_t* const starting_index_of_each_representation); } // namespace matcher_gpu } // namespace details diff --git a/cudamapper/tests/Test_CudamapperMatcherGPU.cu b/cudamapper/tests/Test_CudamapperMatcherGPU.cu index 171607d6e..0d714a9f1 100644 --- a/cudamapper/tests/Test_CudamapperMatcherGPU.cu +++ b/cudamapper/tests/Test_CudamapperMatcherGPU.cu @@ -27,7 +27,7 @@ void test_create_new_value_mask(const thrust::host_vector& rep const thrust::host_vector& expected_new_value_mask_h, std::uint32_t number_of_threads) { - thrust::device_vector representations_d(representations_h); + const thrust::device_vector representations_d(representations_h); thrust::device_vector new_value_mask_d(representations_h.size()); std::uint32_t number_of_blocks = (representations_h.size() - 1) / number_of_threads + 1; @@ -38,7 +38,7 @@ void test_create_new_value_mask(const thrust::host_vector& rep CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); - thrust::host_vector new_value_mask_h(new_value_mask_d); + const thrust::host_vector 
new_value_mask_h(new_value_mask_d); ASSERT_EQ(new_value_mask_h.size(), expected_new_value_mask_h.size()); for (std::size_t i = 0; i < expected_new_value_mask_h.size(); ++i) @@ -95,8 +95,8 @@ TEST(TestCudamapperMatcherGPU, test_create_new_value_mask_small_example) TEST(TestCudamapperMatcherGPU, test_create_new_value_mask_small_data_large_example) { - std::uint64_t total_sketch_elements = 10000000; - std::uint32_t sketch_elements_with_same_representation = 1000; + const std::uint64_t total_sketch_elements = 10000000; + const std::uint32_t sketch_elements_with_same_representation = 1000; thrust::host_vector representations_h; thrust::host_vector expected_new_value_mask_h; @@ -115,6 +115,82 @@ TEST(TestCudamapperMatcherGPU, test_create_new_value_mask_small_data_large_examp expected_new_value_mask_h, number_of_threads); } +void test_copy_index_of_first_occurence(const thrust::host_vector& representation_index_mask_h, + const thrust::host_vector& expected_starting_index_of_each_representation_h, + const std::uint32_t number_of_threads) +{ + const thrust::device_vector representation_index_mask_d(representation_index_mask_h); + ASSERT_EQ(expected_starting_index_of_each_representation_h.size(), representation_index_mask_h.back()); + thrust::device_vector starting_index_of_each_representation_d(expected_starting_index_of_each_representation_h.size()); + + std::uint32_t number_of_blocks = (representation_index_mask_d.size() - 1) / number_of_threads + 1; + + details::matcher_gpu::copy_index_of_first_occurence<<>>(thrust::raw_pointer_cast(representation_index_mask_d.data()), + representation_index_mask_d.size(), + thrust::raw_pointer_cast(starting_index_of_each_representation_d.data())); + CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); + + const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); + + ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); + for (std::size_t i = 0; i < expected_starting_index_of_each_representation_h.size(); ++i) + { + EXPECT_EQ(starting_index_of_each_representation_h[i], expected_starting_index_of_each_representation_h[i]) << "index: " << i; + } +} + +TEST(TestCudamapperMatcherGPU, test_copy_index_of_first_occurence_small_example) +{ + thrust::host_vector representation_index_mask_h; + thrust::host_vector expected_starting_index_of_each_representation_h; + representation_index_mask_h.push_back(1); + expected_starting_index_of_each_representation_h.push_back(0); + representation_index_mask_h.push_back(1); + representation_index_mask_h.push_back(1); + representation_index_mask_h.push_back(1); + representation_index_mask_h.push_back(2); + expected_starting_index_of_each_representation_h.push_back(4); + representation_index_mask_h.push_back(3); + expected_starting_index_of_each_representation_h.push_back(5); + representation_index_mask_h.push_back(3); + representation_index_mask_h.push_back(3); + representation_index_mask_h.push_back(3); + representation_index_mask_h.push_back(4); + expected_starting_index_of_each_representation_h.push_back(9); + representation_index_mask_h.push_back(4); + representation_index_mask_h.push_back(4); + representation_index_mask_h.push_back(5); + expected_starting_index_of_each_representation_h.push_back(12); + representation_index_mask_h.push_back(6); + expected_starting_index_of_each_representation_h.push_back(13); + + std::uint32_t number_of_threads = 3; + + test_copy_index_of_first_occurence(representation_index_mask_h, + 
expected_starting_index_of_each_representation_h, + number_of_threads); +} + +TEST(TestCudamapperMatcherGPU, test_copy_index_of_first_occurence_large_example) +{ + const std::uint64_t total_sketch_elements = 10000000; + const std::uint32_t sketch_elements_with_same_representation = 1000; + + thrust::host_vector representation_index_mask_h; + thrust::host_vector expected_starting_index_of_each_representation_h; + for (std::size_t i = 0; i < total_sketch_elements; ++i) + { + representation_index_mask_h.push_back(i / sketch_elements_with_same_representation + 1); + if (i % sketch_elements_with_same_representation == 0) + expected_starting_index_of_each_representation_h.push_back(i); + } + + std::uint32_t number_of_threads = 256; + + test_copy_index_of_first_occurence(representation_index_mask_h, + expected_starting_index_of_each_representation_h, + number_of_threads); +} } // namespace cudamapper } // namespace claragenomics From b919a205d240242e91866e31d031574667cbea00 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Thu, 31 Oct 2019 18:03:00 +0100 Subject: [PATCH 018/128] Implementaiton of find_first_occurrences_of_representations (authored by Milos Maric ) --- cudamapper/src/matcher_gpu.cu | 55 +++++++++- cudamapper/src/matcher_gpu.cuh | 20 +++- cudamapper/tests/Test_CudamapperMatcherGPU.cu | 100 ++++++++++++++++-- 3 files changed, 162 insertions(+), 13 deletions(-) diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index 384cbbe78..cfe5b7817 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -10,6 +10,11 @@ #include "matcher_gpu.cuh" +#include +#include + +#include + namespace claragenomics { @@ -31,10 +36,56 @@ namespace details namespace matcher_gpu { +thrust::device_vector find_first_occurrences_of_representations(const thrust::device_vector& representations_d) +{ + // each element has value 1 if representation with the same index in representations_d has a different value than it's neighbour to the left, 0 otehrwise + // underlying type is 32-bit because a scan operation will be performed on the array, so the elements should be capable of holding a number that is equal to + // the total number of 1s in the array + thrust::device_vector new_value_mask_d(representations_d.size()); + + // TODO: Currently maximum number of thread blocks is 2^31-1. This means we support representations of up to (2^31-1) * number_of_threads + // With 256 that's (2^31-1)*2^8 ~= 2^39. If representation is 4-byte (we expect it to be 4 or 8) that's 2^39*2^2 = 2^41 = 2TB. 
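// (Hedged aside, not part of the original patch: the grid-size assumption above could be made
//  explicit with a guard before the kernel launches below, e.g.
//      const std::size_t needed_blocks = (representations_d.size() - 1) / 256 + 1;
//      assert(needed_blocks <= static_cast<std::size_t>(std::numeric_limits<std::int32_t>::max())); // 2^31-1 blocks in grid.x
//  which would additionally require <cassert> and <limits>.)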
We don't expect to hit this limit any time soon + // The kernel can be modified to process several representation per thread to support arbitrary size + std::uint32_t number_of_threads = 256; // arbitrary value + std::uint32_t number_of_blocks = (representations_d.size() - 1) / number_of_threads + 1; + + create_new_value_mask<<>>(thrust::raw_pointer_cast(representations_d.data()), + representations_d.size(), + thrust::raw_pointer_cast(new_value_mask_d.data())); + CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); // sync not necessary, here only to detect the error immediately + + // do inclusive scan + // for example for + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 + // 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 + // 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 + // gives + // 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5 + // meaning all elements with the same representation have the same value and those values are sorted in increasing order starting from 1 + thrust::device_vector representation_index_mask_d(new_value_mask_d.size()); + thrust::inclusive_scan(thrust::device, + new_value_mask_d.begin(), + new_value_mask_d.end(), + representation_index_mask_d.begin()); + new_value_mask_d.clear(); + new_value_mask_d.shrink_to_fit(); + + std::uint64_t number_of_unique_representations = representation_index_mask_d.back(); // D2H copy + + thrust::device_vector starting_index_of_each_representation(number_of_unique_representations + 1); + + copy_index_of_first_occurence<<>>(thrust::raw_pointer_cast(representation_index_mask_d.data()), + representation_index_mask_d.size(), + thrust::raw_pointer_cast(starting_index_of_each_representation.data())); + // last element is the total number of elements in representations array + starting_index_of_each_representation.back() = representations_d.size(); // H2D copy + + return starting_index_of_each_representation; +} __global__ void create_new_value_mask(const representation_t* const representations_d, const std::size_t number_of_elements, - std::uint8_t* const new_value_mask_d) + std::uint32_t* const new_value_mask_d) { std::uint64_t index = blockIdx.x * blockDim.x + threadIdx.x; @@ -58,7 +109,7 @@ __global__ void create_new_value_mask(const representation_t* const representati __global__ void copy_index_of_first_occurence(const std::uint64_t* const representation_index_mask_d, const std::size_t number_of_input_elements, - std::size_t* const starting_index_of_each_representation) + std::uint32_t* const starting_index_of_each_representation) { std::uint64_t index = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index 371125d61..a6a2e067c 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -11,6 +11,7 @@ #pragma once #include +#include #include #include @@ -37,6 +38,21 @@ namespace details namespace matcher_gpu { +/// \brief Creates compressed representation of index +/// +/// Creates an array in which n-th element represents the first occurrence of n-th representation. 
+/// Last element of the array is the total number of elements in representations_d array +/// +/// For example: +/// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +/// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 +/// ^ ^ ^ ^ ^ ^ +/// gives: +/// 0 4 10 13 18 21 +/// +/// \param representations_d +/// \return first_element_for_representation +thrust::device_vector find_first_occurrences_of_representations(const thrust::device_vector& representations_d); /// \brief Writes 0 to the output array if the value to the left is the same as the current value, 1 otherwise. First element is always 1 /// @@ -51,7 +67,7 @@ namespace matcher_gpu /// \param new_value_mask_d generated array __global__ void create_new_value_mask(const representation_t* const representations_d, const std::size_t number_of_elements, - std::uint8_t* const new_value_mask_d); + std::uint32_t* const new_value_mask_d); /// \brief Creates an array in which each element represents the index in representation_index_mask_d at which a new representation starts /// @@ -69,7 +85,7 @@ __global__ void create_new_value_mask(const representation_t* const representati /// \param starting_index_of_each_representation __global__ void copy_index_of_first_occurence(const std::uint64_t* const representation_index_mask_d, const std::size_t number_of_input_elements, - std::size_t* const starting_index_of_each_representation); + std::uint32_t* const starting_index_of_each_representation); } // namespace matcher_gpu } // namespace details diff --git a/cudamapper/tests/Test_CudamapperMatcherGPU.cu b/cudamapper/tests/Test_CudamapperMatcherGPU.cu index 0d714a9f1..e8a1ba369 100644 --- a/cudamapper/tests/Test_CudamapperMatcherGPU.cu +++ b/cudamapper/tests/Test_CudamapperMatcherGPU.cu @@ -24,11 +24,11 @@ namespace cudamapper { void test_create_new_value_mask(const thrust::host_vector& representations_h, - const thrust::host_vector& expected_new_value_mask_h, + const thrust::host_vector& expected_new_value_mask_h, std::uint32_t number_of_threads) { const thrust::device_vector representations_d(representations_h); - thrust::device_vector new_value_mask_d(representations_h.size()); + thrust::device_vector new_value_mask_d(representations_h.size()); std::uint32_t number_of_blocks = (representations_h.size() - 1) / number_of_threads + 1; @@ -38,7 +38,7 @@ void test_create_new_value_mask(const thrust::host_vector& rep CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); - const thrust::host_vector new_value_mask_h(new_value_mask_d); + const thrust::host_vector new_value_mask_h(new_value_mask_d); ASSERT_EQ(new_value_mask_h.size(), expected_new_value_mask_h.size()); for (std::size_t i = 0; i < expected_new_value_mask_h.size(); ++i) @@ -50,7 +50,7 @@ void test_create_new_value_mask(const thrust::host_vector& rep TEST(TestCudamapperMatcherGPU, test_create_new_value_mask_small_example) { thrust::host_vector representations_h; - thrust::host_vector expected_new_value_mask_h; + thrust::host_vector expected_new_value_mask_h; representations_h.push_back(0); expected_new_value_mask_h.push_back(1); representations_h.push_back(0); @@ -99,7 +99,7 @@ TEST(TestCudamapperMatcherGPU, test_create_new_value_mask_small_data_large_examp const std::uint32_t sketch_elements_with_same_representation = 1000; thrust::host_vector representations_h; - thrust::host_vector expected_new_value_mask_h; + thrust::host_vector expected_new_value_mask_h; for (std::size_t i = 0; i < total_sketch_elements; ++i) { representations_h.push_back(i / sketch_elements_with_same_representation); 
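// A hedged, host-side reference (illustration only, not part of this patch) of what
// find_first_occurrences_of_representations computes for a sorted array of representations:
// the index at which each distinct representation first occurs, plus the total element count
// appended as a final sentinel. representation_t is assumed to be an unsigned integer here.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::uint32_t> first_occurrences_reference(const std::vector<std::uint64_t>& representations)
{
    std::vector<std::uint32_t> starting_indices;
    for (std::size_t i = 0; i < representations.size(); ++i)
    {
        // a new run starts wherever a value differs from its left-hand neighbour
        if (i == 0 || representations[i] != representations[i - 1])
            starting_indices.push_back(static_cast<std::uint32_t>(i));
    }
    starting_indices.push_back(static_cast<std::uint32_t>(representations.size()));
    return starting_indices;
}
// On the documented example {0,0,0,0,12,12,12,12,12,12,23,23,23,32,32,32,32,32,46,46,46}
// this yields {0, 4, 10, 13, 18, 21}, matching the expected values used in these tests.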
@@ -116,12 +116,12 @@ TEST(TestCudamapperMatcherGPU, test_create_new_value_mask_small_data_large_examp number_of_threads); } void test_copy_index_of_first_occurence(const thrust::host_vector& representation_index_mask_h, - const thrust::host_vector& expected_starting_index_of_each_representation_h, + const thrust::host_vector& expected_starting_index_of_each_representation_h, const std::uint32_t number_of_threads) { const thrust::device_vector representation_index_mask_d(representation_index_mask_h); ASSERT_EQ(expected_starting_index_of_each_representation_h.size(), representation_index_mask_h.back()); - thrust::device_vector starting_index_of_each_representation_d(expected_starting_index_of_each_representation_h.size()); + thrust::device_vector starting_index_of_each_representation_d(expected_starting_index_of_each_representation_h.size()); std::uint32_t number_of_blocks = (representation_index_mask_d.size() - 1) / number_of_threads + 1; @@ -130,7 +130,7 @@ void test_copy_index_of_first_occurence(const thrust::host_vector thrust::raw_pointer_cast(starting_index_of_each_representation_d.data())); CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); - const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); + const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); for (std::size_t i = 0; i < expected_starting_index_of_each_representation_h.size(); ++i) @@ -142,7 +142,7 @@ void test_copy_index_of_first_occurence(const thrust::host_vector TEST(TestCudamapperMatcherGPU, test_copy_index_of_first_occurence_small_example) { thrust::host_vector representation_index_mask_h; - thrust::host_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_starting_index_of_each_representation_h; representation_index_mask_h.push_back(1); expected_starting_index_of_each_representation_h.push_back(0); representation_index_mask_h.push_back(1); @@ -192,5 +192,87 @@ TEST(TestCudamapperMatcherGPU, test_copy_index_of_first_occurence_large_example) number_of_threads); } +void test_find_first_occurrences_of_representations(const thrust::host_vector& representations_h, + const thrust::host_vector& expected_starting_index_of_each_representation_h) +{ + const thrust::device_vector representations_d(representations_h); + + const thrust::device_vector starting_index_of_each_representation_d = details::matcher_gpu::find_first_occurrences_of_representations(representations_d); + + const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); + + ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); + + for (std::size_t i = 0; i < expected_starting_index_of_each_representation_h.size(); ++i) + { + EXPECT_EQ(starting_index_of_each_representation_h[i], expected_starting_index_of_each_representation_h[i]) << "index: " << i; + } +} + +TEST(TestCudamapperMatcherGPU, test_find_first_occurrences_of_representations_small_example) +{ + /// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 + /// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 + /// 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 + /// 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5 + /// ^ ^ ^ ^ ^ ^ + /// 0 4 10 13 18 21 + + thrust::host_vector representations_h; + thrust::device_vector 
expected_starting_index_of_each_representation_h; + representations_h.push_back(0); + expected_starting_index_of_each_representation_h.push_back(0); + representations_h.push_back(0); + representations_h.push_back(0); + representations_h.push_back(0); + representations_h.push_back(12); + expected_starting_index_of_each_representation_h.push_back(4); + representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(23); + expected_starting_index_of_each_representation_h.push_back(10); + representations_h.push_back(23); + representations_h.push_back(23); + representations_h.push_back(32); + expected_starting_index_of_each_representation_h.push_back(13); + representations_h.push_back(32); + representations_h.push_back(32); + representations_h.push_back(32); + representations_h.push_back(32); + representations_h.push_back(46); + expected_starting_index_of_each_representation_h.push_back(18); + representations_h.push_back(46); + representations_h.push_back(46); + expected_starting_index_of_each_representation_h.push_back(21); + + test_find_first_occurrences_of_representations(representations_h, + expected_starting_index_of_each_representation_h); +} + +TEST(TestCudamapperMatcherGPU, test_find_first_occurrences_of_representations_large_example) +{ + const std::uint64_t total_sketch_elements = 10000000; + const std::uint32_t sketch_elements_with_same_representation = 1000; + + thrust::host_vector representations_h; + thrust::device_vector expected_starting_index_of_each_representation_h; + + for (std::size_t i = 0; i < total_sketch_elements; ++i) + { + representations_h.push_back(i / sketch_elements_with_same_representation); + if (i % sketch_elements_with_same_representation == 0) + { + expected_starting_index_of_each_representation_h.push_back(i); + } + } + expected_starting_index_of_each_representation_h.push_back(total_sketch_elements); + + test_find_first_occurrences_of_representations(representations_h, + expected_starting_index_of_each_representation_h); +} + } // namespace cudamapper } // namespace claragenomics From 4070990699b9a4e13829ecafc8e50776d5f22904 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Mon, 4 Nov 2019 13:25:27 +0200 Subject: [PATCH 019/128] [cudaaligner] Combine benchmarks into a single binary Fixes #148 --- ci/common/build-test-sdk.sh | 2 +- cudaaligner/benchmarks/CMakeLists.txt | 18 ++++-- cudaaligner/benchmarks/README.md | 4 +- .../{singlebatchalignment => }/main.cpp | 34 ++++++++++- .../benchmarks/singlealignment/CMakeLists.txt | 31 ---------- .../benchmarks/singlealignment/main.cpp | 58 ------------------- .../singlebatchalignment/CMakeLists.txt | 32 ---------- 7 files changed, 50 insertions(+), 129 deletions(-) rename cudaaligner/benchmarks/{singlebatchalignment => }/main.cpp (71%) delete mode 100644 cudaaligner/benchmarks/singlealignment/CMakeLists.txt delete mode 100644 cudaaligner/benchmarks/singlealignment/main.cpp delete mode 100644 cudaaligner/benchmarks/singlebatchalignment/CMakeLists.txt diff --git a/ci/common/build-test-sdk.sh b/ci/common/build-test-sdk.sh index d9016d378..235e15175 100644 --- a/ci/common/build-test-sdk.sh +++ b/ci/common/build-test-sdk.sh @@ -64,6 +64,6 @@ if [ "$GPU_TEST" == '1' ]; then logger "Running ClaraGenomicsAnalysis benchmarks..." 
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudapoa/benchmark_cudapoa --benchmark_filter="BM_SingleBatchTest" - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch_singlealignment + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch --benchmark_filter="BM_SingleAlignment" fi diff --git a/cudaaligner/benchmarks/CMakeLists.txt b/cudaaligner/benchmarks/CMakeLists.txt index bc1f74091..ff164926a 100644 --- a/cudaaligner/benchmarks/CMakeLists.txt +++ b/cudaaligner/benchmarks/CMakeLists.txt @@ -8,9 +8,19 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. # -# Add benchmarks -add_subdirectory(singlealignment) -add_subdirectory(singlebatchalignment) + +project(benchmark_cudaaligner_singlebatch) + +set(SOURCES + main.cpp + ) + +set(LIBS + cudaaligner_internal + cudaaligner + utils) + +cga_add_benchmarks(${PROJECT_NAME} "cudaaligner" "${SOURCES}" "${LIBS}") install(FILES README.md - DESTINATION benchmarks/cudapoa) + DESTINATION benchmarks/cudaaligner) diff --git a/cudaaligner/benchmarks/README.md b/cudaaligner/benchmarks/README.md index d47517821..0415f5b83 100644 --- a/cudaaligner/benchmarks/README.md +++ b/cudaaligner/benchmarks/README.md @@ -7,7 +7,7 @@ of a single alignment in CUDA. To run the benchmark, execute ``` -./benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch_singlealignment +./benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch --benchmark_filter="BM_SingleAlignment" ``` ## Single Batch Alignment @@ -16,5 +16,5 @@ sizes. The intention of this benchmark is to measure performanceo of batched ali To the the benchmark, execute ``` -./benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch_multialignment +./benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch --benchmark_filter="BM_SingleBatchAlignment" ``` diff --git a/cudaaligner/benchmarks/singlebatchalignment/main.cpp b/cudaaligner/benchmarks/main.cpp similarity index 71% rename from cudaaligner/benchmarks/singlebatchalignment/main.cpp rename to cudaaligner/benchmarks/main.cpp index 284654faa..5879ef286 100644 --- a/cudaaligner/benchmarks/singlebatchalignment/main.cpp +++ b/cudaaligner/benchmarks/main.cpp @@ -26,6 +26,33 @@ namespace claragenomics namespace cudaaligner { +static void BM_SingleAlignment(benchmark::State& state) +{ + int32_t genome_size = state.range(0); + + // Generate random sequences + std::minstd_rand rng(1); + std::string genome_1 = claragenomics::genomeutils::generate_random_genome(genome_size, rng); + std::string genome_2 = claragenomics::genomeutils::generate_random_genome(genome_size, rng); + + // Create aligner object + std::unique_ptr aligner = create_aligner(genome_size, + genome_size, + 1, + AlignmentType::global_alignment, + 0, + 0); + aligner->add_alignment(genome_1.c_str(), genome_1.length(), + genome_2.c_str(), genome_2.length()); + + // Run alignment repeatedly + for (auto _ : state) + { + aligner->align_all(); + aligner->sync_alignments(); + } +} + class CudaStream { public: @@ -91,7 +118,12 @@ static void BM_SingleBatchAlignment(benchmark::State& state) } } -// Register the function as a benchmark +// Register the functions as a benchmark +BENCHMARK(BM_SingleAlignment) + ->Unit(benchmark::kMillisecond) + ->RangeMultiplier(10) + ->Range(100, 100000); + BENCHMARK_TEMPLATE(BM_SingleBatchAlignment, AlignerGlobalUkkonen) ->Unit(benchmark::kMillisecond) ->RangeMultiplier(4) diff 
--git a/cudaaligner/benchmarks/singlealignment/CMakeLists.txt b/cudaaligner/benchmarks/singlealignment/CMakeLists.txt deleted file mode 100644 index 6c70e5d8b..000000000 --- a/cudaaligner/benchmarks/singlealignment/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. -# - -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. -# - -project(benchmark_cudaaligner_singlebatch_singlealignment) - -set(SOURCES - main.cpp - ) - -set(LIBS - cudaaligner - utils) - -cga_add_benchmarks(${PROJECT_NAME} "cudaaligner" "${SOURCES}" "${LIBS}") diff --git a/cudaaligner/benchmarks/singlealignment/main.cpp b/cudaaligner/benchmarks/singlealignment/main.cpp deleted file mode 100644 index 4262838d6..000000000 --- a/cudaaligner/benchmarks/singlealignment/main.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. -*/ - -#include -#include -#include - -#include - -namespace claragenomics -{ - -namespace cudaaligner -{ - -static void BM_SingleAlignment(benchmark::State& state) -{ - int32_t genome_size = state.range(0); - - // Generate random sequences - std::minstd_rand rng(1); - std::string genome_1 = claragenomics::genomeutils::generate_random_genome(genome_size, rng); - std::string genome_2 = claragenomics::genomeutils::generate_random_genome(genome_size, rng); - - // Create aligner object - std::unique_ptr aligner = create_aligner(genome_size, - genome_size, - 1, - AlignmentType::global_alignment, - 0, - 0); - aligner->add_alignment(genome_1.c_str(), genome_1.length(), - genome_2.c_str(), genome_2.length()); - - // Run alignment repeatedly - for (auto _ : state) - { - aligner->align_all(); - aligner->sync_alignments(); - } -} - -// Register the function as a benchmark -BENCHMARK(BM_SingleAlignment) - ->Unit(benchmark::kMillisecond) - ->RangeMultiplier(10) - ->Range(100, 100000); -} // namespace cudaaligner -} // namespace claragenomics - -BENCHMARK_MAIN(); diff --git a/cudaaligner/benchmarks/singlebatchalignment/CMakeLists.txt b/cudaaligner/benchmarks/singlebatchalignment/CMakeLists.txt deleted file mode 100644 index 78cc32c8c..000000000 --- a/cudaaligner/benchmarks/singlebatchalignment/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
-# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. -# - -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. -# - -project(benchmark_cudaaligner_singlebatch_multialignment) - -set(SOURCES - main.cpp - ) - -set(LIBS - cudaaligner_internal - cudaaligner - utils) - -cga_add_benchmarks(${PROJECT_NAME} "cudaaligner" "${SOURCES}" "${LIBS}") From e70189a7e3c9b100f55eda874a8a2a63e6d4155b Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Mon, 4 Nov 2019 15:47:20 +0100 Subject: [PATCH 020/128] [cudamapper] Implementation of target search for query representations --- cudamapper/src/matcher_gpu.cu | 41 ++++++++++ cudamapper/src/matcher_gpu.cuh | 50 ++++++++++++ cudamapper/tests/Test_CudamapperMatcherGPU.cu | 78 +++++++++++++++++++ 3 files changed, 169 insertions(+) diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index cfe5b7817..dfec1421e 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -14,6 +14,8 @@ #include #include +#include +#include namespace claragenomics { @@ -83,6 +85,16 @@ thrust::device_vector find_first_occurrences_of_representations(c return starting_index_of_each_representation; } +void find_query_target_matches(thrust::device_vector& found_target_indices_d, const thrust::device_vector& query_representations_d, const thrust::device_vector& target_representations_d) +{ + assert(found_target_indices_d.size() == query_representations_d.size()); + + const int32_t n_threads = 256; + const int32_t n_blocks = ceiling_divide(query_representations_d.size(), n_threads); + + find_query_target_matches_kernel<<>>(found_target_indices_d.data().get(), query_representations_d.data().get(), get_size(query_representations_d), target_representations_d.data().get(), get_size(target_representations_d)); +} + __global__ void create_new_value_mask(const representation_t* const representations_d, const std::size_t number_of_elements, std::uint32_t* const new_value_mask_d) @@ -132,6 +144,35 @@ __global__ void copy_index_of_first_occurence(const std::uint64_t* const represe } } +__global__ void find_query_target_matches_kernel(int64_t* const found_target_indices, const representation_t* const query_representations_d, const int64_t n_query_representations, const representation_t* const target_representations_d, const int64_t n_target_representations) +{ + const int64_t i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i >= n_query_representations) + return; + + const representation_t query = query_representations_d[i]; + const representation_t* lower_bound = target_representations_d; + const representation_t* upper_bound = target_representations_d + n_target_representations; + int64_t found_target_index = -1; + while (upper_bound - lower_bound > 0) + { + const representation_t* mid = lower_bound + (upper_bound - lower_bound) / 2; + const representation_t target = *mid; 
+ if (target < query) + lower_bound = mid + 1; + else if (target > query) + upper_bound = mid; + else + { + found_target_index = mid - target_representations_d; + break; + } + } + + found_target_indices[i] = found_target_index; +} + } // namespace matcher_gpu } // namespace details diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index a6a2e067c..6072e351e 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -54,6 +54,34 @@ namespace matcher_gpu /// \return first_element_for_representation thrust::device_vector find_first_occurrences_of_representations(const thrust::device_vector& representations_d); +/// \brief Finds the array index of the target representation for each query representation +/// +/// Takes an array of query representations and an array of target representations +/// and checks for each query representation if the representation is present in the target array. +/// To return the result, the function takes a reference to an array of indices \param found_target_indices_d +/// which has to be of the same length of the query representations array. +/// If a query representation is found in the target the same representation the corresponding index +/// of the target array will be stored in \param found_target_indices_d at the position corresponding +/// to the query in the query array. +/// If a query is not found in the target array, -1 will be stored to the corresponding position of \param found_target_indices_d +/// For example: +/// query: +/// array-index: 0 1 2 3 4 +/// representation: 0 12 23 32 46 +/// target: +/// array-index: 0 1 2 3 4 5 6 +/// representation: 5 12 16 23 24 25 46 +/// +/// gives: +/// found_target_indicies_d: +/// array-index: 0 1 2 3 4 +/// target-index: -1 1 3 -1 6 +/// +/// \param found_target_indices_d The array which will filled with the resulting target indices. This array has to be of same size as query_representations_d. +/// \param query_representations_d An array of query representations +/// \param target_representations_d An sorted array of target representations +void find_query_target_matches(thrust::device_vector& found_target_indices_d, const thrust::device_vector& query_representations_d, const thrust::device_vector& target_representations_d); + /// \brief Writes 0 to the output array if the value to the left is the same as the current value, 1 otherwise. First element is always 1 /// /// For example: @@ -86,6 +114,28 @@ __global__ void create_new_value_mask(const representation_t* const representati __global__ void copy_index_of_first_occurence(const std::uint64_t* const representation_index_mask_d, const std::size_t number_of_input_elements, std::uint32_t* const starting_index_of_each_representation); + +/// \brief Performs a binary search on target_representations_d for each element of query_representations_d and stores the found index (or -1 iff not found) in found_target_indices. 
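Editor's note: the kernel documented here assigns one thread per query representation and runs a hand-written binary search over the sorted target array. As a point of reference, a minimal host-side sketch of the same contract follows; it is illustrative only (not part of the patch) and assumes representation_t is an unsigned integer type, as used elsewhere in this file.

```
// Host-side reference of the lookup the kernel performs on the device:
// for each query representation, return the index of the matching entry
// in the sorted target array, or -1 if the representation is absent.
#include <algorithm>
#include <cstdint>
#include <vector>

using representation_t = std::uint64_t; // assumption for this sketch only

std::vector<std::int64_t> find_query_target_matches_host(const std::vector<representation_t>& queries,
                                                         const std::vector<representation_t>& sorted_targets)
{
    std::vector<std::int64_t> found_target_indices(queries.size(), -1);
    for (std::size_t i = 0; i < queries.size(); ++i)
    {
        // binary search over the sorted target array
        const auto it = std::lower_bound(sorted_targets.begin(), sorted_targets.end(), queries[i]);
        if (it != sorted_targets.end() && *it == queries[i])
            found_target_indices[i] = it - sorted_targets.begin();
    }
    return found_target_indices;
}
```

For the query/target arrays used in the example below, this sketch produces {-1, 1, 3, -1, 6}, matching the expected output of the device kernel.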
+/// +/// For example: +/// query: +/// array-index: 0 1 2 3 4 +/// representation: 0 12 23 32 46 +/// target: +/// array-index: 0 1 2 3 4 5 6 +/// representation: 5 12 16 23 24 25 46 +/// +/// gives: +/// found_target_indicies_d: +/// array-index: 0 1 2 3 4 +/// target-index: -1 1 3 -1 6 +/// +/// \param found_target_indices_d the array which will hold the result +/// \param query_representations_d the array of queries +/// \param n_query_representations size of \param query_representations_d and \param found_target_indices_d +/// \param target_representations_d the array of targets to be searched +/// \param n_target_representations size of \param target_representations_d +__global__ void find_query_target_matches_kernel(int64_t* const found_target_indices_d, const representation_t* const query_representations_d, const int64_t n_query_representations, const representation_t* const target_representations_d, const int64_t n_target_representations); } // namespace matcher_gpu } // namespace details diff --git a/cudamapper/tests/Test_CudamapperMatcherGPU.cu b/cudamapper/tests/Test_CudamapperMatcherGPU.cu index e8a1ba369..4f4eb0bf9 100644 --- a/cudamapper/tests/Test_CudamapperMatcherGPU.cu +++ b/cudamapper/tests/Test_CudamapperMatcherGPU.cu @@ -14,6 +14,7 @@ #include #include +#include #include "../src/matcher_gpu.cuh" @@ -274,5 +275,82 @@ TEST(TestCudamapperMatcherGPU, test_find_first_occurrences_of_representations_la expected_starting_index_of_each_representation_h); } +void test_find_query_target_matches(const thrust::host_vector& query_representations_h, + const thrust::host_vector& target_representations_h, + const thrust::host_vector& expected_found_target_indices_h) +{ + const thrust::device_vector query_representations_d(query_representations_h); + const thrust::device_vector target_representations_d(target_representations_h); + thrust::device_vector found_target_indices_d(query_representations_d.size()); + + details::matcher_gpu::find_query_target_matches(found_target_indices_d, query_representations_d, target_representations_d); + + thrust::device_vector found_target_indices_h(found_target_indices_d); + + ASSERT_EQ(found_target_indices_h.size(), expected_found_target_indices_h.size()); + + for (int32_t i = 0; i < get_size(found_target_indices_h); ++i) + { + EXPECT_EQ(found_target_indices_h[i], expected_found_target_indices_h[i]) << "index: " << i; + } +} + +TEST(TestCudamapperMatcherGPU, test_find_query_target_matches_small_example) +{ + thrust::host_vector query_representations_h; + query_representations_h.push_back(0); + query_representations_h.push_back(12); + query_representations_h.push_back(23); + query_representations_h.push_back(32); + query_representations_h.push_back(46); + thrust::host_vector target_representations_h; + target_representations_h.push_back(5); + target_representations_h.push_back(12); + target_representations_h.push_back(16); + target_representations_h.push_back(23); + target_representations_h.push_back(24); + target_representations_h.push_back(25); + target_representations_h.push_back(46); + + thrust::host_vector expected_found_target_indices_h; + expected_found_target_indices_h.push_back(-1); + expected_found_target_indices_h.push_back(1); + expected_found_target_indices_h.push_back(3); + expected_found_target_indices_h.push_back(-1); + expected_found_target_indices_h.push_back(6); + + test_find_query_target_matches(query_representations_h, target_representations_h, expected_found_target_indices_h); +} + +TEST(TestCudamapperMatcherGPU, 
test_query_target_matches_large_example) +{ + const std::int64_t total_query_representations = 1000000; + + thrust::host_vector query_representations_h; + thrust::host_vector target_representations_h; + + for (std::int64_t i = 0; i < total_query_representations; ++i) + { + query_representations_h.push_back(i * 3); + } + + thrust::device_vector expected_found_target_indices_h(query_representations_h.size(), -1); + + const representation_t max_representation = query_representations_h.back(); + for (representation_t r = 0; r < max_representation; r += 2) + { + target_representations_h.push_back(r); + if (r % 3 == 0) + { + if (r / 3 < expected_found_target_indices_h.size()) + { + expected_found_target_indices_h[r / 3] = get_size(target_representations_h) - 1; + } + } + } + + test_find_query_target_matches(query_representations_h, target_representations_h, expected_found_target_indices_h); +} + } // namespace cudamapper } // namespace claragenomics From 0bb796be2d4893643162fb15cd24dd8d0f059ac2 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Mon, 4 Nov 2019 18:52:03 +0200 Subject: [PATCH 021/128] [cudaaligner] Combining bentchmark binaries- fix review comments Fixes #148 --- ci/common/build-test-sdk.sh | 2 +- cudaaligner/benchmarks/CMakeLists.txt | 2 +- cudaaligner/benchmarks/README.md | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/common/build-test-sdk.sh b/ci/common/build-test-sdk.sh index 235e15175..93cfe47cf 100644 --- a/ci/common/build-test-sdk.sh +++ b/ci/common/build-test-sdk.sh @@ -64,6 +64,6 @@ if [ "$GPU_TEST" == '1' ]; then logger "Running ClaraGenomicsAnalysis benchmarks..." LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudapoa/benchmark_cudapoa --benchmark_filter="BM_SingleBatchTest" - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch --benchmark_filter="BM_SingleAlignment" + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudaaligner/benchmark_cudaaligner --benchmark_filter="BM_SingleAlignment" fi diff --git a/cudaaligner/benchmarks/CMakeLists.txt b/cudaaligner/benchmarks/CMakeLists.txt index ff164926a..e1d45dc20 100644 --- a/cudaaligner/benchmarks/CMakeLists.txt +++ b/cudaaligner/benchmarks/CMakeLists.txt @@ -9,7 +9,7 @@ # -project(benchmark_cudaaligner_singlebatch) +project(benchmark_cudaaligner) set(SOURCES main.cpp diff --git a/cudaaligner/benchmarks/README.md b/cudaaligner/benchmarks/README.md index 0415f5b83..108069176 100644 --- a/cudaaligner/benchmarks/README.md +++ b/cudaaligner/benchmarks/README.md @@ -7,7 +7,7 @@ of a single alignment in CUDA. To run the benchmark, execute ``` -./benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch --benchmark_filter="BM_SingleAlignment" +./benchmarks/cudaaligner/benchmark_cudaaligner --benchmark_filter="BM_SingleAlignment" ``` ## Single Batch Alignment @@ -16,5 +16,5 @@ sizes. 
The intention of this benchmark is to measure performanceo of batched ali To the the benchmark, execute ``` -./benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch --benchmark_filter="BM_SingleBatchAlignment" +./benchmarks/cudaaligner/benchmark_cudaaligner --benchmark_filter="BM_SingleBatchAlignment" ``` From a2e2f41f54b5af9952842bc52b672fb800599af2 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Mon, 4 Nov 2019 13:25:27 +0200 Subject: [PATCH 022/128] [cudaaligner] Combine benchmarks into a single binary Fixes #148 [cudaaligner] Combining bentchmark binaries- fix review comments Fixes #148 --- ci/common/build-test-sdk.sh | 2 +- cudaaligner/benchmarks/CMakeLists.txt | 18 ++++-- cudaaligner/benchmarks/README.md | 4 +- .../{singlebatchalignment => }/main.cpp | 34 ++++++++++- .../benchmarks/singlealignment/CMakeLists.txt | 31 ---------- .../benchmarks/singlealignment/main.cpp | 58 ------------------- .../singlebatchalignment/CMakeLists.txt | 32 ---------- 7 files changed, 50 insertions(+), 129 deletions(-) rename cudaaligner/benchmarks/{singlebatchalignment => }/main.cpp (71%) delete mode 100644 cudaaligner/benchmarks/singlealignment/CMakeLists.txt delete mode 100644 cudaaligner/benchmarks/singlealignment/main.cpp delete mode 100644 cudaaligner/benchmarks/singlebatchalignment/CMakeLists.txt diff --git a/ci/common/build-test-sdk.sh b/ci/common/build-test-sdk.sh index d9016d378..93cfe47cf 100644 --- a/ci/common/build-test-sdk.sh +++ b/ci/common/build-test-sdk.sh @@ -64,6 +64,6 @@ if [ "$GPU_TEST" == '1' ]; then logger "Running ClaraGenomicsAnalysis benchmarks..." LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudapoa/benchmark_cudapoa --benchmark_filter="BM_SingleBatchTest" - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch_singlealignment + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudaaligner/benchmark_cudaaligner --benchmark_filter="BM_SingleAlignment" fi diff --git a/cudaaligner/benchmarks/CMakeLists.txt b/cudaaligner/benchmarks/CMakeLists.txt index bc1f74091..e1d45dc20 100644 --- a/cudaaligner/benchmarks/CMakeLists.txt +++ b/cudaaligner/benchmarks/CMakeLists.txt @@ -8,9 +8,19 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. # -# Add benchmarks -add_subdirectory(singlealignment) -add_subdirectory(singlebatchalignment) + +project(benchmark_cudaaligner) + +set(SOURCES + main.cpp + ) + +set(LIBS + cudaaligner_internal + cudaaligner + utils) + +cga_add_benchmarks(${PROJECT_NAME} "cudaaligner" "${SOURCES}" "${LIBS}") install(FILES README.md - DESTINATION benchmarks/cudapoa) + DESTINATION benchmarks/cudaaligner) diff --git a/cudaaligner/benchmarks/README.md b/cudaaligner/benchmarks/README.md index d47517821..108069176 100644 --- a/cudaaligner/benchmarks/README.md +++ b/cudaaligner/benchmarks/README.md @@ -7,7 +7,7 @@ of a single alignment in CUDA. To run the benchmark, execute ``` -./benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch_singlealignment +./benchmarks/cudaaligner/benchmark_cudaaligner --benchmark_filter="BM_SingleAlignment" ``` ## Single Batch Alignment @@ -16,5 +16,5 @@ sizes. 
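Editor's note: with the two benchmark targets merged, both benchmark families are compiled into the single benchmark_cudaaligner executable and selected at run time through Google Benchmark's --benchmark_filter regular expression. A minimal, self-contained sketch of this pattern follows; the benchmark and binary names here are made up for illustration and are not the SDK's.

```
// Two benchmarks registered in one binary; --benchmark_filter selects which runs.
#include <benchmark/benchmark.h>

static void BM_ExampleA(benchmark::State& state)
{
    for (auto _ : state)
        benchmark::DoNotOptimize(state.range(0) * 2);
}

static void BM_ExampleB(benchmark::State& state)
{
    int x = 0;
    for (auto _ : state)
        benchmark::DoNotOptimize(x += 1);
}

// RangeMultiplier(10) with Range(100, 100000) benchmarks the arguments
// 100, 1000, 10000 and 100000, mirroring the registration used in main.cpp above.
BENCHMARK(BM_ExampleA)->Unit(benchmark::kMillisecond)->RangeMultiplier(10)->Range(100, 100000);
BENCHMARK(BM_ExampleB)->Unit(benchmark::kMillisecond);

BENCHMARK_MAIN();
```

Running `./example_benchmark --benchmark_filter="BM_ExampleA"` executes only the first family; this is the same mechanism the CI script uses to pick BM_SingleAlignment out of the combined binary.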
The intention of this benchmark is to measure performanceo of batched ali To the the benchmark, execute ``` -./benchmarks/cudaaligner/benchmark_cudaaligner_singlebatch_multialignment +./benchmarks/cudaaligner/benchmark_cudaaligner --benchmark_filter="BM_SingleBatchAlignment" ``` diff --git a/cudaaligner/benchmarks/singlebatchalignment/main.cpp b/cudaaligner/benchmarks/main.cpp similarity index 71% rename from cudaaligner/benchmarks/singlebatchalignment/main.cpp rename to cudaaligner/benchmarks/main.cpp index 284654faa..5879ef286 100644 --- a/cudaaligner/benchmarks/singlebatchalignment/main.cpp +++ b/cudaaligner/benchmarks/main.cpp @@ -26,6 +26,33 @@ namespace claragenomics namespace cudaaligner { +static void BM_SingleAlignment(benchmark::State& state) +{ + int32_t genome_size = state.range(0); + + // Generate random sequences + std::minstd_rand rng(1); + std::string genome_1 = claragenomics::genomeutils::generate_random_genome(genome_size, rng); + std::string genome_2 = claragenomics::genomeutils::generate_random_genome(genome_size, rng); + + // Create aligner object + std::unique_ptr aligner = create_aligner(genome_size, + genome_size, + 1, + AlignmentType::global_alignment, + 0, + 0); + aligner->add_alignment(genome_1.c_str(), genome_1.length(), + genome_2.c_str(), genome_2.length()); + + // Run alignment repeatedly + for (auto _ : state) + { + aligner->align_all(); + aligner->sync_alignments(); + } +} + class CudaStream { public: @@ -91,7 +118,12 @@ static void BM_SingleBatchAlignment(benchmark::State& state) } } -// Register the function as a benchmark +// Register the functions as a benchmark +BENCHMARK(BM_SingleAlignment) + ->Unit(benchmark::kMillisecond) + ->RangeMultiplier(10) + ->Range(100, 100000); + BENCHMARK_TEMPLATE(BM_SingleBatchAlignment, AlignerGlobalUkkonen) ->Unit(benchmark::kMillisecond) ->RangeMultiplier(4) diff --git a/cudaaligner/benchmarks/singlealignment/CMakeLists.txt b/cudaaligner/benchmarks/singlealignment/CMakeLists.txt deleted file mode 100644 index 6c70e5d8b..000000000 --- a/cudaaligner/benchmarks/singlealignment/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. -# - -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. -# - -project(benchmark_cudaaligner_singlebatch_singlealignment) - -set(SOURCES - main.cpp - ) - -set(LIBS - cudaaligner - utils) - -cga_add_benchmarks(${PROJECT_NAME} "cudaaligner" "${SOURCES}" "${LIBS}") diff --git a/cudaaligner/benchmarks/singlealignment/main.cpp b/cudaaligner/benchmarks/singlealignment/main.cpp deleted file mode 100644 index 4262838d6..000000000 --- a/cudaaligner/benchmarks/singlealignment/main.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
-* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. -*/ - -#include -#include -#include - -#include - -namespace claragenomics -{ - -namespace cudaaligner -{ - -static void BM_SingleAlignment(benchmark::State& state) -{ - int32_t genome_size = state.range(0); - - // Generate random sequences - std::minstd_rand rng(1); - std::string genome_1 = claragenomics::genomeutils::generate_random_genome(genome_size, rng); - std::string genome_2 = claragenomics::genomeutils::generate_random_genome(genome_size, rng); - - // Create aligner object - std::unique_ptr aligner = create_aligner(genome_size, - genome_size, - 1, - AlignmentType::global_alignment, - 0, - 0); - aligner->add_alignment(genome_1.c_str(), genome_1.length(), - genome_2.c_str(), genome_2.length()); - - // Run alignment repeatedly - for (auto _ : state) - { - aligner->align_all(); - aligner->sync_alignments(); - } -} - -// Register the function as a benchmark -BENCHMARK(BM_SingleAlignment) - ->Unit(benchmark::kMillisecond) - ->RangeMultiplier(10) - ->Range(100, 100000); -} // namespace cudaaligner -} // namespace claragenomics - -BENCHMARK_MAIN(); diff --git a/cudaaligner/benchmarks/singlebatchalignment/CMakeLists.txt b/cudaaligner/benchmarks/singlebatchalignment/CMakeLists.txt deleted file mode 100644 index 78cc32c8c..000000000 --- a/cudaaligner/benchmarks/singlebatchalignment/CMakeLists.txt +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. -# - -# -# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. 
-# - -project(benchmark_cudaaligner_singlebatch_multialignment) - -set(SOURCES - main.cpp - ) - -set(LIBS - cudaaligner_internal - cudaaligner - utils) - -cga_add_benchmarks(${PROJECT_NAME} "cudaaligner" "${SOURCES}" "${LIBS}") From 50628a091d36dfa7314ac81697028afdd6868f3b Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Tue, 5 Nov 2019 11:45:59 +0100 Subject: [PATCH 023/128] Initial implementation of new indexer, does not cover the case where some reads are shorter than only one window --- cudamapper/CMakeLists.txt | 9 +- cudamapper/src/index_gpu_two_indices.cu | 0 cudamapper/src/index_gpu_two_indices.cuh | 311 +++++++ cudamapper/tests/CMakeLists.txt | 2 + .../Test_CudamapperIndexGPUTwoIndices.cu | 768 ++++++++++++++++++ 5 files changed, 1089 insertions(+), 1 deletion(-) create mode 100644 cudamapper/src/index_gpu_two_indices.cu create mode 100644 cudamapper/src/index_gpu_two_indices.cuh create mode 100644 cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu diff --git a/cudamapper/CMakeLists.txt b/cudamapper/CMakeLists.txt index 19c831244..e8503c9f2 100644 --- a/cudamapper/CMakeLists.txt +++ b/cudamapper/CMakeLists.txt @@ -29,6 +29,13 @@ target_include_directories(index_gpu PUBLIC include) target_link_libraries(index_gpu logging pthread utils cgaio) target_compile_options(index_gpu PRIVATE -Werror) +cuda_add_library(index_gpu_two_indices + src/index_gpu_two_indices.cu + src/minimizer.cu) +target_include_directories(index_gpu_two_indices PUBLIC include) +target_link_libraries(index_gpu_two_indices logging pthread utils cgaio) +target_compile_options(index_gpu_two_indices PRIVATE -Werror) + cuda_add_library(matcher src/matcher.cu) target_include_directories(matcher PUBLIC include) @@ -68,7 +75,7 @@ target_include_directories(cudamapper $ ) -target_link_libraries(cudamapper utils index_gpu matcher matcher_gpu logging overlapper_triggerred cudamapper_utils) +target_link_libraries(cudamapper utils index_gpu index_gpu_two_indices matcher matcher_gpu logging overlapper_triggerred cudamapper_utils) # Add tests folder add_subdirectory(tests) diff --git a/cudamapper/src/index_gpu_two_indices.cu b/cudamapper/src/index_gpu_two_indices.cu new file mode 100644 index 000000000..e69de29bb diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh new file mode 100644 index 000000000..e40bd410d --- /dev/null +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -0,0 +1,311 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. +*/ + +#pragma once + +#include + +#include +#include +#include + +#include "claragenomics/cudamapper/types.hpp" +#include +#include +#include + +namespace claragenomics +{ +namespace cudamapper +{ +/// IndexGPU - Contains sketch elements grouped by representation and by read id within the representation +/// +/// Class contains four separate data arrays: representations, read_ids, positions_in_reads and directions_of_reads. 
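Editor's note: the structure-of-arrays layout described in this comment can be pictured with a small made-up example (illustration only, not part of the patch); the type and field names below are invented for the sketch.

```
// Toy structure-of-arrays index; values are invented for illustration only.
#include <cstdint>
#include <vector>

struct ToyIndexArrays
{
    std::vector<std::uint64_t> representations;   // sorted ascending
    std::vector<std::uint32_t> read_ids;          // ascending within a run of equal representations
    std::vector<std::uint32_t> positions_in_reads;
    std::vector<bool> is_reverse_complement;      // direction of the sketch element
};

ToyIndexArrays make_toy_index()
{
    // the i-th entry of every array describes the same sketch element
    return ToyIndexArrays{
        {2, 2, 5, 5, 5, 9},                        // representation of each sketch element
        {0, 1, 0, 0, 1, 1},                        // read the element comes from
        {4, 0, 1, 3, 2, 6},                        // position of the k-mer within its read
        {false, true, false, false, true, false}}; // forward or reverse complement
}
```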
+/// Elements of these four arrays with the same index represent one sketch element +/// (representation, read_id of the read it belongs to, position in that read of the first basepair of sketch element and whether it is forward or reverse complement representation). +/// +/// Elements of data arrays are grouped by sketch element representation and within those groups by read_id. Both representations and read_ids within representations are sorted in ascending order +/// +/// \tparam SketchElementImpl any implementation of SketchElement +template +class IndexGPUTwoIndices +{ +public: + /// \brief Constructor + /// + /// \param parser parser for the whole input file (part that goes into this index is determined by first_read_id and past_the_last_read_id) + /// \param first_read_id read_id of the first read to the included in this index + /// \param past_the_last_read_id read_id+1 of the last read to be included in this index + /// \param kmer_size k - the kmer length + /// \param window_size w - the length of the sliding window used to find sketch elements + IndexGPUTwoIndices(io::FastaParser* parser, + const read_id_t first_read_id, + const read_id_t past_the_last_read_id, + const std::uint64_t kmer_size, + const std::uint64_t window_size); + + /// \brief Constructor + IndexGPUTwoIndices(); + + /// \brief returns an array of representations of sketch elements + /// \return an array of representations of sketch elements + const thrust::device_vector& representations() const; + + /// \brief returns an array of starting positions of sketch elements in their reads + /// \return an array of starting positions of sketch elements in their reads + const thrust::device_vector& positions_in_reads() const; + + /// \brief returns an array of reads ids for sketch elements + /// \return an array of reads ids for sketch elements + const thrust::device_vector& read_ids() const; + + /// \brief returns an array of directions in which sketch elements were read + /// \return an array of directions in which sketch elements were read + const thrust::device_vector& directions_of_reads() const; + + /// \brief returns read name of read with the given read_id + /// \param read_id + /// \return read name of read with the given read_id + const std::string& read_id_to_read_name(const read_id_t read_id) const; + + /// \brief returns read length for the read with the gived read_id + /// \param read_id + /// \return read length for the read with the gived read_id + const std::uint32_t& read_id_to_read_length(const read_id_t read_id) const; + + /// \brief returns number of reads in input data + /// \return number of reads in input data + std::uint64_t number_of_reads() const; + +private: + /// \brief generates the index + void generate_index(io::FastaParser* query_parser, + const read_id_t first_read_id, + const read_id_t past_the_last_read_id); + + thrust::device_vector representations_d_; + thrust::device_vector positions_in_reads_d_; + thrust::device_vector read_ids_d_; + thrust::device_vector directions_of_reads_d_; + + std::vector read_id_to_read_name_; + std::vector read_id_to_read_length_; + + const read_id_t first_read_id_; + const std::uint64_t kmer_size_; + const std::uint64_t window_size_; + std::uint64_t number_of_reads_; +}; + +template +IndexGPUTwoIndices::IndexGPUTwoIndices(io::FastaParser* parser, + const read_id_t first_read_id, + const read_id_t past_the_last_read_id, + const std::uint64_t kmer_size, + const std::uint64_t window_size) + : first_read_id_(first_read_id) + , kmer_size_(kmer_size) + , 
window_size_(window_size) + , number_of_reads_(0) +{ + generate_index(parser, + first_read_id_, + past_the_last_read_id); +} + +template +IndexGPUTwoIndices::IndexGPUTwoIndices() +{ +} + +template +const thrust::device_vector& IndexGPUTwoIndices::representations() const +{ + return representations_d_; +}; + +template +const thrust::device_vector& IndexGPUTwoIndices::positions_in_reads() const +{ + return positions_in_reads_d_; +} + +template +const thrust::device_vector& IndexGPUTwoIndices::read_ids() const +{ + return read_ids_d_; +} + +template +const thrust::device_vector& IndexGPUTwoIndices::directions_of_reads() const +{ + return directions_of_reads_d_; +} + +template +const std::string& IndexGPUTwoIndices::read_id_to_read_name(const read_id_t read_id) const +{ + return read_id_to_read_name_[read_id - first_read_id_]; +} + +template +const std::uint32_t& IndexGPUTwoIndices::read_id_to_read_length(const read_id_t read_id) const +{ + return read_id_to_read_length_[read_id - first_read_id_]; +} + +template +std::uint64_t IndexGPUTwoIndices::number_of_reads() const +{ + return number_of_reads_; +} + +template +void IndexGPUTwoIndices::generate_index(io::FastaParser* parser, + const read_id_t first_read_id, + const read_id_t past_the_last_read_id) +{ + + // check if there are any reads to process + if (first_read_id >= past_the_last_read_id) + { + CGA_LOG_INFO("No Sketch Elements to be added to index"); + number_of_reads_ = 0; + return; + } + + number_of_reads_ = past_the_last_read_id - first_read_id; + + std::uint64_t total_basepairs = 0; + std::vector read_id_to_basepairs_section_h; + std::vector fasta_reads; + + // deterine the number of basepairs in each read and assign read_id to each read + for (read_id_t read_id = first_read_id; read_id < past_the_last_read_id; ++read_id) + { + fasta_reads.emplace_back(parser->get_sequence_by_id(read_id)); + const std::string& read_basepairs = fasta_reads.back().seq; + const std::string& read_name = fasta_reads.back().name; + if (read_basepairs.length() >= window_size_ + kmer_size_ - 1) + { + read_id_to_basepairs_section_h.emplace_back(ArrayBlock{total_basepairs, static_cast(read_basepairs.length())}); + total_basepairs += read_basepairs.length(); + read_id_to_read_name_.push_back(read_name); + read_id_to_read_length_.push_back(read_basepairs.length()); + } + else + { + // TODO: Implement this skipping in a correct manner + CGA_LOG_INFO("Skipping read {}. 
It has {} basepairs, one window covers {} basepairs", + read_name, + read_basepairs.length(), + window_size_ + kmer_size_ - 1); + } + } + + if (0 == total_basepairs) + { + CGA_LOG_INFO("Index for reads {} to past {} is empty", + first_read_id, + past_the_last_read_id); + number_of_reads_ = 0; + return; + } + + std::vector merged_basepairs_h(total_basepairs); + + // copy basepairs from each read into one big array + // read_id starts from first_read_id which can have an arbitrary value, local_read_id always starts from 0 + for (read_id_t local_read_id = 0; local_read_id < number_of_reads_; ++local_read_id) + { + const std::string& read_basepairs = fasta_reads[local_read_id].seq; + std::copy(std::begin(read_basepairs), + std::end(read_basepairs), + std::next(std::begin(merged_basepairs_h), read_id_to_basepairs_section_h[local_read_id].first_element_)); + } + fasta_reads.clear(); + fasta_reads.shrink_to_fit(); + + // move basepairs to the device + CGA_LOG_INFO("Allocating {} bytes for read_id_to_basepairs_section_d", read_id_to_basepairs_section_h.size() * sizeof(decltype(read_id_to_basepairs_section_h)::value_type)); + device_buffer read_id_to_basepairs_section_d(read_id_to_basepairs_section_h.size()); + CGA_CU_CHECK_ERR(cudaMemcpy(read_id_to_basepairs_section_d.data(), + read_id_to_basepairs_section_h.data(), + read_id_to_basepairs_section_h.size() * sizeof(decltype(read_id_to_basepairs_section_h)::value_type), + cudaMemcpyHostToDevice)); + + CGA_LOG_INFO("Allocating {} bytes for merged_basepairs_d", merged_basepairs_h.size() * sizeof(decltype(merged_basepairs_h)::value_type)); + device_buffer merged_basepairs_d(merged_basepairs_h.size()); + CGA_CU_CHECK_ERR(cudaMemcpy(merged_basepairs_d.data(), + merged_basepairs_h.data(), + merged_basepairs_h.size() * sizeof(decltype(merged_basepairs_h)::value_type), + cudaMemcpyHostToDevice)); + merged_basepairs_h.clear(); + merged_basepairs_h.shrink_to_fit(); + + // sketch elements get generated here + auto sketch_elements = SketchElementImpl::generate_sketch_elements(number_of_reads_, + kmer_size_, + window_size_, + first_read_id, + merged_basepairs_d, + read_id_to_basepairs_section_h, + read_id_to_basepairs_section_d); + device_buffer representations_d = std::move(sketch_elements.representations_d); + device_buffer rest_d = std::move(sketch_elements.rest_d); + + CGA_LOG_INFO("Deallocating {} bytes from read_id_to_basepairs_section_d", read_id_to_basepairs_section_d.size() * sizeof(decltype(read_id_to_basepairs_section_d)::value_type)); + read_id_to_basepairs_section_d.free(); + CGA_LOG_INFO("Deallocating {} bytes from merged_basepairs_d", merged_basepairs_d.size() * sizeof(decltype(merged_basepairs_d)::value_type)); + merged_basepairs_d.free(); + + // *** sort sketch elements by representation *** + // As this is a stable sort and the data was initailly grouper by read_id this means that the sketch elements within each representations are sorted by read_id + thrust::stable_sort_by_key(thrust::device, + representations_d.data(), + representations_d.data() + representations_d.size(), + rest_d.data()); + + // copy the data to member functions (depending on the interface desing these copies might not be needed) + representations_d_.resize(representations_d.size()); + representations_d_.shrink_to_fit(); + thrust::copy(thrust::device, + representations_d.data(), + representations_d.data() + representations_d.size(), + representations_d_.begin()); + representations_d.free(); + + // TODO: implement this on GPU + thrust::device_vector 
rest_d_thrust(rest_d.data(), rest_d.data() + rest_d.size()); + rest_d.free(); + rest_d_thrust.shrink_to_fit(); + + thrust::host_vector rest_h(rest_d_thrust); + rest_d_thrust.clear(); + rest_d_thrust.shrink_to_fit(); + + thrust::host_vector positions_in_reads_h(rest_h.size()); + thrust::host_vector read_ids_h(rest_h.size()); + thrust::host_vector directions_of_reads_h(rest_h.size()); + + for (std::size_t i = 0; i < rest_h.size(); ++i) + { + read_ids_h[i] = rest_h[i].read_id_; + positions_in_reads_h[i] = rest_h[i].position_in_read_; + directions_of_reads_h[i] = typename SketchElementImpl::DirectionOfRepresentation(rest_h[i].direction_); + } + + read_ids_d_ = read_ids_h; + positions_in_reads_d_ = positions_in_reads_h; + directions_of_reads_d_ = directions_of_reads_h; +} + +} // namespace cudamapper +} // namespace claragenomics \ No newline at end of file diff --git a/cudamapper/tests/CMakeLists.txt b/cudamapper/tests/CMakeLists.txt index 044317476..9511adbff 100644 --- a/cudamapper/tests/CMakeLists.txt +++ b/cudamapper/tests/CMakeLists.txt @@ -14,6 +14,7 @@ project(cudamappertests) set(SOURCES main.cpp Test_CudamapperIndexGPU.cu + Test_CudamapperIndexGPUTwoIndices.cu Test_CudamapperMatcher.cu Test_CudamapperMatcherGPU.cu Test_CudamapperMinimizer.cpp @@ -26,6 +27,7 @@ include_directories(${cudamapper_data_include_dir}) set(LIBS bioparser index_gpu + index_gpu_two_indices matcher matcher_gpu overlapper_triggerred diff --git a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu new file mode 100644 index 000000000..8ab596e70 --- /dev/null +++ b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu @@ -0,0 +1,768 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. 
+*/ + +#include "gtest/gtest.h" + +#include +#include + +#include "cudamapper_file_location.hpp" +#include "../src/index_gpu_two_indices.cuh" +#include "../src/minimizer.hpp" + +namespace claragenomics +{ +namespace cudamapper +{ + +void test_function(const std::string& filename, + const read_id_t first_read_id, + const read_id_t past_the_last_read_id, + const std::uint64_t kmer_size, + const std::uint64_t window_size, + const thrust::host_vector& expected_representations, + const thrust::host_vector& expected_positions_in_reads, + const thrust::host_vector& expected_read_ids, + const thrust::host_vector& expected_directions_of_reads, + const std::vector& expected_read_id_to_read_name, + const std::vector& expected_read_id_to_read_length, + const std::uint64_t expected_number_of_reads) +{ + std::unique_ptr parser = io::create_fasta_parser(filename); + IndexGPUTwoIndices index(parser.get(), + first_read_id, + past_the_last_read_id, + kmer_size, + window_size); + + ASSERT_EQ(index.number_of_reads(), expected_number_of_reads); + if (0 == expected_number_of_reads) + { + return; + } + + ASSERT_EQ(expected_number_of_reads, expected_read_id_to_read_name.size()); + ASSERT_EQ(expected_number_of_reads, expected_read_id_to_read_length.size()); + for (read_id_t read_id = first_read_id; read_id < past_the_last_read_id; ++read_id) + { + ASSERT_EQ(index.read_id_to_read_length(read_id), expected_read_id_to_read_length[read_id - first_read_id]) << "read_id: " << read_id; + ASSERT_EQ(index.read_id_to_read_name(read_id), expected_read_id_to_read_name[read_id - first_read_id]) << "read_id: " << read_id; + } + + // check arrays + const thrust::device_vector& representations_d = index.representations(); + const thrust::device_vector& positions_in_reads_d = index.positions_in_reads(); + const thrust::device_vector& read_ids_d = index.read_ids(); + const thrust::device_vector& directions_of_reads_d = index.directions_of_reads(); + const thrust::host_vector& representations_h(representations_d); + const thrust::host_vector& positions_in_reads_h(positions_in_reads_d); + const thrust::host_vector& read_ids_h(read_ids_d); + const thrust::host_vector& directions_of_reads_h(directions_of_reads_d); + ASSERT_EQ(representations_h.size(), expected_representations.size()); + ASSERT_EQ(positions_in_reads_h.size(), expected_positions_in_reads.size()); + ASSERT_EQ(read_ids_h.size(), expected_read_ids.size()); + ASSERT_EQ(directions_of_reads_h.size(), expected_directions_of_reads.size()); + ASSERT_EQ(representations_h.size(), positions_in_reads_h.size()); + ASSERT_EQ(positions_in_reads_h.size(), read_ids_h.size()); + ASSERT_EQ(read_ids_h.size(), directions_of_reads_h.size()); + for (std::size_t i = 0; i < expected_positions_in_reads.size(); ++i) + { + EXPECT_EQ(representations_h[i], expected_representations[i]) << "i: " << i; + EXPECT_EQ(positions_in_reads_h[i], expected_positions_in_reads[i]) << "i: " << i; + EXPECT_EQ(read_ids_h[i], expected_read_ids[i]) << "i: " << i; + EXPECT_EQ(directions_of_reads_h[i], expected_directions_of_reads[i]) << "i: " << i; + } +} + +TEST(TestCudamapperIndexGPUTwoIndices, GATT_4_1) +{ + // >read_0 + // GATT + + // GATT = 0b10001111 + // AATC = 0b00001101 <- minimizer + + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"; + const std::uint64_t minimizer_size = 4; + const std::uint64_t window_size = 1; + + std::vector expected_read_id_to_read_name; + expected_read_id_to_read_name.push_back("read_0"); + + std::vector expected_read_id_to_read_length; + 
expected_read_id_to_read_length.push_back(4); + + std::vector expected_representations; + std::vector expected_positions_in_reads; + std::vector expected_read_ids; + std::vector expected_directions_of_reads; + expected_representations.push_back(0b1101); + expected_positions_in_reads.push_back(0); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + + test_function(filename, + 0, + 1, + minimizer_size, + window_size, + expected_representations, + expected_positions_in_reads, + expected_read_ids, + expected_directions_of_reads, + expected_read_id_to_read_name, + expected_read_id_to_read_length, + 1); +} + +TEST(TestCudamapperIndexGPUTwoIndices, GATT_2_3) +{ + // >read_0 + // GATT + + // kmer representation: forward, reverse + // GA: <20> 31 + // AT: <03> 03 + // TT: 33 <00> + + // front end minimizers: representation, position_in_read, direction, read_id + // GA : 20 0 F 0 + // GAT: 03 1 F 0 + + // central minimizers + // GATT: 00 2 R 0 + + // back end minimizers + // ATT: 00 2 R 0 + // TT : 00 2 R 0 + + // All minimizers: GA(0f), AT(1f), AA(2r) + + // (2r1) means position 2, reverse direction, read 1 + // (1,2) means array block start at element 1 and has 2 elements + + // 0 1 2 + // data arrays: GA(0f0), AT(1f0), AA(2r0) + + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"; + const std::uint64_t minimizer_size = 2; + const std::uint64_t window_size = 3; + + std::vector expected_read_id_to_read_name; + expected_read_id_to_read_name.push_back("read_0"); + + std::vector expected_read_id_to_read_length; + expected_read_id_to_read_length.push_back(4); + + std::vector expected_representations; + std::vector expected_positions_in_reads; + std::vector expected_read_ids; + std::vector expected_directions_of_reads; + + expected_representations.push_back(0b0000); // AA(2r0) + expected_positions_in_reads.push_back(2); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_representations.push_back(0b0011); // AT(1f0) + expected_positions_in_reads.push_back(1); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b1000); // GA(0f0) + expected_positions_in_reads.push_back(0); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + + test_function(filename, + 0, + 1, + minimizer_size, + window_size, + expected_representations, + expected_positions_in_reads, + expected_read_ids, + expected_directions_of_reads, + expected_read_id_to_read_name, + expected_read_id_to_read_length, + 1); +} + +TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_2_8) +{ + // *** Read is shorter than one full window, the result should be empty *** + + // >read_0 + // CCCATACC + + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/cccatacc.fasta"; + const std::uint64_t minimizer_size = 2; + const std::uint64_t window_size = 8; + + // all data arrays should be empty + + std::vector expected_read_id_to_read_name; + + std::vector expected_read_id_to_read_length; + + std::vector expected_representations; + std::vector expected_positions_in_reads; + std::vector expected_read_ids; + std::vector expected_directions_of_reads; + + test_function(filename, + 0, + 1, + minimizer_size, + window_size, + expected_representations, + 
expected_positions_in_reads, + expected_read_ids, + expected_directions_of_reads, + expected_read_id_to_read_name, + expected_read_id_to_read_length, + 0); +} + +// TODO: Cover this case as well +/*TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_5) +{ + // *** One Read is shorter than one full window, the other is not *** + + // >read_0 + // CATCAAG + // >read_1 + // AAGCTA + + // ** CATCAAG ** + + // kmer representation: forward, reverse + // CAT: 103 <032> + // ATC: <031> 203 + // TCA: <310> 320 + // CAA: <100> 332 + // AAG: <002> 133 + + // front end minimizers: representation, position_in_read, direction, read_id + // CAT : 032 0 R 0 + // CATC : 031 1 F 0 + // CATCA : 031 1 F 0 + // CATCAA: 031 1 F 0 + + // central minimizers + // CATCAAG: 002 4 F 0 + + // back end minimizers + // ATCAAG: 002 4 F 0 + // TCAAG : 002 4 F 0 + // CAAG : 002 4 F 0 + // AAG : 002 4 F 0 + + // ** AAGCTA ** + // ** read does not fit one array ** + + // All minimizers: ATG(0r0), ATC(1f0), AAG(4f0) + + // (2r1) means position 2, reverse direction, read 1 + // (1,2) means array block start at element 1 and has 2 elements + + // 0 1 2 + // data arrays: AAG(4f0), ATC(1f0), ATG(0r0) + + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/catcaag_aagcta.fasta"; + const std::uint64_t minimizer_size = 3; + const std::uint64_t window_size = 5; + + std::vector expected_read_id_to_read_name; + expected_read_id_to_read_name.push_back("read_0"); + + std::vector expected_read_id_to_read_length; + expected_read_id_to_read_length.push_back(7); + + std::vector expected_representations; + std::vector expected_positions_in_reads; + std::vector expected_read_ids; + std::vector expected_directions_of_reads; + expected_representations.push_back(0b000010); // AAG(4f0) + expected_positions_in_reads.push_back(4); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b001101); // ATC(1f0) + expected_positions_in_reads.push_back(1); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b001110); // ATG(0r0) + expected_positions_in_reads.push_back(0); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + + test_function(filename, + 0, + 2, + minimizer_size, + window_size, + expected_representations, + expected_positions_in_reads, + expected_read_ids, + expected_directions_of_reads, + expected_read_id_to_read_name, + expected_read_id_to_read_length, + 1); // <- only one read goes into index, the other is too short +}*/ + +TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_3_5) +{ + // >read_0 + // CCCATACC + + // ** CCCATAC ** + + // kmer representation: forward, reverse + // CCC: <111> 222 + // CCA: <110> 322 + // CAT: 103 <032> + // ATA: <030> 303 + // TAC: 301 <230> + // ACC: <011> 223 + + // front end minimizers: representation, position_in_read, direction + // CCC : 111 0 F + // CCCA : 110 1 F + // CCCAT : 032 2 R + // CCCATA: 030 3 F + + // central minimizers + // CCCATAC: 030 3 F + // CCATACC: 011 5 F + + // back end minimizers + // CATACC: 011 5 F + // ATACC : 011 5 F + // TACC : 011 5 F + // ACC : 011 5 F + + // All minimizers: CCC(0f), CCA(1f), ATG(2r), ATA(3f), ACC(5f) + + // (2r1) means position 2, reverse direction, read 1 + // (1,2) means array block start at element 1 and has 2 elements + + // 0 1 2 + // 
data arrays: ACC(5f0), ATA(3f0), ATG(2r0), CCA(1f0), CCC(0f0) + + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/cccatacc.fasta"; + const std::uint64_t minimizer_size = 3; + const std::uint64_t window_size = 5; + + std::vector expected_read_id_to_read_name; + expected_read_id_to_read_name.push_back("read_0"); + + std::vector expected_read_id_to_read_length; + expected_read_id_to_read_length.push_back(8); + + std::vector expected_representations; + std::vector expected_positions_in_reads; + std::vector expected_read_ids; + std::vector expected_directions_of_reads; + expected_representations.push_back(0b000101); // ACC(5f0) + expected_positions_in_reads.push_back(5); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b001100); // ATA(3f0) + expected_positions_in_reads.push_back(3); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b001110); // ATG(2r0) + expected_positions_in_reads.push_back(2); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_representations.push_back(0b010100); // CCA(1f0) + expected_positions_in_reads.push_back(1); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b010101); // CCC(0f0) + expected_positions_in_reads.push_back(0); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + + test_function(filename, + 0, + 1, + minimizer_size, + window_size, + expected_representations, + expected_positions_in_reads, + expected_read_ids, + expected_directions_of_reads, + expected_read_id_to_read_name, + expected_read_id_to_read_length, + 1); +} + +TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_2) +{ + // >read_0 + // CATCAAG + // >read_1 + // AAGCTA + + // ** CATCAAG ** + + // kmer representation: forward, reverse + // CAT: 103 <032> + // ATC: <031> 203 + // TCA: <310> 320 + // CAA: <100> 332 + // AAG: <002> 133 + + // front end minimizers: representation, position_in_read, direction, read_id + // CAT: 032 0 R 0 + + // central minimizers + // CATC: 031 1 F 0 + // ATCA: 031 1 F 0 + // TCAA: 100 3 F 0 + // CAAG: 002 4 F 0 + + // back end minimizers + // AAG: 002 4 F 0 + + // All minimizers: ATC(1f), CAA(3f), AAG(4f), ATG(0r) + + // ** AAGCTA ** + + // kmer representation: forward, reverse + // AAG: <002> 133 + // AGC: <021> 213 + // GCT: 213 <021> + // CTA: <130> 302 + + // front end minimizers: representation, position_in_read, direction, read_id + // AAG: 002 0 F 1 + + // central minimizers + // AAGC: 002 0 F 1 + // AGCT: 021 2 R 1 // only the last minimizer is saved + // GCTA: 021 2 R 1 + + // back end minimizers + // CTA: 130 3 F 1 + + // All minimizers: AAG(0f), AGC(1f), CTA(3f) + + // (2r1) means position 2, reverse direction, read 1 + // (1,2) means array block start at element 1 and has 2 elements + + // 0 1 2 3 4 5 6 + // data arrays: AAG(4f0), AAG(0f1), AGC(2r1), ATC(1f0), ATG(0r0), CAA(3f0), CTA(3f1) + + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/catcaag_aagcta.fasta"; + const std::uint64_t minimizer_size = 3; + const std::uint64_t window_size = 2; + + std::vector expected_read_id_to_read_name; + 
expected_read_id_to_read_name.push_back("read_0"); + expected_read_id_to_read_name.push_back("read_1"); + + std::vector expected_read_id_to_read_length; + expected_read_id_to_read_length.push_back(7); + expected_read_id_to_read_length.push_back(6); + + std::vector expected_representations; + std::vector expected_positions_in_reads; + std::vector expected_read_ids; + std::vector expected_directions_of_reads; + + expected_representations.push_back(0b000010); // AAG(4f0) + expected_positions_in_reads.push_back(4); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b000010); // AAG(0f1) + expected_positions_in_reads.push_back(0); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b001001); // AGC(2r1) + expected_positions_in_reads.push_back(2); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_representations.push_back(0b001101); // ATC(1f0) + expected_positions_in_reads.push_back(1); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b001110); // ATG(0r0) + expected_positions_in_reads.push_back(0); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_representations.push_back(0b010000); // CAA(3f0) + expected_positions_in_reads.push_back(3); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b011100); // CTA(3f1) + expected_positions_in_reads.push_back(3); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + + test_function(filename, + 0, + 2, + minimizer_size, + window_size, + expected_representations, + expected_positions_in_reads, + expected_read_ids, + expected_directions_of_reads, + expected_read_id_to_read_name, + expected_read_id_to_read_length, + 2); +} + +TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3) +{ + // >read_0 + // AAAACTGAA + // >read_1 + // GCCAAAG + + // ** AAAACTGAA ** + + // kmer representation: forward, reverse + // AA: <00> 33 + // AA: <00> 33 + // AA: <00> 33 + // AC: <01> 23 + // CT: 13 <02> + // TG: 32 <10> + // GA: <20> 31 + // AA: <00> 33 + + // front end minimizers: representation, position_in_read, direction, read_id + // AA : 00 0 F 0 + // AAA: 00 1 F 0 + + // central minimizers + // AAAA: 00 2 F 0 + // AAAC: 00 2 F 0 + // AACT: 00 2 F 0 + // ACTG: 01 3 F 0 + // CTGA: 02 4 R 0 + // TGAA: 00 7 F 0 + + // back end minimizers + // GAA: 00 7 F 0 + // AA : 00 7 F 0 + + // All minimizers: AA(0f), AA(1f), AA(2f), AC(3f), AG(4r), AA (7f) + + // ** GCCAAAG ** + + // kmer representation: forward, reverse + // GC: <21> 21 + // CC: <11> 22 + // CA: <10> 32 + // AA: <00> 33 + // AA: <00> 33 + // AG: <03> 21 + + // front end minimizers: representation, position_in_read, direction, read_id + // GC : 21 0 F 0 + // GCC: 11 1 F 0 + + // central minimizers + // GCCA: 10 2 F 0 + // CCAA: 00 3 F 0 + // CAAA: 00 4 F 0 + // AAAG: 00 4 F 0 + + // back end minimizers + // AAG: 00 4 F 0 + // AG : 03 5 F 0 + + // All minimizers: GC(0f), CC(1f), CA(2f), AA(3f), AA(4f), AG(5f) + + // 
(2r1) means position 2, reverse direction, read 1 + // (1,2) means array block start at element 1 and has 2 elements + + // 0 1 2 3 4 5 6 7 8 9 10 11 + // data arrays: AA(0f0), AA(1f0), AA(2f0), AA(7f0), AA(3f1), AA(4f1), AC(3f0), AG(4r0), AG(5f1), CA(2f1), CC(1f1), GC(0f1) + + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/aaaactgaa_gccaaag.fasta"; + const std::uint64_t minimizer_size = 2; + const std::uint64_t window_size = 3; + + std::vector expected_read_id_to_read_name; + expected_read_id_to_read_name.push_back("read_0"); + expected_read_id_to_read_name.push_back("read_1"); + + std::vector expected_read_id_to_read_length; + expected_read_id_to_read_length.push_back(9); + expected_read_id_to_read_length.push_back(7); + + std::vector expected_representations; + std::vector expected_positions_in_reads; + std::vector expected_read_ids; + std::vector expected_directions_of_reads; + expected_representations.push_back(0b0000); // AA(0f0) + expected_positions_in_reads.push_back(0); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0000); // AA(1f0) + expected_positions_in_reads.push_back(1); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0000); // AA(2f0) + expected_positions_in_reads.push_back(2); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0000); // AA(7f0) + expected_positions_in_reads.push_back(7); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0000); // AA(3f1) + expected_positions_in_reads.push_back(3); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0000); // AA(4f1) + expected_positions_in_reads.push_back(4); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0001); // AC(3f0) + expected_positions_in_reads.push_back(3); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0010); // AG(4r0) + expected_positions_in_reads.push_back(4); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_representations.push_back(0b0010); // AG(5f1) + expected_positions_in_reads.push_back(5); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0100); // CA(2f1) + expected_positions_in_reads.push_back(2); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0101); // CC(1f1) + expected_positions_in_reads.push_back(1); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b1001); // GC(0f1) + expected_positions_in_reads.push_back(0); + 
expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + + test_function(filename, + 0, + 2, + minimizer_size, + window_size, + expected_representations, + expected_positions_in_reads, + expected_read_ids, + expected_directions_of_reads, + expected_read_id_to_read_name, + expected_read_id_to_read_length, + 2); +} + +TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3_only_second_read_in_index) +{ + // >read_0 + // AAAACTGAA + // >read_1 + // GCCAAAG + + // ** AAAACTGAA ** + // only second read goes into index + + // ** GCCAAAG ** + + // kmer representation: forward, reverse + // GC: <21> 21 + // CC: <11> 22 + // CA: <10> 32 + // AA: <00> 33 + // AA: <00> 33 + // AG: <03> 21 + + // front end minimizers: representation, position_in_read, direction, read_id + // GC : 21 0 F 0 + // GCC: 11 1 F 0 + + // central minimizers + // GCCA: 10 2 F 0 + // CCAA: 00 3 F 0 + // CAAA: 00 4 F 0 + // AAAG: 00 4 F 0 + + // back end minimizers + // AAG: 00 4 F 0 + // AG : 03 5 F 0 + + // All minimizers: GC(0f), CC(1f), CA(2f), AA(3f), AA(4f), AG(5f) + + // (2r1) means position 2, reverse direction, read 1 + // (1,2) means array block start at element 1 and has 2 elements + + // 0 1 2 3 4 5 + // data arrays: AA(3f1), AA(4f1), AG(5f1), CA(2f1), CC(1f1), GC(0f1) + + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/aaaactgaa_gccaaag.fasta"; + const std::uint64_t minimizer_size = 2; + const std::uint64_t window_size = 3; + + // only take second read + std::vector expected_read_id_to_read_name; + expected_read_id_to_read_name.push_back("read_1"); + + std::vector expected_read_id_to_read_length; + expected_read_id_to_read_length.push_back(7); + + std::vector expected_representations; + std::vector expected_positions_in_reads; + std::vector expected_read_ids; + std::vector expected_directions_of_reads; + expected_representations.push_back(0b0000); // AA(3f1) + expected_positions_in_reads.push_back(3); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0000); // AA(4f1) + expected_positions_in_reads.push_back(4); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0010); // AG(5f1) + expected_positions_in_reads.push_back(5); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0100); // CA(2f1) + expected_positions_in_reads.push_back(2); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0101); // CC(1f1) + expected_positions_in_reads.push_back(1); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b1001); // GC(0f1) + expected_positions_in_reads.push_back(0); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + + test_function(filename, + 1, // <- only take second read + 2, + minimizer_size, + window_size, + expected_representations, + expected_positions_in_reads, + expected_read_ids, + expected_directions_of_reads, + expected_read_id_to_read_name, + 
expected_read_id_to_read_length, + 1); +} + +} // namespace cudamapper +} // namespace claragenomics \ No newline at end of file From 591000416a469603f5b98b8a04a94648f0c76248 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Tue, 5 Nov 2019 15:00:51 +0100 Subject: [PATCH 024/128] [cudamapper] Added new computation of number of anchors --- cudamapper/src/matcher_gpu.cu | 20 +++++ cudamapper/src/matcher_gpu.cuh | 28 +++++++ cudamapper/tests/Test_CudamapperMatcherGPU.cu | 74 +++++++++++++++++++ 3 files changed, 122 insertions(+) diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index dfec1421e..ac9eaf72f 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -11,6 +11,7 @@ #include "matcher_gpu.cuh" #include +#include #include #include @@ -95,6 +96,25 @@ void find_query_target_matches(thrust::device_vector& found_target find_query_target_matches_kernel<<>>(found_target_indices_d.data().get(), query_representations_d.data().get(), get_size(query_representations_d), target_representations_d.data().get(), get_size(target_representations_d)); } +std::int64_t compute_number_of_anchors(const thrust::device_vector query_starting_index_of_each_representation_d, const thrust::device_vector& found_target_indices_d, const thrust::device_vector target_starting_index_of_each_representation_d) +{ + assert(query_starting_index_of_each_representation_d.size() == found_target_indices_d.size() + 1); + const std::uint32_t* const query_starting_indices = query_starting_index_of_each_representation_d.data().get(); + const std::uint32_t* const target_starting_indices = target_starting_index_of_each_representation_d.data().get(); + const std::int64_t* const found_target_indices = found_target_indices_d.data().get(); + + return thrust::transform_reduce(thrust::make_counting_iterator(std::int64_t(0)), thrust::make_counting_iterator(get_size(query_starting_index_of_each_representation_d) - 1), + [query_starting_indices, target_starting_indices, found_target_indices] __device__(std::uint32_t query_index) -> std::int64_t { + std::int32_t n_queries_with_representation = query_starting_indices[query_index + 1] - query_starting_indices[query_index]; + std::int64_t target_index = found_target_indices[query_index]; + std::int32_t n_targets_with_representation = 0; + if (target_index >= 0) + n_targets_with_representation = target_starting_indices[target_index + 1] - target_starting_indices[target_index]; + return n_queries_with_representation * n_targets_with_representation; + }, + std::int64_t(0), thrust::plus()); +} + __global__ void create_new_value_mask(const representation_t* const representations_d, const std::size_t number_of_elements, std::uint32_t* const new_value_mask_d) diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index 6072e351e..2b5b785be 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -82,6 +82,34 @@ thrust::device_vector find_first_occurrences_of_representations(c /// \param target_representations_d An sorted array of target representations void find_query_target_matches(thrust::device_vector& found_target_indices_d, const thrust::device_vector& query_representations_d, const thrust::device_vector& target_representations_d); +/// \brief Computes the number of anchors for matches in a query and target arrays. 
+/// +/// Takes the arrays which store the positions of the first occurrences the different representations +/// in the query and target representation arrays (see find_first_occurrences_of_representations) +/// and the array with the found matches (see find_query_target_matches) and computes the total number +/// of anchors which can be computed from the query and target arrays. +/// The number of anchors is the number of all-to-all combinations of the matching representations in query and target. +/// For example: +/// query: +/// representation: 0 12 23 32 46 +/// starting index: 0 4 10 13 18 21 +/// target: +/// representation: 5 12 16 23 24 25 46 +/// starting index: 0 3 7 9 13 16 18 21 +/// +/// found_target_indicies_d: (matching representations: 12, 23, 46) +/// array-index: 0 1 2 3 4 +/// target-index: -1 1 3 -1 6 (-1 indicates no matching representation in target) +/// +/// gives: +/// (10-4)*(7-3) + (13-10)*(13-9) + (21-18)*(21-18) = 45 +/// +/// \param query_starting_index_of_each_representation_d +/// \param found_target_indices_d +/// \param target_starting_index_of_each_representation_d +/// \return The number of anchors which can be generated from the query and target arrays +std::int64_t compute_number_of_anchors(const thrust::device_vector query_starting_index_of_each_representation_d, const thrust::device_vector& found_target_indices_d, const thrust::device_vector target_starting_index_of_each_representation_d); + /// \brief Writes 0 to the output array if the value to the left is the same as the current value, 1 otherwise. First element is always 1 /// /// For example: diff --git a/cudamapper/tests/Test_CudamapperMatcherGPU.cu b/cudamapper/tests/Test_CudamapperMatcherGPU.cu index 4f4eb0bf9..78f29680c 100644 --- a/cudamapper/tests/Test_CudamapperMatcherGPU.cu +++ b/cudamapper/tests/Test_CudamapperMatcherGPU.cu @@ -352,5 +352,79 @@ TEST(TestCudamapperMatcherGPU, test_query_target_matches_large_example) test_find_query_target_matches(query_representations_h, target_representations_h, expected_found_target_indices_h); } +void test_compute_number_of_anchors(const thrust::host_vector& query_starting_index_of_each_representation_h, + const thrust::host_vector& found_target_indices_h, + const thrust::host_vector& target_starting_index_of_each_representation_h, + const std::int64_t expected_n_anchors) +{ + const thrust::device_vector query_starting_index_of_each_representation_d(query_starting_index_of_each_representation_h); + const thrust::device_vector target_starting_index_of_each_representation_d(target_starting_index_of_each_representation_h); + const thrust::device_vector found_target_indices_d(found_target_indices_h); + + const std::int64_t n_anchors = details::matcher_gpu::compute_number_of_anchors(query_starting_index_of_each_representation_d, found_target_indices_d, target_starting_index_of_each_representation_d); + + EXPECT_EQ(n_anchors, expected_n_anchors); +} + +TEST(TestCudamapperMatcherGPU, test_compute_number_of_anchors_small_example) +{ + thrust::host_vector query_starting_index_of_each_representation_h; + query_starting_index_of_each_representation_h.push_back(0); + query_starting_index_of_each_representation_h.push_back(4); + query_starting_index_of_each_representation_h.push_back(10); + query_starting_index_of_each_representation_h.push_back(13); + query_starting_index_of_each_representation_h.push_back(18); + query_starting_index_of_each_representation_h.push_back(21); + + thrust::host_vector target_starting_index_of_each_representation_h; + 
target_starting_index_of_each_representation_h.push_back(0); + target_starting_index_of_each_representation_h.push_back(3); + target_starting_index_of_each_representation_h.push_back(7); + target_starting_index_of_each_representation_h.push_back(9); + target_starting_index_of_each_representation_h.push_back(13); + target_starting_index_of_each_representation_h.push_back(16); + target_starting_index_of_each_representation_h.push_back(18); + target_starting_index_of_each_representation_h.push_back(21); + + thrust::host_vector found_target_indices_h; + found_target_indices_h.push_back(-1); + found_target_indices_h.push_back(1); + found_target_indices_h.push_back(3); + found_target_indices_h.push_back(-1); + found_target_indices_h.push_back(6); + + const int64_t expected_n_anchors = 45; + + test_compute_number_of_anchors(query_starting_index_of_each_representation_h, + found_target_indices_h, + target_starting_index_of_each_representation_h, + expected_n_anchors); +} + +TEST(TestCudamapperMatcherGPU, test_compute_number_of_anchors_large_example) +{ + const std::int64_t length = 100000; + + thrust::host_vector query_starting_index_of_each_representation_h; + thrust::host_vector target_starting_index_of_each_representation_h; + thrust::host_vector found_target_indices_h(length - 1, -1); + std::int64_t expected_n_anchors = 0; + for (std::int64_t i = 0; i < length; ++i) + { + query_starting_index_of_each_representation_h.push_back(2 * i); + target_starting_index_of_each_representation_h.push_back(10 * i + i % 10); + if (i % 3 == 0 && i < length - 1) + { + found_target_indices_h[i] = i; + expected_n_anchors += 2 * (10 + (i + 1) % 10 - i % 10); + } + } + + test_compute_number_of_anchors(query_starting_index_of_each_representation_h, + found_target_indices_h, + target_starting_index_of_each_representation_h, + expected_n_anchors); +} + } // namespace cudamapper } // namespace claragenomics From bf3bd815201ce19e4795dc2dcffdbeb6b5100c64 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Tue, 5 Nov 2019 16:00:24 +0100 Subject: [PATCH 025/128] Splitting ReadidPositionDirection into several arrays without using the host memory --- cudamapper/src/index_gpu_two_indices.cuh | 93 +++++++---- .../Test_CudamapperIndexGPUTwoIndices.cu | 144 ++++++++++++++++++ 2 files changed, 205 insertions(+), 32 deletions(-) diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh index e40bd410d..65aa5e210 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -20,6 +20,7 @@ #include #include #include +#include namespace claragenomics { @@ -58,14 +59,14 @@ public: /// \return an array of representations of sketch elements const thrust::device_vector& representations() const; - /// \brief returns an array of starting positions of sketch elements in their reads - /// \return an array of starting positions of sketch elements in their reads - const thrust::device_vector& positions_in_reads() const; - /// \brief returns an array of reads ids for sketch elements /// \return an array of reads ids for sketch elements const thrust::device_vector& read_ids() const; + /// \brief returns an array of starting positions of sketch elements in their reads + /// \return an array of starting positions of sketch elements in their reads + const thrust::device_vector& positions_in_reads() const; + /// \brief returns an array of directions in which sketch elements were read /// \return an array of directions in which sketch elements were read const 
thrust::device_vector& directions_of_reads() const; @@ -91,8 +92,8 @@ private: const read_id_t past_the_last_read_id); thrust::device_vector representations_d_; - thrust::device_vector positions_in_reads_d_; thrust::device_vector read_ids_d_; + thrust::device_vector positions_in_reads_d_; thrust::device_vector directions_of_reads_d_; std::vector read_id_to_read_name_; @@ -104,6 +105,41 @@ private: std::uint64_t number_of_reads_; }; +namespace details +{ +namespace index_gpu_two_indices +{ + +/// \brief Splits array of structs into one array per struct element +/// +/// \param rest_d original struct +/// \param positions_in_reads_d output array +/// \param read_ids_d output array +/// \param directions_of_reads_d output array +/// \param total_elements number of elements in each array +/// +/// \tparam ReadidPositionDirection any implementation of SketchElementImpl::ReadidPositionDirection +/// \tparam DirectionOfRepresentation any implementation of SketchElementImpl::SketchElementImpl::DirectionOfRepresentation +template +__global__ void copy_rest_to_separate_arrays(const ReadidPositionDirection* const rest_d, + read_id_t* const read_ids_d, + position_in_read_t* const positions_in_reads_d, + DirectionOfRepresentation* const directions_of_reads_d, + const std::size_t total_elements) +{ + auto i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i >= total_elements) + return; + + read_ids_d[i] = rest_d[i].read_id_; + positions_in_reads_d[i] = rest_d[i].position_in_read_; + directions_of_reads_d[i] = DirectionOfRepresentation(rest_d[i].direction_); +} + +} // namespace index_gpu_two_indices +} // namespace details + template IndexGPUTwoIndices::IndexGPUTwoIndices(io::FastaParser* parser, const read_id_t first_read_id, @@ -132,15 +168,15 @@ const thrust::device_vector& IndexGPUTwoIndices -const thrust::device_vector& IndexGPUTwoIndices::positions_in_reads() const +const thrust::device_vector& IndexGPUTwoIndices::read_ids() const { - return positions_in_reads_d_; + return read_ids_d_; } template -const thrust::device_vector& IndexGPUTwoIndices::read_ids() const +const thrust::device_vector& IndexGPUTwoIndices::positions_in_reads() const { - return read_ids_d_; + return positions_in_reads_d_; } template @@ -282,29 +318,22 @@ void IndexGPUTwoIndices::generate_index(io::FastaParser* pars representations_d_.begin()); representations_d.free(); - // TODO: implement this on GPU - thrust::device_vector rest_d_thrust(rest_d.data(), rest_d.data() + rest_d.size()); - rest_d.free(); - rest_d_thrust.shrink_to_fit(); - - thrust::host_vector rest_h(rest_d_thrust); - rest_d_thrust.clear(); - rest_d_thrust.shrink_to_fit(); - - thrust::host_vector positions_in_reads_h(rest_h.size()); - thrust::host_vector read_ids_h(rest_h.size()); - thrust::host_vector directions_of_reads_h(rest_h.size()); - - for (std::size_t i = 0; i < rest_h.size(); ++i) - { - read_ids_h[i] = rest_h[i].read_id_; - positions_in_reads_h[i] = rest_h[i].position_in_read_; - directions_of_reads_h[i] = typename SketchElementImpl::DirectionOfRepresentation(rest_h[i].direction_); - } - - read_ids_d_ = read_ids_h; - positions_in_reads_d_ = positions_in_reads_h; - directions_of_reads_d_ = directions_of_reads_h; + read_ids_d_.resize(representations_d_.size()); + read_ids_d_.shrink_to_fit(); + positions_in_reads_d_.resize(representations_d_.size()); + positions_in_reads_d_.shrink_to_fit(); + directions_of_reads_d_.resize(representations_d_.size()); + directions_of_reads_d_.shrink_to_fit(); + + const std::uint32_t threads = 256; + const std::uint32_t 
blocks = ceiling_divide(representations_d_.size(), threads); + + details::index_gpu_two_indices::copy_rest_to_separate_arrays<<>>(rest_d.data(), + thrust::raw_pointer_cast(read_ids_d_.data()), + thrust::raw_pointer_cast(positions_in_reads_d_.data()), + thrust::raw_pointer_cast(directions_of_reads_d_.data()), + representations_d_.size()); + CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); } } // namespace cudamapper diff --git a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu index 8ab596e70..6327cd27f 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu @@ -17,6 +17,8 @@ #include "../src/index_gpu_two_indices.cuh" #include "../src/minimizer.hpp" +#include + namespace claragenomics { namespace cudamapper @@ -764,5 +766,147 @@ TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3_only_second_read_in 1); } +namespace details +{ +namespace index_gpu_two_indices +{ +// ************ Test copy_rest_to_separate_arrays ************** + +template +void test_function_copy_rest_to_separate_arrays(const thrust::host_vector& rest_h, + const thrust::host_vector& expected_read_ids_h, + const thrust::host_vector& expected_positions_in_reads_h, + const thrust::host_vector& expected_directions_of_reads_h, + const std::uint32_t threads) +{ + ASSERT_EQ(rest_h.size(), expected_read_ids_h.size()); + ASSERT_EQ(rest_h.size(), expected_positions_in_reads_h.size()); + ASSERT_EQ(rest_h.size(), expected_directions_of_reads_h.size()); + thrust::device_vector generated_read_ids_d(rest_h.size()); + thrust::device_vector generated_positions_in_reads_d(rest_h.size()); + thrust::device_vector generated_directions_of_reads_d(rest_h.size()); + + const thrust::device_vector rest_d(rest_h); + + const std::uint32_t blocks = ceiling_divide(rest_h.size(), threads); + + copy_rest_to_separate_arrays<<>>(thrust::raw_pointer_cast(rest_d.data()), + thrust::raw_pointer_cast(generated_read_ids_d.data()), + thrust::raw_pointer_cast(generated_positions_in_reads_d.data()), + thrust::raw_pointer_cast(generated_directions_of_reads_d.data()), + rest_h.size()); + + const thrust::host_vector& generated_read_ids_h(generated_read_ids_d); + const thrust::host_vector& generated_positions_in_reads_h(generated_positions_in_reads_d); + const thrust::host_vector& generated_directions_of_reads_h(generated_directions_of_reads_d); + + for (std::size_t i = 0; i < rest_h.size(); ++i) + { + EXPECT_EQ(generated_read_ids_h[i], expected_read_ids_h[i]); + EXPECT_EQ(generated_positions_in_reads_h[i], expected_positions_in_reads_h[i]); + EXPECT_EQ(generated_directions_of_reads_h[i], expected_directions_of_reads_h[i]); + } +} + +TEST(TestCudamapperIndexGPUTwoIndices, test_function_copy_rest_to_separate_arrays) +{ + thrust::host_vector rest_h; + thrust::host_vector expected_read_ids_h; + thrust::host_vector expected_positions_in_reads_h; + thrust::host_vector expected_directions_of_reads_h; + + rest_h.push_back({5, 8, 0}); + expected_read_ids_h.push_back(5); + expected_positions_in_reads_h.push_back(8); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({15, 6, 0}); + expected_read_ids_h.push_back(15); + expected_positions_in_reads_h.push_back(6); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({2, 4, 1}); + expected_read_ids_h.push_back(2); + expected_positions_in_reads_h.push_back(4); + 
expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({18, 15, 0}); + expected_read_ids_h.push_back(18); + expected_positions_in_reads_h.push_back(15); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({6, 4, 1}); + expected_read_ids_h.push_back(6); + expected_positions_in_reads_h.push_back(4); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({6, 3, 1}); + expected_read_ids_h.push_back(6); + expected_positions_in_reads_h.push_back(3); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({89, 45, 0}); + expected_read_ids_h.push_back(89); + expected_positions_in_reads_h.push_back(45); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({547, 25, 0}); + expected_read_ids_h.push_back(547); + expected_positions_in_reads_h.push_back(25); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({14, 16, 1}); + expected_read_ids_h.push_back(14); + expected_positions_in_reads_h.push_back(16); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({18, 16, 0}); + expected_read_ids_h.push_back(18); + expected_positions_in_reads_h.push_back(16); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({45, 44, 0}); + expected_read_ids_h.push_back(45); + expected_positions_in_reads_h.push_back(44); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({65, 45, 1}); + expected_read_ids_h.push_back(65); + expected_positions_in_reads_h.push_back(45); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({15, 20, 0}); + expected_read_ids_h.push_back(15); + expected_positions_in_reads_h.push_back(20); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({45, 654, 1}); + expected_read_ids_h.push_back(45); + expected_positions_in_reads_h.push_back(654); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({782, 216, 0}); + expected_read_ids_h.push_back(782); + expected_positions_in_reads_h.push_back(216); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({255, 245, 1}); + expected_read_ids_h.push_back(255); + expected_positions_in_reads_h.push_back(245); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({346, 579, 0}); + expected_read_ids_h.push_back(346); + expected_positions_in_reads_h.push_back(579); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({12, 8, 0}); + expected_read_ids_h.push_back(12); + expected_positions_in_reads_h.push_back(8); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({65, 42, 1}); + expected_read_ids_h.push_back(65); + expected_positions_in_reads_h.push_back(42); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({566, 42, 0}); + expected_read_ids_h.push_back(566); + expected_positions_in_reads_h.push_back(42); + 
expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + + const std::uint32_t threads = 8; + + test_function_copy_rest_to_separate_arrays(rest_h, + expected_read_ids_h, + expected_positions_in_reads_h, + expected_directions_of_reads_h, + threads); +} + +} // namespace index_gpu_two_indices +} // namespace details + } // namespace cudamapper } // namespace claragenomics \ No newline at end of file From ca60bf69d516910c2004ab4be575315b7574d395 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Tue, 5 Nov 2019 17:14:12 +0100 Subject: [PATCH 026/128] [cudamapper] Compute the starting index for the anchors --- cudamapper/src/matcher_gpu.cu | 29 +++++++++++-------- cudamapper/src/matcher_gpu.cuh | 20 ++++++++----- cudamapper/tests/Test_CudamapperMatcherGPU.cu | 28 +++++++++++++----- 3 files changed, 51 insertions(+), 26 deletions(-) diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index ac9eaf72f..f571b03d1 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -11,7 +11,7 @@ #include "matcher_gpu.cuh" #include -#include +#include #include #include @@ -96,23 +96,28 @@ void find_query_target_matches(thrust::device_vector& found_target find_query_target_matches_kernel<<>>(found_target_indices_d.data().get(), query_representations_d.data().get(), get_size(query_representations_d), target_representations_d.data().get(), get_size(target_representations_d)); } -std::int64_t compute_number_of_anchors(const thrust::device_vector query_starting_index_of_each_representation_d, const thrust::device_vector& found_target_indices_d, const thrust::device_vector target_starting_index_of_each_representation_d) +void compute_anchor_starting_indices(thrust::device_vector& anchor_starting_indices_d, const thrust::device_vector query_starting_index_of_each_representation_d, const thrust::device_vector& found_target_indices_d, const thrust::device_vector target_starting_index_of_each_representation_d) { assert(query_starting_index_of_each_representation_d.size() == found_target_indices_d.size() + 1); + assert(anchor_starting_indices_d.size() == found_target_indices_d.size()); + const std::uint32_t* const query_starting_indices = query_starting_index_of_each_representation_d.data().get(); const std::uint32_t* const target_starting_indices = target_starting_index_of_each_representation_d.data().get(); const std::int64_t* const found_target_indices = found_target_indices_d.data().get(); - return thrust::transform_reduce(thrust::make_counting_iterator(std::int64_t(0)), thrust::make_counting_iterator(get_size(query_starting_index_of_each_representation_d) - 1), - [query_starting_indices, target_starting_indices, found_target_indices] __device__(std::uint32_t query_index) -> std::int64_t { - std::int32_t n_queries_with_representation = query_starting_indices[query_index + 1] - query_starting_indices[query_index]; - std::int64_t target_index = found_target_indices[query_index]; - std::int32_t n_targets_with_representation = 0; - if (target_index >= 0) - n_targets_with_representation = target_starting_indices[target_index + 1] - target_starting_indices[target_index]; - return n_queries_with_representation * n_targets_with_representation; - }, - std::int64_t(0), thrust::plus()); + thrust::transform_inclusive_scan( + thrust::make_counting_iterator(std::int64_t(0)), + thrust::make_counting_iterator(get_size(anchor_starting_indices_d)), + anchor_starting_indices_d.begin(), + [query_starting_indices, target_starting_indices, 
found_target_indices] __device__(std::uint32_t query_index) -> std::int64_t { + std::int32_t n_queries_with_representation = query_starting_indices[query_index + 1] - query_starting_indices[query_index]; + std::int64_t target_index = found_target_indices[query_index]; + std::int32_t n_targets_with_representation = 0; + if (target_index >= 0) + n_targets_with_representation = target_starting_indices[target_index + 1] - target_starting_indices[target_index]; + return n_queries_with_representation * n_targets_with_representation; + }, + thrust::plus()); } __global__ void create_new_value_mask(const representation_t* const representations_d, diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index 2b5b785be..2210dd905 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -82,13 +82,13 @@ thrust::device_vector find_first_occurrences_of_representations(c /// \param target_representations_d An sorted array of target representations void find_query_target_matches(thrust::device_vector& found_target_indices_d, const thrust::device_vector& query_representations_d, const thrust::device_vector& target_representations_d); -/// \brief Computes the number of anchors for matches in a query and target arrays. +/// \brief Computes the starting indices for an array of anchors based on the matches in query and target arrays. /// /// Takes the arrays which store the positions of the first occurrences the different representations /// in the query and target representation arrays (see find_first_occurrences_of_representations) -/// and the array with the found matches (see find_query_target_matches) and computes the total number -/// of anchors which can be computed from the query and target arrays. -/// The number of anchors is the number of all-to-all combinations of the matching representations in query and target. +/// and the array with the found matches (see find_query_target_matches) and computes the starting indices to construct an array of anchors. +/// The i-1-th element tells the starting point of the i-th element in the query array (including invalid entries for unmatched queries). +/// The last element is the total number of anchors. 
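+/// A host-side sketch of the equivalent computation (illustration only, not part of the implementation;
+/// the names below are assumed host copies of the device-side input arrays):
+///
+///   std::int64_t total_anchors = 0;
+///   for (std::size_t i = 0; i < found_target_indices.size(); ++i)
+///   {
+///       const std::int64_t target_index = found_target_indices[i];
+///       const std::int64_t n_query      = query_starting_indices[i + 1] - query_starting_indices[i];
+///       const std::int64_t n_target     = target_index >= 0 ? target_starting_indices[target_index + 1] - target_starting_indices[target_index] : 0;
+///       total_anchors += n_query * n_target;
+///       anchor_starting_indices[i] = total_anchors; // inclusive prefix sum
+///   }
+///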
/// For example: /// query: /// representation: 0 12 23 32 46 @@ -101,14 +101,20 @@ void find_query_target_matches(thrust::device_vector& found_target /// array-index: 0 1 2 3 4 /// target-index: -1 1 3 -1 6 (-1 indicates no matching representation in target) /// +/// anchors per representation: +/// 12: (10-4)*(7-3) +/// 23: (13-10)*(13-9) +/// 46: (21-18)*(21-18) /// gives: -/// (10-4)*(7-3) + (13-10)*(13-9) + (21-18)*(21-18) = 45 +/// query representation: 0 12 23 32 46 +/// number of anchors per representation: 0 24 12 0 9 +/// anchor starting index: 0 24 36 36 45 /// +/// \param anchor_starting_indices_d The starting indices for the anchors based on each query /// \param query_starting_index_of_each_representation_d /// \param found_target_indices_d /// \param target_starting_index_of_each_representation_d -/// \return The number of anchors which can be generated from the query and target arrays -std::int64_t compute_number_of_anchors(const thrust::device_vector query_starting_index_of_each_representation_d, const thrust::device_vector& found_target_indices_d, const thrust::device_vector target_starting_index_of_each_representation_d); +void compute_anchor_starting_indices(thrust::device_vector& anchor_starting_indices_d, const thrust::device_vector query_starting_index_of_each_representation_d, const thrust::device_vector& found_target_indices_d, const thrust::device_vector target_starting_index_of_each_representation_d); /// \brief Writes 0 to the output array if the value to the left is the same as the current value, 1 otherwise. First element is always 1 /// diff --git a/cudamapper/tests/Test_CudamapperMatcherGPU.cu b/cudamapper/tests/Test_CudamapperMatcherGPU.cu index 78f29680c..16d873a08 100644 --- a/cudamapper/tests/Test_CudamapperMatcherGPU.cu +++ b/cudamapper/tests/Test_CudamapperMatcherGPU.cu @@ -355,15 +355,21 @@ TEST(TestCudamapperMatcherGPU, test_query_target_matches_large_example) void test_compute_number_of_anchors(const thrust::host_vector& query_starting_index_of_each_representation_h, const thrust::host_vector& found_target_indices_h, const thrust::host_vector& target_starting_index_of_each_representation_h, - const std::int64_t expected_n_anchors) + const thrust::host_vector& expected_anchor_starting_indices_h) { const thrust::device_vector query_starting_index_of_each_representation_d(query_starting_index_of_each_representation_h); const thrust::device_vector target_starting_index_of_each_representation_d(target_starting_index_of_each_representation_h); - const thrust::device_vector found_target_indices_d(found_target_indices_h); + const thrust::device_vector found_target_indices_d(found_target_indices_h); + thrust::device_vector anchor_starting_indices_d(found_target_indices_h.size()); - const std::int64_t n_anchors = details::matcher_gpu::compute_number_of_anchors(query_starting_index_of_each_representation_d, found_target_indices_d, target_starting_index_of_each_representation_d); + details::matcher_gpu::compute_anchor_starting_indices(anchor_starting_indices_d, query_starting_index_of_each_representation_d, found_target_indices_d, target_starting_index_of_each_representation_d); - EXPECT_EQ(n_anchors, expected_n_anchors); + thrust::host_vector anchor_starting_indices_h(anchor_starting_indices_d); + + for (int32_t i = 0; i < get_size(found_target_indices_h); ++i) + { + EXPECT_EQ(anchor_starting_indices_h[i], expected_anchor_starting_indices_h[i]); + } } TEST(TestCudamapperMatcherGPU, test_compute_number_of_anchors_small_example) @@ -393,12 +399,17 @@ 
TEST(TestCudamapperMatcherGPU, test_compute_number_of_anchors_small_example) found_target_indices_h.push_back(-1); found_target_indices_h.push_back(6); - const int64_t expected_n_anchors = 45; + thrust::host_vector expected_anchor_starting_indices; + expected_anchor_starting_indices.push_back(0); + expected_anchor_starting_indices.push_back(24); + expected_anchor_starting_indices.push_back(36); + expected_anchor_starting_indices.push_back(36); + expected_anchor_starting_indices.push_back(45); test_compute_number_of_anchors(query_starting_index_of_each_representation_h, found_target_indices_h, target_starting_index_of_each_representation_h, - expected_n_anchors); + expected_anchor_starting_indices); } TEST(TestCudamapperMatcherGPU, test_compute_number_of_anchors_large_example) @@ -408,6 +419,7 @@ TEST(TestCudamapperMatcherGPU, test_compute_number_of_anchors_large_example) thrust::host_vector query_starting_index_of_each_representation_h; thrust::host_vector target_starting_index_of_each_representation_h; thrust::host_vector found_target_indices_h(length - 1, -1); + thrust::host_vector expected_anchor_starting_indices_h; std::int64_t expected_n_anchors = 0; for (std::int64_t i = 0; i < length; ++i) { @@ -418,12 +430,14 @@ TEST(TestCudamapperMatcherGPU, test_compute_number_of_anchors_large_example) found_target_indices_h[i] = i; expected_n_anchors += 2 * (10 + (i + 1) % 10 - i % 10); } + if (i < length - 1) + expected_anchor_starting_indices_h.push_back(expected_n_anchors); } test_compute_number_of_anchors(query_starting_index_of_each_representation_h, found_target_indices_h, target_starting_index_of_each_representation_h, - expected_n_anchors); + expected_anchor_starting_indices_h); } } // namespace cudamapper From fcd0f2b1a9f68cf7f735899e3fb93f93b9ea1d6d Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Wed, 6 Nov 2019 10:41:15 +0100 Subject: [PATCH 027/128] Using .data().get() instead of thrust::raw_pointer_cast on thrust::device_vector --- cudamapper/src/index_gpu_two_indices.cuh | 8 ++++---- cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh index 65aa5e210..160c16f6e 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -329,12 +329,12 @@ void IndexGPUTwoIndices::generate_index(io::FastaParser* pars const std::uint32_t blocks = ceiling_divide(representations_d_.size(), threads); details::index_gpu_two_indices::copy_rest_to_separate_arrays<<>>(rest_d.data(), - thrust::raw_pointer_cast(read_ids_d_.data()), - thrust::raw_pointer_cast(positions_in_reads_d_.data()), - thrust::raw_pointer_cast(directions_of_reads_d_.data()), + read_ids_d_.data().get(), + positions_in_reads_d_.data().get(), + directions_of_reads_d_.data().get(), representations_d_.size()); CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); } } // namespace cudamapper -} // namespace claragenomics \ No newline at end of file +} // namespace claragenomics diff --git a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu index 6327cd27f..48668203f 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu @@ -790,10 +790,10 @@ void test_function_copy_rest_to_separate_arrays(const thrust::host_vector(rest_h.size(), threads); - copy_rest_to_separate_arrays<<>>(thrust::raw_pointer_cast(rest_d.data()), - 
thrust::raw_pointer_cast(generated_read_ids_d.data()), - thrust::raw_pointer_cast(generated_positions_in_reads_d.data()), - thrust::raw_pointer_cast(generated_directions_of_reads_d.data()), + copy_rest_to_separate_arrays<<>>(rest_d.data().get(), + generated_read_ids_d.data().get(), + generated_positions_in_reads_d.data().get(), + generated_directions_of_reads_d.data().get(), rest_h.size()); const thrust::host_vector& generated_read_ids_h(generated_read_ids_d); @@ -909,4 +909,4 @@ TEST(TestCudamapperIndexGPUTwoIndices, test_function_copy_rest_to_separate_array } // namespace details } // namespace cudamapper -} // namespace claragenomics \ No newline at end of file +} // namespace claragenomics From 89925adf31b2a321b0ec7768c733d890606070ff Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Wed, 6 Nov 2019 12:09:09 +0100 Subject: [PATCH 028/128] Implementation of IndexTwoIndices base class --- cudamapper/CMakeLists.txt | 1 + .../cudamapper/index_two_indices.hpp | 87 +++++++++++++++++++ cudamapper/src/index_gpu_two_indices.cuh | 21 +++-- cudamapper/src/index_two_indices.cu | 39 +++++++++ 4 files changed, 140 insertions(+), 8 deletions(-) create mode 100644 cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp create mode 100644 cudamapper/src/index_two_indices.cu diff --git a/cudamapper/CMakeLists.txt b/cudamapper/CMakeLists.txt index e8503c9f2..404ba6c0c 100644 --- a/cudamapper/CMakeLists.txt +++ b/cudamapper/CMakeLists.txt @@ -64,6 +64,7 @@ cuda_add_executable(cudamapper src/cudamapper.cpp src/main.cpp src/index.cu + src/index_two_indices.cu src/overlapper.cpp ) diff --git a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp new file mode 100644 index 000000000..983826f83 --- /dev/null +++ b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp @@ -0,0 +1,87 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. 
+*/ + +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace claragenomics +{ + +namespace cudamapper +{ +/// \addtogroup cudamapper +/// \{ + +/// Index - manages mapping of (k,w)-kmer-representation and all its occurences +class IndexTwoIndices +{ +public: + /// \brief returns an array of representations of sketch elements + /// \return an array of representations of sketch elements + virtual const thrust::device_vector& representations() const = 0; + + /// \brief returns an array of reads ids for sketch elements + /// \return an array of reads ids for sketch elements + virtual const thrust::device_vector& read_ids() const = 0; + + /// \brief returns an array of starting positions of sketch elements in their reads + /// \return an array of starting positions of sketch elements in their reads + virtual const thrust::device_vector& positions_in_reads() const = 0; + + /// \brief returns an array of directions in which sketch elements were read + /// \return an array of directions in which sketch elements were read + virtual const thrust::device_vector& directions_of_reads() const = 0; + + /// \brief returns read name of read with the given read_id + /// \param read_id + /// \return read name of read with the given read_id + virtual const std::string& read_id_to_read_name(const read_id_t read_id) const = 0; + + /// \brief returns read length for the read with the gived read_id + /// \param read_id + /// \return read length for the read with the gived read_id + virtual const std::uint32_t& read_id_to_read_length(const read_id_t read_id) const = 0; + + /// \brief returns number of reads in input data + /// \return number of reads in input data + virtual std::uint64_t number_of_reads() const = 0; + + /// \brief generates a mapping of (k,w)-kmer-representation to all of its occurrences for one or more sequences + /// \param parser parser for the whole input file (part that goes into this index is determined by first_read_id and past_the_last_read_id) + /// \param first_read_id read_id of the first read to the included in this index + /// \param past_the_last_read_id read_id+1 of the last read to be included in this index + /// \param kmer_size k - the kmer length + /// \param window_size w - the length of the sliding window used to find sketch elements + /// \return instance of IndexTwoIndices + static std::unique_ptr + create_index(io::FastaParser* parser, + const read_id_t first_read_id, + const read_id_t past_the_last_read_id, + const std::uint64_t kmer_size, + const std::uint64_t window_size); + + /// \brief creates an empty IndexTwoIndices + /// \return empty instacne of IndexTwoIndices + static std::unique_ptr create_index(); +}; + +/// \} + +} // namespace cudamapper + +} // namespace claragenomics diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh index 160c16f6e..4f65377a8 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -16,6 +16,7 @@ #include #include +#include "claragenomics/cudamapper/index_two_indices.hpp" #include "claragenomics/cudamapper/types.hpp" #include #include @@ -36,7 +37,7 @@ namespace cudamapper /// /// \tparam SketchElementImpl any implementation of SketchElement template -class IndexGPUTwoIndices +class IndexGPUTwoIndices : public IndexTwoIndices { public: /// \brief Constructor @@ -57,33 +58,33 @@ public: /// \brief returns an array of representations of sketch elements /// \return an array of representations of sketch 
elements - const thrust::device_vector& representations() const; + const thrust::device_vector& representations() const override; /// \brief returns an array of reads ids for sketch elements /// \return an array of reads ids for sketch elements - const thrust::device_vector& read_ids() const; + const thrust::device_vector& read_ids() const override; /// \brief returns an array of starting positions of sketch elements in their reads /// \return an array of starting positions of sketch elements in their reads - const thrust::device_vector& positions_in_reads() const; + const thrust::device_vector& positions_in_reads() const override; /// \brief returns an array of directions in which sketch elements were read /// \return an array of directions in which sketch elements were read - const thrust::device_vector& directions_of_reads() const; + const thrust::device_vector& directions_of_reads() const override; /// \brief returns read name of read with the given read_id /// \param read_id /// \return read name of read with the given read_id - const std::string& read_id_to_read_name(const read_id_t read_id) const; + const std::string& read_id_to_read_name(const read_id_t read_id) const override; /// \brief returns read length for the read with the gived read_id /// \param read_id /// \return read length for the read with the gived read_id - const std::uint32_t& read_id_to_read_length(const read_id_t read_id) const; + const std::uint32_t& read_id_to_read_length(const read_id_t read_id) const override; /// \brief returns number of reads in input data /// \return number of reads in input data - std::uint64_t number_of_reads() const; + std::uint64_t number_of_reads() const override; private: /// \brief generates the index @@ -158,6 +159,10 @@ IndexGPUTwoIndices::IndexGPUTwoIndices(io::FastaParser* parse template IndexGPUTwoIndices::IndexGPUTwoIndices() + : first_read_id_(0) + , kmer_size_(0) + , window_size_(0) + , number_of_reads_(0) { } diff --git a/cudamapper/src/index_two_indices.cu b/cudamapper/src/index_two_indices.cu new file mode 100644 index 000000000..cc7e4dabb --- /dev/null +++ b/cudamapper/src/index_two_indices.cu @@ -0,0 +1,39 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. +*/ + +#include "claragenomics/cudamapper/index_two_indices.hpp" +#include +#include "index_gpu_two_indices.cuh" +#include "minimizer.hpp" + +namespace claragenomics +{ +namespace cudamapper +{ +std::unique_ptr IndexTwoIndices::create_index(io::FastaParser* parser, + const read_id_t first_read_id, + const read_id_t past_the_last_read_id, + const std::uint64_t kmer_size, + const std::uint64_t window_size) +{ + CGA_NVTX_RANGE(profiler, "create_index"); + return std::make_unique>(parser, + first_read_id, + past_the_last_read_id, + kmer_size, + window_size); +} + +std::unique_ptr IndexTwoIndices::create_index() +{ + return std::make_unique>(); +} +} // namespace cudamapper +} // namespace claragenomics From ee4dc0b3f853ea4256b4b1271e1a69d77b7324ea Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Wed, 6 Nov 2019 13:09:26 +0100 Subject: [PATCH 029/128] First change to main.cpp to enble new indexer + mather. 
For now only calling indexer --- cudamapper/src/main.cpp | 55 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 5 deletions(-) diff --git a/cudamapper/src/main.cpp b/cudamapper/src/main.cpp index 4ac478f2f..f67dc424f 100644 --- a/cudamapper/src/main.cpp +++ b/cudamapper/src/main.cpp @@ -23,6 +23,7 @@ #include #include "claragenomics/cudamapper/index.hpp" +#include "claragenomics/cudamapper/index_two_indices.hpp" #include "claragenomics/cudamapper/overlapper.hpp" #include "matcher.hpp" #include "overlapper_triggered.hpp" @@ -159,11 +160,55 @@ int main(int argc, char* argv[]) std::chrono::milliseconds matcher_time = std::chrono::duration_values::zero(); std::chrono::milliseconds overlapper_time = std::chrono::duration_values::zero(); - //Now carry out all the looped polling - //size_t query_start = 0; - //size_t query_end = query_start + index_size - 1; - for (size_t query_start = 0; query_start < queries; query_start += index_size) + { // outer loop over query + size_t query_end = std::min(query_start + index_size, static_cast(queries) - 1); + + std::cerr << "Query range: " << query_start << " - " << query_end << std::endl; + + { + CGA_NVTX_RANGE(profiler, "generate_query_index"); + auto start_time = std::chrono::high_resolution_clock::now(); + auto query_index = claragenomics::cudamapper::IndexTwoIndices::create_index(query_parser.get(), + query_start, + query_end + 1, // <- past the last + k, + w); + index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); + std::cerr << "Query index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; + + } + + size_t target_start = 0; + // If all_to_all mode, then we can optimzie by starting the target sequences from the same index as + // query because all indices before the current query index are guaranteed to have been processed in + // a2a mapping. + if (all_to_all) + { + target_start = query_start; + } + for (; target_start < targets; target_start += target_index_size) + { + size_t target_end = std::min(target_start + target_index_size, static_cast(targets) - 1); + + std::cerr << "Target range: " << target_start << " - " << target_end << std::endl; + + { + CGA_NVTX_RANGE(profiler, "generate_target_index"); + auto start_time = std::chrono::high_resolution_clock::now(); + auto target_index = claragenomics::cudamapper::IndexTwoIndices::create_index(target_parser.get(), + target_start, + target_end + 1, // <- past the last + k, + w); + index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); + std::cerr << "Target index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; + } + } + } + + + /*for (size_t query_start = 0; query_start < queries; query_start += index_size) { // outer loop over query size_t query_end = std::min(query_start + index_size, static_cast(queries)); auto start_time = std::chrono::high_resolution_clock::now(); @@ -251,7 +296,7 @@ int main(int argc, char* argv[]) //the new target start is set to be the next read index after the last read //from the previous chunk } - } + }*/ // Insert empty overlap vector to denote end of processing. 
// The lambda function for adding overlaps to queue ensures that no empty From 6536a45f6cd628d6b39d22ce9cbc7de1304e803e Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Wed, 6 Nov 2019 15:28:35 +0100 Subject: [PATCH 030/128] Declare indices outside of NVTX scopes so they can be reused --- cudamapper/src/main.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/cudamapper/src/main.cpp b/cudamapper/src/main.cpp index f67dc424f..4df9e1edd 100644 --- a/cudamapper/src/main.cpp +++ b/cudamapper/src/main.cpp @@ -166,17 +166,19 @@ int main(int argc, char* argv[]) std::cerr << "Query range: " << query_start << " - " << query_end << std::endl; - { - CGA_NVTX_RANGE(profiler, "generate_query_index"); - auto start_time = std::chrono::high_resolution_clock::now(); - auto query_index = claragenomics::cudamapper::IndexTwoIndices::create_index(query_parser.get(), - query_start, - query_end + 1, // <- past the last - k, - w); - index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); - std::cerr << "Query index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; + std::unique_ptr query_index(nullptr); + std::unique_ptr target_index(nullptr); + { + CGA_NVTX_RANGE(profiler, "generate_query_index"); + auto start_time = std::chrono::high_resolution_clock::now(); + query_index = claragenomics::cudamapper::IndexTwoIndices::create_index(query_parser.get(), + query_start, + query_end + 1, // <- past the last + k, + w); + index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); + std::cerr << "Query index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; } size_t target_start = 0; @@ -196,7 +198,7 @@ int main(int argc, char* argv[]) { CGA_NVTX_RANGE(profiler, "generate_target_index"); auto start_time = std::chrono::high_resolution_clock::now(); - auto target_index = claragenomics::cudamapper::IndexTwoIndices::create_index(target_parser.get(), + target_index = claragenomics::cudamapper::IndexTwoIndices::create_index(target_parser.get(), target_start, target_end + 1, // <- past the last k, From caf04ed86f67ddb894315666e2449a38749280bd Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Wed, 6 Nov 2019 16:12:30 +0100 Subject: [PATCH 031/128] Base class for new matcher, updated matcher's interface, calling new matcher from main --- cudamapper/CMakeLists.txt | 1 + .../cudamapper/matcher_two_indices.hpp | 45 +++++++++++++++++++ cudamapper/src/main.cpp | 31 ++++++++----- cudamapper/src/matcher_gpu.cu | 6 +-- cudamapper/src/matcher_gpu.cuh | 13 +++--- cudamapper/src/matcher_two_indices.cu | 26 +++++++++++ 6 files changed, 102 insertions(+), 20 deletions(-) create mode 100644 cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp create mode 100644 cudamapper/src/matcher_two_indices.cu diff --git a/cudamapper/CMakeLists.txt b/cudamapper/CMakeLists.txt index 404ba6c0c..92c5f0140 100644 --- a/cudamapper/CMakeLists.txt +++ b/cudamapper/CMakeLists.txt @@ -65,6 +65,7 @@ cuda_add_executable(cudamapper src/main.cpp src/index.cu src/index_two_indices.cu + src/matcher_two_indices.cu src/overlapper.cpp ) diff --git a/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp b/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp new file mode 100644 index 000000000..5c3bebad8 --- /dev/null +++ 
b/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp @@ -0,0 +1,45 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. +*/ + +#pragma once + +#include +#include +#include + +namespace claragenomics +{ + +namespace cudamapper +{ +/// \addtogroup cudamapper +/// \{ + +/// MatcehrTwoIndice - base matcher +class MatcherTwoIndices +{ +public: + /// \brief returns anchors + /// \return anchors + virtual thrust::device_vector& anchors() = 0; + + /// \brief Creates a Matcher object + /// \param query_index + /// \param target_index + /// \return matcher + static std::unique_ptr create_matcher(const IndexTwoIndices& query_index, + const IndexTwoIndices& target_index); +}; + +/// \} + +} // namespace cudamapper + +} // namespace claragenomics diff --git a/cudamapper/src/main.cpp b/cudamapper/src/main.cpp index 4df9e1edd..53bd326d2 100644 --- a/cudamapper/src/main.cpp +++ b/cudamapper/src/main.cpp @@ -25,6 +25,7 @@ #include "claragenomics/cudamapper/index.hpp" #include "claragenomics/cudamapper/index_two_indices.hpp" #include "claragenomics/cudamapper/overlapper.hpp" +#include "claragenomics/cudamapper/matcher_two_indices.hpp" #include "matcher.hpp" #include "overlapper_triggered.hpp" @@ -166,13 +167,14 @@ int main(int argc, char* argv[]) std::cerr << "Query range: " << query_start << " - " << query_end << std::endl; - std::unique_ptr query_index(nullptr); - std::unique_ptr target_index(nullptr); + std::unique_ptr query_index(nullptr); + std::unique_ptr target_index(nullptr); + std::unique_ptr matcher(nullptr); { CGA_NVTX_RANGE(profiler, "generate_query_index"); - auto start_time = std::chrono::high_resolution_clock::now(); - query_index = claragenomics::cudamapper::IndexTwoIndices::create_index(query_parser.get(), + auto start_time = std::chrono::high_resolution_clock::now(); + query_index = claragenomics::cudamapper::IndexTwoIndices::create_index(query_parser.get(), query_start, query_end + 1, // <- past the last k, @@ -197,19 +199,26 @@ int main(int argc, char* argv[]) { CGA_NVTX_RANGE(profiler, "generate_target_index"); - auto start_time = std::chrono::high_resolution_clock::now(); - target_index = claragenomics::cudamapper::IndexTwoIndices::create_index(target_parser.get(), - target_start, - target_end + 1, // <- past the last - k, - w); + auto start_time = std::chrono::high_resolution_clock::now(); + target_index = claragenomics::cudamapper::IndexTwoIndices::create_index(target_parser.get(), + target_start, + target_end + 1, // <- past the last + k, + w); index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); std::cerr << "Target index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; } + { + CGA_NVTX_RANGE(profiler, "generate_matcher"); + auto start_time = std::chrono::high_resolution_clock::now(); + matcher = claragenomics::cudamapper::MatcherTwoIndices::create_matcher(*query_index, + *target_index); + matcher_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); + std::cerr << "Matcher generation time: " << 
std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; + } } } - /*for (size_t query_start = 0; query_start < queries; query_start += index_size) { // outer loop over query size_t query_end = std::min(query_start + index_size, static_cast(queries)); diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index f571b03d1..5be45944d 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -24,12 +24,12 @@ namespace claragenomics namespace cudamapper { -MatcherGPU::MatcherGPU(const Index& query_index, - const Index& target_index) +MatcherGPU::MatcherGPU(const IndexTwoIndices& query_index, + const IndexTwoIndices& target_index) { } -std::vector& MatcherGPU::anchors() +thrust::device_vector& MatcherGPU::anchors() { return anchors_h_; } diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index 2210dd905..85d8502b4 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -12,7 +12,8 @@ #include #include -#include +#include +#include #include namespace claragenomics @@ -21,16 +22,16 @@ namespace claragenomics namespace cudamapper { -class MatcherGPU +class MatcherGPU : public MatcherTwoIndices { public: - MatcherGPU(const Index& query_index, - const Index& target_index); + MatcherGPU(const IndexTwoIndices& query_index, + const IndexTwoIndices& target_index); - std::vector& anchors(); + thrust::device_vector& anchors() override; private: - std::vector anchors_h_; + thrust::device_vector anchors_h_; }; namespace details diff --git a/cudamapper/src/matcher_two_indices.cu b/cudamapper/src/matcher_two_indices.cu new file mode 100644 index 000000000..15aa833a0 --- /dev/null +++ b/cudamapper/src/matcher_two_indices.cu @@ -0,0 +1,26 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. 
+*/ + +#include "claragenomics/cudamapper/matcher_two_indices.hpp" +#include "matcher_gpu.cuh" + +namespace claragenomics +{ +namespace cudamapper +{ + +std::unique_ptr MatcherTwoIndices::create_matcher(const IndexTwoIndices& query_index, + const IndexTwoIndices& target_index) +{ + return std::make_unique(query_index, target_index); +} + +} // namespace cudamapper +} // namespace claragenomics From 22bb52ef2772ad93020fcd178538c2f70b440ec9 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Wed, 6 Nov 2019 17:51:52 +0100 Subject: [PATCH 032/128] Passing vectors to compute_anchor_starting_indices by reference, not by value --- cudamapper/src/matcher_gpu.cu | 5 ++++- cudamapper/src/matcher_gpu.cuh | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index 5be45944d..a0c8e83af 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -96,7 +96,10 @@ void find_query_target_matches(thrust::device_vector& found_target find_query_target_matches_kernel<<>>(found_target_indices_d.data().get(), query_representations_d.data().get(), get_size(query_representations_d), target_representations_d.data().get(), get_size(target_representations_d)); } -void compute_anchor_starting_indices(thrust::device_vector& anchor_starting_indices_d, const thrust::device_vector query_starting_index_of_each_representation_d, const thrust::device_vector& found_target_indices_d, const thrust::device_vector target_starting_index_of_each_representation_d) +void compute_anchor_starting_indices(thrust::device_vector& anchor_starting_indices_d, + const thrust::device_vector& query_starting_index_of_each_representation_d, + const thrust::device_vector& found_target_indices_d, + const thrust::device_vector& target_starting_index_of_each_representation_d) { assert(query_starting_index_of_each_representation_d.size() == found_target_indices_d.size() + 1); assert(anchor_starting_indices_d.size() == found_target_indices_d.size()); diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index 85d8502b4..2b08cbb8c 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -115,7 +115,10 @@ void find_query_target_matches(thrust::device_vector& found_target /// \param query_starting_index_of_each_representation_d /// \param found_target_indices_d /// \param target_starting_index_of_each_representation_d -void compute_anchor_starting_indices(thrust::device_vector& anchor_starting_indices_d, const thrust::device_vector query_starting_index_of_each_representation_d, const thrust::device_vector& found_target_indices_d, const thrust::device_vector target_starting_index_of_each_representation_d); +void compute_anchor_starting_indices(thrust::device_vector& anchor_starting_indices_d, + const thrust::device_vector& query_starting_index_of_each_representation_d, + const thrust::device_vector& found_target_indices_d, + const thrust::device_vector& target_starting_index_of_each_representation_d); /// \brief Writes 0 to the output array if the value to the left is the same as the current value, 1 otherwise. 
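As a reading aid for the compute_anchor_starting_indices changes above: judging only from the parameter names and the size asserts shown in the hunk, the routine appears to turn per-representation match counts into a running total of anchors. The host-side sketch below is a guess at those semantics for illustration; every name in it is assumed and the real code runs on the device with thrust.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Assumed semantics, for illustration only: for query representation i that was
    // found at position j = found_target_indices[i] in the target index, the number of
    // anchors is (#query sketch elements with that representation) * (#target sketch
    // elements with it); unmatched representations (j == -1) contribute nothing.
    // The result is the inclusive prefix sum of these per-representation counts.
    std::vector<std::int64_t> anchor_starting_indices_sketch(
        const std::vector<std::uint32_t>& query_first_occurrence,   // size = n_query_representations + 1
        const std::vector<std::int64_t>& found_target_indices,      // size = n_query_representations
        const std::vector<std::uint32_t>& target_first_occurrence)  // size = n_target_representations + 1
    {
        std::vector<std::int64_t> anchor_starting_indices(found_target_indices.size());
        std::int64_t running_total = 0;
        for (std::size_t i = 0; i < found_target_indices.size(); ++i)
        {
            const std::int64_t j = found_target_indices[i];
            if (j >= 0)
            {
                const std::int64_t query_count  = query_first_occurrence[i + 1] - query_first_occurrence[i];
                const std::int64_t target_count = target_first_occurrence[j + 1] - target_first_occurrence[j];
                running_total += query_count * target_count;
            }
            anchor_starting_indices[i] = running_total; // inclusive scan of the counts
        }
        return anchor_starting_indices;
    }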
First element is always 1 /// From d3261a39f5051151fe7b5e87dd69fa72e4b43959 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Wed, 6 Nov 2019 22:14:51 +0100 Subject: [PATCH 033/128] find_first_occurrences_of_representations now also returns a list of unique representations --- cudamapper/src/matcher_gpu.cu | 47 ++++++----- cudamapper/src/matcher_gpu.cuh | 35 +++++--- cudamapper/tests/Test_CudamapperMatcherGPU.cu | 81 +++++++++++++++---- 3 files changed, 118 insertions(+), 45 deletions(-) diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index a0c8e83af..d4f1570be 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -39,22 +39,24 @@ namespace details namespace matcher_gpu { -thrust::device_vector find_first_occurrences_of_representations(const thrust::device_vector& representations_d) +void find_first_occurrences_of_representations(thrust::device_vector& unique_representations_d, + thrust::device_vector& first_occurrence_index_d, + const thrust::device_vector& input_representations_d) { // each element has value 1 if representation with the same index in representations_d has a different value than it's neighbour to the left, 0 otehrwise // underlying type is 32-bit because a scan operation will be performed on the array, so the elements should be capable of holding a number that is equal to // the total number of 1s in the array - thrust::device_vector new_value_mask_d(representations_d.size()); + thrust::device_vector new_value_mask_d(input_representations_d.size()); // TODO: Currently maximum number of thread blocks is 2^31-1. This means we support representations of up to (2^31-1) * number_of_threads // With 256 that's (2^31-1)*2^8 ~= 2^39. If representation is 4-byte (we expect it to be 4 or 8) that's 2^39*2^2 = 2^41 = 2TB. 
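The capacity estimate in the TODO above works out as follows; this is only a restatement of numbers already present in the comment and of the ceiling division used for the kernel launches, not new behaviour.

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const std::uint64_t number_of_threads = 256;                            // per block, as in the launches above
        const std::uint64_t max_blocks        = (1ull << 31) - 1;               // grid.x limit mentioned in the TODO
        const std::uint64_t max_elements      = max_blocks * number_of_threads; // roughly 2^39 sketch elements, one per thread

        const std::uint64_t n                = 10000000;                        // example input size
        const std::uint64_t number_of_blocks = (n - 1) / number_of_threads + 1; // ceiling division used above

        std::printf("capacity ~%llu elements, %llu blocks for n=%llu\n",
                    static_cast<unsigned long long>(max_elements),
                    static_cast<unsigned long long>(number_of_blocks),
                    static_cast<unsigned long long>(n));
        return 0;
    }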
We don't expect to hit this limit any time soon // The kernel can be modified to process several representation per thread to support arbitrary size std::uint32_t number_of_threads = 256; // arbitrary value - std::uint32_t number_of_blocks = (representations_d.size() - 1) / number_of_threads + 1; + std::uint32_t number_of_blocks = (input_representations_d.size() - 1) / number_of_threads + 1; - create_new_value_mask<<>>(thrust::raw_pointer_cast(representations_d.data()), - representations_d.size(), - thrust::raw_pointer_cast(new_value_mask_d.data())); + create_new_value_mask<<>>(input_representations_d.data().get(), + input_representations_d.size(), + new_value_mask_d.data().get()); CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); // sync not necessary, here only to detect the error immediately // do inclusive scan @@ -75,15 +77,18 @@ thrust::device_vector find_first_occurrences_of_representations(c std::uint64_t number_of_unique_representations = representation_index_mask_d.back(); // D2H copy - thrust::device_vector starting_index_of_each_representation(number_of_unique_representations + 1); + first_occurrence_index_d.resize(number_of_unique_representations + 1); // <- +1 for the additional element + first_occurrence_index_d.shrink_to_fit(); + unique_representations_d.resize(number_of_unique_representations); + unique_representations_d.shrink_to_fit(); - copy_index_of_first_occurence<<>>(thrust::raw_pointer_cast(representation_index_mask_d.data()), - representation_index_mask_d.size(), - thrust::raw_pointer_cast(starting_index_of_each_representation.data())); + find_first_occurrences_of_representations_kernel<<>>(representation_index_mask_d.data().get(), + input_representations_d.data().get(), + representation_index_mask_d.size(), + first_occurrence_index_d.data().get(), + unique_representations_d.data().get()); // last element is the total number of elements in representations array - starting_index_of_each_representation.back() = representations_d.size(); // H2D copy - - return starting_index_of_each_representation; + first_occurrence_index_d.back() = input_representations_d.size(); // H2D copy } void find_query_target_matches(thrust::device_vector& found_target_indices_d, const thrust::device_vector& query_representations_d, const thrust::device_vector& target_representations_d) @@ -147,9 +152,11 @@ __global__ void create_new_value_mask(const representation_t* const representati } } -__global__ void copy_index_of_first_occurence(const std::uint64_t* const representation_index_mask_d, - const std::size_t number_of_input_elements, - std::uint32_t* const starting_index_of_each_representation) +__global__ void find_first_occurrences_of_representations_kernel(const std::uint64_t* const representation_index_mask_d, + const representation_t* const input_representations_d, + const std::size_t number_of_input_elements, + std::uint32_t* const starting_index_of_each_representation_d, + representation_t* const unique_representations_d) { std::uint64_t index = blockIdx.x * blockDim.x + threadIdx.x; @@ -158,16 +165,18 @@ __global__ void copy_index_of_first_occurence(const std::uint64_t* const represe if (index == 0) { - starting_index_of_each_representation[0] = 0; + starting_index_of_each_representation_d[0] = 0; + unique_representations_d[0] = input_representations_d[0]; } else { if (representation_index_mask_d[index] != representation_index_mask_d[index - 1]) { - // if new representation (= not the same as its left neighbor) + // if new representation is not the same as its left neighbor // save the 
index at which that representation starts // representation_index_mask_d gives a unique index to each representation, starting from 1, thus '-1' - starting_index_of_each_representation[representation_index_mask_d[index] - 1] = index; + starting_index_of_each_representation_d[representation_index_mask_d[index] - 1] = index; + unique_representations_d[representation_index_mask_d[index] - 1] = input_representations_d[index]; } } } diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index 2b08cbb8c..b3f58a46c 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -41,19 +41,24 @@ namespace matcher_gpu { /// \brief Creates compressed representation of index /// -/// Creates an array in which n-th element represents the first occurrence of n-th representation. -/// Last element of the array is the total number of elements in representations_d array +/// Creates two arrays: first one contains a list of unique representations and the second one the index +/// at which that representation occurrs for the first time in the original data. +/// Second element contains one additional elemet at the end, containing the total number of elemets in the original array. /// /// For example: /// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 /// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 /// ^ ^ ^ ^ ^ ^ /// gives: -/// 0 4 10 13 18 21 +/// 0 12 23 32 46 +/// 0 4 10 13 18 21 /// -/// \param representations_d -/// \return first_element_for_representation -thrust::device_vector find_first_occurrences_of_representations(const thrust::device_vector& representations_d); +/// \param unique_representations_d empty on input, contains one value of each representation on the output +/// \param first_occurrence_index_d empty on input, index of first occurrence of each representation and additional elemnt on the output +/// \param input_representations_d an array of representaton where representations with the same value stand next to each other +void find_first_occurrences_of_representations(thrust::device_vector& unique_representations_d, + thrust::device_vector& first_occurrence_index_d, + const thrust::device_vector& input_representations_d); /// \brief Finds the array index of the target representation for each query representation /// @@ -135,7 +140,10 @@ __global__ void create_new_value_mask(const representation_t* const representati const std::size_t number_of_elements, std::uint32_t* const new_value_mask_d); -/// \brief Creates an array in which each element represents the index in representation_index_mask_d at which a new representation starts +/// \brief Helper kernel for find_first_occurrences_of_representations +/// +/// Creates two arrays: first one contains a list of unique representations and the second one the index +/// at which that representation occurrs for the first time in the original data. 
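Taken together, the three device steps referenced in these comments (create_new_value_mask, the inclusive scan, and the gather kernel documented here) compose as in the host-side sketch below. It is a minimal illustration assuming the sample data from the comments; the width of representation_t and the function name are assumptions.

    #include <cstddef>
    #include <cstdint>
    #include <numeric>
    #include <vector>

    using representation_t = std::uint64_t; // width assumed for illustration

    void first_occurrences_via_mask_and_scan(std::vector<representation_t>& unique_out,
                                             std::vector<std::uint32_t>& first_occurrence_out,
                                             const std::vector<representation_t>& reps)
    {
        const std::size_t n = reps.size();

        // Step 1: 1 where a new run of equal representations starts, 0 otherwise
        // (what create_new_value_mask does, one element per thread on the device).
        std::vector<std::uint32_t> new_value_mask(n);
        for (std::size_t i = 0; i < n; ++i)
            new_value_mask[i] = (i == 0 || reps[i] != reps[i - 1]) ? 1u : 0u;

        // Step 2: an inclusive scan turns the mask into a 1-based run id per element,
        // e.g. 1 0 0 0 1 0 ... becomes 1 1 1 1 2 2 ...; the last element is the run count.
        std::vector<std::uint32_t> run_id(n);
        std::inclusive_scan(new_value_mask.begin(), new_value_mask.end(), run_id.begin());

        // Step 3: wherever the run id changes, record the representation and its index
        // (what the gather kernel documented above does in parallel).
        const std::uint32_t number_of_runs = n ? run_id.back() : 0;
        unique_out.assign(number_of_runs, 0);
        first_occurrence_out.assign(number_of_runs + 1, 0);
        for (std::size_t i = 0; i < n; ++i)
        {
            if (i == 0 || run_id[i] != run_id[i - 1])
            {
                unique_out[run_id[i] - 1]           = reps[i];
                first_occurrence_out[run_id[i] - 1] = static_cast<std::uint32_t>(i);
            }
        }
        first_occurrence_out[number_of_runs] = static_cast<std::uint32_t>(n); // trailing total, as in the device code
    }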
/// /// For example: /// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 @@ -144,14 +152,19 @@ __global__ void create_new_value_mask(const representation_t* const representati /// 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5 /// ^ ^ ^ ^ ^ /// gives: +/// 0 12 23 32 46 /// 0 4 10 13 18 /// /// \param representation_index_mask_d +/// \param input_representatons_d /// \param number_of_input_elements -/// \param starting_index_of_each_representation -__global__ void copy_index_of_first_occurence(const std::uint64_t* const representation_index_mask_d, - const std::size_t number_of_input_elements, - std::uint32_t* const starting_index_of_each_representation); +/// \param starting_index_of_each_representation_d +/// \param unique_representations_d +__global__ void find_first_occurrences_of_representations_kernel(const std::uint64_t* const representation_index_mask_d, + const representation_t* const input_representations_d, + const std::size_t number_of_input_elements, + std::uint32_t* const starting_index_of_each_representation_d, + representation_t* const unique_representations_d); /// \brief Performs a binary search on target_representations_d for each element of query_representations_d and stores the found index (or -1 iff not found) in found_target_indices. /// diff --git a/cudamapper/tests/Test_CudamapperMatcherGPU.cu b/cudamapper/tests/Test_CudamapperMatcherGPU.cu index 16d873a08..343239056 100644 --- a/cudamapper/tests/Test_CudamapperMatcherGPU.cu +++ b/cudamapper/tests/Test_CudamapperMatcherGPU.cu @@ -116,81 +116,128 @@ TEST(TestCudamapperMatcherGPU, test_create_new_value_mask_small_data_large_examp expected_new_value_mask_h, number_of_threads); } -void test_copy_index_of_first_occurence(const thrust::host_vector& representation_index_mask_h, - const thrust::host_vector& expected_starting_index_of_each_representation_h, - const std::uint32_t number_of_threads) +void test_find_first_occurrences_of_representations_kernel(const thrust::host_vector& representation_index_mask_h, + const thrust::host_vector& input_representations_h, + const thrust::host_vector& expected_starting_index_of_each_representation_h, + const thrust::host_vector& expected_unique_representations_h, + const std::uint32_t number_of_threads) { const thrust::device_vector representation_index_mask_d(representation_index_mask_h); + const thrust::device_vector input_representations_d(input_representations_h); ASSERT_EQ(expected_starting_index_of_each_representation_h.size(), representation_index_mask_h.back()); + ASSERT_EQ(expected_unique_representations_h.size(), representation_index_mask_h.back()); thrust::device_vector starting_index_of_each_representation_d(expected_starting_index_of_each_representation_h.size()); + thrust::device_vector unique_representations_d(expected_starting_index_of_each_representation_h.size()); std::uint32_t number_of_blocks = (representation_index_mask_d.size() - 1) / number_of_threads + 1; - details::matcher_gpu::copy_index_of_first_occurence<<>>(thrust::raw_pointer_cast(representation_index_mask_d.data()), - representation_index_mask_d.size(), - thrust::raw_pointer_cast(starting_index_of_each_representation_d.data())); + details::matcher_gpu::find_first_occurrences_of_representations_kernel<<>>(representation_index_mask_d.data().get(), + input_representations_d.data().get(), + representation_index_mask_d.size(), + starting_index_of_each_representation_d.data().get(), + unique_representations_d.data().get()); CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); const thrust::host_vector 
starting_index_of_each_representation_h(starting_index_of_each_representation_d); + const thrust::host_vector unique_representations_h(unique_representations_d); ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); + ASSERT_EQ(unique_representations_h.size(), expected_unique_representations_h.size()); for (std::size_t i = 0; i < expected_starting_index_of_each_representation_h.size(); ++i) { EXPECT_EQ(starting_index_of_each_representation_h[i], expected_starting_index_of_each_representation_h[i]) << "index: " << i; + EXPECT_EQ(unique_representations_h[i], expected_unique_representations_h[i]) << "index: " << i; } } -TEST(TestCudamapperMatcherGPU, test_copy_index_of_first_occurence_small_example) +TEST(TestCudamapperMatcherGPU, test_find_first_occurrences_of_representations_kernel_small_example) { thrust::host_vector representation_index_mask_h; + thrust::host_vector input_representations_h; thrust::host_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_unique_representations_h; representation_index_mask_h.push_back(1); + input_representations_h.push_back(10); expected_starting_index_of_each_representation_h.push_back(0); + expected_unique_representations_h.push_back(10); representation_index_mask_h.push_back(1); + input_representations_h.push_back(10); representation_index_mask_h.push_back(1); + input_representations_h.push_back(10); representation_index_mask_h.push_back(1); + input_representations_h.push_back(10); + // representation_index_mask_h.push_back(2); + input_representations_h.push_back(20); expected_starting_index_of_each_representation_h.push_back(4); + expected_unique_representations_h.push_back(20); + // representation_index_mask_h.push_back(3); + input_representations_h.push_back(30); expected_starting_index_of_each_representation_h.push_back(5); + expected_unique_representations_h.push_back(30); representation_index_mask_h.push_back(3); + input_representations_h.push_back(30); representation_index_mask_h.push_back(3); + input_representations_h.push_back(30); representation_index_mask_h.push_back(3); + input_representations_h.push_back(30); + // representation_index_mask_h.push_back(4); + input_representations_h.push_back(40); expected_starting_index_of_each_representation_h.push_back(9); + expected_unique_representations_h.push_back(40); representation_index_mask_h.push_back(4); + input_representations_h.push_back(40); representation_index_mask_h.push_back(4); + input_representations_h.push_back(40); + // representation_index_mask_h.push_back(5); + input_representations_h.push_back(50); expected_starting_index_of_each_representation_h.push_back(12); + expected_unique_representations_h.push_back(50); + // representation_index_mask_h.push_back(6); + input_representations_h.push_back(60); expected_starting_index_of_each_representation_h.push_back(13); + expected_unique_representations_h.push_back(60); std::uint32_t number_of_threads = 3; - test_copy_index_of_first_occurence(representation_index_mask_h, - expected_starting_index_of_each_representation_h, - number_of_threads); + test_find_first_occurrences_of_representations_kernel(representation_index_mask_h, + input_representations_h, + expected_starting_index_of_each_representation_h, + expected_unique_representations_h, + number_of_threads); } -TEST(TestCudamapperMatcherGPU, test_copy_index_of_first_occurence_large_example) +TEST(TestCudamapperMatcherGPU, test_find_first_occurrences_of_representations_kernel_large_example) { 
const std::uint64_t total_sketch_elements = 10000000; const std::uint32_t sketch_elements_with_same_representation = 1000; thrust::host_vector representation_index_mask_h; + thrust::host_vector input_representations_h; thrust::host_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_unique_representations_h; for (std::size_t i = 0; i < total_sketch_elements; ++i) { representation_index_mask_h.push_back(i / sketch_elements_with_same_representation + 1); + input_representations_h.push_back(representation_index_mask_h.back() * 10); if (i % sketch_elements_with_same_representation == 0) + { expected_starting_index_of_each_representation_h.push_back(i); + expected_unique_representations_h.push_back(input_representations_h.back()); + } } std::uint32_t number_of_threads = 256; - test_copy_index_of_first_occurence(representation_index_mask_h, - expected_starting_index_of_each_representation_h, - number_of_threads); + test_find_first_occurrences_of_representations_kernel(representation_index_mask_h, + input_representations_h, + expected_starting_index_of_each_representation_h, + expected_unique_representations_h, + number_of_threads); } void test_find_first_occurrences_of_representations(const thrust::host_vector& representations_h, @@ -198,7 +245,11 @@ void test_find_first_occurrences_of_representations(const thrust::host_vector representations_d(representations_h); - const thrust::device_vector starting_index_of_each_representation_d = details::matcher_gpu::find_first_occurrences_of_representations(representations_d); + thrust::device_vector starting_index_of_each_representation_d; + thrust::device_vector unique_representations_d; + details::matcher_gpu::find_first_occurrences_of_representations(unique_representations_d, + starting_index_of_each_representation_d, + representations_d); const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); From 8d67e7b290291528af9a192b5bdceb50b16fe3a8 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Tue, 5 Nov 2019 18:55:09 -0500 Subject: [PATCH 034/128] [pycga] evaluate_paf to handle multiple overlaps from same read pair --- .../evaluate_paf.py => bin/evaluate_paf} | 76 +++++++++++-------- pyclaragenomics/claragenomics/io/pafio.py | 2 + 2 files changed, 47 insertions(+), 31 deletions(-) rename pyclaragenomics/{claragenomics/utilities/evaluate_paf.py => bin/evaluate_paf} (58%) mode change 100644 => 100755 diff --git a/pyclaragenomics/claragenomics/utilities/evaluate_paf.py b/pyclaragenomics/bin/evaluate_paf old mode 100644 new mode 100755 similarity index 58% rename from pyclaragenomics/claragenomics/utilities/evaluate_paf.py rename to pyclaragenomics/bin/evaluate_paf index 2227044dc..3d9405ef8 --- a/pyclaragenomics/claragenomics/utilities/evaluate_paf.py +++ b/pyclaragenomics/bin/evaluate_paf @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + # # Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
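For the evaluate_paf change that follows, the matching rule being introduced is: keep every truth overlap per (query, target) read pair and accept a test overlap if any of them agrees on all four coordinates within pos_tolerance. Below is a condensed sketch of that rule, written in C++ only to keep the examples here in one language; the struct and names are illustrative and not the script's API.

    #include <cstdint>
    #include <vector>

    struct OverlapCoords // illustrative stand-in for one PAF record's coordinates
    {
        std::int64_t query_start, query_end, target_start, target_end;
    };

    // A test overlap counts as a true positive if ANY truth overlap recorded for the
    // same (query, target) read pair matches all four coordinates within pos_tolerance.
    bool matches_any_truth(const OverlapCoords& test,
                           const std::vector<OverlapCoords>& truth_for_same_read_pair,
                           const std::int64_t pos_tolerance = 400)
    {
        const auto within = [pos_tolerance](std::int64_t a, std::int64_t b) {
            return (a > b ? a - b : b - a) < pos_tolerance;
        };
        for (const OverlapCoords& truth : truth_for_same_read_pair)
        {
            if (within(test.query_start, truth.query_start) &&
                within(test.query_end, truth.query_end) &&
                within(test.target_start, truth.target_start) &&
                within(test.target_end, truth.target_end))
            {
                return true;
            }
        }
        return false;
    }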
# @@ -11,6 +13,7 @@ import argparse from claragenomics.io import pafio +from collections import defaultdict def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400): @@ -24,11 +27,14 @@ def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400): """ # Put the truth paf into a dictionary: - truth_overlaps = {} + truth_overlaps = defaultdict(list) + num_true_overlaps = 0 for truth_overlap in pafio.read_paf(truth_paf_filepath): key = truth_overlap.query_sequence_name + truth_overlap.target_sequence_name - truth_overlaps[key] = truth_overlap + + truth_overlaps[key].append(truth_overlap) + num_true_overlaps += 1 true_positive_count = 0 false_positive_count = 0 @@ -41,41 +47,49 @@ def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400): target_end_0 = test_overlap.target_end key = test_overlap.query_sequence_name + test_overlap.target_sequence_name - key_reversed = test_overlap.target_sequence_name + test_overlap.query_sequence_name + key_reversed = test_overlap.target_sequence_name + "_" + test_overlap.query_sequence_name - matched = False - potential_match = False if key in truth_overlaps: - potential_match = True - truth_overlap = truth_overlaps[key] - - query_start_1 = truth_overlap.query_start - query_end_1 = truth_overlap.query_end - target_start_1 = truth_overlap.target_start - target_end_1 = truth_overlap.target_end + found_match = False + for truth_overlap in truth_overlaps[key]: + query_start_1 = truth_overlap.query_start + query_end_1 = truth_overlap.query_end + target_start_1 = truth_overlap.target_start + target_end_1 = truth_overlap.target_end + + matched = abs(query_start_0 - query_start_1) < pos_tolerance and \ + abs(query_end_0 - query_end_1) < pos_tolerance and \ + abs(target_start_0 - target_start_1) < pos_tolerance and \ + abs(target_end_0 - target_end_1) < pos_tolerance + + if matched: + true_positive_count += 1 + found_match = True + break + if not found_match: + false_positive_count += 1 elif key_reversed in truth_overlaps: - potential_match = True - truth_overlap = truth_overlaps[key_reversed] - - query_start_1 = truth_overlap.target_start - query_end_1 = truth_overlap.target_end - target_start_1 = truth_overlap.query_start - target_end_1 = truth_overlap.query_end - - matched = potential_match and \ - abs(query_start_0 - query_start_1) < pos_tolerance and \ - abs(query_end_0 - query_end_1) < pos_tolerance and \ - abs(target_start_0 - target_start_1) < pos_tolerance and \ - abs(target_end_0 - target_end_1) < pos_tolerance - - if matched: - true_positive_count += 1 - else: - false_positive_count += 1 + found_match = False + for truth_overlap in truth_overlaps[key_reversed]: + query_start_1 = truth_overlap.target_start + query_end_1 = truth_overlap.target_end + target_start_1 = truth_overlap.query_start + target_end_1 = truth_overlap.query_end + + matched = abs(query_start_0 - query_start_1) < pos_tolerance and \ + abs(query_end_0 - query_end_1) < pos_tolerance and \ + abs(target_start_0 - target_start_1) < pos_tolerance and \ + abs(target_end_0 - target_end_1) < pos_tolerance + + if matched: + true_positive_count += 1 + found_match = True + break + if not found_match: + false_positive_count += 1 # Now count the false negatives: - num_true_overlaps = len(truth_overlaps) false_negative_count = num_true_overlaps - true_positive_count return(true_positive_count, false_positive_count, false_negative_count) diff --git a/pyclaragenomics/claragenomics/io/pafio.py b/pyclaragenomics/claragenomics/io/pafio.py index 
21bed305f..aa986c9cc 100644 --- a/pyclaragenomics/claragenomics/io/pafio.py +++ b/pyclaragenomics/claragenomics/io/pafio.py @@ -56,6 +56,8 @@ def read_paf(filepath): paf_entry = paf_entry.replace('\n', '') paf_entry = paf_entry.split('\t') paf_entry_sanitised = [int(x) if x.isdigit() else x for x in paf_entry] + paf_entry_sanitised[0] = str(paf_entry_sanitised[0]) + paf_entry_sanitised[5] = str(paf_entry_sanitised[5]) overlaps.append(Overlap(*paf_entry_sanitised[:12])) return overlaps From 76ca63db41bf42ec61b2423e1486c9e40690097a Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Thu, 7 Nov 2019 11:34:38 +0100 Subject: [PATCH 035/128] Moved index-building helper functions to indexer from matcher --- cudamapper/src/index_gpu_two_indices.cu | 129 ++++++++ cudamapper/src/index_gpu_two_indices.cuh | 61 ++++ cudamapper/src/matcher_gpu.cu | 104 ------ cudamapper/src/matcher_gpu.cuh | 62 ---- .../Test_CudamapperIndexGPUTwoIndices.cu | 309 ++++++++++++++++++ cudamapper/tests/Test_CudamapperMatcherGPU.cu | 303 ----------------- 6 files changed, 499 insertions(+), 469 deletions(-) diff --git a/cudamapper/src/index_gpu_two_indices.cu b/cudamapper/src/index_gpu_two_indices.cu index e69de29bb..ab1ade652 100644 --- a/cudamapper/src/index_gpu_two_indices.cu +++ b/cudamapper/src/index_gpu_two_indices.cu @@ -0,0 +1,129 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. +*/ + +#include "index_gpu_two_indices.cuh" + +namespace claragenomics +{ +namespace cudamapper +{ +namespace details +{ +namespace index_gpu_two_indices +{ +void find_first_occurrences_of_representations(thrust::device_vector& unique_representations_d, + thrust::device_vector& first_occurrence_index_d, + const thrust::device_vector& input_representations_d) +{ + // each element has value 1 if representation with the same index in representations_d has a different value than it's neighbour to the left, 0 otehrwise + // underlying type is 32-bit because a scan operation will be performed on the array, so the elements should be capable of holding a number that is equal to + // the total number of 1s in the array + thrust::device_vector new_value_mask_d(input_representations_d.size()); + + // TODO: Currently maximum number of thread blocks is 2^31-1. This means we support representations of up to (2^31-1) * number_of_threads + // With 256 that's (2^31-1)*2^8 ~= 2^39. If representation is 4-byte (we expect it to be 4 or 8) that's 2^39*2^2 = 2^41 = 2TB. 
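As a serial reference for what find_first_occurrences_of_representations, whose device implementation begins above, is documented to produce, the sketch below mirrors the example arrays from the function's comment; the width of representation_t is an assumption (the comments allow 4 or 8 bytes).

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    using representation_t = std::uint64_t; // width assumed for illustration

    // Given representations sorted so that equal values are adjacent, return one entry
    // per distinct value (unique_out) and the index of its first occurrence
    // (first_occurrence_out), with one extra trailing element holding the input size.
    void first_occurrences_reference(std::vector<representation_t>& unique_out,
                                     std::vector<std::uint32_t>& first_occurrence_out,
                                     const std::vector<representation_t>& representations)
    {
        unique_out.clear();
        first_occurrence_out.clear();
        for (std::size_t i = 0; i < representations.size(); ++i)
        {
            if (i == 0 || representations[i] != representations[i - 1])
            {
                unique_out.push_back(representations[i]);
                first_occurrence_out.push_back(static_cast<std::uint32_t>(i));
            }
        }
        first_occurrence_out.push_back(static_cast<std::uint32_t>(representations.size()));
    }

    // e.g. {0,0,0,0,12,12,12,12,12,12,23,23,23,32,32,32,32,32,46,46,46}
    // gives unique {0,12,23,32,46} and first occurrences {0,4,10,13,18,21}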
We don't expect to hit this limit any time soon + // The kernel can be modified to process several representation per thread to support arbitrary size + std::uint32_t number_of_threads = 256; // arbitrary value + std::uint32_t number_of_blocks = (input_representations_d.size() - 1) / number_of_threads + 1; + + create_new_value_mask<<>>(input_representations_d.data().get(), + input_representations_d.size(), + new_value_mask_d.data().get()); + CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); // sync not necessary, here only to detect the error immediately + + // do inclusive scan + // for example for + // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 + // 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 + // 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 + // gives + // 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5 + // meaning all elements with the same representation have the same value and those values are sorted in increasing order starting from 1 + thrust::device_vector representation_index_mask_d(new_value_mask_d.size()); + thrust::inclusive_scan(thrust::device, + new_value_mask_d.begin(), + new_value_mask_d.end(), + representation_index_mask_d.begin()); + new_value_mask_d.clear(); + new_value_mask_d.shrink_to_fit(); + + std::uint64_t number_of_unique_representations = representation_index_mask_d.back(); // D2H copy + + first_occurrence_index_d.resize(number_of_unique_representations + 1); // <- +1 for the additional element + first_occurrence_index_d.shrink_to_fit(); + unique_representations_d.resize(number_of_unique_representations); + unique_representations_d.shrink_to_fit(); + + find_first_occurrences_of_representations_kernel<<>>(representation_index_mask_d.data().get(), + input_representations_d.data().get(), + representation_index_mask_d.size(), + first_occurrence_index_d.data().get(), + unique_representations_d.data().get()); + // last element is the total number of elements in representations array + first_occurrence_index_d.back() = input_representations_d.size(); // H2D copy +} + +__global__ void create_new_value_mask(const representation_t* const representations_d, + const std::size_t number_of_elements, + std::uint32_t* const new_value_mask_d) +{ + std::uint64_t index = blockIdx.x * blockDim.x + threadIdx.x; + + if (index >= number_of_elements) + return; + + if (index == 0) + { + new_value_mask_d[0] = 1; + } + else + { + if (representations_d[index] == representations_d[index - 1]) + { + new_value_mask_d[index] = 0; + } + else + new_value_mask_d[index] = 1; + } +} + +__global__ void find_first_occurrences_of_representations_kernel(const std::uint64_t* const representation_index_mask_d, + const representation_t* const input_representations_d, + const std::size_t number_of_input_elements, + std::uint32_t* const starting_index_of_each_representation_d, + representation_t* const unique_representations_d) +{ + std::uint64_t index = blockIdx.x * blockDim.x + threadIdx.x; + + if (index >= number_of_input_elements) + return; + + if (index == 0) + { + starting_index_of_each_representation_d[0] = 0; + unique_representations_d[0] = input_representations_d[0]; + } + else + { + if (representation_index_mask_d[index] != representation_index_mask_d[index - 1]) + { + // if new representation is not the same as its left neighbor + // save the index at which that representation starts + // representation_index_mask_d gives a unique index to each representation, starting from 1, thus '-1' + starting_index_of_each_representation_d[representation_index_mask_d[index] - 1] = index; + 
unique_representations_d[representation_index_mask_d[index] - 1] = input_representations_d[index]; + } + } +} +} // namespace index_gpu_two_indices +} // namespace details + +} // namespace cudamapper +} // namespace claragenomics diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh index 4f65377a8..5b0c5f64e 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -110,6 +110,67 @@ namespace details { namespace index_gpu_two_indices { +/// \brief Creates compressed representation of index +/// +/// Creates two arrays: first one contains a list of unique representations and the second one the index +/// at which that representation occurrs for the first time in the original data. +/// Second element contains one additional elemet at the end, containing the total number of elemets in the original array. +/// +/// For example: +/// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +/// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 +/// ^ ^ ^ ^ ^ ^ +/// gives: +/// 0 12 23 32 46 +/// 0 4 10 13 18 21 +/// +/// \param unique_representations_d empty on input, contains one value of each representation on the output +/// \param first_occurrence_index_d empty on input, index of first occurrence of each representation and additional elemnt on the output +/// \param input_representations_d an array of representaton where representations with the same value stand next to each other +void find_first_occurrences_of_representations(thrust::device_vector& unique_representations_d, + thrust::device_vector& first_occurrence_index_d, + const thrust::device_vector& input_representations_d); + +/// \brief Writes 0 to the output array if the value to the left is the same as the current value, 1 otherwise. First element is always 1 +/// +/// For example: +/// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +/// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 +/// gives: +/// 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 +/// +/// \param representations_d +/// \param number_of_elements +/// \param new_value_mask_d generated array +__global__ void create_new_value_mask(const representation_t* const representations_d, + const std::size_t number_of_elements, + std::uint32_t* const new_value_mask_d); + +/// \brief Helper kernel for find_first_occurrences_of_representations +/// +/// Creates two arrays: first one contains a list of unique representations and the second one the index +/// at which that representation occurrs for the first time in the original data. 
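A small consequence of the layout described above, where the first-occurrence array carries one extra trailing element equal to the total input size: the number of sketch elements sharing a representation falls out by subtracting consecutive entries, which is presumably why the extra element is kept. A minimal host-side sketch:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // first_occurrence has one entry per unique representation plus a trailing element
    // equal to the total number of input elements, so the size of the i-th run is
    // simply the difference of consecutive entries.
    std::vector<std::uint32_t> run_lengths(const std::vector<std::uint32_t>& first_occurrence)
    {
        std::vector<std::uint32_t> lengths;
        for (std::size_t i = 0; i + 1 < first_occurrence.size(); ++i)
            lengths.push_back(first_occurrence[i + 1] - first_occurrence[i]);
        return lengths;
    }

    // e.g. {0, 4, 10, 13, 18, 21} gives {4, 6, 3, 5, 3}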
+/// +/// For example: +/// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 +/// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 +/// 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 +/// 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5 +/// ^ ^ ^ ^ ^ +/// gives: +/// 0 12 23 32 46 +/// 0 4 10 13 18 +/// +/// \param representation_index_mask_d +/// \param input_representatons_d +/// \param number_of_input_elements +/// \param starting_index_of_each_representation_d +/// \param unique_representations_d +__global__ void find_first_occurrences_of_representations_kernel(const std::uint64_t* const representation_index_mask_d, + const representation_t* const input_representations_d, + const std::size_t number_of_input_elements, + std::uint32_t* const starting_index_of_each_representation_d, + representation_t* const unique_representations_d); /// \brief Splits array of structs into one array per struct element /// diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index d4f1570be..9b9afba8f 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -39,57 +39,6 @@ namespace details namespace matcher_gpu { -void find_first_occurrences_of_representations(thrust::device_vector& unique_representations_d, - thrust::device_vector& first_occurrence_index_d, - const thrust::device_vector& input_representations_d) -{ - // each element has value 1 if representation with the same index in representations_d has a different value than it's neighbour to the left, 0 otehrwise - // underlying type is 32-bit because a scan operation will be performed on the array, so the elements should be capable of holding a number that is equal to - // the total number of 1s in the array - thrust::device_vector new_value_mask_d(input_representations_d.size()); - - // TODO: Currently maximum number of thread blocks is 2^31-1. This means we support representations of up to (2^31-1) * number_of_threads - // With 256 that's (2^31-1)*2^8 ~= 2^39. If representation is 4-byte (we expect it to be 4 or 8) that's 2^39*2^2 = 2^41 = 2TB. 
We don't expect to hit this limit any time soon - // The kernel can be modified to process several representation per thread to support arbitrary size - std::uint32_t number_of_threads = 256; // arbitrary value - std::uint32_t number_of_blocks = (input_representations_d.size() - 1) / number_of_threads + 1; - - create_new_value_mask<<>>(input_representations_d.data().get(), - input_representations_d.size(), - new_value_mask_d.data().get()); - CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); // sync not necessary, here only to detect the error immediately - - // do inclusive scan - // for example for - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 - // 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 - // 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 - // gives - // 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5 - // meaning all elements with the same representation have the same value and those values are sorted in increasing order starting from 1 - thrust::device_vector representation_index_mask_d(new_value_mask_d.size()); - thrust::inclusive_scan(thrust::device, - new_value_mask_d.begin(), - new_value_mask_d.end(), - representation_index_mask_d.begin()); - new_value_mask_d.clear(); - new_value_mask_d.shrink_to_fit(); - - std::uint64_t number_of_unique_representations = representation_index_mask_d.back(); // D2H copy - - first_occurrence_index_d.resize(number_of_unique_representations + 1); // <- +1 for the additional element - first_occurrence_index_d.shrink_to_fit(); - unique_representations_d.resize(number_of_unique_representations); - unique_representations_d.shrink_to_fit(); - - find_first_occurrences_of_representations_kernel<<>>(representation_index_mask_d.data().get(), - input_representations_d.data().get(), - representation_index_mask_d.size(), - first_occurrence_index_d.data().get(), - unique_representations_d.data().get()); - // last element is the total number of elements in representations array - first_occurrence_index_d.back() = input_representations_d.size(); // H2D copy -} void find_query_target_matches(thrust::device_vector& found_target_indices_d, const thrust::device_vector& query_representations_d, const thrust::device_vector& target_representations_d) { @@ -128,59 +77,6 @@ void compute_anchor_starting_indices(thrust::device_vector& anchor thrust::plus()); } -__global__ void create_new_value_mask(const representation_t* const representations_d, - const std::size_t number_of_elements, - std::uint32_t* const new_value_mask_d) -{ - std::uint64_t index = blockIdx.x * blockDim.x + threadIdx.x; - - if (index >= number_of_elements) - return; - - if (index == 0) - { - new_value_mask_d[0] = 1; - } - else - { - if (representations_d[index] == representations_d[index - 1]) - { - new_value_mask_d[index] = 0; - } - else - new_value_mask_d[index] = 1; - } -} - -__global__ void find_first_occurrences_of_representations_kernel(const std::uint64_t* const representation_index_mask_d, - const representation_t* const input_representations_d, - const std::size_t number_of_input_elements, - std::uint32_t* const starting_index_of_each_representation_d, - representation_t* const unique_representations_d) -{ - std::uint64_t index = blockIdx.x * blockDim.x + threadIdx.x; - - if (index >= number_of_input_elements) - return; - - if (index == 0) - { - starting_index_of_each_representation_d[0] = 0; - unique_representations_d[0] = input_representations_d[0]; - } - else - { - if (representation_index_mask_d[index] != representation_index_mask_d[index - 1]) - { - // if new representation is not 
the same as its left neighbor - // save the index at which that representation starts - // representation_index_mask_d gives a unique index to each representation, starting from 1, thus '-1' - starting_index_of_each_representation_d[representation_index_mask_d[index] - 1] = index; - unique_representations_d[representation_index_mask_d[index] - 1] = input_representations_d[index]; - } - } -} - __global__ void find_query_target_matches_kernel(int64_t* const found_target_indices, const representation_t* const query_representations_d, const int64_t n_query_representations, const representation_t* const target_representations_d, const int64_t n_target_representations) { const int64_t i = blockIdx.x * blockDim.x + threadIdx.x; diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index b3f58a46c..eeb5bebbf 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -39,27 +39,6 @@ namespace details namespace matcher_gpu { -/// \brief Creates compressed representation of index -/// -/// Creates two arrays: first one contains a list of unique representations and the second one the index -/// at which that representation occurrs for the first time in the original data. -/// Second element contains one additional elemet at the end, containing the total number of elemets in the original array. -/// -/// For example: -/// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 -/// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 -/// ^ ^ ^ ^ ^ ^ -/// gives: -/// 0 12 23 32 46 -/// 0 4 10 13 18 21 -/// -/// \param unique_representations_d empty on input, contains one value of each representation on the output -/// \param first_occurrence_index_d empty on input, index of first occurrence of each representation and additional elemnt on the output -/// \param input_representations_d an array of representaton where representations with the same value stand next to each other -void find_first_occurrences_of_representations(thrust::device_vector& unique_representations_d, - thrust::device_vector& first_occurrence_index_d, - const thrust::device_vector& input_representations_d); - /// \brief Finds the array index of the target representation for each query representation /// /// Takes an array of query representations and an array of target representations @@ -125,47 +104,6 @@ void compute_anchor_starting_indices(thrust::device_vector& anchor const thrust::device_vector& found_target_indices_d, const thrust::device_vector& target_starting_index_of_each_representation_d); -/// \brief Writes 0 to the output array if the value to the left is the same as the current value, 1 otherwise. First element is always 1 -/// -/// For example: -/// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 -/// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 -/// gives: -/// 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 -/// -/// \param representations_d -/// \param number_of_elements -/// \param new_value_mask_d generated array -__global__ void create_new_value_mask(const representation_t* const representations_d, - const std::size_t number_of_elements, - std::uint32_t* const new_value_mask_d); - -/// \brief Helper kernel for find_first_occurrences_of_representations -/// -/// Creates two arrays: first one contains a list of unique representations and the second one the index -/// at which that representation occurrs for the first time in the original data. 
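For orientation, the helper that stays behind in matcher_gpu, find_query_target_matches (its kernel signature appears above), is documented as a per-query binary search into the sorted unique target representations, writing -1 when a representation is absent. A serial host-side sketch of that contract, with all names illustrative:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    using representation_t = std::uint64_t; // width assumed for illustration

    // For every unique query representation, store the index of the equal value in the
    // sorted array of unique target representations, or -1 if it is absent.
    std::vector<std::int64_t> find_query_target_matches_reference(
        const std::vector<representation_t>& query_representations,
        const std::vector<representation_t>& target_representations) // must be sorted ascending
    {
        std::vector<std::int64_t> found_target_indices(query_representations.size(), -1);
        for (std::size_t i = 0; i < query_representations.size(); ++i)
        {
            const auto it = std::lower_bound(target_representations.begin(),
                                             target_representations.end(),
                                             query_representations[i]);
            if (it != target_representations.end() && *it == query_representations[i])
                found_target_indices[i] = it - target_representations.begin();
        }
        return found_target_indices;
    }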
-/// -/// For example: -/// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 -/// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 -/// 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 -/// 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5 -/// ^ ^ ^ ^ ^ -/// gives: -/// 0 12 23 32 46 -/// 0 4 10 13 18 -/// -/// \param representation_index_mask_d -/// \param input_representatons_d -/// \param number_of_input_elements -/// \param starting_index_of_each_representation_d -/// \param unique_representations_d -__global__ void find_first_occurrences_of_representations_kernel(const std::uint64_t* const representation_index_mask_d, - const representation_t* const input_representations_d, - const std::size_t number_of_input_elements, - std::uint32_t* const starting_index_of_each_representation_d, - representation_t* const unique_representations_d); - /// \brief Performs a binary search on target_representations_d for each element of query_representations_d and stores the found index (or -1 iff not found) in found_target_indices. /// /// For example: diff --git a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu index 48668203f..c799a9737 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu @@ -770,6 +770,315 @@ namespace details { namespace index_gpu_two_indices { +// ************ Test find_first_occurrences_of_representations ************** + +void test_find_first_occurrences_of_representations(const thrust::host_vector& representations_h, + const thrust::host_vector& expected_starting_index_of_each_representation_h) +{ + const thrust::device_vector representations_d(representations_h); + + thrust::device_vector starting_index_of_each_representation_d; + thrust::device_vector unique_representations_d; + find_first_occurrences_of_representations(unique_representations_d, + starting_index_of_each_representation_d, + representations_d); + + const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); + + ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); + + for (std::size_t i = 0; i < expected_starting_index_of_each_representation_h.size(); ++i) + { + EXPECT_EQ(starting_index_of_each_representation_h[i], expected_starting_index_of_each_representation_h[i]) << "index: " << i; + } +} + +TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_small_example) +{ + /// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 + /// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 + /// 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 + /// 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5 + /// ^ ^ ^ ^ ^ ^ + /// 0 4 10 13 18 21 + + thrust::host_vector representations_h; + thrust::device_vector expected_starting_index_of_each_representation_h; + representations_h.push_back(0); + expected_starting_index_of_each_representation_h.push_back(0); + representations_h.push_back(0); + representations_h.push_back(0); + representations_h.push_back(0); + representations_h.push_back(12); + expected_starting_index_of_each_representation_h.push_back(4); + representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(23); + expected_starting_index_of_each_representation_h.push_back(10); + representations_h.push_back(23); + 
representations_h.push_back(23); + representations_h.push_back(32); + expected_starting_index_of_each_representation_h.push_back(13); + representations_h.push_back(32); + representations_h.push_back(32); + representations_h.push_back(32); + representations_h.push_back(32); + representations_h.push_back(46); + expected_starting_index_of_each_representation_h.push_back(18); + representations_h.push_back(46); + representations_h.push_back(46); + expected_starting_index_of_each_representation_h.push_back(21); + + test_find_first_occurrences_of_representations(representations_h, + expected_starting_index_of_each_representation_h); +} + +TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_large_example) +{ + const std::uint64_t total_sketch_elements = 10000000; + const std::uint32_t sketch_elements_with_same_representation = 1000; + + thrust::host_vector representations_h; + thrust::device_vector expected_starting_index_of_each_representation_h; + + for (std::size_t i = 0; i < total_sketch_elements; ++i) + { + representations_h.push_back(i / sketch_elements_with_same_representation); + if (i % sketch_elements_with_same_representation == 0) + { + expected_starting_index_of_each_representation_h.push_back(i); + } + } + expected_starting_index_of_each_representation_h.push_back(total_sketch_elements); + + test_find_first_occurrences_of_representations(representations_h, + expected_starting_index_of_each_representation_h); +} + +// ************ Test create_new_value_mask ************** + +void test_create_new_value_mask(const thrust::host_vector& representations_h, + const thrust::host_vector& expected_new_value_mask_h, + std::uint32_t number_of_threads) +{ + const thrust::device_vector representations_d(representations_h); + thrust::device_vector new_value_mask_d(representations_h.size()); + + std::uint32_t number_of_blocks = (representations_h.size() - 1) / number_of_threads + 1; + + create_new_value_mask<<>>(thrust::raw_pointer_cast(representations_d.data()), + representations_d.size(), + thrust::raw_pointer_cast(new_value_mask_d.data())); + + CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); + + const thrust::host_vector new_value_mask_h(new_value_mask_d); + + ASSERT_EQ(new_value_mask_h.size(), expected_new_value_mask_h.size()); + for (std::size_t i = 0; i < expected_new_value_mask_h.size(); ++i) + { + EXPECT_EQ(new_value_mask_h[i], expected_new_value_mask_h[i]) << "index: " << i; + } +} + +TEST(TestCudamapperIndexGPUTwoIndices, test_create_new_value_mask_small_example) +{ + thrust::host_vector representations_h; + thrust::host_vector expected_new_value_mask_h; + representations_h.push_back(0); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(0); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(0); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(0); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(0); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(3); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(3); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(3); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(4); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(5); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(5); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(8); + expected_new_value_mask_h.push_back(1); + 
representations_h.push_back(8); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(8); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(9); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(9); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(9); + expected_new_value_mask_h.push_back(0); + + std::uint32_t number_of_threads = 3; + + test_create_new_value_mask(representations_h, + expected_new_value_mask_h, + number_of_threads); +} + +TEST(TestCudamapperIndexGPUTwoIndices, test_create_new_value_mask_small_data_large_example) +{ + const std::uint64_t total_sketch_elements = 10000000; + const std::uint32_t sketch_elements_with_same_representation = 1000; + + thrust::host_vector representations_h; + thrust::host_vector expected_new_value_mask_h; + for (std::size_t i = 0; i < total_sketch_elements; ++i) + { + representations_h.push_back(i / sketch_elements_with_same_representation); + if (i % sketch_elements_with_same_representation == 0) + expected_new_value_mask_h.push_back(1); + else + expected_new_value_mask_h.push_back(0); + } + + std::uint32_t number_of_threads = 256; + + test_create_new_value_mask(representations_h, + expected_new_value_mask_h, + number_of_threads); +} + +// ************ Test find_first_occurrences_of_representations_kernel ************** + +void test_find_first_occurrences_of_representations_kernel(const thrust::host_vector& representation_index_mask_h, + const thrust::host_vector& input_representations_h, + const thrust::host_vector& expected_starting_index_of_each_representation_h, + const thrust::host_vector& expected_unique_representations_h, + const std::uint32_t number_of_threads) +{ + const thrust::device_vector representation_index_mask_d(representation_index_mask_h); + const thrust::device_vector input_representations_d(input_representations_h); + ASSERT_EQ(expected_starting_index_of_each_representation_h.size(), representation_index_mask_h.back()); + ASSERT_EQ(expected_unique_representations_h.size(), representation_index_mask_h.back()); + thrust::device_vector starting_index_of_each_representation_d(expected_starting_index_of_each_representation_h.size()); + thrust::device_vector unique_representations_d(expected_starting_index_of_each_representation_h.size()); + + std::uint32_t number_of_blocks = (representation_index_mask_d.size() - 1) / number_of_threads + 1; + + find_first_occurrences_of_representations_kernel<<>>(representation_index_mask_d.data().get(), + input_representations_d.data().get(), + representation_index_mask_d.size(), + starting_index_of_each_representation_d.data().get(), + unique_representations_d.data().get()); + CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); + + const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); + const thrust::host_vector unique_representations_h(unique_representations_d); + + ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); + ASSERT_EQ(unique_representations_h.size(), expected_unique_representations_h.size()); + for (std::size_t i = 0; i < expected_starting_index_of_each_representation_h.size(); ++i) + { + EXPECT_EQ(starting_index_of_each_representation_h[i], expected_starting_index_of_each_representation_h[i]) << "index: " << i; + EXPECT_EQ(unique_representations_h[i], expected_unique_representations_h[i]) << "index: " << i; + } +} + +TEST(TestCudamapperIndexGPUTwoIndices, 
test_find_first_occurrences_of_representations_kernel_small_example) +{ + thrust::host_vector representation_index_mask_h; + thrust::host_vector input_representations_h; + thrust::host_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_unique_representations_h; + representation_index_mask_h.push_back(1); + input_representations_h.push_back(10); + expected_starting_index_of_each_representation_h.push_back(0); + expected_unique_representations_h.push_back(10); + representation_index_mask_h.push_back(1); + input_representations_h.push_back(10); + representation_index_mask_h.push_back(1); + input_representations_h.push_back(10); + representation_index_mask_h.push_back(1); + input_representations_h.push_back(10); + // + representation_index_mask_h.push_back(2); + input_representations_h.push_back(20); + expected_starting_index_of_each_representation_h.push_back(4); + expected_unique_representations_h.push_back(20); + // + representation_index_mask_h.push_back(3); + input_representations_h.push_back(30); + expected_starting_index_of_each_representation_h.push_back(5); + expected_unique_representations_h.push_back(30); + representation_index_mask_h.push_back(3); + input_representations_h.push_back(30); + representation_index_mask_h.push_back(3); + input_representations_h.push_back(30); + representation_index_mask_h.push_back(3); + input_representations_h.push_back(30); + // + representation_index_mask_h.push_back(4); + input_representations_h.push_back(40); + expected_starting_index_of_each_representation_h.push_back(9); + expected_unique_representations_h.push_back(40); + representation_index_mask_h.push_back(4); + input_representations_h.push_back(40); + representation_index_mask_h.push_back(4); + input_representations_h.push_back(40); + // + representation_index_mask_h.push_back(5); + input_representations_h.push_back(50); + expected_starting_index_of_each_representation_h.push_back(12); + expected_unique_representations_h.push_back(50); + // + representation_index_mask_h.push_back(6); + input_representations_h.push_back(60); + expected_starting_index_of_each_representation_h.push_back(13); + expected_unique_representations_h.push_back(60); + + std::uint32_t number_of_threads = 3; + + test_find_first_occurrences_of_representations_kernel(representation_index_mask_h, + input_representations_h, + expected_starting_index_of_each_representation_h, + expected_unique_representations_h, + number_of_threads); +} + +TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_kernel_large_example) +{ + const std::uint64_t total_sketch_elements = 10000000; + const std::uint32_t sketch_elements_with_same_representation = 1000; + + thrust::host_vector representation_index_mask_h; + thrust::host_vector input_representations_h; + thrust::host_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_unique_representations_h; + for (std::size_t i = 0; i < total_sketch_elements; ++i) + { + representation_index_mask_h.push_back(i / sketch_elements_with_same_representation + 1); + input_representations_h.push_back(representation_index_mask_h.back() * 10); + if (i % sketch_elements_with_same_representation == 0) + { + expected_starting_index_of_each_representation_h.push_back(i); + expected_unique_representations_h.push_back(input_representations_h.back()); + } + } + + std::uint32_t number_of_threads = 256; + + test_find_first_occurrences_of_representations_kernel(representation_index_mask_h, + input_representations_h, + 
expected_starting_index_of_each_representation_h, + expected_unique_representations_h, + number_of_threads); +} + // ************ Test copy_rest_to_separate_arrays ************** template diff --git a/cudamapper/tests/Test_CudamapperMatcherGPU.cu b/cudamapper/tests/Test_CudamapperMatcherGPU.cu index 343239056..d3dd0d3f7 100644 --- a/cudamapper/tests/Test_CudamapperMatcherGPU.cu +++ b/cudamapper/tests/Test_CudamapperMatcherGPU.cu @@ -23,309 +23,6 @@ namespace claragenomics namespace cudamapper { - -void test_create_new_value_mask(const thrust::host_vector& representations_h, - const thrust::host_vector& expected_new_value_mask_h, - std::uint32_t number_of_threads) -{ - const thrust::device_vector representations_d(representations_h); - thrust::device_vector new_value_mask_d(representations_h.size()); - - std::uint32_t number_of_blocks = (representations_h.size() - 1) / number_of_threads + 1; - - details::matcher_gpu::create_new_value_mask<<>>(thrust::raw_pointer_cast(representations_d.data()), - representations_d.size(), - thrust::raw_pointer_cast(new_value_mask_d.data())); - - CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); - - const thrust::host_vector new_value_mask_h(new_value_mask_d); - - ASSERT_EQ(new_value_mask_h.size(), expected_new_value_mask_h.size()); - for (std::size_t i = 0; i < expected_new_value_mask_h.size(); ++i) - { - EXPECT_EQ(new_value_mask_h[i], expected_new_value_mask_h[i]) << "index: " << i; - } -} - -TEST(TestCudamapperMatcherGPU, test_create_new_value_mask_small_example) -{ - thrust::host_vector representations_h; - thrust::host_vector expected_new_value_mask_h; - representations_h.push_back(0); - expected_new_value_mask_h.push_back(1); - representations_h.push_back(0); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(0); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(0); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(0); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(3); - expected_new_value_mask_h.push_back(1); - representations_h.push_back(3); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(3); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(4); - expected_new_value_mask_h.push_back(1); - representations_h.push_back(5); - expected_new_value_mask_h.push_back(1); - representations_h.push_back(5); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(8); - expected_new_value_mask_h.push_back(1); - representations_h.push_back(8); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(8); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(9); - expected_new_value_mask_h.push_back(1); - representations_h.push_back(9); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(9); - expected_new_value_mask_h.push_back(0); - - std::uint32_t number_of_threads = 3; - - test_create_new_value_mask(representations_h, - expected_new_value_mask_h, - number_of_threads); -} - -TEST(TestCudamapperMatcherGPU, test_create_new_value_mask_small_data_large_example) -{ - const std::uint64_t total_sketch_elements = 10000000; - const std::uint32_t sketch_elements_with_same_representation = 1000; - - thrust::host_vector representations_h; - thrust::host_vector expected_new_value_mask_h; - for (std::size_t i = 0; i < total_sketch_elements; ++i) - { - representations_h.push_back(i / sketch_elements_with_same_representation); - if (i % sketch_elements_with_same_representation == 0) - 
expected_new_value_mask_h.push_back(1); - else - expected_new_value_mask_h.push_back(0); - } - - std::uint32_t number_of_threads = 256; - - test_create_new_value_mask(representations_h, - expected_new_value_mask_h, - number_of_threads); -} -void test_find_first_occurrences_of_representations_kernel(const thrust::host_vector& representation_index_mask_h, - const thrust::host_vector& input_representations_h, - const thrust::host_vector& expected_starting_index_of_each_representation_h, - const thrust::host_vector& expected_unique_representations_h, - const std::uint32_t number_of_threads) -{ - const thrust::device_vector representation_index_mask_d(representation_index_mask_h); - const thrust::device_vector input_representations_d(input_representations_h); - ASSERT_EQ(expected_starting_index_of_each_representation_h.size(), representation_index_mask_h.back()); - ASSERT_EQ(expected_unique_representations_h.size(), representation_index_mask_h.back()); - thrust::device_vector starting_index_of_each_representation_d(expected_starting_index_of_each_representation_h.size()); - thrust::device_vector unique_representations_d(expected_starting_index_of_each_representation_h.size()); - - std::uint32_t number_of_blocks = (representation_index_mask_d.size() - 1) / number_of_threads + 1; - - details::matcher_gpu::find_first_occurrences_of_representations_kernel<<>>(representation_index_mask_d.data().get(), - input_representations_d.data().get(), - representation_index_mask_d.size(), - starting_index_of_each_representation_d.data().get(), - unique_representations_d.data().get()); - CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); - - const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); - const thrust::host_vector unique_representations_h(unique_representations_d); - - ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); - ASSERT_EQ(unique_representations_h.size(), expected_unique_representations_h.size()); - for (std::size_t i = 0; i < expected_starting_index_of_each_representation_h.size(); ++i) - { - EXPECT_EQ(starting_index_of_each_representation_h[i], expected_starting_index_of_each_representation_h[i]) << "index: " << i; - EXPECT_EQ(unique_representations_h[i], expected_unique_representations_h[i]) << "index: " << i; - } -} - -TEST(TestCudamapperMatcherGPU, test_find_first_occurrences_of_representations_kernel_small_example) -{ - thrust::host_vector representation_index_mask_h; - thrust::host_vector input_representations_h; - thrust::host_vector expected_starting_index_of_each_representation_h; - thrust::host_vector expected_unique_representations_h; - representation_index_mask_h.push_back(1); - input_representations_h.push_back(10); - expected_starting_index_of_each_representation_h.push_back(0); - expected_unique_representations_h.push_back(10); - representation_index_mask_h.push_back(1); - input_representations_h.push_back(10); - representation_index_mask_h.push_back(1); - input_representations_h.push_back(10); - representation_index_mask_h.push_back(1); - input_representations_h.push_back(10); - // - representation_index_mask_h.push_back(2); - input_representations_h.push_back(20); - expected_starting_index_of_each_representation_h.push_back(4); - expected_unique_representations_h.push_back(20); - // - representation_index_mask_h.push_back(3); - input_representations_h.push_back(30); - expected_starting_index_of_each_representation_h.push_back(5); - 
expected_unique_representations_h.push_back(30); - representation_index_mask_h.push_back(3); - input_representations_h.push_back(30); - representation_index_mask_h.push_back(3); - input_representations_h.push_back(30); - representation_index_mask_h.push_back(3); - input_representations_h.push_back(30); - // - representation_index_mask_h.push_back(4); - input_representations_h.push_back(40); - expected_starting_index_of_each_representation_h.push_back(9); - expected_unique_representations_h.push_back(40); - representation_index_mask_h.push_back(4); - input_representations_h.push_back(40); - representation_index_mask_h.push_back(4); - input_representations_h.push_back(40); - // - representation_index_mask_h.push_back(5); - input_representations_h.push_back(50); - expected_starting_index_of_each_representation_h.push_back(12); - expected_unique_representations_h.push_back(50); - // - representation_index_mask_h.push_back(6); - input_representations_h.push_back(60); - expected_starting_index_of_each_representation_h.push_back(13); - expected_unique_representations_h.push_back(60); - - std::uint32_t number_of_threads = 3; - - test_find_first_occurrences_of_representations_kernel(representation_index_mask_h, - input_representations_h, - expected_starting_index_of_each_representation_h, - expected_unique_representations_h, - number_of_threads); -} - -TEST(TestCudamapperMatcherGPU, test_find_first_occurrences_of_representations_kernel_large_example) -{ - const std::uint64_t total_sketch_elements = 10000000; - const std::uint32_t sketch_elements_with_same_representation = 1000; - - thrust::host_vector representation_index_mask_h; - thrust::host_vector input_representations_h; - thrust::host_vector expected_starting_index_of_each_representation_h; - thrust::host_vector expected_unique_representations_h; - for (std::size_t i = 0; i < total_sketch_elements; ++i) - { - representation_index_mask_h.push_back(i / sketch_elements_with_same_representation + 1); - input_representations_h.push_back(representation_index_mask_h.back() * 10); - if (i % sketch_elements_with_same_representation == 0) - { - expected_starting_index_of_each_representation_h.push_back(i); - expected_unique_representations_h.push_back(input_representations_h.back()); - } - } - - std::uint32_t number_of_threads = 256; - - test_find_first_occurrences_of_representations_kernel(representation_index_mask_h, - input_representations_h, - expected_starting_index_of_each_representation_h, - expected_unique_representations_h, - number_of_threads); -} - -void test_find_first_occurrences_of_representations(const thrust::host_vector& representations_h, - const thrust::host_vector& expected_starting_index_of_each_representation_h) -{ - const thrust::device_vector representations_d(representations_h); - - thrust::device_vector starting_index_of_each_representation_d; - thrust::device_vector unique_representations_d; - details::matcher_gpu::find_first_occurrences_of_representations(unique_representations_d, - starting_index_of_each_representation_d, - representations_d); - - const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); - - ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); - - for (std::size_t i = 0; i < expected_starting_index_of_each_representation_h.size(); ++i) - { - EXPECT_EQ(starting_index_of_each_representation_h[i], expected_starting_index_of_each_representation_h[i]) << "index: " << i; - } -} - 
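// For reference, a minimal host-side sketch of the behaviour verified by the helper above and the
// tests below (names and types here are illustrative only and are not part of either test file):
// given representations sorted in ascending order, each distinct value is reported once together
// with the index at which its run of equal values starts, and the tests for the full helper also
// expect the total element count appended as a final past-the-end entry.
//
//   std::vector<std::uint64_t> unique;
//   std::vector<std::size_t> first_index;
//   for (std::size_t i = 0; i < representations.size(); ++i)
//   {
//       // a new group starts wherever the value differs from its predecessor
//       if (i == 0 || representations[i] != representations[i - 1])
//       {
//           unique.push_back(representations[i]);
//           first_index.push_back(i);
//       }
//   }
//   first_index.push_back(representations.size()); // past-the-end entry
//
//   e.g. representations {0, 0, 0, 0, 12, 12, 23} gives unique {0, 12, 23} and first_index {0, 4, 6, 7}.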
-TEST(TestCudamapperMatcherGPU, test_find_first_occurrences_of_representations_small_example) -{ - /// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 - /// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 - /// 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 - /// 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5 - /// ^ ^ ^ ^ ^ ^ - /// 0 4 10 13 18 21 - - thrust::host_vector representations_h; - thrust::device_vector expected_starting_index_of_each_representation_h; - representations_h.push_back(0); - expected_starting_index_of_each_representation_h.push_back(0); - representations_h.push_back(0); - representations_h.push_back(0); - representations_h.push_back(0); - representations_h.push_back(12); - expected_starting_index_of_each_representation_h.push_back(4); - representations_h.push_back(12); - representations_h.push_back(12); - representations_h.push_back(12); - representations_h.push_back(12); - representations_h.push_back(12); - representations_h.push_back(23); - expected_starting_index_of_each_representation_h.push_back(10); - representations_h.push_back(23); - representations_h.push_back(23); - representations_h.push_back(32); - expected_starting_index_of_each_representation_h.push_back(13); - representations_h.push_back(32); - representations_h.push_back(32); - representations_h.push_back(32); - representations_h.push_back(32); - representations_h.push_back(46); - expected_starting_index_of_each_representation_h.push_back(18); - representations_h.push_back(46); - representations_h.push_back(46); - expected_starting_index_of_each_representation_h.push_back(21); - - test_find_first_occurrences_of_representations(representations_h, - expected_starting_index_of_each_representation_h); -} - -TEST(TestCudamapperMatcherGPU, test_find_first_occurrences_of_representations_large_example) -{ - const std::uint64_t total_sketch_elements = 10000000; - const std::uint32_t sketch_elements_with_same_representation = 1000; - - thrust::host_vector representations_h; - thrust::device_vector expected_starting_index_of_each_representation_h; - - for (std::size_t i = 0; i < total_sketch_elements; ++i) - { - representations_h.push_back(i / sketch_elements_with_same_representation); - if (i % sketch_elements_with_same_representation == 0) - { - expected_starting_index_of_each_representation_h.push_back(i); - } - } - expected_starting_index_of_each_representation_h.push_back(total_sketch_elements); - - test_find_first_occurrences_of_representations(representations_h, - expected_starting_index_of_each_representation_h); -} - void test_find_query_target_matches(const thrust::host_vector& query_representations_h, const thrust::host_vector& target_representations_h, const thrust::host_vector& expected_found_target_indices_h) From f94c71e80871bc20a5d463b9ca8396360ac0656b Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Thu, 7 Nov 2019 15:48:42 +0100 Subject: [PATCH 036/128] Creating compressed index in indexer, added methods to acceess unique_representatons and first_occurrence_of_representations --- .../cudamapper/index_two_indices.hpp | 8 + cudamapper/src/index_gpu_two_indices.cuh | 39 ++- .../Test_CudamapperIndexGPUTwoIndices.cu | 289 ++++++++++++------ 3 files changed, 245 insertions(+), 91 deletions(-) diff --git a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp index 983826f83..3d4682f78 100644 --- a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp +++ 
b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp
@@ -52,6 +52,14 @@ class IndexTwoIndices
     /// \return read name of read with the given read_id
     virtual const std::string& read_id_to_read_name(const read_id_t read_id) const = 0;
 
+    /// \brief returns an array where each representation is recorded only once, sorted by representation
+    /// \return an array where each representation is recorded only once, sorted by representation
+    virtual const thrust::device_vector& unique_representations() const = 0;
+
+    /// \brief returns first occurrence of corresponding representation from unique_representations() in data arrays
+    /// \return first occurrence of corresponding representation from unique_representations() in data arrays
+    virtual const thrust::device_vector& first_occurrence_of_representations() const = 0;
+
     /// \brief returns read length for the read with the given read_id
     /// \param read_id
     /// \return read length for the read with the given read_id
diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh
index 5b0c5f64e..b4187bbed 100644
--- a/cudamapper/src/index_gpu_two_indices.cuh
+++ b/cudamapper/src/index_gpu_two_indices.cuh
@@ -29,11 +29,16 @@ namespace cudamapper
 {
 /// IndexGPU - Contains sketch elements grouped by representation and by read id within the representation
 ///
-/// Class contains four separate data arrays: representations, read_ids, positions_in_reads and directions_of_reads.
+/// Sketch elements are separated in four data arrays: representations, read_ids, positions_in_reads and directions_of_reads.
 /// Elements of these four arrays with the same index represent one sketch element
-/// (representation, read_id of the read it belongs to, position in that read of the first basepair of sketch element and whether it is forward or reverse complement representation).
+/// (representation, read_id of the read it belongs to, position in that read of the first basepair of sketch element and whether it is
+/// forward or reverse complement representation).
 ///
-/// Elements of data arrays are grouped by sketch element representation and within those groups by read_id. Both representations and read_ids within representations are sorted in ascending order
+/// Elements of data arrays are grouped by sketch element representation and within those groups by read_id.
Both representations and read_ids within +/// representations are sorted in ascending order +/// +/// In addition to this the class contains an array where each representation is recorder only once (unique_representations) sorted by representation +/// and an array in which the index of first occurrence of that representation is recorded /// /// \tparam SketchElementImpl any implementation of SketchElement template @@ -72,6 +77,14 @@ public: /// \return an array of directions in which sketch elements were read const thrust::device_vector& directions_of_reads() const override; + /// \brief returns an array where each representation is recorder only once, sorted by representation + /// \return an array where each representation is recorder only once, sorted by representation + const thrust::device_vector& unique_representations() const override; + + /// \brief returns first occurrence of corresponding representation from unique_representations() in data arrays + /// \return first occurrence of corresponding representation from unique_representations() in data arrays + const thrust::device_vector& first_occurrence_of_representations() const override; + /// \brief returns read name of read with the given read_id /// \param read_id /// \return read name of read with the given read_id @@ -97,6 +110,9 @@ private: thrust::device_vector positions_in_reads_d_; thrust::device_vector directions_of_reads_d_; + thrust::device_vector unique_representations_d_; + thrust::device_vector first_occurrence_of_representations_d_; + std::vector read_id_to_read_name_; std::vector read_id_to_read_length_; @@ -251,6 +267,18 @@ const thrust::device_vector +const thrust::device_vector& IndexGPUTwoIndices::unique_representations() const +{ + return unique_representations_d_; +} + +template +const thrust::device_vector& IndexGPUTwoIndices::first_occurrence_of_representations() const +{ + return first_occurrence_of_representations_d_; +} + template const std::string& IndexGPUTwoIndices::read_id_to_read_name(const read_id_t read_id) const { @@ -400,6 +428,11 @@ void IndexGPUTwoIndices::generate_index(io::FastaParser* pars directions_of_reads_d_.data().get(), representations_d_.size()); CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); + + // now generate the index elements + details::index_gpu_two_indices::find_first_occurrences_of_representations(unique_representations_d_, + first_occurrence_of_representations_d_, + representations_d_); } } // namespace cudamapper diff --git a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu index c799a9737..652cd9653 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu @@ -29,10 +29,12 @@ void test_function(const std::string& filename, const read_id_t past_the_last_read_id, const std::uint64_t kmer_size, const std::uint64_t window_size, - const thrust::host_vector& expected_representations, - const thrust::host_vector& expected_positions_in_reads, - const thrust::host_vector& expected_read_ids, - const thrust::host_vector& expected_directions_of_reads, + const std::vector& expected_representations, + const std::vector& expected_positions_in_reads, + const std::vector& expected_read_ids, + const std::vector& expected_directions_of_reads, + const std::vector& expected_unique_representations, + const std::vector& expected_first_occurrence_of_representations, const std::vector& expected_read_id_to_read_name, const std::vector& expected_read_id_to_read_length, 
const std::uint64_t expected_number_of_reads) @@ -81,6 +83,21 @@ void test_function(const std::string& filename, EXPECT_EQ(read_ids_h[i], expected_read_ids[i]) << "i: " << i; EXPECT_EQ(directions_of_reads_h[i], expected_directions_of_reads[i]) << "i: " << i; } + + const thrust::device_vector unique_representations_d = index.unique_representations(); + const thrust::device_vector first_occurrence_of_representations_d = index.first_occurrence_of_representations(); + const thrust::host_vector unique_representations_h(unique_representations_d); + const thrust::host_vector first_occurrence_of_representations_h(first_occurrence_of_representations_d); + ASSERT_EQ(expected_unique_representations.size() + 1, expected_first_occurrence_of_representations.size()); + ASSERT_EQ(unique_representations_h.size(), expected_unique_representations.size()); + ASSERT_EQ(first_occurrence_of_representations_h.size(), expected_first_occurrence_of_representations.size()); + for (std::size_t i = 0; i < expected_unique_representations.size(); ++i) + { + EXPECT_EQ(expected_unique_representations[i], unique_representations_h[i]) << "index: " << i; + EXPECT_EQ(expected_first_occurrence_of_representations[i], first_occurrence_of_representations_h[i]) << "index: " << i; + } + EXPECT_EQ(expected_first_occurrence_of_representations.back(), expected_representations.size()); + } TEST(TestCudamapperIndexGPUTwoIndices, GATT_4_1) @@ -105,10 +122,17 @@ TEST(TestCudamapperIndexGPUTwoIndices, GATT_4_1) std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; + std::vector expected_unique_representations; + std::vector expected_first_occurrence_of_representations; + expected_representations.push_back(0b1101); expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_unique_representations.push_back(0b1101); + expected_first_occurrence_of_representations.push_back(0); + + expected_first_occurrence_of_representations.push_back(1); test_function(filename, 0, @@ -119,6 +143,8 @@ TEST(TestCudamapperIndexGPUTwoIndices, GATT_4_1) expected_positions_in_reads, expected_read_ids, expected_directions_of_reads, + expected_unique_representations, + expected_first_occurrence_of_representations, expected_read_id_to_read_name, expected_read_id_to_read_length, 1); @@ -167,19 +193,29 @@ TEST(TestCudamapperIndexGPUTwoIndices, GATT_2_3) std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; + std::vector expected_unique_representations; + std::vector expected_first_occurrence_of_representations; expected_representations.push_back(0b0000); // AA(2r0) expected_positions_in_reads.push_back(2); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_unique_representations.push_back(0b0000); + expected_first_occurrence_of_representations.push_back(0); expected_representations.push_back(0b0011); // AT(1f0) expected_positions_in_reads.push_back(1); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0011); + expected_first_occurrence_of_representations.push_back(1); expected_representations.push_back(0b1000); // GA(0f0) expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); 
expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b1000); + expected_first_occurrence_of_representations.push_back(2); + + expected_first_occurrence_of_representations.push_back(3); test_function(filename, 0, @@ -190,6 +226,8 @@ TEST(TestCudamapperIndexGPUTwoIndices, GATT_2_3) expected_positions_in_reads, expected_read_ids, expected_directions_of_reads, + expected_unique_representations, + expected_first_occurrence_of_representations, expected_read_id_to_read_name, expected_read_id_to_read_length, 1); @@ -216,6 +254,8 @@ TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_2_8) std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; + std::vector expected_unique_representations; + std::vector expected_first_occurrence_of_representations; test_function(filename, 0, @@ -226,96 +266,98 @@ TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_2_8) expected_positions_in_reads, expected_read_ids, expected_directions_of_reads, + expected_unique_representations, + expected_first_occurrence_of_representations, expected_read_id_to_read_name, expected_read_id_to_read_length, 0); } // TODO: Cover this case as well -/*TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_5) -{ - // *** One Read is shorter than one full window, the other is not *** - - // >read_0 - // CATCAAG - // >read_1 - // AAGCTA - - // ** CATCAAG ** - - // kmer representation: forward, reverse - // CAT: 103 <032> - // ATC: <031> 203 - // TCA: <310> 320 - // CAA: <100> 332 - // AAG: <002> 133 - - // front end minimizers: representation, position_in_read, direction, read_id - // CAT : 032 0 R 0 - // CATC : 031 1 F 0 - // CATCA : 031 1 F 0 - // CATCAA: 031 1 F 0 - - // central minimizers - // CATCAAG: 002 4 F 0 - - // back end minimizers - // ATCAAG: 002 4 F 0 - // TCAAG : 002 4 F 0 - // CAAG : 002 4 F 0 - // AAG : 002 4 F 0 - - // ** AAGCTA ** - // ** read does not fit one array ** - - // All minimizers: ATG(0r0), ATC(1f0), AAG(4f0) - - // (2r1) means position 2, reverse direction, read 1 - // (1,2) means array block start at element 1 and has 2 elements - - // 0 1 2 - // data arrays: AAG(4f0), ATC(1f0), ATG(0r0) - - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/catcaag_aagcta.fasta"; - const std::uint64_t minimizer_size = 3; - const std::uint64_t window_size = 5; - - std::vector expected_read_id_to_read_name; - expected_read_id_to_read_name.push_back("read_0"); - - std::vector expected_read_id_to_read_length; - expected_read_id_to_read_length.push_back(7); - - std::vector expected_representations; - std::vector expected_positions_in_reads; - std::vector expected_read_ids; - std::vector expected_directions_of_reads; - expected_representations.push_back(0b000010); // AAG(4f0) - expected_positions_in_reads.push_back(4); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b001101); // ATC(1f0) - expected_positions_in_reads.push_back(1); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b001110); // ATG(0r0) - expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - - test_function(filename, - 0, - 2, - minimizer_size, - window_size, - 
expected_representations, - expected_positions_in_reads, - expected_read_ids, - expected_directions_of_reads, - expected_read_id_to_read_name, - expected_read_id_to_read_length, - 1); // <- only one read goes into index, the other is too short -}*/ +//TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_5) +//{ +// // *** One Read is shorter than one full window, the other is not *** +// +// // >read_0 +// // CATCAAG +// // >read_1 +// // AAGCTA +// +// // ** CATCAAG ** +// +// // kmer representation: forward, reverse +// // CAT: 103 <032> +// // ATC: <031> 203 +// // TCA: <310> 320 +// // CAA: <100> 332 +// // AAG: <002> 133 +// +// // front end minimizers: representation, position_in_read, direction, read_id +// // CAT : 032 0 R 0 +// // CATC : 031 1 F 0 +// // CATCA : 031 1 F 0 +// // CATCAA: 031 1 F 0 +// +// // central minimizers +// // CATCAAG: 002 4 F 0 +// +// // back end minimizers +// // ATCAAG: 002 4 F 0 +// // TCAAG : 002 4 F 0 +// // CAAG : 002 4 F 0 +// // AAG : 002 4 F 0 +// +// // ** AAGCTA ** +// // ** read does not fit one array ** +// +// // All minimizers: ATG(0r0), ATC(1f0), AAG(4f0) +// +// // (2r1) means position 2, reverse direction, read 1 +// // (1,2) means array block start at element 1 and has 2 elements +// +// // 0 1 2 +// // data arrays: AAG(4f0), ATC(1f0), ATG(0r0) +// +// const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/catcaag_aagcta.fasta"; +// const std::uint64_t minimizer_size = 3; +// const std::uint64_t window_size = 5; +// +// std::vector expected_read_id_to_read_name; +// expected_read_id_to_read_name.push_back("read_0"); +// +// std::vector expected_read_id_to_read_length; +// expected_read_id_to_read_length.push_back(7); +// +// std::vector expected_representations; +// std::vector expected_positions_in_reads; +// std::vector expected_read_ids; +// std::vector expected_directions_of_reads; +// expected_representations.push_back(0b000010); // AAG(4f0) +// expected_positions_in_reads.push_back(4); +// expected_read_ids.push_back(0); +// expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); +// expected_representations.push_back(0b001101); // ATC(1f0) +// expected_positions_in_reads.push_back(1); +// expected_read_ids.push_back(0); +// expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); +// expected_representations.push_back(0b001110); // ATG(0r0) +// expected_positions_in_reads.push_back(0); +// expected_read_ids.push_back(0); +// expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); +// +// test_function(filename, +// 0, +// 2, +// minimizer_size, +// window_size, +// expected_representations, +// expected_positions_in_reads, +// expected_read_ids, +// expected_directions_of_reads, +// expected_read_id_to_read_name, +// expected_read_id_to_read_length, +// 1); // <- only one read goes into index, the other is too short +//} TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_3_5) { @@ -370,26 +412,41 @@ TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_3_5) std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; + std::vector expected_unique_representations; + std::vector expected_first_occurrence_of_representations; + expected_representations.push_back(0b000101); // ACC(5f0) expected_positions_in_reads.push_back(5); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + 
expected_unique_representations.push_back(0b000101); + expected_first_occurrence_of_representations.push_back(0); expected_representations.push_back(0b001100); // ATA(3f0) expected_positions_in_reads.push_back(3); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b001100); + expected_first_occurrence_of_representations.push_back(1); expected_representations.push_back(0b001110); // ATG(2r0) expected_positions_in_reads.push_back(2); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_unique_representations.push_back(0b001110); + expected_first_occurrence_of_representations.push_back(2); expected_representations.push_back(0b010100); // CCA(1f0) expected_positions_in_reads.push_back(1); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b010100); + expected_first_occurrence_of_representations.push_back(3); expected_representations.push_back(0b010101); // CCC(0f0) expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b010101); + expected_first_occurrence_of_representations.push_back(4); + + expected_first_occurrence_of_representations.push_back(5); test_function(filename, 0, @@ -400,6 +457,8 @@ TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_3_5) expected_positions_in_reads, expected_read_ids, expected_directions_of_reads, + expected_unique_representations, + expected_first_occurrence_of_representations, expected_read_id_to_read_name, expected_read_id_to_read_length, 1); @@ -478,11 +537,15 @@ TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_2) std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; + std::vector expected_unique_representations; + std::vector expected_first_occurrence_of_representations; expected_representations.push_back(0b000010); // AAG(4f0) expected_positions_in_reads.push_back(4); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0010); + expected_first_occurrence_of_representations.push_back(0); expected_representations.push_back(0b000010); // AAG(0f1) expected_positions_in_reads.push_back(0); expected_read_ids.push_back(1); @@ -491,22 +554,34 @@ TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_2) expected_positions_in_reads.push_back(2); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_unique_representations.push_back(0b001001); + expected_first_occurrence_of_representations.push_back(2); expected_representations.push_back(0b001101); // ATC(1f0) expected_positions_in_reads.push_back(1); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b001101); + expected_first_occurrence_of_representations.push_back(3); expected_representations.push_back(0b001110); // ATG(0r0) expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); 
expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_unique_representations.push_back(0b001110); + expected_first_occurrence_of_representations.push_back(4); expected_representations.push_back(0b010000); // CAA(3f0) expected_positions_in_reads.push_back(3); expected_read_ids.push_back(0); + expected_unique_representations.push_back(0b010000); + expected_first_occurrence_of_representations.push_back(5); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); expected_representations.push_back(0b011100); // CTA(3f1) expected_positions_in_reads.push_back(3); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b011100); + expected_first_occurrence_of_representations.push_back(6); + + expected_first_occurrence_of_representations.push_back(7); test_function(filename, 0, @@ -517,6 +592,8 @@ TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_2) expected_positions_in_reads, expected_read_ids, expected_directions_of_reads, + expected_unique_representations, + expected_first_occurrence_of_representations, expected_read_id_to_read_name, expected_read_id_to_read_length, 2); @@ -607,10 +684,15 @@ TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3) std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; + std::vector expected_unique_representations; + std::vector expected_first_occurrence_of_representations; + expected_representations.push_back(0b0000); // AA(0f0) expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0000); + expected_first_occurrence_of_representations.push_back(0); expected_representations.push_back(0b0000); // AA(1f0) expected_positions_in_reads.push_back(1); expected_read_ids.push_back(0); @@ -635,10 +717,14 @@ TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3) expected_positions_in_reads.push_back(3); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0001); + expected_first_occurrence_of_representations.push_back(6); expected_representations.push_back(0b0010); // AG(4r0) expected_positions_in_reads.push_back(4); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_unique_representations.push_back(0b0010); + expected_first_occurrence_of_representations.push_back(7); expected_representations.push_back(0b0010); // AG(5f1) expected_positions_in_reads.push_back(5); expected_read_ids.push_back(1); @@ -647,14 +733,22 @@ TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3) expected_positions_in_reads.push_back(2); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0100); + expected_first_occurrence_of_representations.push_back(9); expected_representations.push_back(0b0101); // CC(1f1) expected_positions_in_reads.push_back(1); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0101); + 
expected_first_occurrence_of_representations.push_back(10); expected_representations.push_back(0b1001); // GC(0f1) expected_positions_in_reads.push_back(0); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b1001); + expected_first_occurrence_of_representations.push_back(11); + + expected_first_occurrence_of_representations.push_back(12); test_function(filename, 0, @@ -665,6 +759,8 @@ TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3) expected_positions_in_reads, expected_read_ids, expected_directions_of_reads, + expected_unique_representations, + expected_first_occurrence_of_representations, expected_read_id_to_read_name, expected_read_id_to_read_length, 2); @@ -727,10 +823,15 @@ TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3_only_second_read_in std::vector expected_positions_in_reads; std::vector expected_read_ids; std::vector expected_directions_of_reads; + std::vector expected_unique_representations; + std::vector expected_first_occurrence_of_representations; + expected_representations.push_back(0b0000); // AA(3f1) expected_positions_in_reads.push_back(3); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b00); + expected_first_occurrence_of_representations.push_back(0); expected_representations.push_back(0b0000); // AA(4f1) expected_positions_in_reads.push_back(4); expected_read_ids.push_back(1); @@ -739,18 +840,28 @@ TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3_only_second_read_in expected_positions_in_reads.push_back(5); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0010); + expected_first_occurrence_of_representations.push_back(2); expected_representations.push_back(0b0100); // CA(2f1) expected_positions_in_reads.push_back(2); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0100); + expected_first_occurrence_of_representations.push_back(3); expected_representations.push_back(0b0101); // CC(1f1) expected_positions_in_reads.push_back(1); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0101); + expected_first_occurrence_of_representations.push_back(4); expected_representations.push_back(0b1001); // GC(0f1) expected_positions_in_reads.push_back(0); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b1001); + expected_first_occurrence_of_representations.push_back(5); + + expected_first_occurrence_of_representations.push_back(6); test_function(filename, 1, // <- only take second read @@ -761,6 +872,8 @@ TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3_only_second_read_in expected_positions_in_reads, expected_read_ids, expected_directions_of_reads, + expected_unique_representations, + expected_first_occurrence_of_representations, expected_read_id_to_read_name, expected_read_id_to_read_length, 1); From e91fae6a373775eca58c69e0f0bf5757e124b6fd Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Thu, 7 Nov 2019 10:27:10 -0500 Subject: 
[PATCH 037/128] [evaluate_paf] skipping matches within same read
---
 pyclaragenomics/bin/evaluate_paf | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyclaragenomics/bin/evaluate_paf b/pyclaragenomics/bin/evaluate_paf
index 3d9405ef8..b1d85e3a5 100755
--- a/pyclaragenomics/bin/evaluate_paf
+++ b/pyclaragenomics/bin/evaluate_paf
@@ -31,6 +31,9 @@ def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400):
     num_true_overlaps = 0
 
     for truth_overlap in pafio.read_paf(truth_paf_filepath):
+        if truth_overlap.query_sequence_name == truth_overlap.target_sequence_name:
+            print("Skipping")
+            continue
         key = truth_overlap.query_sequence_name + truth_overlap.target_sequence_name
         truth_overlaps[key].append(truth_overlap)

From 7f89a17b4bf29b1cf83567dc5fef679af63ccd8c Mon Sep 17 00:00:00 2001
From: Joyjit Daw
Date: Fri, 8 Nov 2019 11:19:31 -0500
Subject: [PATCH 038/128] [evaluate_paf] refactor and clean up code
---
 pyclaragenomics/bin/evaluate_paf | 76 +++++++++++++++++++-------------
 1 file changed, 46 insertions(+), 30 deletions(-)

diff --git a/pyclaragenomics/bin/evaluate_paf b/pyclaragenomics/bin/evaluate_paf
index b1d85e3a5..c05b290b4 100755
--- a/pyclaragenomics/bin/evaluate_paf
+++ b/pyclaragenomics/bin/evaluate_paf
@@ -15,13 +15,30 @@ import argparse
 from claragenomics.io import pafio
 from collections import defaultdict
 
+def match_overlaps(query_0, query_1, target_0, target_1, pos_tolerance):
+    """Given two sets of query and target ranges, check if the query and target ranges
+    fall within a specified tolerance of each other.
 
-def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400):
+    Args:
+        query_0 (int, int) : Start/end locations for query_0
+        query_1 (int, int) : Start/end locations of query_1
+        target_0 (int, int): Start/end location of target_0
+        target_1 (int, int): Start/end locations of target_1
+
+    Returns: Boolean indicating query and target match.
+    """
+    return abs(query_0[0] - query_1[0]) < pos_tolerance and \
+        abs(query_0[1] - query_1[1]) < pos_tolerance and \
+        abs(target_0[0] - target_1[0]) < pos_tolerance and \
+        abs(target_0[1] - target_1[1]) < pos_tolerance
+
+def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400, skip_self_mappings=True):
     """Given a truth and test set PAF file, count number of in/correctly detected, and non-detected overlaps
 
     Args:
        truth_paf_filepath (str): Path to truth set PAF file
        test_paf_filepath (str): Path to test set PAF file
        pos_tolerance (int): query and reference positions within this range will be considered to be a matched overlap
+       skip_self_mappings (bool): Skip overlaps where query and target ID are the same
 
    Returns: 3-tuple consisting of (true_positive_count, false_positive_count, false_negative_count).
""" @@ -31,9 +48,10 @@ def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400): num_true_overlaps = 0 for truth_overlap in pafio.read_paf(truth_paf_filepath): - if truth_overlap.query_sequence_name == truth_overlap.target_sequence_name: - print("Skipping") + if skip_self_mappings and \ + (truth_overlap.query_sequence_name == truth_overlap.target_sequence_name): continue + key = truth_overlap.query_sequence_name + truth_overlap.target_sequence_name truth_overlaps[key].append(truth_overlap) @@ -44,53 +62,43 @@ def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400): false_negative_count = 0 for test_overlap in pafio.read_paf(test_paf_filepath): - query_start_0 = test_overlap.query_start - query_end_0 = test_overlap.query_end - target_start_0 = test_overlap.target_start - target_end_0 = test_overlap.target_end + if skip_self_mappings and \ + (test_overlap.query_sequence_name == test_overlap.target_sequence_name): + continue + + query_0 = (test_overlap.query_start, test_overlap.query_end) + target_0 = (test_overlap.target_start, test_overlap.target_end) key = test_overlap.query_sequence_name + test_overlap.target_sequence_name key_reversed = test_overlap.target_sequence_name + "_" + test_overlap.query_sequence_name + found_match = False if key in truth_overlaps: - found_match = False for truth_overlap in truth_overlaps[key]: - query_start_1 = truth_overlap.query_start - query_end_1 = truth_overlap.query_end - target_start_1 = truth_overlap.target_start - target_end_1 = truth_overlap.target_end + query_1 = (truth_overlap.query_start, truth_overlap.query_end) + target_1 = (truth_overlap.target_start, truth_overlap.target_end) - matched = abs(query_start_0 - query_start_1) < pos_tolerance and \ - abs(query_end_0 - query_end_1) < pos_tolerance and \ - abs(target_start_0 - target_start_1) < pos_tolerance and \ - abs(target_end_0 - target_end_1) < pos_tolerance + matched = match_overlaps(query_0, query_1, target_0, target_1, pos_tolerance) if matched: true_positive_count += 1 found_match = True break - if not found_match: - false_positive_count += 1 elif key_reversed in truth_overlaps: - found_match = False for truth_overlap in truth_overlaps[key_reversed]: - query_start_1 = truth_overlap.target_start - query_end_1 = truth_overlap.target_end - target_start_1 = truth_overlap.query_start - target_end_1 = truth_overlap.query_end + query_1 = (truth_overlap.target_start, truth_overlap.target_end) + target_1 = (truth_overlap.query_start, truth_overlap.query_end) - matched = abs(query_start_0 - query_start_1) < pos_tolerance and \ - abs(query_end_0 - query_end_1) < pos_tolerance and \ - abs(target_start_0 - target_start_1) < pos_tolerance and \ - abs(target_end_0 - target_end_1) < pos_tolerance + matched = match_overlaps(query_0, query_1, target_0, target_1, pos_tolerance) if matched: true_positive_count += 1 found_match = True break - if not found_match: - false_positive_count += 1 + + if not found_match: + false_positive_count += 1 # Now count the false negatives: false_negative_count = num_true_overlaps - true_positive_count @@ -106,10 +114,18 @@ if __name__ == "__main__": parser.add_argument('--test_paf', type=str, default='test.paf') + parser.add_argument('--pos_tolerance', + type=int, + default=400, + help="Position tolerance around truth set interval to count as successful match.") + parser.add_argument('--skip_self_mapping', + action="store_true", + help="Skip checking overlaps where query/target name are same") args = parser.parse_args() - true_positives, 
false_positives, false_negatives = evaluate_paf(args.truth_paf, args.test_paf) + true_positives, false_positives, false_negatives = evaluate_paf(args.truth_paf, args.test_paf, \ + args.pos_tolerance, args.skip_self_mapping) precision = true_positives / (true_positives + false_positives) recall = true_positives / (true_positives + false_negatives) From f182baf370ef8c122b75259bffe04f8b0d061135 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Fri, 8 Nov 2019 18:44:38 -0500 Subject: [PATCH 039/128] [cudapoa] add graph data structure --- common/utils/CMakeLists.txt | 3 + .../include/claragenomics/utils/graph.hpp | 116 ++++++++++++++++++ common/utils/tests/CMakeLists.txt | 20 +++ common/utils/tests/TestGraph.cpp | 47 +++++++ common/utils/tests/main.cpp | 19 +++ .../include/claragenomics/cudapoa/batch.hpp | 5 + cudapoa/src/allocate_block.cpp | 20 ++- cudapoa/src/allocate_block.hpp | 2 +- cudapoa/src/cudapoa_batch.cpp | 64 +++++++++- cudapoa/src/cudapoa_batch.hpp | 4 + cudapoa/tests/Test_CudapoaGenerateMSA2.cpp | 6 +- 11 files changed, 301 insertions(+), 5 deletions(-) create mode 100644 common/utils/include/claragenomics/utils/graph.hpp create mode 100644 common/utils/tests/CMakeLists.txt create mode 100644 common/utils/tests/TestGraph.cpp create mode 100644 common/utils/tests/main.cpp diff --git a/common/utils/CMakeLists.txt b/common/utils/CMakeLists.txt index 71e7f5754..140861413 100644 --- a/common/utils/CMakeLists.txt +++ b/common/utils/CMakeLists.txt @@ -41,5 +41,8 @@ install(TARGETS utils install(DIRECTORY include/ DESTINATION include) install(EXPORT utils DESTINATION cmake) +# Add tests +add_subdirectory(tests) + # Adding formatting cga_enable_auto_formatting("${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/common/utils/include/claragenomics/utils/graph.hpp b/common/utils/include/claragenomics/utils/graph.hpp new file mode 100644 index 000000000..ebf4d8aa5 --- /dev/null +++ b/common/utils/include/claragenomics/utils/graph.hpp @@ -0,0 +1,116 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. 
+*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace claragenomics +{ + +using node_id_t = int32_t; + +/// \struct pair_hasher +struct PairHash +{ +public: + template + size_t operator()(const std::pair& pair) const + { + size_t hash_1 = std::hash()(pair.first); + size_t hash_2 = std::hash()(pair.second); + return hash_1 ^ hash_2; + } +}; + +/// \class DirectedGraph +/// Object representing a graph structure +class DirectedGraph +{ +public: + DirectedGraph() = default; + + ~DirectedGraph() = default; + + virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to) + { + auto edge = std::pair(node_id_from, node_id_to); + auto find_edge = edges_.find(edge); + if (find_edge == edges_.end()) + { + edges_.insert(edge); + auto find_node = adjacent_nodes_.find(node_id_from); + if (find_node == adjacent_nodes_.end()) + { + adjacent_nodes_.insert({node_id_from, {node_id_to}}); + } + else + { + find_node->second.push_back(node_id_to); + } + } + } + + virtual void add_label(node_id_t node, const std::string& label) + { + node_labels_.insert({node, label}); + } + + virtual const std::vector& get_adjacent_nodes(node_id_t node) + { + auto iter = adjacent_nodes_.find(node); + if (iter != adjacent_nodes_.end()) + { + return iter->second; + } + else + { + return empty_; + } + } + + virtual const std::vector get_node_ids() + { + std::vector nodes; + for (auto iter : adjacent_nodes_) + { + nodes.push_back(iter.first); + } + + return nodes; + } + + virtual std::string get_node_label(node_id_t node) + { + auto found_node = node_labels_.find(node); + if (found_node != node_labels_.end()) + { + return found_node->second; + } + else + { + throw std::runtime_error("No node found with given ID"); + } + } + +private: + std::unordered_map> adjacent_nodes_; + std::unordered_set, PairHash> edges_; + std::unordered_map node_labels_; + const std::vector empty_; +}; + +} // namespace claragenomics diff --git a/common/utils/tests/CMakeLists.txt b/common/utils/tests/CMakeLists.txt new file mode 100644 index 000000000..77ab115d5 --- /dev/null +++ b/common/utils/tests/CMakeLists.txt @@ -0,0 +1,20 @@ +# +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. +# + +project(cgautilstests) + +set(SOURCES + main.cpp + TestGraph.cpp) + +set(LIBS + utils) + +cga_add_tests(${PROJECT_NAME} "${SOURCES}" "${LIBS}") diff --git a/common/utils/tests/TestGraph.cpp b/common/utils/tests/TestGraph.cpp new file mode 100644 index 000000000..b9ba7e680 --- /dev/null +++ b/common/utils/tests/TestGraph.cpp @@ -0,0 +1,47 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. 
+*/ + +#include + +#include "gtest/gtest.h" + +namespace claragenomics +{ + +TEST(GraphTest, DirectediGraph) +{ + DirectedGraph graph; + + // Sample graph + // 3 + // ^ + // | + // 1 -> 2 -> 5 + // | ^ + // u | + // 4 ---| + + graph.add_edge(1, 2); + graph.add_edge(2, 5); + graph.add_edge(2, 3); + graph.add_edge(2, 4); + graph.add_edge(4, 5); + + const auto& adjacent_nodes_to_2 = graph.get_adjacent_nodes(2); + EXPECT_NE(std::find(adjacent_nodes_to_2.begin(), adjacent_nodes_to_2.end(), 3), adjacent_nodes_to_2.end()); + EXPECT_NE(std::find(adjacent_nodes_to_2.begin(), adjacent_nodes_to_2.end(), 4), adjacent_nodes_to_2.end()); + EXPECT_NE(std::find(adjacent_nodes_to_2.begin(), adjacent_nodes_to_2.end(), 5), adjacent_nodes_to_2.end()); + EXPECT_EQ(std::find(adjacent_nodes_to_2.begin(), adjacent_nodes_to_2.end(), 1), adjacent_nodes_to_2.end()); + + const auto& adjacent_nodes_to_3 = graph.get_adjacent_nodes(3); + EXPECT_EQ(std::find(adjacent_nodes_to_3.begin(), adjacent_nodes_to_3.end(), 2), adjacent_nodes_to_3.end()); +} + +} // namespace claragenomics diff --git a/common/utils/tests/main.cpp b/common/utils/tests/main.cpp new file mode 100644 index 000000000..528bdde36 --- /dev/null +++ b/common/utils/tests/main.cpp @@ -0,0 +1,19 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. +*/ + +#include "gtest/gtest.h" + +// ----------------------------------------------------------------------------- +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} diff --git a/cudapoa/include/claragenomics/cudapoa/batch.hpp b/cudapoa/include/claragenomics/cudapoa/batch.hpp index 6b5842bc5..98d011991 100644 --- a/cudapoa/include/claragenomics/cudapoa/batch.hpp +++ b/cudapoa/include/claragenomics/cudapoa/batch.hpp @@ -12,6 +12,8 @@ #include +#include + #include #include #include @@ -100,6 +102,9 @@ class Batch virtual StatusType get_msa(std::vector>& msa, std::vector& output_status) = 0; + virtual StatusType get_graphs(std::vector& graphs, + std::vector& output_status) = 0; + /// \brief Return batch ID. /// /// \return Batch ID diff --git a/cudapoa/src/allocate_block.cpp b/cudapoa/src/allocate_block.cpp index ce28d6052..7317c92c9 100644 --- a/cudapoa/src/allocate_block.cpp +++ b/cudapoa/src/allocate_block.cpp @@ -120,7 +120,12 @@ std::tuple BatchBlock::calculate_space_per_p device_size_per_poa += (output_mask_ & OutputType::msa) ? 
poa_count * max_sequences_per_poa_ * sizeof(uint16_t) : 0; // input_details_d_->sequence_begin_nodes_ids // for graph - host - host_size_fixed += sizeof(GraphDetails); // graph_details_d_ + host_size_fixed += sizeof(GraphDetails); // graph_details_h_ + host_size_fixed += sizeof(GraphDetails); // graph_details_d_ + host_size_per_poa += sizeof(uint8_t) * max_nodes_per_window_ * poa_count; // graph_details_h_->nodes + host_size_per_poa += sizeof(uint16_t) * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES * poa_count; // graph_details_d_->outgoing_edges + host_size_per_poa += sizeof(uint16_t) * max_nodes_per_window_ * poa_count; // graph_details_d_->outgoing_edge_count + // for graph - device device_size_per_poa += sizeof(uint8_t) * max_nodes_per_window_ * poa_count; // graph_details_d_->nodes device_size_per_poa += sizeof(uint16_t) * max_nodes_per_window_ * CUDAPOA_MAX_NODE_ALIGNMENTS * poa_count; // graph_details_d_->node_alignments @@ -259,13 +264,23 @@ void BatchBlock::get_alignment_details(AlignmentDetails** alignment_details_d_p) *alignment_details_d_p = alignment_details_d; } -void BatchBlock::get_graph_details(GraphDetails** graph_details_d_p) +void BatchBlock::get_graph_details(GraphDetails** graph_details_d_p, GraphDetails** graph_details_h_p) { GraphDetails* graph_details_d{}; + GraphDetails* graph_details_h{}; // on host + graph_details_h = reinterpret_cast(&block_data_h_[offset_h_]); + offset_h_ += sizeof(GraphDetails); + graph_details_h->nodes = &block_data_h_[offset_h_]; + offset_h_ += sizeof(uint8_t) * max_nodes_per_window_ * max_poas_; + graph_details_h->outgoing_edges = reinterpret_cast(&block_data_h_[offset_h_]); + offset_h_ += sizeof(uint16_t) * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES * max_poas_; + graph_details_h->outgoing_edge_count = reinterpret_cast(&block_data_h_[offset_h_]); + offset_h_ += sizeof(uint16_t) * max_nodes_per_window_ * max_poas_; graph_details_d = reinterpret_cast(&block_data_h_[offset_h_]); offset_h_ += sizeof(GraphDetails); + graph_details_d->nodes = &block_data_h_[offset_h_]; // on device graph_details_d->nodes = &block_data_d_[offset_d_]; @@ -319,6 +334,7 @@ void BatchBlock::get_graph_details(GraphDetails** graph_details_d_p) } *graph_details_d_p = graph_details_d; + *graph_details_h_p = graph_details_h; } } // namespace cudapoa diff --git a/cudapoa/src/allocate_block.hpp b/cudapoa/src/allocate_block.hpp index 512b49356..e10cd8e67 100644 --- a/cudapoa/src/allocate_block.hpp +++ b/cudapoa/src/allocate_block.hpp @@ -36,7 +36,7 @@ class BatchBlock void get_alignment_details(AlignmentDetails** alignment_details_d_p); - void get_graph_details(GraphDetails** graph_details_d_p); + void get_graph_details(GraphDetails** graph_details_d_p, GraphDetails** graph_details_h_p); uint8_t* get_block_host(); diff --git a/cudapoa/src/cudapoa_batch.cpp b/cudapoa/src/cudapoa_batch.cpp index 995442f5f..9b0238298 100644 --- a/cudapoa/src/cudapoa_batch.cpp +++ b/cudapoa/src/cudapoa_batch.cpp @@ -64,7 +64,7 @@ void CudapoaBatch::initialize_alignment_details() void CudapoaBatch::initialize_graph_details() { - batch_block_->get_graph_details(&graph_details_d_); + batch_block_->get_graph_details(&graph_details_d_, &graph_details_h_); } CudapoaBatch::CudapoaBatch(int32_t max_sequences_per_poa, @@ -312,6 +312,68 @@ StatusType CudapoaBatch::get_msa(std::vector>& msa, std return StatusType::success; } +StatusType CudapoaBatch::get_graphs(std::vector& graphs, std::vector& output_status) +{ + int32_t max_nodes_per_window_ = banded_alignment_ ? 
CUDAPOA_MAX_NODES_PER_WINDOW_BANDED : CUDAPOA_MAX_NODES_PER_WINDOW; + CGA_CU_CHECK_ERR(cudaMemcpyAsync(graph_details_h_->nodes, + graph_details_d_->nodes, + sizeof(uint8_t) * max_nodes_per_window_ * max_poas_, + cudaMemcpyDeviceToHost, + stream_)); + + CGA_CU_CHECK_ERR(cudaMemcpyAsync(graph_details_h_->outgoing_edges, + graph_details_d_->outgoing_edges, + sizeof(uint16_t) * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES * max_poas_, + cudaMemcpyDeviceToHost, + stream_)); + + CGA_CU_CHECK_ERR(cudaMemcpyAsync(graph_details_h_->outgoing_edge_count, + graph_details_d_->outgoing_edge_count, + sizeof(uint16_t) * max_nodes_per_window_ * max_poas_, + cudaMemcpyDeviceToHost, + stream_)); + + CGA_CU_CHECK_ERR(cudaMemcpyAsync(input_details_h_->sequence_lengths, + input_details_d_->sequence_lengths, + global_sequence_idx_ * sizeof(uint16_t), + cudaMemcpyDeviceToHost, + stream_)); + + CGA_CU_CHECK_ERR(cudaMemcpyAsync(output_details_h_->consensus, + output_details_d_->consensus, + CUDAPOA_MAX_CONSENSUS_SIZE * max_poas_ * sizeof(uint8_t), + cudaMemcpyDeviceToHost, + stream_)); + + CGA_CU_CHECK_ERR(cudaStreamSynchronize(stream_)); + + for (int32_t poa = 0; poa < poa_count_; poa++) + { + graphs.emplace_back(DirectedGraph()); + char* c = reinterpret_cast(&(output_details_h_->consensus[poa * CUDAPOA_MAX_CONSENSUS_SIZE])); + // We use the first two entries in the consensus buffer to log error during kernel execution + // c[0] == 0 means an error occured and when that happens the error type is saved in c[1] + if (static_cast(c[0]) == CUDAPOA_KERNEL_ERROR_ENCOUNTERED) + { + decode_cudapoa_kernel_error(static_cast(c[1]), output_status); + } + else + { + output_status.emplace_back(claragenomics::cudapoa::StatusType::success); + int32_t num_nodes = input_details_h_->sequence_lengths[input_details_h_->window_details[poa].seq_len_buffer_offset]; + uint8_t* nodes = &graph_details_h_->nodes[max_nodes_per_window_ * poa]; + printf("%d\n", num_nodes); + for (int32_t a = 0; a < num_nodes; a++) + { + printf("%c,", nodes[a]); + } + printf("\n"); + } + } + + return StatusType::success; +} + bool CudapoaBatch::reserve_buf(int32_t max_seq_length) { int32_t max_graph_dimension = banded_alignment_ ? CUDAPOA_MAX_MATRIX_GRAPH_DIMENSION_BANDED : CUDAPOA_MAX_MATRIX_GRAPH_DIMENSION; diff --git a/cudapoa/src/cudapoa_batch.hpp b/cudapoa/src/cudapoa_batch.hpp index 1052e033e..bb57146f7 100644 --- a/cudapoa/src/cudapoa_batch.hpp +++ b/cudapoa/src/cudapoa_batch.hpp @@ -66,6 +66,9 @@ class CudapoaBatch : public Batch StatusType get_msa(std::vector>& msa, std::vector& output_status); + StatusType get_graphs(std::vector& graphs, + std::vector& output_status); + // Return batch ID. int32_t batch_id() const; @@ -132,6 +135,7 @@ class CudapoaBatch : public Batch // Device buffer struct for graph details GraphDetails* graph_details_d_; + GraphDetails* graph_details_h_; // Static batch count used to generate batch IDs. 
static int32_t batches; diff --git a/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp b/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp index 7767fa782..29ca5a5fc 100644 --- a/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp +++ b/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp @@ -77,7 +77,7 @@ class MSATest : public ::testing::Test TEST_F(MSATest, CudapoaMSA) { std::minstd_rand rng(1); - int num_sequences = 500; + int num_sequences = 2; std::string backbone = claragenomics::genomeutils::generate_random_genome(50, rng); auto sequences = claragenomics::genomeutils::generate_random_sequences(backbone, num_sequences, rng, 10, 5, 10); @@ -104,6 +104,10 @@ TEST_F(MSATest, CudapoaMSA) ASSERT_EQ(output_status[0], StatusType::success); ASSERT_EQ(poa_group.size(), cudapoa_msa[0].size()); + std::vector cudapoa_graphs; + output_status.clear(); + cudapoa_batch->get_graphs(cudapoa_graphs, output_status); + auto spoa_msa = spoa_generate_multiple_sequence_alignments(sequences); #ifndef SPOA_ACCURATE From bf668f672fe00179a1219bdfac76e36a83c66cc9 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Sat, 9 Nov 2019 17:35:20 -0500 Subject: [PATCH 040/128] [cudapoa] fill up graph from cudapoa also add dot serialization to graph object --- .../include/claragenomics/utils/graph.hpp | 19 +++++++++++++++++ cudapoa/src/cudapoa_batch.cpp | 21 +++++++++++++------ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/common/utils/include/claragenomics/utils/graph.hpp b/common/utils/include/claragenomics/utils/graph.hpp index ebf4d8aa5..e162bb405 100644 --- a/common/utils/include/claragenomics/utils/graph.hpp +++ b/common/utils/include/claragenomics/utils/graph.hpp @@ -16,6 +16,7 @@ #include #include #include +#include #include namespace claragenomics @@ -106,6 +107,24 @@ class DirectedGraph } } + virtual std::string serialize_to_dot() const + { + std::ostringstream dot_str; + dot_str << "digraph g {\n"; + for (auto iter : adjacent_nodes_) + { + node_id_t src = iter.first; + auto label_found = node_labels_.find(src); + dot_str << src << " [label=\"" << label_found->second << "\"];\n"; + for (node_id_t sink : iter.second) + { + dot_str << src << " -> " << sink << "\n"; + } + } + dot_str << "\n"; + return dot_str.str(); + } + private: std::unordered_map> adjacent_nodes_; std::unordered_set, PairHash> edges_; diff --git a/cudapoa/src/cudapoa_batch.cpp b/cudapoa/src/cudapoa_batch.cpp index 9b0238298..264ce5a11 100644 --- a/cudapoa/src/cudapoa_batch.cpp +++ b/cudapoa/src/cudapoa_batch.cpp @@ -360,14 +360,23 @@ StatusType CudapoaBatch::get_graphs(std::vector& graphs, std::vec else { output_status.emplace_back(claragenomics::cudapoa::StatusType::success); - int32_t num_nodes = input_details_h_->sequence_lengths[input_details_h_->window_details[poa].seq_len_buffer_offset]; - uint8_t* nodes = &graph_details_h_->nodes[max_nodes_per_window_ * poa]; - printf("%d\n", num_nodes); - for (int32_t a = 0; a < num_nodes; a++) + DirectedGraph& graph = graphs.back(); + int32_t seq_0_offset = input_details_h_->window_details[poa].seq_len_buffer_offset; + int32_t num_nodes = input_details_h_->sequence_lengths[seq_0_offset]; + uint8_t* nodes = &graph_details_h_->nodes[max_nodes_per_window_ * poa]; + for (int32_t n = 0; n < num_nodes; n++) { - printf("%c,", nodes[a]); + // For each node, find it's outgoing edges and add the edge to the graph, + // along with its label. 
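+                // Note: outgoing_edge_count holds one count per (poa, node) pair, while outgoing_edges is a
+                // flattened [poa][node][edge] array with CUDAPOA_MAX_NODE_EDGES slots reserved per node,
+                // hence the strided indexing below.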
+ node_id_t src = n; + graph.add_label(src, std::string(1, static_cast(nodes[n]))); + uint16_t num_edges = graph_details_h_->outgoing_edge_count[poa * max_nodes_per_window_ + n]; + for (uint16_t e = 0; e < num_edges; e++) + { + node_id_t sink = graph_details_h_->outgoing_edges[poa * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES + n * CUDAPOA_MAX_NODE_EDGES + e]; + graph.add_edge(src, sink); + } } - printf("\n"); } } From af6e035352fe4f076fe075b228defbc3d05c6d18 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Mon, 11 Nov 2019 10:12:27 +0100 Subject: [PATCH 041/128] [cudamapper] Added the new Anchor generation kernel --- cudamapper/src/matcher_gpu.cu | 80 +++++++++- cudamapper/src/matcher_gpu.cuh | 25 +++- .../Test_CudamapperIndexGPUTwoIndices.cu | 3 +- cudamapper/tests/Test_CudamapperMatcherGPU.cu | 137 ++++++++++++++++++ 4 files changed, 241 insertions(+), 4 deletions(-) diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index 9b9afba8f..d2d9c0c9c 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -31,7 +31,7 @@ MatcherGPU::MatcherGPU(const IndexTwoIndices& query_index, thrust::device_vector& MatcherGPU::anchors() { - return anchors_h_; + return anchors_d_; } namespace details @@ -106,6 +106,84 @@ __global__ void find_query_target_matches_kernel(int64_t* const found_target_ind found_target_indices[i] = found_target_index; } +void generate_anchors(thrust::device_vector& anchors, + const thrust::device_vector& anchor_starting_indices, + const thrust::device_vector& query_starting_index_of_each_representation_d, + const thrust::device_vector& found_target_indices_d, + const thrust::device_vector& target_starting_index_of_each_representation_d, + const thrust::device_vector& query_read_ids, + const thrust::device_vector& query_positions_in_read, + const thrust::device_vector& target_read_ids, + const thrust::device_vector& target_positions_in_read) +{ + assert(anchor_starting_indices.size() + 1 == query_starting_index_of_each_representation_d.size()); + assert(found_target_indices_d.size() + 1 == query_starting_index_of_each_representation_d.size()); + assert(query_read_ids.size() == query_positions_in_read.size()); + assert(target_read_ids.size() == target_positions_in_read.size()); + + const int32_t n_threads = 256; + const int32_t n_blocks = ceiling_divide(get_size(found_target_indices_d), n_threads); + generate_anchors_kernel<<>>( + anchors.data().get(), + anchor_starting_indices.data().get(), + query_starting_index_of_each_representation_d.data().get(), + found_target_indices_d.data().get(), + get_size(found_target_indices_d), + target_starting_index_of_each_representation_d.data().get(), + query_read_ids.data().get(), + query_positions_in_read.data().get(), + target_read_ids.data().get(), + target_positions_in_read.data().get()); +} + +__global__ void generate_anchors_kernel( + Anchor* const anchors_d, + const int64_t* const anchor_starting_index_d, + const std::uint32_t* const query_starting_index_of_each_representation_d, + const std::int64_t* const found_target_indices_d, + int32_t n_query_representations, + const std::uint32_t* const target_starting_index_of_each_representation_d, + const read_id_t* const query_read_ids, + const position_in_read_t* const query_positions_in_read, + const read_id_t* const target_read_ids, + const position_in_read_t* const target_positions_in_read) +{ + const std::int32_t i = blockIdx.x * blockDim.x + threadIdx.x; + + if (i >= n_query_representations) + return; + + const std::int64_t j = 
found_target_indices_d[i]; + if (j < 0) + return; + + std::int64_t anchor_idx = 0; + if (i > 0) + anchor_idx = anchor_starting_index_d[i - 1]; + std::uint32_t query_idx = query_starting_index_of_each_representation_d[i]; + const std::uint32_t query_end = query_starting_index_of_each_representation_d[i + 1]; + const std::uint32_t target_begin = target_starting_index_of_each_representation_d[j]; + const std::uint32_t target_end = target_starting_index_of_each_representation_d[j + 1]; + + while (query_idx < query_end) + { + std::uint32_t target_idx = target_begin; + while (target_idx < target_end) + { + Anchor a; + a.query_read_id_ = query_read_ids[query_idx]; + a.target_read_id_ = target_read_ids[target_idx]; + a.query_position_in_read_ = query_positions_in_read[query_idx]; + a.target_position_in_read_ = target_positions_in_read[target_idx]; + anchors_d[anchor_idx] = a; + ++anchor_idx; + ++target_idx; + } + ++query_idx; + } + assert(anchor_idx == anchor_starting_index_d[i] || anchor_starting_index_d[i - 1] == anchor_starting_index_d[i]); +} + } // namespace matcher_gpu } // namespace details diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index eeb5bebbf..1aacdcbac 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -31,7 +31,7 @@ public: thrust::device_vector& anchors() override; private: - thrust::device_vector anchors_h_; + thrust::device_vector anchors_d_; }; namespace details @@ -104,6 +104,16 @@ void compute_anchor_starting_indices(thrust::device_vector& anchor const thrust::device_vector& found_target_indices_d, const thrust::device_vector& target_starting_index_of_each_representation_d); +///TODO +void generate_anchors(thrust::device_vector& anchors, + const thrust::device_vector& anchor_starting_indices, + const thrust::device_vector& query_starting_index_of_each_representation_d, + const thrust::device_vector& found_target_indices_d, + const thrust::device_vector& target_starting_index_of_each_representation_d, + const thrust::device_vector& query_read_ids, + const thrust::device_vector& query_positions_in_read, + const thrust::device_vector& target_read_ids, + const thrust::device_vector& target_positions_in_read); /// \brief Performs a binary search on target_representations_d for each element of query_representations_d and stores the found index (or -1 iff not found) in found_target_indices. 
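/// Note: the target representations are expected to be sorted in ascending order for the binary search to be valid; entries of -1 are later skipped by generate_anchors_kernel.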
/// /// For example: @@ -125,6 +135,19 @@ void compute_anchor_starting_indices(thrust::device_vector& anchor /// \param target_representations_d the array of targets to be searched /// \param n_target_representations size of \param target_representations_d __global__ void find_query_target_matches_kernel(int64_t* const found_target_indices_d, const representation_t* const query_representations_d, const int64_t n_query_representations, const representation_t* const target_representations_d, const int64_t n_target_representations); + +/// TODO +__global__ void generate_anchors_kernel( + Anchor* const anchors_d, + const int64_t* const anchor_starting_index_d, + const std::uint32_t* const query_starting_index_of_each_representation_d, + const std::int64_t* const found_target_indices_d, + int32_t n_query_representations, + const std::uint32_t* const target_starting_index_of_each_representation_d, + const read_id_t* const query_read_ids, + const position_in_read_t* const query_positions_in_read, + const read_id_t* const target_read_ids, + const position_in_read_t* const target_positions_in_read); } // namespace matcher_gpu } // namespace details diff --git a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu index 652cd9653..c3f8322e9 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu @@ -84,7 +84,7 @@ void test_function(const std::string& filename, EXPECT_EQ(directions_of_reads_h[i], expected_directions_of_reads[i]) << "i: " << i; } - const thrust::device_vector unique_representations_d = index.unique_representations(); + const thrust::device_vector unique_representations_d = index.unique_representations(); const thrust::device_vector first_occurrence_of_representations_d = index.first_occurrence_of_representations(); const thrust::host_vector unique_representations_h(unique_representations_d); const thrust::host_vector first_occurrence_of_representations_h(first_occurrence_of_representations_d); @@ -97,7 +97,6 @@ void test_function(const std::string& filename, EXPECT_EQ(expected_first_occurrence_of_representations[i], first_occurrence_of_representations_h[i]) << "index: " << i; } EXPECT_EQ(expected_first_occurrence_of_representations.back(), expected_representations.size()); - } TEST(TestCudamapperIndexGPUTwoIndices, GATT_4_1) diff --git a/cudamapper/tests/Test_CudamapperMatcherGPU.cu b/cudamapper/tests/Test_CudamapperMatcherGPU.cu index d3dd0d3f7..b86b589c3 100644 --- a/cudamapper/tests/Test_CudamapperMatcherGPU.cu +++ b/cudamapper/tests/Test_CudamapperMatcherGPU.cu @@ -188,5 +188,142 @@ TEST(TestCudamapperMatcherGPU, test_compute_number_of_anchors_large_example) expected_anchor_starting_indices_h); } +void test_generate_anchors( + const thrust::host_vector& expected_anchors_h, + const thrust::host_vector& anchor_starting_indices_h, + const thrust::host_vector& query_starting_index_of_each_representation_h, + const thrust::host_vector& found_target_indices_h, + const thrust::host_vector& target_starting_index_of_each_representation_h, + const thrust::host_vector& query_read_ids_h, + const thrust::host_vector& query_positions_in_read_h, + const thrust::host_vector& target_read_ids_h, + const thrust::host_vector& target_positions_in_read_h) +{ + const thrust::device_vector anchor_starting_indices_d(anchor_starting_indices_h); + const thrust::device_vector query_starting_index_of_each_representation_d(query_starting_index_of_each_representation_h); + const 
thrust::device_vector found_target_indices_d(found_target_indices_h); + const thrust::device_vector target_starting_index_of_each_representation_d(target_starting_index_of_each_representation_h); + const thrust::device_vector query_read_ids_d(query_read_ids_h); + const thrust::device_vector query_positions_in_read_d(query_positions_in_read_h); + const thrust::device_vector target_read_ids_d(target_read_ids_h); + const thrust::device_vector target_positions_in_read_d(target_positions_in_read_h); + + thrust::device_vector anchors_d(anchor_starting_indices_h.back()); + + details::matcher_gpu::generate_anchors(anchors_d, + anchor_starting_indices_d, + query_starting_index_of_each_representation_d, + found_target_indices_d, + target_starting_index_of_each_representation_d, + query_read_ids_d, + query_positions_in_read_d, + target_read_ids_d, + target_positions_in_read_d); + + thrust::host_vector anchors_h(anchors_d); + ASSERT_EQ(anchors_h.size(), expected_anchors_h.size()); + + for (int64_t i = 0; i < get_size(anchors_h); ++i) + { + EXPECT_EQ(anchors_h[i].query_read_id_, expected_anchors_h[i].query_read_id_) << " index: " << i; + EXPECT_EQ(anchors_h[i].query_position_in_read_, expected_anchors_h[i].query_position_in_read_) << " index: " << i; + EXPECT_EQ(anchors_h[i].target_read_id_, expected_anchors_h[i].target_read_id_) << " index: " << i; + EXPECT_EQ(anchors_h[i].target_position_in_read_, expected_anchors_h[i].target_position_in_read_) << " index: " << i; + } +} + +TEST(TestCudamapperMatcherGPU, test_generate_anchors_small_example) +{ + thrust::host_vector query_starting_index_of_each_representation_h; + query_starting_index_of_each_representation_h.push_back(0); + query_starting_index_of_each_representation_h.push_back(4); + query_starting_index_of_each_representation_h.push_back(10); + query_starting_index_of_each_representation_h.push_back(13); + query_starting_index_of_each_representation_h.push_back(18); + query_starting_index_of_each_representation_h.push_back(21); + + thrust::host_vector target_starting_index_of_each_representation_h; + target_starting_index_of_each_representation_h.push_back(0); + target_starting_index_of_each_representation_h.push_back(3); + target_starting_index_of_each_representation_h.push_back(7); + target_starting_index_of_each_representation_h.push_back(9); + target_starting_index_of_each_representation_h.push_back(13); + target_starting_index_of_each_representation_h.push_back(16); + target_starting_index_of_each_representation_h.push_back(18); + target_starting_index_of_each_representation_h.push_back(21); + + thrust::host_vector found_target_indices_h; + found_target_indices_h.push_back(-1); + found_target_indices_h.push_back(1); + found_target_indices_h.push_back(3); + found_target_indices_h.push_back(-1); + found_target_indices_h.push_back(6); + + thrust::host_vector anchor_starting_indices_h; + anchor_starting_indices_h.push_back(0); + anchor_starting_indices_h.push_back(24); + anchor_starting_indices_h.push_back(36); + anchor_starting_indices_h.push_back(36); + anchor_starting_indices_h.push_back(45); + + thrust::host_vector query_read_ids_h; + thrust::host_vector query_positions_in_read_h; + for (std::uint32_t i = 0; i < query_starting_index_of_each_representation_h.back(); ++i) + { + query_read_ids_h.push_back(i); + query_positions_in_read_h.push_back(10 * i); + } + + thrust::host_vector target_read_ids_h; + thrust::host_vector target_positions_in_read_h; + for (std::uint32_t i = 0; i < target_starting_index_of_each_representation_h.back(); ++i) + { + 
target_read_ids_h.push_back(100 * i); + target_positions_in_read_h.push_back(1000 * i); + } + + thrust::host_vector expected_anchors(anchor_starting_indices_h.back()); + for (int32_t i = 0; i < 6; ++i) + for (int32_t j = 0; j < 4; ++j) + { + Anchor& a = expected_anchors[i * 4 + j]; + a.query_read_id_ = 4 + i; + a.query_position_in_read_ = 10 * (4 + i); + a.target_read_id_ = 100 * (j + 3); + a.target_position_in_read_ = 1000 * (j + 3); + } + + for (int32_t i = 0; i < 3; ++i) + for (int32_t j = 0; j < 4; ++j) + { + Anchor& a = expected_anchors[i * 4 + j + 24]; + a.query_read_id_ = 10 + i; + a.query_position_in_read_ = 10 * (10 + i); + a.target_read_id_ = 100 * (j + 9); + a.target_position_in_read_ = 1000 * (j + 9); + } + + for (int32_t i = 0; i < 3; ++i) + for (int32_t j = 0; j < 3; ++j) + { + Anchor& a = expected_anchors[i * 3 + j + 36]; + a.query_read_id_ = 18 + i; + a.query_position_in_read_ = 10 * (18 + i); + a.target_read_id_ = 100 * (j + 18); + a.target_position_in_read_ = 1000 * (j + 18); + } + + test_generate_anchors( + expected_anchors, + anchor_starting_indices_h, + query_starting_index_of_each_representation_h, + found_target_indices_h, + target_starting_index_of_each_representation_h, + query_read_ids_h, + query_positions_in_read_h, + target_read_ids_h, + target_positions_in_read_h); +} + } // namespace cudamapper } // namespace claragenomics From 286729149230637d0af33b90f2197c2689e2804e Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Thu, 7 Nov 2019 14:40:14 +0200 Subject: [PATCH 042/128] [pyclaragenomics] use pip for installation instead of setuptools Created an installation shell script for pyclaragenomics. Separated Clara Genomics SDK installation from setup.py script. Updated readme and CI test. Fixes #153 [pyclaragenomics] Fixed Joyjit's review comments Fixes #153 [pyclaragenomics] Fix CI test Fixes #153 [pyclaragenomics] Fix linter errors Fixes #153 [pyclaragenomics] Fixed more review comments Fixes #153 --- ci/common/test-pyclaragenomics.sh | 2 +- pyclaragenomics/README.md | 5 +- pyclaragenomics/setup.py | 87 ++++-------------- pyclaragenomics/setup_pyclaragenomics.py | 111 +++++++++++++++++++++++ 4 files changed, 134 insertions(+), 71 deletions(-) create mode 100644 pyclaragenomics/setup_pyclaragenomics.py diff --git a/ci/common/test-pyclaragenomics.sh b/ci/common/test-pyclaragenomics.sh index 9704d4bec..e341998d3 100644 --- a/ci/common/test-pyclaragenomics.sh +++ b/ci/common/test-pyclaragenomics.sh @@ -19,7 +19,7 @@ cd $PYCLARAGENOMICS_DIR #Install external dependencies. python -m pip install -r requirements.txt -python setup.py install +python setup_pyclaragenomics.py # Run tests. 
cd test/ diff --git a/pyclaragenomics/README.md b/pyclaragenomics/README.md index f0f7219a2..dcc34aac4 100644 --- a/pyclaragenomics/README.md +++ b/pyclaragenomics/README.md @@ -6,15 +6,14 @@ Python libraries and utilities for manipulating genomics data ``` pip install -r requirements.txt -python setup.py install +python setup_pyclaragenomics.py --build_output_folder BUILD_FOLDER ``` *Note* if you are developing pyclaragenomics you should do a develop build instead, changes you make to the source code will then be picked up on immediately: ``` pip install -r requirements.txt -python setup.py develop - +python setup_pyclaragenomics.py --build_output_folder BUILD_FOLDER --develop ``` ### Testing installation diff --git a/pyclaragenomics/setup.py b/pyclaragenomics/setup.py index c188b0abe..6e6bb6e1d 100755 --- a/pyclaragenomics/setup.py +++ b/pyclaragenomics/setup.py @@ -10,96 +10,49 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. # -import os.path import os -import subprocess - from setuptools import setup, find_packages, Extension - from Cython.Build import cythonize -class CMakeWrapper(): - """Class to encapsulate building a CMake project.""" - - def __init__(self, cmake_root_dir, cmake_build_path="cmake_build", cmake_extra_args=""): - """ - Class constructor. - - Args: - cmake_root_dir : Root directory of CMake project - cmake_install_dir : Install location for CMake project - cmake_extra_args : Extra string arguments to be passed to CMake during setup - """ - self.build_path = os.path.abspath(cmake_build_path) - self.cmake_root_dir = os.path.abspath(cmake_root_dir) - self.cmake_install_dir = os.path.join(self.build_path, "install") - self.cmake_extra_args = cmake_extra_args - self.cuda_toolkit_root_dir = os.environ.get("CUDA_TOOLKIT_ROOT_DIR") - - def run_cmake_cmd(self): - cmake_args = ['-DCMAKE_INSTALL_PREFIX=' + self.cmake_install_dir, - '-DCMAKE_BUILD_TYPE=' + 'Release', - '-DCMAKE_INSTALL_RPATH=' + os.path.join(self.cmake_install_dir, "lib")] - cmake_args += [self.cmake_extra_args] - - if self.cuda_toolkit_root_dir: - cmake_args += ["-DCUDA_TOOLKIT_ROOT_DIR=%s" % self.cuda_toolkit_root_dir] +def get_verified_path(path): + installed_path = os.path.abspath(path) + if not os.path.exists(installed_path): + raise RuntimeError("No valid path for requested component exists") + return installed_path - if not os.path.exists(self.build_path): - os.makedirs(self.build_path) - subprocess.check_call(['cmake', self.cmake_root_dir] + cmake_args, cwd=self.build_path) +# Must be set before calling pip +try: + pycga_dir = os.environ['PYCGA_DIR'] + cga_install_dir = os.environ['CGA_INSTALL_DIR'] +except KeyError as e: + raise EnvironmentError( + 'PYCGA_DIR CGA_INSTALL_DIR environment variables must be set').with_traceback(e.__traceback__) - def run_build_cmd(self): - build_args = ['--', '-j16', 'install'] - subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_path) - - def build(self): - self.run_cmake_cmd() - self.run_build_cmd() - - def get_installed_path(self, component=""): - installed_path = os.path.abspath(os.path.join(self.cmake_install_dir, component)) - if (not os.path.exists(installed_path)): - raise RuntimeError("No valid path for requested component exists") - return installed_path - - -# Initialize builds. 
-pycga_directory = os.path.dirname(os.path.realpath(__file__)) -cmake_root_dir = os.path.dirname(pycga_directory) -cmake_proj = CMakeWrapper(cmake_root_dir, - cmake_build_path=os.path.join(pycga_directory, "cga_build"), - cmake_extra_args="-Dcga_build_shared=ON") -cmake_proj.build() extensions = [ Extension( "*", - sources=[os.path.join(pycga_directory, "claragenomics/**/*.pyx")], + sources=[os.path.join(pycga_dir, "claragenomics/**/*.pyx")], include_dirs=[ "/usr/local/cuda/include", - os.path.join(cmake_root_dir, "cudapoa/include"), - os.path.join(cmake_root_dir, "cudaaligner/include"), + get_verified_path(os.path.join(cga_install_dir, "include")), ], - library_dirs=["/usr/local/cuda/lib64", cmake_proj.get_installed_path("lib")], - runtime_library_dirs=["/usr/local/cuda/lib64", cmake_proj.get_installed_path("lib")], + library_dirs=["/usr/local/cuda/lib64", get_verified_path(os.path.join(cga_install_dir, "lib"))], + runtime_library_dirs=["/usr/local/cuda/lib64", get_verified_path(os.path.join(cga_install_dir, "lib"))], libraries=["cudapoa", "cudaaligner", "cudart"], language="c++", extra_compile_args=["-std=c++14"], ) ] -# Run from the pycga directory -os.chdir(pycga_directory) - setup(name='pyclaragenomics', version='0.3.0', - description='NVIDIA genomics python libraries an utiliites', + description='NVIDIA genomics python libraries and utiliites', author='NVIDIA Corporation', - packages=find_packages(where=pycga_directory), + packages=find_packages(where=pycga_dir), ext_modules=cythonize(extensions, compiler_directives={'embedsignature': True}), - scripts=[os.path.join(pycga_directory, 'bin', 'genome_simulator'), - os.path.join(pycga_directory, 'bin', 'assembly_evaluator')], + scripts=[get_verified_path(os.path.join(pycga_dir, 'bin', 'genome_simulator')), + get_verified_path(os.path.join(pycga_dir, 'bin', 'assembly_evaluator'))], ) diff --git a/pyclaragenomics/setup_pyclaragenomics.py b/pyclaragenomics/setup_pyclaragenomics.py new file mode 100644 index 000000000..9965ab46b --- /dev/null +++ b/pyclaragenomics/setup_pyclaragenomics.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 + +# +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. +# + +import argparse +import os.path +import os +import subprocess + + +def get_relative_path(sub_folder_name): + return os.path.join( + os.path.dirname(os.path.realpath(__file__)), + sub_folder_name + ) + + +def parse_arguments(): + parser = argparse.ArgumentParser(description='build & install Clara Genomics Analysis SDK.') + parser.add_argument('--build_output_folder', + required=False, + default=get_relative_path("cga_build"), + help="Choose an output folder for building") + parser.add_argument('--develop', + required=False, + action='store_true', + help="CInstall using pip editble mode") + return parser.parse_args() + + +class CMakeWrapper: + """Class to encapsulate building a CMake project.""" + + def __init__(self, + cmake_root_dir, + cmake_build_path="cmake_build", + cga_install_dir="cmake_build/install", + cmake_extra_args=""): + """ + Class constructor. 
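+        All directory arguments are stored as absolute paths; CUDA_TOOLKIT_ROOT_DIR is read
+        from the environment if it is set.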
+ + Args: + cmake_root_dir : Root directory of CMake project + cmake_build_path : cmake build output folder + cga_install_dir: Clara Genomics Analysis installation directory + cmake_extra_args : Extra string arguments to be passed to CMake during setup + """ + self.cmake_root_dir = os.path.abspath(cmake_root_dir) + self.build_path = os.path.abspath(cmake_build_path) + self.cga_install_dir = os.path.abspath(cga_install_dir) + self.cmake_extra_args = cmake_extra_args + self.cuda_toolkit_root_dir = os.environ.get("CUDA_TOOLKIT_ROOT_DIR") + + def run_cmake_cmd(self): + cmake_args = ['-DCMAKE_INSTALL_PREFIX=' + self.cga_install_dir, + '-DCMAKE_BUILD_TYPE=' + 'Release', + '-DCMAKE_INSTALL_RPATH=' + os.path.join(self.cga_install_dir, "lib")] + cmake_args += [self.cmake_extra_args] if self.cmake_extra_args else [] + + if self.cuda_toolkit_root_dir: + cmake_args += ["-DCUDA_TOOLKIT_ROOT_DIR=%s" % self.cuda_toolkit_root_dir] + + if not os.path.exists(self.build_path): + os.makedirs(self.build_path) + + subprocess.check_call(['cmake', self.cmake_root_dir] + cmake_args, cwd=self.build_path) + + def run_build_cmd(self): + build_args = ['--', '-j16', 'install'] + subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_path) + + def build(self): + self.run_cmake_cmd() + self.run_build_cmd() + + +def setup_python_binding(is_develop_mode, pycga_dir, cga_install_dir): + subprocess.check_call(['pip', 'install'] + (['-e'] if is_develop_mode else []) + ["."], + env={ + **os.environ, + 'PYCGA_DIR': pycga_dir, + 'CGA_INSTALL_DIR': cga_install_dir + }, + cwd=pycga_dir) + + +if __name__ == "__main__": + + args = parse_arguments() + current_dir = os.path.dirname(os.path.realpath(__file__)) + cga_installation_directory = os.path.join(args.build_output_folder, "install") + # Build & install Clara Genomics Analysis SDK + cmake_proj = CMakeWrapper(cmake_root_dir=os.path.dirname(current_dir), + cmake_build_path=args.build_output_folder, + cga_install_dir=cga_installation_directory, + cmake_extra_args="-Dcga_build_shared=ON") + cmake_proj.build() + # Setup pyclaragenomics + setup_python_binding(is_develop_mode=args.develop, + pycga_dir=current_dir, + cga_install_dir=cga_installation_directory) + print("pyclaragenomics was successfully setup in {} mode!" 
+ .format("development" if args.develop else "installation")) From 07d10772406e45e52de2f789604e8f686b3f56ca Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Mon, 11 Nov 2019 17:39:23 +0100 Subject: [PATCH 043/128] [cudamapper] Implemented new Matcher constructor --- cudamapper/CMakeLists.txt | 2 +- cudamapper/src/matcher_gpu.cu | 23 +++++++++++++++++++ cudamapper/tests/CMakeLists.txt | 2 +- cudamapper/tests/Test_CudamapperMatcherGPU.cu | 13 +++++++++++ 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/cudamapper/CMakeLists.txt b/cudamapper/CMakeLists.txt index 92c5f0140..fc8218647 100644 --- a/cudamapper/CMakeLists.txt +++ b/cudamapper/CMakeLists.txt @@ -30,6 +30,7 @@ target_link_libraries(index_gpu logging pthread utils cgaio) target_compile_options(index_gpu PRIVATE -Werror) cuda_add_library(index_gpu_two_indices + src/index_two_indices.cu src/index_gpu_two_indices.cu src/minimizer.cu) target_include_directories(index_gpu_two_indices PUBLIC include) @@ -64,7 +65,6 @@ cuda_add_executable(cudamapper src/cudamapper.cpp src/main.cpp src/index.cu - src/index_two_indices.cu src/matcher_two_indices.cu src/overlapper.cpp ) diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index d2d9c0c9c..4908fa2f3 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -27,6 +27,29 @@ namespace cudamapper MatcherGPU::MatcherGPU(const IndexTwoIndices& query_index, const IndexTwoIndices& target_index) { + + CGA_NVTX_RANGE(profile, "matcherGPU"); + if (query_index.number_of_reads() == 0 || target_index.number_of_reads() == 0) + return; + + thrust::device_vector found_target_indices_d(query_index.unique_representations().size()); + thrust::device_vector anchor_starting_indices_d(query_index.unique_representations().size()); + details::matcher_gpu::find_query_target_matches(found_target_indices_d, query_index.unique_representations(), target_index.unique_representations()); + details::matcher_gpu::compute_anchor_starting_indices(anchor_starting_indices_d, query_index.first_occurrence_of_representations(), found_target_indices_d, target_index.first_occurrence_of_representations()); + + const int64_t n_anchors = anchor_starting_indices_d.back(); // D->H transfer + + anchors_d_.resize(n_anchors); + + details::matcher_gpu::generate_anchors(anchors_d_, + anchor_starting_indices_d, + query_index.first_occurrence_of_representations(), + found_target_indices_d, + target_index.first_occurrence_of_representations(), + query_index.read_ids(), + query_index.positions_in_reads(), + target_index.read_ids(), + target_index.positions_in_reads()); } thrust::device_vector& MatcherGPU::anchors() diff --git a/cudamapper/tests/CMakeLists.txt b/cudamapper/tests/CMakeLists.txt index 9511adbff..67efe3b87 100644 --- a/cudamapper/tests/CMakeLists.txt +++ b/cudamapper/tests/CMakeLists.txt @@ -18,7 +18,7 @@ set(SOURCES Test_CudamapperMatcher.cu Test_CudamapperMatcherGPU.cu Test_CudamapperMinimizer.cpp - Test_CudamapperOverlapperTriggered.cu + Test_CudamapperOverlapperTriggered.cu ../src/bioparser_sequence.cpp) get_property(cudamapper_data_include_dir GLOBAL PROPERTY cudamapper_data_include_dir) diff --git a/cudamapper/tests/Test_CudamapperMatcherGPU.cu b/cudamapper/tests/Test_CudamapperMatcherGPU.cu index b86b589c3..f2f38d9bb 100644 --- a/cudamapper/tests/Test_CudamapperMatcherGPU.cu +++ b/cudamapper/tests/Test_CudamapperMatcherGPU.cu @@ -10,6 +10,8 @@ #include "gtest/gtest.h" +#include "cudamapper_file_location.hpp" + #include #include @@ -325,5 +327,16 @@ 
TEST(TestCudamapperMatcherGPU, test_generate_anchors_small_example) target_positions_in_read_h); } +TEST(TestCudamapperMatcherGPU, OneReadOneMinimizer) +{ + std::unique_ptr parser = io::create_fasta_parser(std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"); + std::unique_ptr query_index = IndexTwoIndices::create_index(parser.get(), 0, parser->get_num_seqences(), 4, 1); + std::unique_ptr target_index = IndexTwoIndices::create_index(parser.get(), 0, parser->get_num_seqences(), 4, 1); + MatcherGPU matcher(*query_index, *target_index); + + const thrust::host_vector anchors(matcher.anchors()); + ASSERT_EQ(get_size(anchors), 1); +} + } // namespace cudamapper } // namespace claragenomics From 79e280336e9cf31342242b3014fa262db7ddefeb Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Tue, 12 Nov 2019 14:10:52 +0100 Subject: [PATCH 044/128] [common] fixed (unlikely) memleaks in fasta parser --- common/io/src/hts_fasta_parser.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/common/io/src/hts_fasta_parser.cpp b/common/io/src/hts_fasta_parser.cpp index 4bee013aa..564b658c1 100644 --- a/common/io/src/hts_fasta_parser.cpp +++ b/common/io/src/hts_fasta_parser.cpp @@ -11,11 +11,24 @@ #include "hts_fasta_parser.hpp" #include +#include extern "C" { #include } +namespace +{ +struct free_deleter +{ + template + void operator()(T* x) + { + std::free(x); + } +}; +} // namespace + namespace claragenomics { namespace io @@ -32,6 +45,7 @@ FastaParserHTS::FastaParserHTS(const std::string& fasta_file) num_seqequences_ = faidx_nseq(fasta_index_); if (num_seqequences_ == 0) { + fai_destroy(fasta_index_); throw std::runtime_error("FASTA file has 0 sequences"); } } @@ -60,7 +74,7 @@ FastaSequence FastaParserHTS::get_sequence_by_id(int32_t i) const FastaSequence FastaParserHTS::get_sequence_by_name(const std::string& name) const { int32_t length; - char* seq = fai_fetch(fasta_index_, name.c_str(), &length); + std::unique_ptr seq(fai_fetch(fasta_index_, name.c_str(), &length)); if (length < 0) { throw std::runtime_error("Error in reading sequence information for seq ID " + name); @@ -68,10 +82,7 @@ FastaSequence FastaParserHTS::get_sequence_by_name(const std::string& name) cons FastaSequence s{}; s.name = std::string(name); - s.seq = std::string(seq); - - // Since htslib allocates space for the seq using malloc() - free(seq); + s.seq = std::string(seq.get()); return s; } From 94fa68e0278d91675096218a0916d96c7c09c6e0 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Tue, 12 Nov 2019 14:40:33 +0100 Subject: [PATCH 045/128] [cudamapper] fixed a memory leak --- .../include/claragenomics/cudamapper/index_two_indices.hpp | 3 +++ .../include/claragenomics/cudamapper/matcher_two_indices.hpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp index 3d4682f78..3791980ab 100644 --- a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp +++ b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp @@ -31,6 +31,9 @@ namespace cudamapper class IndexTwoIndices { public: + /// \brief Virtual destructor + virtual ~IndexTwoIndices() = default; + /// \brief returns an array of representations of sketch elements /// \return an array of representations of sketch elements virtual const thrust::device_vector& representations() const = 0; diff --git a/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp 
b/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp index 5c3bebad8..9d8cee87c 100644 --- a/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp +++ b/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp @@ -26,6 +26,9 @@ namespace cudamapper class MatcherTwoIndices { public: + /// \brief Virtual destructor + virtual ~MatcherTwoIndices() = default; + /// \brief returns anchors /// \return anchors virtual thrust::device_vector& anchors() = 0; From c3a59750845f4e7772ee2a6f2afa00d9d1c0490d Mon Sep 17 00:00:00 2001 From: Mike Vella Date: Tue, 12 Nov 2019 17:07:54 +0000 Subject: [PATCH 046/128] Initial hashing - currently contains some bug as overlaps not being generated --- cudamapper/src/minimizer.cu | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/cudamapper/src/minimizer.cu b/cudamapper/src/minimizer.cu index e3fe71784..358f51838 100644 --- a/cudamapper/src/minimizer.cu +++ b/cudamapper/src/minimizer.cu @@ -39,6 +39,18 @@ read_id_t Minimizer::read_id() const return read_id_; } + +__device__ representation_t wang_hash64(representation_t key){ + key = (~key) + (key << 21);// key = (key << 21) - key - 1; + key = key ^ (key >> 24); + key = (key + (key << 3)) + (key << 8);// key * 265 + key = key ^ (key >> 14); + key = (key + (key << 2)) + (key << 4);// key * 21 + key = key ^ (key >> 28); + key = key + (key << 31); + return key; +} + Minimizer::DirectionOfRepresentation Minimizer::direction() const { return direction_; @@ -193,6 +205,10 @@ __global__ void find_front_end_minimizers(const std::uint64_t minimizer_size, forward_representation |= forward_basepair_hashes[threadIdx.x + i] << 2 * (minimizer_size - i - 1); reverse_representation |= reverse_basepair_hashes[threadIdx.x + i] << 2 * i; } + + forward_representation = wang_hash64(forward_representation); + reverse_representation = wang_hash64(reverse_representation); + if (forward_representation <= reverse_representation) { minimizers_representation[threadIdx.x] = forward_representation; @@ -456,6 +472,10 @@ __global__ void find_central_minimizers(const std::uint64_t minimizer_size, forward_representation |= forward_basepair_hashes[kmer_index + i] << 2 * (minimizer_size - i - 1); reverse_representation |= reverse_basepair_hashes[kmer_index + i] << 2 * i; } + + forward_representation = wang_hash64(forward_representation); + reverse_representation = wang_hash64(reverse_representation); + if (forward_representation <= reverse_representation) { minimizers_representation[kmer_index] = forward_representation; @@ -680,6 +700,11 @@ __global__ void find_back_end_minimizers(const std::uint64_t minimizer_size, forward_representation |= forward_basepair_hashes[kmer_index + i] << 2 * (minimizer_size - i - 1); reverse_representation |= reverse_basepair_hashes[kmer_index + i] << 2 * i; } + + //printf("Pre hash %lu, Post hash %lu\n", forward_representation, wang_hash64(forward_representation)); + forward_representation = wang_hash64(forward_representation); + reverse_representation = wang_hash64(reverse_representation); + if (forward_representation <= reverse_representation) { minimizers_representation[kmer_index] = forward_representation; From 0ec135de9d5968afbda5ca33299da55b9fdc0afe Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Tue, 12 Nov 2019 14:50:20 +0200 Subject: [PATCH 047/128] [cmake] Add an option to disable doxygen documentation generation Fixes #205 [cmake] Validate arguments value Fixes #205 [cmake] Fix misplaced includes Fixes #205 [cmake] Adding a comment for 
including cmake/Utils.cmake before options value validation Fixes #205 --- CMakeLists.txt | 21 +++++++++++++++++---- README.md | 6 ++++-- cmake/Utils.cmake | 15 +++++++++++++++ 3 files changed, 36 insertions(+), 6 deletions(-) create mode 100644 cmake/Utils.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index a0042af1f..abd80fd59 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,17 +28,24 @@ option(cga_device_synchronize_kernels "Run cudaDeviceSynchronize() in CGA_CU_CHE option(spoa_accurate "Run cudapoa code in mode that matches spoa" OFF) option(cga_enable_cudapoa_nw_print "Enable verbose prints within cudapoa NW kernel" OFF) option(cga_profiling "Compile a binary for profiling with NVTX markers." OFF) +option(cga_generate_docs "Generate Doxygen documentation" ON) +# Must be included before others for options value validation +include(cmake/Utils.cmake) + +validate_boolean(cga_enable_tests) if (cga_enable_tests) message(STATUS "Enabling ClaraGenomicsAnalysis unit tests") set_property(GLOBAL PROPERTY enable_tests ON) endif() +validate_boolean(cga_enable_benchmarks) if (cga_enable_benchmarks) message(STATUS "Enabling ClaraGenomicsAnalysis benchmarks") set_property(GLOBAL PROPERTY enable_benchmarks ON) endif() +validate_boolean(cga_build_shared) if (cga_build_shared) message(STATUS "Building ClaraGenomicsAnalysis libraries as shared objects") set_property(GLOBAL PROPERTY cga_library_type SHARED) @@ -56,6 +63,16 @@ include(cmake/Benchmarks.cmake) include(cmake/Format.cmake) include(cmake/Packaging.cmake) +# Add documentation generation. +validate_boolean(cga_generate_docs) +if (cga_generate_docs) + message(STATUS "Enabling Doxygen documentation generation") + set_doxygen_mainpage(${CMAKE_CURRENT_SOURCE_DIR}/README.md) + add_docs_target("ClaraGenomicsAnalysis" "${CGA_VERSION}") +else() + message(STATUS "Disabling Doxygen documentation generation") +endif() + # Add ClaraGenomicsAnalysis projects. add_subdirectory(common/logging) add_subdirectory(common/utils) @@ -64,10 +81,6 @@ add_subdirectory(cudapoa) add_subdirectory(cudamapper) add_subdirectory(cudaaligner) -# Add documentation generation. -set_doxygen_mainpage(${CMAKE_CURRENT_SOURCE_DIR}/README.md) -add_docs_target("ClaraGenomicsAnalysis" "${CGA_VERSION}") - # Add auto formatting. cga_enable_formatting_targets() diff --git a/README.md b/README.md index 147be40f4..f7d1d7f62 100644 --- a/README.md +++ b/README.md @@ -124,8 +124,8 @@ e.g. A description of each of the benchmarks is present in a README under the module's benchmark folder. ## Enable Doc Generation -To enable document generation for Clara Genomics Analysis, please install `Doxygen` on your system. Once -`Doxygen` has been installed, run the following to build documents. +To enable document generation for Clara Genomics Analysis, please install `Doxygen` on your system. +Once`Doxygen` has been installed, run the following to build documents. ```bash make docs @@ -133,6 +133,8 @@ make docs Docs are also generated as part of the default `all` target when `Doxygen` is available on the system. +To disable documentation generation add `-Dcga_generate_docs=OFF` to the `cmake` command in the [build step](#build). + ## Code Formatting ### C++ / CUDA diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake new file mode 100644 index 000000000..ea5efeb95 --- /dev/null +++ b/cmake/Utils.cmake @@ -0,0 +1,15 @@ +# +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. +# + +function(validate_boolean CMAKE_OPTION) + if ((NOT ${CMAKE_OPTION} STREQUAL "ON") AND (NOT ${CMAKE_OPTION} STREQUAL "OFF")) + message(FATAL_ERROR "${CMAKE_OPTION} can only be set to ON/OFF") + endif() +endfunction(validate_boolean) From 17bd1413bfb4d2648b85beaf07852756d3a028cb Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Mon, 11 Nov 2019 11:27:46 +0200 Subject: [PATCH 048/128] [CI] Add support for running CI tests locally Fixes #206 [CI] Added copywrite to the new file [CI] Fix review comments Fixes #206 --- README.md | 21 ++++++ ci/common/prep-init-env.sh | 2 +- ci/local/build.sh | 134 +++++++++++++++++++++++++++++++++++++ 3 files changed, 156 insertions(+), 1 deletion(-) create mode 100644 ci/local/build.sh diff --git a/README.md b/README.md index f7d1d7f62..873d379ea 100644 --- a/README.md +++ b/README.md @@ -167,3 +167,24 @@ To run style check manually, simply run the following from the top level folder. ``` flake8 pyclaragenomics/ ``` + +## Running CI Tests Locally +Please note, your git repository will be mounted to the container, any untracked files will be removed from it. +Before executing the CI locally, stash or add them to the index. + +Requirements: +1. docker (https://docs.docker.com/install/linux/docker-ce/ubuntu/) +2. nvidia-docker (https://github.com/NVIDIA/nvidia-docker) +3. nvidia-container-runtime (https://github.com/NVIDIA/nvidia-container-runtime) + +Run the following command to execute the CI build steps inside a container locally: +```bash +bash ci/local/build.sh -r +``` +ci/local/build.sh script was adapted from [rapidsai/cudf](https://github.com/rapidsai/cudf/tree/branch-0.11/ci/local) + +The default docker image is **clara-genomics-base:cuda10.0-ubuntu16.04-gcc5-py3.7**. +Other images from [gpuci/clara-genomics-base](https://hub.docker.com/r/gpuci/clara-genomics-base/tags) repository can be used instead, by using -i argument +```bash +bash ci/local/build.sh -r -i gpuci/clara-genomics-base:cuda10.0-ubuntu18.04-gcc7-py3.6 +``` diff --git a/ci/common/prep-init-env.sh b/ci/common/prep-init-env.sh index 8526d9bb8..702fee8d2 100644 --- a/ci/common/prep-init-env.sh +++ b/ci/common/prep-init-env.sh @@ -56,6 +56,6 @@ if [ "${CUDA:0:2}" == '10' ]; then fi # Cleanup local git -cd $1 +cd "$1" git clean -xdf diff --git a/ci/local/build.sh b/ci/local/build.sh new file mode 100644 index 000000000..9031bb10c --- /dev/null +++ b/ci/local/build.sh @@ -0,0 +1,134 @@ +#!/bin/bash +# +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+# + +DOCKER_IMAGE="gpuci/clara-genomics-base:cuda10.0-ubuntu16.04-gcc5-py3.7" +REPO_PATH=${PWD} +RAPIDS_DIR_IN_CONTAINER="/rapids" +CPP_BUILD_DIR="cpp" +CONTAINER_SHELL_ONLY=0 + +SHORTHELP="$(basename "$0") [-h] [-H] [-s] [-r ] [-i ]" +LONGHELP="${SHORTHELP} +Build and test your local repository using a base gpuCI Docker image +where: + -H Show this help text + -r Path to repository (defaults to working directory) + -i Use Docker image (default is ${DOCKER_IMAGE}) + -s Skip building and testing and start an interactive shell in a container of the Docker image +" + +# Limit GPUs available to container based on CUDA_VISIBLE_DEVICES +if [[ -z "${CUDA_VISIBLE_DEVICES}" ]]; then + NVIDIA_VISIBLE_DEVICES="all" +else + NVIDIA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES} +fi + +while getopts ":hHr:i:s" option; do + case ${option} in + r) + REPO_PATH=${OPTARG} + ;; + i) + DOCKER_IMAGE=${OPTARG} + ;; + s) + CONTAINER_SHELL_ONLY=1 + ;; + h) + echo "${SHORTHELP}" + exit 0 + ;; + H) + echo "${LONGHELP}" + exit 0 + ;; + *) + echo "ERROR: Invalid flag" + echo "${SHORTHELP}" + exit 1 + ;; + esac +done +IMAGE_FOLDER_NAME="build_$(echo $(basename "${DOCKER_IMAGE}")|sed -e 's/:/_/g')" +REPO_PATH_IN_CONTAINER="${RAPIDS_DIR_IN_CONTAINER}/$(basename "${REPO_PATH}")" +CPP_BUILD_DIR_IN_CONTAINER="${RAPIDS_DIR_IN_CONTAINER}/$(basename "${REPO_PATH}")/${IMAGE_FOLDER_NAME}/${CPP_BUILD_DIR}" + + +# BASE_CONTAINER_BUILD_DIR is named after the image name, allowing for +# multiple image builds to coexist on the local filesystem. This will +# be mapped to the typical BUILD_DIR inside of the container. Builds +# running in the container generate build artifacts just as they would +# in a bare-metal environment, and the host filesystem is able to +# maintain the host build in BUILD_DIR as well. +# shellcheck disable=SC2001,SC2005,SC2046 +BASE_CONTAINER_BUILD_DIR=${REPO_PATH}/${IMAGE_FOLDER_NAME} +CPP_CONTAINER_BUILD_DIR=${BASE_CONTAINER_BUILD_DIR}/cpp + + +BUILD_SCRIPT="#!/bin/bash +set -e +WORKSPACE=${REPO_PATH_IN_CONTAINER} +PREBUILD_SCRIPT=${REPO_PATH_IN_CONTAINER}/ci/gpu/prebuild.sh +BUILD_SCRIPT=${REPO_PATH_IN_CONTAINER}/ci/gpu/build.sh +if [ -f \${PREBUILD_SCRIPT} ]; then + source \${PREBUILD_SCRIPT} +fi +yes | source \${BUILD_SCRIPT} +" + +if (( CONTAINER_SHELL_ONLY == 0 )); then + COMMAND="${CPP_BUILD_DIR_IN_CONTAINER}/build.sh || bash" +else + COMMAND="bash" +fi + +# Create the build dir for the container to mount, generate the build script inside of it +mkdir -p "${BASE_CONTAINER_BUILD_DIR}" +mkdir -p "${CPP_CONTAINER_BUILD_DIR}" +# Create build directories. This is to ensure correct owner for directories. If +# directories don't exist there is side effect from docker volume mounting creating build +# directories owned by root(volume mount point(s)) + +echo "${BUILD_SCRIPT}" > "${CPP_CONTAINER_BUILD_DIR}/build.sh" +chmod ugo+x "${CPP_CONTAINER_BUILD_DIR}/build.sh" + +# Mount passwd and group files to docker. This allows docker to resolve username and group +# avoiding these nags: +# * groups: cannot find name for group ID ID +# * I have no name!@id:/$ +# For ldap user user information is not present in system /etc/passwd and /etc/group files. +# Hence we generate dummy files for ldap users which docker uses to resolve username and group + +PASSWD_FILE="/etc/passwd" +GROUP_FILE="/etc/group" + +USER_FOUND=$(grep -wc "$(whoami)" < "$PASSWD_FILE") +if [ "$USER_FOUND" == 0 ]; then + echo "Local User not found, LDAP WAR for docker mounts activated. 
Creating dummy passwd and group" + echo "files to allow docker resolve username and group" + cp "$PASSWD_FILE" /tmp/passwd + PASSWD_FILE="/tmp/passwd" + cp "$GROUP_FILE" /tmp/group + GROUP_FILE="/tmp/group" + echo "$(whoami):x:$(id -u):$(id -g):$(whoami),,,:$HOME:$SHELL" >> "$PASSWD_FILE" + echo "$(whoami):x:$(id -g):" >> "$GROUP_FILE" +fi + +# Run the generated build script in a container +sudo docker pull "${DOCKER_IMAGE}" +sudo docker run --runtime=nvidia --rm -it -e NVIDIA_VISIBLE_DEVICES="${NVIDIA_VISIBLE_DEVICES}" \ + -u "$(id -u)":"$(id -g)" \ + -v "${REPO_PATH}":"${REPO_PATH_IN_CONTAINER}" \ + -v "$PASSWD_FILE":/etc/passwd:ro \ + -v "$GROUP_FILE":/etc/group:ro \ + --cap-add=SYS_PTRACE \ + "${DOCKER_IMAGE}" bash -c "${COMMAND}" From 1d7bea8d660a0d391ec1dddc7909c9a2256e2349 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Wed, 13 Nov 2019 12:15:12 -0500 Subject: [PATCH 049/128] [common] updatge graph classes create base graph class, extend to directed and undirected graphs add basic tests for graphs --- .../include/claragenomics/utils/graph.hpp | 218 ++++++++++++++---- common/utils/tests/TestGraph.cpp | 30 +++ .../include/claragenomics/cudapoa/batch.hpp | 10 +- cudapoa/src/cudapoa_batch.cpp | 6 +- cudapoa/src/cudapoa_batch.hpp | 4 +- cudapoa/tests/Test_CudapoaGenerateMSA2.cpp | 6 +- 6 files changed, 220 insertions(+), 54 deletions(-) diff --git a/common/utils/include/claragenomics/utils/graph.hpp b/common/utils/include/claragenomics/utils/graph.hpp index e162bb405..b43ff604e 100644 --- a/common/utils/include/claragenomics/utils/graph.hpp +++ b/common/utils/include/claragenomics/utils/graph.hpp @@ -23,11 +23,14 @@ namespace claragenomics { using node_id_t = int32_t; +using edge_t = std::pair; -/// \struct pair_hasher +/// \struct PairHash +/// Hash function for a pair struct PairHash { public: + /// \brief Operator overload to define hash function template size_t operator()(const std::pair& pair) const { @@ -37,40 +40,25 @@ struct PairHash } }; -/// \class DirectedGraph -/// Object representing a graph structure -class DirectedGraph +/// \class Graph +/// Object representing a generic graph structure +class Graph { public: - DirectedGraph() = default; - - ~DirectedGraph() = default; - - virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to) - { - auto edge = std::pair(node_id_from, node_id_to); - auto find_edge = edges_.find(edge); - if (find_edge == edges_.end()) - { - edges_.insert(edge); - auto find_node = adjacent_nodes_.find(node_id_from); - if (find_node == adjacent_nodes_.end()) - { - adjacent_nodes_.insert({node_id_from, {node_id_to}}); - } - else - { - find_node->second.push_back(node_id_to); - } - } - } + /// \brief Default dtor + ~Graph() = default; - virtual void add_label(node_id_t node, const std::string& label) - { - node_labels_.insert({node, label}); - } + /// \brief Add edges to a graph + /// + /// \param node_id_from Source node ID + /// \param node_id_to Sink node ID + virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to) = 0; - virtual const std::vector& get_adjacent_nodes(node_id_t node) + /// \brief Get a list of adjacent nodes to a given node + /// + /// \param node Node for which adjacent nodes are requested + /// \return Vector of adjacent node IDs + virtual const std::vector& get_adjacent_nodes(node_id_t node) const { auto iter = adjacent_nodes_.find(node); if (iter != adjacent_nodes_.end()) @@ -83,7 +71,10 @@ class DirectedGraph } } - virtual const std::vector get_node_ids() + /// \brief List all node IDs in the graph + /// + /// 
\return A vector of node IDs + virtual const std::vector get_node_ids() const { std::vector nodes; for (auto iter : adjacent_nodes_) @@ -94,7 +85,34 @@ class DirectedGraph return nodes; } - virtual std::string get_node_label(node_id_t node) + /// \brief Get a list of all edges in the graph + /// + /// \return A vector of edges + virtual const std::vector get_edges() const + { + std::vector edges; + for (auto iter : edges_) + { + edges.push_back(iter); + } + return edges; + } + + /// \brief Add string labels to a node ID + /// + /// \param node ID of node + /// \param label Label to attach to that node ID + virtual void set_node_label(node_id_t node, const std::string& label) + { + node_labels_.insert({node, label}); + } + + /// \brief Get the label associated with a node + /// + /// \param node node ID for label query + /// \return String label for associated node. Returns empty string if + // no label is associated or node ID doesn't exist. + virtual std::string get_node_label(node_id_t node) const { auto found_node = node_labels_.find(node); if (found_node != node_labels_.end()) @@ -103,7 +121,80 @@ class DirectedGraph } else { - throw std::runtime_error("No node found with given ID"); + return ""; + } + } + + /// \brief Serialize graph structure to dot format + /// + /// \return A string encoding the graph in dot format + virtual std::string serialize_to_dot() const = 0; + +protected: + Graph() = default; + + /// \brief Check if a directed edge exists in the grph + /// + /// \param edge A directed edge + /// \return Boolean result of check + bool directed_edge_exists(edge_t edge) + { + auto find_edge = edges_.find(edge); + if (find_edge == edges_.end()) + { + return false; + } + else + { + return true; + } + } + + /// \brief Update the adjacent nodes based on edge information + /// + /// \param edge A directed edge + void update_adject_nodes(edge_t edge) + { + auto find_node = adjacent_nodes_.find(edge.first); + if (find_node == adjacent_nodes_.end()) + { + adjacent_nodes_.insert({edge.first, {edge.second}}); + } + else + { + find_node->second.push_back(edge.second); + } + } + + /// List of adjacent nodes per node ID + std::unordered_map> adjacent_nodes_; + + /// All edges in the graph + std::unordered_set edges_; + + /// Label per node + std::unordered_map node_labels_; + + /// An empty list representing no connectivity + const std::vector empty_; +}; + +/// \class DirectedGraph +/// Object representing a directed graph structure +class DirectedGraph : public Graph +{ +public: + DirectedGraph() = default; + + ~DirectedGraph() = default; + + virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to) + { + auto edge = edge_t(node_id_from, node_id_to); + if (!directed_edge_exists(edge)) + { + edges_.insert(edge); + update_adject_nodes(edge); } } @@ -115,21 +206,66 @@ class DirectedGraph { node_id_t src = iter.first; auto label_found = node_labels_.find(src); - dot_str << src << " [label=\"" << label_found->second << "\"];\n"; + if (label_found != node_labels_.end()) + { + dot_str << src << " [label=\"" << label_found->second << "\"];\n"; + } for (node_id_t sink : iter.second) { dot_str << src << " -> " << sink << "\n"; } } - dot_str << "\n"; + dot_str << "}\n"; return dot_str.str(); } +}; -private: - std::unordered_map> adjacent_nodes_; - std::unordered_set, PairHash> edges_; - std::unordered_map node_labels_; - const std::vector empty_; +/// \class UndirectedGraph +/// Object representing an undirected graph structure +class UndirectedGraph : public Graph +{ +public: + 
UndirectedGraph() = default; + + ~UndirectedGraph() = default; + + virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to) + { + auto edge = edge_t(node_id_from, node_id_to); + auto edge_reversed = edge_t(node_id_to, node_id_from); + if (!directed_edge_exists(edge) && !directed_edge_exists(edge_reversed)) + { + edges_.insert(edge); + update_adject_nodes(edge); + update_adject_nodes(edge_reversed); + } + } + + virtual std::string serialize_to_dot() const + { + std::ostringstream dot_str; + dot_str << "graph g {\n"; + + // Get nodel labels, if any. + const std::vector nodes = get_node_ids(); + for (auto node : nodes) + { + auto label_found = node_labels_.find(node); + if (label_found != node_labels_.end()) + { + dot_str << node << " [label=\"" << label_found->second << "\"];\n"; + } + } + + // Get edges. + for (auto iter : edges_) + { + dot_str << iter.first << " -- " << iter.second << "\n"; + } + + dot_str << "}\n"; + return dot_str.str(); + } }; } // namespace claragenomics diff --git a/common/utils/tests/TestGraph.cpp b/common/utils/tests/TestGraph.cpp index b9ba7e680..947674774 100644 --- a/common/utils/tests/TestGraph.cpp +++ b/common/utils/tests/TestGraph.cpp @@ -44,4 +44,34 @@ TEST(GraphTest, DirectediGraph) EXPECT_EQ(std::find(adjacent_nodes_to_3.begin(), adjacent_nodes_to_3.end(), 2), adjacent_nodes_to_3.end()); } +TEST(GraphTest, UndirectediGraph) +{ + UndirectedGraph graph; + + // Sample graph + // 3 + // | + // | + // 1 -- 2 -- 5 + // | | + // | | + // 4 ---| + + graph.add_edge(1, 2); + graph.add_edge(2, 5); + graph.add_edge(2, 3); + graph.add_edge(2, 4); + graph.add_edge(4, 5); + + const auto& adjacent_nodes_to_2 = graph.get_adjacent_nodes(2); + EXPECT_NE(std::find(adjacent_nodes_to_2.begin(), adjacent_nodes_to_2.end(), 3), adjacent_nodes_to_2.end()); + EXPECT_NE(std::find(adjacent_nodes_to_2.begin(), adjacent_nodes_to_2.end(), 4), adjacent_nodes_to_2.end()); + EXPECT_NE(std::find(adjacent_nodes_to_2.begin(), adjacent_nodes_to_2.end(), 5), adjacent_nodes_to_2.end()); + EXPECT_NE(std::find(adjacent_nodes_to_2.begin(), adjacent_nodes_to_2.end(), 1), adjacent_nodes_to_2.end()); + + const auto& adjacent_nodes_to_3 = graph.get_adjacent_nodes(3); + EXPECT_EQ(std::find(adjacent_nodes_to_3.begin(), adjacent_nodes_to_3.end(), 1), adjacent_nodes_to_3.end()); + EXPECT_NE(std::find(adjacent_nodes_to_3.begin(), adjacent_nodes_to_3.end(), 2), adjacent_nodes_to_3.end()); +} + } // namespace claragenomics diff --git a/cudapoa/include/claragenomics/cudapoa/batch.hpp b/cudapoa/include/claragenomics/cudapoa/batch.hpp index 98d011991..d7cc181be 100644 --- a/cudapoa/include/claragenomics/cudapoa/batch.hpp +++ b/cudapoa/include/claragenomics/cudapoa/batch.hpp @@ -102,8 +102,14 @@ class Batch virtual StatusType get_msa(std::vector>& msa, std::vector& output_status) = 0; - virtual StatusType get_graphs(std::vector& graphs, - std::vector& output_status) = 0; + /// \brief Get the graph representation for each POA. + /// + /// \param graphs Reference to a vector where directed graph of each poa + /// is returned. + /// \param output_status Reference to vector where the errors + /// during kernel execution is captured + virtual void get_graphs(std::vector& graphs, + std::vector& output_status) = 0; /// \brief Return batch ID. 
/// diff --git a/cudapoa/src/cudapoa_batch.cpp b/cudapoa/src/cudapoa_batch.cpp index 264ce5a11..bb75bc750 100644 --- a/cudapoa/src/cudapoa_batch.cpp +++ b/cudapoa/src/cudapoa_batch.cpp @@ -312,7 +312,7 @@ StatusType CudapoaBatch::get_msa(std::vector>& msa, std return StatusType::success; } -StatusType CudapoaBatch::get_graphs(std::vector& graphs, std::vector& output_status) +void CudapoaBatch::get_graphs(std::vector& graphs, std::vector& output_status) { int32_t max_nodes_per_window_ = banded_alignment_ ? CUDAPOA_MAX_NODES_PER_WINDOW_BANDED : CUDAPOA_MAX_NODES_PER_WINDOW; CGA_CU_CHECK_ERR(cudaMemcpyAsync(graph_details_h_->nodes, @@ -369,7 +369,7 @@ StatusType CudapoaBatch::get_graphs(std::vector& graphs, std::vec // For each node, find it's outgoing edges and add the edge to the graph, // along with its label. node_id_t src = n; - graph.add_label(src, std::string(1, static_cast(nodes[n]))); + graph.set_node_label(src, std::string(1, static_cast(nodes[n]))); uint16_t num_edges = graph_details_h_->outgoing_edge_count[poa * max_nodes_per_window_ + n]; for (uint16_t e = 0; e < num_edges; e++) { @@ -379,8 +379,6 @@ StatusType CudapoaBatch::get_graphs(std::vector& graphs, std::vec } } } - - return StatusType::success; } bool CudapoaBatch::reserve_buf(int32_t max_seq_length) diff --git a/cudapoa/src/cudapoa_batch.hpp b/cudapoa/src/cudapoa_batch.hpp index bb57146f7..6a655d5e2 100644 --- a/cudapoa/src/cudapoa_batch.hpp +++ b/cudapoa/src/cudapoa_batch.hpp @@ -66,8 +66,8 @@ class CudapoaBatch : public Batch StatusType get_msa(std::vector>& msa, std::vector& output_status); - StatusType get_graphs(std::vector& graphs, - std::vector& output_status); + void get_graphs(std::vector& graphs, + std::vector& output_status); // Return batch ID. int32_t batch_id() const; diff --git a/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp b/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp index 29ca5a5fc..7767fa782 100644 --- a/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp +++ b/cudapoa/tests/Test_CudapoaGenerateMSA2.cpp @@ -77,7 +77,7 @@ class MSATest : public ::testing::Test TEST_F(MSATest, CudapoaMSA) { std::minstd_rand rng(1); - int num_sequences = 2; + int num_sequences = 500; std::string backbone = claragenomics::genomeutils::generate_random_genome(50, rng); auto sequences = claragenomics::genomeutils::generate_random_sequences(backbone, num_sequences, rng, 10, 5, 10); @@ -104,10 +104,6 @@ TEST_F(MSATest, CudapoaMSA) ASSERT_EQ(output_status[0], StatusType::success); ASSERT_EQ(poa_group.size(), cudapoa_msa[0].size()); - std::vector cudapoa_graphs; - output_status.clear(); - cudapoa_batch->get_graphs(cudapoa_graphs, output_status); - auto spoa_msa = spoa_generate_multiple_sequence_alignments(sequences); #ifndef SPOA_ACCURATE From 2c06fd8a9968e9bde2854fbcd56e783d6742bd56 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Wed, 13 Nov 2019 12:35:10 -0500 Subject: [PATCH 050/128] [common] add override keyword --- common/utils/include/claragenomics/utils/graph.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/common/utils/include/claragenomics/utils/graph.hpp b/common/utils/include/claragenomics/utils/graph.hpp index b43ff604e..ac6ddcb81 100644 --- a/common/utils/include/claragenomics/utils/graph.hpp +++ b/common/utils/include/claragenomics/utils/graph.hpp @@ -188,7 +188,7 @@ class DirectedGraph : public Graph ~DirectedGraph() = default; - virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to) + virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to) override { auto edge = 
edge_t(node_id_from, node_id_to); if (!directed_edge_exists(edge)) @@ -198,7 +198,7 @@ class DirectedGraph : public Graph } } - virtual std::string serialize_to_dot() const + virtual std::string serialize_to_dot() const override { std::ostringstream dot_str; dot_str << "digraph g {\n"; @@ -229,7 +229,7 @@ class UndirectedGraph : public Graph ~UndirectedGraph() = default; - virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to) + virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to) override { auto edge = edge_t(node_id_from, node_id_to); auto edge_reversed = edge_t(node_id_to, node_id_from); @@ -241,7 +241,7 @@ class UndirectedGraph : public Graph } } - virtual std::string serialize_to_dot() const + virtual std::string serialize_to_dot() const override { std::ostringstream dot_str; dot_str << "graph g {\n"; From a0b8097c223c548faa1e39a2db46e911db5577df Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Wed, 13 Nov 2019 16:32:01 -0500 Subject: [PATCH 051/128] [common] add edge weights to graph --- .../include/claragenomics/utils/graph.hpp | 82 +++++++++-------- cudapoa/src/allocate_block.cpp | 11 ++- cudapoa/src/cudapoa_batch.cpp | 30 ++++--- cudapoa/tests/CMakeLists.txt | 3 +- cudapoa/tests/Test_CudapoaSerializeGraph.cpp | 88 +++++++++++++++++++ 5 files changed, 161 insertions(+), 53 deletions(-) create mode 100644 cudapoa/tests/Test_CudapoaSerializeGraph.cpp diff --git a/common/utils/include/claragenomics/utils/graph.hpp b/common/utils/include/claragenomics/utils/graph.hpp index ac6ddcb81..002978c82 100644 --- a/common/utils/include/claragenomics/utils/graph.hpp +++ b/common/utils/include/claragenomics/utils/graph.hpp @@ -22,8 +22,9 @@ namespace claragenomics { -using node_id_t = int32_t; -using edge_t = std::pair; +using node_id_t = int32_t; +using edge_weight_t = int32_t; +using edge_t = std::pair; /// \struct PairHash /// Hash function for a pair @@ -52,7 +53,7 @@ class Graph /// /// \param node_id_from Source node ID /// \param node_id_to Sink node ID - virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to) = 0; + virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to, edge_weight_t weight) = 0; /// \brief Get a list of adjacent nodes to a given node /// @@ -88,12 +89,12 @@ class Graph /// \brief Get a list of all edges in the graph /// /// \return A vector of edges - virtual const std::vector get_edges() const + virtual const std::vector> get_edges() const { - std::vector edges; + std::vector> edges; for (auto iter : edges_) { - edges.push_back(iter); + edges.push_back({iter.first, iter.second}); } return edges; } @@ -166,11 +167,35 @@ class Graph } } + void node_labels_to_dot(std::ostringstream& dot_str) const + { + const std::vector nodes = get_node_ids(); + for (auto node : nodes) + { + auto label_found = node_labels_.find(node); + if (label_found != node_labels_.end()) + { + dot_str << node << " [label=\"" << label_found->second << "\"];\n"; + } + } + } + + void edges_to_dot(std::ostringstream& dot_str, const std::string& node_separator) const + { + for (auto iter : edges_) + { + const edge_t& edge = iter.first; + const edge_weight_t& weight = iter.second; + dot_str << edge.first << " " << node_separator << " " << edge.second; + dot_str << " [label=\"" << weight << "\"];\n"; + } + } + /// List of adjacent nodes per node ID std::unordered_map> adjacent_nodes_; /// All edges in the graph - std::unordered_set edges_; + std::unordered_map edges_; /// Label per node std::unordered_map node_labels_; @@ -188,12 +213,12 @@ class 
DirectedGraph : public Graph ~DirectedGraph() = default; - virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to) override + virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to, edge_weight_t weight = 0) override { auto edge = edge_t(node_id_from, node_id_to); if (!directed_edge_exists(edge)) { - edges_.insert(edge); + edges_.insert({edge, weight}); update_adject_nodes(edge); } } @@ -202,19 +227,13 @@ class DirectedGraph : public Graph { std::ostringstream dot_str; dot_str << "digraph g {\n"; - for (auto iter : adjacent_nodes_) - { - node_id_t src = iter.first; - auto label_found = node_labels_.find(src); - if (label_found != node_labels_.end()) - { - dot_str << src << " [label=\"" << label_found->second << "\"];\n"; - } - for (node_id_t sink : iter.second) - { - dot_str << src << " -> " << sink << "\n"; - } - } + + // Get nodel labels, if any. + node_labels_to_dot(dot_str); + + // Get edges. + edges_to_dot(dot_str, "->"); + dot_str << "}\n"; return dot_str.str(); } @@ -229,13 +248,13 @@ class UndirectedGraph : public Graph ~UndirectedGraph() = default; - virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to) override + virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to, edge_weight_t weight = 0) override { auto edge = edge_t(node_id_from, node_id_to); auto edge_reversed = edge_t(node_id_to, node_id_from); if (!directed_edge_exists(edge) && !directed_edge_exists(edge_reversed)) { - edges_.insert(edge); + edges_.insert({edge, weight}); update_adject_nodes(edge); update_adject_nodes(edge_reversed); } @@ -247,21 +266,10 @@ class UndirectedGraph : public Graph dot_str << "graph g {\n"; // Get nodel labels, if any. - const std::vector nodes = get_node_ids(); - for (auto node : nodes) - { - auto label_found = node_labels_.find(node); - if (label_found != node_labels_.end()) - { - dot_str << node << " [label=\"" << label_found->second << "\"];\n"; - } - } + node_labels_to_dot(dot_str); // Get edges. 
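+        // Illustrative sketch (assumed, not exercised by this patch): for a small
+        // weighted graph built as
+        //     UndirectedGraph g;
+        //     g.set_node_label(1, "A");
+        //     g.add_edge(1, 2, 5);
+        // the serialized output is expected to look roughly like
+        //     graph g {
+        //     1 [label="A"];
+        //     1 -- 2 [label="5"];
+        //     }
+        // with node/edge ordering unspecified, since the underlying containers
+        // are unordered maps.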
- for (auto iter : edges_) - { - dot_str << iter.first << " -- " << iter.second << "\n"; - } + edges_to_dot(dot_str, "--"); dot_str << "}\n"; return dot_str.str(); diff --git a/cudapoa/src/allocate_block.cpp b/cudapoa/src/allocate_block.cpp index 7317c92c9..948a02676 100644 --- a/cudapoa/src/allocate_block.cpp +++ b/cudapoa/src/allocate_block.cpp @@ -123,8 +123,9 @@ std::tuple BatchBlock::calculate_space_per_p host_size_fixed += sizeof(GraphDetails); // graph_details_h_ host_size_fixed += sizeof(GraphDetails); // graph_details_d_ host_size_per_poa += sizeof(uint8_t) * max_nodes_per_window_ * poa_count; // graph_details_h_->nodes - host_size_per_poa += sizeof(uint16_t) * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES * poa_count; // graph_details_d_->outgoing_edges - host_size_per_poa += sizeof(uint16_t) * max_nodes_per_window_ * poa_count; // graph_details_d_->outgoing_edge_count + host_size_per_poa += sizeof(uint16_t) * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES * poa_count; // graph_details_d_->incoming_edges + host_size_per_poa += sizeof(uint16_t) * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES * poa_count; // graph_details_d_->incoming_edge_weights + host_size_per_poa += sizeof(uint16_t) * max_nodes_per_window_ * poa_count; // graph_details_d_->incoming_edge_count // for graph - device device_size_per_poa += sizeof(uint8_t) * max_nodes_per_window_ * poa_count; // graph_details_d_->nodes @@ -274,9 +275,11 @@ void BatchBlock::get_graph_details(GraphDetails** graph_details_d_p, GraphDetail offset_h_ += sizeof(GraphDetails); graph_details_h->nodes = &block_data_h_[offset_h_]; offset_h_ += sizeof(uint8_t) * max_nodes_per_window_ * max_poas_; - graph_details_h->outgoing_edges = reinterpret_cast(&block_data_h_[offset_h_]); + graph_details_h->incoming_edges = reinterpret_cast(&block_data_h_[offset_h_]); offset_h_ += sizeof(uint16_t) * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES * max_poas_; - graph_details_h->outgoing_edge_count = reinterpret_cast(&block_data_h_[offset_h_]); + graph_details_h->incoming_edge_weights = reinterpret_cast(&block_data_h_[offset_h_]); + offset_h_ += sizeof(uint16_t) * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES * max_poas_; + graph_details_h->incoming_edge_count = reinterpret_cast(&block_data_h_[offset_h_]); offset_h_ += sizeof(uint16_t) * max_nodes_per_window_ * max_poas_; graph_details_d = reinterpret_cast(&block_data_h_[offset_h_]); offset_h_ += sizeof(GraphDetails); diff --git a/cudapoa/src/cudapoa_batch.cpp b/cudapoa/src/cudapoa_batch.cpp index bb75bc750..5251f9050 100644 --- a/cudapoa/src/cudapoa_batch.cpp +++ b/cudapoa/src/cudapoa_batch.cpp @@ -321,14 +321,20 @@ void CudapoaBatch::get_graphs(std::vector& graphs, std::vectoroutgoing_edges, - graph_details_d_->outgoing_edges, + CGA_CU_CHECK_ERR(cudaMemcpyAsync(graph_details_h_->incoming_edges, + graph_details_d_->incoming_edges, sizeof(uint16_t) * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES * max_poas_, cudaMemcpyDeviceToHost, stream_)); - CGA_CU_CHECK_ERR(cudaMemcpyAsync(graph_details_h_->outgoing_edge_count, - graph_details_d_->outgoing_edge_count, + CGA_CU_CHECK_ERR(cudaMemcpyAsync(graph_details_h_->incoming_edge_weights, + graph_details_d_->incoming_edge_weights, + sizeof(uint16_t) * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES * max_poas_, + cudaMemcpyDeviceToHost, + stream_)); + + CGA_CU_CHECK_ERR(cudaMemcpyAsync(graph_details_h_->incoming_edge_count, + graph_details_d_->incoming_edge_count, sizeof(uint16_t) * max_nodes_per_window_ * max_poas_, cudaMemcpyDeviceToHost, stream_)); @@ -366,15 
+372,17 @@ void CudapoaBatch::get_graphs(std::vector& graphs, std::vectornodes[max_nodes_per_window_ * poa]; for (int32_t n = 0; n < num_nodes; n++) { - // For each node, find it's outgoing edges and add the edge to the graph, + // For each node, find it's incoming edges and add the edge to the graph, // along with its label. - node_id_t src = n; - graph.set_node_label(src, std::string(1, static_cast(nodes[n]))); - uint16_t num_edges = graph_details_h_->outgoing_edge_count[poa * max_nodes_per_window_ + n]; + node_id_t sink = n; + graph.set_node_label(sink, std::string(1, static_cast(nodes[n]))); + uint16_t num_edges = graph_details_h_->incoming_edge_count[poa * max_nodes_per_window_ + n]; for (uint16_t e = 0; e < num_edges; e++) { - node_id_t sink = graph_details_h_->outgoing_edges[poa * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES + n * CUDAPOA_MAX_NODE_EDGES + e]; - graph.add_edge(src, sink); + int32_t idx = poa * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES + n * CUDAPOA_MAX_NODE_EDGES + e; + node_id_t src = graph_details_h_->incoming_edges[idx]; + edge_weight_t weight = graph_details_h_->incoming_edge_weights[idx]; + graph.add_edge(src, sink, weight); } } } @@ -496,7 +504,7 @@ StatusType CudapoaBatch::add_seq_to_poa(const char* seq, const int8_t* weights, // Verify that weightsw are positive. for (int32_t i = 0; i < seq_len; i++) { - throw_on_negative(weights[i], "Base weights need have to be non-negative"); + throw_on_negative(weights[i], "Base weights need to be non-negative"); } memcpy(&(input_details_h_->base_weights[num_nucleotides_copied_]), weights, diff --git a/cudapoa/tests/CMakeLists.txt b/cudapoa/tests/CMakeLists.txt index 2a0d97e19..965bddb0b 100644 --- a/cudapoa/tests/CMakeLists.txt +++ b/cudapoa/tests/CMakeLists.txt @@ -18,7 +18,8 @@ set(SOURCES Test_CudapoaNW.cpp Test_CudapoaGenerateConsensus.cpp Test_CudapoaBatchEnd2End.cpp - Test_CudapoaGenerateMSA2.cpp) + Test_CudapoaGenerateMSA2.cpp + Test_CudapoaSerializeGraph.cpp) get_property(cudapoa_data_include_dir GLOBAL PROPERTY cudapoa_data_include_dir) include_directories(${cudapoa_data_include_dir}) diff --git a/cudapoa/tests/Test_CudapoaSerializeGraph.cpp b/cudapoa/tests/Test_CudapoaSerializeGraph.cpp new file mode 100644 index 000000000..450b6bf8f --- /dev/null +++ b/cudapoa/tests/Test_CudapoaSerializeGraph.cpp @@ -0,0 +1,88 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. 
+*/ + +#include "../src/cudapoa_kernels.cuh" + +#include +#include + +#include "gtest/gtest.h" +#include +#include + +namespace claragenomics +{ + +namespace cudapoa +{ + +using ::testing::TestWithParam; +using ::testing::ValuesIn; + +class GraphTest : public ::testing::Test +{ +public: + void SetUp() {} + + void initialize(uint32_t max_sequences_per_poa, + uint32_t device_id = 0, + cudaStream_t stream = 0, + int8_t output_mask = OutputType::msa, + int16_t gap_score = -8, + int16_t mismatch_score = -6, + int16_t match_score = 8, + bool banded_alignment = false) + { + size_t total = 0, free = 0; + cudaSetDevice(device_id); + cudaMemGetInfo(&free, &total); + size_t mem_per_batch = 0.9 * free; + + cudapoa_batch = claragenomics::cudapoa::create_batch(max_sequences_per_poa, device_id, stream, mem_per_batch, output_mask, gap_score, mismatch_score, match_score, banded_alignment); + } + +public: + std::unique_ptr cudapoa_batch; +}; + +TEST_F(GraphTest, CudapoaSerializeGraph) +{ + std::minstd_rand rng(1); + int num_sequences = 500; + std::string backbone = claragenomics::genomeutils::generate_random_genome(50, rng); + auto sequences = claragenomics::genomeutils::generate_random_sequences(backbone, num_sequences, rng, 10, 5, 10); + + initialize(num_sequences); + Group poa_group; + std::vector status; + std::vector> weights; + for (const auto& seq : sequences) + { + weights.push_back(std::vector(seq.length(), 1)); + Entry e{}; + e.seq = seq.c_str(); + e.weights = weights.back().data(); + e.length = seq.length(); + poa_group.push_back(e); + } + ASSERT_EQ(cudapoa_batch->add_poa_group(status, poa_group), StatusType::success); + + std::vector cudapoa_graphs; + std::vector output_status; + + cudapoa_batch->generate_poa(); + + cudapoa_batch->get_graphs(cudapoa_graphs, output_status); + std::cout << cudapoa_graphs[0].serialize_to_dot() << std::endl; +} + +} // namespace cudapoa + +} // namespace claragenomics From 24ed59edd378c0d054ee42c177c81d7bb498d07f Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Wed, 13 Nov 2019 17:28:34 -0500 Subject: [PATCH 052/128] [common] fix documentation for classes --- .../include/claragenomics/utils/graph.hpp | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/common/utils/include/claragenomics/utils/graph.hpp b/common/utils/include/claragenomics/utils/graph.hpp index 002978c82..a8af2a59a 100644 --- a/common/utils/include/claragenomics/utils/graph.hpp +++ b/common/utils/include/claragenomics/utils/graph.hpp @@ -41,8 +41,7 @@ struct PairHash } }; -/// \class Graph -/// Object representing a generic graph structure +/// \brief Object representing a generic graph structure class Graph { public: @@ -53,6 +52,7 @@ class Graph /// /// \param node_id_from Source node ID /// \param node_id_to Sink node ID + /// \param weight Edge weight virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to, edge_weight_t weight) = 0; /// \brief Get a list of adjacent nodes to a given node @@ -167,6 +167,9 @@ class Graph } } + /// \brief Serialize node labels to dot format + /// + /// \param dot_str Output string stream to serialize labels to void node_labels_to_dot(std::ostringstream& dot_str) const { const std::vector nodes = get_node_ids(); @@ -180,6 +183,10 @@ class Graph } } + /// \brief Serialize edges to dot format + /// + /// \param dot_str Output string stream to serialize labels to + /// \param node_separator DOT delimiter for edge description void edges_to_dot(std::ostringstream& dot_str, const std::string& node_separator) const { for (auto 
iter : edges_) @@ -204,8 +211,7 @@ class Graph const std::vector empty_; }; -/// \class DirectedGraph -/// Object representing a directed graph structure +/// \brief DirectedGraph Object representing a directed graph structure class DirectedGraph : public Graph { public: @@ -213,6 +219,11 @@ class DirectedGraph : public Graph ~DirectedGraph() = default; + /// \brief Add directed edges to graph. + /// + /// \param node_id_from Source node ID + /// \param node_id_to Sink node ID + /// \param weight Edge weight virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to, edge_weight_t weight = 0) override { auto edge = edge_t(node_id_from, node_id_to); @@ -223,6 +234,9 @@ class DirectedGraph : public Graph } } + /// \brief Serialize graph structure to dot format + /// + /// \return A string encoding the graph in dot format virtual std::string serialize_to_dot() const override { std::ostringstream dot_str; @@ -239,8 +253,7 @@ class DirectedGraph : public Graph } }; -/// \class UndirectedGraph -/// Object representing an undirected graph structure +/// \brief UndirectedGraph Object representing an undirected graph structure class UndirectedGraph : public Graph { public: @@ -248,6 +261,11 @@ class UndirectedGraph : public Graph ~UndirectedGraph() = default; + /// \brief Add undirected edges to graph. + /// + /// \param node_id_from Source node ID + /// \param node_id_to Sink node ID + /// \param weight Edge weight virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to, edge_weight_t weight = 0) override { auto edge = edge_t(node_id_from, node_id_to); @@ -260,6 +278,9 @@ class UndirectedGraph : public Graph } } + /// \brief Serialize graph structure to dot format + /// + /// \return A string encoding the graph in dot format virtual std::string serialize_to_dot() const override { std::ostringstream dot_str; From 963207a9f99ec988e1f3a3dfa8d8012f4fc3e6ec Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Wed, 13 Nov 2019 17:30:18 -0500 Subject: [PATCH 053/128] [cmake] fix documentation generation for modules --- CMakeLists.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index abd80fd59..cd171cbea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,6 +63,14 @@ include(cmake/Benchmarks.cmake) include(cmake/Format.cmake) include(cmake/Packaging.cmake) +# Add ClaraGenomicsAnalysis projects. +add_subdirectory(common/logging) +add_subdirectory(common/utils) +add_subdirectory(common/io) +add_subdirectory(cudapoa) +add_subdirectory(cudamapper) +add_subdirectory(cudaaligner) + # Add documentation generation. validate_boolean(cga_generate_docs) if (cga_generate_docs) @@ -73,14 +81,6 @@ else() message(STATUS "Disabling Doxygen documentation generation") endif() -# Add ClaraGenomicsAnalysis projects. -add_subdirectory(common/logging) -add_subdirectory(common/utils) -add_subdirectory(common/io) -add_subdirectory(cudapoa) -add_subdirectory(cudamapper) -add_subdirectory(cudaaligner) - # Add auto formatting. 
cga_enable_formatting_targets() From d7efbad348b9b3be766c45b0a4fea4b30100e500 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Wed, 13 Nov 2019 17:38:05 -0500 Subject: [PATCH 054/128] [tests] remove unnecessary includes --- cudapoa/tests/Test_CudapoaSerializeGraph.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cudapoa/tests/Test_CudapoaSerializeGraph.cpp b/cudapoa/tests/Test_CudapoaSerializeGraph.cpp index 450b6bf8f..1de57daeb 100644 --- a/cudapoa/tests/Test_CudapoaSerializeGraph.cpp +++ b/cudapoa/tests/Test_CudapoaSerializeGraph.cpp @@ -8,14 +8,11 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. */ -#include "../src/cudapoa_kernels.cuh" - #include #include +#include #include "gtest/gtest.h" -#include -#include namespace claragenomics { From 8d59cd648b8fac75aa572cbcd623be30c8a1aa72 Mon Sep 17 00:00:00 2001 From: Kamesh Arumugam Date: Tue, 5 Nov 2019 23:28:39 -0800 Subject: [PATCH 055/128] [cudamapper:overlapper] - wip optimizations and gpu port --- cudamapper/CMakeLists.txt | 9 +- .../claragenomics/cudamapper/overlapper.hpp | 3 +- .../claragenomics/cudamapper/types.hpp | 7 +- cudamapper/src/{main.cpp => main.cu} | 2 +- cudamapper/src/matcher.cu | 7 +- cudamapper/src/matcher.hpp | 6 +- cudamapper/src/overlapper.cpp | 8 +- cudamapper/src/overlapper_triggered.cu | 482 ++++++++++++------ cudamapper/src/overlapper_triggered.hpp | 2 +- 9 files changed, 362 insertions(+), 164 deletions(-) rename cudamapper/src/{main.cpp => main.cu} (99%) diff --git a/cudamapper/CMakeLists.txt b/cudamapper/CMakeLists.txt index 4c23ff3f8..afee6e60e 100644 --- a/cudamapper/CMakeLists.txt +++ b/cudamapper/CMakeLists.txt @@ -14,12 +14,15 @@ project(cudamapper) add_subdirectory(data) find_package(CUDA 9.0 QUIET REQUIRED) +link_directories(/usr/local/cuda/targets/x86_64-linux/lib/) + + if(NOT ${CUDA_FOUND}) message(FATAL_ERROR "CUDA not detected on system. Please install") else() message(STATUS "Using CUDA ${CUDA_VERSION} from ${CUDA_TOOLKIT_ROOT_DIR}") - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -lineinfo --expt-extended-lambda -use_fast_math -Xcompiler -Wall,-Wno-pedantic -std=c++14") + set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -lineinfo --expt-extended-lambda -use_fast_math -Xcompiler -fopenmp,-Wall,-Wno-pedantic -std=c++14 -x cu") endif() cuda_add_library(index_gpu @@ -42,14 +45,14 @@ target_include_directories(cudamapper_utils PUBLIC include) cuda_add_library(overlapper_triggerred src/overlapper_triggered.cu) target_include_directories(overlapper_triggerred PUBLIC include) -target_link_libraries(overlapper_triggerred logging utils cgaio) +target_link_libraries(overlapper_triggerred logging utils cgaio omp) target_compile_options(overlapper_triggerred PRIVATE -Werror) add_doxygen_source_dir(${CMAKE_CURRENT_SOURCE_DIR}/include) cuda_add_executable(cudamapper src/cudamapper.cpp - src/main.cpp + src/main.cu src/index.cu src/overlapper.cpp ) diff --git a/cudamapper/include/claragenomics/cudamapper/overlapper.hpp b/cudamapper/include/claragenomics/cudamapper/overlapper.hpp index 6b65128fe..9c7cdb5a5 100644 --- a/cudamapper/include/claragenomics/cudamapper/overlapper.hpp +++ b/cudamapper/include/claragenomics/cudamapper/overlapper.hpp @@ -10,6 +10,7 @@ #pragma once +#include #include "index.hpp" #include "types.hpp" @@ -33,7 +34,7 @@ class Overlapper /// \param overlaps Output vector into which generated overlaps will be placed /// \param anchors vector of anchor objects. 
Does not need to be ordered /// \param index representation index for reads - virtual void get_overlaps(std::vector& overlaps, std::vector& anchors, const Index& index) = 0; + virtual void get_overlaps(std::vector& overlaps, thrust::device_vector& anchors, const Index& index) = 0; /// \brief prints overlaps to stdout in PAF format static void print_paf(const std::vector& overlaps); diff --git a/cudamapper/include/claragenomics/cudamapper/types.hpp b/cudamapper/include/claragenomics/cudamapper/types.hpp index 78972561c..ffc3217cf 100644 --- a/cudamapper/include/claragenomics/cudamapper/types.hpp +++ b/cudamapper/include/claragenomics/cudamapper/types.hpp @@ -89,9 +89,9 @@ typedef struct Overlap /// end position in the target position_in_read_t target_end_position_in_read_; /// query read name (e.g from FASTA) - std::string query_read_name_; + char* query_read_name_ = 0; /// target read name (e.g from FASTA) - std::string target_read_name_; + char* target_read_name_ = 0; /// Relative strand: Forward ("+") or Reverse("-") RelativeStrand relative_strand; /// Number of residues (e.g anchors) between the two reads @@ -103,8 +103,9 @@ typedef struct Overlap /// Whether the overlap is considered valid by the generating overlapper bool overlap_complete = false; /// CIGAR string for alignment of mapped section. - std::string cigar_ = ""; + char* cigar_ = 0; } Overlap; + } // namespace cudamapper } // namespace claragenomics diff --git a/cudamapper/src/main.cpp b/cudamapper/src/main.cu similarity index 99% rename from cudamapper/src/main.cpp rename to cudamapper/src/main.cu index 4ac478f2f..c69b29199 100644 --- a/cudamapper/src/main.cpp +++ b/cudamapper/src/main.cu @@ -109,7 +109,7 @@ int main(int argc, char* argv[]) // Function for adding new overlaps to writer auto add_overlaps_to_write_queue = [&overlaps_to_write, &overlaps_writer_mtx](claragenomics::cudamapper::Overlapper& overlapper, - std::vector& anchors, + thrust::device_vector& anchors, const claragenomics::cudamapper::Index& index) { CGA_NVTX_RANGE(profiler, "add_overlaps_to_write_queue"); overlaps_writer_mtx.lock(); diff --git a/cudamapper/src/matcher.cu b/cudamapper/src/matcher.cu index 8edce6248..4b78037ad 100644 --- a/cudamapper/src/matcher.cu +++ b/cudamapper/src/matcher.cu @@ -14,6 +14,7 @@ #include #include #include +#include namespace claragenomics { @@ -351,8 +352,8 @@ Matcher::Matcher(const Index& index, uint32_t query_target_division_idx) auto num_anchors_so_far = anchors_h_.size(); anchors_h_.resize(num_anchors_so_far + total_anchors); - CGA_CU_CHECK_ERR(cudaMemcpy(anchors_h_.data() + num_anchors_so_far, anchors_d.data(), total_anchors * sizeof(Anchor), - cudaMemcpyDeviceToHost)); + thrust::copy(anchors_d.data(), anchors_d.data() + total_anchors, anchors_h_.data() + num_anchors_so_far); + //CGA_CU_CHECK_ERR(cudaMemcpy(anchors_h_.data() + num_anchors_so_far, anchors_d.data(), total_anchors * sizeof(Anchor),cudaMemcpyDeviceToHost)); // clean up device memory CGA_LOG_INFO("Deallocating {} bytes from read_id_to_anchors_section_d", @@ -392,7 +393,7 @@ Matcher::Matcher(const Index& index, uint32_t query_target_division_idx) directions_of_reads_d.free(); } -std::vector& Matcher::anchors() +thrust::device_vector& Matcher::anchors() { return anchors_h_; } diff --git a/cudamapper/src/matcher.hpp b/cudamapper/src/matcher.hpp index 005284ff9..573042f5d 100644 --- a/cudamapper/src/matcher.hpp +++ b/cudamapper/src/matcher.hpp @@ -10,7 +10,7 @@ #pragma once -#include +#include #include "claragenomics/cudamapper/index.hpp" namespace 
claragenomics @@ -42,11 +42,11 @@ class Matcher /// \brief return anchors /// \return anchors - std::vector& anchors(); + thrust::device_vector& anchors(); private: /// \biref list of anchors - std::vector anchors_h_; + thrust::device_vector anchors_h_; }; } // namespace cudamapper } // namespace claragenomics diff --git a/cudamapper/src/overlapper.cpp b/cudamapper/src/overlapper.cpp index 08151c5ef..c48318c07 100644 --- a/cudamapper/src/overlapper.cpp +++ b/cudamapper/src/overlapper.cpp @@ -35,12 +35,12 @@ void Overlapper::print_paf(const std::vector& overlaps) { // Add basic overlap information. std::printf("%s\t%i\t%i\t%i\t%c\t%s\t%i\t%i\t%i\t%i\t%i\t%i", - overlap.query_read_name_.c_str(), + overlap.query_read_name_, overlap.query_length_, overlap.query_start_position_in_read_, overlap.query_end_position_in_read_, static_cast(overlap.relative_strand), - overlap.target_read_name_.c_str(), + overlap.target_read_name_, overlap.target_length_, overlap.target_start_position_in_read_, overlap.target_end_position_in_read_, @@ -48,9 +48,9 @@ void Overlapper::print_paf(const std::vector& overlaps) 0, 255); // If CIGAR string is generated, output in PAF. - if (overlap.cigar_ != "") + if (overlap.cigar_ != 0) { - std::printf("\tcg:Z:%s", overlap.cigar_.c_str()); + std::printf("\tcg:Z:%s", overlap.cigar_); } // Add new line to demarcate new entry. std::printf("\n"); diff --git a/cudamapper/src/overlapper_triggered.cu b/cudamapper/src/overlapper_triggered.cu index add4e17b4..68e28bbf7 100644 --- a/cudamapper/src/overlapper_triggered.cu +++ b/cudamapper/src/overlapper_triggered.cu @@ -1,191 +1,383 @@ /* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. -*/ + * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + */ #include #include +#include +#include #include #include -#include -#include #include +#include +#include #include #include -#include -#include "overlapper_triggered.hpp" #include "claragenomics/cudamapper/overlapper.hpp" #include "cudamapper_utils.hpp" #include "matcher.hpp" +#include "overlapper_triggered.hpp" +#include +#include +#include -namespace claragenomics -{ -namespace cudamapper +namespace claragenomics { +namespace cudamapper { + +__host__ __device__ bool operator==(const Anchor &prev_anchor, + const Anchor ¤t_anchor) { + uint16_t score_threshold = 1; + // Very simple scoring function to quantify quality of overlaps. 
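+    // (Illustrative note: anchor_score below returns 2 when the second anchor's
+    //  query position is less than 350 bases past the first's and 1 otherwise, so
+    //  with score_threshold == 1 only that "close" case makes two anchors from the
+    //  same query/target pair compare equal, i.e. chain together.)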
+ auto anchor_score = [] __host__ __device__(Anchor a, Anchor b) { + if ((b.query_position_in_read_ - a.query_position_in_read_) < 350) { + return 2; + } else { + return 1; // TODO change to a more sophisticated scoring method + } + }; + auto score = anchor_score(prev_anchor, current_anchor); + return ((current_anchor.query_read_id_ == prev_anchor.query_read_id_) && + (current_anchor.target_read_id_ == prev_anchor.target_read_id_) && + score > score_threshold); +} + +struct cuOverlapKey { -void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, std::vector& anchors, const Index& index) + Anchor* anchor; +}; + +struct cuOverlapKey_transform { - CGA_NVTX_RANGE(profiler, "OverlapperTriggered::get_overlaps"); - const auto& read_names = index.read_id_to_read_name(); - const auto& read_lengths = index.read_id_to_read_length(); - size_t total_anchors = anchors.size(); + Anchor* d_anchors; + int32_t* d_chain_start; - // fetch memory info of the current device - size_t total = 0, free = 0; - CGA_CU_CHECK_ERR(cudaMemGetInfo(&free, &total)); + cuOverlapKey_transform(Anchor* anchors, int32_t* chain_start) + : d_anchors(anchors) + , d_chain_start(chain_start) + { + } - // Using 80% of available memory as heuristic since not all available memory can be used - // due to fragmentation. - size_t max_anchors_per_block = 0.8 * free / sizeof(Anchor); - // The thurst sort function makes a local copy of the array, so we need twice - // twice the device memory available for the sort to succeed. - max_anchors_per_block /= 2; + __host__ __device__ __forceinline__ cuOverlapKey + operator()(const int& idx) const + { + auto anchor_idx = d_chain_start[idx]; - // comparison function object - auto comp = [] __host__ __device__(Anchor i, Anchor j) -> bool { - return (i.query_read_id_ < j.query_read_id_) || - ((i.query_read_id_ == j.query_read_id_) && - (i.target_read_id_ < j.target_read_id_)) || - ((i.query_read_id_ == j.query_read_id_) && - (i.target_read_id_ == j.target_read_id_) && - (i.query_position_in_read_ < j.query_position_in_read_)) || - ((i.query_read_id_ == j.query_read_id_) && - (i.target_read_id_ == j.target_read_id_) && - (i.query_position_in_read_ == j.query_position_in_read_) && - (i.target_position_in_read_ < j.target_position_in_read_)); - }; + cuOverlapKey key; + key.anchor = &d_anchors[anchor_idx]; + return key; + } +}; - thrust::device_vector anchors_buf; +__host__ __device__ bool operator==(const cuOverlapKey& key0, + const cuOverlapKey& key1) +{ + Anchor* a = key0.anchor; + Anchor* b = key1.anchor; + return (a->target_read_id_ == b->target_read_id_) && + (a->query_read_id_ == b->query_read_id_); +} - // chunking anchors array to a size that fits in memory - // sort the individual chunks and merge the sorted chunks into host array - for (std::vector::iterator anchors_iter = anchors.begin(); - anchors_iter < anchors.end(); - anchors_iter += max_anchors_per_block) - { +struct cuOverlapArgs +{ + int32_t overlap_end; + int32_t num_residues; + int32_t overlap_start; +}; - auto curblock_start = anchors_iter; - auto curblock_end = anchors_iter + max_anchors_per_block; - if (curblock_end > anchors.end()) - curblock_end = anchors.end(); +struct cuOverlapArgs_transform +{ + int32_t* d_chain_start; + int32_t* d_chain_length; - auto n_anchors_curblock = curblock_end - curblock_start; + cuOverlapArgs_transform(int32_t* chain_start, int32_t* chain_length) + : d_chain_start(chain_start) + , d_chain_length(chain_length) + { + } - // move current block to device - anchors_buf.resize(n_anchors_curblock); 
- thrust::copy(curblock_start, curblock_end, anchors_buf.begin()); + __host__ __device__ __forceinline__ cuOverlapArgs + operator()(const int32_t& idx) const + { + cuOverlapArgs overlap; + auto overlap_start = d_chain_start[idx]; + auto overlap_length = d_chain_length[idx]; + overlap.overlap_end = overlap_start + overlap_length; + overlap.num_residues = overlap_length; + overlap.overlap_start = overlap_start; + // printf("%d %d %d\n", idx, overlap_start, overlap_length); + return overlap; + } +}; - // sort on device - thrust::sort(thrust::device, anchors_buf.begin(), anchors_buf.end(), comp); +struct CustomReduceOp +{ + __host__ __device__ cuOverlapArgs operator()(const cuOverlapArgs& a, + const cuOverlapArgs& b) const + { + cuOverlapArgs fused_overlap; + fused_overlap.num_residues = a.num_residues + b.num_residues; + fused_overlap.overlap_end = + a.overlap_end > b.overlap_end ? a.overlap_end : b.overlap_end; + fused_overlap.overlap_start = + a.overlap_start < b.overlap_start ? a.overlap_start : b.overlap_start; + return fused_overlap; + } +}; - // move sorted anchors in current block back to host - thrust::copy(anchors_buf.begin(), anchors_buf.end(), curblock_start); +struct CreateOverlap +{ + Anchor* d_anchors; - // start merging the sorted anchor blocks from second iteration - if (anchors_iter != anchors.begin()) - { - std::inplace_merge(anchors.begin(), curblock_start, curblock_end, comp); - } + __host__ __device__ __forceinline__ CreateOverlap(Anchor* anchors_ptr) + : d_anchors(anchors_ptr) + { } - //Loop through the overlaps, "trigger" when an overlap is detected and add it to vector of overlaps - //when the overlap is left - std::vector overlaps; + __host__ __device__ __forceinline__ Overlap + operator()(cuOverlapArgs overlap) + { + Anchor overlap_start_anchor = d_anchors[overlap.overlap_start]; + Anchor overlap_end_anchor = d_anchors[overlap.overlap_end - 1]; - bool in_chain = false; - uint16_t tail_length = 0; - uint16_t tail_length_for_chain = 3; - uint16_t score_threshold = 1; - Anchor overlap_start_anchor; - Anchor prev_anchor; - Anchor current_anchor; - - //Very simple scoring function to quantify quality of overlaps. - auto anchor_score = [](Anchor a, Anchor b) { - if ((b.query_position_in_read_ - a.query_position_in_read_) < 350) + Overlap new_overlap; + + new_overlap.query_read_id_ = overlap_end_anchor.query_read_id_; + new_overlap.target_read_id_ = overlap_end_anchor.target_read_id_; + new_overlap.num_residues_ = overlap.num_residues; + new_overlap.target_end_position_in_read_ = + overlap_end_anchor.target_position_in_read_; + new_overlap.target_start_position_in_read_ = + overlap_start_anchor.target_position_in_read_; + new_overlap.query_end_position_in_read_ = + overlap_end_anchor.query_position_in_read_; + new_overlap.query_start_position_in_read_ = + overlap_start_anchor.query_position_in_read_; + new_overlap.overlap_complete = true; + + // If the target start position is greater than the target end position + // We can safely assume that the query and target are template and + // complement reads. 
TODO: Incorporate sketchelement direction value when + // this is implemented + if (new_overlap.target_start_position_in_read_ > + new_overlap.target_end_position_in_read_) { - return 2; + new_overlap.relative_strand = RelativeStrand::Reverse; + // std::swap(new_overlap.target_end_position_in_read_, + // new_overlap.target_start_position_in_read_); + auto tmp = new_overlap.target_end_position_in_read_; + new_overlap.target_end_position_in_read_ = + new_overlap.target_start_position_in_read_; + new_overlap.target_start_position_in_read_ = tmp; } else { - return 1; //TODO change to a more sophisticated scoring method + new_overlap.relative_strand = RelativeStrand::Forward; } + return new_overlap; }; +}; - //Add an anchor to an overlap - auto terminate_anchor = [&]() { - Overlap new_overlap; - new_overlap.query_read_id_ = prev_anchor.query_read_id_; - new_overlap.query_read_name_ = read_names[prev_anchor.query_read_id_]; - new_overlap.target_read_id_ = prev_anchor.target_read_id_; - new_overlap.target_read_name_ = read_names[prev_anchor.target_read_id_]; - new_overlap.query_length_ = read_lengths[prev_anchor.query_read_id_]; - new_overlap.target_length_ = read_lengths[prev_anchor.target_read_id_]; - new_overlap.num_residues_ = tail_length; - new_overlap.target_end_position_in_read_ = prev_anchor.target_position_in_read_; - new_overlap.target_start_position_in_read_ = overlap_start_anchor.target_position_in_read_; - new_overlap.query_end_position_in_read_ = prev_anchor.query_position_in_read_; - new_overlap.query_start_position_in_read_ = overlap_start_anchor.query_position_in_read_; - new_overlap.overlap_complete = true; - overlaps.push_back(new_overlap); - }; +std::vector +fused_overlaps_ongpu(std::vector& fused_overlaps, + thrust::device_vector& d_anchors, + const Index& index) +{ + const auto& read_names = index.read_id_to_read_name(); + const auto& read_lengths = index.read_id_to_read_length(); + auto n_anchors = d_anchors.size(); - for (size_t i = 0; i < anchors.size(); i++) - { - current_anchor = anchors[i]; - if ((current_anchor.query_read_id_ == prev_anchor.query_read_id_) && (current_anchor.target_read_id_ == prev_anchor.target_read_id_)) - { //TODO: For first anchor where prev anchor is not initialised can give incorrect result - //In the same read pairing as before - int score = anchor_score(prev_anchor, current_anchor); - if (score > score_threshold) - { - tail_length++; - if (tail_length == tail_length_for_chain) - { //we enter a chain - in_chain = true; - overlap_start_anchor = anchors[i - tail_length + 1]; //TODO check - } - } - else - { - if (in_chain) - { - terminate_anchor(); - } - - tail_length = 1; - in_chain = false; - } - prev_anchor = current_anchor; - } - else - { - //In a new read pairing - if (in_chain) - { - terminate_anchor(); - } - //Reinitialise all values - tail_length = 1; - in_chain = false; - prev_anchor = current_anchor; - } - } + uint16_t tail_length_for_chain = 3; + thrust::device_vector n_uniques(1); + thrust::device_vector d_chain_length(n_anchors); + + thrust::device_vector d_chain_start(n_anchors); + + thrust::device_vector anchors_buf(d_anchors.size()); + + Anchor* d_start_anchor = thrust::raw_pointer_cast(anchors_buf.data()); + + auto d_num_runs_ptr = n_uniques.data(); + + // run length encode to compute the overlaps start and end indices + void* d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + cub::DeviceRunLengthEncode::Encode( + d_temp_storage, temp_storage_bytes, d_anchors.data(), d_start_anchor, + d_chain_length.data(), d_num_runs_ptr, 
n_anchors); + + // Allocate temporary storage + CGA_CU_CHECK_ERR(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + + // Run encoding + cub::DeviceRunLengthEncode::Encode( + d_temp_storage, temp_storage_bytes, d_anchors.data(), d_start_anchor, + d_chain_length.data(), d_num_runs_ptr, n_anchors); + + auto n_chains = n_uniques[0]; + + d_temp_storage = nullptr; + temp_storage_bytes = 0; + cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, + d_chain_length.data(), d_chain_start.data(), + n_chains); + + // Allocate temporary storage + CGA_CU_CHECK_ERR(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + + cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, + d_chain_length.data(), d_chain_start.data(), + n_chains); + + // storage for the nonzero indices + // indices to d_chain_length/d_chain_start vector + thrust::device_vector d_valid_chains_indices(n_chains); + auto indices_end = + thrust::copy_if(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(n_chains), + d_chain_length.data(), d_valid_chains_indices.data(), + [=] __host__ __device__(const int32_t& len) -> bool { + return (len >= tail_length_for_chain); + }); + + auto n_valid_chains = indices_end - d_valid_chains_indices.data(); + + // std::ofstream glog; + // glog.open ("glog.log", std::ios::app); + // glog << " # valid chains/# chains - " << n_valid_chains << "/" << n_chains + // << "\n"; glog.close(); - //terminate any hanging anchors - if (in_chain) + cuOverlapKey_transform key_op(thrust::raw_pointer_cast(d_anchors.data()), + thrust::raw_pointer_cast(d_chain_start.data())); + cub::TransformInputIterator + d_keys_in(thrust::raw_pointer_cast(d_valid_chains_indices.data()), + key_op); + + cuOverlapArgs_transform value_op( + thrust::raw_pointer_cast(d_chain_start.data()), + thrust::raw_pointer_cast(d_chain_length.data())); + + cub::TransformInputIterator + d_values_in(thrust::raw_pointer_cast(d_valid_chains_indices.data()), + value_op); + + thrust::device_vector d_unique_out(n_valid_chains); + thrust::device_vector d_aggregates_out(n_valid_chains); + + thrust::device_vector d_num_runs_out(1); + + CustomReduceOp reduction_op; + + // using namespace claragenomics::cudamapper::fused_overlap; + d_temp_storage = nullptr; + temp_storage_bytes = 0; + cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes, d_keys_in, + d_unique_out.data(), d_values_in, + d_aggregates_out.data(), d_num_runs_out.data(), + reduction_op, n_valid_chains); + + // Allocate temporary storage + CGA_CU_CHECK_ERR(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + + cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes, d_keys_in, + d_unique_out.data(), d_values_in, + d_aggregates_out.data(), d_num_runs_out.data(), + reduction_op, n_valid_chains); + + cudaDeviceSynchronize(); + + auto n_fused_overlap = d_num_runs_out[0]; + + CreateOverlap fuse_op(thrust::raw_pointer_cast(d_anchors.data())); + thrust::device_vector d_fused_overlaps(n_fused_overlap); + thrust::transform(d_aggregates_out.data(), + d_aggregates_out.data() + n_fused_overlap, + d_fused_overlaps.data(), fuse_op); + + fused_overlaps.resize(n_fused_overlap); + thrust::copy(d_fused_overlaps.begin(), d_fused_overlaps.end(), + fused_overlaps.begin()); + + +#pragma omp parallel for + for (auto i = 0; i < n_fused_overlap; ++i) { - terminate_anchor(); + Overlap& new_overlap = fused_overlaps[i]; + + std::string query_read_name = read_names[new_overlap.query_read_id_]; + std::string target_read_name = read_names[new_overlap.target_read_id_]; + + 
new_overlap.query_read_name_ = new char[query_read_name.length()]; + strcpy(new_overlap.query_read_name_, query_read_name.c_str()); + + new_overlap.target_read_name_ = new char[target_read_name.length()]; + strcpy(new_overlap.target_read_name_, target_read_name.c_str()); + + new_overlap.query_length_ = read_lengths[new_overlap.query_read_id_]; + new_overlap.target_length_ = read_lengths[new_overlap.target_read_id_]; } - //Fuse overlaps - fuse_overlaps(fused_overlaps, overlaps); + CGA_CU_CHECK_ERR(cudaFree(d_temp_storage)); + + return fused_overlaps; +} + +bool operator==(const Overlap& o1, const Overlap& o2) +{ + bool same = (o1.query_read_id_ == o2.query_read_id_); + same &= (o1.target_read_id_ == o2.target_read_id_); + same &= + (o1.query_start_position_in_read_ == o2.query_start_position_in_read_); + same &= + (o1.target_start_position_in_read_ == o2.target_start_position_in_read_); + same &= (o1.query_end_position_in_read_ == o2.query_end_position_in_read_); + same &= (o1.target_end_position_in_read_ == o2.target_end_position_in_read_); + + same &= (!strcmp(o1.query_read_name_, o2.query_read_name_)); + same &= (!strcmp(o1.target_read_name_, o2.target_read_name_)); + + same &= (o1.relative_strand == o2.relative_strand); + same &= (o1.num_residues_ == o2.num_residues_); + same &= (o1.query_length_ == o2.query_length_); + same &= (o1.target_length_ == o2.target_length_); + return same; +} + +void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, + thrust::device_vector& d_anchors, + const Index& index) +{ + + CGA_NVTX_RANGE(profiler, "OverlapperTriggered::get_overlaps"); + const auto& read_names = index.read_id_to_read_name(); + const auto& read_lengths = index.read_id_to_read_length(); + size_t total_anchors = d_anchors.size(); + + // comparison function object + auto comp = [] __host__ __device__(Anchor i, Anchor j) -> bool { + return (i.query_read_id_ < j.query_read_id_) || + ((i.query_read_id_ == j.query_read_id_) && + (i.target_read_id_ < j.target_read_id_)) || + ((i.query_read_id_ == j.query_read_id_) && + (i.target_read_id_ == j.target_read_id_) && + (i.query_position_in_read_ < j.query_position_in_read_)) || + ((i.query_read_id_ == j.query_read_id_) && + (i.target_read_id_ == j.target_read_id_) && + (i.query_position_in_read_ == j.query_position_in_read_) && + (i.target_position_in_read_ < j.target_position_in_read_)); + }; + + // sort on device + thrust::sort(thrust::device, d_anchors.begin(), d_anchors.end(), comp); + + fused_overlaps_ongpu(fused_overlaps, d_anchors, index); } } // namespace cudamapper } // namespace claragenomics diff --git a/cudamapper/src/overlapper_triggered.hpp b/cudamapper/src/overlapper_triggered.hpp index bd5cec8c6..07a769b89 100644 --- a/cudamapper/src/overlapper_triggered.hpp +++ b/cudamapper/src/overlapper_triggered.hpp @@ -37,7 +37,7 @@ class OverlapperTriggered : public Overlapper /// \param anchors vector of anchors /// \param index Index /// \return vector of Overlap objects - void get_overlaps(std::vector& overlaps, std::vector& anchors, const Index& index) override; + void get_overlaps(std::vector& overlaps, thrust::device_vector& anchors, const Index& index) override; }; } // namespace cudamapper } // namespace claragenomics From d06e1ff3e23bf1fc988870a91637d788c3c26a41 Mon Sep 17 00:00:00 2001 From: Kamesh Arumugam Date: Wed, 6 Nov 2019 14:06:39 -0800 Subject: [PATCH 056/128] [cudamapper:matcher] - remove redundant D2D copy --- cudamapper/src/matcher.cu | 19 +++++++------------ cudamapper/src/matcher.hpp | 2 +- 2 files changed, 8 
insertions(+), 13 deletions(-) diff --git a/cudamapper/src/matcher.cu b/cudamapper/src/matcher.cu index 4b78037ad..156522beb 100644 --- a/cudamapper/src/matcher.cu +++ b/cudamapper/src/matcher.cu @@ -157,6 +157,8 @@ Matcher::Matcher(const Index& index, uint32_t query_target_division_idx) directions_of_reads_h.size() * sizeof(SketchElement::DirectionOfRepresentation), cudaMemcpyHostToDevice)); + anchors_d_.resize(0); + while (representation_min_range <= max_representation) { @@ -327,8 +329,9 @@ Matcher::Matcher(const Index& index, uint32_t query_target_division_idx) read_id_to_pointer_arrays_section_h.clear(); read_id_to_pointer_arrays_section_h.shrink_to_fit(); - CGA_LOG_INFO("Allocating {} bytes for anchors_d", total_anchors * sizeof(Anchor)); - device_buffer anchors_d(total_anchors); + auto num_anchors_so_far = anchors_d_.size(); + anchors_d_.resize(num_anchors_so_far + total_anchors); + Anchor* anchors_d = thrust::raw_pointer_cast(anchors_d_.data()) + num_anchors_so_far; CGA_LOG_INFO("Allocating {} bytes for read_id_to_anchors_section_d", read_id_to_anchors_section_h.size() * sizeof(ArrayBlock)); @@ -345,24 +348,16 @@ Matcher::Matcher(const Index& index, uint32_t query_target_division_idx) read_id_to_sketch_elements_d.data(), read_id_to_sketch_elements_to_check_d.data(), read_id_to_pointer_arrays_section_d.data(), - anchors_d.data(), + anchors_d, read_id_to_anchors_section_d.data()); CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); - auto num_anchors_so_far = anchors_h_.size(); - anchors_h_.resize(num_anchors_so_far + total_anchors); - thrust::copy(anchors_d.data(), anchors_d.data() + total_anchors, anchors_h_.data() + num_anchors_so_far); - //CGA_CU_CHECK_ERR(cudaMemcpy(anchors_h_.data() + num_anchors_so_far, anchors_d.data(), total_anchors * sizeof(Anchor),cudaMemcpyDeviceToHost)); - // clean up device memory CGA_LOG_INFO("Deallocating {} bytes from read_id_to_anchors_section_d", read_id_to_anchors_section_d.size() * sizeof(decltype(read_id_to_anchors_section_d)::value_type)); read_id_to_anchors_section_d.free(); - CGA_LOG_INFO("Deallocating {} bytes from anchors_d", - anchors_d.size() * sizeof(decltype(anchors_d)::value_type)); - anchors_d.free(); CGA_LOG_INFO("Deallocating {} bytes from read_id_to_sketch_elements_d", read_id_to_sketch_elements_d.size() * @@ -395,7 +390,7 @@ Matcher::Matcher(const Index& index, uint32_t query_target_division_idx) thrust::device_vector& Matcher::anchors() { - return anchors_h_; + return anchors_d_; } } // namespace cudamapper diff --git a/cudamapper/src/matcher.hpp b/cudamapper/src/matcher.hpp index 573042f5d..5bab43c8b 100644 --- a/cudamapper/src/matcher.hpp +++ b/cudamapper/src/matcher.hpp @@ -46,7 +46,7 @@ class Matcher private: /// \biref list of anchors - thrust::device_vector anchors_h_; + thrust::device_vector anchors_d_; }; } // namespace cudamapper } // namespace claragenomics From 9bd4fc3e3b406020025ebd566db632f0f50b8cc9 Mon Sep 17 00:00:00 2001 From: Kamesh Arumugam Date: Wed, 13 Nov 2019 16:06:41 -0800 Subject: [PATCH 057/128] [cudamapper:overlapper] - code refactoring, remove explicit omp dependency --- cudamapper/CMakeLists.txt | 7 +- cudamapper/src/overlapper_triggered.cu | 335 ++++++++++++------------- 2 files changed, 160 insertions(+), 182 deletions(-) diff --git a/cudamapper/CMakeLists.txt b/cudamapper/CMakeLists.txt index afee6e60e..bdf1dd93d 100644 --- a/cudamapper/CMakeLists.txt +++ b/cudamapper/CMakeLists.txt @@ -14,15 +14,12 @@ project(cudamapper) add_subdirectory(data) find_package(CUDA 9.0 QUIET REQUIRED) 
-link_directories(/usr/local/cuda/targets/x86_64-linux/lib/) - - if(NOT ${CUDA_FOUND}) message(FATAL_ERROR "CUDA not detected on system. Please install") else() message(STATUS "Using CUDA ${CUDA_VERSION} from ${CUDA_TOOLKIT_ROOT_DIR}") - set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -lineinfo --expt-extended-lambda -use_fast_math -Xcompiler -fopenmp,-Wall,-Wno-pedantic -std=c++14 -x cu") + set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -lineinfo --expt-extended-lambda -use_fast_math -Xcompiler -Wall,-Wno-pedantic -std=c++14") endif() cuda_add_library(index_gpu @@ -45,7 +42,7 @@ target_include_directories(cudamapper_utils PUBLIC include) cuda_add_library(overlapper_triggerred src/overlapper_triggered.cu) target_include_directories(overlapper_triggerred PUBLIC include) -target_link_libraries(overlapper_triggerred logging utils cgaio omp) +target_link_libraries(overlapper_triggerred logging utils cgaio) target_compile_options(overlapper_triggerred PRIVATE -Werror) add_doxygen_source_dir(${CMAKE_CURRENT_SOURCE_DIR}/include) diff --git a/cudamapper/src/overlapper_triggered.cu b/cudamapper/src/overlapper_triggered.cu index 68e28bbf7..be50dc5e7 100644 --- a/cudamapper/src/overlapper_triggered.cu +++ b/cudamapper/src/overlapper_triggered.cu @@ -8,15 +8,6 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. */ -#include -#include -#include -#include -#include -#include -#include -#include - #include #include #include @@ -27,46 +18,51 @@ #include "overlapper_triggered.hpp" #include #include -#include - -namespace claragenomics { -namespace cudamapper { - -__host__ __device__ bool operator==(const Anchor &prev_anchor, - const Anchor ¤t_anchor) { - uint16_t score_threshold = 1; - // Very simple scoring function to quantify quality of overlaps. - auto anchor_score = [] __host__ __device__(Anchor a, Anchor b) { - if ((b.query_position_in_read_ - a.query_position_in_read_) < 350) { - return 2; - } else { - return 1; // TODO change to a more sophisticated scoring method - } - }; - auto score = anchor_score(prev_anchor, current_anchor); - return ((current_anchor.query_read_id_ == prev_anchor.query_read_id_) && - (current_anchor.target_read_id_ == prev_anchor.target_read_id_) && - score > score_threshold); + +namespace claragenomics +{ +namespace cudamapper +{ + +__host__ __device__ bool operator==(const Anchor& lhs, + const Anchor& rhs) +{ + auto score_threshold = 1; + // Very simple scoring function to quantify quality of overlaps. 
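+    // (Note: equality here deliberately means "these two anchors chain together"
+    //  rather than bitwise equality; the cub::DeviceRunLengthEncode pass over the
+    //  sorted anchors further down relies on this to find chain boundaries.)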
+ auto anchor_score = [] __host__ __device__(const Anchor& a, const Anchor& b) { + if ((b.query_position_in_read_ - a.query_position_in_read_) < 350) + { + return 2; + } + else + { + return 1; // TODO change to a more sophisticated scoring method + } + }; + auto score = anchor_score(lhs, rhs); + return ((lhs.query_read_id_ == rhs.query_read_id_) && + (lhs.target_read_id_ == rhs.target_read_id_) && + score > score_threshold); } - + struct cuOverlapKey { - Anchor* anchor; + const Anchor* anchor; }; struct cuOverlapKey_transform { - Anchor* d_anchors; - int32_t* d_chain_start; + const Anchor* d_anchors; + const int32_t* d_chain_start; - cuOverlapKey_transform(Anchor* anchors, int32_t* chain_start) + cuOverlapKey_transform(const Anchor* anchors, const int32_t* chain_start) : d_anchors(anchors) , d_chain_start(chain_start) { } __host__ __device__ __forceinline__ cuOverlapKey - operator()(const int& idx) const + operator()(const int32_t& idx) const { auto anchor_idx = d_chain_start[idx]; @@ -79,8 +75,8 @@ struct cuOverlapKey_transform __host__ __device__ bool operator==(const cuOverlapKey& key0, const cuOverlapKey& key1) { - Anchor* a = key0.anchor; - Anchor* b = key1.anchor; + const Anchor* a = key0.anchor; + const Anchor* b = key1.anchor; return (a->target_read_id_ == b->target_read_id_) && (a->query_read_id_ == b->query_read_id_); } @@ -94,10 +90,10 @@ struct cuOverlapArgs struct cuOverlapArgs_transform { - int32_t* d_chain_start; - int32_t* d_chain_length; + const int32_t* d_chain_start; + const int32_t* d_chain_length; - cuOverlapArgs_transform(int32_t* chain_start, int32_t* chain_length) + cuOverlapArgs_transform(const int32_t* chain_start, const int32_t* chain_length) : d_chain_start(chain_start) , d_chain_length(chain_length) { @@ -112,12 +108,11 @@ struct cuOverlapArgs_transform overlap.overlap_end = overlap_start + overlap_length; overlap.num_residues = overlap_length; overlap.overlap_start = overlap_start; - // printf("%d %d %d\n", idx, overlap_start, overlap_length); return overlap; } }; -struct CustomReduceOp +struct FuseOverlapOp { __host__ __device__ cuOverlapArgs operator()(const cuOverlapArgs& a, const cuOverlapArgs& b) const @@ -134,9 +129,9 @@ struct CustomReduceOp struct CreateOverlap { - Anchor* d_anchors; + const Anchor* d_anchors; - __host__ __device__ __forceinline__ CreateOverlap(Anchor* anchors_ptr) + __host__ __device__ __forceinline__ CreateOverlap(const Anchor* anchors_ptr) : d_anchors(anchors_ptr) { } @@ -170,9 +165,7 @@ struct CreateOverlap new_overlap.target_end_position_in_read_) { new_overlap.relative_strand = RelativeStrand::Reverse; - // std::swap(new_overlap.target_end_position_in_read_, - // new_overlap.target_start_position_in_read_); - auto tmp = new_overlap.target_end_position_in_read_; + auto tmp = new_overlap.target_end_position_in_read_; new_overlap.target_end_position_in_read_ = new_overlap.target_start_position_in_read_; new_overlap.target_start_position_in_read_ = tmp; @@ -185,43 +178,77 @@ struct CreateOverlap }; }; -std::vector -fused_overlaps_ongpu(std::vector& fused_overlaps, - thrust::device_vector& d_anchors, - const Index& index) +void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, + thrust::device_vector& d_anchors, + const Index& index) { - const auto& read_names = index.read_id_to_read_name(); - const auto& read_lengths = index.read_id_to_read_length(); - auto n_anchors = d_anchors.size(); - uint16_t tail_length_for_chain = 3; - thrust::device_vector n_uniques(1); - thrust::device_vector d_chain_length(n_anchors); + 
CGA_NVTX_RANGE(profiler, "OverlapperTriggered::get_overlaps"); + const auto& read_names = index.read_id_to_read_name(); + const auto& read_lengths = index.read_id_to_read_length(); + auto tail_length_for_chain = 3; + auto n_anchors = d_anchors.size(); - thrust::device_vector d_chain_start(n_anchors); + // comparison operator - lambda used to compare Anchors in sort + auto comp = [] __host__ __device__(const Anchor& i, const Anchor& j) -> bool { + return (i.query_read_id_ < j.query_read_id_) || + ((i.query_read_id_ == j.query_read_id_) && + (i.target_read_id_ < j.target_read_id_)) || + ((i.query_read_id_ == j.query_read_id_) && + (i.target_read_id_ == j.target_read_id_) && + (i.query_position_in_read_ < j.query_position_in_read_)) || + ((i.query_read_id_ == j.query_read_id_) && + (i.target_read_id_ == j.target_read_id_) && + (i.query_position_in_read_ == j.query_position_in_read_) && + (i.target_position_in_read_ < j.target_position_in_read_)); + }; + + // sort on device + // TODO : currently thrust::sort requires O(2N) auxiliary storage, implement the same functionality using O(N) auxiliary storage + thrust::sort(thrust::device, d_anchors.begin(), d_anchors.end(), comp); + + // temporary workspace buffer on device + thrust::device_vector d_temp_buf; - thrust::device_vector anchors_buf(d_anchors.size()); + // Do run length encode to compute the chains + // note - identifies the start and end anchor of the chain without moving the anchors + // >>>>>>>>> - Anchor* d_start_anchor = thrust::raw_pointer_cast(anchors_buf.data()); + // d_start_anchor[i] contains the starting anchor of chain i + thrust::device_vector d_start_anchor(d_anchors.size()); - auto d_num_runs_ptr = n_uniques.data(); + // d_chain_length[i] contains the length of chain i + thrust::device_vector d_chain_length(n_anchors); + + // total number of chains found + thrust::device_vector d_nchains(1); - // run length encode to compute the overlaps start and end indices - void* d_temp_storage = NULL; + void* d_temp_storage = nullptr; size_t temp_storage_bytes = 0; + // calculate storage requirement for run length encoding cub::DeviceRunLengthEncode::Encode( - d_temp_storage, temp_storage_bytes, d_anchors.data(), d_start_anchor, - d_chain_length.data(), d_num_runs_ptr, n_anchors); + d_temp_storage, temp_storage_bytes, d_anchors.data(), d_start_anchor.data(), + d_chain_length.data(), d_nchains.data(), n_anchors); - // Allocate temporary storage - CGA_CU_CHECK_ERR(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + // allocate temporary storage + d_temp_buf.resize(temp_storage_bytes); + d_temp_storage = thrust::raw_pointer_cast(d_temp_buf.data()); - // Run encoding + // run encoding cub::DeviceRunLengthEncode::Encode( - d_temp_storage, temp_storage_bytes, d_anchors.data(), d_start_anchor, - d_chain_length.data(), d_num_runs_ptr, n_anchors); + d_temp_storage, temp_storage_bytes, d_anchors.data(), d_start_anchor.data(), + d_chain_length.data(), d_nchains.data(), n_anchors); + + // <<<<<<<<<< + + // memcpy D2H + auto n_chains = d_nchains[0]; + + // use prefix sum to calculate the starting index position of all the chains + // >>>>>>>>>>>> - auto n_chains = n_uniques[0]; + // for a chain i, d_chain_start[i] contains the index of starting anchor from d_anchors array + thrust::device_vector d_chain_start(n_chains); d_temp_storage = nullptr; temp_storage_bytes = 0; @@ -229,155 +256,109 @@ fused_overlaps_ongpu(std::vector& fused_overlaps, d_chain_length.data(), d_chain_start.data(), n_chains); - // Allocate temporary storage - 
CGA_CU_CHECK_ERR(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + // allocate temporary storage + d_temp_buf.resize(temp_storage_bytes); + d_temp_storage = thrust::raw_pointer_cast(d_temp_buf.data()); cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_chain_length.data(), d_chain_start.data(), n_chains); - // storage for the nonzero indices - // indices to d_chain_length/d_chain_start vector - thrust::device_vector d_valid_chains_indices(n_chains); + // <<<<<<<<<<<< + + // calculate overlaps where overlap is a chain with length > tail_length_for_chain + // >>>>>>>>>>>> + + // d_overlaps[j] contains index to d_chain_length/d_chain_start where + // d_chain_length[d_overlaps[j]] and d_chain_start[d_overlaps[j]] corresponds + // to length and index to starting anchor of the chain-d_overlaps[j] (also referred as overlap j) + thrust::device_vector d_overlaps(n_chains); auto indices_end = thrust::copy_if(thrust::make_counting_iterator(0), thrust::make_counting_iterator(n_chains), - d_chain_length.data(), d_valid_chains_indices.data(), + d_chain_length.data(), d_overlaps.data(), [=] __host__ __device__(const int32_t& len) -> bool { return (len >= tail_length_for_chain); }); - auto n_valid_chains = indices_end - d_valid_chains_indices.data(); + auto n_overlaps = indices_end - d_overlaps.data(); + // <<<<<<<<<<<<< - // std::ofstream glog; - // glog.open ("glog.log", std::ios::app); - // glog << " # valid chains/# chains - " << n_valid_chains << "/" << n_chains - // << "\n"; glog.close(); + // >>>>>>>>>>>> + // fuse overlaps using reduce by key operations + // key is a minimal data structure that is required to compare the overlaps cuOverlapKey_transform key_op(thrust::raw_pointer_cast(d_anchors.data()), thrust::raw_pointer_cast(d_chain_start.data())); cub::TransformInputIterator - d_keys_in(thrust::raw_pointer_cast(d_valid_chains_indices.data()), + d_keys_in(thrust::raw_pointer_cast(d_overlaps.data()), key_op); + // value is a minimal data structure that represents a overlap cuOverlapArgs_transform value_op( thrust::raw_pointer_cast(d_chain_start.data()), thrust::raw_pointer_cast(d_chain_length.data())); cub::TransformInputIterator - d_values_in(thrust::raw_pointer_cast(d_valid_chains_indices.data()), + d_values_in(thrust::raw_pointer_cast(d_overlaps.data()), value_op); - thrust::device_vector d_unique_out(n_valid_chains); - thrust::device_vector d_aggregates_out(n_valid_chains); - - thrust::device_vector d_num_runs_out(1); + thrust::device_vector d_fusedoverlap_keys(n_overlaps); + thrust::device_vector d_fusedoverlaps_args(n_overlaps); + thrust::device_vector d_nfused_overlaps(1); - CustomReduceOp reduction_op; + FuseOverlapOp reduction_op; - // using namespace claragenomics::cudamapper::fused_overlap; d_temp_storage = nullptr; temp_storage_bytes = 0; cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes, d_keys_in, - d_unique_out.data(), d_values_in, - d_aggregates_out.data(), d_num_runs_out.data(), - reduction_op, n_valid_chains); + d_fusedoverlap_keys.data(), d_values_in, + d_fusedoverlaps_args.data(), d_nfused_overlaps.data(), + reduction_op, n_overlaps); - // Allocate temporary storage - CGA_CU_CHECK_ERR(cudaMalloc(&d_temp_storage, temp_storage_bytes)); + // allocate temporary storage + d_temp_buf.resize(temp_storage_bytes); + d_temp_storage = thrust::raw_pointer_cast(d_temp_buf.data()); cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes, d_keys_in, - d_unique_out.data(), d_values_in, - d_aggregates_out.data(), d_num_runs_out.data(), - 
reduction_op, n_valid_chains); + d_fusedoverlap_keys.data(), d_values_in, + d_fusedoverlaps_args.data(), d_nfused_overlaps.data(), + reduction_op, n_overlaps); - cudaDeviceSynchronize(); - - auto n_fused_overlap = d_num_runs_out[0]; + // memcpyD2H + auto n_fused_overlap = d_nfused_overlaps[0]; + // construct overlap from the overlap args CreateOverlap fuse_op(thrust::raw_pointer_cast(d_anchors.data())); thrust::device_vector d_fused_overlaps(n_fused_overlap); - thrust::transform(d_aggregates_out.data(), - d_aggregates_out.data() + n_fused_overlap, + thrust::transform(d_fusedoverlaps_args.data(), + d_fusedoverlaps_args.data() + n_fused_overlap, d_fused_overlaps.data(), fuse_op); + // memcpyD2H - move fused overlaps to host fused_overlaps.resize(n_fused_overlap); thrust::copy(d_fused_overlaps.begin(), d_fused_overlaps.end(), fused_overlaps.begin()); - - -#pragma omp parallel for - for (auto i = 0; i < n_fused_overlap; ++i) - { - Overlap& new_overlap = fused_overlaps[i]; - - std::string query_read_name = read_names[new_overlap.query_read_id_]; - std::string target_read_name = read_names[new_overlap.target_read_id_]; - - new_overlap.query_read_name_ = new char[query_read_name.length()]; - strcpy(new_overlap.query_read_name_, query_read_name.c_str()); - - new_overlap.target_read_name_ = new char[target_read_name.length()]; - strcpy(new_overlap.target_read_name_, target_read_name.c_str()); - - new_overlap.query_length_ = read_lengths[new_overlap.query_read_id_]; - new_overlap.target_length_ = read_lengths[new_overlap.target_read_id_]; - } - - CGA_CU_CHECK_ERR(cudaFree(d_temp_storage)); - - return fused_overlaps; -} - -bool operator==(const Overlap& o1, const Overlap& o2) -{ - bool same = (o1.query_read_id_ == o2.query_read_id_); - same &= (o1.target_read_id_ == o2.target_read_id_); - same &= - (o1.query_start_position_in_read_ == o2.query_start_position_in_read_); - same &= - (o1.target_start_position_in_read_ == o2.target_start_position_in_read_); - same &= (o1.query_end_position_in_read_ == o2.query_end_position_in_read_); - same &= (o1.target_end_position_in_read_ == o2.target_end_position_in_read_); - - same &= (!strcmp(o1.query_read_name_, o2.query_read_name_)); - same &= (!strcmp(o1.target_read_name_, o2.target_read_name_)); - - same &= (o1.relative_strand == o2.relative_strand); - same &= (o1.num_residues_ == o2.num_residues_); - same &= (o1.query_length_ == o2.query_length_); - same &= (o1.target_length_ == o2.target_length_); - return same; -} - -void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, - thrust::device_vector& d_anchors, - const Index& index) -{ - - CGA_NVTX_RANGE(profiler, "OverlapperTriggered::get_overlaps"); - const auto& read_names = index.read_id_to_read_name(); - const auto& read_lengths = index.read_id_to_read_length(); - size_t total_anchors = d_anchors.size(); - - // comparison function object - auto comp = [] __host__ __device__(Anchor i, Anchor j) -> bool { - return (i.query_read_id_ < j.query_read_id_) || - ((i.query_read_id_ == j.query_read_id_) && - (i.target_read_id_ < j.target_read_id_)) || - ((i.query_read_id_ == j.query_read_id_) && - (i.target_read_id_ == j.target_read_id_) && - (i.query_position_in_read_ < j.query_position_in_read_)) || - ((i.query_read_id_ == j.query_read_id_) && - (i.target_read_id_ == j.target_read_id_) && - (i.query_position_in_read_ == j.query_position_in_read_) && - (i.target_position_in_read_ < j.target_position_in_read_)); - }; - - // sort on device - thrust::sort(thrust::device, d_anchors.begin(), d_anchors.end(), 
comp); - - fused_overlaps_ongpu(fused_overlaps, d_anchors, index); + // <<<<<<<<<<<< + + // parallel update the overlaps to include the corresponding read names [parallel on host] + thrust::transform(thrust::host, + fused_overlaps.data(), + fused_overlaps.data() + n_fused_overlap, + fused_overlaps.data(), [&](Overlap& new_overlap) { + std::string query_read_name = read_names[new_overlap.query_read_id_]; + std::string target_read_name = read_names[new_overlap.target_read_id_]; + + new_overlap.query_read_name_ = new char[query_read_name.length()]; + strcpy(new_overlap.query_read_name_, query_read_name.c_str()); + + new_overlap.target_read_name_ = new char[target_read_name.length()]; + strcpy(new_overlap.target_read_name_, target_read_name.c_str()); + + new_overlap.query_length_ = read_lengths[new_overlap.query_read_id_]; + new_overlap.target_length_ = read_lengths[new_overlap.target_read_id_]; + return new_overlap; + }); } } // namespace cudamapper } // namespace claragenomics From bb2b1ac2ca97f4d7fe645e1659b448d822efb7ab Mon Sep 17 00:00:00 2001 From: Kamesh Arumugam Date: Wed, 13 Nov 2019 21:59:58 -0800 Subject: [PATCH 058/128] add cub to submodules --- .gitmodules | 3 +++ 3rdparty/cub | 1 + 2 files changed, 4 insertions(+) create mode 160000 3rdparty/cub diff --git a/.gitmodules b/.gitmodules index 767025463..7f8ffef0d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -23,3 +23,6 @@ [submodule "3rdparty/spoa"] path = 3rdparty/spoa url = https://github.com/rvaser/spoa.git +[submodule "3rdparty/cub"] + path = 3rdparty/cub + url = git@github.com:NVlabs/cub.git diff --git a/3rdparty/cub b/3rdparty/cub new file mode 160000 index 000000000..c3cceac11 --- /dev/null +++ b/3rdparty/cub @@ -0,0 +1 @@ +Subproject commit c3cceac115c072fb63df1836ff46d8c60d9eb304 From dcfbd2f77cda94e5214836b61e99a87164e2e500 Mon Sep 17 00:00:00 2001 From: Kamesh Arumugam Date: Wed, 13 Nov 2019 22:03:03 -0800 Subject: [PATCH 059/128] [cudamapper] update CMake options to include cub --- cmake/3rdparty.cmake | 4 ++++ cudamapper/CMakeLists.txt | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cmake/3rdparty.cmake b/cmake/3rdparty.cmake index 59b055a84..2464e10ac 100644 --- a/cmake/3rdparty.cmake +++ b/cmake/3rdparty.cmake @@ -36,3 +36,7 @@ if (NOT TARGET spoa) # Don't show warnings when compiling the 3rd party library target_compile_options(spoa PRIVATE -w) endif() + +set(CUB_DIR ${PROJECT_SOURCE_DIR}/3rdparty/cub CACHE STRING + "Path to cub repo") + diff --git a/cudamapper/CMakeLists.txt b/cudamapper/CMakeLists.txt index bdf1dd93d..cc1dc1624 100644 --- a/cudamapper/CMakeLists.txt +++ b/cudamapper/CMakeLists.txt @@ -41,7 +41,7 @@ target_include_directories(cudamapper_utils PUBLIC include) cuda_add_library(overlapper_triggerred src/overlapper_triggered.cu) -target_include_directories(overlapper_triggerred PUBLIC include) +target_include_directories(overlapper_triggerred PUBLIC include ${CUB_DIR}) target_link_libraries(overlapper_triggerred logging utils cgaio) target_compile_options(overlapper_triggerred PRIVATE -Werror) From 499d26f4d4188d7c5b547d1dd223434dc43e51d4 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Thu, 14 Nov 2019 13:30:14 -0500 Subject: [PATCH 060/128] [common] removed graph runtime interface --- .../include/claragenomics/utils/graph.hpp | 80 ++++++------------- common/utils/tests/TestGraph.cpp | 4 +- cudapoa/src/cudapoa_batch.cpp | 14 ++-- 3 files changed, 33 insertions(+), 65 deletions(-) diff --git a/common/utils/include/claragenomics/utils/graph.hpp 
b/common/utils/include/claragenomics/utils/graph.hpp index a8af2a59a..e0911859c 100644 --- a/common/utils/include/claragenomics/utils/graph.hpp +++ b/common/utils/include/claragenomics/utils/graph.hpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -22,10 +22,6 @@ namespace claragenomics { -using node_id_t = int32_t; -using edge_weight_t = int32_t; -using edge_t = std::pair; - /// \struct PairHash /// Hash function for a pair struct PairHash @@ -45,21 +41,18 @@ struct PairHash class Graph { public: - /// \brief Default dtor - ~Graph() = default; - - /// \brief Add edges to a graph - /// - /// \param node_id_from Source node ID - /// \param node_id_to Sink node ID - /// \param weight Edge weight - virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to, edge_weight_t weight) = 0; + /// Typedef for node ID + using node_id_t = int32_t; + /// Tpyedef for edge weight + using edge_weight_t = int32_t; + /// Typedef for edge + using edge_t = std::pair; /// \brief Get a list of adjacent nodes to a given node /// /// \param node Node for which adjacent nodes are requested /// \return Vector of adjacent node IDs - virtual const std::vector& get_adjacent_nodes(node_id_t node) const + const std::vector& get_adjacent_nodes(node_id_t node) const { auto iter = adjacent_nodes_.find(node); if (iter != adjacent_nodes_.end()) @@ -68,14 +61,14 @@ class Graph } else { - return empty_; + return Graph::empty_; } } /// \brief List all node IDs in the graph /// /// \return A vector of node IDs - virtual const std::vector get_node_ids() const + const std::vector get_node_ids() const { std::vector nodes; for (auto iter : adjacent_nodes_) @@ -89,21 +82,16 @@ class Graph /// \brief Get a list of all edges in the graph /// /// \return A vector of edges - virtual const std::vector> get_edges() const + const std::vector> get_edges() const { - std::vector> edges; - for (auto iter : edges_) - { - edges.push_back({iter.first, iter.second}); - } - return edges; + return {begin(edges_), end(edges_)}; } /// \brief Add string labels to a node ID /// /// \param node ID of node /// \param label Label to attach to that node ID - virtual void set_node_label(node_id_t node, const std::string& label) + void set_node_label(node_id_t node, const std::string& label) { node_labels_.insert({node, label}); } @@ -113,7 +101,7 @@ class Graph /// \param node node ID for label query /// \return String label for associated node. Returns empty string if // no label is associated or node ID doesn't exist. 
- virtual std::string get_node_label(node_id_t node) const + std::string get_node_label(node_id_t node) const { auto found_node = node_labels_.find(node); if (found_node != node_labels_.end()) @@ -126,29 +114,14 @@ class Graph } } - /// \brief Serialize graph structure to dot format - /// - /// \return A string encoding the graph in dot format - virtual std::string serialize_to_dot() const = 0; - protected: - Graph() = default; - /// \brief Check if a directed edge exists in the grph /// /// \param edge A directed edge /// \return Boolean result of check bool directed_edge_exists(edge_t edge) { - auto find_edge = edges_.find(edge); - if (find_edge == edges_.end()) - { - return false; - } - else - { - return true; - } + return edges_.find(edge) != edges_.end(); } /// \brief Update the adjacent nodes based on edge information @@ -173,12 +146,13 @@ class Graph void node_labels_to_dot(std::ostringstream& dot_str) const { const std::vector nodes = get_node_ids(); - for (auto node : nodes) + for (auto iter : adjacent_nodes_) { - auto label_found = node_labels_.find(node); + auto& node_id = iter.first; + auto label_found = node_labels_.find(node_id); if (label_found != node_labels_.end()) { - dot_str << node << " [label=\"" << label_found->second << "\"];\n"; + dot_str << node_id << " [label=\"" << label_found->second << "\"];\n"; } } } @@ -215,16 +189,12 @@ class Graph class DirectedGraph : public Graph { public: - DirectedGraph() = default; - - ~DirectedGraph() = default; - /// \brief Add directed edges to graph. /// /// \param node_id_from Source node ID /// \param node_id_to Sink node ID /// \param weight Edge weight - virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to, edge_weight_t weight = 0) override + void add_edge(node_id_t node_id_from, node_id_t node_id_to, edge_weight_t weight = 0) { auto edge = edge_t(node_id_from, node_id_to); if (!directed_edge_exists(edge)) @@ -237,7 +207,7 @@ class DirectedGraph : public Graph /// \brief Serialize graph structure to dot format /// /// \return A string encoding the graph in dot format - virtual std::string serialize_to_dot() const override + std::string serialize_to_dot() const { std::ostringstream dot_str; dot_str << "digraph g {\n"; @@ -257,16 +227,12 @@ class DirectedGraph : public Graph class UndirectedGraph : public Graph { public: - UndirectedGraph() = default; - - ~UndirectedGraph() = default; - /// \brief Add undirected edges to graph. 
/// /// \param node_id_from Source node ID /// \param node_id_to Sink node ID /// \param weight Edge weight - virtual void add_edge(node_id_t node_id_from, node_id_t node_id_to, edge_weight_t weight = 0) override + void add_edge(node_id_t node_id_from, node_id_t node_id_to, edge_weight_t weight = 0) { auto edge = edge_t(node_id_from, node_id_to); auto edge_reversed = edge_t(node_id_to, node_id_from); @@ -281,7 +247,7 @@ class UndirectedGraph : public Graph /// \brief Serialize graph structure to dot format /// /// \return A string encoding the graph in dot format - virtual std::string serialize_to_dot() const override + std::string serialize_to_dot() const { std::ostringstream dot_str; dot_str << "graph g {\n"; diff --git a/common/utils/tests/TestGraph.cpp b/common/utils/tests/TestGraph.cpp index 947674774..fe8a40b79 100644 --- a/common/utils/tests/TestGraph.cpp +++ b/common/utils/tests/TestGraph.cpp @@ -15,7 +15,7 @@ namespace claragenomics { -TEST(GraphTest, DirectediGraph) +TEST(GraphTest, DirectedGraph) { DirectedGraph graph; @@ -44,7 +44,7 @@ TEST(GraphTest, DirectediGraph) EXPECT_EQ(std::find(adjacent_nodes_to_3.begin(), adjacent_nodes_to_3.end(), 2), adjacent_nodes_to_3.end()); } -TEST(GraphTest, UndirectediGraph) +TEST(GraphTest, UndirectedGraph) { UndirectedGraph graph; diff --git a/cudapoa/src/cudapoa_batch.cpp b/cudapoa/src/cudapoa_batch.cpp index 5251f9050..668375363 100644 --- a/cudapoa/src/cudapoa_batch.cpp +++ b/cudapoa/src/cudapoa_batch.cpp @@ -351,11 +351,13 @@ void CudapoaBatch::get_graphs(std::vector& graphs, std::vector(&(output_details_h_->consensus[poa * CUDAPOA_MAX_CONSENSUS_SIZE])); // We use the first two entries in the consensus buffer to log error during kernel execution // c[0] == 0 means an error occured and when that happens the error type is saved in c[1] @@ -366,7 +368,7 @@ void CudapoaBatch::get_graphs(std::vector& graphs, std::vectorwindow_details[poa].seq_len_buffer_offset; int32_t num_nodes = input_details_h_->sequence_lengths[seq_0_offset]; uint8_t* nodes = &graph_details_h_->nodes[max_nodes_per_window_ * poa]; @@ -374,14 +376,14 @@ void CudapoaBatch::get_graphs(std::vector& graphs, std::vector(nodes[n]))); uint16_t num_edges = graph_details_h_->incoming_edge_count[poa * max_nodes_per_window_ + n]; for (uint16_t e = 0; e < num_edges; e++) { - int32_t idx = poa * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES + n * CUDAPOA_MAX_NODE_EDGES + e; - node_id_t src = graph_details_h_->incoming_edges[idx]; - edge_weight_t weight = graph_details_h_->incoming_edge_weights[idx]; + int32_t idx = poa * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES + n * CUDAPOA_MAX_NODE_EDGES + e; + Graph::node_id_t src = graph_details_h_->incoming_edges[idx]; + Graph::edge_weight_t weight = graph_details_h_->incoming_edge_weights[idx]; graph.add_edge(src, sink, weight); } } From 1c1461cd69b6ceeef3a6eeee984c2d272c45cb4c Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Fri, 15 Nov 2019 08:13:07 -0500 Subject: [PATCH 061/128] [common] fix graph class member implementations update cga tests cga structure --- .../include/claragenomics/utils/graph.hpp | 24 ++++++++++++------- common/utils/tests/CMakeLists.txt | 4 ++-- cudaaligner/tests/CMakeLists.txt | 4 ++-- cudamapper/tests/CMakeLists.txt | 5 ++-- cudapoa/src/cudapoa_batch.cpp | 8 +++---- cudapoa/tests/CMakeLists.txt | 4 ++-- 6 files changed, 28 insertions(+), 21 deletions(-) diff --git a/common/utils/include/claragenomics/utils/graph.hpp b/common/utils/include/claragenomics/utils/graph.hpp index e0911859c..48c457aac 100644 --- 
a/common/utils/include/claragenomics/utils/graph.hpp +++ b/common/utils/include/claragenomics/utils/graph.hpp @@ -145,15 +145,9 @@ class Graph /// \param dot_str Output string stream to serialize labels to void node_labels_to_dot(std::ostringstream& dot_str) const { - const std::vector nodes = get_node_ids(); - for (auto iter : adjacent_nodes_) + for (auto iter : node_labels_) { - auto& node_id = iter.first; - auto label_found = node_labels_.find(node_id); - if (label_found != node_labels_.end()) - { - dot_str << node_id << " [label=\"" << label_found->second << "\"];\n"; - } + dot_str << iter.first << " [label=\"" << iter.second << "\"];\n"; } } @@ -189,6 +183,13 @@ class Graph class DirectedGraph : public Graph { public: + /// Typedef for node ID + using Graph::node_id_t; + /// Tpyedef for edge weight + using Graph::edge_weight_t; + /// Typedef for edge + using Graph::edge_t; + /// \brief Add directed edges to graph. /// /// \param node_id_from Source node ID @@ -227,6 +228,13 @@ class DirectedGraph : public Graph class UndirectedGraph : public Graph { public: + /// Typedef for node ID + using Graph::node_id_t; + /// Tpyedef for edge weight + using Graph::edge_weight_t; + /// Typedef for edge + using Graph::edge_t; + /// \brief Add undirected edges to graph. /// /// \param node_id_from Source node ID diff --git a/common/utils/tests/CMakeLists.txt b/common/utils/tests/CMakeLists.txt index 77ab115d5..8b4b51398 100644 --- a/common/utils/tests/CMakeLists.txt +++ b/common/utils/tests/CMakeLists.txt @@ -8,7 +8,7 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. # -project(cgautilstests) +set(TARGET_NAME cgautilstests) set(SOURCES main.cpp @@ -17,4 +17,4 @@ set(SOURCES set(LIBS utils) -cga_add_tests(${PROJECT_NAME} "${SOURCES}" "${LIBS}") +cga_add_tests(${TARGET_NAME} "${SOURCES}" "${LIBS}") diff --git a/cudaaligner/tests/CMakeLists.txt b/cudaaligner/tests/CMakeLists.txt index ebe02b379..d202a3376 100644 --- a/cudaaligner/tests/CMakeLists.txt +++ b/cudaaligner/tests/CMakeLists.txt @@ -8,7 +8,7 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. # -project(cudaalignertests) +set(TARGET_NAME cudaalignertests) set(SOURCES main.cpp @@ -24,4 +24,4 @@ set(LIBS cudaaligner utils) -cga_add_tests(${PROJECT_NAME} "${SOURCES}" "${LIBS}") +cga_add_tests(${TARGET_NAME} "${SOURCES}" "${LIBS}") diff --git a/cudamapper/tests/CMakeLists.txt b/cudamapper/tests/CMakeLists.txt index bfeb95069..f7e7c704f 100644 --- a/cudamapper/tests/CMakeLists.txt +++ b/cudamapper/tests/CMakeLists.txt @@ -8,8 +8,7 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. 
# -cmake_minimum_required(VERSION 3.10.2) -project(cudamappertests) +set(TARGET_NAME cudamappertests) set(SOURCES main.cpp @@ -29,4 +28,4 @@ set(LIBS overlapper_triggerred cudamapper_utils) -cga_add_tests(${PROJECT_NAME} "${SOURCES}" "${LIBS}") \ No newline at end of file +cga_add_tests(${TARGET_NAME} "${SOURCES}" "${LIBS}") diff --git a/cudapoa/src/cudapoa_batch.cpp b/cudapoa/src/cudapoa_batch.cpp index 668375363..6360b3613 100644 --- a/cudapoa/src/cudapoa_batch.cpp +++ b/cudapoa/src/cudapoa_batch.cpp @@ -376,14 +376,14 @@ void CudapoaBatch::get_graphs(std::vector& graphs, std::vector(nodes[n]))); uint16_t num_edges = graph_details_h_->incoming_edge_count[poa * max_nodes_per_window_ + n]; for (uint16_t e = 0; e < num_edges; e++) { - int32_t idx = poa * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES + n * CUDAPOA_MAX_NODE_EDGES + e; - Graph::node_id_t src = graph_details_h_->incoming_edges[idx]; - Graph::edge_weight_t weight = graph_details_h_->incoming_edge_weights[idx]; + int32_t idx = poa * max_nodes_per_window_ * CUDAPOA_MAX_NODE_EDGES + n * CUDAPOA_MAX_NODE_EDGES + e; + DirectedGraph::node_id_t src = graph_details_h_->incoming_edges[idx]; + DirectedGraph::edge_weight_t weight = graph_details_h_->incoming_edge_weights[idx]; graph.add_edge(src, sink, weight); } } diff --git a/cudapoa/tests/CMakeLists.txt b/cudapoa/tests/CMakeLists.txt index 965bddb0b..affd0a940 100644 --- a/cudapoa/tests/CMakeLists.txt +++ b/cudapoa/tests/CMakeLists.txt @@ -8,7 +8,7 @@ # license agreement from NVIDIA CORPORATION is strictly prohibited. # -project(cudapoatests) +set(TARGET_NAME cudapoatests) set(SOURCES main.cpp @@ -28,4 +28,4 @@ set(LIBS cudapoa spoa) -cga_add_tests(${PROJECT_NAME} "${SOURCES}" "${LIBS}") +cga_add_tests(${TARGET_NAME} "${SOURCES}" "${LIBS}") From 7f09a680fd79c2268a8229943e343630b6d2cff1 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Fri, 15 Nov 2019 14:43:34 +0100 Subject: [PATCH 062/128] [cudamapper] generate_anchors: parallelize over anchors instead of representations --- cudamapper/src/matcher_gpu.cu | 111 +++++++++++++++++++-------------- cudamapper/src/matcher_gpu.cuh | 1 + 2 files changed, 66 insertions(+), 46 deletions(-) diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index 4908fa2f3..06478c188 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -18,6 +18,39 @@ #include #include +namespace +{ +template +__device__ RandomAccessIterator lower_bound(RandomAccessIterator lower_bound, RandomAccessIterator upper_bound, ValueType query) +{ + while (upper_bound - lower_bound > 0) + { + RandomAccessIterator mid = lower_bound + (upper_bound - lower_bound) / 2; + const auto mid_value = *mid; + if (mid_value < query) + lower_bound = mid + 1; + else + upper_bound = mid; + } + return lower_bound; +} + +template +__device__ RandomAccessIterator upper_bound(RandomAccessIterator lower_bound, RandomAccessIterator upper_bound, ValueType query) +{ + while (upper_bound - lower_bound > 0) + { + RandomAccessIterator mid = lower_bound + (upper_bound - lower_bound) / 2; + const auto mid_value = *mid; + if (mid_value <= query) + lower_bound = mid + 1; + else + upper_bound = mid; + } + return lower_bound; +} +} // namespace + namespace claragenomics { @@ -107,24 +140,11 @@ __global__ void find_query_target_matches_kernel(int64_t* const found_target_ind if (i >= n_query_representations) return; - const representation_t query = query_representations_d[i]; - const representation_t* lower_bound = target_representations_d; - const representation_t* 
upper_bound = target_representations_d + n_target_representations; - int64_t found_target_index = -1; - while (upper_bound - lower_bound > 0) - { - const representation_t* mid = lower_bound + (upper_bound - lower_bound) / 2; - const representation_t target = *mid; - if (target < query) - lower_bound = mid + 1; - else if (target > query) - upper_bound = mid; - else - { - found_target_index = mid - target_representations_d; - break; - } - } + const representation_t query = query_representations_d[i]; + int64_t found_target_index = -1; + const representation_t* lb = lower_bound(target_representations_d, target_representations_d + n_target_representations, query); + if (*lb == query) + found_target_index = lb - target_representations_d; found_target_indices[i] = found_target_index; } @@ -145,9 +165,10 @@ void generate_anchors(thrust::device_vector& anchors, assert(target_read_ids.size() == target_positions_in_read.size()); const int32_t n_threads = 256; - const int32_t n_blocks = ceiling_divide(get_size(found_target_indices_d), n_threads); + const int32_t n_blocks = ceiling_divide(get_size(anchors), n_threads); generate_anchors_kernel<<>>( anchors.data().get(), + get_size(anchors), anchor_starting_indices.data().get(), query_starting_index_of_each_representation_d.data().get(), found_target_indices_d.data().get(), @@ -161,6 +182,7 @@ void generate_anchors(thrust::device_vector& anchors, __global__ void generate_anchors_kernel( Anchor* const anchors_d, + int64_t n_anchors, const int64_t* const anchor_starting_index_d, const std::uint32_t* const query_starting_index_of_each_representation_d, const std::int64_t* const found_target_indices_d, @@ -171,40 +193,37 @@ __global__ void generate_anchors_kernel( const read_id_t* const target_read_ids, const position_in_read_t* const target_positions_in_read) { - const std::int32_t i = blockIdx.x * blockDim.x + threadIdx.x; + std::int64_t anchor_idx = blockIdx.x * blockDim.x + threadIdx.x; - if (i >= n_query_representations) + if (anchor_idx >= n_anchors) return; - const std::int64_t j = found_target_indices_d[i]; - if (j < 0) - return; + const std::int64_t representation_idx = upper_bound(anchor_starting_index_d, anchor_starting_index_d + n_query_representations, anchor_idx) - anchor_starting_index_d; + + assert(representation_idx < n_query_representations); + + std::uint32_t relative_anchor_index = anchor_idx; + if (representation_idx > 0) + relative_anchor_index -= anchor_starting_index_d[representation_idx - 1]; - std::int64_t anchor_idx = 0; - if (i > 0) - anchor_idx = anchor_starting_index_d[i - 1]; - std::uint32_t query_idx = query_starting_index_of_each_representation_d[i]; - const std::uint32_t query_end = query_starting_index_of_each_representation_d[i + 1]; + const std::int64_t j = found_target_indices_d[representation_idx]; + assert(j >= 0); + const std::uint32_t query_begin = query_starting_index_of_each_representation_d[representation_idx]; const std::uint32_t target_begin = target_starting_index_of_each_representation_d[j]; const std::uint32_t target_end = target_starting_index_of_each_representation_d[j + 1]; - while (query_idx < query_end) - { - std::uint32_t target_idx = target_begin; - while (target_idx < target_end) - { - Anchor a; - a.query_read_id_ = query_read_ids[query_idx]; - a.target_read_id_ = target_read_ids[target_idx]; - a.query_position_in_read_ = query_positions_in_read[query_idx]; - a.target_position_in_read_ = target_positions_in_read[target_idx]; - anchors_d[anchor_idx] = a; - ++anchor_idx; - ++target_idx; - } - 
++query_idx; - } - assert(anchor_idx == anchor_starting_index_d[i] || anchor_starting_index_d[i - 1] == anchor_starting_index_d[i]); + const std::uint32_t n_targets = target_end - target_begin; + const std::uint32_t query_idx = query_begin + relative_anchor_index / n_targets; + const std::uint32_t target_idx = target_begin + relative_anchor_index % n_targets; + + assert(query_idx < query_starting_index_of_each_representation_d[representation_idx + 1]); + + Anchor a; + a.query_read_id_ = query_read_ids[query_idx]; + a.target_read_id_ = target_read_ids[target_idx]; + a.query_position_in_read_ = query_positions_in_read[query_idx]; + a.target_position_in_read_ = target_positions_in_read[target_idx]; + anchors_d[anchor_idx] = a; } } // namespace matcher_gpu diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index 1aacdcbac..3c274f766 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -139,6 +139,7 @@ __global__ void find_query_target_matches_kernel(int64_t* const found_target_ind /// TODO __global__ void generate_anchors_kernel( Anchor* const anchors_d, + int64_t n_anchors, const int64_t* const anchor_starting_index_d, const std::uint32_t* const query_starting_index_of_each_representation_d, const std::int64_t* const found_target_indices_d, From d6ddc36cea3f8f54c4e17ee3bd1764d0f8007252 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Fri, 15 Nov 2019 16:18:12 +0100 Subject: [PATCH 063/128] Reverting main.cpp so it uses old indexer and matcher. We'll adapt main.cpp to use the new indexer and matcher in a separate PR (this will also require adapting overlapper to accept two indices) --- cudamapper/src/main.cpp | 68 ++++------------------------------------- 1 file changed, 6 insertions(+), 62 deletions(-) diff --git a/cudamapper/src/main.cpp b/cudamapper/src/main.cpp index 53bd326d2..25fe6609d 100644 --- a/cudamapper/src/main.cpp +++ b/cudamapper/src/main.cpp @@ -23,9 +23,7 @@ #include #include "claragenomics/cudamapper/index.hpp" -#include "claragenomics/cudamapper/index_two_indices.hpp" #include "claragenomics/cudamapper/overlapper.hpp" -#include "claragenomics/cudamapper/matcher_two_indices.hpp" #include "matcher.hpp" #include "overlapper_triggered.hpp" @@ -161,65 +159,11 @@ int main(int argc, char* argv[]) std::chrono::milliseconds matcher_time = std::chrono::duration_values::zero(); std::chrono::milliseconds overlapper_time = std::chrono::duration_values::zero(); - for (size_t query_start = 0; query_start < queries; query_start += index_size) - { // outer loop over query - size_t query_end = std::min(query_start + index_size, static_cast(queries) - 1); - - std::cerr << "Query range: " << query_start << " - " << query_end << std::endl; - - std::unique_ptr query_index(nullptr); - std::unique_ptr target_index(nullptr); - std::unique_ptr matcher(nullptr); + //Now carry out all the looped polling + //size_t query_start = 0; + //size_t query_end = query_start + index_size - 1; - { - CGA_NVTX_RANGE(profiler, "generate_query_index"); - auto start_time = std::chrono::high_resolution_clock::now(); - query_index = claragenomics::cudamapper::IndexTwoIndices::create_index(query_parser.get(), - query_start, - query_end + 1, // <- past the last - k, - w); - index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); - std::cerr << "Query index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; - } - - size_t target_start = 0; - 
// If all_to_all mode, then we can optimzie by starting the target sequences from the same index as - // query because all indices before the current query index are guaranteed to have been processed in - // a2a mapping. - if (all_to_all) - { - target_start = query_start; - } - for (; target_start < targets; target_start += target_index_size) - { - size_t target_end = std::min(target_start + target_index_size, static_cast(targets) - 1); - - std::cerr << "Target range: " << target_start << " - " << target_end << std::endl; - - { - CGA_NVTX_RANGE(profiler, "generate_target_index"); - auto start_time = std::chrono::high_resolution_clock::now(); - target_index = claragenomics::cudamapper::IndexTwoIndices::create_index(target_parser.get(), - target_start, - target_end + 1, // <- past the last - k, - w); - index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); - std::cerr << "Target index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; - } - { - CGA_NVTX_RANGE(profiler, "generate_matcher"); - auto start_time = std::chrono::high_resolution_clock::now(); - matcher = claragenomics::cudamapper::MatcherTwoIndices::create_matcher(*query_index, - *target_index); - matcher_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); - std::cerr << "Matcher generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; - } - } - } - - /*for (size_t query_start = 0; query_start < queries; query_start += index_size) + for (size_t query_start = 0; query_start < queries; query_start += index_size) { // outer loop over query size_t query_end = std::min(query_start + index_size, static_cast(queries)); auto start_time = std::chrono::high_resolution_clock::now(); @@ -307,7 +251,7 @@ int main(int argc, char* argv[]) //the new target start is set to be the next read index after the last read //from the previous chunk } - }*/ + } // Insert empty overlap vector to denote end of processing. 
// The lambda function for adding overlaps to queue ensures that no empty @@ -356,4 +300,4 @@ void help(int32_t exit_code = 0) << std::endl; exit(exit_code); -} +} \ No newline at end of file From b83e39fc75893448296d56d33f5bbab79b8470e2 Mon Sep 17 00:00:00 2001 From: Mike Vella Date: Fri, 15 Nov 2019 15:42:33 +0000 Subject: [PATCH 064/128] Minimizer hashing now working --- cudamapper/src/index_gpu.cuh | 14 +++-- cudamapper/src/minimizer.cu | 61 +++++++++++-------- cudamapper/src/minimizer.hpp | 4 +- cudamapper/tests/Test_CudamapperIndexGPU.cu | 4 +- cudamapper/tests/Test_CudamapperMinimizer.cpp | 3 +- 5 files changed, 52 insertions(+), 34 deletions(-) diff --git a/cudamapper/src/index_gpu.cuh b/cudamapper/src/index_gpu.cuh index 7d4e1fd55..e4bd3fa83 100644 --- a/cudamapper/src/index_gpu.cuh +++ b/cudamapper/src/index_gpu.cuh @@ -57,7 +57,7 @@ public: /// \param kmer_size k - the kmer length /// \param window_size w - the length of the sliding window used to find sketch elements /// \param read_ranges - the ranges of reads in the query file to use for mapping, index by their position (e.g in the FASA file) - IndexGPU(const std::vector& parsers, const std::uint64_t kmer_size, const std::uint64_t window_size, const std::vector>& read_ranges); + IndexGPU(const std::vector& parsers, const std::uint64_t kmer_size, const std::uint64_t window_size, const std::vector>& read_ranges, const bool hash_representations = true); /// \brief Constructor IndexGPU(); @@ -100,7 +100,7 @@ public: /// \brief max_representation /// \return the largest possible representation - std::uint64_t maximum_representation() const override { return (1 << (kmer_size_ * 2)) - 1; }; + std::uint64_t maximum_representation() const override { return (uint64_t(1) << 32) - 1;; }; private: /// \brief generates the index @@ -110,6 +110,7 @@ private: const std::uint64_t kmer_size_; const std::uint64_t window_size_; std::uint64_t number_of_reads_; + const bool hash_representations; bool reached_end_of_input_; std::vector positions_in_reads_; @@ -492,11 +493,12 @@ void build_index(const std::uint64_t number_of_reads, } // namespace details template -IndexGPU::IndexGPU(const std::vector& parsers, const std::uint64_t kmer_size, const std::uint64_t window_size, const std::vector>& read_ranges) +IndexGPU::IndexGPU(const std::vector& parsers, const std::uint64_t kmer_size, const std::uint64_t window_size, const std::vector>& read_ranges, const bool hash_representations) : kmer_size_(kmer_size) , window_size_(window_size) , number_of_reads_(0) , reached_end_of_input_(false) + , hash_representations(hash_representations) { generate_index(parsers, read_ranges); } @@ -506,6 +508,7 @@ IndexGPU::IndexGPU() : kmer_size_(0) , window_size_(0) , number_of_reads_(0) + , hash_representations(true) { } @@ -665,13 +668,14 @@ void IndexGPU::generate_index(const std::vector representations_from_this_loop_d = std::move(sketch_elements.representations_d); device_buffer rest_from_this_loop_d = std::move(sketch_elements.rest_d); diff --git a/cudamapper/src/minimizer.cu b/cudamapper/src/minimizer.cu index 358f51838..730df0039 100644 --- a/cudamapper/src/minimizer.cu +++ b/cudamapper/src/minimizer.cu @@ -39,15 +39,16 @@ read_id_t Minimizer::read_id() const return read_id_; } - -__device__ representation_t wang_hash64(representation_t key){ - key = (~key) + (key << 21);// key = (key << 21) - key - 1; - key = key ^ (key >> 24); - key = (key + (key << 3)) + (key << 8);// key * 265 - key = key ^ (key >> 14); - key = (key + (key << 2)) + (key << 4);// key * 21 - key 
= key ^ (key >> 28); - key = key + (key << 31); +__device__ representation_t wang_hash64(representation_t key) +{ + uint64_t mask = (uint64_t(1) << 32) - 1; + key = (~key + (key << 21)) & mask; + key = key ^ key >> 24; + key = ((key + (key << 3)) + (key << 8)) & mask; + key = key ^ key >> 14; + key = ((key + (key << 2)) + (key << 4)) & mask; + key = key ^ key >> 28; + key = (key + (key << 31)) & mask; return key; } @@ -77,7 +78,8 @@ __global__ void find_front_end_minimizers(const std::uint64_t minimizer_size, char* const window_minimizers_direction, position_in_read_t* const window_minimizers_position_in_read, const ArrayBlock* const read_id_to_windows_section, - std::uint32_t* const read_id_to_minimizers_written) + std::uint32_t* const read_id_to_minimizers_written, + const bool hash_representations) { // TODO: simplify this method similarly to find_back_end_minimizers @@ -206,8 +208,10 @@ __global__ void find_front_end_minimizers(const std::uint64_t minimizer_size, reverse_representation |= reverse_basepair_hashes[threadIdx.x + i] << 2 * i; } - forward_representation = wang_hash64(forward_representation); - reverse_representation = wang_hash64(reverse_representation); + if (hash_representations) { + forward_representation = wang_hash64(forward_representation); + reverse_representation = wang_hash64(reverse_representation); + } if (forward_representation <= reverse_representation) { @@ -378,7 +382,8 @@ __global__ void find_central_minimizers(const std::uint64_t minimizer_size, char* const window_minimizers_direction, position_in_read_t* const window_minimizers_position_in_read, const ArrayBlock* const read_id_to_windows_section, - std::uint32_t* const read_id_to_minimizers_written) + std::uint32_t* const read_id_to_minimizers_written, + const bool hash_representations) { // See find_front_end_minimizers for more details about the algorithm @@ -473,9 +478,10 @@ __global__ void find_central_minimizers(const std::uint64_t minimizer_size, reverse_representation |= reverse_basepair_hashes[kmer_index + i] << 2 * i; } - forward_representation = wang_hash64(forward_representation); - reverse_representation = wang_hash64(reverse_representation); - + if (hash_representations) { + forward_representation = wang_hash64(forward_representation); + reverse_representation = wang_hash64(reverse_representation); + } if (forward_representation <= reverse_representation) { minimizers_representation[kmer_index] = forward_representation; @@ -616,7 +622,8 @@ __global__ void find_back_end_minimizers(const std::uint64_t minimizer_size, char* const window_minimizers_direction, position_in_read_t* const window_minimizers_position_in_read, const ArrayBlock* const read_id_to_windows_section, - std::uint32_t* const read_id_to_minimizers_written) + std::uint32_t* const read_id_to_minimizers_written, + const bool hash_representations) { // See find_front_end_minimizers for more details about the algorithm @@ -701,10 +708,10 @@ __global__ void find_back_end_minimizers(const std::uint64_t minimizer_size, reverse_representation |= reverse_basepair_hashes[kmer_index + i] << 2 * i; } - //printf("Pre hash %lu, Post hash %lu\n", forward_representation, wang_hash64(forward_representation)); - forward_representation = wang_hash64(forward_representation); - reverse_representation = wang_hash64(reverse_representation); - + if (hash_representations) { + forward_representation = wang_hash64(forward_representation); + reverse_representation = wang_hash64(reverse_representation); + } if (forward_representation <= 
reverse_representation) { minimizers_representation[kmer_index] = forward_representation; @@ -844,7 +851,8 @@ Minimizer::GeneratedSketchElements Minimizer::generate_sketch_elements(const std const std::uint64_t read_id_of_first_read, const device_buffer& merged_basepairs_d, const std::vector& read_id_to_basepairs_section_h, - const device_buffer& read_id_to_basepairs_section_d) + const device_buffer& read_id_to_basepairs_section_d, + const bool hash_representations) { // for each read find the maximum number of minimizers (one per window), determine their section in the minimizer arrays and allocate the arrays std::uint64_t total_windows = 0; @@ -910,7 +918,8 @@ Minimizer::GeneratedSketchElements Minimizer::generate_sketch_elements(const std window_minimizers_direction_d.data(), window_minimizers_position_in_read_d.data(), read_id_to_windows_section_d.data(), - read_id_to_minimizers_written_d.data()); + read_id_to_minimizers_written_d.data(), + hash_representations); CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); // *** central minimizers *** @@ -943,7 +952,8 @@ Minimizer::GeneratedSketchElements Minimizer::generate_sketch_elements(const std window_minimizers_direction_d.data(), window_minimizers_position_in_read_d.data(), read_id_to_windows_section_d.data(), - read_id_to_minimizers_written_d.data()); + read_id_to_minimizers_written_d.data(), + hash_representations); CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); // *** back end minimizers *** @@ -969,7 +979,8 @@ Minimizer::GeneratedSketchElements Minimizer::generate_sketch_elements(const std window_minimizers_direction_d.data(), window_minimizers_position_in_read_d.data(), read_id_to_windows_section_d.data(), - read_id_to_minimizers_written_d.data()); + read_id_to_minimizers_written_d.data(), + hash_representations); CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); std::vector read_id_to_minimizers_written_h(number_of_reads_to_add); diff --git a/cudamapper/src/minimizer.hpp b/cudamapper/src/minimizer.hpp index 248837d63..816b1d565 100644 --- a/cudamapper/src/minimizer.hpp +++ b/cudamapper/src/minimizer.hpp @@ -80,13 +80,15 @@ class Minimizer : public SketchElement /// \param merged_basepairs_d basepairs of all reads, gouped by reads (device memory) /// \param read_id_to_basepairs_section_h for each read_id points to the section of merged_basepairs_d that belong to that read_id (host memory) /// \param read_id_to_basepairs_section_h for each read_id points to the section of merged_basepairs_d that belong to that read_id (device memory) + /// \param hash_minimizers if true, apply a hash function to the representations static GeneratedSketchElements generate_sketch_elements(const std::uint64_t number_of_reads_to_add, const std::uint64_t minimizer_size, const std::uint64_t window_size, const std::uint64_t read_id_of_first_read, const device_buffer& merged_basepairs_d, const std::vector& read_id_to_basepairs_section_h, - const device_buffer& read_id_to_basepairs_section_d); + const device_buffer& read_id_to_basepairs_section_d, + const bool hash_representations=true); private: representation_t representation_; diff --git a/cudamapper/tests/Test_CudamapperIndexGPU.cu b/cudamapper/tests/Test_CudamapperIndexGPU.cu index 91d4889b4..648b84ffc 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPU.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPU.cu @@ -39,7 +39,7 @@ void test_function(const std::string& filename, std::unique_ptr parser = io::create_fasta_parser(filename); std::vector parsers; parsers.push_back(parser.get()); - IndexGPU index(parsers, 
minimizer_size, window_size, read_ranges); + IndexGPU index(parsers, minimizer_size, window_size, read_ranges, false ); ASSERT_EQ(index.number_of_reads(), expected_number_of_reads); @@ -92,7 +92,7 @@ void test_function(const std::string& filename, } ASSERT_EQ(index.minimum_representation(), std::uint64_t(0)); - ASSERT_EQ(index.maximum_representation(), pow(4, std::uint64_t(minimizer_size)) - 1); + ASSERT_EQ(index.maximum_representation(), pow(4, std::uint64_t(16)) - 1); } TEST(TestCudamapperIndexGPU, GATT_4_1) diff --git a/cudamapper/tests/Test_CudamapperMinimizer.cpp b/cudamapper/tests/Test_CudamapperMinimizer.cpp index ac4eb809d..26e7478f6 100644 --- a/cudamapper/tests/Test_CudamapperMinimizer.cpp +++ b/cudamapper/tests/Test_CudamapperMinimizer.cpp @@ -43,7 +43,8 @@ void test_function(const std::uint64_t number_of_reads_to_add, read_id_of_first_read, merged_basepairs_d, read_id_to_basepairs_section_h, - read_id_to_basepairs_section_d); + read_id_to_basepairs_section_d, + false); device_buffer representations_d = std::move(sketch_elements.representations_d); std::vector representations_h(representations_d.size()); From 1c6e271b4eb6da6048749e61173316a1d7bf58d0 Mon Sep 17 00:00:00 2001 From: Mike Vella Date: Fri, 15 Nov 2019 16:04:45 +0000 Subject: [PATCH 065/128] Added documentation --- cudamapper/src/index_gpu.cuh | 11 +++++++++-- cudamapper/src/minimizer.cu | 9 +++++++++ cudamapper/tests/Test_CudamapperIndexGPU.cu | 4 ++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/cudamapper/src/index_gpu.cuh b/cudamapper/src/index_gpu.cuh index e4bd3fa83..63caa0403 100644 --- a/cudamapper/src/index_gpu.cuh +++ b/cudamapper/src/index_gpu.cuh @@ -57,6 +57,7 @@ public: /// \param kmer_size k - the kmer length /// \param window_size w - the length of the sliding window used to find sketch elements /// \param read_ranges - the ranges of reads in the query file to use for mapping, index by their position (e.g in the FASA file) + /// \param hash_representations - if true, apply hash function to all representations IndexGPU(const std::vector& parsers, const std::uint64_t kmer_size, const std::uint64_t window_size, const std::vector>& read_ranges, const bool hash_representations = true); /// \brief Constructor @@ -99,8 +100,14 @@ public: std::uint64_t minimum_representation() const override { return 0; }; /// \brief max_representation - /// \return the largest possible representation - std::uint64_t maximum_representation() const override { return (uint64_t(1) << 32) - 1;; }; + /// \return the largest possible representation, + std::uint64_t maximum_representation() const override { + if (hash_representations){ + return (uint64_t(1) << 32) - 1; + }else { + return (1 << (kmer_size_ * 2)) - 1; + } + }; private: /// \brief generates the index diff --git a/cudamapper/src/minimizer.cu b/cudamapper/src/minimizer.cu index 730df0039..fb9342b5a 100644 --- a/cudamapper/src/minimizer.cu +++ b/cudamapper/src/minimizer.cu @@ -39,6 +39,15 @@ read_id_t Minimizer::read_id() const return read_id_; } +/// \brief Apply a hash function to a representation +/// +/// Because of the non-Poisson distribuition of DNA, some common sequences with common kmer-content (e.g long poly-A runs) +/// may be over-represented in sketches. By applying a hash function, kmers are mapped to representations over +/// a more uniform space. The hash function implemented here was developed by Thomas Wang and is described +/// [here](https://gist.github.com/badboy/6267743). 
A mask is applied to the output so that all representations are mapped +/// to a 32 bit space. +/// +/// \param key the input representation __device__ representation_t wang_hash64(representation_t key) { uint64_t mask = (uint64_t(1) << 32) - 1; diff --git a/cudamapper/tests/Test_CudamapperIndexGPU.cu b/cudamapper/tests/Test_CudamapperIndexGPU.cu index 648b84ffc..27a49737a 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPU.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPU.cu @@ -39,7 +39,7 @@ void test_function(const std::string& filename, std::unique_ptr parser = io::create_fasta_parser(filename); std::vector parsers; parsers.push_back(parser.get()); - IndexGPU index(parsers, minimizer_size, window_size, read_ranges, false ); + IndexGPU index(parsers, minimizer_size, window_size, read_ranges, false); ASSERT_EQ(index.number_of_reads(), expected_number_of_reads); @@ -92,7 +92,7 @@ void test_function(const std::string& filename, } ASSERT_EQ(index.minimum_representation(), std::uint64_t(0)); - ASSERT_EQ(index.maximum_representation(), pow(4, std::uint64_t(16)) - 1); + ASSERT_EQ(index.maximum_representation(), pow(4, std::uint64_t(minimizer_size)) - 1); } TEST(TestCudamapperIndexGPU, GATT_4_1) From 0438d68c56a354e5966236e501c9bc3f9103a492 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Fri, 15 Nov 2019 17:52:38 +0100 Subject: [PATCH 066/128] [cudamapper] fixed a typo --- .../include/claragenomics/cudamapper/matcher_two_indices.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp b/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp index 9d8cee87c..8457c45d5 100644 --- a/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp +++ b/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp @@ -22,7 +22,7 @@ namespace cudamapper /// \addtogroup cudamapper /// \{ -/// MatcehrTwoIndice - base matcher +/// MatcherTwoIndices - base matcher class MatcherTwoIndices { public: From 6b246b79d20bce6d0cef52d7269a0ceae169b8f4 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Fri, 15 Nov 2019 11:58:25 -0500 Subject: [PATCH 067/128] [evaluate_paf] fix re-ordering of imports --- pyclaragenomics/bin/evaluate_paf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyclaragenomics/bin/evaluate_paf b/pyclaragenomics/bin/evaluate_paf index c05b290b4..6ecce9dcf 100755 --- a/pyclaragenomics/bin/evaluate_paf +++ b/pyclaragenomics/bin/evaluate_paf @@ -12,9 +12,10 @@ """Functions and tools for calculating the accuracy of overlap detection""" import argparse -from claragenomics.io import pafio from collections import defaultdict +from claragenomics.io import pafio + def match_overlaps(query_0, query_1, target_0, target_1, pos_tolerance): """Given two sets of query and target ranges, check if the query and target ranges fall within a specified tolerance of each other. 
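The evaluate_paf hunk above sits right before match_overlaps, whose docstring describes checking whether two sets of query and target ranges fall within a specified tolerance of each other. A rough sketch of that endpoint comparison, in C++ purely for illustration (the real helper is the Python function in evaluate_paf); the Range struct, the exact matching rule, and the reading of each argument as a (start, end) pair are assumptions here, not taken from the script:

#include <cstdint>

// Assumed shape of a query or target range.
struct Range
{
    int64_t start;
    int64_t end;
};

static bool within(int64_t a, int64_t b, int64_t tolerance)
{
    return (a > b ? a - b : b - a) <= tolerance;
}

// Two overlaps are treated as the same hit when every corresponding endpoint
// differs by at most pos_tolerance basepairs (an assumption about the intended
// semantics, mirroring the parameter names of match_overlaps).
bool ranges_match(const Range& query_0, const Range& query_1,
                  const Range& target_0, const Range& target_1,
                  int64_t pos_tolerance)
{
    return within(query_0.start, query_1.start, pos_tolerance) &&
           within(query_0.end, query_1.end, pos_tolerance) &&
           within(target_0.start, target_1.start, pos_tolerance) &&
           within(target_0.end, target_1.end, pos_tolerance);
}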
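Stepping back to patch 062 (generate_anchors_kernel parallelized over anchors instead of representations): each thread starts from a flat anchor index, finds the representation that owns it with an upper_bound over the prefix-summed anchor_starting_index array, and then splits the remaining offset into a query and a target sketch element with one division and one modulo. A small host-side sketch of that index arithmetic with invented example values (two representations, simplified names and types; not the kernel itself):

#include <algorithm>
#include <cstdint>
#include <vector>

int main()
{
    // anchor_starting_index[r] is the inclusive prefix sum of
    // query_count[r] * target_count[r] for each representation r.
    std::vector<int64_t> anchor_starting_index = {6, 8}; // rep 0: 2x3 anchors, rep 1: 1x2 anchors
    std::vector<uint32_t> query_begin  = {0, 2};         // first query sketch element of each rep
    std::vector<uint32_t> target_begin = {0, 3};         // first target sketch element of each rep
    std::vector<uint32_t> n_targets    = {3, 2};         // matching target sketch elements per rep

    for (int64_t anchor_idx = 0; anchor_idx < 8; ++anchor_idx)
    {
        // Owning representation: first prefix-sum entry greater than anchor_idx.
        const int64_t rep = std::upper_bound(anchor_starting_index.begin(),
                                             anchor_starting_index.end(), anchor_idx) -
                            anchor_starting_index.begin();
        int64_t relative = anchor_idx;
        if (rep > 0)
            relative -= anchor_starting_index[rep - 1];
        const uint32_t query_idx  = query_begin[rep] + relative / n_targets[rep];
        const uint32_t target_idx = target_begin[rep] + relative % n_targets[rep];
        // e.g. anchor_idx == 7 -> rep 1, relative 1, pair (query 2, target 4)
        static_cast<void>(query_idx);
        static_cast<void>(target_idx);
    }
    return 0;
}

In the kernel each thread evaluates exactly one iteration of this loop, so every anchor is produced independently and no thread has to walk the nested query/target loops of the previous version.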
From a1f39f051adbaca6b6cee7179f9f9fec24a489bc Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Mon, 18 Nov 2019 10:07:00 +0100 Subject: [PATCH 068/128] [cudamapper] minor format fix --- cudamapper/src/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cudamapper/src/main.cpp b/cudamapper/src/main.cpp index 25fe6609d..4ac478f2f 100644 --- a/cudamapper/src/main.cpp +++ b/cudamapper/src/main.cpp @@ -300,4 +300,4 @@ void help(int32_t exit_code = 0) << std::endl; exit(exit_code); -} \ No newline at end of file +} From e1ebe0ad2a6d429ca46a829dc8be7aea821fccb9 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Mon, 18 Nov 2019 11:30:07 +0100 Subject: [PATCH 069/128] Ran "make format" to get the style check to pass --- cudamapper/src/index_gpu.cuh | 12 ++++++++---- cudamapper/src/minimizer.cu | 25 ++++++++++++++----------- cudamapper/src/minimizer.hpp | 2 +- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/cudamapper/src/index_gpu.cuh b/cudamapper/src/index_gpu.cuh index 63caa0403..4bc533967 100644 --- a/cudamapper/src/index_gpu.cuh +++ b/cudamapper/src/index_gpu.cuh @@ -101,10 +101,14 @@ public: /// \brief max_representation /// \return the largest possible representation, - std::uint64_t maximum_representation() const override { - if (hash_representations){ + std::uint64_t maximum_representation() const override + { + if (hash_representations) + { return (uint64_t(1) << 32) - 1; - }else { + } + else + { return (1 << (kmer_size_ * 2)) - 1; } }; @@ -675,7 +679,7 @@ void IndexGPU::generate_index(const std::vector> 24; - key = ((key + (key << 3)) + (key << 8)) & mask; - key = key ^ key >> 14; - key = ((key + (key << 2)) + (key << 4)) & mask; - key = key ^ key >> 28; - key = (key + (key << 31)) & mask; + uint64_t mask = (uint64_t(1) << 32) - 1; + key = (~key + (key << 21)) & mask; + key = key ^ key >> 24; + key = ((key + (key << 3)) + (key << 8)) & mask; + key = key ^ key >> 14; + key = ((key + (key << 2)) + (key << 4)) & mask; + key = key ^ key >> 28; + key = (key + (key << 31)) & mask; return key; } @@ -217,7 +217,8 @@ __global__ void find_front_end_minimizers(const std::uint64_t minimizer_size, reverse_representation |= reverse_basepair_hashes[threadIdx.x + i] << 2 * i; } - if (hash_representations) { + if (hash_representations) + { forward_representation = wang_hash64(forward_representation); reverse_representation = wang_hash64(reverse_representation); } @@ -487,7 +488,8 @@ __global__ void find_central_minimizers(const std::uint64_t minimizer_size, reverse_representation |= reverse_basepair_hashes[kmer_index + i] << 2 * i; } - if (hash_representations) { + if (hash_representations) + { forward_representation = wang_hash64(forward_representation); reverse_representation = wang_hash64(reverse_representation); } @@ -717,7 +719,8 @@ __global__ void find_back_end_minimizers(const std::uint64_t minimizer_size, reverse_representation |= reverse_basepair_hashes[kmer_index + i] << 2 * i; } - if (hash_representations) { + if (hash_representations) + { forward_representation = wang_hash64(forward_representation); reverse_representation = wang_hash64(reverse_representation); } diff --git a/cudamapper/src/minimizer.hpp b/cudamapper/src/minimizer.hpp index 816b1d565..0adff29e0 100644 --- a/cudamapper/src/minimizer.hpp +++ b/cudamapper/src/minimizer.hpp @@ -88,7 +88,7 @@ class Minimizer : public SketchElement const device_buffer& merged_basepairs_d, const std::vector& read_id_to_basepairs_section_h, const device_buffer& read_id_to_basepairs_section_d, - const bool 
hash_representations=true); + const bool hash_representations = true); private: representation_t representation_; From c66d342d43f8776bbcec126e6d3cdf2a6ac477d3 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Mon, 18 Nov 2019 16:40:54 +0100 Subject: [PATCH 070/128] Not calling CGA_CU_CHECK_ERR(cudaDeviceSynchronize()) after kernel launches --- cudamapper/src/index_gpu_two_indices.cu | 1 - cudamapper/src/index_gpu_two_indices.cuh | 1 - 2 files changed, 2 deletions(-) diff --git a/cudamapper/src/index_gpu_two_indices.cu b/cudamapper/src/index_gpu_two_indices.cu index ab1ade652..deecbabba 100644 --- a/cudamapper/src/index_gpu_two_indices.cu +++ b/cudamapper/src/index_gpu_two_indices.cu @@ -36,7 +36,6 @@ void find_first_occurrences_of_representations(thrust::device_vector>>(input_representations_d.data().get(), input_representations_d.size(), new_value_mask_d.data().get()); - CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); // sync not necessary, here only to detect the error immediately // do inclusive scan // for example for diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh index b4187bbed..57f9dc9ad 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -427,7 +427,6 @@ void IndexGPUTwoIndices::generate_index(io::FastaParser* pars positions_in_reads_d_.data().get(), directions_of_reads_d_.data().get(), representations_d_.size()); - CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); // now generate the index elements details::index_gpu_two_indices::find_first_occurrences_of_representations(unique_representations_d_, From 0d6caf4bc2cd9cff834f15b12c23171e3321ef57 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Mon, 18 Nov 2019 17:36:48 +0100 Subject: [PATCH 071/128] Explicitly caching a value in find_first_occurrences_of_representations_kernel, additional documentation for the function --- cudamapper/src/index_gpu_two_indices.cu | 10 ++++++---- cudamapper/src/index_gpu_two_indices.cuh | 10 +++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/cudamapper/src/index_gpu_two_indices.cu b/cudamapper/src/index_gpu_two_indices.cu index deecbabba..98a453ea8 100644 --- a/cudamapper/src/index_gpu_two_indices.cu +++ b/cudamapper/src/index_gpu_two_indices.cu @@ -99,6 +99,7 @@ __global__ void find_first_occurrences_of_representations_kernel(const std::uint std::uint32_t* const starting_index_of_each_representation_d, representation_t* const unique_representations_d) { + // one thread per element of input_representations_d (i.e. 
sketch_element) std::uint64_t index = blockIdx.x * blockDim.x + threadIdx.x; if (index >= number_of_input_elements) @@ -111,13 +112,14 @@ __global__ void find_first_occurrences_of_representations_kernel(const std::uint } else { - if (representation_index_mask_d[index] != representation_index_mask_d[index - 1]) + // representation_index_mask_d gives a unique index to each representation, starting from 1, thus '-1' + const auto representation_index_mask_for_this_index = representation_index_mask_d[index]; + if (representation_index_mask_for_this_index != representation_index_mask_d[index - 1]) { // if new representation is not the same as its left neighbor // save the index at which that representation starts - // representation_index_mask_d gives a unique index to each representation, starting from 1, thus '-1' - starting_index_of_each_representation_d[representation_index_mask_d[index] - 1] = index; - unique_representations_d[representation_index_mask_d[index] - 1] = input_representations_d[index]; + starting_index_of_each_representation_d[representation_index_mask_for_this_index - 1] = index; + unique_representations_d[representation_index_mask_for_this_index - 1] = input_representations_d[index]; } } } diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh index 57f9dc9ad..4cc85c589 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -177,11 +177,11 @@ __global__ void create_new_value_mask(const representation_t* const representati /// 0 12 23 32 46 /// 0 4 10 13 18 /// -/// \param representation_index_mask_d -/// \param input_representatons_d -/// \param number_of_input_elements -/// \param starting_index_of_each_representation_d -/// \param unique_representations_d +/// \param representation_index_mask_d an array in which each element from input_representatons_d is mapped to an ordinal number of that representation (array "1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5" in the example) +/// \param input_representatons_d all representations (array "0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46" in the example) +/// \param number_of_input_elements number of elements in input_representatons_d and representation_index_mask_d +/// \param starting_index_of_each_representation_d index with first occurrence of each representation (array "0 12 23 32 46" in the example) +/// \param unique_representations_d representation that corresponds to each element in starting_index_of_each_representation_d (array "0 4 10 13 18" in the example) __global__ void find_first_occurrences_of_representations_kernel(const std::uint64_t* const representation_index_mask_d, const representation_t* const input_representations_d, const std::size_t number_of_input_elements, From 66408bb385cc80231991ebb7efa01d91e864d530 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Mon, 18 Nov 2019 17:52:38 +0100 Subject: [PATCH 072/128] Using default default constructor in IndexGPUTwoIndices --- cudamapper/src/index_gpu_two_indices.cuh | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh index 4cc85c589..feb579055 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -59,7 +59,7 @@ public: const std::uint64_t window_size); /// \brief Constructor - IndexGPUTwoIndices(); + IndexGPUTwoIndices() = default; /// \brief returns an array of representations of sketch elements 
/// \return an array of representations of sketch elements @@ -116,10 +116,10 @@ private: std::vector read_id_to_read_name_; std::vector read_id_to_read_length_; - const read_id_t first_read_id_; - const std::uint64_t kmer_size_; - const std::uint64_t window_size_; - std::uint64_t number_of_reads_; + const read_id_t first_read_id_ = 0; + const std::uint64_t kmer_size_ = 0; + const std::uint64_t window_size_ = 0; + std::uint64_t number_of_reads_ = 0; }; namespace details @@ -234,15 +234,6 @@ IndexGPUTwoIndices::IndexGPUTwoIndices(io::FastaParser* parse past_the_last_read_id); } -template -IndexGPUTwoIndices::IndexGPUTwoIndices() - : first_read_id_(0) - , kmer_size_(0) - , window_size_(0) - , number_of_reads_(0) -{ -} - template const thrust::device_vector& IndexGPUTwoIndices::representations() const { From 4ee33145f781a63fa1ba00b05f12c4e44af2edb8 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Mon, 18 Nov 2019 18:05:49 +0100 Subject: [PATCH 073/128] Passing io::FastaParser by reference and making it const --- .../claragenomics/cudamapper/index_two_indices.hpp | 2 +- cudamapper/src/index_gpu_two_indices.cuh | 10 +++++----- cudamapper/src/index_two_indices.cu | 2 +- cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu | 2 +- cudamapper/tests/Test_CudamapperMatcherGPU.cu | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp index 3791980ab..1fcf41e42 100644 --- a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp +++ b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp @@ -80,7 +80,7 @@ class IndexTwoIndices /// \param window_size w - the length of the sliding window used to find sketch elements /// \return instance of IndexTwoIndices static std::unique_ptr - create_index(io::FastaParser* parser, + create_index(const io::FastaParser& parser, const read_id_t first_read_id, const read_id_t past_the_last_read_id, const std::uint64_t kmer_size, diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh index feb579055..855f22d3d 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -52,7 +52,7 @@ public: /// \param past_the_last_read_id read_id+1 of the last read to be included in this index /// \param kmer_size k - the kmer length /// \param window_size w - the length of the sliding window used to find sketch elements - IndexGPUTwoIndices(io::FastaParser* parser, + IndexGPUTwoIndices(const io::FastaParser& parser, const read_id_t first_read_id, const read_id_t past_the_last_read_id, const std::uint64_t kmer_size, @@ -101,7 +101,7 @@ public: private: /// \brief generates the index - void generate_index(io::FastaParser* query_parser, + void generate_index(const io::FastaParser& query_parser, const read_id_t first_read_id, const read_id_t past_the_last_read_id); @@ -219,7 +219,7 @@ __global__ void copy_rest_to_separate_arrays(const ReadidPositionDirection* cons } // namespace details template -IndexGPUTwoIndices::IndexGPUTwoIndices(io::FastaParser* parser, +IndexGPUTwoIndices::IndexGPUTwoIndices(const io::FastaParser& parser, const read_id_t first_read_id, const read_id_t past_the_last_read_id, const std::uint64_t kmer_size, @@ -289,7 +289,7 @@ std::uint64_t IndexGPUTwoIndices::number_of_reads() const } template -void IndexGPUTwoIndices::generate_index(io::FastaParser* parser, +void IndexGPUTwoIndices::generate_index(const 
io::FastaParser& parser, const read_id_t first_read_id, const read_id_t past_the_last_read_id) { @@ -311,7 +311,7 @@ void IndexGPUTwoIndices::generate_index(io::FastaParser* pars // deterine the number of basepairs in each read and assign read_id to each read for (read_id_t read_id = first_read_id; read_id < past_the_last_read_id; ++read_id) { - fasta_reads.emplace_back(parser->get_sequence_by_id(read_id)); + fasta_reads.emplace_back(parser.get_sequence_by_id(read_id)); const std::string& read_basepairs = fasta_reads.back().seq; const std::string& read_name = fasta_reads.back().name; if (read_basepairs.length() >= window_size_ + kmer_size_ - 1) diff --git a/cudamapper/src/index_two_indices.cu b/cudamapper/src/index_two_indices.cu index cc7e4dabb..5125b626e 100644 --- a/cudamapper/src/index_two_indices.cu +++ b/cudamapper/src/index_two_indices.cu @@ -17,7 +17,7 @@ namespace claragenomics { namespace cudamapper { -std::unique_ptr IndexTwoIndices::create_index(io::FastaParser* parser, +std::unique_ptr IndexTwoIndices::create_index(const io::FastaParser& parser, const read_id_t first_read_id, const read_id_t past_the_last_read_id, const std::uint64_t kmer_size, diff --git a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu index c3f8322e9..7b67c6a39 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu @@ -40,7 +40,7 @@ void test_function(const std::string& filename, const std::uint64_t expected_number_of_reads) { std::unique_ptr parser = io::create_fasta_parser(filename); - IndexGPUTwoIndices index(parser.get(), + IndexGPUTwoIndices index(*parser, first_read_id, past_the_last_read_id, kmer_size, diff --git a/cudamapper/tests/Test_CudamapperMatcherGPU.cu b/cudamapper/tests/Test_CudamapperMatcherGPU.cu index f2f38d9bb..45d38c05a 100644 --- a/cudamapper/tests/Test_CudamapperMatcherGPU.cu +++ b/cudamapper/tests/Test_CudamapperMatcherGPU.cu @@ -330,8 +330,8 @@ TEST(TestCudamapperMatcherGPU, test_generate_anchors_small_example) TEST(TestCudamapperMatcherGPU, OneReadOneMinimizer) { std::unique_ptr parser = io::create_fasta_parser(std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"); - std::unique_ptr query_index = IndexTwoIndices::create_index(parser.get(), 0, parser->get_num_seqences(), 4, 1); - std::unique_ptr target_index = IndexTwoIndices::create_index(parser.get(), 0, parser->get_num_seqences(), 4, 1); + std::unique_ptr query_index = IndexTwoIndices::create_index(*parser, 0, parser->get_num_seqences(), 4, 1); + std::unique_ptr target_index = IndexTwoIndices::create_index(*parser, 0, parser->get_num_seqences(), 4, 1); MatcherGPU matcher(*query_index, *target_index); const thrust::host_vector anchors(matcher.anchors()); From 2203d3130a058379c3765806224729fb26af811a Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Mon, 18 Nov 2019 18:08:41 +0100 Subject: [PATCH 074/128] Removed "CGA_CU_CHECK_ERR(cudaDeviceSynchronize())" from tests --- cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu | 3 --- 1 file changed, 3 deletions(-) diff --git a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu index 7b67c6a39..e2c79b6d8 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu @@ -985,8 +985,6 @@ void test_create_new_value_mask(const thrust::host_vector& rep representations_d.size(), thrust::raw_pointer_cast(new_value_mask_d.data())); - 
CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); - const thrust::host_vector new_value_mask_h(new_value_mask_d); ASSERT_EQ(new_value_mask_h.size(), expected_new_value_mask_h.size()); @@ -1087,7 +1085,6 @@ void test_find_first_occurrences_of_representations_kernel(const thrust::host_ve representation_index_mask_d.size(), starting_index_of_each_representation_d.data().get(), unique_representations_d.data().get()); - CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); const thrust::host_vector unique_representations_h(unique_representations_d); From 773fb5468f8b0ff4d9817cd5919be7fdb7c011ea Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Mon, 18 Nov 2019 18:09:44 +0100 Subject: [PATCH 075/128] Formatting --- cudamapper/src/index_gpu_two_indices.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh index 855f22d3d..947a4bc90 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -116,10 +116,10 @@ private: std::vector read_id_to_read_name_; std::vector read_id_to_read_length_; - const read_id_t first_read_id_ = 0; - const std::uint64_t kmer_size_ = 0; + const read_id_t first_read_id_ = 0; + const std::uint64_t kmer_size_ = 0; const std::uint64_t window_size_ = 0; - std::uint64_t number_of_reads_ = 0; + std::uint64_t number_of_reads_ = 0; }; namespace details From 7be2bd91818dbbeda7ba95d522e6ebd27ea2381b Mon Sep 17 00:00:00 2001 From: Kamesh Arumugam Date: Mon, 18 Nov 2019 15:25:30 -0800 Subject: [PATCH 076/128] [cudamapper:overlapper] replace thrust::raw_pointer_cast with *.data.get() on thrust::device_vector --- cudamapper/src/overlapper_triggered.cu | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/cudamapper/src/overlapper_triggered.cu b/cudamapper/src/overlapper_triggered.cu index be50dc5e7..8a1d6eaab 100644 --- a/cudamapper/src/overlapper_triggered.cu +++ b/cudamapper/src/overlapper_triggered.cu @@ -232,7 +232,7 @@ void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, // allocate temporary storage d_temp_buf.resize(temp_storage_bytes); - d_temp_storage = thrust::raw_pointer_cast(d_temp_buf.data()); + d_temp_storage = d_temp_buf.data().get(); // run encoding cub::DeviceRunLengthEncode::Encode( @@ -258,7 +258,7 @@ void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, // allocate temporary storage d_temp_buf.resize(temp_storage_bytes); - d_temp_storage = thrust::raw_pointer_cast(d_temp_buf.data()); + d_temp_storage = d_temp_buf.data().get(); cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_chain_length.data(), d_chain_start.data(), @@ -288,19 +288,18 @@ void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, // fuse overlaps using reduce by key operations // key is a minimal data structure that is required to compare the overlaps - cuOverlapKey_transform key_op(thrust::raw_pointer_cast(d_anchors.data()), - thrust::raw_pointer_cast(d_chain_start.data())); + cuOverlapKey_transform key_op(d_anchors.data().get(), + d_chain_start.data().get()); cub::TransformInputIterator - d_keys_in(thrust::raw_pointer_cast(d_overlaps.data()), + d_keys_in(d_overlaps.data().get(), key_op); // value is a minimal data structure that represents a overlap - cuOverlapArgs_transform value_op( - thrust::raw_pointer_cast(d_chain_start.data()), - 
thrust::raw_pointer_cast(d_chain_length.data())); + cuOverlapArgs_transform value_op(d_chain_start.data().get(), + d_chain_length.data().get()); cub::TransformInputIterator - d_values_in(thrust::raw_pointer_cast(d_overlaps.data()), + d_values_in(d_overlaps.data().get(), value_op); thrust::device_vector d_fusedoverlap_keys(n_overlaps); @@ -318,7 +317,7 @@ void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, // allocate temporary storage d_temp_buf.resize(temp_storage_bytes); - d_temp_storage = thrust::raw_pointer_cast(d_temp_buf.data()); + d_temp_storage = d_temp_buf.data().get(); cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes, d_keys_in, d_fusedoverlap_keys.data(), d_values_in, @@ -329,7 +328,7 @@ void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, auto n_fused_overlap = d_nfused_overlaps[0]; // construct overlap from the overlap args - CreateOverlap fuse_op(thrust::raw_pointer_cast(d_anchors.data())); + CreateOverlap fuse_op(d_anchors.data().get()); thrust::device_vector d_fused_overlaps(n_fused_overlap); thrust::transform(d_fusedoverlaps_args.data(), d_fusedoverlaps_args.data() + n_fused_overlap, From 44b6b257ef6f41afee0632e07b3a44e052075afa Mon Sep 17 00:00:00 2001 From: Kamesh Arumugam Date: Mon, 18 Nov 2019 15:59:42 -0800 Subject: [PATCH 077/128] [cudamapper] minor code refactor to address PR feedback --- cudamapper/src/matcher.cu | 2 -- cudamapper/src/overlapper_triggered.cu | 10 +++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/cudamapper/src/matcher.cu b/cudamapper/src/matcher.cu index 156522beb..68530a51a 100644 --- a/cudamapper/src/matcher.cu +++ b/cudamapper/src/matcher.cu @@ -157,8 +157,6 @@ Matcher::Matcher(const Index& index, uint32_t query_target_division_idx) directions_of_reads_h.size() * sizeof(SketchElement::DirectionOfRepresentation), cudaMemcpyHostToDevice)); - anchors_d_.resize(0); - while (representation_min_range <= max_representation) { diff --git a/cudamapper/src/overlapper_triggered.cu b/cudamapper/src/overlapper_triggered.cu index 8a1d6eaab..ea92f7aa3 100644 --- a/cudamapper/src/overlapper_triggered.cu +++ b/cudamapper/src/overlapper_triggered.cu @@ -184,10 +184,10 @@ void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, { CGA_NVTX_RANGE(profiler, "OverlapperTriggered::get_overlaps"); - const auto& read_names = index.read_id_to_read_name(); - const auto& read_lengths = index.read_id_to_read_length(); - auto tail_length_for_chain = 3; - auto n_anchors = d_anchors.size(); + const auto& read_names = index.read_id_to_read_name(); + const auto& read_lengths = index.read_id_to_read_length(); + const auto tail_length_for_chain = 3; + auto n_anchors = d_anchors.size(); // comparison operator - lambda used to compare Anchors in sort auto comp = [] __host__ __device__(const Anchor& i, const Anchor& j) -> bool { @@ -215,7 +215,7 @@ void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, // >>>>>>>>> // d_start_anchor[i] contains the starting anchor of chain i - thrust::device_vector d_start_anchor(d_anchors.size()); + thrust::device_vector d_start_anchor(n_anchors); // d_chain_length[i] contains the length of chain i thrust::device_vector d_chain_length(n_anchors); From 34549a18a348680d056fc3fd97c1a52a56711732 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Tue, 19 Nov 2019 11:43:45 +0100 Subject: [PATCH 078/128] [cudamapper] Addressed mimaric comments on #216 --- cudamapper/src/matcher_gpu.cu | 67 ++++++++++++++++++++++++------- cudamapper/src/matcher_gpu.cuh | 72 
+++++++++++++++++++++++++++++++--- 2 files changed, 119 insertions(+), 20 deletions(-) diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index 06478c188..57601c5d3 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -23,7 +24,8 @@ namespace template __device__ RandomAccessIterator lower_bound(RandomAccessIterator lower_bound, RandomAccessIterator upper_bound, ValueType query) { - while (upper_bound - lower_bound > 0) + assert(upper_bound >= lower_bound); + while (upper_bound > lower_bound) { RandomAccessIterator mid = lower_bound + (upper_bound - lower_bound) / 2; const auto mid_value = *mid; @@ -38,7 +40,8 @@ __device__ RandomAccessIterator lower_bound(RandomAccessIterator lower_bound, Ra template __device__ RandomAccessIterator upper_bound(RandomAccessIterator lower_bound, RandomAccessIterator upper_bound, ValueType query) { - while (upper_bound - lower_bound > 0) + assert(upper_bound >= lower_bound); + while (upper_bound > lower_bound) { RandomAccessIterator mid = lower_bound + (upper_bound - lower_bound) / 2; const auto mid_value = *mid; @@ -65,15 +68,36 @@ MatcherGPU::MatcherGPU(const IndexTwoIndices& query_index, if (query_index.number_of_reads() == 0 || target_index.number_of_reads() == 0) return; + // We need to compute a set of anchors between the query and the target. + // An anchor is a combination of a query (read_id, position) and + // target {read_id, position} with the same representation. + // The set of anchors of a matching query and target representation + // is the all-to-all combination of the corresponding set of {(read_id, position)} + // of the query with the set of {(read_id, position)} of the target. + // + // We compute the anchors for each unique representation of the query index. + // The array index of the following data structures will correspond to the array index of the + // unique representation in the query index. + thrust::device_vector found_target_indices_d(query_index.unique_representations().size()); thrust::device_vector anchor_starting_indices_d(query_index.unique_representations().size()); + + // First we search for each unique representation of the query index, the array index + // of the same representation in the array of unique representations of target index + // (or -1 if representation is not found). details::matcher_gpu::find_query_target_matches(found_target_indices_d, query_index.unique_representations(), target_index.unique_representations()); + + // For each unique representation of the query index compute the number of corrsponding anchors + // and store the resulting starting index in an anchors array if all anchors are stored in a flat array. + // The last element will be the total number of anchors. 
details::matcher_gpu::compute_anchor_starting_indices(anchor_starting_indices_d, query_index.first_occurrence_of_representations(), found_target_indices_d, target_index.first_occurrence_of_representations()); const int64_t n_anchors = anchor_starting_indices_d.back(); // D->H transfer anchors_d_.resize(n_anchors); + // Generate the anchors + // by computing the all-to-all combinations of the matching representations in query and target details::matcher_gpu::generate_anchors(anchors_d_, anchor_starting_indices_d, query_index.first_occurrence_of_representations(), @@ -149,17 +173,18 @@ __global__ void find_query_target_matches_kernel(int64_t* const found_target_ind found_target_indices[i] = found_target_index; } -void generate_anchors(thrust::device_vector& anchors, - const thrust::device_vector& anchor_starting_indices, - const thrust::device_vector& query_starting_index_of_each_representation_d, - const thrust::device_vector& found_target_indices_d, - const thrust::device_vector& target_starting_index_of_each_representation_d, - const thrust::device_vector& query_read_ids, - const thrust::device_vector& query_positions_in_read, - const thrust::device_vector& target_read_ids, - const thrust::device_vector& target_positions_in_read) +void generate_anchors( + thrust::device_vector& anchors, + const thrust::device_vector& anchor_starting_indices_d, + const thrust::device_vector& query_starting_index_of_each_representation_d, + const thrust::device_vector& found_target_indices_d, + const thrust::device_vector& target_starting_index_of_each_representation_d, + const thrust::device_vector& query_read_ids, + const thrust::device_vector& query_positions_in_read, + const thrust::device_vector& target_read_ids, + const thrust::device_vector& target_positions_in_read) { - assert(anchor_starting_indices.size() + 1 == query_starting_index_of_each_representation_d.size()); + assert(anchor_starting_indices_d.size() + 1 == query_starting_index_of_each_representation_d.size()); assert(found_target_indices_d.size() + 1 == query_starting_index_of_each_representation_d.size()); assert(query_read_ids.size() == query_positions_in_read.size()); assert(target_read_ids.size() == target_positions_in_read.size()); @@ -169,7 +194,7 @@ void generate_anchors(thrust::device_vector& anchors, generate_anchors_kernel<<>>( anchors.data().get(), get_size(anchors), - anchor_starting_indices.data().get(), + anchor_starting_indices_d.data().get(), query_starting_index_of_each_representation_d.data().get(), found_target_indices_d.data().get(), get_size(found_target_indices_d), @@ -182,30 +207,36 @@ void generate_anchors(thrust::device_vector& anchors, __global__ void generate_anchors_kernel( Anchor* const anchors_d, - int64_t n_anchors, + const int64_t n_anchors, const int64_t* const anchor_starting_index_d, const std::uint32_t* const query_starting_index_of_each_representation_d, const std::int64_t* const found_target_indices_d, - int32_t n_query_representations, + const int32_t n_query_representations, const std::uint32_t* const target_starting_index_of_each_representation_d, const read_id_t* const query_read_ids, const position_in_read_t* const query_positions_in_read, const read_id_t* const target_read_ids, const position_in_read_t* const target_positions_in_read) { + // Fill the anchor_d array. Each thread generates one anchor. std::int64_t anchor_idx = blockIdx.x * blockDim.x + threadIdx.x; if (anchor_idx >= n_anchors) return; + // Figure out for which representation this thread should compute the anchor. 
+ // We only need the index in the unique representation array of the query index + // not the representation itself. const std::int64_t representation_idx = upper_bound(anchor_starting_index_d, anchor_starting_index_d + n_query_representations, anchor_idx) - anchor_starting_index_d; assert(representation_idx < n_query_representations); + // Compute the index of the anchor within only this representation. std::uint32_t relative_anchor_index = anchor_idx; if (representation_idx > 0) relative_anchor_index -= anchor_starting_index_d[representation_idx - 1]; + // Get the ranges within the query and target index with this representation. const std::int64_t j = found_target_indices_d[representation_idx]; assert(j >= 0); const std::uint32_t query_begin = query_starting_index_of_each_representation_d[representation_idx]; @@ -213,11 +244,17 @@ __global__ void generate_anchors_kernel( const std::uint32_t target_end = target_starting_index_of_each_representation_d[j + 1]; const std::uint32_t n_targets = target_end - target_begin; + + // Overall we want to do an all-to-all (n*m) matching between the query and target entries + // with the same representation. + // Compute the exact combination query and target index entry for which + // we generate the anchor in this thread. const std::uint32_t query_idx = query_begin + relative_anchor_index / n_targets; const std::uint32_t target_idx = target_begin + relative_anchor_index % n_targets; assert(query_idx < query_starting_index_of_each_representation_d[representation_idx + 1]); + // Generate and store the anchor Anchor a; a.query_read_id_ = query_read_ids[query_idx]; a.target_read_id_ = target_read_ids[target_idx]; diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index 3c274f766..33f956e4c 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -104,9 +104,56 @@ void compute_anchor_starting_indices(thrust::device_vector& anchor const thrust::device_vector& found_target_indices_d, const thrust::device_vector& target_starting_index_of_each_representation_d); -///TODO +/// \brief Generates an array of anchors from matches of representations of the query and target index +/// +/// Fills the array of anchors with anchors of matches between the query and target index by using the +/// anchor_starting_indices for each unique representation of the query index. +/// The anchor_starting_indices can be computed by compute_anchor_starting_indices and the size of the +/// anchors array must match the last element of anchor_starting_indices. +/// +/// For example: +/// (see also compute_anchor_starting_indices() ) +/// anchor_starting_indices: +/// query: +/// (representation: 0 12 23 32 46) +/// starting index: 0 4 10 13 18 21 +/// target: +/// (representation: 5 12 16 23 24 25 46) +/// starting index: 0 3 7 9 13 16 18 21 +/// matching representations are 12, 23, 46 +/// +/// (query representation: 0 12 23 32 46) +/// array-index: 0 1 2 3 4 +/// found_target_indices_d: -1 1 3 -1 6 (-1 indicates no matching representation in target) +/// anchor_starting_indices_d: 0 24 36 36 45 +/// +/// query: +/// read_ids (arbitrary data): 0 1 2 3 4 5 6 7 8 9 10 ... 21 +/// positions_in_read (arbitrary data): 0 10 20 30 40 50 60 70 80 90 100 ... 210 +/// target: +/// read_ids (arbitrary data): 60 61 62 63 64 65 66 67 68 69 70 ... 81 +/// positions_in_read (arbitrary data): 0 11 22 33 44 55 66 77 88 99 110 ... 
231 +/// +/// anchors: +/// all-to-all combinations of representations 12, 23, 46: +/// format: +/// representation: anchors (query_read_id, query_position, target_read_id, target_position) +/// 12: (4,40,63,33), (4,40,64,44), ..., (4,40,66,66), (5,50,63,33), ..., (5,50,66,66), ..., ..., (9,90,66,66) -- 24 elements in total +/// 23: (10,100,69,99), (10,100,70,110), ..., (10,100,72,132), (11,110,69,99), ..., ..., (12,120,72,132) -- 12 elements in total +/// 46: (18,180,78,198), ..., ..., (20,200,80,220) -- 9 elements in total +/// +/// +/// \param anchors the array to be filled with anchors, the size of this array has to be equal to the last element of anchor_starting_indices +/// \param anchor_starting_indices_d the array of starting indices of the set of anchors for each unique representation of the query index (representations with no match in target will have the same starting index as the last matching representation) +/// \param query_starting_index_of_each_representation_d the starting index of a representation in query_read_ids and query_positions_in_read +/// \param found_target_indices_d the found matches in the array of unique target representation for each unique representation of query index +/// \param target_starting_index_of_each_representation_d the starting index of a representation in target_read_ids and target_positions_in_read +/// \param query_read_ids the array of read ids of the (read id, position)-pairs in query index +/// \param query_positions_in_read the array of positions of the (read id, position)-pairs in query index +/// \param target_read_ids the array of read ids of the (read id, position)-pairs in target index +/// \param target_positions_in_read the array of positions of the (read id, position)-pairs in target index void generate_anchors(thrust::device_vector& anchors, - const thrust::device_vector& anchor_starting_indices, + const thrust::device_vector& anchor_starting_indices_d, const thrust::device_vector& query_starting_index_of_each_representation_d, const thrust::device_vector& found_target_indices_d, const thrust::device_vector& target_starting_index_of_each_representation_d, @@ -114,6 +161,7 @@ void generate_anchors(thrust::device_vector& anchors, const thrust::device_vector& query_positions_in_read, const thrust::device_vector& target_read_ids, const thrust::device_vector& target_positions_in_read); + /// \brief Performs a binary search on target_representations_d for each element of query_representations_d and stores the found index (or -1 iff not found) in found_target_indices. 
/// /// For example: @@ -136,14 +184,28 @@ void generate_anchors(thrust::device_vector& anchors, /// \param n_target_representations size of \param target_representations_d __global__ void find_query_target_matches_kernel(int64_t* const found_target_indices_d, const representation_t* const query_representations_d, const int64_t n_query_representations, const representation_t* const target_representations_d, const int64_t n_target_representations); -/// TODO +/// \brief Generates an array of anchors from matches of representations of the query and target index +/// +/// see generate_anchors() +/// +/// \param anchors the array to be filled with anchors, the size of this array has to be equal to the last element of anchor_starting_indices +/// \param n_anchors the size of the anchors array +/// \param anchor_starting_indices_d the array of starting indices of the set of anchors for each unique representation of the query index (representations with no match in target will have the same starting index as the last matching representation) +/// \param query_starting_index_of_each_representation_d the starting index of a representation in query_read_ids and query_positions_in_read +/// \param found_target_indices_d the found matches in the array of unique target representation for each unique representation of query index +/// \param target_starting_index_of_each_representation_d the starting index of a representation in target_read_ids and target_positions_in_read +/// \param n_query_representations the size of the query_starting_index_of_each_representation_d and found_target_indices_d arrays, ie. the number of unique representations in the query index +/// \param query_read_ids the array of read ids of the (read id, position)-pairs in query index +/// \param query_positions_in_read the array of positions of the (read id, position)-pairs in query index +/// \param target_read_ids the array of read ids of the (read id, position)-pairs in target index +/// \param target_positions_in_read the array of positions of the (read id, position)-pairs in target index __global__ void generate_anchors_kernel( Anchor* const anchors_d, - int64_t n_anchors, + const int64_t n_anchors, const int64_t* const anchor_starting_index_d, const std::uint32_t* const query_starting_index_of_each_representation_d, const std::int64_t* const found_target_indices_d, - int32_t n_query_representations, + const int32_t n_query_representations, const std::uint32_t* const target_starting_index_of_each_representation_d, const read_id_t* const query_read_ids, const position_in_read_t* const query_positions_in_read, From 53803f756f523480f7e5bd49f42b208f6f5b7d79 Mon Sep 17 00:00:00 2001 From: Mike Vella Date: Tue, 19 Nov 2019 11:23:17 +0000 Subject: [PATCH 079/128] Added test for minimizers with hashing enabled --- cudamapper/tests/Test_CudamapperMinimizer.cpp | 137 +++++++++++++++++- 1 file changed, 130 insertions(+), 7 deletions(-) diff --git a/cudamapper/tests/Test_CudamapperMinimizer.cpp b/cudamapper/tests/Test_CudamapperMinimizer.cpp index 26e7478f6..85ef5e489 100644 --- a/cudamapper/tests/Test_CudamapperMinimizer.cpp +++ b/cudamapper/tests/Test_CudamapperMinimizer.cpp @@ -23,7 +23,8 @@ void test_function(const std::uint64_t number_of_reads_to_add, const std::vector& merged_basepairs_h, const std::vector& read_id_to_basepairs_section_h, const std::vector& expected_representations_h, - const std::vector& expected_rest_h) + const std::vector& expected_rest_h, + const bool hash_minimizers) { device_buffer 
merged_basepairs_d(merged_basepairs_h.size()); CGA_CU_CHECK_ERR(cudaMemcpy(merged_basepairs_d.data(), @@ -44,7 +45,7 @@ void test_function(const std::uint64_t number_of_reads_to_add, merged_basepairs_d, read_id_to_basepairs_section_h, read_id_to_basepairs_section_d, - false); + hash_minimizers); device_buffer representations_d = std::move(sketch_elements.representations_d); std::vector representations_h(representations_d.size()); @@ -97,7 +98,24 @@ TEST(TestCudamappperMinimizer, GATT_4_1) merged_basepairs_h, read_id_to_basepairs_section_h, expected_representations_h, - expected_rest_h); + expected_rest_h, + false); + + // Test with minimizer hashing enabled + std::vector expected_representations_hashed_h; + expected_representations_hashed_h.push_back(304626093); + std::vector expected_rest_hashed_h; + expected_rest_hashed_h.push_back({0, 0, 0}); + + test_function(number_of_reads_to_add, + minimizer_size, + window_size, + read_id_of_first_read, + merged_basepairs_h, + read_id_to_basepairs_section_h, + expected_representations_hashed_h, + expected_rest_hashed_h, + true); } TEST(TestCudamappperMinimizer, GATT_2_3) @@ -145,7 +163,29 @@ TEST(TestCudamappperMinimizer, GATT_2_3) merged_basepairs_h, read_id_to_basepairs_section_h, expected_representations_h, - expected_rest_h); + expected_rest_h, + false); + + + // Test with minimizer hashing enabled + std::vector expected_representations_hashed_h; + expected_representations_hashed_h.push_back(1023180699); + expected_representations_hashed_h.push_back(2797583197); + expected_representations_hashed_h.push_back(3255840626); + std::vector expected_rest_hashed_h; + expected_rest_hashed_h.push_back({0, 0, 0}); + expected_rest_hashed_h.push_back({0, 1, 0}); + expected_rest_hashed_h.push_back({0, 2, 0}); + + test_function(number_of_reads_to_add, + minimizer_size, + window_size, + read_id_of_first_read, + merged_basepairs_h, + read_id_to_basepairs_section_h, + expected_representations_hashed_h, + expected_rest_hashed_h, + true); } TEST(TestCudamappperMinimizer, CCCATACC_2_7) @@ -212,7 +252,31 @@ TEST(TestCudamappperMinimizer, CCCATACC_2_7) merged_basepairs_h, read_id_to_basepairs_section_h, expected_representations_h, - expected_rest_h); + expected_rest_h, + false); + + // Test with minimizer hashing enabled + std::vector expected_representations_hashed_h; + expected_representations_hashed_h.push_back(2515151312); + expected_representations_hashed_h.push_back(2515151312); + expected_representations_hashed_h.push_back(1582582417); + expected_representations_hashed_h.push_back(2515151312); + + std::vector expected_rest_hashed_h; + expected_rest_hashed_h.push_back({0, 0, 0}); + expected_rest_hashed_h.push_back({0, 1, 0}); + expected_rest_hashed_h.push_back({0, 2, 0}); + expected_rest_hashed_h.push_back({0, 6, 0}); + + test_function(number_of_reads_to_add, + minimizer_size, + window_size, + read_id_of_first_read, + merged_basepairs_h, + read_id_to_basepairs_section_h, + expected_representations_hashed_h, + expected_rest_hashed_h, + true); } TEST(TestCudamappperMinimizer, CATCAAG_AAGCTA_3_2) @@ -294,7 +358,36 @@ TEST(TestCudamappperMinimizer, CATCAAG_AAGCTA_3_2) merged_basepairs_h, read_id_to_basepairs_section_h, expected_representations_h, - expected_rest_h); + expected_rest_h, + false); + + // Test with minimizer hashing enabled + std::vector expected_representations_hashed_h; + expected_representations_hashed_h.push_back(549100223); + expected_representations_hashed_h.push_back(447855090); + expected_representations_hashed_h.push_back(1279515286); + 
expected_representations_hashed_h.push_back(1865025060); + expected_representations_hashed_h.push_back(1865025060); + expected_representations_hashed_h.push_back(4103259927); + expected_representations_hashed_h.push_back(357458314); + std::vector expected_rest_hashed_h; + expected_rest_hashed_h.push_back({0, 0, 0}); + expected_rest_hashed_h.push_back({0, 1, 1}); + expected_rest_hashed_h.push_back({0, 2, 0}); + expected_rest_hashed_h.push_back({0, 4, 0}); + expected_rest_hashed_h.push_back({1, 0, 0}); + expected_rest_hashed_h.push_back({1, 2, 1}); + expected_rest_hashed_h.push_back({1, 3, 0}); + + test_function(number_of_reads_to_add, + minimizer_size, + window_size, + read_id_of_first_read, + merged_basepairs_h, + read_id_to_basepairs_section_h, + expected_representations_hashed_h, + expected_rest_hashed_h, + true); } TEST(TestCudamappperMinimizer, CATCAAG_AAGCTA_3_2_read_id_offset_5) @@ -376,7 +469,37 @@ TEST(TestCudamappperMinimizer, CATCAAG_AAGCTA_3_2_read_id_offset_5) merged_basepairs_h, read_id_to_basepairs_section_h, expected_representations_h, - expected_rest_h); + expected_rest_h, + false); + + std::vector expected_representations_hashed_h; + expected_representations_hashed_h.push_back(549100223); + expected_representations_hashed_h.push_back(447855090); + expected_representations_hashed_h.push_back(1279515286); + expected_representations_hashed_h.push_back(1865025060); + expected_representations_hashed_h.push_back(1865025060); + expected_representations_hashed_h.push_back(4103259927); + expected_representations_hashed_h.push_back(357458314); + std::vector expected_rest_hashed_h; + expected_rest_hashed_h.push_back({5, 0, 0}); + expected_rest_hashed_h.push_back({5, 1, 1}); + expected_rest_hashed_h.push_back({5, 2, 0}); + expected_rest_hashed_h.push_back({5, 4, 0}); + expected_rest_hashed_h.push_back({6, 0, 0}); + expected_rest_hashed_h.push_back({6, 2, 1}); + expected_rest_hashed_h.push_back({6, 3, 0}); + + test_function(number_of_reads_to_add, + minimizer_size, + window_size, + read_id_of_first_read, + merged_basepairs_h, + read_id_to_basepairs_section_h, + expected_representations_hashed_h, + expected_rest_hashed_h, + true); + // Test with minimizer hashing enabled + } } // namespace cudamapper } // namespace claragenomics From 8a45f8c8c11518d35454200ddd607d4ceacf8603 Mon Sep 17 00:00:00 2001 From: Mike Vella Date: Tue, 19 Nov 2019 11:47:41 +0000 Subject: [PATCH 080/128] Running make format --- cudamapper/tests/Test_CudamapperMinimizer.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/cudamapper/tests/Test_CudamapperMinimizer.cpp b/cudamapper/tests/Test_CudamapperMinimizer.cpp index 85ef5e489..5a69c21e0 100644 --- a/cudamapper/tests/Test_CudamapperMinimizer.cpp +++ b/cudamapper/tests/Test_CudamapperMinimizer.cpp @@ -166,7 +166,6 @@ TEST(TestCudamappperMinimizer, GATT_2_3) expected_rest_h, false); - // Test with minimizer hashing enabled std::vector expected_representations_hashed_h; expected_representations_hashed_h.push_back(1023180699); @@ -499,7 +498,6 @@ TEST(TestCudamappperMinimizer, CATCAAG_AAGCTA_3_2_read_id_offset_5) expected_rest_hashed_h, true); // Test with minimizer hashing enabled - } } // namespace cudamapper } // namespace claragenomics From 4aac945c3de8d6235ad080fb1e34dba531ee343f Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Tue, 19 Nov 2019 12:53:16 +0100 Subject: [PATCH 081/128] [cudamapper] some formatting improvements --- cudamapper/src/matcher_gpu.cu | 23 +++++++++++++------ cudamapper/src/matcher_gpu.cuh | 42 +++++++++++++++++++++------------- 2 
files changed, 42 insertions(+), 23 deletions(-) diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index 57601c5d3..33d457a6b 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -120,7 +120,10 @@ namespace details namespace matcher_gpu { -void find_query_target_matches(thrust::device_vector& found_target_indices_d, const thrust::device_vector& query_representations_d, const thrust::device_vector& target_representations_d) +void find_query_target_matches( + thrust::device_vector& found_target_indices_d, + const thrust::device_vector& query_representations_d, + const thrust::device_vector& target_representations_d) { assert(found_target_indices_d.size() == query_representations_d.size()); @@ -130,10 +133,11 @@ void find_query_target_matches(thrust::device_vector& found_target find_query_target_matches_kernel<<>>(found_target_indices_d.data().get(), query_representations_d.data().get(), get_size(query_representations_d), target_representations_d.data().get(), get_size(target_representations_d)); } -void compute_anchor_starting_indices(thrust::device_vector& anchor_starting_indices_d, - const thrust::device_vector& query_starting_index_of_each_representation_d, - const thrust::device_vector& found_target_indices_d, - const thrust::device_vector& target_starting_index_of_each_representation_d) +void compute_anchor_starting_indices( + thrust::device_vector& anchor_starting_indices_d, + const thrust::device_vector& query_starting_index_of_each_representation_d, + const thrust::device_vector& found_target_indices_d, + const thrust::device_vector& target_starting_index_of_each_representation_d) { assert(query_starting_index_of_each_representation_d.size() == found_target_indices_d.size() + 1); assert(anchor_starting_indices_d.size() == found_target_indices_d.size()); @@ -157,7 +161,12 @@ void compute_anchor_starting_indices(thrust::device_vector& anchor thrust::plus()); } -__global__ void find_query_target_matches_kernel(int64_t* const found_target_indices, const representation_t* const query_representations_d, const int64_t n_query_representations, const representation_t* const target_representations_d, const int64_t n_target_representations) +__global__ void find_query_target_matches_kernel( + int64_t* const found_target_indices, + const representation_t* const query_representations_d, + const int64_t n_query_representations, + const representation_t* const target_representations_d, + const int64_t n_target_representations) { const int64_t i = blockIdx.x * blockDim.x + threadIdx.x; @@ -243,7 +252,7 @@ __global__ void generate_anchors_kernel( const std::uint32_t target_begin = target_starting_index_of_each_representation_d[j]; const std::uint32_t target_end = target_starting_index_of_each_representation_d[j + 1]; - const std::uint32_t n_targets = target_end - target_begin; + const std::uint32_t n_targets = target_end - target_begin; // Overall we want to do an all-to-all (n*m) matching between the query and target entries // with the same representation. diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index 33f956e4c..5540af00a 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -65,7 +65,10 @@ namespace matcher_gpu /// \param found_target_indices_d The array which will filled with the resulting target indices. This array has to be of same size as query_representations_d. 
/// \param query_representations_d An array of query representations /// \param target_representations_d An sorted array of target representations -void find_query_target_matches(thrust::device_vector& found_target_indices_d, const thrust::device_vector& query_representations_d, const thrust::device_vector& target_representations_d); +void find_query_target_matches( + thrust::device_vector& found_target_indices_d, + const thrust::device_vector& query_representations_d, + const thrust::device_vector& target_representations_d); /// \brief Computes the starting indices for an array of anchors based on the matches in query and target arrays. /// @@ -99,10 +102,11 @@ void find_query_target_matches(thrust::device_vector& found_target /// \param query_starting_index_of_each_representation_d /// \param found_target_indices_d /// \param target_starting_index_of_each_representation_d -void compute_anchor_starting_indices(thrust::device_vector& anchor_starting_indices_d, - const thrust::device_vector& query_starting_index_of_each_representation_d, - const thrust::device_vector& found_target_indices_d, - const thrust::device_vector& target_starting_index_of_each_representation_d); +void compute_anchor_starting_indices( + thrust::device_vector& anchor_starting_indices_d, + const thrust::device_vector& query_starting_index_of_each_representation_d, + const thrust::device_vector& found_target_indices_d, + const thrust::device_vector& target_starting_index_of_each_representation_d); /// \brief Generates an array of anchors from matches of representations of the query and target index /// @@ -110,7 +114,7 @@ void compute_anchor_starting_indices(thrust::device_vector& anchor /// anchor_starting_indices for each unique representation of the query index. /// The anchor_starting_indices can be computed by compute_anchor_starting_indices and the size of the /// anchors array must match the last element of anchor_starting_indices. 
-/// +/// /// For example: /// (see also compute_anchor_starting_indices() ) /// anchor_starting_indices: @@ -152,15 +156,16 @@ void compute_anchor_starting_indices(thrust::device_vector& anchor /// \param query_positions_in_read the array of positions of the (read id, position)-pairs in query index /// \param target_read_ids the array of read ids of the (read id, position)-pairs in target index /// \param target_positions_in_read the array of positions of the (read id, position)-pairs in target index -void generate_anchors(thrust::device_vector& anchors, - const thrust::device_vector& anchor_starting_indices_d, - const thrust::device_vector& query_starting_index_of_each_representation_d, - const thrust::device_vector& found_target_indices_d, - const thrust::device_vector& target_starting_index_of_each_representation_d, - const thrust::device_vector& query_read_ids, - const thrust::device_vector& query_positions_in_read, - const thrust::device_vector& target_read_ids, - const thrust::device_vector& target_positions_in_read); +void generate_anchors( + thrust::device_vector& anchors, + const thrust::device_vector& anchor_starting_indices_d, + const thrust::device_vector& query_starting_index_of_each_representation_d, + const thrust::device_vector& found_target_indices_d, + const thrust::device_vector& target_starting_index_of_each_representation_d, + const thrust::device_vector& query_read_ids, + const thrust::device_vector& query_positions_in_read, + const thrust::device_vector& target_read_ids, + const thrust::device_vector& target_positions_in_read); /// \brief Performs a binary search on target_representations_d for each element of query_representations_d and stores the found index (or -1 iff not found) in found_target_indices. /// @@ -182,7 +187,12 @@ void generate_anchors(thrust::device_vector& anchors, /// \param n_query_representations size of \param query_representations_d and \param found_target_indices_d /// \param target_representations_d the array of targets to be searched /// \param n_target_representations size of \param target_representations_d -__global__ void find_query_target_matches_kernel(int64_t* const found_target_indices_d, const representation_t* const query_representations_d, const int64_t n_query_representations, const representation_t* const target_representations_d, const int64_t n_target_representations); +__global__ void find_query_target_matches_kernel( + int64_t* const found_target_indices_d, + const representation_t* const query_representations_d, + const int64_t n_query_representations, + const representation_t* const target_representations_d, + const int64_t n_target_representations); /// \brief Generates an array of anchors from matches of representations of the query and target index /// From 0a47ae7f4df8c086fb8ea7a1cae60b2d202118e5 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Tue, 19 Nov 2019 13:25:11 +0100 Subject: [PATCH 082/128] Testing unique_representation in test_find_first_occurrences_of_representations --- .../Test_CudamapperIndexGPUTwoIndices.cu | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu index e2c79b6d8..67aa019c3 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu @@ -885,7 +885,8 @@ namespace index_gpu_two_indices // ************ Test find_first_occurrences_of_representations ************** void 
test_find_first_occurrences_of_representations(const thrust::host_vector& representations_h, - const thrust::host_vector& expected_starting_index_of_each_representation_h) + const thrust::host_vector& expected_starting_index_of_each_representation_h, + const thrust::host_vector& expected_unique_representations_h) { const thrust::device_vector representations_d(representations_h); @@ -896,13 +897,18 @@ void test_find_first_occurrences_of_representations(const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); + const thrust::host_vector unique_representations_h(unique_representations_d); ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); + ASSERT_EQ(unique_representations_h.size(), expected_unique_representations_h.size()); + ASSERT_EQ(starting_index_of_each_representation_h.size(), unique_representations_h.size() + 1); // starting_index_of_each_representation_h has an additional element for the past-the-end element - for (std::size_t i = 0; i < expected_starting_index_of_each_representation_h.size(); ++i) + for (std::size_t i = 0; i < unique_representations_h.size(); ++i) { EXPECT_EQ(starting_index_of_each_representation_h[i], expected_starting_index_of_each_representation_h[i]) << "index: " << i; + EXPECT_EQ(unique_representations_h[i], expected_unique_representations_h[i]) << "index: " << i; } + EXPECT_EQ(starting_index_of_each_representation_h.back(), expected_starting_index_of_each_representation_h.back()) << "index: " << expected_starting_index_of_each_representation_h.size() - 1; } TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_small_example) @@ -915,14 +921,17 @@ TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representa /// 0 4 10 13 18 21 thrust::host_vector representations_h; - thrust::device_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_unique_representations_h; representations_h.push_back(0); expected_starting_index_of_each_representation_h.push_back(0); + expected_unique_representations_h.push_back(0); representations_h.push_back(0); representations_h.push_back(0); representations_h.push_back(0); representations_h.push_back(12); expected_starting_index_of_each_representation_h.push_back(4); + expected_unique_representations_h.push_back(12); representations_h.push_back(12); representations_h.push_back(12); representations_h.push_back(12); @@ -930,22 +939,26 @@ TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representa representations_h.push_back(12); representations_h.push_back(23); expected_starting_index_of_each_representation_h.push_back(10); + expected_unique_representations_h.push_back(23); representations_h.push_back(23); representations_h.push_back(23); representations_h.push_back(32); expected_starting_index_of_each_representation_h.push_back(13); + expected_unique_representations_h.push_back(32); representations_h.push_back(32); representations_h.push_back(32); representations_h.push_back(32); representations_h.push_back(32); representations_h.push_back(46); expected_starting_index_of_each_representation_h.push_back(18); + expected_unique_representations_h.push_back(46); representations_h.push_back(46); representations_h.push_back(46); expected_starting_index_of_each_representation_h.push_back(21); 
test_find_first_occurrences_of_representations(representations_h, - expected_starting_index_of_each_representation_h); + expected_starting_index_of_each_representation_h, + expected_unique_representations_h); } TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_large_example) @@ -954,7 +967,8 @@ TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representa const std::uint32_t sketch_elements_with_same_representation = 1000; thrust::host_vector representations_h; - thrust::device_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_unique_representations_h; for (std::size_t i = 0; i < total_sketch_elements; ++i) { @@ -962,12 +976,14 @@ TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representa if (i % sketch_elements_with_same_representation == 0) { expected_starting_index_of_each_representation_h.push_back(i); + expected_unique_representations_h.push_back(i / sketch_elements_with_same_representation); } } expected_starting_index_of_each_representation_h.push_back(total_sketch_elements); test_find_first_occurrences_of_representations(representations_h, - expected_starting_index_of_each_representation_h); + expected_starting_index_of_each_representation_h, + expected_unique_representations_h); } // ************ Test create_new_value_mask ************** From 50e00640b1e3b536e97cd9adf6fb77196f460c49 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Tue, 19 Nov 2019 14:10:55 +0100 Subject: [PATCH 083/128] Better check of sizes in test_find_first_occurrences_of_representations_kernel --- cudamapper/src/index_gpu_two_indices.cu | 2 +- cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cudamapper/src/index_gpu_two_indices.cu b/cudamapper/src/index_gpu_two_indices.cu index 98a453ea8..0e1a4ddcc 100644 --- a/cudamapper/src/index_gpu_two_indices.cu +++ b/cudamapper/src/index_gpu_two_indices.cu @@ -53,7 +53,7 @@ void find_first_occurrences_of_representations(thrust::device_vector input_representations_d(input_representations_h); ASSERT_EQ(expected_starting_index_of_each_representation_h.size(), representation_index_mask_h.back()); ASSERT_EQ(expected_unique_representations_h.size(), representation_index_mask_h.back()); - thrust::device_vector starting_index_of_each_representation_d(expected_starting_index_of_each_representation_h.size()); - thrust::device_vector unique_representations_d(expected_starting_index_of_each_representation_h.size()); + + const std::uint64_t number_of_unique_representations = representation_index_mask_h.back(); + ASSERT_EQ(expected_starting_index_of_each_representation_h.size(), number_of_unique_representations); + ASSERT_EQ(expected_unique_representations_h.size(), number_of_unique_representations); + thrust::device_vector starting_index_of_each_representation_d(number_of_unique_representations); + thrust::device_vector unique_representations_d(number_of_unique_representations); std::uint32_t number_of_blocks = (representation_index_mask_d.size() - 1) / number_of_threads + 1; From 04a22795ffbadb2597e1c0f5eae7507ae6287c11 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Tue, 19 Nov 2019 14:16:41 +0100 Subject: [PATCH 084/128] Clarifying the meaning of window_size and kmer_size --- .../include/claragenomics/cudamapper/index_two_indices.hpp | 2 +- cudamapper/src/index_gpu_two_indices.cuh | 4 +++- 2 files changed, 4 
insertions(+), 2 deletions(-) diff --git a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp index 1fcf41e42..e79cadcef 100644 --- a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp +++ b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp @@ -77,7 +77,7 @@ class IndexTwoIndices /// \param first_read_id read_id of the first read to the included in this index /// \param past_the_last_read_id read_id+1 of the last read to be included in this index /// \param kmer_size k - the kmer length - /// \param window_size w - the length of the sliding window used to find sketch elements + /// \param window_size w - the length of the sliding window used to find sketch elements (i.e. the number of adjacent k-mers in a window, adjacent = shifted by one basepair) /// \return instance of IndexTwoIndices static std::unique_ptr create_index(const io::FastaParser& parser, diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh index 947a4bc90..ec3559419 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -51,7 +51,7 @@ public: /// \param first_read_id read_id of the first read to the included in this index /// \param past_the_last_read_id read_id+1 of the last read to be included in this index /// \param kmer_size k - the kmer length - /// \param window_size w - the length of the sliding window used to find sketch elements + /// \param window_size w - the length of the sliding window used to find sketch elements (i.e. the number of adjacent k-mers in a window, adjacent = shifted by one basepair) IndexGPUTwoIndices(const io::FastaParser& parser, const read_id_t first_read_id, const read_id_t past_the_last_read_id, @@ -117,7 +117,9 @@ private: std::vector read_id_to_read_length_; const read_id_t first_read_id_ = 0; + // number of basepairs in a k-mer const std::uint64_t kmer_size_ = 0; + // the number of adjacent k-mers in a window, adjacent = shifted by one basepair const std::uint64_t window_size_ = 0; std::uint64_t number_of_reads_ = 0; }; From bfaa617b81f0f09e23483265857f5d9bb8786aeb Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Tue, 19 Nov 2019 14:45:49 +0100 Subject: [PATCH 085/128] Removed default constructor for IndexTwoIndices and IndexGPUTwoIndices --- .../include/claragenomics/cudamapper/index_two_indices.hpp | 4 ---- cudamapper/src/index_gpu_two_indices.cuh | 7 ++----- cudamapper/src/index_two_indices.cu | 5 +---- 3 files changed, 3 insertions(+), 13 deletions(-) diff --git a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp index e79cadcef..44b9eb99b 100644 --- a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp +++ b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp @@ -85,10 +85,6 @@ class IndexTwoIndices const read_id_t past_the_last_read_id, const std::uint64_t kmer_size, const std::uint64_t window_size); - - /// \brief creates an empty IndexTwoIndices - /// \return empty instacne of IndexTwoIndices - static std::unique_ptr create_index(); }; /// \} diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh index ec3559419..8b46d3d7d 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -58,9 +58,6 @@ public: const std::uint64_t kmer_size, const std::uint64_t window_size); - /// \brief 
Constructor - IndexGPUTwoIndices() = default; - /// \brief returns an array of representations of sketch elements /// \return an array of representations of sketch elements const thrust::device_vector& representations() const override; @@ -116,9 +113,9 @@ private: std::vector read_id_to_read_name_; std::vector read_id_to_read_length_; - const read_id_t first_read_id_ = 0; + const read_id_t first_read_id_ = 0; // number of basepairs in a k-mer - const std::uint64_t kmer_size_ = 0; + const std::uint64_t kmer_size_ = 0; // the number of adjacent k-mers in a window, adjacent = shifted by one basepair const std::uint64_t window_size_ = 0; std::uint64_t number_of_reads_ = 0; diff --git a/cudamapper/src/index_two_indices.cu b/cudamapper/src/index_two_indices.cu index 5125b626e..418e55f93 100644 --- a/cudamapper/src/index_two_indices.cu +++ b/cudamapper/src/index_two_indices.cu @@ -17,6 +17,7 @@ namespace claragenomics { namespace cudamapper { + std::unique_ptr IndexTwoIndices::create_index(const io::FastaParser& parser, const read_id_t first_read_id, const read_id_t past_the_last_read_id, @@ -31,9 +32,5 @@ std::unique_ptr IndexTwoIndices::create_index(const io::FastaPa window_size); } -std::unique_ptr IndexTwoIndices::create_index() -{ - return std::make_unique>(); -} } // namespace cudamapper } // namespace claragenomics From 0acc0ec548ba3aba34352e1be46926106871db3b Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Tue, 19 Nov 2019 14:53:55 +0100 Subject: [PATCH 086/128] Rearranged tests in Test_CudamapperIndexGPUTwoIndices.cu --- .../Test_CudamapperIndexGPUTwoIndices.cu | 2079 +++++++++-------- 1 file changed, 1040 insertions(+), 1039 deletions(-) diff --git a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu index 383454625..aa0a4ae5c 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu @@ -24,388 +24,567 @@ namespace claragenomics namespace cudamapper { -void test_function(const std::string& filename, - const read_id_t first_read_id, - const read_id_t past_the_last_read_id, - const std::uint64_t kmer_size, - const std::uint64_t window_size, - const std::vector& expected_representations, - const std::vector& expected_positions_in_reads, - const std::vector& expected_read_ids, - const std::vector& expected_directions_of_reads, - const std::vector& expected_unique_representations, - const std::vector& expected_first_occurrence_of_representations, - const std::vector& expected_read_id_to_read_name, - const std::vector& expected_read_id_to_read_length, - const std::uint64_t expected_number_of_reads) +namespace details +{ +namespace index_gpu_two_indices { - std::unique_ptr parser = io::create_fasta_parser(filename); - IndexGPUTwoIndices index(*parser, - first_read_id, - past_the_last_read_id, - kmer_size, - window_size); - ASSERT_EQ(index.number_of_reads(), expected_number_of_reads); - if (0 == expected_number_of_reads) - { - return; - } +// ************ Test find_first_occurrences_of_representations_kernel ************** - ASSERT_EQ(expected_number_of_reads, expected_read_id_to_read_name.size()); - ASSERT_EQ(expected_number_of_reads, expected_read_id_to_read_length.size()); - for (read_id_t read_id = first_read_id; read_id < past_the_last_read_id; ++read_id) - { - ASSERT_EQ(index.read_id_to_read_length(read_id), expected_read_id_to_read_length[read_id - first_read_id]) << "read_id: " << read_id; - ASSERT_EQ(index.read_id_to_read_name(read_id), 
expected_read_id_to_read_name[read_id - first_read_id]) << "read_id: " << read_id; - } +void test_find_first_occurrences_of_representations_kernel(const thrust::host_vector& representation_index_mask_h, + const thrust::host_vector& input_representations_h, + const thrust::host_vector& expected_starting_index_of_each_representation_h, + const thrust::host_vector& expected_unique_representations_h, + const std::uint32_t number_of_threads) +{ + const thrust::device_vector representation_index_mask_d(representation_index_mask_h); + const thrust::device_vector input_representations_d(input_representations_h); + ASSERT_EQ(expected_starting_index_of_each_representation_h.size(), representation_index_mask_h.back()); + ASSERT_EQ(expected_unique_representations_h.size(), representation_index_mask_h.back()); - // check arrays - const thrust::device_vector& representations_d = index.representations(); - const thrust::device_vector& positions_in_reads_d = index.positions_in_reads(); - const thrust::device_vector& read_ids_d = index.read_ids(); - const thrust::device_vector& directions_of_reads_d = index.directions_of_reads(); - const thrust::host_vector& representations_h(representations_d); - const thrust::host_vector& positions_in_reads_h(positions_in_reads_d); - const thrust::host_vector& read_ids_h(read_ids_d); - const thrust::host_vector& directions_of_reads_h(directions_of_reads_d); - ASSERT_EQ(representations_h.size(), expected_representations.size()); - ASSERT_EQ(positions_in_reads_h.size(), expected_positions_in_reads.size()); - ASSERT_EQ(read_ids_h.size(), expected_read_ids.size()); - ASSERT_EQ(directions_of_reads_h.size(), expected_directions_of_reads.size()); - ASSERT_EQ(representations_h.size(), positions_in_reads_h.size()); - ASSERT_EQ(positions_in_reads_h.size(), read_ids_h.size()); - ASSERT_EQ(read_ids_h.size(), directions_of_reads_h.size()); - for (std::size_t i = 0; i < expected_positions_in_reads.size(); ++i) - { - EXPECT_EQ(representations_h[i], expected_representations[i]) << "i: " << i; - EXPECT_EQ(positions_in_reads_h[i], expected_positions_in_reads[i]) << "i: " << i; - EXPECT_EQ(read_ids_h[i], expected_read_ids[i]) << "i: " << i; - EXPECT_EQ(directions_of_reads_h[i], expected_directions_of_reads[i]) << "i: " << i; - } + const std::uint64_t number_of_unique_representations = representation_index_mask_h.back(); + ASSERT_EQ(expected_starting_index_of_each_representation_h.size(), number_of_unique_representations); + ASSERT_EQ(expected_unique_representations_h.size(), number_of_unique_representations); + thrust::device_vector starting_index_of_each_representation_d(number_of_unique_representations); + thrust::device_vector unique_representations_d(number_of_unique_representations); - const thrust::device_vector unique_representations_d = index.unique_representations(); - const thrust::device_vector first_occurrence_of_representations_d = index.first_occurrence_of_representations(); + std::uint32_t number_of_blocks = (representation_index_mask_d.size() - 1) / number_of_threads + 1; + + find_first_occurrences_of_representations_kernel<<>>(representation_index_mask_d.data().get(), + input_representations_d.data().get(), + representation_index_mask_d.size(), + starting_index_of_each_representation_d.data().get(), + unique_representations_d.data().get()); + + const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); const thrust::host_vector unique_representations_h(unique_representations_d); - const thrust::host_vector 
first_occurrence_of_representations_h(first_occurrence_of_representations_d); - ASSERT_EQ(expected_unique_representations.size() + 1, expected_first_occurrence_of_representations.size()); - ASSERT_EQ(unique_representations_h.size(), expected_unique_representations.size()); - ASSERT_EQ(first_occurrence_of_representations_h.size(), expected_first_occurrence_of_representations.size()); - for (std::size_t i = 0; i < expected_unique_representations.size(); ++i) + + ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); + ASSERT_EQ(unique_representations_h.size(), expected_unique_representations_h.size()); + for (std::size_t i = 0; i < expected_starting_index_of_each_representation_h.size(); ++i) { - EXPECT_EQ(expected_unique_representations[i], unique_representations_h[i]) << "index: " << i; - EXPECT_EQ(expected_first_occurrence_of_representations[i], first_occurrence_of_representations_h[i]) << "index: " << i; + EXPECT_EQ(starting_index_of_each_representation_h[i], expected_starting_index_of_each_representation_h[i]) << "index: " << i; + EXPECT_EQ(unique_representations_h[i], expected_unique_representations_h[i]) << "index: " << i; } - EXPECT_EQ(expected_first_occurrence_of_representations.back(), expected_representations.size()); } -TEST(TestCudamapperIndexGPUTwoIndices, GATT_4_1) +TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_kernel_small_example) { - // >read_0 - // GATT - - // GATT = 0b10001111 - // AATC = 0b00001101 <- minimizer - - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"; - const std::uint64_t minimizer_size = 4; - const std::uint64_t window_size = 1; + thrust::host_vector representation_index_mask_h; + thrust::host_vector input_representations_h; + thrust::host_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_unique_representations_h; + representation_index_mask_h.push_back(1); + input_representations_h.push_back(10); + expected_starting_index_of_each_representation_h.push_back(0); + expected_unique_representations_h.push_back(10); + representation_index_mask_h.push_back(1); + input_representations_h.push_back(10); + representation_index_mask_h.push_back(1); + input_representations_h.push_back(10); + representation_index_mask_h.push_back(1); + input_representations_h.push_back(10); + // + representation_index_mask_h.push_back(2); + input_representations_h.push_back(20); + expected_starting_index_of_each_representation_h.push_back(4); + expected_unique_representations_h.push_back(20); + // + representation_index_mask_h.push_back(3); + input_representations_h.push_back(30); + expected_starting_index_of_each_representation_h.push_back(5); + expected_unique_representations_h.push_back(30); + representation_index_mask_h.push_back(3); + input_representations_h.push_back(30); + representation_index_mask_h.push_back(3); + input_representations_h.push_back(30); + representation_index_mask_h.push_back(3); + input_representations_h.push_back(30); + // + representation_index_mask_h.push_back(4); + input_representations_h.push_back(40); + expected_starting_index_of_each_representation_h.push_back(9); + expected_unique_representations_h.push_back(40); + representation_index_mask_h.push_back(4); + input_representations_h.push_back(40); + representation_index_mask_h.push_back(4); + input_representations_h.push_back(40); + // + representation_index_mask_h.push_back(5); + input_representations_h.push_back(50); + 
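// representation_index_mask is a 1-based, inclusive-scan style index: each element stores the
// ordinal of the unique representation its sketch element belongs to, so the last element equals
// the total number of unique representations (which is how the output arrays are sized above).
// A sketch of per-thread logic such a kernel could use, with names taken from the test's
// arguments (the kernel body itself is not shown in this patch):
//
//   const std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
//   if (i < size)
//   {
//       if (i == 0 || representation_index_mask_d[i] != representation_index_mask_d[i - 1])
//       {
//           const std::uint64_t out = representation_index_mask_d[i] - 1; // 1-based -> 0-based
//           starting_index_of_each_representation_d[out] = i;
//           unique_representations_d[out]                = input_representations_d[i];
//       }
//   }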
expected_starting_index_of_each_representation_h.push_back(12); + expected_unique_representations_h.push_back(50); + // + representation_index_mask_h.push_back(6); + input_representations_h.push_back(60); + expected_starting_index_of_each_representation_h.push_back(13); + expected_unique_representations_h.push_back(60); - std::vector expected_read_id_to_read_name; - expected_read_id_to_read_name.push_back("read_0"); + std::uint32_t number_of_threads = 3; - std::vector expected_read_id_to_read_length; - expected_read_id_to_read_length.push_back(4); + test_find_first_occurrences_of_representations_kernel(representation_index_mask_h, + input_representations_h, + expected_starting_index_of_each_representation_h, + expected_unique_representations_h, + number_of_threads); +} - std::vector expected_representations; - std::vector expected_positions_in_reads; - std::vector expected_read_ids; - std::vector expected_directions_of_reads; - std::vector expected_unique_representations; - std::vector expected_first_occurrence_of_representations; +TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_kernel_large_example) +{ + const std::uint64_t total_sketch_elements = 10000000; + const std::uint32_t sketch_elements_with_same_representation = 1000; - expected_representations.push_back(0b1101); - expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_unique_representations.push_back(0b1101); - expected_first_occurrence_of_representations.push_back(0); + thrust::host_vector representation_index_mask_h; + thrust::host_vector input_representations_h; + thrust::host_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_unique_representations_h; + for (std::size_t i = 0; i < total_sketch_elements; ++i) + { + representation_index_mask_h.push_back(i / sketch_elements_with_same_representation + 1); + input_representations_h.push_back(representation_index_mask_h.back() * 10); + if (i % sketch_elements_with_same_representation == 0) + { + expected_starting_index_of_each_representation_h.push_back(i); + expected_unique_representations_h.push_back(input_representations_h.back()); + } + } - expected_first_occurrence_of_representations.push_back(1); + std::uint32_t number_of_threads = 256; - test_function(filename, - 0, - 1, - minimizer_size, - window_size, - expected_representations, - expected_positions_in_reads, - expected_read_ids, - expected_directions_of_reads, - expected_unique_representations, - expected_first_occurrence_of_representations, - expected_read_id_to_read_name, - expected_read_id_to_read_length, - 1); + test_find_first_occurrences_of_representations_kernel(representation_index_mask_h, + input_representations_h, + expected_starting_index_of_each_representation_h, + expected_unique_representations_h, + number_of_threads); } -TEST(TestCudamapperIndexGPUTwoIndices, GATT_2_3) -{ - // >read_0 - // GATT - - // kmer representation: forward, reverse - // GA: <20> 31 - // AT: <03> 03 - // TT: 33 <00> +// ************ Test find_first_occurrences_of_representations ************** - // front end minimizers: representation, position_in_read, direction, read_id - // GA : 20 0 F 0 - // GAT: 03 1 F 0 +void test_find_first_occurrences_of_representations(const thrust::host_vector& representations_h, + const thrust::host_vector& expected_starting_index_of_each_representation_h, + const thrust::host_vector& 
expected_unique_representations_h) +{ + const thrust::device_vector representations_d(representations_h); - // central minimizers - // GATT: 00 2 R 0 + thrust::device_vector starting_index_of_each_representation_d; + thrust::device_vector unique_representations_d; + find_first_occurrences_of_representations(unique_representations_d, + starting_index_of_each_representation_d, + representations_d); - // back end minimizers - // ATT: 00 2 R 0 - // TT : 00 2 R 0 + const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); + const thrust::host_vector unique_representations_h(unique_representations_d); - // All minimizers: GA(0f), AT(1f), AA(2r) + ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); + ASSERT_EQ(unique_representations_h.size(), expected_unique_representations_h.size()); + ASSERT_EQ(starting_index_of_each_representation_h.size(), unique_representations_h.size() + 1); // starting_index_of_each_representation_h has an additional element for the past-the-end element - // (2r1) means position 2, reverse direction, read 1 - // (1,2) means array block start at element 1 and has 2 elements - - // 0 1 2 - // data arrays: GA(0f0), AT(1f0), AA(2r0) + for (std::size_t i = 0; i < unique_representations_h.size(); ++i) + { + EXPECT_EQ(starting_index_of_each_representation_h[i], expected_starting_index_of_each_representation_h[i]) << "index: " << i; + EXPECT_EQ(unique_representations_h[i], expected_unique_representations_h[i]) << "index: " << i; + } + EXPECT_EQ(starting_index_of_each_representation_h.back(), expected_starting_index_of_each_representation_h.back()) << "index: " << expected_starting_index_of_each_representation_h.size() - 1; +} - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"; - const std::uint64_t minimizer_size = 2; - const std::uint64_t window_size = 3; +TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_small_example) +{ + /// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 + /// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 + /// 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 + /// 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5 + /// ^ ^ ^ ^ ^ ^ + /// 0 4 10 13 18 21 - std::vector expected_read_id_to_read_name; - expected_read_id_to_read_name.push_back("read_0"); + thrust::host_vector representations_h; + thrust::host_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_unique_representations_h; + representations_h.push_back(0); + expected_starting_index_of_each_representation_h.push_back(0); + expected_unique_representations_h.push_back(0); + representations_h.push_back(0); + representations_h.push_back(0); + representations_h.push_back(0); + representations_h.push_back(12); + expected_starting_index_of_each_representation_h.push_back(4); + expected_unique_representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(12); + representations_h.push_back(23); + expected_starting_index_of_each_representation_h.push_back(10); + expected_unique_representations_h.push_back(23); + representations_h.push_back(23); + representations_h.push_back(23); + representations_h.push_back(32); + expected_starting_index_of_each_representation_h.push_back(13); + expected_unique_representations_h.push_back(32); + 
representations_h.push_back(32); + representations_h.push_back(32); + representations_h.push_back(32); + representations_h.push_back(32); + representations_h.push_back(46); + expected_starting_index_of_each_representation_h.push_back(18); + expected_unique_representations_h.push_back(46); + representations_h.push_back(46); + representations_h.push_back(46); + expected_starting_index_of_each_representation_h.push_back(21); - std::vector expected_read_id_to_read_length; - expected_read_id_to_read_length.push_back(4); + test_find_first_occurrences_of_representations(representations_h, + expected_starting_index_of_each_representation_h, + expected_unique_representations_h); +} - std::vector expected_representations; - std::vector expected_positions_in_reads; - std::vector expected_read_ids; - std::vector expected_directions_of_reads; - std::vector expected_unique_representations; - std::vector expected_first_occurrence_of_representations; +TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_large_example) +{ + const std::uint64_t total_sketch_elements = 10000000; + const std::uint32_t sketch_elements_with_same_representation = 1000; - expected_representations.push_back(0b0000); // AA(2r0) - expected_positions_in_reads.push_back(2); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_unique_representations.push_back(0b0000); - expected_first_occurrence_of_representations.push_back(0); - expected_representations.push_back(0b0011); // AT(1f0) - expected_positions_in_reads.push_back(1); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b0011); - expected_first_occurrence_of_representations.push_back(1); - expected_representations.push_back(0b1000); // GA(0f0) - expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b1000); - expected_first_occurrence_of_representations.push_back(2); + thrust::host_vector representations_h; + thrust::host_vector expected_starting_index_of_each_representation_h; + thrust::host_vector expected_unique_representations_h; - expected_first_occurrence_of_representations.push_back(3); + for (std::size_t i = 0; i < total_sketch_elements; ++i) + { + representations_h.push_back(i / sketch_elements_with_same_representation); + if (i % sketch_elements_with_same_representation == 0) + { + expected_starting_index_of_each_representation_h.push_back(i); + expected_unique_representations_h.push_back(i / sketch_elements_with_same_representation); + } + } + expected_starting_index_of_each_representation_h.push_back(total_sketch_elements); - test_function(filename, - 0, - 1, - minimizer_size, - window_size, - expected_representations, - expected_positions_in_reads, - expected_read_ids, - expected_directions_of_reads, - expected_unique_representations, - expected_first_occurrence_of_representations, - expected_read_id_to_read_name, - expected_read_id_to_read_length, - 1); + test_find_first_occurrences_of_representations(representations_h, + expected_starting_index_of_each_representation_h, + expected_unique_representations_h); } -TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_2_8) +// ************ Test create_new_value_mask ************** + +void test_create_new_value_mask(const 
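// create_new_value_mask marks the position where each run of equal representations starts:
// new_value_mask[i] is 1 when i == 0 or representations[i] != representations[i - 1], and 0
// otherwise, which is exactly the pattern encoded in the expected data of the tests below.
// A minimal sketch of the per-thread logic, assuming one thread per element (the kernel's own
// definition is not part of this patch):
//
//   const std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
//   if (i < number_of_elements)
//   {
//       new_value_mask_d[i] = (i == 0) || (representations_d[i] != representations_d[i - 1]);
//   }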
thrust::host_vector& representations_h, + const thrust::host_vector& expected_new_value_mask_h, + std::uint32_t number_of_threads) { - // *** Read is shorter than one full window, the result should be empty *** + const thrust::device_vector representations_d(representations_h); + thrust::device_vector new_value_mask_d(representations_h.size()); - // >read_0 - // CCCATACC + std::uint32_t number_of_blocks = (representations_h.size() - 1) / number_of_threads + 1; - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/cccatacc.fasta"; - const std::uint64_t minimizer_size = 2; - const std::uint64_t window_size = 8; + create_new_value_mask<<>>(thrust::raw_pointer_cast(representations_d.data()), + representations_d.size(), + thrust::raw_pointer_cast(new_value_mask_d.data())); - // all data arrays should be empty + const thrust::host_vector new_value_mask_h(new_value_mask_d); - std::vector expected_read_id_to_read_name; + ASSERT_EQ(new_value_mask_h.size(), expected_new_value_mask_h.size()); + for (std::size_t i = 0; i < expected_new_value_mask_h.size(); ++i) + { + EXPECT_EQ(new_value_mask_h[i], expected_new_value_mask_h[i]) << "index: " << i; + } +} - std::vector expected_read_id_to_read_length; +TEST(TestCudamapperIndexGPUTwoIndices, test_create_new_value_mask_small_example) +{ + thrust::host_vector representations_h; + thrust::host_vector expected_new_value_mask_h; + representations_h.push_back(0); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(0); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(0); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(0); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(0); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(3); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(3); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(3); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(4); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(5); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(5); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(8); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(8); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(8); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(9); + expected_new_value_mask_h.push_back(1); + representations_h.push_back(9); + expected_new_value_mask_h.push_back(0); + representations_h.push_back(9); + expected_new_value_mask_h.push_back(0); - std::vector expected_representations; - std::vector expected_positions_in_reads; - std::vector expected_read_ids; - std::vector expected_directions_of_reads; - std::vector expected_unique_representations; - std::vector expected_first_occurrence_of_representations; + std::uint32_t number_of_threads = 3; - test_function(filename, - 0, - 1, - minimizer_size, - window_size, - expected_representations, - expected_positions_in_reads, - expected_read_ids, - expected_directions_of_reads, - expected_unique_representations, - expected_first_occurrence_of_representations, - expected_read_id_to_read_name, - expected_read_id_to_read_length, - 0); + test_create_new_value_mask(representations_h, + expected_new_value_mask_h, + number_of_threads); } -// TODO: Cover this case as well -//TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_5) -//{ -// // *** 
One Read is shorter than one full window, the other is not *** -// -// // >read_0 -// // CATCAAG -// // >read_1 -// // AAGCTA -// -// // ** CATCAAG ** -// -// // kmer representation: forward, reverse -// // CAT: 103 <032> -// // ATC: <031> 203 -// // TCA: <310> 320 -// // CAA: <100> 332 -// // AAG: <002> 133 -// -// // front end minimizers: representation, position_in_read, direction, read_id -// // CAT : 032 0 R 0 -// // CATC : 031 1 F 0 -// // CATCA : 031 1 F 0 -// // CATCAA: 031 1 F 0 -// -// // central minimizers -// // CATCAAG: 002 4 F 0 -// -// // back end minimizers -// // ATCAAG: 002 4 F 0 -// // TCAAG : 002 4 F 0 -// // CAAG : 002 4 F 0 -// // AAG : 002 4 F 0 -// -// // ** AAGCTA ** -// // ** read does not fit one array ** -// -// // All minimizers: ATG(0r0), ATC(1f0), AAG(4f0) -// -// // (2r1) means position 2, reverse direction, read 1 -// // (1,2) means array block start at element 1 and has 2 elements -// -// // 0 1 2 -// // data arrays: AAG(4f0), ATC(1f0), ATG(0r0) -// -// const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/catcaag_aagcta.fasta"; -// const std::uint64_t minimizer_size = 3; -// const std::uint64_t window_size = 5; -// -// std::vector expected_read_id_to_read_name; -// expected_read_id_to_read_name.push_back("read_0"); -// -// std::vector expected_read_id_to_read_length; -// expected_read_id_to_read_length.push_back(7); -// -// std::vector expected_representations; -// std::vector expected_positions_in_reads; -// std::vector expected_read_ids; -// std::vector expected_directions_of_reads; -// expected_representations.push_back(0b000010); // AAG(4f0) -// expected_positions_in_reads.push_back(4); -// expected_read_ids.push_back(0); -// expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); -// expected_representations.push_back(0b001101); // ATC(1f0) -// expected_positions_in_reads.push_back(1); -// expected_read_ids.push_back(0); -// expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); -// expected_representations.push_back(0b001110); // ATG(0r0) -// expected_positions_in_reads.push_back(0); -// expected_read_ids.push_back(0); -// expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); -// -// test_function(filename, -// 0, -// 2, -// minimizer_size, -// window_size, -// expected_representations, -// expected_positions_in_reads, -// expected_read_ids, -// expected_directions_of_reads, -// expected_read_id_to_read_name, -// expected_read_id_to_read_length, -// 1); // <- only one read goes into index, the other is too short -//} +TEST(TestCudamapperIndexGPUTwoIndices, test_create_new_value_mask_small_data_large_example) +{ + const std::uint64_t total_sketch_elements = 10000000; + const std::uint32_t sketch_elements_with_same_representation = 1000; -TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_3_5) + thrust::host_vector representations_h; + thrust::host_vector expected_new_value_mask_h; + for (std::size_t i = 0; i < total_sketch_elements; ++i) + { + representations_h.push_back(i / sketch_elements_with_same_representation); + if (i % sketch_elements_with_same_representation == 0) + expected_new_value_mask_h.push_back(1); + else + expected_new_value_mask_h.push_back(0); + } + + std::uint32_t number_of_threads = 256; + + test_create_new_value_mask(representations_h, + expected_new_value_mask_h, + number_of_threads); +} + +// ************ Test copy_rest_to_separate_arrays ************** + +template +void 
test_function_copy_rest_to_separate_arrays(const thrust::host_vector& rest_h, + const thrust::host_vector& expected_read_ids_h, + const thrust::host_vector& expected_positions_in_reads_h, + const thrust::host_vector& expected_directions_of_reads_h, + const std::uint32_t threads) { - // >read_0 - // CCCATACC + ASSERT_EQ(rest_h.size(), expected_read_ids_h.size()); + ASSERT_EQ(rest_h.size(), expected_positions_in_reads_h.size()); + ASSERT_EQ(rest_h.size(), expected_directions_of_reads_h.size()); + thrust::device_vector generated_read_ids_d(rest_h.size()); + thrust::device_vector generated_positions_in_reads_d(rest_h.size()); + thrust::device_vector generated_directions_of_reads_d(rest_h.size()); - // ** CCCATAC ** + const thrust::device_vector rest_d(rest_h); - // kmer representation: forward, reverse - // CCC: <111> 222 - // CCA: <110> 322 - // CAT: 103 <032> - // ATA: <030> 303 - // TAC: 301 <230> - // ACC: <011> 223 + const std::uint32_t blocks = ceiling_divide(rest_h.size(), threads); - // front end minimizers: representation, position_in_read, direction - // CCC : 111 0 F - // CCCA : 110 1 F - // CCCAT : 032 2 R - // CCCATA: 030 3 F + copy_rest_to_separate_arrays<<>>(rest_d.data().get(), + generated_read_ids_d.data().get(), + generated_positions_in_reads_d.data().get(), + generated_directions_of_reads_d.data().get(), + rest_h.size()); - // central minimizers - // CCCATAC: 030 3 F - // CCATACC: 011 5 F + const thrust::host_vector& generated_read_ids_h(generated_read_ids_d); + const thrust::host_vector& generated_positions_in_reads_h(generated_positions_in_reads_d); + const thrust::host_vector& generated_directions_of_reads_h(generated_directions_of_reads_d); - // back end minimizers - // CATACC: 011 5 F - // ATACC : 011 5 F - // TACC : 011 5 F - // ACC : 011 5 F + for (std::size_t i = 0; i < rest_h.size(); ++i) + { + EXPECT_EQ(generated_read_ids_h[i], expected_read_ids_h[i]); + EXPECT_EQ(generated_positions_in_reads_h[i], expected_positions_in_reads_h[i]); + EXPECT_EQ(generated_directions_of_reads_h[i], expected_directions_of_reads_h[i]); + } +} - // All minimizers: CCC(0f), CCA(1f), ATG(2r), ATA(3f), ACC(5f) +TEST(TestCudamapperIndexGPUTwoIndices, test_function_copy_rest_to_separate_arrays) +{ + thrust::host_vector rest_h; + thrust::host_vector expected_read_ids_h; + thrust::host_vector expected_positions_in_reads_h; + thrust::host_vector expected_directions_of_reads_h; + + rest_h.push_back({5, 8, 0}); + expected_read_ids_h.push_back(5); + expected_positions_in_reads_h.push_back(8); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({15, 6, 0}); + expected_read_ids_h.push_back(15); + expected_positions_in_reads_h.push_back(6); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({2, 4, 1}); + expected_read_ids_h.push_back(2); + expected_positions_in_reads_h.push_back(4); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({18, 15, 0}); + expected_read_ids_h.push_back(18); + expected_positions_in_reads_h.push_back(15); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({6, 4, 1}); + expected_read_ids_h.push_back(6); + expected_positions_in_reads_h.push_back(4); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({6, 3, 1}); + expected_read_ids_h.push_back(6); + 
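// copy_rest_to_separate_arrays unpacks the packed per-sketch-element records built above
// ({read id, position in read, direction}, with 0 meaning FORWARD and 1 meaning REVERSE) into
// three separate arrays. A sketch of the per-thread copy, assuming the packed element exposes
// those three fields (the member names used here are illustrative only):
//
//   const std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
//   if (i < number_of_elements)
//   {
//       read_ids_d[i]            = rest_d[i].read_id_;
//       positions_in_reads_d[i]  = rest_d[i].position_in_read_;
//       directions_of_reads_d[i] = static_cast<Minimizer::DirectionOfRepresentation>(rest_d[i].direction_);
//   }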
expected_positions_in_reads_h.push_back(3); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({89, 45, 0}); + expected_read_ids_h.push_back(89); + expected_positions_in_reads_h.push_back(45); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({547, 25, 0}); + expected_read_ids_h.push_back(547); + expected_positions_in_reads_h.push_back(25); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({14, 16, 1}); + expected_read_ids_h.push_back(14); + expected_positions_in_reads_h.push_back(16); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({18, 16, 0}); + expected_read_ids_h.push_back(18); + expected_positions_in_reads_h.push_back(16); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({45, 44, 0}); + expected_read_ids_h.push_back(45); + expected_positions_in_reads_h.push_back(44); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({65, 45, 1}); + expected_read_ids_h.push_back(65); + expected_positions_in_reads_h.push_back(45); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({15, 20, 0}); + expected_read_ids_h.push_back(15); + expected_positions_in_reads_h.push_back(20); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({45, 654, 1}); + expected_read_ids_h.push_back(45); + expected_positions_in_reads_h.push_back(654); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({782, 216, 0}); + expected_read_ids_h.push_back(782); + expected_positions_in_reads_h.push_back(216); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({255, 245, 1}); + expected_read_ids_h.push_back(255); + expected_positions_in_reads_h.push_back(245); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({346, 579, 0}); + expected_read_ids_h.push_back(346); + expected_positions_in_reads_h.push_back(579); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({12, 8, 0}); + expected_read_ids_h.push_back(12); + expected_positions_in_reads_h.push_back(8); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + rest_h.push_back({65, 42, 1}); + expected_read_ids_h.push_back(65); + expected_positions_in_reads_h.push_back(42); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); + rest_h.push_back({566, 42, 0}); + expected_read_ids_h.push_back(566); + expected_positions_in_reads_h.push_back(42); + expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + + const std::uint32_t threads = 8; + + test_function_copy_rest_to_separate_arrays(rest_h, + expected_read_ids_h, + expected_positions_in_reads_h, + expected_directions_of_reads_h, + threads); +} + +} // namespace index_gpu_two_indices +} // namespace details + +void test_function(const std::string& filename, + const read_id_t first_read_id, + const read_id_t past_the_last_read_id, + const std::uint64_t kmer_size, + const std::uint64_t window_size, + const std::vector& 
expected_representations, + const std::vector& expected_positions_in_reads, + const std::vector& expected_read_ids, + const std::vector& expected_directions_of_reads, + const std::vector& expected_unique_representations, + const std::vector& expected_first_occurrence_of_representations, + const std::vector& expected_read_id_to_read_name, + const std::vector& expected_read_id_to_read_length, + const std::uint64_t expected_number_of_reads) +{ + std::unique_ptr parser = io::create_fasta_parser(filename); + IndexGPUTwoIndices index(*parser, + first_read_id, + past_the_last_read_id, + kmer_size, + window_size); + + ASSERT_EQ(index.number_of_reads(), expected_number_of_reads); + if (0 == expected_number_of_reads) + { + return; + } + + ASSERT_EQ(expected_number_of_reads, expected_read_id_to_read_name.size()); + ASSERT_EQ(expected_number_of_reads, expected_read_id_to_read_length.size()); + for (read_id_t read_id = first_read_id; read_id < past_the_last_read_id; ++read_id) + { + ASSERT_EQ(index.read_id_to_read_length(read_id), expected_read_id_to_read_length[read_id - first_read_id]) << "read_id: " << read_id; + ASSERT_EQ(index.read_id_to_read_name(read_id), expected_read_id_to_read_name[read_id - first_read_id]) << "read_id: " << read_id; + } + + // check arrays + const thrust::device_vector& representations_d = index.representations(); + const thrust::device_vector& positions_in_reads_d = index.positions_in_reads(); + const thrust::device_vector& read_ids_d = index.read_ids(); + const thrust::device_vector& directions_of_reads_d = index.directions_of_reads(); + const thrust::host_vector& representations_h(representations_d); + const thrust::host_vector& positions_in_reads_h(positions_in_reads_d); + const thrust::host_vector& read_ids_h(read_ids_d); + const thrust::host_vector& directions_of_reads_h(directions_of_reads_d); + ASSERT_EQ(representations_h.size(), expected_representations.size()); + ASSERT_EQ(positions_in_reads_h.size(), expected_positions_in_reads.size()); + ASSERT_EQ(read_ids_h.size(), expected_read_ids.size()); + ASSERT_EQ(directions_of_reads_h.size(), expected_directions_of_reads.size()); + ASSERT_EQ(representations_h.size(), positions_in_reads_h.size()); + ASSERT_EQ(positions_in_reads_h.size(), read_ids_h.size()); + ASSERT_EQ(read_ids_h.size(), directions_of_reads_h.size()); + for (std::size_t i = 0; i < expected_positions_in_reads.size(); ++i) + { + EXPECT_EQ(representations_h[i], expected_representations[i]) << "i: " << i; + EXPECT_EQ(positions_in_reads_h[i], expected_positions_in_reads[i]) << "i: " << i; + EXPECT_EQ(read_ids_h[i], expected_read_ids[i]) << "i: " << i; + EXPECT_EQ(directions_of_reads_h[i], expected_directions_of_reads[i]) << "i: " << i; + } + + const thrust::device_vector unique_representations_d = index.unique_representations(); + const thrust::device_vector first_occurrence_of_representations_d = index.first_occurrence_of_representations(); + const thrust::host_vector unique_representations_h(unique_representations_d); + const thrust::host_vector first_occurrence_of_representations_h(first_occurrence_of_representations_d); + ASSERT_EQ(expected_unique_representations.size() + 1, expected_first_occurrence_of_representations.size()); + ASSERT_EQ(unique_representations_h.size(), expected_unique_representations.size()); + ASSERT_EQ(first_occurrence_of_representations_h.size(), expected_first_occurrence_of_representations.size()); + for (std::size_t i = 0; i < expected_unique_representations.size(); ++i) + { + EXPECT_EQ(expected_unique_representations[i], 
unique_representations_h[i]) << "index: " << i; + EXPECT_EQ(expected_first_occurrence_of_representations[i], first_occurrence_of_representations_h[i]) << "index: " << i; + } + EXPECT_EQ(expected_first_occurrence_of_representations.back(), expected_representations.size()); +} - // (2r1) means position 2, reverse direction, read 1 - // (1,2) means array block start at element 1 and has 2 elements +TEST(TestCudamapperIndexGPUTwoIndices, GATT_4_1) +{ + // >read_0 + // GATT - // 0 1 2 - // data arrays: ACC(5f0), ATA(3f0), ATG(2r0), CCA(1f0), CCC(0f0) + // GATT = 0b10001111 + // AATC = 0b00001101 <- minimizer - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/cccatacc.fasta"; - const std::uint64_t minimizer_size = 3; - const std::uint64_t window_size = 5; + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"; + const std::uint64_t minimizer_size = 4; + const std::uint64_t window_size = 1; std::vector expected_read_id_to_read_name; expected_read_id_to_read_name.push_back("read_0"); std::vector expected_read_id_to_read_length; - expected_read_id_to_read_length.push_back(8); + expected_read_id_to_read_length.push_back(4); std::vector expected_representations; std::vector expected_positions_in_reads; @@ -414,38 +593,14 @@ TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_3_5) std::vector expected_unique_representations; std::vector expected_first_occurrence_of_representations; - expected_representations.push_back(0b000101); // ACC(5f0) - expected_positions_in_reads.push_back(5); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b000101); - expected_first_occurrence_of_representations.push_back(0); - expected_representations.push_back(0b001100); // ATA(3f0) - expected_positions_in_reads.push_back(3); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b001100); - expected_first_occurrence_of_representations.push_back(1); - expected_representations.push_back(0b001110); // ATG(2r0) - expected_positions_in_reads.push_back(2); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_unique_representations.push_back(0b001110); - expected_first_occurrence_of_representations.push_back(2); - expected_representations.push_back(0b010100); // CCA(1f0) - expected_positions_in_reads.push_back(1); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b010100); - expected_first_occurrence_of_representations.push_back(3); - expected_representations.push_back(0b010101); // CCC(0f0) + expected_representations.push_back(0b1101); expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b010101); - expected_first_occurrence_of_representations.push_back(4); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_unique_representations.push_back(0b1101); + expected_first_occurrence_of_representations.push_back(0); - expected_first_occurrence_of_representations.push_back(5); + 
expected_first_occurrence_of_representations.push_back(1); test_function(filename, 0, @@ -463,74 +618,44 @@ TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_3_5) 1); } -TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_2) +TEST(TestCudamapperIndexGPUTwoIndices, GATT_2_3) { // >read_0 - // CATCAAG - // >read_1 - // AAGCTA - - // ** CATCAAG ** - - // kmer representation: forward, reverse - // CAT: 103 <032> - // ATC: <031> 203 - // TCA: <310> 320 - // CAA: <100> 332 - // AAG: <002> 133 - - // front end minimizers: representation, position_in_read, direction, read_id - // CAT: 032 0 R 0 - - // central minimizers - // CATC: 031 1 F 0 - // ATCA: 031 1 F 0 - // TCAA: 100 3 F 0 - // CAAG: 002 4 F 0 - - // back end minimizers - // AAG: 002 4 F 0 - - // All minimizers: ATC(1f), CAA(3f), AAG(4f), ATG(0r) - - // ** AAGCTA ** + // GATT // kmer representation: forward, reverse - // AAG: <002> 133 - // AGC: <021> 213 - // GCT: 213 <021> - // CTA: <130> 302 + // GA: <20> 31 + // AT: <03> 03 + // TT: 33 <00> // front end minimizers: representation, position_in_read, direction, read_id - // AAG: 002 0 F 1 + // GA : 20 0 F 0 + // GAT: 03 1 F 0 // central minimizers - // AAGC: 002 0 F 1 - // AGCT: 021 2 R 1 // only the last minimizer is saved - // GCTA: 021 2 R 1 + // GATT: 00 2 R 0 // back end minimizers - // CTA: 130 3 F 1 + // ATT: 00 2 R 0 + // TT : 00 2 R 0 - // All minimizers: AAG(0f), AGC(1f), CTA(3f) + // All minimizers: GA(0f), AT(1f), AA(2r) // (2r1) means position 2, reverse direction, read 1 // (1,2) means array block start at element 1 and has 2 elements - // 0 1 2 3 4 5 6 - // data arrays: AAG(4f0), AAG(0f1), AGC(2r1), ATC(1f0), ATG(0r0), CAA(3f0), CTA(3f1) + // 0 1 2 + // data arrays: GA(0f0), AT(1f0), AA(2r0) - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/catcaag_aagcta.fasta"; - const std::uint64_t minimizer_size = 3; - const std::uint64_t window_size = 2; + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"; + const std::uint64_t minimizer_size = 2; + const std::uint64_t window_size = 3; std::vector expected_read_id_to_read_name; expected_read_id_to_read_name.push_back("read_0"); - expected_read_id_to_read_name.push_back("read_1"); std::vector expected_read_id_to_read_length; - expected_read_id_to_read_length.push_back(7); - expected_read_id_to_read_length.push_back(6); + expected_read_id_to_read_length.push_back(4); std::vector expected_representations; std::vector expected_positions_in_reads; @@ -539,52 +664,30 @@ TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_2) std::vector expected_unique_representations; std::vector expected_first_occurrence_of_representations; - expected_representations.push_back(0b000010); // AAG(4f0) - expected_positions_in_reads.push_back(4); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b0010); - expected_first_occurrence_of_representations.push_back(0); - expected_representations.push_back(0b000010); // AAG(0f1) - expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b001001); // AGC(2r1) + expected_representations.push_back(0b0000); // AA(2r0) expected_positions_in_reads.push_back(2); - expected_read_ids.push_back(1); + expected_read_ids.push_back(0); 
expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_unique_representations.push_back(0b001001); - expected_first_occurrence_of_representations.push_back(2); - expected_representations.push_back(0b001101); // ATC(1f0) + expected_unique_representations.push_back(0b0000); + expected_first_occurrence_of_representations.push_back(0); + expected_representations.push_back(0b0011); // AT(1f0) expected_positions_in_reads.push_back(1); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b001101); - expected_first_occurrence_of_representations.push_back(3); - expected_representations.push_back(0b001110); // ATG(0r0) + expected_unique_representations.push_back(0b0011); + expected_first_occurrence_of_representations.push_back(1); + expected_representations.push_back(0b1000); // GA(0f0) expected_positions_in_reads.push_back(0); expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_unique_representations.push_back(0b001110); - expected_first_occurrence_of_representations.push_back(4); - expected_representations.push_back(0b010000); // CAA(3f0) - expected_positions_in_reads.push_back(3); - expected_read_ids.push_back(0); - expected_unique_representations.push_back(0b010000); - expected_first_occurrence_of_representations.push_back(5); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b011100); // CTA(3f1) - expected_positions_in_reads.push_back(3); - expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b011100); - expected_first_occurrence_of_representations.push_back(6); + expected_unique_representations.push_back(0b1000); + expected_first_occurrence_of_representations.push_back(2); - expected_first_occurrence_of_representations.push_back(7); + expected_first_occurrence_of_representations.push_back(3); test_function(filename, 0, - 2, + 1, minimizer_size, window_size, expected_representations, @@ -595,89 +698,183 @@ TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_2) expected_first_occurrence_of_representations, expected_read_id_to_read_name, expected_read_id_to_read_length, - 2); + 1); +} + +TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_2_8) +{ + // *** Read is shorter than one full window, the result should be empty *** + + // >read_0 + // CCCATACC + + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/cccatacc.fasta"; + const std::uint64_t minimizer_size = 2; + const std::uint64_t window_size = 8; + + // all data arrays should be empty + + std::vector expected_read_id_to_read_name; + + std::vector expected_read_id_to_read_length; + + std::vector expected_representations; + std::vector expected_positions_in_reads; + std::vector expected_read_ids; + std::vector expected_directions_of_reads; + std::vector expected_unique_representations; + std::vector expected_first_occurrence_of_representations; + + test_function(filename, + 0, + 1, + minimizer_size, + window_size, + expected_representations, + expected_positions_in_reads, + expected_read_ids, + expected_directions_of_reads, + expected_unique_representations, + expected_first_occurrence_of_representations, + expected_read_id_to_read_name, + expected_read_id_to_read_length, + 0); } 
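The walkthrough comments in these tests encode bases with two bits (A = 0, C = 1, G = 2, T = 3): a k-mer's representation is the smaller of its forward encoding and the encoding of its reverse complement, the chosen one also fixes the direction, and ties go to the forward strand (e.g. "GA: <20> 31" and "CAT: 103 <032>" in the surrounding walkthroughs). A minimal host-side sketch of that convention, assuming this encoding (it is not the production minimizer code):

#include <cstdint>
#include <string>
#include <utility>

// Returns {representation, is_reverse} for a single k-mer; ties are resolved in favour
// of the forward direction, matching the "<>" markers in the test comments.
std::pair<std::uint64_t, bool> representation_of_kmer(const std::string& kmer)
{
    auto encode = [](char base) -> std::uint64_t {
        switch (base)
        {
        case 'A': return 0;
        case 'C': return 1;
        case 'G': return 2;
        default: return 3; // 'T'
        }
    };
    std::uint64_t forward = 0;
    std::uint64_t reverse = 0;
    for (std::size_t i = 0; i < kmer.size(); ++i)
    {
        forward = (forward << 2) | encode(kmer[i]);
        // complement of the base read from the opposite end
        reverse = (reverse << 2) | (3 - encode(kmer[kmer.size() - 1 - i]));
    }
    return forward <= reverse ? std::make_pair(forward, false) : std::make_pair(reverse, true);
}

Selecting the sketch elements then amounts to taking, for every window of window_size adjacent k-mers, the k-mer with the smallest such representation, with the shrinking front-end and back-end windows handled as the surrounding walkthroughs list them.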
-TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3) +// TODO: Cover this case as well +//TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_5) +//{ +// // *** One Read is shorter than one full window, the other is not *** +// +// // >read_0 +// // CATCAAG +// // >read_1 +// // AAGCTA +// +// // ** CATCAAG ** +// +// // kmer representation: forward, reverse +// // CAT: 103 <032> +// // ATC: <031> 203 +// // TCA: <310> 320 +// // CAA: <100> 332 +// // AAG: <002> 133 +// +// // front end minimizers: representation, position_in_read, direction, read_id +// // CAT : 032 0 R 0 +// // CATC : 031 1 F 0 +// // CATCA : 031 1 F 0 +// // CATCAA: 031 1 F 0 +// +// // central minimizers +// // CATCAAG: 002 4 F 0 +// +// // back end minimizers +// // ATCAAG: 002 4 F 0 +// // TCAAG : 002 4 F 0 +// // CAAG : 002 4 F 0 +// // AAG : 002 4 F 0 +// +// // ** AAGCTA ** +// // ** read does not fit one array ** +// +// // All minimizers: ATG(0r0), ATC(1f0), AAG(4f0) +// +// // (2r1) means position 2, reverse direction, read 1 +// // (1,2) means array block start at element 1 and has 2 elements +// +// // 0 1 2 +// // data arrays: AAG(4f0), ATC(1f0), ATG(0r0) +// +// const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/catcaag_aagcta.fasta"; +// const std::uint64_t minimizer_size = 3; +// const std::uint64_t window_size = 5; +// +// std::vector expected_read_id_to_read_name; +// expected_read_id_to_read_name.push_back("read_0"); +// +// std::vector expected_read_id_to_read_length; +// expected_read_id_to_read_length.push_back(7); +// +// std::vector expected_representations; +// std::vector expected_positions_in_reads; +// std::vector expected_read_ids; +// std::vector expected_directions_of_reads; +// expected_representations.push_back(0b000010); // AAG(4f0) +// expected_positions_in_reads.push_back(4); +// expected_read_ids.push_back(0); +// expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); +// expected_representations.push_back(0b001101); // ATC(1f0) +// expected_positions_in_reads.push_back(1); +// expected_read_ids.push_back(0); +// expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); +// expected_representations.push_back(0b001110); // ATG(0r0) +// expected_positions_in_reads.push_back(0); +// expected_read_ids.push_back(0); +// expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); +// +// test_function(filename, +// 0, +// 2, +// minimizer_size, +// window_size, +// expected_representations, +// expected_positions_in_reads, +// expected_read_ids, +// expected_directions_of_reads, +// expected_read_id_to_read_name, +// expected_read_id_to_read_length, +// 1); // <- only one read goes into index, the other is too short +//} + +TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_3_5) { // >read_0 - // AAAACTGAA - // >read_1 - // GCCAAAG - - // ** AAAACTGAA ** - - // kmer representation: forward, reverse - // AA: <00> 33 - // AA: <00> 33 - // AA: <00> 33 - // AC: <01> 23 - // CT: 13 <02> - // TG: 32 <10> - // GA: <20> 31 - // AA: <00> 33 - - // front end minimizers: representation, position_in_read, direction, read_id - // AA : 00 0 F 0 - // AAA: 00 1 F 0 - - // central minimizers - // AAAA: 00 2 F 0 - // AAAC: 00 2 F 0 - // AACT: 00 2 F 0 - // ACTG: 01 3 F 0 - // CTGA: 02 4 R 0 - // TGAA: 00 7 F 0 - - // back end minimizers - // GAA: 00 7 F 0 - // AA : 00 7 F 0 - - // All minimizers: AA(0f), AA(1f), AA(2f), AC(3f), AG(4r), AA (7f) + // CCCATACC - // ** GCCAAAG 
** + // ** CCCATAC ** // kmer representation: forward, reverse - // GC: <21> 21 - // CC: <11> 22 - // CA: <10> 32 - // AA: <00> 33 - // AA: <00> 33 - // AG: <03> 21 + // CCC: <111> 222 + // CCA: <110> 322 + // CAT: 103 <032> + // ATA: <030> 303 + // TAC: 301 <230> + // ACC: <011> 223 - // front end minimizers: representation, position_in_read, direction, read_id - // GC : 21 0 F 0 - // GCC: 11 1 F 0 + // front end minimizers: representation, position_in_read, direction + // CCC : 111 0 F + // CCCA : 110 1 F + // CCCAT : 032 2 R + // CCCATA: 030 3 F // central minimizers - // GCCA: 10 2 F 0 - // CCAA: 00 3 F 0 - // CAAA: 00 4 F 0 - // AAAG: 00 4 F 0 + // CCCATAC: 030 3 F + // CCATACC: 011 5 F // back end minimizers - // AAG: 00 4 F 0 - // AG : 03 5 F 0 + // CATACC: 011 5 F + // ATACC : 011 5 F + // TACC : 011 5 F + // ACC : 011 5 F - // All minimizers: GC(0f), CC(1f), CA(2f), AA(3f), AA(4f), AG(5f) + // All minimizers: CCC(0f), CCA(1f), ATG(2r), ATA(3f), ACC(5f) // (2r1) means position 2, reverse direction, read 1 // (1,2) means array block start at element 1 and has 2 elements - // 0 1 2 3 4 5 6 7 8 9 10 11 - // data arrays: AA(0f0), AA(1f0), AA(2f0), AA(7f0), AA(3f1), AA(4f1), AC(3f0), AG(4r0), AG(5f1), CA(2f1), CC(1f1), GC(0f1) + // 0 1 2 + // data arrays: ACC(5f0), ATA(3f0), ATG(2r0), CCA(1f0), CCC(0f0) - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/aaaactgaa_gccaaag.fasta"; - const std::uint64_t minimizer_size = 2; - const std::uint64_t window_size = 3; + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/cccatacc.fasta"; + const std::uint64_t minimizer_size = 3; + const std::uint64_t window_size = 5; std::vector expected_read_id_to_read_name; expected_read_id_to_read_name.push_back("read_0"); - expected_read_id_to_read_name.push_back("read_1"); std::vector expected_read_id_to_read_length; - expected_read_id_to_read_length.push_back(9); - expected_read_id_to_read_length.push_back(7); + expected_read_id_to_read_length.push_back(8); std::vector expected_representations; std::vector expected_positions_in_reads; @@ -686,72 +883,42 @@ TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3) std::vector expected_unique_representations; std::vector expected_first_occurrence_of_representations; - expected_representations.push_back(0b0000); // AA(0f0) - expected_positions_in_reads.push_back(0); + expected_representations.push_back(0b000101); // ACC(5f0) + expected_positions_in_reads.push_back(5); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b0000); + expected_unique_representations.push_back(0b000101); expected_first_occurrence_of_representations.push_back(0); - expected_representations.push_back(0b0000); // AA(1f0) - expected_positions_in_reads.push_back(1); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0000); // AA(2f0) - expected_positions_in_reads.push_back(2); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0000); // AA(7f0) - expected_positions_in_reads.push_back(7); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0000); // AA(3f1) - 
expected_positions_in_reads.push_back(3); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0000); // AA(4f1) - expected_positions_in_reads.push_back(4); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0001); // AC(3f0) + expected_representations.push_back(0b001100); // ATA(3f0) expected_positions_in_reads.push_back(3); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b0001); - expected_first_occurrence_of_representations.push_back(6); - expected_representations.push_back(0b0010); // AG(4r0) - expected_positions_in_reads.push_back(4); + expected_unique_representations.push_back(0b001100); + expected_first_occurrence_of_representations.push_back(1); + expected_representations.push_back(0b001110); // ATG(2r0) + expected_positions_in_reads.push_back(2); expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_unique_representations.push_back(0b0010); - expected_first_occurrence_of_representations.push_back(7); - expected_representations.push_back(0b0010); // AG(5f1) - expected_positions_in_reads.push_back(5); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0100); // CA(2f1) - expected_positions_in_reads.push_back(2); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b0100); - expected_first_occurrence_of_representations.push_back(9); - expected_representations.push_back(0b0101); // CC(1f1) + expected_unique_representations.push_back(0b001110); + expected_first_occurrence_of_representations.push_back(2); + expected_representations.push_back(0b010100); // CCA(1f0) expected_positions_in_reads.push_back(1); - expected_read_ids.push_back(1); + expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b0101); - expected_first_occurrence_of_representations.push_back(10); - expected_representations.push_back(0b1001); // GC(0f1) + expected_unique_representations.push_back(0b010100); + expected_first_occurrence_of_representations.push_back(3); + expected_representations.push_back(0b010101); // CCC(0f0) expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(1); + expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b1001); - expected_first_occurrence_of_representations.push_back(11); + expected_unique_representations.push_back(0b010101); + expected_first_occurrence_of_representations.push_back(4); - expected_first_occurrence_of_representations.push_back(12); + expected_first_occurrence_of_representations.push_back(5); test_function(filename, 0, - 2, + 1, minimizer_size, window_size, expected_representations, @@ -762,61 +929,77 @@ TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3) expected_first_occurrence_of_representations, expected_read_id_to_read_name, expected_read_id_to_read_length, - 2); + 
1); } -TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3_only_second_read_in_index) +TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_2) { // >read_0 - // AAAACTGAA + // CATCAAG // >read_1 - // GCCAAAG + // AAGCTA - // ** AAAACTGAA ** - // only second read goes into index + // ** CATCAAG ** - // ** GCCAAAG ** + // kmer representation: forward, reverse + // CAT: 103 <032> + // ATC: <031> 203 + // TCA: <310> 320 + // CAA: <100> 332 + // AAG: <002> 133 + + // front end minimizers: representation, position_in_read, direction, read_id + // CAT: 032 0 R 0 + + // central minimizers + // CATC: 031 1 F 0 + // ATCA: 031 1 F 0 + // TCAA: 100 3 F 0 + // CAAG: 002 4 F 0 + + // back end minimizers + // AAG: 002 4 F 0 + + // All minimizers: ATC(1f), CAA(3f), AAG(4f), ATG(0r) + + // ** AAGCTA ** // kmer representation: forward, reverse - // GC: <21> 21 - // CC: <11> 22 - // CA: <10> 32 - // AA: <00> 33 - // AA: <00> 33 - // AG: <03> 21 + // AAG: <002> 133 + // AGC: <021> 213 + // GCT: 213 <021> + // CTA: <130> 302 // front end minimizers: representation, position_in_read, direction, read_id - // GC : 21 0 F 0 - // GCC: 11 1 F 0 + // AAG: 002 0 F 1 // central minimizers - // GCCA: 10 2 F 0 - // CCAA: 00 3 F 0 - // CAAA: 00 4 F 0 - // AAAG: 00 4 F 0 + // AAGC: 002 0 F 1 + // AGCT: 021 2 R 1 // only the last minimizer is saved + // GCTA: 021 2 R 1 // back end minimizers - // AAG: 00 4 F 0 - // AG : 03 5 F 0 + // CTA: 130 3 F 1 - // All minimizers: GC(0f), CC(1f), CA(2f), AA(3f), AA(4f), AG(5f) + // All minimizers: AAG(0f), AGC(1f), CTA(3f) // (2r1) means position 2, reverse direction, read 1 // (1,2) means array block start at element 1 and has 2 elements - // 0 1 2 3 4 5 - // data arrays: AA(3f1), AA(4f1), AG(5f1), CA(2f1), CC(1f1), GC(0f1) + // 0 1 2 3 4 5 6 + // data arrays: AAG(4f0), AAG(0f1), AGC(2r1), ATC(1f0), ATG(0r0), CAA(3f0), CTA(3f1) - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/aaaactgaa_gccaaag.fasta"; - const std::uint64_t minimizer_size = 2; - const std::uint64_t window_size = 3; + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/catcaag_aagcta.fasta"; + const std::uint64_t minimizer_size = 3; + const std::uint64_t window_size = 2; - // only take second read std::vector expected_read_id_to_read_name; + expected_read_id_to_read_name.push_back("read_0"); expected_read_id_to_read_name.push_back("read_1"); std::vector expected_read_id_to_read_length; expected_read_id_to_read_length.push_back(7); + expected_read_id_to_read_length.push_back(6); std::vector expected_representations; std::vector expected_positions_in_reads; @@ -825,526 +1008,344 @@ TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3_only_second_read_in std::vector expected_unique_representations; std::vector expected_first_occurrence_of_representations; - expected_representations.push_back(0b0000); // AA(3f1) - expected_positions_in_reads.push_back(3); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b00); - expected_first_occurrence_of_representations.push_back(0); - expected_representations.push_back(0b0000); // AA(4f1) + expected_representations.push_back(0b000010); // AAG(4f0) expected_positions_in_reads.push_back(4); - expected_read_ids.push_back(1); + expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0010); // 
AG(5f1) - expected_positions_in_reads.push_back(5); + expected_unique_representations.push_back(0b0010); + expected_first_occurrence_of_representations.push_back(0); + expected_representations.push_back(0b000010); // AAG(0f1) + expected_positions_in_reads.push_back(0); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b0010); - expected_first_occurrence_of_representations.push_back(2); - expected_representations.push_back(0b0100); // CA(2f1) + expected_representations.push_back(0b001001); // AGC(2r1) expected_positions_in_reads.push_back(2); expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b0100); - expected_first_occurrence_of_representations.push_back(3); - expected_representations.push_back(0b0101); // CC(1f1) + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_unique_representations.push_back(0b001001); + expected_first_occurrence_of_representations.push_back(2); + expected_representations.push_back(0b001101); // ATC(1f0) expected_positions_in_reads.push_back(1); - expected_read_ids.push_back(1); + expected_read_ids.push_back(0); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b0101); - expected_first_occurrence_of_representations.push_back(4); - expected_representations.push_back(0b1001); // GC(0f1) + expected_unique_representations.push_back(0b001101); + expected_first_occurrence_of_representations.push_back(3); + expected_representations.push_back(0b001110); // ATG(0r0) expected_positions_in_reads.push_back(0); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_unique_representations.push_back(0b001110); + expected_first_occurrence_of_representations.push_back(4); + expected_representations.push_back(0b010000); // CAA(3f0) + expected_positions_in_reads.push_back(3); + expected_read_ids.push_back(0); + expected_unique_representations.push_back(0b010000); + expected_first_occurrence_of_representations.push_back(5); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b011100); // CTA(3f1) + expected_positions_in_reads.push_back(3); expected_read_ids.push_back(1); expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_unique_representations.push_back(0b1001); - expected_first_occurrence_of_representations.push_back(5); - + expected_unique_representations.push_back(0b011100); expected_first_occurrence_of_representations.push_back(6); + expected_first_occurrence_of_representations.push_back(7); + test_function(filename, - 1, // <- only take second read + 0, 2, minimizer_size, window_size, expected_representations, - expected_positions_in_reads, - expected_read_ids, - expected_directions_of_reads, - expected_unique_representations, - expected_first_occurrence_of_representations, - expected_read_id_to_read_name, - expected_read_id_to_read_length, - 1); -} - -namespace details -{ -namespace index_gpu_two_indices -{ -// ************ Test find_first_occurrences_of_representations ************** - -void test_find_first_occurrences_of_representations(const thrust::host_vector& representations_h, - const 
thrust::host_vector& expected_starting_index_of_each_representation_h, - const thrust::host_vector& expected_unique_representations_h) -{ - const thrust::device_vector representations_d(representations_h); - - thrust::device_vector starting_index_of_each_representation_d; - thrust::device_vector unique_representations_d; - find_first_occurrences_of_representations(unique_representations_d, - starting_index_of_each_representation_d, - representations_d); - - const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); - const thrust::host_vector unique_representations_h(unique_representations_d); - - ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); - ASSERT_EQ(unique_representations_h.size(), expected_unique_representations_h.size()); - ASSERT_EQ(starting_index_of_each_representation_h.size(), unique_representations_h.size() + 1); // starting_index_of_each_representation_h has an additional element for the past-the-end element - - for (std::size_t i = 0; i < unique_representations_h.size(); ++i) - { - EXPECT_EQ(starting_index_of_each_representation_h[i], expected_starting_index_of_each_representation_h[i]) << "index: " << i; - EXPECT_EQ(unique_representations_h[i], expected_unique_representations_h[i]) << "index: " << i; - } - EXPECT_EQ(starting_index_of_each_representation_h.back(), expected_starting_index_of_each_representation_h.back()) << "index: " << expected_starting_index_of_each_representation_h.size() - 1; -} - -TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_small_example) -{ - /// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 - /// 0 0 0 0 12 12 12 12 12 12 23 23 23 32 32 32 32 32 46 46 46 - /// 1 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 - /// 1 1 1 1 2 2 2 2 2 2 3 3 3 4 4 4 4 4 5 5 5 - /// ^ ^ ^ ^ ^ ^ - /// 0 4 10 13 18 21 - - thrust::host_vector representations_h; - thrust::host_vector expected_starting_index_of_each_representation_h; - thrust::host_vector expected_unique_representations_h; - representations_h.push_back(0); - expected_starting_index_of_each_representation_h.push_back(0); - expected_unique_representations_h.push_back(0); - representations_h.push_back(0); - representations_h.push_back(0); - representations_h.push_back(0); - representations_h.push_back(12); - expected_starting_index_of_each_representation_h.push_back(4); - expected_unique_representations_h.push_back(12); - representations_h.push_back(12); - representations_h.push_back(12); - representations_h.push_back(12); - representations_h.push_back(12); - representations_h.push_back(12); - representations_h.push_back(23); - expected_starting_index_of_each_representation_h.push_back(10); - expected_unique_representations_h.push_back(23); - representations_h.push_back(23); - representations_h.push_back(23); - representations_h.push_back(32); - expected_starting_index_of_each_representation_h.push_back(13); - expected_unique_representations_h.push_back(32); - representations_h.push_back(32); - representations_h.push_back(32); - representations_h.push_back(32); - representations_h.push_back(32); - representations_h.push_back(46); - expected_starting_index_of_each_representation_h.push_back(18); - expected_unique_representations_h.push_back(46); - representations_h.push_back(46); - representations_h.push_back(46); - expected_starting_index_of_each_representation_h.push_back(21); - - test_find_first_occurrences_of_representations(representations_h, - 
expected_starting_index_of_each_representation_h, - expected_unique_representations_h); -} - -TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_large_example) -{ - const std::uint64_t total_sketch_elements = 10000000; - const std::uint32_t sketch_elements_with_same_representation = 1000; - - thrust::host_vector representations_h; - thrust::host_vector expected_starting_index_of_each_representation_h; - thrust::host_vector expected_unique_representations_h; - - for (std::size_t i = 0; i < total_sketch_elements; ++i) - { - representations_h.push_back(i / sketch_elements_with_same_representation); - if (i % sketch_elements_with_same_representation == 0) - { - expected_starting_index_of_each_representation_h.push_back(i); - expected_unique_representations_h.push_back(i / sketch_elements_with_same_representation); - } - } - expected_starting_index_of_each_representation_h.push_back(total_sketch_elements); - - test_find_first_occurrences_of_representations(representations_h, - expected_starting_index_of_each_representation_h, - expected_unique_representations_h); -} - -// ************ Test create_new_value_mask ************** - -void test_create_new_value_mask(const thrust::host_vector& representations_h, - const thrust::host_vector& expected_new_value_mask_h, - std::uint32_t number_of_threads) -{ - const thrust::device_vector representations_d(representations_h); - thrust::device_vector new_value_mask_d(representations_h.size()); - - std::uint32_t number_of_blocks = (representations_h.size() - 1) / number_of_threads + 1; - - create_new_value_mask<<>>(thrust::raw_pointer_cast(representations_d.data()), - representations_d.size(), - thrust::raw_pointer_cast(new_value_mask_d.data())); - - const thrust::host_vector new_value_mask_h(new_value_mask_d); - - ASSERT_EQ(new_value_mask_h.size(), expected_new_value_mask_h.size()); - for (std::size_t i = 0; i < expected_new_value_mask_h.size(); ++i) - { - EXPECT_EQ(new_value_mask_h[i], expected_new_value_mask_h[i]) << "index: " << i; - } -} - -TEST(TestCudamapperIndexGPUTwoIndices, test_create_new_value_mask_small_example) -{ - thrust::host_vector representations_h; - thrust::host_vector expected_new_value_mask_h; - representations_h.push_back(0); - expected_new_value_mask_h.push_back(1); - representations_h.push_back(0); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(0); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(0); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(0); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(3); - expected_new_value_mask_h.push_back(1); - representations_h.push_back(3); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(3); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(4); - expected_new_value_mask_h.push_back(1); - representations_h.push_back(5); - expected_new_value_mask_h.push_back(1); - representations_h.push_back(5); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(8); - expected_new_value_mask_h.push_back(1); - representations_h.push_back(8); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(8); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(9); - expected_new_value_mask_h.push_back(1); - representations_h.push_back(9); - expected_new_value_mask_h.push_back(0); - representations_h.push_back(9); - expected_new_value_mask_h.push_back(0); - - std::uint32_t number_of_threads = 3; 
- - test_create_new_value_mask(representations_h, - expected_new_value_mask_h, - number_of_threads); + expected_positions_in_reads, + expected_read_ids, + expected_directions_of_reads, + expected_unique_representations, + expected_first_occurrence_of_representations, + expected_read_id_to_read_name, + expected_read_id_to_read_length, + 2); } -TEST(TestCudamapperIndexGPUTwoIndices, test_create_new_value_mask_small_data_large_example) +TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3) { - const std::uint64_t total_sketch_elements = 10000000; - const std::uint32_t sketch_elements_with_same_representation = 1000; + // >read_0 + // AAAACTGAA + // >read_1 + // GCCAAAG - thrust::host_vector representations_h; - thrust::host_vector expected_new_value_mask_h; - for (std::size_t i = 0; i < total_sketch_elements; ++i) - { - representations_h.push_back(i / sketch_elements_with_same_representation); - if (i % sketch_elements_with_same_representation == 0) - expected_new_value_mask_h.push_back(1); - else - expected_new_value_mask_h.push_back(0); - } + // ** AAAACTGAA ** - std::uint32_t number_of_threads = 256; + // kmer representation: forward, reverse + // AA: <00> 33 + // AA: <00> 33 + // AA: <00> 33 + // AC: <01> 23 + // CT: 13 <02> + // TG: 32 <10> + // GA: <20> 31 + // AA: <00> 33 - test_create_new_value_mask(representations_h, - expected_new_value_mask_h, - number_of_threads); -} + // front end minimizers: representation, position_in_read, direction, read_id + // AA : 00 0 F 0 + // AAA: 00 1 F 0 -// ************ Test find_first_occurrences_of_representations_kernel ************** + // central minimizers + // AAAA: 00 2 F 0 + // AAAC: 00 2 F 0 + // AACT: 00 2 F 0 + // ACTG: 01 3 F 0 + // CTGA: 02 4 R 0 + // TGAA: 00 7 F 0 -void test_find_first_occurrences_of_representations_kernel(const thrust::host_vector& representation_index_mask_h, - const thrust::host_vector& input_representations_h, - const thrust::host_vector& expected_starting_index_of_each_representation_h, - const thrust::host_vector& expected_unique_representations_h, - const std::uint32_t number_of_threads) -{ - const thrust::device_vector representation_index_mask_d(representation_index_mask_h); - const thrust::device_vector input_representations_d(input_representations_h); - ASSERT_EQ(expected_starting_index_of_each_representation_h.size(), representation_index_mask_h.back()); - ASSERT_EQ(expected_unique_representations_h.size(), representation_index_mask_h.back()); + // back end minimizers + // GAA: 00 7 F 0 + // AA : 00 7 F 0 - const std::uint64_t number_of_unique_representations = representation_index_mask_h.back(); - ASSERT_EQ(expected_starting_index_of_each_representation_h.size(), number_of_unique_representations); - ASSERT_EQ(expected_unique_representations_h.size(), number_of_unique_representations); - thrust::device_vector starting_index_of_each_representation_d(number_of_unique_representations); - thrust::device_vector unique_representations_d(number_of_unique_representations); + // All minimizers: AA(0f), AA(1f), AA(2f), AC(3f), AG(4r), AA (7f) - std::uint32_t number_of_blocks = (representation_index_mask_d.size() - 1) / number_of_threads + 1; + // ** GCCAAAG ** - find_first_occurrences_of_representations_kernel<<>>(representation_index_mask_d.data().get(), - input_representations_d.data().get(), - representation_index_mask_d.size(), - starting_index_of_each_representation_d.data().get(), - unique_representations_d.data().get()); + // kmer representation: forward, reverse + // GC: <21> 21 + // CC: <11> 22 + // 
CA: <10> 32 + // AA: <00> 33 + // AA: <00> 33 + // AG: <03> 21 - const thrust::host_vector starting_index_of_each_representation_h(starting_index_of_each_representation_d); - const thrust::host_vector unique_representations_h(unique_representations_d); + // front end minimizers: representation, position_in_read, direction, read_id + // GC : 21 0 F 0 + // GCC: 11 1 F 0 - ASSERT_EQ(starting_index_of_each_representation_h.size(), expected_starting_index_of_each_representation_h.size()); - ASSERT_EQ(unique_representations_h.size(), expected_unique_representations_h.size()); - for (std::size_t i = 0; i < expected_starting_index_of_each_representation_h.size(); ++i) - { - EXPECT_EQ(starting_index_of_each_representation_h[i], expected_starting_index_of_each_representation_h[i]) << "index: " << i; - EXPECT_EQ(unique_representations_h[i], expected_unique_representations_h[i]) << "index: " << i; - } -} + // central minimizers + // GCCA: 10 2 F 0 + // CCAA: 00 3 F 0 + // CAAA: 00 4 F 0 + // AAAG: 00 4 F 0 -TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_kernel_small_example) -{ - thrust::host_vector representation_index_mask_h; - thrust::host_vector input_representations_h; - thrust::host_vector expected_starting_index_of_each_representation_h; - thrust::host_vector expected_unique_representations_h; - representation_index_mask_h.push_back(1); - input_representations_h.push_back(10); - expected_starting_index_of_each_representation_h.push_back(0); - expected_unique_representations_h.push_back(10); - representation_index_mask_h.push_back(1); - input_representations_h.push_back(10); - representation_index_mask_h.push_back(1); - input_representations_h.push_back(10); - representation_index_mask_h.push_back(1); - input_representations_h.push_back(10); - // - representation_index_mask_h.push_back(2); - input_representations_h.push_back(20); - expected_starting_index_of_each_representation_h.push_back(4); - expected_unique_representations_h.push_back(20); - // - representation_index_mask_h.push_back(3); - input_representations_h.push_back(30); - expected_starting_index_of_each_representation_h.push_back(5); - expected_unique_representations_h.push_back(30); - representation_index_mask_h.push_back(3); - input_representations_h.push_back(30); - representation_index_mask_h.push_back(3); - input_representations_h.push_back(30); - representation_index_mask_h.push_back(3); - input_representations_h.push_back(30); - // - representation_index_mask_h.push_back(4); - input_representations_h.push_back(40); - expected_starting_index_of_each_representation_h.push_back(9); - expected_unique_representations_h.push_back(40); - representation_index_mask_h.push_back(4); - input_representations_h.push_back(40); - representation_index_mask_h.push_back(4); - input_representations_h.push_back(40); - // - representation_index_mask_h.push_back(5); - input_representations_h.push_back(50); - expected_starting_index_of_each_representation_h.push_back(12); - expected_unique_representations_h.push_back(50); - // - representation_index_mask_h.push_back(6); - input_representations_h.push_back(60); - expected_starting_index_of_each_representation_h.push_back(13); - expected_unique_representations_h.push_back(60); + // back end minimizers + // AAG: 00 4 F 0 + // AG : 03 5 F 0 - std::uint32_t number_of_threads = 3; + // All minimizers: GC(0f), CC(1f), CA(2f), AA(3f), AA(4f), AG(5f) - test_find_first_occurrences_of_representations_kernel(representation_index_mask_h, - input_representations_h, - 
expected_starting_index_of_each_representation_h, - expected_unique_representations_h, - number_of_threads); + // (2r1) means position 2, reverse direction, read 1 + // (1,2) means array block start at element 1 and has 2 elements + + // 0 1 2 3 4 5 6 7 8 9 10 11 + // data arrays: AA(0f0), AA(1f0), AA(2f0), AA(7f0), AA(3f1), AA(4f1), AC(3f0), AG(4r0), AG(5f1), CA(2f1), CC(1f1), GC(0f1) + + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/aaaactgaa_gccaaag.fasta"; + const std::uint64_t minimizer_size = 2; + const std::uint64_t window_size = 3; + + std::vector expected_read_id_to_read_name; + expected_read_id_to_read_name.push_back("read_0"); + expected_read_id_to_read_name.push_back("read_1"); + + std::vector expected_read_id_to_read_length; + expected_read_id_to_read_length.push_back(9); + expected_read_id_to_read_length.push_back(7); + + std::vector expected_representations; + std::vector expected_positions_in_reads; + std::vector expected_read_ids; + std::vector expected_directions_of_reads; + std::vector expected_unique_representations; + std::vector expected_first_occurrence_of_representations; + + expected_representations.push_back(0b0000); // AA(0f0) + expected_positions_in_reads.push_back(0); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0000); + expected_first_occurrence_of_representations.push_back(0); + expected_representations.push_back(0b0000); // AA(1f0) + expected_positions_in_reads.push_back(1); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0000); // AA(2f0) + expected_positions_in_reads.push_back(2); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0000); // AA(7f0) + expected_positions_in_reads.push_back(7); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0000); // AA(3f1) + expected_positions_in_reads.push_back(3); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0000); // AA(4f1) + expected_positions_in_reads.push_back(4); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0001); // AC(3f0) + expected_positions_in_reads.push_back(3); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0001); + expected_first_occurrence_of_representations.push_back(6); + expected_representations.push_back(0b0010); // AG(4r0) + expected_positions_in_reads.push_back(4); + expected_read_ids.push_back(0); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); + expected_unique_representations.push_back(0b0010); + expected_first_occurrence_of_representations.push_back(7); + expected_representations.push_back(0b0010); // AG(5f1) + expected_positions_in_reads.push_back(5); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + 
expected_representations.push_back(0b0100); // CA(2f1) + expected_positions_in_reads.push_back(2); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0100); + expected_first_occurrence_of_representations.push_back(9); + expected_representations.push_back(0b0101); // CC(1f1) + expected_positions_in_reads.push_back(1); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0101); + expected_first_occurrence_of_representations.push_back(10); + expected_representations.push_back(0b1001); // GC(0f1) + expected_positions_in_reads.push_back(0); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b1001); + expected_first_occurrence_of_representations.push_back(11); + + expected_first_occurrence_of_representations.push_back(12); + + test_function(filename, + 0, + 2, + minimizer_size, + window_size, + expected_representations, + expected_positions_in_reads, + expected_read_ids, + expected_directions_of_reads, + expected_unique_representations, + expected_first_occurrence_of_representations, + expected_read_id_to_read_name, + expected_read_id_to_read_length, + 2); } -TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_kernel_large_example) +TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3_only_second_read_in_index) { - const std::uint64_t total_sketch_elements = 10000000; - const std::uint32_t sketch_elements_with_same_representation = 1000; + // >read_0 + // AAAACTGAA + // >read_1 + // GCCAAAG - thrust::host_vector representation_index_mask_h; - thrust::host_vector input_representations_h; - thrust::host_vector expected_starting_index_of_each_representation_h; - thrust::host_vector expected_unique_representations_h; - for (std::size_t i = 0; i < total_sketch_elements; ++i) - { - representation_index_mask_h.push_back(i / sketch_elements_with_same_representation + 1); - input_representations_h.push_back(representation_index_mask_h.back() * 10); - if (i % sketch_elements_with_same_representation == 0) - { - expected_starting_index_of_each_representation_h.push_back(i); - expected_unique_representations_h.push_back(input_representations_h.back()); - } - } + // ** AAAACTGAA ** + // only second read goes into index - std::uint32_t number_of_threads = 256; + // ** GCCAAAG ** - test_find_first_occurrences_of_representations_kernel(representation_index_mask_h, - input_representations_h, - expected_starting_index_of_each_representation_h, - expected_unique_representations_h, - number_of_threads); -} + // kmer representation: forward, reverse + // GC: <21> 21 + // CC: <11> 22 + // CA: <10> 32 + // AA: <00> 33 + // AA: <00> 33 + // AG: <03> 21 -// ************ Test copy_rest_to_separate_arrays ************** + // front end minimizers: representation, position_in_read, direction, read_id + // GC : 21 0 F 0 + // GCC: 11 1 F 0 -template -void test_function_copy_rest_to_separate_arrays(const thrust::host_vector& rest_h, - const thrust::host_vector& expected_read_ids_h, - const thrust::host_vector& expected_positions_in_reads_h, - const thrust::host_vector& expected_directions_of_reads_h, - const std::uint32_t threads) -{ - ASSERT_EQ(rest_h.size(), expected_read_ids_h.size()); - ASSERT_EQ(rest_h.size(), 
expected_positions_in_reads_h.size()); - ASSERT_EQ(rest_h.size(), expected_directions_of_reads_h.size()); - thrust::device_vector generated_read_ids_d(rest_h.size()); - thrust::device_vector generated_positions_in_reads_d(rest_h.size()); - thrust::device_vector generated_directions_of_reads_d(rest_h.size()); + // central minimizers + // GCCA: 10 2 F 0 + // CCAA: 00 3 F 0 + // CAAA: 00 4 F 0 + // AAAG: 00 4 F 0 - const thrust::device_vector rest_d(rest_h); + // back end minimizers + // AAG: 00 4 F 0 + // AG : 03 5 F 0 - const std::uint32_t blocks = ceiling_divide(rest_h.size(), threads); + // All minimizers: GC(0f), CC(1f), CA(2f), AA(3f), AA(4f), AG(5f) - copy_rest_to_separate_arrays<<>>(rest_d.data().get(), - generated_read_ids_d.data().get(), - generated_positions_in_reads_d.data().get(), - generated_directions_of_reads_d.data().get(), - rest_h.size()); + // (2r1) means position 2, reverse direction, read 1 + // (1,2) means array block start at element 1 and has 2 elements - const thrust::host_vector& generated_read_ids_h(generated_read_ids_d); - const thrust::host_vector& generated_positions_in_reads_h(generated_positions_in_reads_d); - const thrust::host_vector& generated_directions_of_reads_h(generated_directions_of_reads_d); + // 0 1 2 3 4 5 + // data arrays: AA(3f1), AA(4f1), AG(5f1), CA(2f1), CC(1f1), GC(0f1) - for (std::size_t i = 0; i < rest_h.size(); ++i) - { - EXPECT_EQ(generated_read_ids_h[i], expected_read_ids_h[i]); - EXPECT_EQ(generated_positions_in_reads_h[i], expected_positions_in_reads_h[i]); - EXPECT_EQ(generated_directions_of_reads_h[i], expected_directions_of_reads_h[i]); - } -} + const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/aaaactgaa_gccaaag.fasta"; + const std::uint64_t minimizer_size = 2; + const std::uint64_t window_size = 3; -TEST(TestCudamapperIndexGPUTwoIndices, test_function_copy_rest_to_separate_arrays) -{ - thrust::host_vector rest_h; - thrust::host_vector expected_read_ids_h; - thrust::host_vector expected_positions_in_reads_h; - thrust::host_vector expected_directions_of_reads_h; + // only take second read + std::vector expected_read_id_to_read_name; + expected_read_id_to_read_name.push_back("read_1"); - rest_h.push_back({5, 8, 0}); - expected_read_ids_h.push_back(5); - expected_positions_in_reads_h.push_back(8); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); - rest_h.push_back({15, 6, 0}); - expected_read_ids_h.push_back(15); - expected_positions_in_reads_h.push_back(6); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); - rest_h.push_back({2, 4, 1}); - expected_read_ids_h.push_back(2); - expected_positions_in_reads_h.push_back(4); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); - rest_h.push_back({18, 15, 0}); - expected_read_ids_h.push_back(18); - expected_positions_in_reads_h.push_back(15); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); - rest_h.push_back({6, 4, 1}); - expected_read_ids_h.push_back(6); - expected_positions_in_reads_h.push_back(4); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); - rest_h.push_back({6, 3, 1}); - expected_read_ids_h.push_back(6); - expected_positions_in_reads_h.push_back(3); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); - rest_h.push_back({89, 45, 0}); - expected_read_ids_h.push_back(89); - expected_positions_in_reads_h.push_back(45); - 
expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); - rest_h.push_back({547, 25, 0}); - expected_read_ids_h.push_back(547); - expected_positions_in_reads_h.push_back(25); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); - rest_h.push_back({14, 16, 1}); - expected_read_ids_h.push_back(14); - expected_positions_in_reads_h.push_back(16); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); - rest_h.push_back({18, 16, 0}); - expected_read_ids_h.push_back(18); - expected_positions_in_reads_h.push_back(16); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); - rest_h.push_back({45, 44, 0}); - expected_read_ids_h.push_back(45); - expected_positions_in_reads_h.push_back(44); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); - rest_h.push_back({65, 45, 1}); - expected_read_ids_h.push_back(65); - expected_positions_in_reads_h.push_back(45); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); - rest_h.push_back({15, 20, 0}); - expected_read_ids_h.push_back(15); - expected_positions_in_reads_h.push_back(20); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); - rest_h.push_back({45, 654, 1}); - expected_read_ids_h.push_back(45); - expected_positions_in_reads_h.push_back(654); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); - rest_h.push_back({782, 216, 0}); - expected_read_ids_h.push_back(782); - expected_positions_in_reads_h.push_back(216); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); - rest_h.push_back({255, 245, 1}); - expected_read_ids_h.push_back(255); - expected_positions_in_reads_h.push_back(245); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); - rest_h.push_back({346, 579, 0}); - expected_read_ids_h.push_back(346); - expected_positions_in_reads_h.push_back(579); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); - rest_h.push_back({12, 8, 0}); - expected_read_ids_h.push_back(12); - expected_positions_in_reads_h.push_back(8); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); - rest_h.push_back({65, 42, 1}); - expected_read_ids_h.push_back(65); - expected_positions_in_reads_h.push_back(42); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::REVERSE); - rest_h.push_back({566, 42, 0}); - expected_read_ids_h.push_back(566); - expected_positions_in_reads_h.push_back(42); - expected_directions_of_reads_h.push_back(Minimizer::DirectionOfRepresentation::FORWARD); + std::vector expected_read_id_to_read_length; + expected_read_id_to_read_length.push_back(7); - const std::uint32_t threads = 8; + std::vector expected_representations; + std::vector expected_positions_in_reads; + std::vector expected_read_ids; + std::vector expected_directions_of_reads; + std::vector expected_unique_representations; + std::vector expected_first_occurrence_of_representations; - test_function_copy_rest_to_separate_arrays(rest_h, - expected_read_ids_h, - expected_positions_in_reads_h, - expected_directions_of_reads_h, - threads); -} + expected_representations.push_back(0b0000); // AA(3f1) + expected_positions_in_reads.push_back(3); + expected_read_ids.push_back(1); + 
expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b00); + expected_first_occurrence_of_representations.push_back(0); + expected_representations.push_back(0b0000); // AA(4f1) + expected_positions_in_reads.push_back(4); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_representations.push_back(0b0010); // AG(5f1) + expected_positions_in_reads.push_back(5); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0010); + expected_first_occurrence_of_representations.push_back(2); + expected_representations.push_back(0b0100); // CA(2f1) + expected_positions_in_reads.push_back(2); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0100); + expected_first_occurrence_of_representations.push_back(3); + expected_representations.push_back(0b0101); // CC(1f1) + expected_positions_in_reads.push_back(1); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b0101); + expected_first_occurrence_of_representations.push_back(4); + expected_representations.push_back(0b1001); // GC(0f1) + expected_positions_in_reads.push_back(0); + expected_read_ids.push_back(1); + expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); + expected_unique_representations.push_back(0b1001); + expected_first_occurrence_of_representations.push_back(5); -} // namespace index_gpu_two_indices -} // namespace details + expected_first_occurrence_of_representations.push_back(6); + + test_function(filename, + 1, // <- only take second read + 2, + minimizer_size, + window_size, + expected_representations, + expected_positions_in_reads, + expected_read_ids, + expected_directions_of_reads, + expected_unique_representations, + expected_first_occurrence_of_representations, + expected_read_id_to_read_name, + expected_read_id_to_read_length, + 1); +} } // namespace cudamapper } // namespace claragenomics From 12a4a76e963799b10cc533a92f647710e6d02cdd Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Tue, 19 Nov 2019 15:26:02 +0100 Subject: [PATCH 087/128] Building Minimizer as a separate library --- cudamapper/CMakeLists.txt | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cudamapper/CMakeLists.txt b/cudamapper/CMakeLists.txt index fc8218647..8803e5573 100644 --- a/cudamapper/CMakeLists.txt +++ b/cudamapper/CMakeLists.txt @@ -22,11 +22,17 @@ else() set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -lineinfo --expt-extended-lambda -use_fast_math -Xcompiler -Wall,-Wno-pedantic -std=c++14") endif() +cuda_add_library(minimizer + src/minimizer.cu) +target_include_directories(minimizer PUBLIC include) +target_link_libraries(minimizer logging pthread utils) +target_compile_options(minimizer PRIVATE -Werror) + cuda_add_library(index_gpu src/index_gpu.cu src/minimizer.cu) target_include_directories(index_gpu PUBLIC include) -target_link_libraries(index_gpu logging pthread utils cgaio) +target_link_libraries(index_gpu logging minimizer pthread utils cgaio) target_compile_options(index_gpu PRIVATE -Werror) cuda_add_library(index_gpu_two_indices @@ -34,7 +40,7 @@ 
cuda_add_library(index_gpu_two_indices src/index_gpu_two_indices.cu src/minimizer.cu) target_include_directories(index_gpu_two_indices PUBLIC include) -target_link_libraries(index_gpu_two_indices logging pthread utils cgaio) +target_link_libraries(index_gpu_two_indices logging minimizer pthread utils cgaio) target_compile_options(index_gpu_two_indices PRIVATE -Werror) cuda_add_library(matcher From 4af48f79723e16a912c1a6611f8d66ff1875a680 Mon Sep 17 00:00:00 2001 From: Kamesh AK Date: Tue, 19 Nov 2019 07:48:40 -0800 Subject: [PATCH 088/128] update cub submodule to checkout with https --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 7f8ffef0d..0f4c8e419 100644 --- a/.gitmodules +++ b/.gitmodules @@ -25,4 +25,4 @@ url = https://github.com/rvaser/spoa.git [submodule "3rdparty/cub"] path = 3rdparty/cub - url = git@github.com:NVlabs/cub.git + url = https://github.com/NVlabs/cub.git From a1bb41fdc3018934047df550207af85a84e205c9 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Tue, 19 Nov 2019 17:49:12 +0100 Subject: [PATCH 089/128] [cudamapper] Adapted tests to the fact that Matcher::anchors now returns thrust::device_vector instead of std::vector --- cudamapper/src/matcher.hpp | 2 +- cudamapper/tests/Test_CudamapperMatcher.cu | 8 ++++---- .../tests/Test_CudamapperOverlapperTriggered.cu | 14 +++++++------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cudamapper/src/matcher.hpp b/cudamapper/src/matcher.hpp index 5bab43c8b..beccd502a 100644 --- a/cudamapper/src/matcher.hpp +++ b/cudamapper/src/matcher.hpp @@ -45,7 +45,7 @@ class Matcher thrust::device_vector& anchors(); private: - /// \biref list of anchors + /// \brief list of anchors thrust::device_vector anchors_d_; }; } // namespace cudamapper diff --git a/cudamapper/tests/Test_CudamapperMatcher.cu b/cudamapper/tests/Test_CudamapperMatcher.cu index c9a31d2e8..ce374ee38 100644 --- a/cudamapper/tests/Test_CudamapperMatcher.cu +++ b/cudamapper/tests/Test_CudamapperMatcher.cu @@ -36,7 +36,7 @@ TEST(TestCudamapperMatcher, OneReadOneMinimizer) IndexGPU index(parsers, 4, 1, read_ranges); Matcher matcher(index, 0); - const std::vector& anchors = matcher.anchors(); + const thrust::host_vector& anchors = matcher.anchors(); ASSERT_EQ(anchors.size(), 0u); } @@ -78,7 +78,7 @@ TEST(TestCudamapperMatcher, TwoReadsMultipleMiniminizers) IndexGPU index(parsers, 3, 2, read_ranges); Matcher matcher(index, 0); - const std::vector& anchors = matcher.anchors(); + const thrust::host_vector& anchors = matcher.anchors(); ASSERT_EQ(anchors.size(), 1u); EXPECT_EQ(anchors[0].query_read_id_, 0u); EXPECT_EQ(anchors[0].target_read_id_, 1u); @@ -147,7 +147,7 @@ TEST(TestCudamapperMatcher, CustomIndexTwoReads) Matcher matcher(test_index, 0); - const std::vector& anchors = matcher.anchors(); + const thrust::host_vector& anchors = matcher.anchors(); ASSERT_EQ(anchors.size(), 2500u); for (std::size_t read_0_sketch_element = 0; read_0_sketch_element < 50; ++read_0_sketch_element) @@ -263,7 +263,7 @@ TEST(TestCudamapperMatcher, CustomIndexFourReads) Matcher matcher(test_index, 0); - const std::vector& anchors = matcher.anchors(); + const thrust::host_vector& anchors = matcher.anchors(); ASSERT_EQ(anchors.size(), 90300u); // Anchors are grouped by query read id and within that by representation (both in increasing order). 
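These Matcher and Overlapper test updates follow from Matcher::anchors() now returning a thrust::device_vector instead of a std::vector: the tests rely on the thrust idiom that constructing a thrust::host_vector from a device_vector performs the device-to-host copy, so GPU-generated anchors can still be inspected element by element on the host. A minimal self-contained sketch of that idiom follows; the two-field Anchor struct is an assumption for illustration, not the library's full definition.

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <cstdint>

struct Anchor
{
    std::uint32_t query_read_id_;
    std::uint32_t target_read_id_;
};

int main()
{
    // In the real code the device vector is filled by Matcher on the GPU.
    thrust::host_vector<Anchor> input(1, Anchor{0u, 1u});
    thrust::device_vector<Anchor> anchors_d = input;   // host -> device copy
    // Constructing a host_vector from a device_vector triggers the copy back to the host.
    thrust::host_vector<Anchor> anchors_h = anchors_d; // device -> host copy
    return anchors_h[0].target_read_id_ == 1u ? 0 : 1;
}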
diff --git a/cudamapper/tests/Test_CudamapperOverlapperTriggered.cu b/cudamapper/tests/Test_CudamapperOverlapperTriggered.cu index ee60f7144..02a3e6989 100644 --- a/cudamapper/tests/Test_CudamapperOverlapperTriggered.cu +++ b/cudamapper/tests/Test_CudamapperOverlapperTriggered.cu @@ -160,7 +160,7 @@ TEST(TestCudamapperOverlapperTriggerred, OneAchorNoOverlaps) OverlapperTriggered overlapper; std::vector unfused_overlaps; - std::vector anchors; + thrust::device_vector anchors; MockIndex test_index; std::vector testv; @@ -188,7 +188,7 @@ TEST(TestCudamapperOverlapperTriggerred, FourAnchorsOneOverlap) OverlapperTriggered overlapper; std::vector unfused_overlaps; - std::vector anchors; + thrust::device_vector anchors; MockIndex test_index; std::vector testv; @@ -247,7 +247,7 @@ TEST(TestCudamapperOverlapperTriggerred, FourAnchorsNoOverlap) OverlapperTriggered overlapper; std::vector unfused_overlaps; - std::vector anchors; + thrust::device_vector anchors; MockIndex test_index; std::vector testv; @@ -300,7 +300,7 @@ TEST(TestCudamapperOverlapperTriggerred, FourColinearAnchorsOneOverlap) OverlapperTriggered overlapper; std::vector unfused_overlaps; - std::vector anchors; + thrust::device_vector anchors; MockIndex test_index; std::vector testv; @@ -353,7 +353,7 @@ TEST(TestCudamapperOverlapperTriggerred, FourAnchorsLastNotInOverlap) OverlapperTriggered overlapper; std::vector unfused_overlaps; - std::vector anchors; + thrust::device_vector anchors; MockIndex test_index; std::vector testv; @@ -412,7 +412,7 @@ TEST(TestCudamapperOverlapperTriggerred, ShuffledAnchors) OverlapperTriggered overlapper; std::vector unfused_overlaps; - std::vector anchors; + thrust::device_vector anchors; MockIndex test_index; std::vector testv; @@ -487,7 +487,7 @@ TEST(TestCudamapperOverlapperTriggerred, ReverseStrand) OverlapperTriggered overlapper; std::vector unfused_overlaps; - std::vector anchors; + thrust::device_vector anchors; MockIndex test_index; std::vector testv; From 431e98d4f479bf5b9013320efbe0165e5a819d84 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Tue, 19 Nov 2019 19:28:20 +0200 Subject: [PATCH 090/128] [CI] Use for loop insetad of find -exec to catch tests errors Fixes #224 --- ci/common/build-test-sdk.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/common/build-test-sdk.sh b/ci/common/build-test-sdk.sh index 93cfe47cf..a9f37d726 100644 --- a/ci/common/build-test-sdk.sh +++ b/ci/common/build-test-sdk.sh @@ -60,7 +60,10 @@ if [ "$GPU_TEST" == '1' ]; then nvidia-smi logger "Running ClaraGenomicsAnalysis unit tests..." - LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR find ${LOCAL_BUILD_DIR}/install/tests -type f -exec {} \; + # Avoid using 'find' which reutrns 0 even if -exec command fails + for binary_test in "${LOCAL_BUILD_DIR}"/install/tests/*; do + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR "${binary_test}"; + done logger "Running ClaraGenomicsAnalysis benchmarks..." 
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CGA_LIB_DIR ${LOCAL_BUILD_DIR}/install/benchmarks/cudapoa/benchmark_cudapoa --benchmark_filter="BM_SingleBatchTest" From c9a0ceb7a4b038ff7c3ebdbda94310026a6b8e37 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Tue, 19 Nov 2019 16:34:38 -0500 Subject: [PATCH 091/128] [cudapoa] add graph dumping for cudapoa graph --- .../claragenomics/bindings/cudapoa.pxd | 2 + .../claragenomics/bindings/cudapoa.pyx | 46 +++++++++++++++++-- .../claragenomics/bindings/graph.pxd | 39 ++++++++++++++++ pyclaragenomics/requirements.txt | 1 + pyclaragenomics/test/test_cudapoa_bindings.py | 27 +++++++++++ 5 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 pyclaragenomics/claragenomics/bindings/graph.pxd diff --git a/pyclaragenomics/claragenomics/bindings/cudapoa.pxd b/pyclaragenomics/claragenomics/bindings/cudapoa.pxd index 36482b4ed..df49c4b5a 100644 --- a/pyclaragenomics/claragenomics/bindings/cudapoa.pxd +++ b/pyclaragenomics/claragenomics/bindings/cudapoa.pxd @@ -19,6 +19,7 @@ from libc.stdint cimport int8_t, int16_t, uint16_t, int32_t from libcpp.vector cimport vector from claragenomics.bindings.cuda_runtime_api cimport _Stream +from claragenomics.bindings.graph cimport DirectedGraph # This file declares public structs and API calls # from the ClaraGenomicsAnalysis `cudapoa` module. @@ -58,6 +59,7 @@ cdef extern from "claragenomics/cudapoa/batch.hpp" namespace "claragenomics::cud void generate_poa() except + StatusType get_msa(vector[vector[string]]&, vector[StatusType]&) except + StatusType get_consensus(vector[string]&, vector[vector[uint16_t]]&, vector[StatusType]&) except + + StatusType get_graphs(vector[DirectedGraph]&, vector[StatusType]&) except + int get_total_poas() except + int batch_id() except + void reset() except + diff --git a/pyclaragenomics/claragenomics/bindings/cudapoa.pyx b/pyclaragenomics/claragenomics/bindings/cudapoa.pyx index db3044140..85199e71d 100644 --- a/pyclaragenomics/claragenomics/bindings/cudapoa.pyx +++ b/pyclaragenomics/claragenomics/bindings/cudapoa.pyx @@ -11,13 +11,16 @@ # distutils: language = c++ from cython.operator cimport dereference as deref -from libcpp.vector cimport vector +from libc.stdint cimport uint16_t from libcpp.memory cimport unique_ptr +from libcpp.pair cimport pair from libcpp.string cimport string -from libc.stdint cimport uint16_t +from libcpp.vector cimport vector +import networkx as nx -from claragenomics.bindings cimport cudapoa from claragenomics.bindings import cuda +from claragenomics.bindings cimport cudapoa + def status_to_str(status): """ @@ -226,6 +229,43 @@ cdef class CudaPoaBatch: decoded_consensus = [c.decode('utf-8') for c in consensus] return (decoded_consensus, coverage, status) + def get_graphs(self): + """ + Get the POA graph for each POA group. + + Returns: + A tuple where + - first element is a networkx graph for each POA group + - second element is status of MSA generation for each group + """ + cdef vector[DirectedGraph] graphs + cdef vector[cudapoa.StatusType] status + cdef vector[pair[DirectedGraph.edge_t, DirectedGraph.edge_weight_t]] edges + cdef DirectedGraph* graph + cdef DirectedGraph.edge_t edge + cdef DirectedGraph.edge_weight_t weight + + # Get the graphs from batch object. 
+ deref(self.batch).get_graphs(graphs, status) + + nx_digraphs = [] + for g in range(graphs.size()): + graph = &graphs[g] + edges = deref(graph).get_edges() + nx_digraph = nx.DiGraph() + for e in range(edges.size()): + edge = edges[e].first + weight = edges[e].second + nx_digraph.add_edge(edge.first, + edge.second, + weight=weight) + attributes = {} + for n in nx_digraph.nodes: + attributes[n] = {'label' : deref(graph).get_node_label(n).decode('utf-8')} + nx.set_node_attributes(nx_digraph, attributes) + nx_digraphs.append(nx_digraph) + return (nx_digraphs, status) + def reset(self): """ Reset the batch object. Involves deleting all windows previously diff --git a/pyclaragenomics/claragenomics/bindings/graph.pxd b/pyclaragenomics/claragenomics/bindings/graph.pxd new file mode 100644 index 000000000..c34dfed18 --- /dev/null +++ b/pyclaragenomics/claragenomics/bindings/graph.pxd @@ -0,0 +1,39 @@ +# +# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. +# + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from libcpp.pair cimport pair +from libcpp.memory cimport unique_ptr +from libcpp.string cimport string +from libc.stdint cimport int8_t, int16_t, uint16_t, int32_t +from libcpp.vector cimport vector + +# This file declares public structs and API calls +# from the ClaraGenomicsAnalysis `graph` utility class. + +# Declare structs and APIs from graph.hpp. 
+cdef extern from "claragenomics/utils/graph.hpp" namespace "claragenomics": + cdef cppclass Graph: + ctypedef int32_t node_id_t + ctypedef int32_t edge_weight_t + ctypedef pair[node_id_t, node_id_t] edge_t + + cdef cppclass DirectedGraph(Graph): + vector[node_id_t]& get_adjacent_nodes(node_id_t) except + + vector[node_id_t] get_node_ids() except + + vector[pair[edge_t, edge_weight_t]] get_edges() except + + void set_node_label(node_id_t, const string&) except + + string get_node_label(node_id_t) except + + void add_edge(node_id_t, node_id_t, edge_weight_t) except + + string serialize_to_dot() except + diff --git a/pyclaragenomics/requirements.txt b/pyclaragenomics/requirements.txt index 47f1eacbb..feaf6fd7d 100644 --- a/pyclaragenomics/requirements.txt +++ b/pyclaragenomics/requirements.txt @@ -11,6 +11,7 @@ Cython==0.29.12 flake8==3.7.8 matplotlib==3.0.3 +networkx==2.4 numpy==1.16.3 pytest==4.4.1 quast==5.0.2 diff --git a/pyclaragenomics/test/test_cudapoa_bindings.py b/pyclaragenomics/test/test_cudapoa_bindings.py index 961620c75..3b6e50fbe 100644 --- a/pyclaragenomics/test/test_cudapoa_bindings.py +++ b/pyclaragenomics/test/test_cudapoa_bindings.py @@ -49,6 +49,33 @@ def test_cudapoa_reset_batch(): assert(batch.total_poas == 0) +@pytest.mark.gpu +def test_cudapoa_graph(): + device = cuda.cuda_get_device() + free, total = cuda.cuda_get_mem_info(device) + batch = CudaPoaBatch(10, 0.9 * free, device_id=device) + poa_1 = ["ACTGACTG", "ACTTACTG", "ACTCACTG"] + batch.add_poa_group(poa_1) + batch.generate_poa() + consensus, coverage, status = batch.get_consensus() + + assert(batch.total_poas == 1) + + # Expected graph + # - -> G -> - + # | | + # A -> C -> T -> T -> A -> C -> T -> G + # | | + # - -> C -> - + + graphs, status = batch.get_graphs() + assert(len(graphs) == 1) + + digraph = graphs[0] + assert(digraph.number_of_nodes() == 10) + assert(digraph.number_of_edges() == 11) + + @pytest.mark.gpu def test_cudapoa_complex_batch(): random.seed(2) From 6175d18cd20b3b052a2e6f6ff7c8bacd1406a1b0 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Wed, 20 Nov 2019 13:37:30 +0100 Subject: [PATCH 092/128] IndexTwoIndices and IndexGPUTwoIndices now accept a parameter that says if kmer representations should be hashed or not --- .../cudamapper/index_two_indices.hpp | 6 ++++-- cudamapper/src/index_gpu_two_indices.cuh | 19 +++++++++++++------ cudamapper/src/index_two_indices.cu | 6 ++++-- .../Test_CudamapperIndexGPUTwoIndices.cu | 3 ++- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp index 44b9eb99b..8317ad3cf 100644 --- a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp +++ b/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp @@ -77,14 +77,16 @@ class IndexTwoIndices /// \param first_read_id read_id of the first read to the included in this index /// \param past_the_last_read_id read_id+1 of the last read to be included in this index /// \param kmer_size k - the kmer length - /// \param window_size w - the length of the sliding window used to find sketch elements (i.e. the number of adjacent k-mers in a window, adjacent = shifted by one basepair) + /// \param window_size w - the length of the sliding window used to find sketch elements (i.e. 
the number of adjacent kmers in a window, adjacent = shifted by one basepair) + /// \param hash_representations - if true, hash kmer representations /// \return instance of IndexTwoIndices static std::unique_ptr create_index(const io::FastaParser& parser, const read_id_t first_read_id, const read_id_t past_the_last_read_id, const std::uint64_t kmer_size, - const std::uint64_t window_size); + const std::uint64_t window_size, + const bool hash_representations = true); }; /// \} diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu_two_indices.cuh index 8b46d3d7d..d501c0ea5 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu_two_indices.cuh @@ -52,11 +52,13 @@ public: /// \param past_the_last_read_id read_id+1 of the last read to be included in this index /// \param kmer_size k - the kmer length /// \param window_size w - the length of the sliding window used to find sketch elements (i.e. the number of adjacent k-mers in a window, adjacent = shifted by one basepair) + /// \param hash_representations - if true, hash kmer representations IndexGPUTwoIndices(const io::FastaParser& parser, const read_id_t first_read_id, const read_id_t past_the_last_read_id, const std::uint64_t kmer_size, - const std::uint64_t window_size); + const std::uint64_t window_size, + const bool hash_representations = true); /// \brief returns an array of representations of sketch elements /// \return an array of representations of sketch elements @@ -100,7 +102,8 @@ private: /// \brief generates the index void generate_index(const io::FastaParser& query_parser, const read_id_t first_read_id, - const read_id_t past_the_last_read_id); + const read_id_t past_the_last_read_id, + const bool hash_representations); thrust::device_vector representations_d_; thrust::device_vector read_ids_d_; @@ -222,7 +225,8 @@ IndexGPUTwoIndices::IndexGPUTwoIndices(const io::FastaParser& const read_id_t first_read_id, const read_id_t past_the_last_read_id, const std::uint64_t kmer_size, - const std::uint64_t window_size) + const std::uint64_t window_size, + const bool hash_representations) : first_read_id_(first_read_id) , kmer_size_(kmer_size) , window_size_(window_size) @@ -230,7 +234,8 @@ IndexGPUTwoIndices::IndexGPUTwoIndices(const io::FastaParser& { generate_index(parser, first_read_id_, - past_the_last_read_id); + past_the_last_read_id, + hash_representations); } template @@ -290,7 +295,8 @@ std::uint64_t IndexGPUTwoIndices::number_of_reads() const template void IndexGPUTwoIndices::generate_index(const io::FastaParser& parser, const read_id_t first_read_id, - const read_id_t past_the_last_read_id) + const read_id_t past_the_last_read_id, + const bool hash_representations) { // check if there are any reads to process @@ -377,7 +383,8 @@ void IndexGPUTwoIndices::generate_index(const io::FastaParser first_read_id, merged_basepairs_d, read_id_to_basepairs_section_h, - read_id_to_basepairs_section_d); + read_id_to_basepairs_section_d, + hash_representations); device_buffer representations_d = std::move(sketch_elements.representations_d); device_buffer rest_d = std::move(sketch_elements.rest_d); diff --git a/cudamapper/src/index_two_indices.cu b/cudamapper/src/index_two_indices.cu index 418e55f93..7ac0c58e6 100644 --- a/cudamapper/src/index_two_indices.cu +++ b/cudamapper/src/index_two_indices.cu @@ -22,14 +22,16 @@ std::unique_ptr IndexTwoIndices::create_index(const io::FastaPa const read_id_t first_read_id, const read_id_t past_the_last_read_id, const std::uint64_t kmer_size, - const 
std::uint64_t window_size) + const std::uint64_t window_size, + const bool hash_representations) { CGA_NVTX_RANGE(profiler, "create_index"); return std::make_unique>(parser, first_read_id, past_the_last_read_id, kmer_size, - window_size); + window_size, + hash_representations); } } // namespace cudamapper diff --git a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu index aa0a4ae5c..1e39539c1 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu @@ -513,7 +513,8 @@ void test_function(const std::string& filename, first_read_id, past_the_last_read_id, kmer_size, - window_size); + window_size, + false); ASSERT_EQ(index.number_of_reads(), expected_number_of_reads); if (0 == expected_number_of_reads) From 3596b253219fe5b4968f8f1b03e7c14ac6490dfd Mon Sep 17 00:00:00 2001 From: Kamesh Arumugam Date: Wed, 20 Nov 2019 09:46:23 -0800 Subject: [PATCH 093/128] [cudamapper] code refactor to address the PR feedbacks --- cudamapper/src/matcher.cu | 2 +- cudamapper/src/overlapper_triggered.cu | 17 ++++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/cudamapper/src/matcher.cu b/cudamapper/src/matcher.cu index 68530a51a..cd2b5d712 100644 --- a/cudamapper/src/matcher.cu +++ b/cudamapper/src/matcher.cu @@ -329,7 +329,7 @@ Matcher::Matcher(const Index& index, uint32_t query_target_division_idx) auto num_anchors_so_far = anchors_d_.size(); anchors_d_.resize(num_anchors_so_far + total_anchors); - Anchor* anchors_d = thrust::raw_pointer_cast(anchors_d_.data()) + num_anchors_so_far; + Anchor* anchors_d = anchors_d_.data().get() + num_anchors_so_far; CGA_LOG_INFO("Allocating {} bytes for read_id_to_anchors_section_d", read_id_to_anchors_section_h.size() * sizeof(ArrayBlock)); diff --git a/cudamapper/src/overlapper_triggered.cu b/cudamapper/src/overlapper_triggered.cu index ea92f7aa3..9128769e0 100644 --- a/cudamapper/src/overlapper_triggered.cu +++ b/cudamapper/src/overlapper_triggered.cu @@ -28,18 +28,13 @@ __host__ __device__ bool operator==(const Anchor& lhs, const Anchor& rhs) { auto score_threshold = 1; + // Very simple scoring function to quantify quality of overlaps. 
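+    // Anchors compare equal when they come from the same query/target read pair and their
+    // query positions are within 350 bp of each other (score 2 > score_threshold 1).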
- auto anchor_score = [] __host__ __device__(const Anchor& a, const Anchor& b) { - if ((b.query_position_in_read_ - a.query_position_in_read_) < 350) - { - return 2; - } - else - { - return 1; // TODO change to a more sophisticated scoring method - } - }; - auto score = anchor_score(lhs, rhs); + // TODO change to a more sophisticated scoring method + auto score = 1; + if ((rhs.query_position_in_read_ - lhs.query_position_in_read_) < 350) + score = 2; + return ((lhs.query_read_id_ == rhs.query_read_id_) && (lhs.target_read_id_ == rhs.target_read_id_) && score > score_threshold); From ccd938bb2d1b74c42019d40555953fc29b935c3f Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Wed, 20 Nov 2019 17:33:35 +0100 Subject: [PATCH 094/128] [cudamapper] main.cu uses new index/matcher --- .../claragenomics/cudamapper/overlapper.hpp | 10 ++- cudamapper/src/main.cu | 80 +++++++++++++++++-- cudamapper/src/overlapper_triggered.cu | 15 ++-- cudamapper/src/overlapper_triggered.hpp | 5 +- .../Test_CudamapperOverlapperTriggered.cu | 4 +- 5 files changed, 92 insertions(+), 22 deletions(-) diff --git a/cudamapper/include/claragenomics/cudamapper/overlapper.hpp b/cudamapper/include/claragenomics/cudamapper/overlapper.hpp index 9c7cdb5a5..dd0b2afab 100644 --- a/cudamapper/include/claragenomics/cudamapper/overlapper.hpp +++ b/cudamapper/include/claragenomics/cudamapper/overlapper.hpp @@ -11,7 +11,7 @@ #pragma once #include -#include "index.hpp" +#include "index_two_indices.hpp" #include "types.hpp" namespace claragenomics @@ -33,8 +33,12 @@ class Overlapper /// \brief returns overlaps for a set of reads /// \param overlaps Output vector into which generated overlaps will be placed /// \param anchors vector of anchor objects. Does not need to be ordered - /// \param index representation index for reads - virtual void get_overlaps(std::vector& overlaps, thrust::device_vector& anchors, const Index& index) = 0; + /// \param index_query representation index for reads + /// \param index_target + virtual void get_overlaps(std::vector& overlaps, + thrust::device_vector& anchors, + const IndexTwoIndices& index_query, + const IndexTwoIndices& index_target) = 0; /// \brief prints overlaps to stdout in PAF format static void print_paf(const std::vector& overlaps); diff --git a/cudamapper/src/main.cu b/cudamapper/src/main.cu index c69b29199..ffcdf5791 100644 --- a/cudamapper/src/main.cu +++ b/cudamapper/src/main.cu @@ -23,6 +23,8 @@ #include #include "claragenomics/cudamapper/index.hpp" +#include "claragenomics/cudamapper/index_two_indices.hpp" +#include "claragenomics/cudamapper/matcher_two_indices.hpp" #include "claragenomics/cudamapper/overlapper.hpp" #include "matcher.hpp" #include "overlapper_triggered.hpp" @@ -110,11 +112,12 @@ int main(int argc, char* argv[]) // Function for adding new overlaps to writer auto add_overlaps_to_write_queue = [&overlaps_to_write, &overlaps_writer_mtx](claragenomics::cudamapper::Overlapper& overlapper, thrust::device_vector& anchors, - const claragenomics::cudamapper::Index& index) { + const claragenomics::cudamapper::IndexTwoIndices& index_query, + const claragenomics::cudamapper::IndexTwoIndices& index_target) { CGA_NVTX_RANGE(profiler, "add_overlaps_to_write_queue"); overlaps_writer_mtx.lock(); overlaps_to_write.push_back(std::vector()); - overlapper.get_overlaps(overlaps_to_write.back(), anchors, index); + overlapper.get_overlaps(overlaps_to_write.back(), anchors, index_query, index_target); if (0 == overlaps_to_write.back().size()) { overlaps_to_write.pop_back(); @@ -154,16 +157,79 @@ int 
main(int argc, char* argv[]) }; std::future overlap_result(std::async(std::launch::async, overlaps_writer_func)); + auto overlapper = claragenomics::cudamapper::OverlapperTriggered(); + // Track overall time std::chrono::milliseconds index_time = std::chrono::duration_values::zero(); std::chrono::milliseconds matcher_time = std::chrono::duration_values::zero(); std::chrono::milliseconds overlapper_time = std::chrono::duration_values::zero(); - //Now carry out all the looped polling - //size_t query_start = 0; - //size_t query_end = query_start + index_size - 1; - for (size_t query_start = 0; query_start < queries; query_start += index_size) + { // outer loop over query + size_t query_end = std::min(query_start + index_size, static_cast(queries) - 1); + + std::cerr << "Query range: " << query_start << " - " << query_end << std::endl; + + std::unique_ptr query_index(nullptr); + std::unique_ptr target_index(nullptr); + std::unique_ptr matcher(nullptr); + + { + CGA_NVTX_RANGE(profiler, "generate_query_index"); + auto start_time = std::chrono::high_resolution_clock::now(); + query_index = claragenomics::cudamapper::IndexTwoIndices::create_index(*query_parser, + query_start, + query_end + 1, // <- past the last + k, + w); + index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); + std::cerr << "Query index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; + } + + size_t target_start = 0; + // If all_to_all mode, then we can optimzie by starting the target sequences from the same index as + // query because all indices before the current query index are guaranteed to have been processed in + // a2a mapping. + if (all_to_all) + { + target_start = query_start; + } + for (; target_start < targets; target_start += target_index_size) + { + size_t target_end = std::min(target_start + target_index_size, static_cast(targets) - 1); + + std::cerr << "Target range: " << target_start << " - " << target_end << std::endl; + + { + CGA_NVTX_RANGE(profiler, "generate_target_index"); + auto start_time = std::chrono::high_resolution_clock::now(); + target_index = claragenomics::cudamapper::IndexTwoIndices::create_index(*target_parser, + target_start, + target_end + 1, // <- past the last + k, + w); + index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); + std::cerr << "Target index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; + } + { + CGA_NVTX_RANGE(profiler, "generate_matcher"); + auto start_time = std::chrono::high_resolution_clock::now(); + matcher = claragenomics::cudamapper::MatcherTwoIndices::create_matcher(*query_index, + *target_index); + matcher_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); + std::cerr << "Matcher generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; + } + { + CGA_NVTX_RANGE(profiler, "generate_overlaps"); + auto start_time = std::chrono::high_resolution_clock::now(); + add_overlaps_to_write_queue(overlapper, matcher->anchors(), *query_index, *target_index); + overlapper_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); + std::cerr << "Overlapper time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << 
std::endl; + } + } + } + + /* for (size_t query_start = 0; query_start < queries; query_start += index_size) { // outer loop over query size_t query_end = std::min(query_start + index_size, static_cast(queries)); auto start_time = std::chrono::high_resolution_clock::now(); @@ -251,7 +317,7 @@ int main(int argc, char* argv[]) //the new target start is set to be the next read index after the last read //from the previous chunk } - } + }*/ // Insert empty overlap vector to denote end of processing. // The lambda function for adding overlaps to queue ensures that no empty diff --git a/cudamapper/src/overlapper_triggered.cu b/cudamapper/src/overlapper_triggered.cu index ea92f7aa3..c0f2ed4c2 100644 --- a/cudamapper/src/overlapper_triggered.cu +++ b/cudamapper/src/overlapper_triggered.cu @@ -180,12 +180,10 @@ struct CreateOverlap void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, thrust::device_vector& d_anchors, - const Index& index) + const IndexTwoIndices& index_query, + const IndexTwoIndices& index_target) { - CGA_NVTX_RANGE(profiler, "OverlapperTriggered::get_overlaps"); - const auto& read_names = index.read_id_to_read_name(); - const auto& read_lengths = index.read_id_to_read_length(); const auto tail_length_for_chain = 3; auto n_anchors = d_anchors.size(); @@ -345,8 +343,8 @@ void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, fused_overlaps.data(), fused_overlaps.data() + n_fused_overlap, fused_overlaps.data(), [&](Overlap& new_overlap) { - std::string query_read_name = read_names[new_overlap.query_read_id_]; - std::string target_read_name = read_names[new_overlap.target_read_id_]; + std::string query_read_name = index_query.read_id_to_read_name(new_overlap.query_read_id_); + std::string target_read_name = index_target.read_id_to_read_name(new_overlap.target_read_id_); new_overlap.query_read_name_ = new char[query_read_name.length()]; strcpy(new_overlap.query_read_name_, query_read_name.c_str()); @@ -354,8 +352,9 @@ void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, new_overlap.target_read_name_ = new char[target_read_name.length()]; strcpy(new_overlap.target_read_name_, target_read_name.c_str()); - new_overlap.query_length_ = read_lengths[new_overlap.query_read_id_]; - new_overlap.target_length_ = read_lengths[new_overlap.target_read_id_]; + new_overlap.query_length_ = index_query.read_id_to_read_length(new_overlap.query_read_id_); + new_overlap.target_length_ = index_target.read_id_to_read_length(new_overlap.target_read_id_); + return new_overlap; }); } diff --git a/cudamapper/src/overlapper_triggered.hpp b/cudamapper/src/overlapper_triggered.hpp index 07a769b89..9d8250c6d 100644 --- a/cudamapper/src/overlapper_triggered.hpp +++ b/cudamapper/src/overlapper_triggered.hpp @@ -35,9 +35,10 @@ class OverlapperTriggered : public Overlapper /// when a single anchor with a threshold below the value is encountered. 
/// \param overlaps Output vector into which generated overlaps will be placed /// \param anchors vector of anchors - /// \param index Index + /// \param index_query Index + /// \param index_target /// \return vector of Overlap objects - void get_overlaps(std::vector& overlaps, thrust::device_vector& anchors, const Index& index) override; + void get_overlaps(std::vector& overlaps, thrust::device_vector& anchors, const IndexTwoIndices& index_query, const IndexTwoIndices& index_target) override; }; } // namespace cudamapper } // namespace claragenomics diff --git a/cudamapper/tests/Test_CudamapperOverlapperTriggered.cu b/cudamapper/tests/Test_CudamapperOverlapperTriggered.cu index 02a3e6989..92c28ee1d 100644 --- a/cudamapper/tests/Test_CudamapperOverlapperTriggered.cu +++ b/cudamapper/tests/Test_CudamapperOverlapperTriggered.cu @@ -155,7 +155,7 @@ TEST(TestCudamapperOverlapperTriggerred, Fusee3Overlapsto2) ASSERT_EQ(fused_overlaps.size(), 2u); } -TEST(TestCudamapperOverlapperTriggerred, OneAchorNoOverlaps) +/*TEST(TestCudamapperOverlapperTriggerred, OneAchorNoOverlaps) { OverlapperTriggered overlapper; @@ -536,7 +536,7 @@ TEST(TestCudamapperOverlapperTriggerred, ReverseStrand) ASSERT_GT(overlaps[0].target_end_position_in_read_, overlaps[0].target_start_position_in_read_); ASSERT_EQ(overlaps[0].relative_strand, RelativeStrand::Reverse); ASSERT_EQ(char(overlaps[0].relative_strand), '-'); -} +}*/ } // namespace cudamapper } // namespace claragenomics From 583305cc067187805dde969fcfb26514cac4d8ab Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Thu, 21 Nov 2019 12:13:17 +0100 Subject: [PATCH 095/128] [cudaaligner] Alignment: missing virtual dtor --- cudaaligner/include/claragenomics/cudaaligner/alignment.hpp | 3 +++ cudaaligner/src/alignment_impl.cpp | 5 ----- cudaaligner/src/alignment_impl.hpp | 1 - 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/cudaaligner/include/claragenomics/cudaaligner/alignment.hpp b/cudaaligner/include/claragenomics/cudaaligner/alignment.hpp index f3bff3264..1b173aaa3 100644 --- a/cudaaligner/include/claragenomics/cudaaligner/alignment.hpp +++ b/cudaaligner/include/claragenomics/cudaaligner/alignment.hpp @@ -34,6 +34,9 @@ typedef std::pair FormattedAlignment; class Alignment { public: + /// \brief Virtual destructor + virtual ~Alignment() = default; + /// \brief Returns query sequence virtual const std::string& get_query_sequence() const = 0; diff --git a/cudaaligner/src/alignment_impl.cpp b/cudaaligner/src/alignment_impl.cpp index a5b329559..fa6e9e8f2 100644 --- a/cudaaligner/src/alignment_impl.cpp +++ b/cudaaligner/src/alignment_impl.cpp @@ -27,11 +27,6 @@ AlignmentImpl::AlignmentImpl(const char* query, int32_t query_length, const char // Initialize Alignment object. } -AlignmentImpl::~AlignmentImpl() -{ - // Nothing to destroy right now. 
-} - char AlignmentImpl::alignment_state_to_cigar_state(AlignmentState s) const { // CIGAR string format from http://bioinformatics.cvr.ac.uk/blog/tag/cigar-string/ diff --git a/cudaaligner/src/alignment_impl.hpp b/cudaaligner/src/alignment_impl.hpp index 1ffc11bec..cecf38cfb 100644 --- a/cudaaligner/src/alignment_impl.hpp +++ b/cudaaligner/src/alignment_impl.hpp @@ -22,7 +22,6 @@ class AlignmentImpl : public Alignment { public: AlignmentImpl(const char* query, int32_t query_length, const char* target, int32_t target_length); - ~AlignmentImpl(); /// \brief Returns query sequence virtual const std::string& get_query_sequence() const override From 321a6064bd5fdd9446b5d93e15f80bb56acada7e Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Thu, 21 Nov 2019 17:23:33 +0100 Subject: [PATCH 096/128] [cudamapper] Adapted TestCudamapperOverlapperTriggerred to new index --- .../Test_CudamapperOverlapperTriggered.cu | 81 ++++++++++--------- cudamapper/tests/mock_index.cuh | 18 ++++- 2 files changed, 58 insertions(+), 41 deletions(-) diff --git a/cudamapper/tests/Test_CudamapperOverlapperTriggered.cu b/cudamapper/tests/Test_CudamapperOverlapperTriggered.cu index 92c28ee1d..ad3f1ebde 100644 --- a/cudamapper/tests/Test_CudamapperOverlapperTriggered.cu +++ b/cudamapper/tests/Test_CudamapperOverlapperTriggered.cu @@ -155,7 +155,7 @@ TEST(TestCudamapperOverlapperTriggerred, Fusee3Overlapsto2) ASSERT_EQ(fused_overlaps.size(), 2u); } -/*TEST(TestCudamapperOverlapperTriggerred, OneAchorNoOverlaps) +TEST(TestCudamapperOverlapperTriggerred, OneAchorNoOverlaps) { OverlapperTriggered overlapper; @@ -169,17 +169,18 @@ TEST(TestCudamapperOverlapperTriggerred, Fusee3Overlapsto2) testv.push_back("READ2"); std::vector test_read_length(testv.size(), 1000); - EXPECT_CALL(test_index, read_id_to_read_name) - .WillRepeatedly(testing::ReturnRef(testv)); - EXPECT_CALL(test_index, read_id_to_read_length) - .WillRepeatedly(testing::ReturnRef(test_read_length)); + for (std::size_t i = 0; i < testv.size(); ++i) + { + EXPECT_CALL(test_index, read_id_to_read_name(i)).WillRepeatedly(testing::ReturnRef(testv[i])); + EXPECT_CALL(test_index, read_id_to_read_length(i)).WillRepeatedly(testing::ReturnRef(test_read_length[i])); + } Anchor anchor1; anchors.push_back(anchor1); std::vector overlaps; - overlapper.get_overlaps(overlaps, anchors, test_index); + overlapper.get_overlaps(overlaps, anchors, test_index, test_index); ASSERT_EQ(overlaps.size(), 0u); } @@ -197,10 +198,11 @@ TEST(TestCudamapperOverlapperTriggerred, FourAnchorsOneOverlap) testv.push_back("READ2"); std::vector test_read_length(testv.size(), 1000); - EXPECT_CALL(test_index, read_id_to_read_name) - .WillRepeatedly(testing::ReturnRef(testv)); - EXPECT_CALL(test_index, read_id_to_read_length) - .WillRepeatedly(testing::ReturnRef(test_read_length)); + for (std::size_t i = 0; i < testv.size(); ++i) + { + EXPECT_CALL(test_index, read_id_to_read_name(i)).WillRepeatedly(testing::ReturnRef(testv[i])); + EXPECT_CALL(test_index, read_id_to_read_length(i)).WillRepeatedly(testing::ReturnRef(test_read_length[i])); + } Anchor anchor1; anchor1.query_read_id_ = 1; @@ -232,7 +234,7 @@ TEST(TestCudamapperOverlapperTriggerred, FourAnchorsOneOverlap) anchors.push_back(anchor4); std::vector overlaps; - overlapper.get_overlaps(overlaps, anchors, test_index); + overlapper.get_overlaps(overlaps, anchors, test_index, test_index); ASSERT_EQ(overlaps.size(), 1u); ASSERT_EQ(overlaps[0].query_read_id_, 1u); ASSERT_EQ(overlaps[0].target_read_id_, 2u); @@ -256,10 +258,11 @@ 
TEST(TestCudamapperOverlapperTriggerred, FourAnchorsNoOverlap) testv.push_back("READ2"); std::vector test_read_length(testv.size(), 1000); - EXPECT_CALL(test_index, read_id_to_read_name) - .WillRepeatedly(testing::ReturnRef(testv)); - EXPECT_CALL(test_index, read_id_to_read_length) - .WillRepeatedly(testing::ReturnRef(test_read_length)); + for (std::size_t i = 0; i < testv.size(); ++i) + { + EXPECT_CALL(test_index, read_id_to_read_name(i)).WillRepeatedly(testing::ReturnRef(testv[i])); + EXPECT_CALL(test_index, read_id_to_read_length(i)).WillRepeatedly(testing::ReturnRef(test_read_length[i])); + } Anchor anchor1; anchor1.query_read_id_ = 1; @@ -291,7 +294,7 @@ TEST(TestCudamapperOverlapperTriggerred, FourAnchorsNoOverlap) anchors.push_back(anchor4); std::vector overlaps; - overlapper.get_overlaps(overlaps, anchors, test_index); + overlapper.get_overlaps(overlaps, anchors, test_index, test_index); ASSERT_EQ(overlaps.size(), 0u); } @@ -309,10 +312,11 @@ TEST(TestCudamapperOverlapperTriggerred, FourColinearAnchorsOneOverlap) testv.push_back("READ2"); std::vector test_read_length(testv.size(), 1000); - EXPECT_CALL(test_index, read_id_to_read_name) - .WillRepeatedly(testing::ReturnRef(testv)); - EXPECT_CALL(test_index, read_id_to_read_length) - .WillRepeatedly(testing::ReturnRef(test_read_length)); + for (std::size_t i = 0; i < testv.size(); ++i) + { + EXPECT_CALL(test_index, read_id_to_read_name(i)).WillRepeatedly(testing::ReturnRef(testv[i])); + EXPECT_CALL(test_index, read_id_to_read_length(i)).WillRepeatedly(testing::ReturnRef(test_read_length[i])); + } Anchor anchor1; anchor1.query_read_id_ = 1; @@ -344,7 +348,7 @@ TEST(TestCudamapperOverlapperTriggerred, FourColinearAnchorsOneOverlap) anchors.push_back(anchor4); std::vector overlaps; - overlapper.get_overlaps(overlaps, anchors, test_index); + overlapper.get_overlaps(overlaps, anchors, test_index, test_index); ASSERT_EQ(overlaps.size(), 0u); } @@ -362,10 +366,11 @@ TEST(TestCudamapperOverlapperTriggerred, FourAnchorsLastNotInOverlap) testv.push_back("READ2"); std::vector test_read_length(testv.size(), 1000); - EXPECT_CALL(test_index, read_id_to_read_name) - .WillRepeatedly(testing::ReturnRef(testv)); - EXPECT_CALL(test_index, read_id_to_read_length) - .WillRepeatedly(testing::ReturnRef(test_read_length)); + for (std::size_t i = 0; i < testv.size(); ++i) + { + EXPECT_CALL(test_index, read_id_to_read_name(i)).WillRepeatedly(testing::ReturnRef(testv[i])); + EXPECT_CALL(test_index, read_id_to_read_length(i)).WillRepeatedly(testing::ReturnRef(test_read_length[i])); + } Anchor anchor1; anchor1.query_read_id_ = 1; @@ -397,7 +402,7 @@ TEST(TestCudamapperOverlapperTriggerred, FourAnchorsLastNotInOverlap) anchors.push_back(anchor4); std::vector overlaps; - overlapper.get_overlaps(overlaps, anchors, test_index); + overlapper.get_overlaps(overlaps, anchors, test_index, test_index); ASSERT_EQ(overlaps.size(), 1u); ASSERT_EQ(overlaps[0].query_read_id_, 1u); ASSERT_EQ(overlaps[0].target_read_id_, 2u); @@ -421,10 +426,11 @@ TEST(TestCudamapperOverlapperTriggerred, ShuffledAnchors) testv.push_back("READ2"); std::vector test_read_length(testv.size(), 1000); - EXPECT_CALL(test_index, read_id_to_read_name) - .WillRepeatedly(testing::ReturnRef(testv)); - EXPECT_CALL(test_index, read_id_to_read_length) - .WillRepeatedly(testing::ReturnRef(test_read_length)); + for (std::size_t i = 0; i < testv.size(); ++i) + { + EXPECT_CALL(test_index, read_id_to_read_name(i)).WillRepeatedly(testing::ReturnRef(testv[i])); + EXPECT_CALL(test_index, 
read_id_to_read_length(i)).WillRepeatedly(testing::ReturnRef(test_read_length[i])); + } Anchor anchor1; anchor1.query_read_id_ = 1; @@ -470,7 +476,7 @@ TEST(TestCudamapperOverlapperTriggerred, ShuffledAnchors) for (size_t i = 0; i < 100; i++) { std::vector overlaps; - overlapper.get_overlaps(overlaps, anchors, test_index); + overlapper.get_overlaps(overlaps, anchors, test_index, test_index); std::shuffle(std::begin(overlaps), std::end(overlaps), rng); ASSERT_EQ(overlaps.size(), 1u); ASSERT_EQ(overlaps[0].query_read_id_, 1u); @@ -496,10 +502,11 @@ TEST(TestCudamapperOverlapperTriggerred, ReverseStrand) testv.push_back("READ2"); std::vector test_read_length(testv.size(), 1000); - EXPECT_CALL(test_index, read_id_to_read_name) - .WillRepeatedly(testing::ReturnRef(testv)); - EXPECT_CALL(test_index, read_id_to_read_length) - .WillRepeatedly(testing::ReturnRef(test_read_length)); + for (std::size_t i = 0; i < testv.size(); ++i) + { + EXPECT_CALL(test_index, read_id_to_read_name(i)).WillRepeatedly(testing::ReturnRef(testv[i])); + EXPECT_CALL(test_index, read_id_to_read_length(i)).WillRepeatedly(testing::ReturnRef(test_read_length[i])); + } Anchor anchor1; anchor1.query_read_id_ = 1; @@ -531,12 +538,12 @@ TEST(TestCudamapperOverlapperTriggerred, ReverseStrand) anchors.push_back(anchor4); std::vector overlaps; - overlapper.get_overlaps(overlaps, anchors, test_index); + overlapper.get_overlaps(overlaps, anchors, test_index, test_index); ASSERT_EQ(overlaps.size(), 1u); ASSERT_GT(overlaps[0].target_end_position_in_read_, overlaps[0].target_start_position_in_read_); ASSERT_EQ(overlaps[0].relative_strand, RelativeStrand::Reverse); ASSERT_EQ(char(overlaps[0].relative_strand), '-'); -}*/ +} } // namespace cudamapper } // namespace claragenomics diff --git a/cudamapper/tests/mock_index.cuh b/cudamapper/tests/mock_index.cuh index 347125c96..982fddd23 100644 --- a/cudamapper/tests/mock_index.cuh +++ b/cudamapper/tests/mock_index.cuh @@ -12,19 +12,29 @@ #include "gmock/gmock.h" -#include "../src/index_gpu.cuh" +#include "../src/index_gpu_two_indices.cuh" #include "../src/minimizer.hpp" +#include "cudamapper_file_location.hpp" namespace claragenomics { namespace cudamapper { -class MockIndex : public IndexGPU +class MockIndex : public IndexGPUTwoIndices { public: - MOCK_CONST_METHOD0(read_id_to_read_name, std::vector&()); - MOCK_CONST_METHOD0(read_id_to_read_length, std::vector&()); + MockIndex() + : IndexGPUTwoIndices(*(claragenomics::io::create_fasta_parser(std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta")), + 0, + 0, + 0, + 0, + true) + {} + + MOCK_METHOD(const std::string&, read_id_to_read_name, (const read_id_t read_id), (const override)); + MOCK_METHOD(const std::uint32_t&, read_id_to_read_length, (const read_id_t read_id), (const, override)); }; } // namespace cudamapper From 7963af0ff21f4f8500d07abdc4ebe7aa69aebca1 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Thu, 21 Nov 2019 18:37:11 +0100 Subject: [PATCH 097/128] [cudamapper] Removed old main.cu implementation and ran make format --- cudamapper/src/main.cu | 108 +++----------------------------- cudamapper/tests/mock_index.cuh | 15 ++--- 2 files changed, 17 insertions(+), 106 deletions(-) diff --git a/cudamapper/src/main.cu b/cudamapper/src/main.cu index ffcdf5791..0f42c3a74 100644 --- a/cudamapper/src/main.cu +++ b/cudamapper/src/main.cu @@ -178,10 +178,10 @@ int main(int argc, char* argv[]) CGA_NVTX_RANGE(profiler, "generate_query_index"); auto start_time = std::chrono::high_resolution_clock::now(); query_index = 
claragenomics::cudamapper::IndexTwoIndices::create_index(*query_parser, - query_start, - query_end + 1, // <- past the last - k, - w); + query_start, + query_end + 1, // <- past the last + k, + w); index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); std::cerr << "Query index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; } @@ -204,10 +204,10 @@ int main(int argc, char* argv[]) CGA_NVTX_RANGE(profiler, "generate_target_index"); auto start_time = std::chrono::high_resolution_clock::now(); target_index = claragenomics::cudamapper::IndexTwoIndices::create_index(*target_parser, - target_start, - target_end + 1, // <- past the last - k, - w); + target_start, + target_end + 1, // <- past the last + k, + w); index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); std::cerr << "Target index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; } @@ -215,7 +215,7 @@ int main(int argc, char* argv[]) CGA_NVTX_RANGE(profiler, "generate_matcher"); auto start_time = std::chrono::high_resolution_clock::now(); matcher = claragenomics::cudamapper::MatcherTwoIndices::create_matcher(*query_index, - *target_index); + *target_index); matcher_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); std::cerr << "Matcher generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; } @@ -229,96 +229,6 @@ int main(int argc, char* argv[]) } } - /* for (size_t query_start = 0; query_start < queries; query_start += index_size) - { // outer loop over query - size_t query_end = std::min(query_start + index_size, static_cast(queries)); - auto start_time = std::chrono::high_resolution_clock::now(); - - //For every range of reads the process is to first generate all-vs-all overlaps - //for that chunk and then to generate its overlaps with subsequent chunks. - //For example, if a FASTA was chunked into 4 chunks: A,B,C,D the process would be as follows: - // - // Add overlaps for All-vs-all for chunk A - // Add overlaps for Chunk A vs Chunk B - // Add overlaps for Chunk A vs Chunk C - // Add overlaps for All-vs-all for chunk B - // Add overlaps for Chunk B vs Chunk C - // Add overlaps for All-vs-all for chunk C - std::pair query_range{query_start, query_end}; - - auto overlapper = claragenomics::cudamapper::OverlapperTriggered(); - - size_t target_start = 0; - // If all_to_all mode, then we can optimzie by starting the target sequences from the same index as - // query because all indices before the current query index are guaranteed to have been processed in - // a2a mapping. 
- if (all_to_all) - { - target_start = query_start; - } - for (; target_start < targets; target_start += target_index_size) - { //Now loop over the targets - size_t target_end = std::min(target_start + target_index_size, static_cast(targets)); - - start_time = std::chrono::high_resolution_clock::now(); - - std::vector> ranges; - std::vector parsers; - - ranges.push_back(query_range); - parsers.push_back(query_parser.get()); - - // Match point is the index up to which all reads in the query are part of the index - // We therefore set it to be the number of reads in the query (query read index end - query read index start) - //The number of reads in the whole target chunk is set to be index size. - auto match_point = (query_range.second - query_range.first); - - if (!(all_to_all && target_start == query_start && target_end == query_end)) - { - // Only add a new range if it is not the case that mode is all_to_all and ranges between target and query match. - std::pair target_range{target_start, target_end}; - ranges.push_back(target_range); - parsers.push_back(target_parser.get()); - } - else - { - // However, if mode is all_to_all and ranges match exactly, then do all to all mapping for this index. - match_point = 0; - } - - std::cerr << "Ranges: query " << query_start << "," << query_end << " | target " << target_start << "," << target_end << std::endl; - - auto new_index = claragenomics::cudamapper::Index::create_index(parsers, k, w, ranges); - - CGA_LOG_INFO("Creating index"); - std::cerr << "Index execution time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; - index_time += std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - start_time); - CGA_LOG_INFO("Created index"); - - start_time = std::chrono::high_resolution_clock::now(); - CGA_LOG_INFO("Started matcher"); - claragenomics::cudamapper::Matcher qt_matcher(*new_index, match_point); - CGA_LOG_INFO("Finished matcher"); - std::cerr << "Matcher execution time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; - matcher_time += std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - start_time); - - start_time = std::chrono::high_resolution_clock::now(); - CGA_LOG_INFO("Started overlap detector"); - add_overlaps_to_write_queue(overlapper, qt_matcher.anchors(), *new_index); - - CGA_LOG_INFO("Finished overlap detector"); - std::cerr << "Overlap detection execution time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; - overlapper_time += std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - start_time); - - //Now that mappings from query to one range of targets has been completed, - //the new target start is set to be the next read index after the last read - //from the previous chunk - } - }*/ - // Insert empty overlap vector to denote end of processing. 
// The lambda function for adding overlaps to queue ensures that no empty // overlaps are added to the queue so as not to confuse it with the diff --git a/cudamapper/tests/mock_index.cuh b/cudamapper/tests/mock_index.cuh index 982fddd23..5f18477e3 100644 --- a/cudamapper/tests/mock_index.cuh +++ b/cudamapper/tests/mock_index.cuh @@ -25,13 +25,14 @@ class MockIndex : public IndexGPUTwoIndices { public: MockIndex() - : IndexGPUTwoIndices(*(claragenomics::io::create_fasta_parser(std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta")), - 0, - 0, - 0, - 0, - true) - {} + : IndexGPUTwoIndices(*(claragenomics::io::create_fasta_parser(std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta")), + 0, + 0, + 0, + 0, + true) + { + } MOCK_METHOD(const std::string&, read_id_to_read_name, (const read_id_t read_id), (const override)); MOCK_METHOD(const std::uint32_t&, read_id_to_read_length, (const read_id_t read_id), (const, override)); From b2c86be0c3f1567f2bbc90c5267ad1c1c856e429 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Fri, 22 Nov 2019 09:59:24 +0100 Subject: [PATCH 098/128] Removed old index and matcher --- cudamapper/CMakeLists.txt | 16 +- .../claragenomics/cudamapper/index.hpp | 117 -- cudamapper/src/index.cu | 34 - cudamapper/src/index_gpu.cu | 222 --- cudamapper/src/index_gpu.cuh | 772 ------- cudamapper/src/main.cu | 9 +- cudamapper/src/matcher.cu | 395 ---- cudamapper/src/matcher.hpp | 52 - cudamapper/src/overlapper_triggered.cu | 1 - cudamapper/src/overlapper_triggered.hpp | 1 - cudamapper/tests/CMakeLists.txt | 4 - cudamapper/tests/Test_CudamapperIndexGPU.cu | 1769 ----------------- cudamapper/tests/Test_CudamapperMatcher.cu | 431 ---- 13 files changed, 5 insertions(+), 3818 deletions(-) delete mode 100644 cudamapper/include/claragenomics/cudamapper/index.hpp delete mode 100644 cudamapper/src/index.cu delete mode 100644 cudamapper/src/index_gpu.cu delete mode 100644 cudamapper/src/index_gpu.cuh delete mode 100644 cudamapper/src/matcher.cu delete mode 100644 cudamapper/src/matcher.hpp delete mode 100644 cudamapper/tests/Test_CudamapperIndexGPU.cu delete mode 100644 cudamapper/tests/Test_CudamapperMatcher.cu diff --git a/cudamapper/CMakeLists.txt b/cudamapper/CMakeLists.txt index 242dbf60a..5b41645e8 100644 --- a/cudamapper/CMakeLists.txt +++ b/cudamapper/CMakeLists.txt @@ -28,13 +28,6 @@ target_include_directories(minimizer PUBLIC include) target_link_libraries(minimizer logging pthread utils) target_compile_options(minimizer PRIVATE -Werror) -cuda_add_library(index_gpu - src/index_gpu.cu - src/minimizer.cu) -target_include_directories(index_gpu PUBLIC include) -target_link_libraries(index_gpu logging minimizer pthread utils cgaio) -target_compile_options(index_gpu PRIVATE -Werror) - cuda_add_library(index_gpu_two_indices src/index_two_indices.cu src/index_gpu_two_indices.cu @@ -43,12 +36,6 @@ target_include_directories(index_gpu_two_indices PUBLIC include) target_link_libraries(index_gpu_two_indices logging minimizer pthread utils cgaio) target_compile_options(index_gpu_two_indices PRIVATE -Werror) -cuda_add_library(matcher - src/matcher.cu) -target_include_directories(matcher PUBLIC include) -target_link_libraries(matcher logging utils cgaio) -target_compile_options(matcher PRIVATE -Werror) - cuda_add_library(matcher_gpu src/matcher_gpu.cu) target_include_directories(matcher_gpu PUBLIC include) @@ -70,7 +57,6 @@ add_doxygen_source_dir(${CMAKE_CURRENT_SOURCE_DIR}/include) cuda_add_executable(cudamapper src/cudamapper.cpp src/main.cu - src/index.cu src/matcher_two_indices.cu 
src/overlapper.cpp ) @@ -83,7 +69,7 @@ target_include_directories(cudamapper $ ) -target_link_libraries(cudamapper utils index_gpu index_gpu_two_indices matcher matcher_gpu logging overlapper_triggerred cudamapper_utils) +target_link_libraries(cudamapper utils index_gpu_two_indices matcher_gpu logging overlapper_triggerred cudamapper_utils) # Add tests folder add_subdirectory(tests) diff --git a/cudamapper/include/claragenomics/cudamapper/index.hpp b/cudamapper/include/claragenomics/cudamapper/index.hpp deleted file mode 100644 index b4751818c..000000000 --- a/cudamapper/include/claragenomics/cudamapper/index.hpp +++ /dev/null @@ -1,117 +0,0 @@ -/* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. -*/ - -#pragma once - -#include -#include -#include -#include -#include -#include - -namespace claragenomics -{ - -namespace cudamapper -{ -/// \addtogroup cudamapper -/// \{ - -/// Index - manages mapping of (k,w)-kmer-representation and all its occurences -class Index -{ -public: - /// RepresentationToSketchElements - representation, pointer to section of data arrays with sketch elements with that representation and a given read_id, and a pointer to section of data arrays with sketch elements with that representation and all read_ids - struct RepresentationToSketchElements - { - /// representation - representation_t representation_; - /// pointer to all sketch elements for that representation in some read (no need to save which one) - ArrayBlock sketch_elements_for_representation_and_read_id_; - /// pointer to all sketch elements with that representation in all reads - ArrayBlock sketch_elements_for_representation_and_all_read_ids_; - }; - - /// \brief Virtual destructor for Index - virtual ~Index() = default; - - /// \brief returns an array of representations of sketch elements - /// \return an array of representations of sketch elements - virtual const std::vector& representations() const = 0; - - /// \brief returns an array of starting positions of sketch elements in their reads - /// \return an array of starting positions of sketch elements in their reads - virtual const std::vector& positions_in_reads() const = 0; - - /// \brief returns an array of reads ids for sketch elements - /// \return an array of reads ids for sketch elements - virtual const std::vector& read_ids() const = 0; - - /// \brief returns an array of directions in which sketch elements were read - /// \return an array of directions in which sketch elements were read - virtual const std::vector& directions_of_reads() const = 0; - - /// \brief returns number of reads in input data - /// \return number of reads in input data - virtual std::uint64_t number_of_reads() const = 0; - - /// \brief returns mapping of internal read id that goes from 0 to number_of_reads-1 to actual read name from the input - /// \return mapping of internal read id that goes from 0 to number_of_reads-1 to actual read name from the input - virtual const std::vector& read_id_to_read_name() const = 0; - - /// \brief returns mapping of internal read id that goes from 0 to read lengths for that read - /// \return mapping of internal read id that goes from 0 to read lengths for that 
read - virtual const std::vector& read_id_to_read_length() const = 0; - - /// \brief minimum possible representation - /// \return the smallest possible representation - virtual std::uint64_t minimum_representation() const = 0; - - /// \brief maximum possible representation - /// \return the largest possible representation - virtual std::uint64_t maximum_representation() const = 0; - - /// \brief For each read_id (outer vector) returns a vector in which each element contains a representation from that read, pointer to section of data arrays with sketch elements with that representation and that read_id, and pointer to section of data arrays with skecth elements with that representation and all read_ids. There elements are sorted by representation in increasing order - /// \return the mapping - virtual const std::vector>& read_id_and_representation_to_sketch_elements() const = 0; - - /// \brief generates a mapping of (k,w)-kmer-representation to all of its occurrences for one or more sequences - /// \param parsers Vector of parsers for each element in ranges. Size of this vector must match size of ranges. - /// \param kmer_size k - the kmer length - /// \param window_size w - the length of the sliding window used to find sketch elements - /// \param ranges - the ranges of reads in the query file to use for mapping, index by their position (e.g in the FASTA file) - /// \return instance of Index - static std::unique_ptr - create_index(const std::vector& parsers, const std::uint64_t kmer_size, const std::uint64_t window_size, const std::vector>& ranges); - - /// \brief Returns whether there are any more reads to process in the reads file (e.g FASTA file) - /// \return Returns whether there are any more reads to process in the reads file (e.g FASTA file) - virtual bool reached_end_of_input() const = 0; - - /// \brief creates an empty Index - /// \return empty instacne of Index - static std::unique_ptr create_index(); - - /// \brief Return the maximum kmer length allowable. - /// This is just the size of the representation in bits divided by two (since 2 bits are required to - /// represent a DNA nucleotide). - /// \return Return the maximum kmer length allowable - static uint64_t maximum_kmer_size() - { - return sizeof(representation_t) * 8 / 2; - } -}; - -/// \} - -} // namespace cudamapper - -} // namespace claragenomics diff --git a/cudamapper/src/index.cu b/cudamapper/src/index.cu deleted file mode 100644 index c55d7898c..000000000 --- a/cudamapper/src/index.cu +++ /dev/null @@ -1,34 +0,0 @@ -/* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. 
-*/ - -#include "claragenomics/cudamapper/index.hpp" -#include -#include "index_gpu.cuh" -#include "minimizer.hpp" - -namespace claragenomics -{ -namespace cudamapper -{ -std::unique_ptr Index::create_index(const std::vector& parsers, - const std::uint64_t kmer_size, - const std::uint64_t window_size, - const std::vector>& read_ranges) -{ - CGA_NVTX_RANGE(profiler, "create_index"); - return std::make_unique>(parsers, kmer_size, window_size, read_ranges); -} - -std::unique_ptr Index::create_index() -{ - return std::make_unique>(); -} -} // namespace cudamapper -} // namespace claragenomics diff --git a/cudamapper/src/index_gpu.cu b/cudamapper/src/index_gpu.cu deleted file mode 100644 index e57409d7c..000000000 --- a/cudamapper/src/index_gpu.cu +++ /dev/null @@ -1,222 +0,0 @@ -/* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. -*/ - -#include "index_gpu.cuh" - -namespace claragenomics -{ -namespace cudamapper -{ - -namespace details -{ - -namespace index_gpu -{ - -std::vector generate_representation_buckets(const std::vector>& arrays_of_representations, - const std::uint64_t approximate_sketch_elements_per_bucket) -{ - // The function samples every approximate_sketch_elements_per_bucket/number_of_arrays element of each array and sorts them by representation. - // For the following input and approximate_sketch_elements_per_bucket = 7 this means sampling every second element: - // (1 1 2 2 4 4 6 6 9 9) - // ^ ^ ^ ^ ^ - // (0 0 1 5 5 5 7 8 8 8) - // ^ ^ ^ ^ ^ - // (1 1 1 1 3 4 5 7 9 9) - // ^ ^ ^ ^ ^ - // Sorted: 0 1 1 1 1 2 3 4 5 5 6 7 8 9 9 - // - // Number of samples that fit one bucket is approximate_sketch_elements_per_bucket/sample_size = 3 - // 0) add smallest representation - // 0 1 1 1 1 2 3 4 5 5 6 7 8 9 9 - // ^ - // representation_buckets = 0 - // 1) move three samples - // 0 1 1 1 1 2 3 4 5 5 6 7 8 9 9 - // ^ -> ^ - // representation_buckets = 0, 1 - // 2) move three samples - // 0 1 1 1 1 2 3 4 5 5 6 7 8 9 9 - // ^ -> ^ - // representation_buckets = 0, 1, 3 - // 3) move three samples - // 0 1 1 1 1 2 3 4 5 5 6 7 8 9 9 - // ^ -> ^ - // representation_buckets = 0, 1, 3, 5 - // 4) move three samples - // 0 1 1 1 1 2 3 4 5 5 6 7 8 9 9 - // ^ -> ^ - // representation_buckets = 0, 1, 3, 5, 8 - // 4) move three samples - // 0 1 1 1 1 2 3 4 5 5 6 7 8 9 9 - // ^ -> ^ -> end - // representation_buckets = 0, 1, 3, 5, 8 - // - // Obtained buckets are: - // 0: 0 0 - // 1: 1 1 1 1 1 1 1 1 2 2 - // 3: 3 4 4 4 - // 5: 5 5 5 5 6 6 7 7 - // 8: 8 8 8 9 9 9 9 - // - // If something like this would happen - // 0 1 1 1 1 1 1 1 2 3 4 5 5 6 7 8 9 9 - // ^ -> ^ - // 0 1 1 1 1 1 1 1 2 3 4 5 5 6 7 8 9 9 - // ^ -> ^ - // 0 1 1 1 1 1 1 1 2 3 4 5 5 6 7 8 9 9 - // ^ -> ^ - // i.e. 
the same representation is encountered more than once those additional encounters should be skipped - - std::vector sampled_representations; - - const std::uint64_t sample_length = approximate_sketch_elements_per_bucket / arrays_of_representations.size(); - - if (sample_length == 0) - { - throw approximate_sketch_elements_per_bucket_too_small("approximate_sketch_elements_per_bucket is " + std::to_string(approximate_sketch_elements_per_bucket) + - " but should be at least " + std::to_string(arrays_of_representations.size())); - } - - // sample every sample_length representation - for (std::size_t array_index = 0; array_index < arrays_of_representations.size(); ++array_index) - { - for (std::size_t sample_index = 0; sample_index < arrays_of_representations[array_index].size(); sample_index += sample_length) - { - sampled_representations.push_back(arrays_of_representations[array_index][sample_index]); - } - } - - // The number of samples whose sketch elements fit one bucket on the gpu when grouped together - const std::uint64_t samples_in_one_bucket = approximate_sketch_elements_per_bucket / sample_length; - std::vector representation_buckets; - - std::sort(std::begin(sampled_representations), std::end(sampled_representations)); - - // Merge every samples_in_one_bucket samples into one bucket, skipping samples that have the same representation as the previosuly added sample - // in order to avoid having representations split across multiple buckets - representation_buckets.push_back(sampled_representations[0]); - for (std::size_t sample_index = samples_in_one_bucket; sample_index < sampled_representations.size(); sample_index += samples_in_one_bucket) - { - if (sampled_representations[sample_index] != representation_buckets.back()) - { - representation_buckets.push_back(sampled_representations[sample_index]); - } - else - { - CGA_LOG_INFO("Representation {} does not fit one bucket", sampled_representations[sample_index]); - } - } - - return representation_buckets; -} - -std::vector generate_representation_indices(const std::vector>& arrays_of_representations, - const representation_t representation) -{ - std::vector representation_indices; - - for (const auto& one_array_of_representations : arrays_of_representations) - { - auto representation_iterator = std::lower_bound(std::begin(one_array_of_representations), - std::end(one_array_of_representations), - representation); - representation_indices.push_back(representation_iterator - std::cbegin(one_array_of_representations)); - } - - return representation_indices; -} - -std::vector>> generate_bucket_boundary_indices(const std::vector>& arrays_of_representations, - const std::vector& representation_buckets) -{ - const std::size_t number_of_arrays = arrays_of_representations.size(); - const std::size_t number_of_buckets = representation_buckets.size(); - - std::vector>> bucket_boundary_indices(number_of_buckets); - - // all buckets start from 0 - std::vector first_index_per_array(number_of_arrays, 0); - // treat last bucket separately as its last representation is not saved in representation_buckets - for (std::size_t bucket_index = 0; bucket_index < number_of_buckets - 1; ++bucket_index) - { - std::vector last_index_per_array = generate_representation_indices(arrays_of_representations, representation_buckets[bucket_index + 1]); - for (std::size_t array_index = 0; array_index < number_of_arrays; ++array_index) - { - bucket_boundary_indices[bucket_index].emplace_back(first_index_per_array[array_index], - last_index_per_array[array_index]); - } - 
first_index_per_array = std::move(last_index_per_array); - } - // now deal with the last bucket (last bucket always goes up to the last element in the array) - for (std::size_t array_index = 0; array_index < number_of_arrays; ++array_index) - { - bucket_boundary_indices.back().emplace_back(first_index_per_array[array_index], - arrays_of_representations[array_index].size()); - } - - return bucket_boundary_indices; -} - -std::vector> generate_sections_for_multithreaded_index_building(const std::vector& input_representations) -{ - // TODO: When too many threads are used performance gets worse. Hardcoding the number of threads for now - //auto number_of_threads = std::thread::hardware_concurrency(); - std::uint32_t number_of_threads = 4; - - if (0 == number_of_threads) - { - CGA_LOG_INFO("Could not get the number of supported threads, building index with one thread"); - number_of_threads = 1; - } - - // split the input into chunks, but make sure that no representation is spread over several chunks - auto approx_sketch_elements_per_thread = input_representations.size() / number_of_threads; - if (0 == approx_sketch_elements_per_thread) - approx_sketch_elements_per_thread = 1; - - std::vector> sections_for_threads; - for (std::size_t thread_id = 0; thread_id < number_of_threads; ++thread_id) - { - std::size_t first_index = 0; - if (thread_id != 0) - first_index = sections_for_threads.back().second; - if (thread_id == number_of_threads - 1) - { // last thread includes all remaining elements - if (first_index < input_representations.size()) - { // there is at least one element left - sections_for_threads.push_back(std::pair(first_index, input_representations.size())); - } - } - else - { - std::size_t approx_past_the_last_index = first_index + approx_sketch_elements_per_thread; - approx_past_the_last_index = std::min(approx_past_the_last_index, input_representations.size()); - representation_t last_representation_in_section = input_representations[approx_past_the_last_index - 1]; - auto past_the_last_iterator_in_section = std::upper_bound(std::begin(input_representations), - std::end(input_representations), - last_representation_in_section); - - std::size_t actuall_past_the_last_index = past_the_last_iterator_in_section - std::begin(input_representations); - if (actuall_past_the_last_index > first_index) - sections_for_threads.push_back(std::pair(first_index, actuall_past_the_last_index)); - } - } - - return sections_for_threads; -} - -} // namespace index_gpu - -} // namespace details - -} // namespace cudamapper -} // namespace claragenomics diff --git a/cudamapper/src/index_gpu.cuh b/cudamapper/src/index_gpu.cuh deleted file mode 100644 index 48ed83b5a..000000000 --- a/cudamapper/src/index_gpu.cuh +++ /dev/null @@ -1,772 +0,0 @@ -/* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. 
-*/ - -#pragma once - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#include "claragenomics/cudamapper/index.hpp" -#include "claragenomics/cudamapper/types.hpp" - -#include "cudamapper_utils.hpp" - -namespace claragenomics -{ -namespace cudamapper -{ - -/// IndexGPU - Contains sketch elements grouped by representation and by read id within the representation -/// -/// Class contains three separate data arrays: read_ids, positions_in_reads and directions_of_reads. -/// Elements of these three arrays with the same index represent one sketch element -/// (read_id of the read it belongs to, position in that read of the first basepair of sketch element and whether it is forward or reverse complement representation). -/// Representation itself is not saved as it is not necessary for matching phase. It can be retrieved from the original data if needed. -/// -/// Elements of data arrays are grouped by sketch element representation and within those groups by read_id. Both representations and read_ids within representations are sorted in ascending order -/// -/// read_id_and_representation_to_sketch_elements() for each read_id (outer vector) returns a vector in which each element contains a representation from that read, pointer to section of data arrays with sketch elements with that representation and that read_id, and pointer to section of data arrays with skecth elements with that representation and all read_ids. There elements are sorted by representation in increasing order -/// -/// \tparam SketchElementImpl any implementation of SketchElement -template -class IndexGPU : public Index -{ -public: - /// \brief Constructor - /// - /// \param query_filename filepath to reads in FASTA or FASTQ format - /// \param kmer_size k - the kmer length - /// \param window_size w - the length of the sliding window used to find sketch elements - /// \param read_ranges - the ranges of reads in the query file to use for mapping, index by their position (e.g in the FASA file) - /// \param hash_representations - if true, apply hash function to all representations - IndexGPU(const std::vector& parsers, const std::uint64_t kmer_size, const std::uint64_t window_size, const std::vector>& read_ranges, const bool hash_representations = true); - - /// \brief Constructor - IndexGPU(); - - /// \brief returns an array of representations of sketch elements - /// \return an array of representations of sketch elements - const std::vector& representations() const override; - - /// \brief returns an array of starting positions of sketch elements in their reads - /// \return an array of starting positions of sketch elements in their reads - const std::vector& positions_in_reads() const override; - - /// \brief returns an array of reads ids for sketch elements - /// \return an array of reads ids for sketch elements - const std::vector& read_ids() const override; - - /// \brief returns an array of directions in which sketch elements were read - /// \return an array of directions in which sketch elements were read - const std::vector& directions_of_reads() const override; - - /// \brief returns number of reads in input data - /// \return number of reads in input data - std::uint64_t number_of_reads() const override; - - /// \brief Returns whether there are any more reads to process in the reads file (e.g FASTA file) or if the given rang has exceeded the number of reads in the file - /// \return Returns whether there are any more reads to process in the 
reads file (e.g FASTA file) or if the given rang has exceeded the number of reads in the file - bool reached_end_of_input() const override; - - /// \brief returns mapping of internal read id that goes from 0 to number_of_reads-1 to actual read name from the input - /// \return mapping of internal read id that goes from 0 to number_of_reads-1 to actual read name from the input - const std::vector& read_id_to_read_name() const override; - - /// \brief returns mapping of internal read id that goes from 0 to read lengths for that read - /// \return mapping of internal read id that goes from 0 to read lengths for that read - const std::vector& read_id_to_read_length() const override; - - /// \brief For each read_id (outer vector) returns a vector in which each element contains a representation from that read, pointer to section of data arrays with sketch elements with that representation and that read_id, and pointer to section of data arrays with skecth elements with that representation and all read_ids. There elements are sorted by representation in increasing order - /// \return the mapping - const std::vector>& read_id_and_representation_to_sketch_elements() const override; - - /// \brief min_representation - /// \return the smallest possible representation - std::uint64_t minimum_representation() const override { return 0; }; - - /// \brief max_representation - /// \return the largest possible representation, - std::uint64_t maximum_representation() const override - { - if (hash_representations) - { - return (uint64_t(1) << 32) - 1; - } - else - { - return (1 << (kmer_size_ * 2)) - 1; - } - }; - -private: - /// \brief generates the index - /// \param query_filename - void generate_index(const std::vector& parsers, const std::vector>& read_ranges); - - const std::uint64_t kmer_size_; - const std::uint64_t window_size_; - std::uint64_t number_of_reads_; - const bool hash_representations; - bool reached_end_of_input_; - - std::vector representations_; - std::vector positions_in_reads_; - std::vector read_ids_; - std::vector directions_of_reads_; - - std::vector read_id_to_read_name_; - std::vector read_id_to_read_length_; - - std::vector> read_id_and_representation_to_sketch_elements_; -}; - -namespace details -{ - -namespace index_gpu -{ - -/// approximate_sketch_elements_per_bucket_too_short - exception thrown when the number of sketch_elements_per_bucket is too small -class approximate_sketch_elements_per_bucket_too_small : public std::exception -{ -public: - approximate_sketch_elements_per_bucket_too_small(const std::string& message) - : message_(message) - { - } - approximate_sketch_elements_per_bucket_too_small(approximate_sketch_elements_per_bucket_too_small const&) = default; - approximate_sketch_elements_per_bucket_too_small& operator=(approximate_sketch_elements_per_bucket_too_small const&) = default; - virtual ~approximate_sketch_elements_per_bucket_too_small() = default; - - virtual const char* what() const noexcept override - { - return message_.data(); - } - -private: - const std::string message_; -}; - -/// @brief Takes multiple arrays of sketch elements and determines an array of representations such that the number of elements between each two representations is similar to the given value -/// -/// Function takes multiple arrays of sketch elements. 
Elements of each array are sorted by representation -/// The function generates an array of representations such that if all input arrays were sorted together the number of sketch elements -/// between neighboring elements would not be similar to approximate_sketch_elements_per_bucket. -/// The number of element in a bucket is guaranteed to be <= approximate_sketch_elements_per_bucket, unless members_with_some_representation >= approximate_sketch_elements_per_bucket, -/// in which case the number of elements in its bucket is guaranteed to be <= members_with_that_representation + approximate_sketch_elements_per_bucket (this is not expect with genomes as -/// approximate_sketch_elements_per_bucket should be the number of elements that can fit one GPU). -/// All elements with the same representation are guaranteed to be in the same bucket -/// -/// Take the following three arrays and approximate_sketch_elements_per_bucket = 5: -/// (0 1 2 3 3 5 6 7 7 9) <- index -/// (1 1 2 2 4 4 6 6 9 9) -/// (0 0 1 5 5 5 7 8 8 8) -/// (1 1 1 3 3 4 5 7 9 9) -/// -/// When all three arrays are merged and sorte this give: -/// (0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29) -/// (0 0 1 1 1 1 1 1 2 2 3 3 3 4 4 5 5 5 5 6 6 7 7 7 8 8 9 9 9 9) -/// ^ ^ ^ ^ ^ ^ ^ ^ -/// -/// Representation in the output array and the respective chunks would be: -/// 0: 0 0 -/// 1: 1 1 1 1 1 1 <- larger the approximate_sketch_elements_per_bucket, so only one representation in this chunk -/// 2: 2 2 3 3 3 -/// 4: 4 4 -/// 5: 5 5 5 5 -/// 6: 6 6 7 7 7 -/// 8: 8 8 -/// 9: 9 9 9 9 -/// -/// Note that the line 1 could also be "1 1 1 1 1 1 2 2" or "1 1 1 1 1 1 2 2 3 3 3", but not "1 1 1 1 1 1 2 2 3 3 3 4 4" -/// -/// \param arrays_of_representations multiple arrays of sketch element representations in which elements are sorted by representation -/// \param approximate_sketch_elements_per_bucket approximate number of sketch elements between two representations -/// \return list of representations that limit the buckets (left boundary inclusive, right exclusive) -/// \throw approximate_sketch_elements_per_bucket_too_small if approximate_sketch_elements_per_bucket is too small -std::vector generate_representation_buckets(const std::vector>& arrays_of_representations, - const std::uint64_t approximate_sketch_elements_per_bucket); - -/// \brief Gets the index of first occurrence of the given representation in each array -/// -/// \param arrays_of_representations multiple arrays of sketch element representations in which elements are sorted by representation -/// \param representation representation to look for -/// \return for each array in arrays_of_representations contains the index for the first element greater or equal to representation, or the index of past-the-last element if all elements have a smaller representation -std::vector generate_representation_indices(const std::vector>& arrays_of_representations, - const representation_t representation); - -/// \brief For each bucket generates first and past-the-last index that fall into that bucket for each array of representations -/// -/// \param arrays_of_representations multiple arrays of sketch element representations in which elements are sorted by representation -/// \param generate_representation_buckets first reprentation in each bucket -/// \return outer vector goes over all buckets, inner gives first and past-the-last index of that bucket in every array of representations, if first and past-the-last index are the same that means that there are no 
elements of that bucket in that array -std::vector>> generate_bucket_boundary_indices(const std::vector>& arrays_of_representations, - const std::vector& representation_buckets); - -/// \brief Takes multiple arrays of sketch elements and merges them together so that the output array is sorted -/// -/// Function takes multiple arrays of sketch elements (each of those arrays is actually split into two arrays, one containing representations -/// and the other read ids, positions in reads and directions, but for the sake of simplicity they are treated as one array in comments). -/// Sketch elements in each input array are sorted by representation. -/// On the output all arrays are merged in one big array and sorted by representations. -/// Within each representation group the order of the elements from arrays_of_readids_positions_directions remains the same and they are ordered -/// by the index of their array in arrays_of_readids_positions_directions, i.e. first come elements from arrays_of_readids_positions_directions[0], -/// than arrays_of_readids_positions_directions[1]... -/// -/// \param arrays_of_representations multiple arrays of sketch element representations in which elements are sorted by representation -/// \param arrays_of_readids_positions_directions multiple arrays of sketch elements (excluding representation) in which elements are sorted by representation -/// \param available_device_memory_bytes how much GPU memory is available for this merge -/// \param merged_representations on output contains all sketch element representations from all arrays_of_representations_h subarrays, sorted by representation and further sorted by read_id within each representation group -/// \param merged_readids_positions_directions contains all sketch elements (excluding representation) from all arrays_of_readid_position_direction_h subarrays, sorted by representation and further sorted by read_id within each representation group -/// -/// \tparam ReadidPositionDirection any implementation of SketchElement::ReadidPositionDirection -template -void merge_sketch_element_arrays(const std::vector>& arrays_of_representations, - const std::vector>& arrays_of_readids_positions_directions, - const std::uint64_t available_device_memory_bytes, - std::vector& merged_representations, - std::vector& merged_readids_positions_directions) -{ - // Each array in arrays_of_representations (and arrays_of_readids_positions_directions) is sorted by representation. - // The goal is to have the data from all arrays merged together. If data from all arrays fits the device memory this can be done by copying from all arrays to one - // device array, sorting it on device and copying it back to host. - // If the data is too large merging has to be done in chunks. As the data in arrays is already sorted it is possible to take the data for all representations - // between rep_x and rep_y from all arrays, put it on device, sort and move the sorted data back to host. - // If these chunks are chosen as ((rep_0, rep_x), (rep_x + 1, rep_y), (rep_y + 1, rep_z) ...) the final result will be completely sorted. 
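
A CPU-only analogue of the chunked merge described above may make the idea easier to follow. The sketch below is illustrative (keys only, made-up function name); the removed implementation performs the per-chunk sort on the device with thrust::stable_sort_by_key and also carries the read-id/position/direction payload:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <utility>
    #include <vector>

    using representation_t = std::uint64_t;

    // Illustrative sketch: for every bucket, gather that bucket's slice from each
    // pre-sorted input array, sort the gathered chunk, and append it to the output.
    // Because consecutive buckets cover disjoint, increasing representation ranges,
    // concatenating the sorted chunks yields a globally sorted result.
    std::vector<representation_t> chunked_merge(const std::vector<std::vector<representation_t>>& arrays,
                                                const std::vector<std::vector<std::pair<std::size_t, std::size_t>>>& bucket_boundary_indices)
    {
        std::vector<representation_t> merged;
        for (const auto& ranges_per_array : bucket_boundary_indices)
        {
            std::vector<representation_t> chunk;
            for (std::size_t array_index = 0; array_index < arrays.size(); ++array_index)
            {
                const auto& range = ranges_per_array[array_index];
                chunk.insert(std::end(chunk),
                             std::begin(arrays[array_index]) + range.first,
                             std::begin(arrays[array_index]) + range.second);
            }
            std::stable_sort(std::begin(chunk), std::end(chunk)); // done on the GPU in the removed implementation
            merged.insert(std::end(merged), std::begin(chunk), std::end(chunk));
        }
        return merged;
    }
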
- // generate_bucket_boundary_indices generates buckets/chunks of representations so that they fit the device memory - - std::uint64_t size_of_one_element = sizeof(representation_t) + sizeof(ReadidPositionDirection); - // how many elements can be sorted at once (thrust::stable_sort_by_key is done out-of-place, hence 2.1) - std::uint64_t elements_per_merge = ((available_device_memory_bytes / 21) * 10) / size_of_one_element; - - CGA_LOG_DEBUG("Performing GPU merge with {} bytes of device memory, i.e. {} elements_per_merge", - available_device_memory_bytes, - elements_per_merge); - - // generate buckets - std::vector>> bucket_boundary_indices = generate_bucket_boundary_indices(arrays_of_representations, - generate_representation_buckets(arrays_of_representations, - elements_per_merge)); - - const std::size_t number_of_buckets = bucket_boundary_indices.size(); - const std::size_t number_of_arrays = arrays_of_representations.size(); - - // find longest output bucket - std::size_t longest_merged_bucket_length = 0; - for (const auto& input_buckets_for_one_output_bucket : bucket_boundary_indices) - { - std::size_t length = 0; - for (const auto& one_input_bucket : input_buckets_for_one_output_bucket) - { - length += one_input_bucket.second - one_input_bucket.first; - } - longest_merged_bucket_length = std::max(longest_merged_bucket_length, length); - } - - // allocate the array that will be used for merging - CGA_LOG_INFO("Allocating {} bytes for representations_bucket_to_merge_d", longest_merged_bucket_length * sizeof(representation_t)); - device_buffer representations_bucket_to_merge_d(longest_merged_bucket_length); - CGA_LOG_INFO("Allocating {} bytes for readids_positions_directions_bucket_to_merge_d", longest_merged_bucket_length * sizeof(ReadidPositionDirection)); - device_buffer readids_positions_directions_bucket_to_merge_d(longest_merged_bucket_length); - - // find total number of sketch elements in all subarrays - std::size_t total_sketch_elements = 0; - total_sketch_elements = std::accumulate(std::begin(arrays_of_representations), - std::end(arrays_of_representations), - 0, - [](auto counter, const auto& one_array) { return counter += one_array.size(); }); - - // allocate enough space for merged sketch elements - merged_representations.resize(total_sketch_elements); - merged_readids_positions_directions.resize(total_sketch_elements); - - // go bucket by bucket - std::size_t output_elements_written = 0; - for (std::size_t bucket_index = 0; bucket_index < number_of_buckets; ++bucket_index) - { - // copy data from all arrays which belongs to that bucket - std::size_t elements_written = 0; - for (std::size_t array_index = 0; array_index < number_of_arrays; ++array_index) - { - std::size_t elements_to_copy = bucket_boundary_indices[bucket_index][array_index].second - bucket_boundary_indices[bucket_index][array_index].first; - if (elements_to_copy > 0) - { - // to reduce the number of cudaMemcpys one could do all copies to a host buffer and than copy all data to device at once, but that would take more space - CGA_CU_CHECK_ERR(cudaMemcpy(representations_bucket_to_merge_d.data() + elements_written, - arrays_of_representations[array_index].data() + bucket_boundary_indices[bucket_index][array_index].first, - elements_to_copy * sizeof(representation_t), - cudaMemcpyHostToDevice)); - CGA_CU_CHECK_ERR(cudaMemcpy(readids_positions_directions_bucket_to_merge_d.data() + elements_written, - arrays_of_readids_positions_directions[array_index].data() + 
bucket_boundary_indices[bucket_index][array_index].first, - elements_to_copy * sizeof(ReadidPositionDirection), - cudaMemcpyHostToDevice)); - elements_written += elements_to_copy; - } - } - // sort bucket - thrust::stable_sort_by_key(thrust::device, - representations_bucket_to_merge_d.data(), - representations_bucket_to_merge_d.data() + elements_written, - readids_positions_directions_bucket_to_merge_d.data()); - // copy sorted bucket to host output array - CGA_CU_CHECK_ERR(cudaMemcpy(merged_representations.data() + output_elements_written, - representations_bucket_to_merge_d.data(), - elements_written * sizeof(representation_t), - cudaMemcpyDeviceToHost)); - CGA_CU_CHECK_ERR(cudaMemcpy(merged_readids_positions_directions.data() + output_elements_written, - readids_positions_directions_bucket_to_merge_d.data(), - elements_written * sizeof(ReadidPositionDirection), - cudaMemcpyDeviceToHost)); - output_elements_written += elements_written; - } - - CGA_LOG_INFO("Deallocating {} bytes from representations_bucket_to_merge_d", longest_merged_bucket_length * sizeof(representation_t)); - representations_bucket_to_merge_d.free(); - CGA_LOG_INFO("Deallocating {} bytes from readids_positions_directions_bucket_to_merge_d", longest_merged_bucket_length * sizeof(ReadidPositionDirection)); - readids_positions_directions_bucket_to_merge_d.free(); -} - -/// \brief Splits input_representations into sections of equally the same size where no representation is split across multiple sections -/// -/// \param input_representations representations of all sketch elements, soreted by representation -/// \return for eaction it returns a pair of the index of the first element and past-the-last element -std::vector> generate_sections_for_multithreaded_index_building(const std::vector& input_representations); - -/// \brief Constructs the index and splits parts of sketch elements into separate arays -/// -/// Builds the index (read_id_and_representation_to_sketch_elements) based on input_representations and read_ids from input_readids_positions_directions. -/// Index has one subarray for each read_id. Each element of subarrays corresponds to its read_id and some representation. -/// Element points to parts of other output arrays with sketch elements with that read_id and representation, as well as representation and all read_ids -/// In adition splits data from input_readids_positions_directions into positions_in_reads, read_ids and directions_of_reads. 
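
The section splitting that build_index relies on (declared above as generate_sections_for_multithreaded_index_building) can be sketched on the host as follows. This is a simplified illustration, not the removed implementation: it may produce more sections than requested, whereas the real function folds the remainder into the last thread's section:

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <utility>
    #include <vector>

    using representation_t = std::uint64_t;

    // Simplified illustration: split a representation-sorted array into roughly equal
    // sections, extending each section with std::upper_bound so that all elements with
    // the same representation end up in the same section.
    std::vector<std::pair<std::size_t, std::size_t>> split_into_sections(const std::vector<representation_t>& representations,
                                                                         const std::size_t number_of_sections)
    {
        std::vector<std::pair<std::size_t, std::size_t>> sections;
        const std::size_t approx_per_section = std::max<std::size_t>(1, representations.size() / number_of_sections);
        std::size_t first_index = 0;
        while (first_index < representations.size())
        {
            const std::size_t approx_past_the_last = std::min(first_index + approx_per_section, representations.size());
            // move the boundary past all elements equal to the last representation in this section
            const auto past_the_last_iter = std::upper_bound(std::begin(representations),
                                                             std::end(representations),
                                                             representations[approx_past_the_last - 1]);
            const std::size_t past_the_last_index = past_the_last_iter - std::begin(representations);
            sections.emplace_back(first_index, past_the_last_index);
            first_index = past_the_last_index;
        }
        return sections;
    }
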
-/// -/// \param number_of_reads number of reads in input data -/// \param input_representations representations of all sketch elements, soreted by representation -/// \param input_readids_positions_directions read_ids, positions in reads and directions of sketch element, each element corresponds the element of input_representations with the same index, elements are sorted by read_id within same representation -/// \param positions_in_reads positions in reads extracted from positions_in_reads -/// \param read_ids read_ids extracted from positions_in_reads -/// \param directions_of_reads directions of reads extracted from positions_in_reads -/// \param read_id_and_representation_to_sketch_elements index, as explained above -/// -/// \tparam ReadidPositionDirection any implementation of SketchElement::ReadidPositionDirection -/// \tparam DirectionOfRepresentation any implementation of SketchElement::DirectionOfRepresentation -template -void build_index(const std::uint64_t number_of_reads, - const std::vector& input_representations, - const std::vector& input_readids_positions_directions, - std::vector& positions_in_reads, - std::vector& read_ids, - std::vector& directions_of_reads, - std::vector>& read_id_and_representation_to_sketch_elements) -{ - std::vector> sections_for_threads = generate_sections_for_multithreaded_index_building(input_representations); - - std::vector>> read_id_and_representation_to_sketch_elements_per_section(sections_for_threads.size()); - positions_in_reads.resize(input_readids_positions_directions.size()); - read_ids.resize(input_readids_positions_directions.size()); - directions_of_reads.resize(input_readids_positions_directions.size()); - - auto build_index_lambda = [number_of_reads, - &read_id_and_representation_to_sketch_elements_per_section, - &positions_in_reads, - &read_ids, - &directions_of_reads, - &input_representations, - &input_readids_positions_directions, - §ions_for_threads](std::uint32_t section_id) { - read_id_and_representation_to_sketch_elements_per_section[section_id].resize(number_of_reads); - - std::size_t first_element_in_this_section = sections_for_threads[section_id].first; - std::size_t last_element_in_this_section = sections_for_threads[section_id].second; - - representation_t current_representation = input_representations[first_element_in_this_section]; - read_id_t current_read_id = input_readids_positions_directions[first_element_in_this_section].read_id_; - decltype(ArrayBlock::block_size_) sketch_elements_with_curr_representation_and_read_id = 1; - decltype(ArrayBlock::block_size_) sketch_elements_with_curr_representation = 0; - decltype(ArrayBlock::first_element_) first_sketch_element_with_this_representation = first_element_in_this_section; - std::vector read_ids_with_current_representation; - read_ids_with_current_representation.push_back(current_read_id); - read_id_and_representation_to_sketch_elements_per_section[section_id][current_read_id].push_back({current_representation, - {first_element_in_this_section, 0}, - {first_sketch_element_with_this_representation, 0}}); - - positions_in_reads[first_element_in_this_section] = input_readids_positions_directions[first_element_in_this_section].position_in_read_; - read_ids[first_element_in_this_section] = input_readids_positions_directions[first_element_in_this_section].read_id_; - directions_of_reads[first_element_in_this_section] = DirectionOfRepresentation(input_readids_positions_directions[first_element_in_this_section].direction_); - - for (std::size_t sketch_element_index = 
first_element_in_this_section + 1; sketch_element_index < last_element_in_this_section; ++sketch_element_index) - { - // TODO: edit the interface so this copy is not needed - positions_in_reads[sketch_element_index] = input_readids_positions_directions[sketch_element_index].position_in_read_; - read_ids[sketch_element_index] = input_readids_positions_directions[sketch_element_index].read_id_; - directions_of_reads[sketch_element_index] = DirectionOfRepresentation(input_readids_positions_directions[sketch_element_index].direction_); - - if (input_representations[sketch_element_index] != current_representation) - { // new representation -> save data for the previous one - // increase the number of sketch elements with previous representation - sketch_elements_with_curr_representation += sketch_elements_with_curr_representation_and_read_id; - // save the number sketch elements with the previous representation and read_id - read_id_and_representation_to_sketch_elements_per_section[section_id][current_read_id].back().sketch_elements_for_representation_and_read_id_.block_size_ = sketch_elements_with_curr_representation_and_read_id; - // update the number of sketch elements with previous representation and all read_ids - for (const read_id_t read_id_to_update : read_ids_with_current_representation) - { - read_id_and_representation_to_sketch_elements_per_section[section_id][read_id_to_update].back().sketch_elements_for_representation_and_all_read_ids_.block_size_ = sketch_elements_with_curr_representation; - } - // start processing new representation - current_representation = input_representations[sketch_element_index]; - current_read_id = input_readids_positions_directions[sketch_element_index].read_id_; - sketch_elements_with_curr_representation_and_read_id = 1; - sketch_elements_with_curr_representation = 0; - first_sketch_element_with_this_representation = sketch_element_index; - read_ids_with_current_representation.clear(); - read_ids_with_current_representation.push_back(current_read_id); - read_id_and_representation_to_sketch_elements_per_section[section_id][current_read_id].push_back({current_representation, - {sketch_element_index, 0}, - {first_sketch_element_with_this_representation, 0}}); - } - else - { // still the same representation - if (input_readids_positions_directions[sketch_element_index].read_id_ != current_read_id) - { // new read_id -> save the data for the previous one - // increase the number of sketch elements with this representation - sketch_elements_with_curr_representation += sketch_elements_with_curr_representation_and_read_id; - // save the number of sketch elements for the previous read_id - read_id_and_representation_to_sketch_elements_per_section[section_id][current_read_id].back().sketch_elements_for_representation_and_read_id_.block_size_ = sketch_elements_with_curr_representation_and_read_id; - // start processing new read_id - current_read_id = input_readids_positions_directions[sketch_element_index].read_id_; - sketch_elements_with_curr_representation_and_read_id = 1; - read_ids_with_current_representation.push_back(current_read_id); - read_id_and_representation_to_sketch_elements_per_section[section_id][current_read_id].push_back({current_representation, - {sketch_element_index, 0}, - {first_sketch_element_with_this_representation, 0}}); - } - else - { // still the same read_id - ++sketch_elements_with_curr_representation_and_read_id; - } - } - } - // process the last element - // increase the number of sketch elements with last representation - 
sketch_elements_with_curr_representation += sketch_elements_with_curr_representation_and_read_id; - // save the number sketch elements with last representation and read_id - read_id_and_representation_to_sketch_elements_per_section[section_id][current_read_id].back().sketch_elements_for_representation_and_read_id_.block_size_ = sketch_elements_with_curr_representation_and_read_id; - // update the number of sketch elements with last representation and all read_ids - for (const read_id_t read_id_to_update : read_ids_with_current_representation) - { - read_id_and_representation_to_sketch_elements_per_section[section_id][read_id_to_update].back().sketch_elements_for_representation_and_all_read_ids_.block_size_ = sketch_elements_with_curr_representation; - } - }; - - // build index for each section in a separate thread - std::vector index_building_threads; - for (std::size_t section_num = 0; section_num < sections_for_threads.size(); ++section_num) - { - index_building_threads.emplace_back(build_index_lambda, - section_num); - } - - for (std::size_t section_num = 0; section_num < sections_for_threads.size(); ++section_num) - { - index_building_threads[section_num].join(); - } - - // now merge the resulting index - // data gets merged in the same order it was split to threads, so the resulting arrays are still sorted by representation - // this merge could probably also be done in parallel - read_id_and_representation_to_sketch_elements.resize(number_of_reads); - for (std::size_t read_id = 0; read_id < number_of_reads; ++read_id) - { - for (std::size_t section_num = 0; section_num < read_id_and_representation_to_sketch_elements_per_section.size(); ++section_num) - { - read_id_and_representation_to_sketch_elements[read_id].insert(std::end(read_id_and_representation_to_sketch_elements[read_id]), - std::begin(read_id_and_representation_to_sketch_elements_per_section[section_num][read_id]), - std::end(read_id_and_representation_to_sketch_elements_per_section[section_num][read_id])); - } - } -} - -} // namespace index_gpu - -} // namespace details - -template -IndexGPU::IndexGPU(const std::vector& parsers, const std::uint64_t kmer_size, const std::uint64_t window_size, const std::vector>& read_ranges, const bool hash_representations) - : kmer_size_(kmer_size) - , window_size_(window_size) - , number_of_reads_(0) - , reached_end_of_input_(false) - , hash_representations(hash_representations) -{ - generate_index(parsers, read_ranges); -} - -template -IndexGPU::IndexGPU() - : kmer_size_(0) - , window_size_(0) - , number_of_reads_(0) - , hash_representations(true) -{ -} - -template -const std::vector& IndexGPU::representations() const -{ - return representations_; -}; - -template -const std::vector& IndexGPU::positions_in_reads() const -{ - return positions_in_reads_; -} - -template -const std::vector& IndexGPU::read_ids() const -{ - return read_ids_; -} - -template -const std::vector& IndexGPU::directions_of_reads() const -{ - return directions_of_reads_; -} - -template -std::uint64_t IndexGPU::number_of_reads() const -{ - return number_of_reads_; -} - -template -bool IndexGPU::reached_end_of_input() const -{ - return reached_end_of_input_; -} - -template -const std::vector& IndexGPU::read_id_to_read_name() const -{ - return read_id_to_read_name_; -} - -template -const std::vector& IndexGPU::read_id_to_read_length() const -{ - return read_id_to_read_length_; -} - -template -const std::vector>& IndexGPU::read_id_and_representation_to_sketch_elements() const -{ - return 
read_id_and_representation_to_sketch_elements_; -} - -// TODO: This function will be split into several functions -template -void IndexGPU::generate_index(const std::vector& parsers, const std::vector>& read_ranges) -{ - - if (parsers.size() != read_ranges.size()) - { - throw std::runtime_error("Number of parsers must match number of read ranges in index generation."); - } - - number_of_reads_ = 0; - - std::vector> representations_from_all_loops_h; - std::vector> rest_from_all_loops_h; - - std::uint64_t total_basepairs = 0; - std::vector read_id_to_basepairs_section_h; - std::vector fasta_sequences; - std::vector fasta_sequence_indices; - - read_id_t global_read_id = 0; - // find out how many basepairs each read has and determine its section in the big array with all basepairs - int32_t count = 0; - { - CGA_NVTX_RANGE(profile_reads, "reading fasta"); - for (auto range : read_ranges) - { - io::FastaParser* parser = parsers[count]; - auto first_read_ = range.first; - auto last_read_ = std::min(range.second, static_cast(parser->get_num_seqences())); - - for (auto read_id = first_read_; read_id < last_read_; read_id++) - { - fasta_sequences.emplace_back(parser->get_sequence_by_id(read_id)); - const std::string& seq = fasta_sequences.back().seq; - const std::string& name = fasta_sequences.back().name; - if (seq.length() >= window_size_ + kmer_size_ - 1) - { - read_id_to_basepairs_section_h.emplace_back(ArrayBlock{total_basepairs, static_cast(seq.length())}); - total_basepairs += seq.length(); - read_id_to_read_name_.push_back(name); - read_id_to_read_length_.push_back(seq.length()); - fasta_sequence_indices.push_back(global_read_id); - } - else - { - CGA_LOG_INFO("Skipping read {}. It has {} basepairs, one window covers {} basepairs", - name, - seq.length(), window_size_ + kmer_size_ - 1); - } - global_read_id++; - } - - count++; - } - } - - auto number_of_reads_to_add = read_id_to_basepairs_section_h.size(); // This is the number of reads in this specific iteration - number_of_reads_ += number_of_reads_to_add; // this is the *total* number of reads. 
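
A note on the length filter applied in the loop above: one minimizer window of window_size consecutive k-mers spans window_size + kmer_size - 1 basepairs, which is why shorter reads are skipped. A small stand-alone check (the 15/15 defaults are the ones from the command-line help; the read length is made up):

    #include <cstdint>
    #include <iostream>

    int main()
    {
        const std::uint64_t kmer_size   = 15;  // default -k
        const std::uint64_t window_size = 15;  // default -w
        const std::uint64_t read_length = 100; // arbitrary example

        const std::uint64_t min_length = window_size + kmer_size - 1; // 29 basepairs covered by one window
        if (read_length >= min_length)
        {
            // number of window positions in a read of this length
            const std::uint64_t windows = read_length - min_length + 1; // 72
            std::cout << "read produces " << windows << " windows" << std::endl;
        }
        else
        {
            std::cout << "read skipped, shorter than one window" << std::endl;
        }
    }
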
- - // check if there are any reads to process - if (0 == number_of_reads_) - { - CGA_LOG_INFO("No Sketch Elements to be added to index"); - return; - } - - std::vector merged_basepairs_h(total_basepairs); - - // copy each read to its section of the basepairs array - read_id_t read_id = 0; - for (auto fasta_object_id : fasta_sequence_indices) - { //TODO do not start from zero - // skip reads which are shorter than one window - const std::string& seq = fasta_sequences[fasta_object_id].seq; - if (seq.length() >= window_size_ + kmer_size_ - 1) - { - std::copy(std::begin(seq), - std::end(seq), - std::next(std::begin(merged_basepairs_h), read_id_to_basepairs_section_h[read_id].first_element_)); - ++read_id; - } - } - - // fasta_sequences not needed after this point - fasta_sequences.clear(); - fasta_sequences.shrink_to_fit(); - - // move basepairs to the device - CGA_LOG_INFO("Allocating {} bytes for read_id_to_basepairs_section_d", read_id_to_basepairs_section_h.size() * sizeof(decltype(read_id_to_basepairs_section_h)::value_type)); - device_buffer read_id_to_basepairs_section_d(read_id_to_basepairs_section_h.size()); - //device_buffer read_id_to_basepairs_section_d(1); - CGA_LOG_INFO("Allocated"); - CGA_CU_CHECK_ERR(cudaMemcpy(read_id_to_basepairs_section_d.data(), - read_id_to_basepairs_section_h.data(), - read_id_to_basepairs_section_h.size() * sizeof(decltype(read_id_to_basepairs_section_h)::value_type), - cudaMemcpyHostToDevice)); - - CGA_LOG_INFO("Allocating {} bytes for merged_basepairs_d", merged_basepairs_h.size() * sizeof(decltype(merged_basepairs_h)::value_type)); - device_buffer merged_basepairs_d(merged_basepairs_h.size()); - CGA_CU_CHECK_ERR(cudaMemcpy(merged_basepairs_d.data(), - merged_basepairs_h.data(), - merged_basepairs_h.size() * sizeof(decltype(merged_basepairs_h)::value_type), - cudaMemcpyHostToDevice)); - merged_basepairs_h.clear(); - merged_basepairs_h.shrink_to_fit(); - - // sketch elements get generated here - auto sketch_elements = SketchElementImpl::generate_sketch_elements(number_of_reads_to_add, - kmer_size_, - window_size_, - number_of_reads_ - number_of_reads_to_add, - merged_basepairs_d, - read_id_to_basepairs_section_h, - read_id_to_basepairs_section_d, - hash_representations); - device_buffer representations_from_this_loop_d = std::move(sketch_elements.representations_d); - device_buffer rest_from_this_loop_d = std::move(sketch_elements.rest_d); - - CGA_LOG_INFO("Deallocating {} bytes from read_id_to_basepairs_section_d", read_id_to_basepairs_section_d.size() * sizeof(decltype(read_id_to_basepairs_section_d)::value_type)); - read_id_to_basepairs_section_d.free(); - CGA_LOG_INFO("Deallocating {} bytes from merged_basepairs_d", merged_basepairs_d.size() * sizeof(decltype(merged_basepairs_d)::value_type)); - merged_basepairs_d.free(); - - // *** sort sketch elements by representation *** - // As this is a stable sort and the data was initailly grouper by read_id this means that the sketch elements within each representations are sorted by read_id - thrust::stable_sort_by_key(thrust::device, - representations_from_this_loop_d.data(), - representations_from_this_loop_d.data() + representations_from_this_loop_d.size(), - rest_from_this_loop_d.data()); - - representations_from_all_loops_h.push_back(decltype(representations_from_all_loops_h)::value_type(representations_from_this_loop_d.size())); - CGA_CU_CHECK_ERR(cudaMemcpy(representations_from_all_loops_h.back().data(), - representations_from_this_loop_d.data(), - representations_from_this_loop_d.size() * 
sizeof(decltype(representations_from_this_loop_d)::value_type), - cudaMemcpyDeviceToHost)); - rest_from_all_loops_h.push_back(typename decltype(rest_from_all_loops_h)::value_type(rest_from_this_loop_d.size())); - CGA_CU_CHECK_ERR(cudaMemcpy(rest_from_all_loops_h.back().data(), - rest_from_this_loop_d.data(), - rest_from_this_loop_d.size() * sizeof(typename decltype(rest_from_this_loop_d)::value_type), - cudaMemcpyDeviceToHost)); - - // free these arrays as they are not needed anymore - CGA_LOG_INFO("Deallocating {} bytes from representations_from_this_loop_d", representations_from_this_loop_d.size() * sizeof(decltype(representations_from_this_loop_d)::value_type)); - representations_from_this_loop_d.free(); - CGA_LOG_INFO("Deallocating {} bytes from rest_from_this_loop_d", rest_from_this_loop_d.size() * sizeof(typename decltype(rest_from_this_loop_d)::value_type)); - rest_from_this_loop_d.free(); - - // merge sketch elements arrays from previous arrays in one big array - std::vector merged_rest_h; - - if (representations_from_all_loops_h.size() > 1) - { - std::size_t free_device_memory = 0; - std::size_t total_device_memory = 0; - CGA_CU_CHECK_ERR(cudaMemGetInfo(&free_device_memory, &total_device_memory)); - - // if there is more than one array in representations_from_all_loops_h and rest_from_all_loops_h merge those arrays together - details::index_gpu::merge_sketch_element_arrays(representations_from_all_loops_h, - rest_from_all_loops_h, - (free_device_memory / 100) * 90, // do not take all available device memory - representations_, - merged_rest_h); - } - else - { - // if there is only one array in each array there is nothing to be merged - representations_ = std::move(representations_from_all_loops_h[0]); - merged_rest_h = std::move(rest_from_all_loops_h[0]); - } - - representations_from_all_loops_h.clear(); - representations_from_all_loops_h.shrink_to_fit(); - rest_from_all_loops_h.clear(); - rest_from_all_loops_h.shrink_to_fit(); - - // build read_id_and_representation_to_sketch_elements_ and copy sketch elements to output arrays - details::index_gpu::build_index(number_of_reads_, - representations_, - merged_rest_h, - positions_in_reads_, - read_ids_, - directions_of_reads_, - read_id_and_representation_to_sketch_elements_); -} - -} // namespace cudamapper -} // namespace claragenomics diff --git a/cudamapper/src/main.cu b/cudamapper/src/main.cu index 0f42c3a74..fd70ad9a4 100644 --- a/cudamapper/src/main.cu +++ b/cudamapper/src/main.cu @@ -22,11 +22,9 @@ #include #include -#include "claragenomics/cudamapper/index.hpp" #include "claragenomics/cudamapper/index_two_indices.hpp" #include "claragenomics/cudamapper/matcher_two_indices.hpp" #include "claragenomics/cudamapper/overlapper.hpp" -#include "matcher.hpp" #include "overlapper_triggered.hpp" static struct option options[] = { @@ -72,11 +70,11 @@ int main(int argc, char* argv[]) } } - if (k > claragenomics::cudamapper::Index::maximum_kmer_size()) + /*if (k > claragenomics::cudamapper::Index::maximum_kmer_size()) { std::cerr << "kmer of size " << k << " is not allowed, maximum k = " << claragenomics::cudamapper::Index::maximum_kmer_size() << std::endl; exit(1); - } + }*/ // Check remaining argument count. 
if ((argc - optind) < 2) @@ -263,7 +261,8 @@ void help(int32_t exit_code = 0) options: -k, --kmer-size length of kmer to use for minimizers [15] (Max=)" - << claragenomics::cudamapper::Index::maximum_kmer_size() << ")" + //<< claragenomics::cudamapper::Index::maximum_kmer_size() + << ")" << R"( -w, --window-size length of window to use for minimizers [15])" diff --git a/cudamapper/src/matcher.cu b/cudamapper/src/matcher.cu deleted file mode 100644 index cd2b5d712..000000000 --- a/cudamapper/src/matcher.cu +++ /dev/null @@ -1,395 +0,0 @@ -/* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. -*/ - -#include -#include -#include "matcher.hpp" -#include -#include -#include -#include - -namespace claragenomics -{ -namespace cudamapper -{ - -/// \brief Generates anchors for all reads -/// -/// Each thread block works on one query read. read_id_to_pointer_arrays_section_d points sections of read_id_to_sketch_elements_d and read_id_to_sketch_elements_to_check_d. -/// Kernel matches sketch elements that have the same representation. These sketch elements are pointed to by read_id_to_sketch_elements_d and read_id_to_sketch_elements_to_check_d. -/// -/// Kernel has an extern shared memory array which should be at least as large as the largest number of sketch elements with the same representation in any block -/// -/// Anchors are grouped by query read id and within that by representation (both in increasing order). -/// Assume q0p4t2p8 means anchor of read id 0 at position 4 and read id 2 at position 8. -/// Assume read 0 has 30 sketch elements with certain representation, read 1 40 and read 2 50. -/// Anchors for read 0 as query and that represtnation looks like this: -/// q0p0t1p0, q0p0t1p1 .. q0p0t1p39, q0p0t2p0, q0p0t2p1 ... q0p0t2p49, q0p1t1p0, q0p1t1p1 ... q0p1t1p39, q0p1t2p0 .. q0p1t2p49, q0p2t1p0 ... q0p2t1p39, q0p2t2p0 ... q0p2t2p49, q0p3t1p0 ... 
q0p29t2p49 -/// -/// \param positions_in_reads_d positions of sketch elements in their reads first sorted by representation and then by read id -/// \param read_ids_d read ids of reads sketch elements belong to first sorted by representation and then by read id (elements with the same index from positions_in_reads_d belong to the same sketch element) -/// \param read_id_to_sketch_elements_d every element points to a section of positions_in_reads_d and read_ids_d that belong to sketch elements with the same read_id and representation -/// \param read_id_to_sketch_elements_to_check_d every element points to a section of positions_in_reads_d and read_ids_d that belong to sketch elements with the same representation and read_id larger than some value -/// \param read_id_to_pointer_arrays_section_d every element belongs to one read_id and points to its sections of read_id_to_sketch_elements_d and read_id_to_sketch_elements_to_check_d -/// \param anchors_d pairs of sketch elements with the same representation that belong to different reads -/// \param read_id_to_anchors_section_d points to parts of anchors_d in which all anchors have the same read_id -__global__ void generate_anchors(const position_in_read_t* const positions_in_reads_d, - const read_id_t* const read_ids_d, - // const SketchElement::DirectionOfRepresentation* const directions_of_reads_d, // currently we don't use direction - ArrayBlock* read_id_to_sketch_elements_d, - ArrayBlock* read_id_to_sketch_elements_to_check_d, - ArrayBlock* read_id_to_pointer_arrays_section_d, - Anchor* const anchors_d, - ArrayBlock* read_id_to_anchors_section_d) -{ - - extern __shared__ position_in_read_t query_positions[]; // size = largest value of block_size in read_id_to_sketch_elements_d - - const read_id_t query_read_id = blockIdx.x; - const ArrayBlock pointer_to_arrays_section = read_id_to_pointer_arrays_section_d[query_read_id]; - - __shared__ std::uint32_t anchors_written_so_far; - if (0 == threadIdx.x) - anchors_written_so_far = 0; - - // go over all representations in this read one by one - for (auto representation_index = pointer_to_arrays_section.first_element_; - representation_index < pointer_to_arrays_section.first_element_ + pointer_to_arrays_section.block_size_; - ++representation_index) - { - - // load all position_in_read for this read and representation (query) - ArrayBlock query_sketch_elements_section = read_id_to_sketch_elements_d[representation_index]; - for (auto i = threadIdx.x; i < query_sketch_elements_section.block_size_; ++i) - { - query_positions[i] = positions_in_reads_d[query_sketch_elements_section.first_element_ + i]; - } - __syncthreads(); - - // section of sketch elements with that representation and read_id larger than query_read_id - ArrayBlock target_sketch_elements_section = read_id_to_sketch_elements_to_check_d[representation_index]; - for (auto i = threadIdx.x; i < target_sketch_elements_section.block_size_; i += blockDim.x) - { - const read_id_t target_read_id = read_ids_d[target_sketch_elements_section.first_element_ + i]; - const position_in_read_t target_position_in_read = positions_in_reads_d[target_sketch_elements_section.first_element_ + i]; - for (int j = 0; j < query_sketch_elements_section.block_size_; ++j) - { - // writing anchors in form (q1t1,q1t2,q1t3...q2t1,q2t2,q3t3....) for coalescing - // TODO: split anchors_d into four arrays for better coalescing? 
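
The write pattern used just below can be reproduced on the host to see the resulting anchor ordering. Illustrative sketch only (the counts are made up):

    #include <cstdint>
    #include <iostream>

    int main()
    {
        // Illustrative sketch of the flattened anchor index used in the kernel: for one
        // representation, every query sketch element j is paired with every target
        // sketch element i, and the flat index j * num_targets + i orders the anchors
        // as (q0t0, q0t1, ..., q1t0, q1t1, ...). Threads with consecutive i therefore
        // write to consecutive addresses, which keeps the stores coalesced.
        const std::uint32_t num_queries = 3; // e.g. sketch elements with this representation in the query read
        const std::uint32_t num_targets = 4; // e.g. sketch elements with this representation in the target reads

        for (std::uint32_t j = 0; j < num_queries; ++j)
            for (std::uint32_t i = 0; i < num_targets; ++i)
                std::cout << "flat index " << j * num_targets + i
                          << " -> query element " << j << ", target element " << i << "\n";
    }

With the counts from the example above (30 query sketch elements, 40 + 50 target sketch elements for one representation), this layout produces 30 * 90 = 2700 anchors for that representation.
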
- anchors_d[read_id_to_anchors_section_d[query_read_id].first_element_ + anchors_written_so_far + - j * target_sketch_elements_section.block_size_ + i] - .query_read_id_ = query_read_id; - anchors_d[read_id_to_anchors_section_d[query_read_id].first_element_ + anchors_written_so_far + - j * target_sketch_elements_section.block_size_ + i] - .target_read_id_ = target_read_id; - anchors_d[read_id_to_anchors_section_d[query_read_id].first_element_ + anchors_written_so_far + - j * target_sketch_elements_section.block_size_ + - i] - .query_position_in_read_ = query_positions[j]; - anchors_d[read_id_to_anchors_section_d[query_read_id].first_element_ + anchors_written_so_far + - j * target_sketch_elements_section.block_size_ + - i] - .target_position_in_read_ = target_position_in_read; - } - } - __syncthreads(); - if (0 == threadIdx.x) - anchors_written_so_far += target_sketch_elements_section.block_size_ * - query_sketch_elements_section.block_size_; - __syncthreads(); - } -} - -Matcher::Matcher(const Index& index, uint32_t query_target_division_idx) -{ - CGA_NVTX_RANGE(profile, "matcher"); - if (0 == index.number_of_reads()) - { - return; - } - - //Now perform the matching in a loop - - size_t increment = index.maximum_representation(); - - size_t max_representation = index.maximum_representation(); - size_t representation_min_range = index.minimum_representation(); - size_t representation_max_range = increment; - - // Get available device memory - size_t free_device_memory = 0; - size_t total_device_memory = 0; - CGA_CU_CHECK_ERR(cudaMemGetInfo(&free_device_memory, &total_device_memory)); - size_t max_anchor_buffer_size = 0.95 * free_device_memory; - size_t max_anchors = max_anchor_buffer_size / sizeof(Anchor); - - const std::vector& positions_in_reads_h = index.positions_in_reads(); - const std::vector& read_ids_h = index.read_ids(); - const std::vector& directions_of_reads_h = index.directions_of_reads(); - - CGA_LOG_INFO("Allocating {} bytes for positions_in_reads_d", - positions_in_reads_h.size() * sizeof(position_in_read_t)); - device_buffer positions_in_reads_d(positions_in_reads_h.size()); - CGA_CU_CHECK_ERR(cudaMemcpy(positions_in_reads_d.data(), - positions_in_reads_h.data(), - positions_in_reads_h.size() * sizeof(position_in_read_t), - cudaMemcpyHostToDevice)); - - CGA_LOG_INFO("Allocating {} bytes for read_ids_d", read_ids_h.size() * sizeof(read_id_t)); - device_buffer read_ids_d(read_ids_h.size()); - CGA_CU_CHECK_ERR(cudaMemcpy(read_ids_d.data(), - read_ids_h.data(), - read_ids_h.size() * sizeof(read_id_t), - cudaMemcpyHostToDevice)); - - CGA_LOG_INFO("Allocating {} bytes for directions_of_reads_d", directions_of_reads_h.size() * sizeof(SketchElement::DirectionOfRepresentation)); - device_buffer directions_of_reads_d(directions_of_reads_h.size()); - CGA_CU_CHECK_ERR(cudaMemcpy(directions_of_reads_d.data(), - directions_of_reads_h.data(), - directions_of_reads_h.size() * sizeof(SketchElement::DirectionOfRepresentation), - cudaMemcpyHostToDevice)); - - while (representation_min_range <= max_representation) - { - - CGA_LOG_INFO("Computing representation [{},{})", representation_min_range, representation_max_range); - // Each CUDA thread block is responsible for one read. For each sketch element in that read it checks all other reads for sketch elements with the same representation and records those pairs. - // As read_ids are numbered from 0 to number_of_reads - 1 CUDA thread block is responsible for read with read_id = blockIdx.x. - // - // Overlapping is symmetric, i.e. 
if sketch element at position 8 in read 2 overlaps with (= has the same representation as) sketch element at position 4 in read 5 then sketch element - // at position 4 in read 5 also overlaps with sketch element at position 8 in read 2. It is thus only necessary to check for overlapping in one direction. This is achieved by having each - // CUDA thread block only check reads with read_ids greater than the read_id of that read. - // - // In order to be able to do this check CUDA thread block has to know which sketch elements belong to its read and which are candidates for a match (have read_id greater than CUDA thread block's - // read_id and the same representation as one of sketch elements from CUDA thread block's read). - // Note that positions_in_reads, read_ids and directions_of_reads (data arrays) have sketch elements grouped by representation and within one representation grouped by read_id - // (both representations and read_ids are sorted in increasing order). - // - // Each section of read_id_to_sketch_elements belongs to one read_id. Each element in that section points to a section of data arrays that contains sketch elements belonging to that read_id - // and some representation (it's not important which one). - // Similarly to this each section of read_id_to_sketch_elements_to_check points to the section of data arrays with read_id greater than the gived read_id and the representation same as the - // representation of the element in read_id_to_sketch_elements with the same index (it's still not importatn which representation is that). - // This means that the kernel should match all sketch elements pointed by one element of read_id_to_sketch_elements with all sketch elements pointed to by the element of - // read_id_to_sketch_elements_to_check with the same index. - // - // read_id_to_pointer_arrays_section maps a read_id to its section of read_id_to_sketch_elements and read_id_to_sketch_elements_to_check (pointer arrays). - - std::vector read_id_to_sketch_elements_h; // TODO: we should be able to know this number -> reserve space? - std::vector read_id_to_sketch_elements_to_check_h; - std::vector read_id_to_pointer_arrays_section_h(index.number_of_reads(), {0, 0}); - - // Anchor is one pair of sketch elements with the same representation in different reads - // Anchors are symmetric (as explained above), so they are saved in only one direction - // As only one direction is saved for each representation in each read_id there are going to be sketch_elements_with_that_representation_in_that_read * sketch_elements_with_that_representation_to_check_in_other_reads anchors. - // This means we know upfront how many anchors are there going to be for each read_id and we can merge all anchors in one array and assign its sections to different read_ids - std::vector read_id_to_anchors_section_h(index.number_of_reads(), {0, 0}); - std::uint64_t total_anchors = 0; - std::uint32_t largest_block_size = 0; - - auto num_reads_to_query = query_target_division_idx; - if (query_target_division_idx == 0) - { - num_reads_to_query = index.number_of_reads(); - } - - for (std::size_t read_id = 0; read_id < num_reads_to_query; ++read_id) - { - // First determine the starting index of section of pointer arrays that belong to read with read_id. - // Reads are processed consecutively. Pointer arrays section for read 0 will start at index 0 and if we assume that all sketch elements in read 0 had a total of 10 unique representation its section will end at index 9. 
This means that the section for read 1 belongs at index 0 + 10 = 10. - if (read_id != 0) - { - read_id_to_pointer_arrays_section_h[read_id].first_element_ = - read_id_to_pointer_arrays_section_h[read_id - 1].first_element_ + - read_id_to_pointer_arrays_section_h[read_id - 1].block_size_; - read_id_to_anchors_section_h[read_id].first_element_ = - read_id_to_anchors_section_h[read_id - 1].first_element_ + - read_id_to_anchors_section_h[read_id - 1].block_size_; - } - - const std::vector& array_blocks_for_this_read_id = index.read_id_and_representation_to_sketch_elements()[read_id]; - - read_id_to_pointer_arrays_section_h[read_id].block_size_ = 0; - - // go through all representations in this read - for (const auto& one_representation_in_this_read : array_blocks_for_this_read_id) - { - // Check if we are in the correct range - // TODO: code right now loops over `array_blocks_for_this_read_id ` in every while loop iteration. - // We could save the iterator to array_blocks_for_this_read_id in order to know from which - // element to continue in the next while loop iteration and also break out of the loop once we - // reach representation_max_range. - // We can do this because the element in `array_blocks_for_this_read_id` are sorted by representation. - if ((one_representation_in_this_read.representation_ < representation_max_range) && (one_representation_in_this_read.representation_ >= representation_min_range)) - { - - const ArrayBlock& array_block_for_this_representation_and_read = one_representation_in_this_read.sketch_elements_for_representation_and_read_id_; // sketch elements with this representation and this read_id - const ArrayBlock& whole_data_arrays_section_for_representation = one_representation_in_this_read.sketch_elements_for_representation_and_all_read_ids_; // sketch elements with this representation in all read_ids - largest_block_size = std::max(largest_block_size, - array_block_for_this_representation_and_read.block_size_); - // Due to symmetry we only want to check reads with read_id greater than the current read_id. - // We are only interested in part of whole_data_arrays_section_for_representation that comes after array_block_for_this_representation_and_read because only sketch elements in that part have read_id greater than the current read_id - ArrayBlock section_to_check; - - auto start = array_block_for_this_representation_and_read.first_element_ + - array_block_for_this_representation_and_read.block_size_; // element after the last element for this read_id - - auto end = whole_data_arrays_section_for_representation.first_element_ + whole_data_arrays_section_for_representation.block_size_; - - if (query_target_division_idx == 0) - { - section_to_check.first_element_ = start; - section_to_check.block_size_ = end - start; - } - else - { - section_to_check.block_size_ = 0; - //TODO: This should be a bisectional search. 
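
The TODO above could be addressed with a binary search, since read ids within one representation block are sorted in increasing order. A possible sketch (not part of the patch; it operates on the same host-side read_ids array and [start, end) range used below):

    #include <algorithm>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    using read_id_t = std::uint32_t;

    // Sketch: find the index of the first sketch element in [start, end) whose read_id
    // is greater than query_target_division_idx; returns end if there is none.
    std::size_t first_target_sketch_element(const std::vector<read_id_t>& read_ids,
                                            const std::size_t start,
                                            const std::size_t end,
                                            const read_id_t query_target_division_idx)
    {
        const auto iter = std::upper_bound(std::begin(read_ids) + start,
                                           std::begin(read_ids) + end,
                                           query_target_division_idx);
        return iter - std::begin(read_ids);
    }
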
- for (auto sketch_element_idx = start; sketch_element_idx < end; sketch_element_idx++) - { - if (read_ids_h[sketch_element_idx] > query_target_division_idx) - { - section_to_check.first_element_ = sketch_element_idx; - section_to_check.block_size_ = end - section_to_check.first_element_; // number of remaining elements - break; - } - } - } - // TODO: if block_size_ == 0 - if (section_to_check.block_size_) - { - read_id_to_sketch_elements_h.emplace_back(array_block_for_this_representation_and_read); - read_id_to_sketch_elements_to_check_h.emplace_back(section_to_check); - // Determine the number of matches for this representation - read_id_to_anchors_section_h[read_id].block_size_ += - array_block_for_this_representation_and_read.block_size_ * - section_to_check.block_size_; - ++read_id_to_pointer_arrays_section_h[read_id].block_size_; - } - } - } - total_anchors += read_id_to_anchors_section_h[read_id].block_size_; - - if (total_anchors > max_anchors) - { - // If the maximum number of anchors has been exceeded all host buffers are re-initialised - // and the loop is restarted with a smaller representation range to compute. - read_id_to_sketch_elements_h.clear(); - read_id_to_sketch_elements_to_check_h.clear(); - total_anchors = 0; - largest_block_size = 0; - auto growth_coefficient = 4; - increment /= growth_coefficient; //TODO investigate best coefficient - representation_max_range = representation_min_range + increment; - read_id = 0; - read_id_to_anchors_section_h = std::vector(index.number_of_reads(), {0, 0}); - read_id_to_pointer_arrays_section_h = std::vector(index.number_of_reads(), {0, 0}); - CGA_LOG_INFO("Backing off - max range adjusted to {}", representation_max_range); - } - } - - // Now done with the read IDs - - CGA_LOG_INFO("Allocating {} bytes for read_id_to_sketch_elements_d", - read_id_to_sketch_elements_h.size() * sizeof(ArrayBlock)); - device_buffer read_id_to_sketch_elements_d(read_id_to_sketch_elements_h.size()); - CGA_CU_CHECK_ERR(cudaMemcpy(read_id_to_sketch_elements_d.data(), read_id_to_sketch_elements_h.data(), - read_id_to_sketch_elements_h.size() * sizeof(ArrayBlock), - cudaMemcpyHostToDevice)); - - read_id_to_sketch_elements_h.clear(); - read_id_to_sketch_elements_h.shrink_to_fit(); - - CGA_LOG_INFO("Allocating {} bytes for read_id_to_sketch_elements_to_check_d", - read_id_to_sketch_elements_to_check_h.size() * sizeof(ArrayBlock)); - device_buffer read_id_to_sketch_elements_to_check_d( - read_id_to_sketch_elements_to_check_h.size()); - CGA_CU_CHECK_ERR(cudaMemcpy(read_id_to_sketch_elements_to_check_d.data(), - read_id_to_sketch_elements_to_check_h.data(), - read_id_to_sketch_elements_to_check_h.size() * sizeof(ArrayBlock), - cudaMemcpyHostToDevice)); - read_id_to_sketch_elements_to_check_h.clear(); - read_id_to_sketch_elements_to_check_h.shrink_to_fit(); - - CGA_LOG_INFO("Allocating {} bytes for read_id_to_pointer_arrays_section_d", - read_id_to_pointer_arrays_section_h.size() * sizeof(ArrayBlock)); - device_buffer read_id_to_pointer_arrays_section_d(read_id_to_pointer_arrays_section_h.size()); - CGA_CU_CHECK_ERR(cudaMemcpy(read_id_to_pointer_arrays_section_d.data(), read_id_to_pointer_arrays_section_h.data(), - read_id_to_pointer_arrays_section_h.size() * sizeof(ArrayBlock), - cudaMemcpyHostToDevice)); - read_id_to_pointer_arrays_section_h.clear(); - read_id_to_pointer_arrays_section_h.shrink_to_fit(); - - auto num_anchors_so_far = anchors_d_.size(); - anchors_d_.resize(num_anchors_so_far + total_anchors); - Anchor* anchors_d = anchors_d_.data().get() + 
num_anchors_so_far; - - CGA_LOG_INFO("Allocating {} bytes for read_id_to_anchors_section_d", - read_id_to_anchors_section_h.size() * sizeof(ArrayBlock)); - device_buffer read_id_to_anchors_section_d(read_id_to_anchors_section_h.size()); - CGA_CU_CHECK_ERR(cudaMemcpy(read_id_to_anchors_section_d.data(), read_id_to_anchors_section_h.data(), - read_id_to_anchors_section_h.size() * sizeof(ArrayBlock), - cudaMemcpyHostToDevice)); - read_id_to_anchors_section_h.clear(); - read_id_to_anchors_section_h.shrink_to_fit(); - - generate_anchors<<>>(positions_in_reads_d.data(), - read_ids_d.data(), - // directions_of_reads_d.data(), // currently we don't use direction - read_id_to_sketch_elements_d.data(), - read_id_to_sketch_elements_to_check_d.data(), - read_id_to_pointer_arrays_section_d.data(), - anchors_d, - read_id_to_anchors_section_d.data()); - - CGA_CU_CHECK_ERR(cudaDeviceSynchronize()); - - // clean up device memory - CGA_LOG_INFO("Deallocating {} bytes from read_id_to_anchors_section_d", - read_id_to_anchors_section_d.size() * - sizeof(decltype(read_id_to_anchors_section_d)::value_type)); - read_id_to_anchors_section_d.free(); - - CGA_LOG_INFO("Deallocating {} bytes from read_id_to_sketch_elements_d", - read_id_to_sketch_elements_d.size() * - sizeof(decltype(read_id_to_sketch_elements_d)::value_type)); - read_id_to_sketch_elements_d.free(); - CGA_LOG_INFO("Deallocating {} bytes from read_id_to_sketch_elements_to_check_d", - read_id_to_sketch_elements_to_check_d.size() * - sizeof(decltype(read_id_to_sketch_elements_to_check_d)::value_type)); - read_id_to_sketch_elements_to_check_d.free(); - CGA_LOG_INFO("Deallocating {} bytes from read_id_to_pointer_arrays_section_d", - read_id_to_pointer_arrays_section_d.size() * - sizeof(decltype(read_id_to_pointer_arrays_section_d)::value_type)); - read_id_to_pointer_arrays_section_d.free(); - - representation_min_range += increment; - increment *= 2; // TODO: investigate best coefficient - representation_max_range += increment; - } - - CGA_LOG_INFO("Deallocating {} bytes from positions_in_reads_d", - positions_in_reads_d.size() * sizeof(decltype(positions_in_reads_d)::value_type)); - positions_in_reads_d.free(); - CGA_LOG_INFO("Deallocating {} bytes from read_ids_d", - read_ids_d.size() * sizeof(decltype(read_ids_d)::value_type)); - read_ids_d.free(); - CGA_LOG_INFO("Deallocating {} bytes from directions_of_reads_d", - directions_of_reads_d.size() * sizeof(decltype(directions_of_reads_d)::value_type)); - directions_of_reads_d.free(); -} - -thrust::device_vector& Matcher::anchors() -{ - return anchors_d_; -} - -} // namespace cudamapper -} // namespace claragenomics diff --git a/cudamapper/src/matcher.hpp b/cudamapper/src/matcher.hpp deleted file mode 100644 index beccd502a..000000000 --- a/cudamapper/src/matcher.hpp +++ /dev/null @@ -1,52 +0,0 @@ -/* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. -*/ - -#pragma once - -#include -#include "claragenomics/cudamapper/index.hpp" - -namespace claragenomics -{ -namespace cudamapper -{ - -/// Matcher - finds anchors -/// -/// For a given index, all reads equal to or less than query_target_division_idx are mapped to all other reads. 
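[note] The matcher loop deleted above processes representations in chunks: after a successful chunk it advances representation_min_range and doubles the increment, and when total_anchors exceeds the budget it divides the increment by a growth coefficient of 4 and retries the chunk. A compact sketch of that multiplicative grow/back-off schedule; the struct and member names are illustrative, and the zero-guard is an addition for the sketch, not behaviour taken from the removed code:

// Illustrative sketch of the adaptive representation-range schedule used by
// the removed matcher loop: grow after success, back off when over budget.
#include <cstdint>

struct RangeScheduler
{
    std::uint64_t min_range = 0;
    std::uint64_t increment = 1;

    std::uint64_t max_range() const { return min_range + increment; }

    void on_chunk_succeeded()
    {
        min_range += increment;
        increment *= 2; // coefficient used by the removed code (marked TODO there)
    }

    void on_budget_exceeded()
    {
        increment /= 4; // growth_coefficient in the removed code
        if (increment == 0)
            increment = 1; // guard added for the sketch only
    }
};

In the removed code the back-off path also clears the per-read host buffers and restarts the read loop for the shrunken range, which is what the "Backing off" log line above records.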
-/// If query_target_division_idx is 0 then all-vs-all mapping is performed. -/// -/// Anchor is a pair of two sketch elements with the same sketch element representation from different reads. -/// Anchors are symmetrical, so only one anchor is generated for each pair (if sketch element from read 5 overlaps a sketch element from read 8 -/// then the same sketch element from read 8 overlaps the sketch element from read 5). -/// -/// Anchors are grouped by query read id and within that by representation (both in increasing order). -/// Assume q0p4t2p8 means anchor of read id 0 at position 4 and read id 2 at position 8. -/// Assume read 0 has 30 sketch elements with certain representation, read 1 40 and read 2 50. -/// Anchors for read 0 as query and that represtnation looks like this: -/// q0p0t1p0, q0p0t1p1 .. q0p0t1p39, q0p0t2p0, q0p0t2p1 ... q0p0t2p49, q0p1t1p0, q0p1t1p1 ... q0p1t1p39, q0p1t2p0 .. q0p1t2p49, q0p2t1p0 ... q0p2t1p39, q0p2t2p0 ... q0p2t2p49, q0p3t1p0 ... q0p29t2p49 -class Matcher -{ -public: - /// \brief Construtor - /// \param index index to generate anchors from - /// \param query_target_division_idx the index after which all reads are target reads. If set to 0 then all-vs-all mapping is performed - Matcher(const Index& index, uint32_t query_target_division_idx); - - /// \brief return anchors - /// \return anchors - thrust::device_vector& anchors(); - -private: - /// \brief list of anchors - thrust::device_vector anchors_d_; -}; -} // namespace cudamapper -} // namespace claragenomics diff --git a/cudamapper/src/overlapper_triggered.cu b/cudamapper/src/overlapper_triggered.cu index 986276dbd..b2b84507d 100644 --- a/cudamapper/src/overlapper_triggered.cu +++ b/cudamapper/src/overlapper_triggered.cu @@ -14,7 +14,6 @@ #include "claragenomics/cudamapper/overlapper.hpp" #include "cudamapper_utils.hpp" -#include "matcher.hpp" #include "overlapper_triggered.hpp" #include #include diff --git a/cudamapper/src/overlapper_triggered.hpp b/cudamapper/src/overlapper_triggered.hpp index 9d8250c6d..fd455ce4a 100644 --- a/cudamapper/src/overlapper_triggered.hpp +++ b/cudamapper/src/overlapper_triggered.hpp @@ -14,7 +14,6 @@ #include "claragenomics/cudamapper/types.hpp" #include "claragenomics/cudamapper/overlapper.hpp" -#include "matcher.hpp" namespace claragenomics { diff --git a/cudamapper/tests/CMakeLists.txt b/cudamapper/tests/CMakeLists.txt index 909d3de2e..10dd21a56 100644 --- a/cudamapper/tests/CMakeLists.txt +++ b/cudamapper/tests/CMakeLists.txt @@ -12,9 +12,7 @@ set(TARGET_NAME cudamappertests) set(SOURCES main.cpp - Test_CudamapperIndexGPU.cu Test_CudamapperIndexGPUTwoIndices.cu - Test_CudamapperMatcher.cu Test_CudamapperMatcherGPU.cu Test_CudamapperMinimizer.cpp Test_CudamapperOverlapperTriggered.cu @@ -25,9 +23,7 @@ include_directories(${cudamapper_data_include_dir}) set(LIBS bioparser - index_gpu index_gpu_two_indices - matcher matcher_gpu overlapper_triggerred cudamapper_utils) diff --git a/cudamapper/tests/Test_CudamapperIndexGPU.cu b/cudamapper/tests/Test_CudamapperIndexGPU.cu deleted file mode 100644 index 0acc248fc..000000000 --- a/cudamapper/tests/Test_CudamapperIndexGPU.cu +++ /dev/null @@ -1,1769 +0,0 @@ -/* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. 
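[note] The removed Matcher documentation above spells out how many anchors one representation contributes: every sketch element of the query read pairs with every sketch element of every later (target) read. A standalone worked check of the 30/40/50 example from that comment; this is an illustration, not code from the library:

#include <cassert>
#include <cstdint>

int main()
{
    // read 0 (query) has 30 sketch elements with a given representation,
    // reads 1 and 2 (targets) have 40 and 50 respectively.
    const std::uint64_t query_elements  = 30;
    const std::uint64_t target_elements = 40 + 50;
    // q0p0t1p0 ... q0p29t2p49 enumerates every (query, target) pair exactly once:
    const std::uint64_t anchors_for_representation = query_elements * target_elements;
    assert(anchors_for_representation == 2700);
    return 0;
}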
Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. -*/ - -#include -#include - -#include "gtest/gtest.h" -#include "cudamapper_file_location.hpp" -#include "../src/index_gpu.cuh" -#include "../src/minimizer.hpp" - -namespace claragenomics -{ -namespace cudamapper -{ - -void test_function(const std::string& filename, - const std::uint64_t minimizer_size, - const std::uint64_t window_size, - const std::uint64_t expected_number_of_reads, - const std::vector& expected_read_id_to_read_name, - const std::vector& expected_read_id_to_read_length, - const std::vector>& expected_read_id_and_representation_to_sketch_elements, - const std::vector& expected_representations, - const std::vector& expected_positions_in_reads, - const std::vector& expected_read_ids, - const std::vector& expected_directions_of_reads) -{ - std::vector> read_ranges; - std::pair query_range{0, std::numeric_limits::max()}; - read_ranges.push_back(query_range); - - std::unique_ptr parser = io::create_fasta_parser(filename); - std::vector parsers; - parsers.push_back(parser.get()); - IndexGPU index(parsers, minimizer_size, window_size, read_ranges, false); - - ASSERT_EQ(index.number_of_reads(), expected_number_of_reads); - - const std::vector& read_id_to_read_name = index.read_id_to_read_name(); - ASSERT_EQ(read_id_to_read_name.size(), expected_read_id_to_read_name.size()); - ASSERT_EQ(read_id_to_read_name.size(), expected_number_of_reads); - const std::vector& read_id_to_read_length = index.read_id_to_read_length(); - ASSERT_EQ(read_id_to_read_length.size(), expected_read_id_to_read_length.size()); - ASSERT_EQ(read_id_to_read_length.size(), expected_number_of_reads); - - // check pointers to sections of arrays - const std::vector>& read_id_and_representation_to_sketch_elements = index.read_id_and_representation_to_sketch_elements(); - ASSERT_EQ(read_id_and_representation_to_sketch_elements.size(), expected_read_id_and_representation_to_sketch_elements.size()); - ASSERT_EQ(read_id_and_representation_to_sketch_elements.size(), expected_number_of_reads); - for (std::size_t read_id = 0; read_id < expected_number_of_reads; ++read_id) - { - EXPECT_EQ(read_id_to_read_name[read_id], expected_read_id_to_read_name[read_id]) << "read_id: " << read_id; - EXPECT_EQ(read_id_to_read_length[read_id], expected_read_id_to_read_length[read_id]) << "read_id: " << read_id; - - const std::vector& reps_to_se = read_id_and_representation_to_sketch_elements[read_id]; - const std::vector& exp_reps_to_se = expected_read_id_and_representation_to_sketch_elements[read_id]; - ASSERT_EQ(reps_to_se.size(), exp_reps_to_se.size()) << "read_id: " << read_id; - - for (std::size_t rep_id = 0; rep_id < exp_reps_to_se.size(); ++rep_id) - { - const Index::RepresentationToSketchElements& exp_rep_to_se = exp_reps_to_se[rep_id]; - const Index::RepresentationToSketchElements& rep_to_se = reps_to_se[rep_id]; - ASSERT_EQ(rep_to_se.representation_, exp_rep_to_se.representation_) << "read id: " << read_id << " , rep_id: " << rep_id; - EXPECT_EQ(rep_to_se.sketch_elements_for_representation_and_read_id_.first_element_, exp_rep_to_se.sketch_elements_for_representation_and_read_id_.first_element_) << "read id: " << read_id << " , rep_id: " << rep_id; - EXPECT_EQ(rep_to_se.sketch_elements_for_representation_and_read_id_.block_size_, exp_rep_to_se.sketch_elements_for_representation_and_read_id_.block_size_) << "read id: " << read_id << " , rep_id: " << 
rep_id; - EXPECT_EQ(rep_to_se.sketch_elements_for_representation_and_all_read_ids_.first_element_, exp_rep_to_se.sketch_elements_for_representation_and_all_read_ids_.first_element_) << "read id: " << read_id << " , rep_id: " << rep_id; - EXPECT_EQ(rep_to_se.sketch_elements_for_representation_and_all_read_ids_.block_size_, exp_rep_to_se.sketch_elements_for_representation_and_all_read_ids_.block_size_) << "read id: " << read_id << " , rep_id: " << rep_id; - } - } - - // check arrays - const std::vector& representations = index.representations(); - const std::vector& positions_in_reads = index.positions_in_reads(); - const std::vector& read_ids = index.read_ids(); - const std::vector& directions_of_reads = index.directions_of_reads(); - ASSERT_EQ(representations.size(), expected_representations.size()); - ASSERT_EQ(positions_in_reads.size(), expected_positions_in_reads.size()); - ASSERT_EQ(read_ids.size(), expected_read_ids.size()); - ASSERT_EQ(directions_of_reads.size(), expected_directions_of_reads.size()); - ASSERT_EQ(representations.size(), positions_in_reads.size()); - ASSERT_EQ(positions_in_reads.size(), read_ids.size()); - ASSERT_EQ(read_ids.size(), directions_of_reads.size()); - for (std::size_t i = 0; i < expected_positions_in_reads.size(); ++i) - { - EXPECT_EQ(representations[i], expected_representations[i]) << "i: " << i; - EXPECT_EQ(positions_in_reads[i], expected_positions_in_reads[i]) << "i: " << i; - EXPECT_EQ(read_ids[i], expected_read_ids[i]) << "i: " << i; - EXPECT_EQ(directions_of_reads[i], expected_directions_of_reads[i]) << "i: " << i; - } - - ASSERT_EQ(index.minimum_representation(), std::uint64_t(0)); - ASSERT_EQ(index.maximum_representation(), pow(4, std::uint64_t(minimizer_size)) - 1); -} - -TEST(TestCudamapperIndexGPU, GATT_4_1) -{ - // >read_0 - // GATT - - // GATT = 0b10001111 - // AATC = 0b00001101 <- minimizer - - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"; - const std::uint64_t minimizer_size = 4; - const std::uint64_t window_size = 1; - - const std::uint64_t expected_number_of_reads = 1; - - std::vector expected_read_id_to_read_name; - expected_read_id_to_read_name.push_back("read_0"); - - std::vector expected_read_id_to_read_length; - expected_read_id_to_read_length.push_back(4); - - std::vector> expected_read_id_and_representation_to_sketch_elements(1); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b00001101, {0, 1}, {0, 1}}); - - std::vector expected_representations; - std::vector expected_positions_in_reads; - std::vector expected_read_ids; - std::vector expected_directions_of_reads; - expected_representations.push_back(0b1101); - expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - - test_function(filename, - minimizer_size, - window_size, - expected_number_of_reads, - expected_read_id_to_read_name, - expected_read_id_to_read_length, - expected_read_id_and_representation_to_sketch_elements, - expected_representations, - expected_positions_in_reads, - expected_read_ids, - expected_directions_of_reads); -} - -TEST(TestCudamapperIndexGPU, GATT_2_3) -{ - // >read_0 - // GATT - - // kmer representation: forward, reverse - // GA: <20> 31 - // AT: <03> 03 - // TT: 33 <00> - - // front end minimizers: representation, position_in_read, direction, read_id - // GA : 20 0 F 0 - // GAT: 03 1 F 0 - - // central minimizers - // GATT: 00 2 R 0 - - // back end minimizers - // ATT: 00 2 R 
0 - // TT : 00 2 R 0 - - // All minimizers: GA(0f), AT(1f), AA(2r) - - // (2r1) means position 2, reverse direction, read 1 - // (1,2) means array block start at element 1 and has 2 elements - - // 0 1 2 - // data arrays: GA(0f0), AT(1f0), AA(2r0) - // - // read_1(AAG(1,1)(0,2)) means read_1 has "1" minimizer with representation AAG starting at position "1", - // whereas in all reads there are "2" minimizers with representation AAG and they start at position "0" - // read_id_and_representation_to_sketch_elements: read_0(AA(0,1)(0,1), AT(1,1)(1,1), GA(2,1)(2,1) - - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"; - const std::uint64_t minimizer_size = 2; - const std::uint64_t window_size = 3; - - const std::uint64_t expected_number_of_reads = 1; - - std::vector expected_read_id_to_read_name; - expected_read_id_to_read_name.push_back("read_0"); - - std::vector expected_read_id_to_read_length; - expected_read_id_to_read_length.push_back(4); - - std::vector> expected_read_id_and_representation_to_sketch_elements(1); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b0000, {0, 1}, {0, 1}}); // AA - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b0011, {1, 1}, {1, 1}}); // AT - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b1000, {2, 1}, {2, 1}}); // GA - - std::vector expected_representations; - std::vector expected_positions_in_reads; - std::vector expected_read_ids; - std::vector expected_directions_of_reads; - - expected_representations.push_back(0b0000); // AA(2r0) - expected_positions_in_reads.push_back(2); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_representations.push_back(0b0011); // AT(1f0) - expected_positions_in_reads.push_back(1); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b1000); // GA(0f0) - expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - - test_function(filename, - minimizer_size, - window_size, - expected_number_of_reads, - expected_read_id_to_read_name, - expected_read_id_to_read_length, - expected_read_id_and_representation_to_sketch_elements, - expected_representations, - expected_positions_in_reads, - expected_read_ids, - expected_directions_of_reads); -} - -TEST(TestCudamapperIndexGPU, CCCATACC_2_8) -{ - // *** Read is shorter than one full window, the result should be empty *** - - // >read_0 - // CCCATACC - - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/cccatacc.fasta"; - const std::uint64_t minimizer_size = 2; - const std::uint64_t window_size = 8; - - const std::uint64_t expected_number_of_reads = 0; - - // all data arrays should be empty - - std::vector expected_read_id_to_read_name; - - std::vector expected_read_id_to_read_length; - - std::vector> expected_read_id_and_representation_to_sketch_elements(0); - - std::vector expected_representations; - std::vector expected_positions_in_reads; - std::vector expected_read_ids; - std::vector expected_directions_of_reads; - - test_function(filename, - minimizer_size, - window_size, - expected_number_of_reads, - expected_read_id_to_read_name, - expected_read_id_to_read_length, - expected_read_id_and_representation_to_sketch_elements, - 
expected_representations, - expected_positions_in_reads, - expected_read_ids, - expected_directions_of_reads); -} - -TEST(TestCudamapperIndexGPU, CATCAAG_AAGCTA_3_5) -{ - // *** One Read is shorter than one full window, the other is not *** - - // >read_0 - // CATCAAG - // >read_1 - // AAGCTA - - // ** CATCAAG ** - - // kmer representation: forward, reverse - // CAT: 103 <032> - // ATC: <031> 203 - // TCA: <310> 320 - // CAA: <100> 332 - // AAG: <002> 133 - - // front end minimizers: representation, position_in_read, direction, read_id - // CAT : 032 0 R 0 - // CATC : 031 1 F 0 - // CATCA : 031 1 F 0 - // CATCAA: 031 1 F 0 - - // central minimizers - // CATCAAG: 002 4 F 0 - - // back end minimizers - // ATCAAG: 002 4 F 0 - // TCAAG : 002 4 F 0 - // CAAG : 002 4 F 0 - // AAG : 002 4 F 0 - - // ** AAGCTA ** - // ** read does not fit one array ** - - // All minimizers: ATG(0r0), ATC(1f0), AAG(4f0) - - // (2r1) means position 2, reverse direction, read 1 - // (1,2) means array block start at element 1 and has 2 elements - - // 0 1 2 - // data arrays: AAG(4f0), ATC(1f0), ATG(0r0) - // - // read_1(AAG(1,1)(0,2)) means read_1 has "1" minimizer with representation AAG starting at position "1", - // whereas in all reads there are "2" minimizers with representation AAG and they start at position "0" - // read_id_and_representation_to_sketch_elements: read_0(AAG(0,1)(0,1), ATC(1,1)(1,1), ATG(2,1)(2,1) - - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/catcaag_aagcta.fasta"; - const std::uint64_t minimizer_size = 3; - const std::uint64_t window_size = 5; - - const std::uint64_t expected_number_of_reads = 1; - - std::vector expected_read_id_to_read_name; - expected_read_id_to_read_name.push_back("read_0"); - - std::vector expected_read_id_to_read_length; - expected_read_id_to_read_length.push_back(7); - - std::vector> expected_read_id_and_representation_to_sketch_elements(1); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b000010, {0, 1}, {0, 1}}); // AAG - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b001101, {1, 1}, {1, 1}}); // ATC - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b001110, {2, 1}, {2, 1}}); // ATG - - std::vector expected_representations; - std::vector expected_positions_in_reads; - std::vector expected_read_ids; - std::vector expected_directions_of_reads; - expected_representations.push_back(0b000010); // AAG(4f0) - expected_positions_in_reads.push_back(4); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b001101); // ATC(1f0) - expected_positions_in_reads.push_back(1); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b001110); // ATG(0r0) - expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - - test_function(filename, - minimizer_size, - window_size, - expected_number_of_reads, - expected_read_id_to_read_name, - expected_read_id_to_read_length, - expected_read_id_and_representation_to_sketch_elements, - expected_representations, - expected_positions_in_reads, - expected_read_ids, - expected_directions_of_reads); -} - -TEST(TestCudamapperIndexGPU, CCCATACC_3_5) -{ - // >read_0 - // CCCATACC - - // ** CCCATAC ** - - // kmer representation: 
forward, reverse - // CCC: <111> 222 - // CCA: <110> 322 - // CAT: 103 <032> - // ATA: <030> 303 - // TAC: 301 <230> - // ACC: <011> 223 - - // front end minimizers: representation, position_in_read, direction - // CCC : 111 0 F - // CCCA : 110 1 F - // CCCAT : 032 2 R - // CCCATA: 030 3 F - - // central minimizers - // CCCATAC: 030 3 F - // CCATACC: 011 5 F - - // back end minimizers - // CATACC: 011 5 F - // ATACC : 011 5 F - // TACC : 011 5 F - // ACC : 011 5 F - - // All minimizers: CCC(0f), CCA(1f), ATG(2r), ATA(3f), ACC(5f) - - // (2r1) means position 2, reverse direction, read 1 - // (1,2) means array block start at element 1 and has 2 elements - - // 0 1 2 - // data arrays: ACC(5f0), ATA(3f0), ATG(2r0), CCA(1f0), CCC(0f0) - // - // read_1(AAG(1,1)(0,2)) means read_1 has "1" minimizer with representation AAG starting at position "1", - // whereas in all reads there are "2" minimizers with representation AAG and they start at position "0" - // read_id_and_representation_to_sketch_elements: read_0(AAC(0,1)(0,1), AAT(1,1)(1,1), ATG(2,1)(2,1), CCA(3,1)(3,1), CCC(4,1)(4,1) - - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/cccatacc.fasta"; - const std::uint64_t minimizer_size = 3; - const std::uint64_t window_size = 5; - - const std::uint64_t expected_number_of_reads = 1; - - std::vector expected_read_id_to_read_name; - expected_read_id_to_read_name.push_back("read_0"); - - std::vector expected_read_id_to_read_length; - expected_read_id_to_read_length.push_back(8); - - std::vector> expected_read_id_and_representation_to_sketch_elements(1); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b000101, {0, 1}, {0, 1}}); // ACC - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b001100, {1, 1}, {1, 1}}); // ATA - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b001110, {2, 1}, {2, 1}}); // ATG - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b010100, {3, 1}, {3, 1}}); // CCA - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b010101, {4, 1}, {4, 1}}); // CCC - - std::vector expected_representations; - std::vector expected_positions_in_reads; - std::vector expected_read_ids; - std::vector expected_directions_of_reads; - expected_representations.push_back(0b000101); // ACC(5f0) - expected_positions_in_reads.push_back(5); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b001100); // ATA(3f0) - expected_positions_in_reads.push_back(3); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b001110); // ATG(2r0) - expected_positions_in_reads.push_back(2); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_representations.push_back(0b010100); // CCA(1f0) - expected_positions_in_reads.push_back(1); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b010101); // CCC(0f0) - expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - - test_function(filename, - minimizer_size, - window_size, - expected_number_of_reads, - 
expected_read_id_to_read_name, - expected_read_id_to_read_length, - expected_read_id_and_representation_to_sketch_elements, - expected_representations, - expected_positions_in_reads, - expected_read_ids, - expected_directions_of_reads); -} - -TEST(TestCudamapperIndexGPU, CATCAAG_AAGCTA_3_2) -{ - // >read_0 - // CATCAAG - // >read_1 - // AAGCTA - - // ** CATCAAG ** - - // kmer representation: forward, reverse - // CAT: 103 <032> - // ATC: <031> 203 - // TCA: <310> 320 - // CAA: <100> 332 - // AAG: <002> 133 - - // front end minimizers: representation, position_in_read, direction, read_id - // CAT: 032 0 R 0 - - // central minimizers - // CATC: 031 1 F 0 - // ATCA: 031 1 F 0 - // TCAA: 100 3 F 0 - // CAAG: 002 4 F 0 - - // back end minimizers - // AAG: 002 4 F 0 - - // All minimizers: ATC(1f), CAA(3f), AAG(4f), ATG(0r) - - // ** AAGCTA ** - - // kmer representation: forward, reverse - // AAG: <002> 133 - // AGC: <021> 213 - // GCT: 213 <021> - // CTA: <130> 302 - - // front end minimizers: representation, position_in_read, direction, read_id - // AAG: 002 0 F 1 - - // central minimizers - // AAGC: 002 0 F 1 - // AGCT: 021 2 R 1 // only the last minimizer is saved - // GCTA: 021 2 R 1 - - // back end minimizers - // CTA: 130 3 F 1 - - // All minimizers: AAG(0f), AGC(1f), CTA(3f) - - // (2r1) means position 2, reverse direction, read 1 - // (1,2) means array block start at element 1 and has 2 elements - - // 0 1 2 3 4 5 6 - // data arrays: AAG(4f0), AAG(0f1), AGC(2r1), ATC(1f0), ATG(0r0), CAA(3f0), CTA(3f1) - // - // read_1(AAG(1,1)(0,2)) means read_1 has "1" minimizer with representation AAG starting at position "1", - // whereas in all reads there are "2" minimizers with representation AAG and they start at position "0" - // read_id_and_representation_to_sketch_elements: read_0(AAG(0,1)(0,2), ATC(3,1)(3,1), ATG(4,1)(4,1), CAA(5,1)(5,1)) - // read_1(AAG(1,1)(0,2), AGC(2,1)(2,1), CTA(6,1)(6,1)) - - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/catcaag_aagcta.fasta"; - const std::uint64_t minimizer_size = 3; - const std::uint64_t window_size = 2; - - const std::uint64_t expected_number_of_reads = 2; - - std::vector expected_read_id_to_read_name; - expected_read_id_to_read_name.push_back("read_0"); - expected_read_id_to_read_name.push_back("read_1"); - - std::vector expected_read_id_to_read_length; - expected_read_id_to_read_length.push_back(7); - expected_read_id_to_read_length.push_back(6); - - std::vector> expected_read_id_and_representation_to_sketch_elements(2); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b000010, {0, 1}, {0, 2}}); // AAG - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b001101, {3, 1}, {3, 1}}); // ATC - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b001110, {4, 1}, {4, 1}}); // ATG - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b010000, {5, 1}, {5, 1}}); // CAA - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b000010, {1, 1}, {0, 2}}); // AAG - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b001001, {2, 1}, {2, 1}}); // AGC - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b011100, {6, 1}, {6, 1}}); // CTA - - std::vector expected_representations; - std::vector expected_positions_in_reads; - std::vector expected_read_ids; - std::vector expected_directions_of_reads; - - expected_representations.push_back(0b000010); // AAG(4f0) - expected_positions_in_reads.push_back(4); - 
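[note] The deleted index tests above and below derive their expected values the same way: 2-bit encode each k-mer, take the smaller of the forward and reverse-complement encodings as the representation, then keep the smallest representation in each window of w consecutive k-mers. A self-contained sketch of that derivation for the central windows; the front-end and back-end minimizers used by the tests (the shrinking windows at the read edges) are deliberately omitted, and all names are illustrative:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

// A = 0, C = 1, G = 2, T = 3, the encoding used throughout these tests.
static std::uint64_t base_to_bits(char b)
{
    switch (b)
    {
    case 'A': return 0;
    case 'C': return 1;
    case 'G': return 2;
    default:  return 3; // 'T'
    }
}

// Canonical representation of the k-mer starting at pos: the smaller of the
// forward encoding and the encoding of the reverse complement.
static std::uint64_t representation(const std::string& read, std::size_t pos, std::size_t k)
{
    std::uint64_t forward = 0;
    std::uint64_t reverse = 0;
    for (std::size_t i = 0; i < k; ++i)
    {
        forward = (forward << 2) | base_to_bits(read[pos + i]);
        reverse = (reverse << 2) | (3 - base_to_bits(read[pos + k - 1 - i]));
    }
    return std::min(forward, reverse);
}

int main()
{
    // GATT_4_1 above: forward GATT = 0b10001111, reverse complement AATC = 0b00001101.
    assert(representation("GATT", 0, 4) == 0b00001101);

    // Central minimizers of CATCAAG with k = 3, w = 2 (CATCAAG_AAGCTA_3_2 above).
    const std::string read = "CATCAAG";
    const std::size_t k = 3;
    const std::size_t w = 2;

    std::vector<std::uint64_t> representations;
    for (std::size_t pos = 0; pos + k <= read.size(); ++pos)
        representations.push_back(representation(read, pos, k));

    // On equal representations the real index keeps the last occurrence in the
    // window (see the AGCT/GCTA comment above); there are no ties in this example.
    for (std::size_t window = 0; window + w <= representations.size(); ++window)
    {
        const auto smallest = std::min_element(representations.begin() + window,
                                               representations.begin() + window + w);
        std::cout << "window " << window
                  << ": representation " << *smallest
                  << " at k-mer position " << (smallest - representations.begin()) << "\n";
    }
    return 0;
}

Run against CATCAAG this prints representations 13, 13, 16 and 2 (031, 031, 100 and 002 in the base-4 notation of the comments) at positions 1, 1, 3 and 4, matching the central minimizers listed above.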
expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b000010); // AAG(0f1) - expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b001001); // AGC(2r1) - expected_positions_in_reads.push_back(2); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_representations.push_back(0b001101); // ATC(1f0) - expected_positions_in_reads.push_back(1); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b001110); // ATG(0r0) - expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_representations.push_back(0b010000); // CAA(3f0) - expected_positions_in_reads.push_back(3); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b011100); // CTA(3f1) - expected_positions_in_reads.push_back(3); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - - test_function(filename, - minimizer_size, - window_size, - expected_number_of_reads, - expected_read_id_to_read_name, - expected_read_id_to_read_length, - expected_read_id_and_representation_to_sketch_elements, - expected_representations, - expected_positions_in_reads, - expected_read_ids, - expected_directions_of_reads); -} - -TEST(TestCudamapperIndexGPU, AAAACTGAA_GCCAAAG_2_3) -{ - // >read_0 - // AAAACTGAA - // >read_1 - // GCCAAAG - - // ** AAAACTGAA ** - - // kmer representation: forward, reverse - // AA: <00> 33 - // AA: <00> 33 - // AA: <00> 33 - // AC: <01> 23 - // CT: 13 <02> - // TG: 32 <10> - // GA: <20> 31 - // AA: <00> 33 - - // front end minimizers: representation, position_in_read, direction, read_id - // AA : 00 0 F 0 - // AAA: 00 1 F 0 - - // central minimizers - // AAAA: 00 2 F 0 - // AAAC: 00 2 F 0 - // AACT: 00 2 F 0 - // ACTG: 01 3 F 0 - // CTGA: 02 4 R 0 - // TGAA: 00 7 F 0 - - // back end minimizers - // GAA: 00 7 F 0 - // AA : 00 7 F 0 - - // All minimizers: AA(0f), AA(1f), AA(2f), AC(3f), AG(4r), AA (7f) - - // ** GCCAAAG ** - - // kmer representation: forward, reverse - // GC: <21> 21 - // CC: <11> 22 - // CA: <10> 32 - // AA: <00> 33 - // AA: <00> 33 - // AG: <03> 21 - - // front end minimizers: representation, position_in_read, direction, read_id - // GC : 21 0 F 0 - // GCC: 11 1 F 0 - - // central minimizers - // GCCA: 10 2 F 0 - // CCAA: 00 3 F 0 - // CAAA: 00 4 F 0 - // AAAG: 00 4 F 0 - - // back end minimizers - // AAG: 00 4 F 0 - // AG : 03 5 F 0 - - // All minimizers: GC(0f), CC(1f), CA(2f), AA(3f), AA(4f), AG(5f) - - // (2r1) means position 2, reverse direction, read 1 - // (1,2) means array block start at element 1 and has 2 elements - - // 0 1 2 3 4 5 6 7 8 9 10 11 - // data arrays: AA(0f0), AA(1f0), AA(2f0), AA(7f0), AA(3f1), AA(4f1), AC(3f0), AG(4r0), AG(5f1), CA(2f1), CC(1f1), GC(0f1) - // - // read_1(AAG(1,1)(0,2)) means read_1 has "1" minimizer with representation AAG starting at position "1", - // whereas in all reads there are "2" minimizers with representation 
AAG and they start at position "0" - // read_id_and_representation_to_sketch_elements: read_0(AA(0,4)(0,6), AC(6,1)(6,1), AG(7,1)(7,2) - // read_1(AA(4,2)(0,6), AG(8,1)(7,2), CA(9,1)(9,1), CC(10,1)(10,1), GC(11,1)(11,1) - - const std::string filename = std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/aaaactgaa_gccaaag.fasta"; - const std::uint64_t minimizer_size = 2; - const std::uint64_t window_size = 3; - - const std::uint64_t expected_number_of_reads = 2; - - std::vector expected_read_id_to_read_name; - expected_read_id_to_read_name.push_back("read_0"); - expected_read_id_to_read_name.push_back("read_1"); - - std::vector expected_read_id_to_read_length; - expected_read_id_to_read_length.push_back(9); - expected_read_id_to_read_length.push_back(7); - - std::vector> expected_read_id_and_representation_to_sketch_elements(2); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b0000, {0, 4}, {0, 6}}); // AA - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b0001, {6, 1}, {6, 1}}); // AC - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b0010, {7, 1}, {7, 2}}); // AG - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b0000, {4, 2}, {0, 6}}); // AA - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b0010, {8, 1}, {7, 2}}); // AG - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b0100, {9, 1}, {9, 1}}); // CA - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b0101, {10, 1}, {10, 1}}); // CC - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b1001, {11, 1}, {11, 1}}); // GC - - std::vector expected_representations; - std::vector expected_positions_in_reads; - std::vector expected_read_ids; - std::vector expected_directions_of_reads; - expected_representations.push_back(0b0000); // AA(0f0) - expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0000); // AA(1f0) - expected_positions_in_reads.push_back(1); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0000); // AA(2f0) - expected_positions_in_reads.push_back(2); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0000); // AA(7f0) - expected_positions_in_reads.push_back(7); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0000); // AA(3f1) - expected_positions_in_reads.push_back(3); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0000); // AA(4f1) - expected_positions_in_reads.push_back(4); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0001); // AC(3f0) - expected_positions_in_reads.push_back(3); - expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0010); // AG(4r0) - expected_positions_in_reads.push_back(4); - 
expected_read_ids.push_back(0); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::REVERSE); - expected_representations.push_back(0b0010); // AG(5f1) - expected_positions_in_reads.push_back(5); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0100); // CA(2f1) - expected_positions_in_reads.push_back(2); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b0101); // CC(1f1) - expected_positions_in_reads.push_back(1); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - expected_representations.push_back(0b1001); // GC(0f1) - expected_positions_in_reads.push_back(0); - expected_read_ids.push_back(1); - expected_directions_of_reads.push_back(SketchElement::DirectionOfRepresentation::FORWARD); - - test_function(filename, - minimizer_size, - window_size, - expected_number_of_reads, - expected_read_id_to_read_name, - expected_read_id_to_read_length, - expected_read_id_and_representation_to_sketch_elements, - expected_representations, - expected_positions_in_reads, - expected_read_ids, - expected_directions_of_reads); -} - -namespace details -{ - -namespace index_gpu -{ - -// ************ Test representation_buckets ************** - -TEST(TestCudamapperIndexGPU, representation_buckets_1) -{ - // approximate_sketch_elements_per_bucket = 7 - // sample_length = 7 / 3 = 2 - // - // (1 1 2 2 4 4 6 6 9 9) - // ^ ^ ^ ^ ^ - // (0 0 1 5 5 5 7 8 8 8) - // ^ ^ ^ ^ ^ - // (1 1 1 1 3 4 5 7 9 9) - // ^ ^ ^ ^ ^ - // - // samples_in_one_bucket = 2 * 3 = 6 - // Sorted: 0 1 1 1 1 2 3 4 5 5 6 7 8 9 9 - // ^ ^ ^ ^ ^ - - std::vector> arrays_of_representations; - arrays_of_representations.push_back({{1, 1, 2, 2, 4, 4, 6, 6, 9, 9}}); - arrays_of_representations.push_back({{0, 0, 1, 5, 5, 5, 7, 8, 8, 8}}); - arrays_of_representations.push_back({{1, 1, 1, 1, 3, 4, 5, 7, 9, 9}}); - - std::vector res = generate_representation_buckets(arrays_of_representations, 7); - - std::vector expected_res = {0, 1, 3, 5, 8}; - - ASSERT_EQ(res.size(), expected_res.size()); - for (std::size_t i = 0; i < expected_res.size(); ++i) - { - EXPECT_EQ(res[i], expected_res[i]) << "index: " << i; - } -} - -TEST(TestCudamapperIndexGPU, representation_buckets_2) -{ - // approximate_sketch_elements_per_bucket = 5 - // sample_length = 5 / 3 = 1 - // - // (1 1 2 2 4 4 6 6 9 9) - // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ - // (0 0 1 5 5 5 7 8 8 8) - // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ - // (1 1 1 3 3 4 5 7 9 9) - // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ - // - // samples_in_one_bucket = 5 / 1 = 5 - // Sorted: 0 0 1 1 1 1 1 1 2 2 3 3 4 4 4 5 5 5 5 6 6 7 7 8 8 8 9 9 9 9 - // ^ ^ ^ ^ ^ ^ - - std::vector> arrays_of_representations; - arrays_of_representations.push_back({{1, 1, 2, 2, 4, 4, 6, 6, 9, 9}}); - arrays_of_representations.push_back({{0, 0, 1, 5, 5, 5, 7, 8, 8, 8}}); - arrays_of_representations.push_back({{1, 1, 1, 3, 3, 4, 5, 7, 9, 9}}); - - std::vector res = generate_representation_buckets(arrays_of_representations, 5); - - std::vector expected_res = {0, 1, 3, 5, 6, 8}; - - ASSERT_EQ(res.size(), expected_res.size()); - for (std::size_t i = 0; i < expected_res.size(); ++i) - { - EXPECT_EQ(res[i], expected_res[i]) << "index: " << i; - } -} - -TEST(TestCudamapperIndexGPU, representation_buckets_3) -{ - // approximate_sketch_elements_per_bucket = 3 - // sample_length = 
3 / 3 = 1 - // - // (1 1 2 2 4 4 6 6 9 9) - // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ - // (0 0 1 5 5 5 7 8 8 8) - // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ - // (1 1 1 3 3 4 5 7 9 9) - // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ - // - // samples_in_one_bucket = 3 / 1 = 3 - // Sorted: 0 0 1 1 1 1 1 1 2 2 3 3 4 4 4 5 5 5 5 6 6 7 7 8 8 8 9 9 9 9 - // ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ - - std::vector> arrays_of_representations; - arrays_of_representations.push_back({{1, 1, 2, 2, 4, 4, 6, 6, 9, 9}}); - arrays_of_representations.push_back({{0, 0, 1, 5, 5, 5, 7, 8, 8, 8}}); - arrays_of_representations.push_back({{1, 1, 1, 3, 3, 4, 5, 7, 9, 9}}); - - std::vector res = generate_representation_buckets(arrays_of_representations, 3); - - std::vector expected_res = {0, 1, 2, 4, 5, 7, 8, 9}; - - ASSERT_EQ(res.size(), expected_res.size()); - for (std::size_t i = 0; i < expected_res.size(); ++i) - { - EXPECT_EQ(res[i], expected_res[i]) << "index: " << i; - } -} - -TEST(TestCudamapperIndexGPU, representation_buckets_4) -{ - // approximate_sketch_elements_per_bucket = 9 - // sample_length = 9 / 3 = 3 - // - // (1 1 2 2 4 4 6 6 9 9) - // ^ ^ ^ ^ - // (0 0 1 5 5 5 7 8 8 8) - // ^ ^ ^ ^ - // (1 1 1 3 3 4 5 7 9 9) - // ^ ^ ^ ^ - // - // samples_in_one_bucket = 9 / 3 = 3 - // Sorted: 0 1 1 2 3 5 5 6 7 8 9 9 - // ^ ^ ^ ^ - - std::vector> arrays_of_representations; - arrays_of_representations.push_back({{1, 1, 2, 2, 4, 4, 6, 6, 9, 9}}); - arrays_of_representations.push_back({{0, 0, 1, 5, 5, 5, 7, 8, 8, 8}}); - arrays_of_representations.push_back({{1, 1, 1, 3, 3, 4, 5, 7, 9, 9}}); - - std::vector res = generate_representation_buckets(arrays_of_representations, 9); - - std::vector expected_res = {0, 2, 5, 8}; - - ASSERT_EQ(res.size(), expected_res.size()); - for (std::size_t i = 0; i < expected_res.size(); ++i) - { - EXPECT_EQ(res[i], expected_res[i]) << "index: " << i; - } -} - -TEST(TestCudamapperIndexGPU, representation_buckets_5) -{ - // approximate_sketch_elements_per_bucket = 9 - // sample_length = 9 / 3 = 3 - // - // (1 1 2) - // ^ - // (0 0 1) - // ^ - // (1 1 1) - // ^ - // - // samples_in_one_bucket = 9 / 3 = 3 - // Sorted: 0 1 1 - // ^ - - std::vector> arrays_of_representations; - arrays_of_representations.push_back({{1, 1, 2}}); - arrays_of_representations.push_back({{0, 0, 1}}); - arrays_of_representations.push_back({{1, 1, 1}}); - - std::vector res = generate_representation_buckets(arrays_of_representations, 9); - - std::vector expected_res = {0}; - - ASSERT_EQ(res.size(), expected_res.size()); - for (std::size_t i = 0; i < expected_res.size(); ++i) - { - EXPECT_EQ(res[i], expected_res[i]) << "index: " << i; - } -} - -TEST(TestCudamapperIndexGPU, representation_buckets_exception) -{ - // approximate_sketch_elements_per_bucket is smaller than the number of arrays -> function throws - - std::vector> arrays_of_representations; - arrays_of_representations.push_back({{1, 1, 2}}); - arrays_of_representations.push_back({{0, 0, 1}}); - arrays_of_representations.push_back({{1, 1, 1}}); - - EXPECT_NO_THROW(generate_representation_buckets(arrays_of_representations, 3)); - - EXPECT_THROW(generate_representation_buckets(arrays_of_representations, 2), - approximate_sketch_elements_per_bucket_too_small); -} - -// ************ Test representation_iterators ************** -TEST(TestCudamapperIndexGPU, representation_iterators) -{ - std::vector> arrays_of_representations; - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 - arrays_of_representations.push_back({{1, 1, 2, 2, 2, 3, 3, 3, 4, 6, 7, 8, 9, 9}}); - arrays_of_representations.push_back({{0, 0, 0, 3, 3, 5, 5, 5, 6, 7, 7}}); - 
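[note] The expected values in the representation_buckets tests above follow a sampling scheme that can be reconstructed from their comments: every sample_length-th element is taken from each sorted array (sample_length = approximate_sketch_elements_per_bucket / number_of_arrays), the samples are sorted together, and every (approximate_sketch_elements_per_bucket / sample_length)-th sample becomes a bucket boundary, with duplicates collapsed. The sketch below reproduces the expected outputs of tests 1 through 5 under that reading; it is inferred from the tests, not taken from the library, and all names are illustrative:

#include <algorithm>
#include <cstdint>
#include <vector>

using representation_t = std::uint64_t; // assumed alias, for illustration

// Inferred sketch of bucket-boundary generation. Boundaries are the first
// representations of the buckets.
std::vector<representation_t>
representation_buckets_sketch(const std::vector<std::vector<representation_t>>& arrays,
                              std::size_t approximate_sketch_elements_per_bucket)
{
    const std::size_t sample_length = approximate_sketch_elements_per_bucket / arrays.size();
    if (sample_length == 0)
        return {}; // the real code throws approximate_sketch_elements_per_bucket_too_small here

    // take every sample_length-th element from every (sorted) array
    std::vector<representation_t> samples;
    for (const auto& array : arrays)
        for (std::size_t i = 0; i < array.size(); i += sample_length)
            samples.push_back(array[i]);
    std::sort(samples.begin(), samples.end());

    // every samples_per_bucket-th sample opens a new bucket, duplicates collapsed
    const std::size_t samples_per_bucket = approximate_sketch_elements_per_bucket / sample_length;
    std::vector<representation_t> buckets;
    for (std::size_t i = 0; i < samples.size(); i += samples_per_bucket)
        if (buckets.empty() || buckets.back() != samples[i])
            buckets.push_back(samples[i]);
    return buckets;
}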
arrays_of_representations.push_back({{6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9}}); - - auto res = generate_representation_indices(arrays_of_representations, 0); - EXPECT_EQ(res[0], 0u); - EXPECT_EQ(res[1], 0u); - EXPECT_EQ(res[2], 0u); - - res = generate_representation_indices(arrays_of_representations, 1); - EXPECT_EQ(res[0], 0u); - EXPECT_EQ(res[1], 3u); - EXPECT_EQ(res[2], 0u); - - res = generate_representation_indices(arrays_of_representations, 6); - EXPECT_EQ(res[0], 9u); - EXPECT_EQ(res[1], 8u); - EXPECT_EQ(res[2], 0u); - - res = generate_representation_indices(arrays_of_representations, 7); - EXPECT_EQ(res[0], 10u); - EXPECT_EQ(res[1], 9u); - EXPECT_EQ(res[2], 1u); - - res = generate_representation_indices(arrays_of_representations, 8); - EXPECT_EQ(res[0], 11u); - EXPECT_EQ(res[1], 11u); - EXPECT_EQ(res[2], 5u); - - res = generate_representation_indices(arrays_of_representations, 9); - EXPECT_EQ(res[0], 12u); - EXPECT_EQ(res[1], 11u); - EXPECT_EQ(res[2], 8u); - - res = generate_representation_indices(arrays_of_representations, 10); - EXPECT_EQ(res[0], 14u); - EXPECT_EQ(res[1], 11u); - EXPECT_EQ(res[2], 12u); -} - -// ************ Test generate_bucket_boundary_indices ************** - -void test_generate_bucket_boundary_indices(const std::vector>& arrays_of_representations, - const std::vector& representation_buckets, - const std::vector>>& expected_bucket_boundary_indices) -{ - const std::size_t number_of_arrays = arrays_of_representations.size(); - const std::size_t number_of_buckets = representation_buckets.size(); - - const std::vector>> bucket_boundary_indices = generate_bucket_boundary_indices(arrays_of_representations, representation_buckets); - ASSERT_EQ(bucket_boundary_indices.size(), number_of_buckets); - - for (std::size_t bucket_index = 0; bucket_index < number_of_buckets; ++bucket_index) - { - ASSERT_EQ(bucket_boundary_indices[bucket_index].size(), number_of_arrays) << "bucket: " << bucket_index; - - for (std::size_t array_index = 0; array_index < number_of_arrays; ++array_index) - { - EXPECT_EQ(bucket_boundary_indices[bucket_index][array_index].first, - expected_bucket_boundary_indices[bucket_index][array_index].first) - << "bucket: " << bucket_index << ", array: " << array_index; - EXPECT_EQ(bucket_boundary_indices[bucket_index][array_index].second, - expected_bucket_boundary_indices[bucket_index][array_index].second) - << "bucket: " << bucket_index << ", array: " << array_index; - } - } -} - -TEST(TestCudamapperIndexGPU, generate_bucket_boundary_indices) -{ - std::vector> arrays_of_representations; - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 - arrays_of_representations.push_back({{1, 1, 2, 2, 2, 3, 3, 3, 4, 6, 7, 8, 9, 9}}); - arrays_of_representations.push_back({{0, 0, 0, 3, 3, 5, 5, 5, 6, 7, 7}}); - arrays_of_representations.push_back({{6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9}}); - - std::vector representation_buckets = {0, 5, 7, 9}; - - std::vector>> expected_bucket_boundary_indices; - expected_bucket_boundary_indices.push_back({{0, 9}, {0, 5}, {0, 0}}); - expected_bucket_boundary_indices.push_back({{9, 10}, {5, 9}, {0, 1}}); - expected_bucket_boundary_indices.push_back({{10, 12}, {9, 11}, {1, 8}}); - expected_bucket_boundary_indices.push_back({{12, 14}, {11, 11}, {8, 12}}); - - test_generate_bucket_boundary_indices(arrays_of_representations, - representation_buckets, - expected_bucket_boundary_indices); -} - -// ************ Test merge_sketch_element_arrays ************** - -template -void test_merge_sketch_element_arrays(const std::vector>& arrays_of_representations, - const 
std::vector>& arrays_of_readids_positions_directions, - const std::uint64_t available_device_memory_bytes, - const std::vector& expected_merged_representations, - const std::vector& expected_merged_readids_positions_directions) -{ - std::vector generated_merged_representations; - std::vector generated_merged_readids_positions_directions; - - merge_sketch_element_arrays(arrays_of_representations, - arrays_of_readids_positions_directions, - available_device_memory_bytes, - generated_merged_representations, - generated_merged_readids_positions_directions); - - ASSERT_EQ(generated_merged_representations.size(), expected_merged_representations.size()) << "available_memory: " << available_device_memory_bytes; - ASSERT_EQ(generated_merged_readids_positions_directions.size(), expected_merged_readids_positions_directions.size()) << "available_memory: " << available_device_memory_bytes; - ASSERT_EQ(generated_merged_representations.size(), generated_merged_readids_positions_directions.size()) << "available_memory: " << available_device_memory_bytes; - - for (std::size_t i = 0; i < expected_merged_representations.size(); ++i) - { - ASSERT_EQ(generated_merged_representations[i], expected_merged_representations[i]) << "available_memory: " << available_device_memory_bytes << ", index: " << i; - ASSERT_EQ(generated_merged_readids_positions_directions[i].read_id_, expected_merged_readids_positions_directions[i].read_id_) << "available_memory: " << available_device_memory_bytes << ", index: " << i; - ASSERT_EQ(generated_merged_readids_positions_directions[i].position_in_read_, expected_merged_readids_positions_directions[i].position_in_read_) << "available_memory: " << available_device_memory_bytes << ", index: " << i; - ASSERT_EQ(generated_merged_readids_positions_directions[i].direction_, expected_merged_readids_positions_directions[i].direction_) << "available_memory: " << available_device_memory_bytes << ", index: " << i; - } -} - -TEST(TestCudamapperIndexGPU, merge_sketch_element_arrays) -{ - std::vector> arrays_of_representations; - // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 - arrays_of_representations.push_back({{1, 1, 2, 2, 2, 3, 3, 3, 4, 6, 7, 8, 9, 9}}); - arrays_of_representations.push_back({{0, 0, 0, 3, 3, 5, 5, 5, 6, 7, 7}}); - arrays_of_representations.push_back({{6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 9}}); - - std::vector> arrays_of_readids_positions_directions; - arrays_of_readids_positions_directions.push_back({ - {0, 1, 1}, // 1 - {0, 2, 0}, // 1 - {4, 7, 1}, // 2 - {5, 5, 1}, // 2 - {6, 9, 1}, // 2 - {1, 2, 0}, // 3 - {2, 8, 1}, // 3 - {4, 6, 1}, // 3 - {5, 8, 1}, // 4 - {3, 2, 0}, // 6 - {8, 1, 0}, // 7 - {0, 4, 1}, // 8 - {2, 7, 0}, // 9 - {2, 9, 0}, // 9 - }); - arrays_of_readids_positions_directions.push_back({ - {10, 7, 0}, // 0 - {10, 9, 0}, // 0 - {12, 2, 1}, // 0 - {13, 4, 1}, // 3 - {15, 1, 1}, // 3 - {12, 4, 1}, // 5 - {13, 3, 0}, // 5 - {13, 7, 0}, // 5 - {14, 8, 1}, // 6 - {15, 6, 1}, // 7 - {15, 7, 0}, // 7 - }); - arrays_of_readids_positions_directions.push_back({ - {25, 5, 0}, // 6 - {26, 7, 0}, // 7 - {26, 9, 1}, // 7 - {27, 1, 1}, // 7 - {27, 2, 1}, // 7 - {20, 3, 1}, // 8 - {20, 5, 0}, // 8 - {20, 7, 1}, // 8 - {20, 2, 0}, // 9 - {20, 4, 0}, // 9 - {20, 6, 1}, // 9 - {20, 8, 1}, // 9 - }); - - std::vector expected_merged_representations = {0, 0, 0, 1, 1, 2, 2, 2, 3, 3, - 3, 3, 3, 4, 5, 5, 5, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, - 8, 9, 9, 9, 9, 9, 9}; - std::vector expected_merged_readids_positions_directions; - expected_merged_readids_positions_directions.push_back({10, 7, 0}); 
// 0 - expected_merged_readids_positions_directions.push_back({10, 9, 0}); // 0 - expected_merged_readids_positions_directions.push_back({12, 2, 1}); // 0 - expected_merged_readids_positions_directions.push_back({0, 1, 1}); // 1 - expected_merged_readids_positions_directions.push_back({0, 2, 0}); // 1 - expected_merged_readids_positions_directions.push_back({4, 7, 1}); // 2 - expected_merged_readids_positions_directions.push_back({5, 5, 1}); // 2 - expected_merged_readids_positions_directions.push_back({6, 9, 1}); // 2 - expected_merged_readids_positions_directions.push_back({1, 2, 0}); // 3 - expected_merged_readids_positions_directions.push_back({2, 8, 1}); // 3 - expected_merged_readids_positions_directions.push_back({4, 6, 1}); // 3 - expected_merged_readids_positions_directions.push_back({13, 4, 1}); // 3 - expected_merged_readids_positions_directions.push_back({15, 1, 1}); // 3 - expected_merged_readids_positions_directions.push_back({5, 8, 1}); // 4 - expected_merged_readids_positions_directions.push_back({12, 4, 1}); // 5 - expected_merged_readids_positions_directions.push_back({13, 3, 0}); // 5 - expected_merged_readids_positions_directions.push_back({13, 7, 0}); // 5 - expected_merged_readids_positions_directions.push_back({3, 2, 0}); // 6 - expected_merged_readids_positions_directions.push_back({14, 8, 1}); // 6 - expected_merged_readids_positions_directions.push_back({25, 5, 0}); // 6 - expected_merged_readids_positions_directions.push_back({8, 1, 0}); // 7 - expected_merged_readids_positions_directions.push_back({15, 6, 1}); // 7 - expected_merged_readids_positions_directions.push_back({15, 7, 0}); // 7 - expected_merged_readids_positions_directions.push_back({26, 7, 0}); // 7 - expected_merged_readids_positions_directions.push_back({26, 9, 1}); // 7 - expected_merged_readids_positions_directions.push_back({27, 1, 1}); // 7 - expected_merged_readids_positions_directions.push_back({27, 2, 1}); // 7 - expected_merged_readids_positions_directions.push_back({0, 4, 1}); // 8 - expected_merged_readids_positions_directions.push_back({20, 3, 1}); // 8 - expected_merged_readids_positions_directions.push_back({20, 5, 0}); // 8 - expected_merged_readids_positions_directions.push_back({20, 7, 1}); // 8 - expected_merged_readids_positions_directions.push_back({2, 7, 0}); // 9 - expected_merged_readids_positions_directions.push_back({2, 9, 0}); // 9 - expected_merged_readids_positions_directions.push_back({20, 2, 0}); // 9 - expected_merged_readids_positions_directions.push_back({20, 4, 0}); // 9 - expected_merged_readids_positions_directions.push_back({20, 6, 1}); // 9 - expected_merged_readids_positions_directions.push_back({20, 8, 1}); // 9 - - // all elements fit in one sort call - test_merge_sketch_element_arrays(arrays_of_representations, - arrays_of_readids_positions_directions, - 10000000, - expected_merged_representations, - expected_merged_readids_positions_directions); - - std::size_t element_size = sizeof(representation_t) + sizeof(Minimizer::ReadidPositionDirection); - std::size_t data_in_bytes = expected_merged_representations.size() * element_size; - - // merge_sketch_element_arrays needs 2.1*data_in_bytes memory, so passing merge_sketch_element_arrays as available memory will cause it to chunk the merging process - test_merge_sketch_element_arrays(arrays_of_representations, - arrays_of_readids_positions_directions, - data_in_bytes, - expected_merged_representations, - expected_merged_readids_positions_directions); - - // a really small amount of memory - 
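[note] merge_sketch_element_arrays, exercised above and in the memory-budget calls that follow, combines several per-chunk arrays that are already sorted by representation into one sorted pair of arrays, falling back to chunked merging when the data does not fit in the stated device-memory budget. The expected output listed above is what a stable k-way merge keyed by representation produces, with ties resolved by array order and then by position within the array. A host-side sketch of that merge, with illustrative names and a simplified payload type:

#include <cstdint>
#include <functional>
#include <queue>
#include <tuple>
#include <vector>

using representation_t = std::uint64_t; // assumed alias, for illustration

struct ReadidPositionDirection // simplified stand-in for the payload type used above
{
    std::uint64_t read_id_;
    std::uint32_t position_in_read_;
    std::uint32_t direction_;
};

// Stable k-way merge: equal representations keep the order of the input arrays,
// and within one array keep their original order, matching the expectations above.
void merge_sorted_arrays(const std::vector<std::vector<representation_t>>& reps_in,
                         const std::vector<std::vector<ReadidPositionDirection>>& payload_in,
                         std::vector<representation_t>& reps_out,
                         std::vector<ReadidPositionDirection>& payload_out)
{
    // min-heap over (representation, array index, element index)
    using entry = std::tuple<representation_t, std::size_t, std::size_t>;
    std::priority_queue<entry, std::vector<entry>, std::greater<entry>> heap;

    for (std::size_t array = 0; array < reps_in.size(); ++array)
        if (!reps_in[array].empty())
            heap.emplace(reps_in[array][0], array, 0);

    while (!heap.empty())
    {
        const auto [rep, array, index] = heap.top();
        heap.pop();
        reps_out.push_back(rep);
        payload_out.push_back(payload_in[array][index]);
        if (index + 1 < reps_in[array].size())
            heap.emplace(reps_in[array][index + 1], array, index + 1);
    }
}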
test_merge_sketch_element_arrays(arrays_of_representations, - arrays_of_readids_positions_directions, - 200, - expected_merged_representations, - expected_merged_readids_positions_directions); - - // amount memory too small to do the merge - EXPECT_THROW(test_merge_sketch_element_arrays(arrays_of_representations, - arrays_of_readids_positions_directions, - 100, - expected_merged_representations, - expected_merged_readids_positions_directions), - approximate_sketch_elements_per_bucket_too_small); -} - -// ************ Test generate_sections_for_multithreaded_index_building ************** -void test_generate_sections_for_multithreaded_index_building(const std::vector& input_representations, - const std::vector>& expected_sections) -{ - auto generated_sections = generate_sections_for_multithreaded_index_building(input_representations); - - ASSERT_EQ(generated_sections.size(), expected_sections.size()) << "std::thread::hardware_concurrency: " << std::thread::hardware_concurrency(); - - for (std::size_t i = 0; i < generated_sections.size(); ++i) - { - EXPECT_EQ(generated_sections[i].first, expected_sections[i].first) << "std::thread::hardware_concurrency: " << std::thread::hardware_concurrency() << ", index: " << i; - EXPECT_EQ(generated_sections[i].second, expected_sections[i].second) << "std::thread::hardware_concurrency: " << std::thread::hardware_concurrency() << ", index: " << i; - } -} - -TEST(TestCudamapperIndexGPU, generate_sections_for_multithreaded_index_building_1) -{ - // 0 0 1 1 2 2 3 3 ... - // ^ ^ ^ ^ - // Perfect case, every section has the same number of elements - - //auto number_of_threads = std::thread::hardware_concurrency(); - //number_of_threads = std::max(1u, number_of_threads); - std::uint32_t number_of_threads = 4; - - std::vector representations; - std::vector> expected_sections; - for (std::size_t thread_id = 0; thread_id < number_of_threads; ++thread_id) - { - representations.push_back(thread_id); - representations.push_back(thread_id); - expected_sections.push_back({2 * thread_id, 2 * (thread_id + 1)}); - } - - test_generate_sections_for_multithreaded_index_building(representations, - expected_sections); -} - -TEST(TestCudamapperIndexGPU, generate_sections_for_multithreaded_index_building_2) -{ - // 0 0 0 1 1 2 2 2 3 3 4 4 4 5 5 - // number_of_thread = 6 - // number_of_elements = 15 - // elements_per_section = 15/6 = 2 - // - // * section 0 * - // 0 0 0 1 1 2 2 2 3 3 4 4 4 5 5 - // ^ ^ - // after looking for upper bound for the element left of past_the_last - // 0 0 0 1 1 2 2 2 3 3 4 4 4 5 5 - // ^ ^ - // - // * section 1 * - // 0 0 0 1 1 2 2 2 3 3 4 4 4 5 5 - // ^ ^ - // after looking for upper bound for the element left of past_the_last - // 0 0 0 1 1 2 2 2 3 3 4 4 4 5 5 - // ^ ^ - // - // * section 2 * - // 0 0 0 1 1 2 2 2 3 3 4 4 4 5 5 - // ^ ^ - // after looking for upper bound for the element left of past_the_last - // 0 0 0 1 1 2 2 2 3 3 4 4 4 5 5 - // ^ ^ - // ... 
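[note] The comments above describe how the representations array is split into per-thread sections: the array is divided into roughly equal parts and each boundary is then pushed forward to the upper bound of the representation just before it, so that a run of equal representations is never split across two threads. A sketch under that reading, which reproduces the expected sections of tests 1 through 5; the names are illustrative, and the real function presumably derives the section count from std::thread::hardware_concurrency() rather than taking it as a parameter:

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

using representation_t = std::uint64_t; // assumed alias, for illustration

// Splits [0, representations.size()) into sections of roughly equal size whose
// boundaries never fall inside a run of equal representations.
std::vector<std::pair<std::size_t, std::size_t>>
split_into_sections(const std::vector<representation_t>& representations,
                    std::size_t number_of_sections)
{
    std::vector<std::pair<std::size_t, std::size_t>> sections;
    if (representations.empty() || number_of_sections == 0)
        return sections;

    const std::size_t elements_per_section =
        std::max<std::size_t>(1, representations.size() / number_of_sections);

    std::size_t first_element = 0;
    while (first_element < representations.size())
    {
        std::size_t past_the_last = std::min(first_element + elements_per_section,
                                             representations.size());
        // move the boundary past the whole run of the representation left of it
        past_the_last = std::upper_bound(representations.begin(),
                                         representations.end(),
                                         representations[past_the_last - 1]) -
                        representations.begin();
        sections.emplace_back(first_element, past_the_last);
        first_element = past_the_last;
    }
    return sections;
}

With a single representation this collapses to one section covering the whole array, which is why the later tests expect all threads but the first to receive no work.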
- - //auto number_of_threads = std::thread::hardware_concurrency(); - //number_of_threads = std::max(1u, number_of_threads); - std::uint32_t number_of_threads = 4; - - std::vector representations; - std::vector> expected_sections; - for (std::size_t thread_id = 0; thread_id < number_of_threads; ++thread_id) - { - representations.push_back(thread_id); - representations.push_back(thread_id); - if (thread_id % 2 == 0) - representations.push_back(thread_id); - - std::size_t first_element = 0; - if (thread_id != 0) - first_element = expected_sections.back().second; - if (thread_id % 2 == 0) - expected_sections.push_back({first_element, first_element + 3}); - else - expected_sections.push_back({first_element, first_element + 2}); - } - - test_generate_sections_for_multithreaded_index_building(representations, - expected_sections); -} - -TEST(TestCudamapperIndexGPU, generate_sections_for_multithreaded_index_building_3) -{ - // only one representation -> all threads except for the first one get no sections - - //auto number_of_threads = std::thread::hardware_concurrency(); - //number_of_threads = std::max(1u, number_of_threads); - std::uint32_t number_of_threads = 4; - - std::vector representations(2 * number_of_threads, 0); - std::vector> expected_sections; - expected_sections.push_back({0, 2 * number_of_threads}); - - test_generate_sections_for_multithreaded_index_building(representations, - expected_sections); -} - -TEST(TestCudamapperIndexGPU, generate_sections_for_multithreaded_index_building_4) -{ - // only two representation -> all threads except for the first one get no sections - - //auto number_of_threads = std::thread::hardware_concurrency(); - //number_of_threads = std::max(1u, number_of_threads); - std::uint32_t number_of_threads = 4; - - if (number_of_threads <= 2u) - { - return; - std::cout << "Only " << number_of_threads << " threads, no need to execute this test"; - } - - std::vector representations(2 * number_of_threads); - std::fill(std::begin(representations), std::begin(representations) + number_of_threads, 0); - std::fill(std::begin(representations) + number_of_threads, std::end(representations), 1); - std::vector> expected_sections; - expected_sections.push_back({0, number_of_threads}); - expected_sections.push_back({number_of_threads, 2 * number_of_threads}); - - test_generate_sections_for_multithreaded_index_building(representations, - expected_sections); -} - -TEST(TestCudamapperIndexGPU, generate_sections_for_multithreaded_index_building_5) -{ - // less elements in representation than threads - - //auto number_of_threads = std::thread::hardware_concurrency(); - //number_of_threads = std::max(1u, number_of_threads); - std::uint32_t number_of_threads = 4; - - if (number_of_threads <= 2u) - { - return; - std::cout << "Only " << number_of_threads << " threads, no need to execute this test"; - } - - std::vector representations; - representations.push_back(0); // only two elements - representations.push_back(0); - std::vector> expected_sections; - expected_sections.push_back({0, 2}); - - test_generate_sections_for_multithreaded_index_building(representations, - expected_sections); -} - -// ************ Test build_index ************** - -template -void test_build_index(const std::vector& input_representations, - const std::vector& input_readids_positions_directions, - const std::vector>& expected_read_id_and_representation_to_sketch_elements) -{ - std::uint64_t number_of_reads = expected_read_id_and_representation_to_sketch_elements.size(); - - 
ASSERT_EQ(input_representations.size(), input_readids_positions_directions.size()); - - std::vector generated_positions_in_reads; - std::vector generated_read_ids; - std::vector generated_directions_of_reads; - std::vector> generated_read_id_and_representation_to_sketch_elements; - - build_index(number_of_reads, - input_representations, - input_readids_positions_directions, - generated_positions_in_reads, - generated_read_ids, - generated_directions_of_reads, - generated_read_id_and_representation_to_sketch_elements); - - ASSERT_EQ(input_readids_positions_directions.size(), generated_positions_in_reads.size()); - ASSERT_EQ(input_readids_positions_directions.size(), generated_read_ids.size()); - ASSERT_EQ(input_readids_positions_directions.size(), generated_directions_of_reads.size()); - - for (std::size_t i = 0; i < input_readids_positions_directions.size(); ++i) - { - EXPECT_EQ(input_readids_positions_directions[i].position_in_read_, generated_positions_in_reads[i]) << "index: " << i; - EXPECT_EQ(input_readids_positions_directions[i].read_id_, generated_read_ids[i]) << "index: " << i; - EXPECT_EQ(Minimizer::DirectionOfRepresentation(input_readids_positions_directions[i].direction_), generated_directions_of_reads[i]) << "index: " << i; - } - - ASSERT_EQ(expected_read_id_and_representation_to_sketch_elements.size(), generated_read_id_and_representation_to_sketch_elements.size()); - for (std::size_t read_id = 0; read_id < expected_read_id_and_representation_to_sketch_elements.size(); ++read_id) - { - ASSERT_EQ(expected_read_id_and_representation_to_sketch_elements[read_id].size(), generated_read_id_and_representation_to_sketch_elements[read_id].size()) << "read id: " << read_id; - //for (const auto& foo : generated_read_id_and_representation_to_sketch_elements[read_id]) std::cout << foo.representation_ << std::endl; - for (std::size_t representation_index = 0; representation_index < expected_read_id_and_representation_to_sketch_elements[read_id].size(); ++representation_index) - { - const auto& expected_data = expected_read_id_and_representation_to_sketch_elements[read_id][representation_index]; - const auto& generated_data = generated_read_id_and_representation_to_sketch_elements[read_id][representation_index]; - // check representation - EXPECT_EQ(expected_data.representation_, generated_data.representation_) << "read id: " << read_id << ", representation index: " << representation_index; - // check sketch_elements_for_representation_and_read_id_ - EXPECT_EQ(expected_data.sketch_elements_for_representation_and_read_id_.first_element_, - generated_data.sketch_elements_for_representation_and_read_id_.first_element_) - << "read id: " << read_id << ", representation index: " << representation_index; - EXPECT_EQ(expected_data.sketch_elements_for_representation_and_read_id_.block_size_, - generated_data.sketch_elements_for_representation_and_read_id_.block_size_) - << "read id: " << read_id << ", representation index: " << representation_index; - // check sketch_elements_for_representation_and_all_read_ids_ - EXPECT_EQ(expected_data.sketch_elements_for_representation_and_all_read_ids_.first_element_, - generated_data.sketch_elements_for_representation_and_all_read_ids_.first_element_) - << "read id: " << read_id << ", representation index: " << representation_index; - EXPECT_EQ(expected_data.sketch_elements_for_representation_and_all_read_ids_.block_size_, - generated_data.sketch_elements_for_representation_and_all_read_ids_.block_size_) - << "read id: " << read_id << ", representation index: " 
<< representation_index; - } - } -} - -TEST(TestCudamapperIndexGPU, build_index_GATT_4_1) -{ - // >read_0 - // GATT - - // GATT = 0b10001111 - // AATC = 0b00001101 <- minimizer - - std::vector input_representations({{0b00001101}}); - std::vector input_readids_positions_directions({{0, 0, 1}}); - - std::vector> expected_read_id_and_representation_to_sketch_elements(1); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b00001101, {0, 1}, {0, 1}}); - - test_build_index(input_representations, - input_readids_positions_directions, - expected_read_id_and_representation_to_sketch_elements); -} - -TEST(TestCudamapperIndexGPU, build_index_GATT_2_3) -{ - // >read_0 - // GATT - - // kmer representation: forward, reverse - // GA: <20> 31 - // AT: <03> 03 - // TT: 33 <00> - - // front end minimizers: representation, position_in_read, direction, read_id - // GA : 20 0 F 0 - // GAT: 03 1 F 0 - - // central minimizers - // GATT: 00 2 R 0 - - // back end minimizers - // ATT: 00 2 R 0 - // TT : 00 2 R 0 - - // All minimizers: GA(0f), AT(1f), AA(2r) - - // (2r1) means position 2, reverse direction, read 1 - // (1,2) means array block start at element 1 and has 2 elements - - // 0 1 2 - // data arrays: GA(0f0), AT(1f0), AA(2r0) - // - // read_1(AAG(1,1)(0,2)) means read_1 has "1" minimizer with representation AAG starting at position "1", - // whereas in all reads there are "2" minimizers with representation AAG and they start at position "0" - // read_id_and_representation_to_sketch_elements: read_0(AA(0,1)(0,1), AT(1,1)(1,1), GA(2,1)(2,1) - - std::vector input_representations; - input_representations.push_back(0b1000); - input_representations.push_back(0b0011); - input_representations.push_back(0b0000); - std::vector input_readids_positions_directions; - input_readids_positions_directions.push_back({0, 0, 0}); - input_readids_positions_directions.push_back({0, 1, 0}); - input_readids_positions_directions.push_back({0, 2, 1}); - - std::vector> expected_read_id_and_representation_to_sketch_elements(1); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b1000, {0, 1}, {0, 1}}); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b0011, {1, 1}, {1, 1}}); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b0000, {2, 1}, {2, 1}}); - - test_build_index(input_representations, - input_readids_positions_directions, - expected_read_id_and_representation_to_sketch_elements); -} - -TEST(TestCudamapperIndexGPU, build_index_CCCATACC_3_5) -{ - // >read_0 - // CCCATACC - - // ** CCCATAC ** - - // kmer representation: forward, reverse - // CCC: <111> 222 - // CCA: <110> 322 - // CAT: 103 <032> - // ATA: <030> 303 - // TAC: 301 <230> - // ACC: <011> 223 - - // front end minimizers: representation, position_in_read, direction - // CCC : 111 0 F - // CCCA : 110 1 F - // CCCAT : 032 2 R - // CCCATA: 030 3 F - - // central minimizers - // CCCATAC: 030 3 F - // CCATACC: 011 5 F - - // back end minimizers - // CATACC: 011 5 F - // ATACC : 011 5 F - // TACC : 011 5 F - // ACC : 011 5 F - - // All minimizers: CCC(0f), CCA(1f), ATG(2r), ATA(3f), ACC(5f) - - // (2r1) means position 2, reverse direction, read 1 - // (1,2) means array block start at element 1 and has 2 elements - - // 0 1 2 - // data arrays: ACC(5f0), ATA(3f0), ATG(2r0), CCA(1f0), CCC(0f0) - // - // read_1(AAG(1,1)(0,2)) means read_1 has "1" minimizer with representation AAG starting at position "1", - // whereas in all reads there are "2" minimizers with representation AAG and they start 
at position "0" - // read_id_and_representation_to_sketch_elements: read_0(AAC(0,1)(0,1), AAT(1,1)(1,1), ATG(2,1)(2,1), CCA(3,1)(3,1), CCC(4,1)(4,1) - - std::vector input_representations; - input_representations.push_back(0b000101); // ACC - input_representations.push_back(0b001100); // ATA - input_representations.push_back(0b001110); // ATG - input_representations.push_back(0b010000); // CAA - input_representations.push_back(0b010101); // CCC - std::vector input_readids_positions_directions; - input_readids_positions_directions.push_back({0, 5, 0}); // ACC - input_readids_positions_directions.push_back({0, 3, 0}); // ATA - input_readids_positions_directions.push_back({0, 2, 1}); // ATG - input_readids_positions_directions.push_back({0, 1, 0}); // CCA - input_readids_positions_directions.push_back({0, 0, 0}); // CCC - - std::vector> expected_read_id_and_representation_to_sketch_elements(1); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b000101, {0, 1}, {0, 1}}); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b001100, {1, 1}, {1, 1}}); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b001110, {2, 1}, {2, 1}}); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b010000, {3, 1}, {3, 1}}); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b010101, {4, 1}, {4, 1}}); - - test_build_index(input_representations, - input_readids_positions_directions, - expected_read_id_and_representation_to_sketch_elements); -} - -TEST(TestCudamapperIndexGPU, build_index_CATCAAG_AAGCTA_3_2) -{ - // >read_0 - // CATCAAG - // >read_1 - // AAGCTA - - // ** CATCAAG ** - - // kmer representation: forward, reverse - // CAT: 103 <032> - // ATC: <031> 203 - // TCA: <310> 320 - // CAA: <100> 332 - // AAG: <002> 133 - - // front end minimizers: representation, position_in_read, direction, read_id - // CAT: 032 0 R 0 - - // central minimizers - // CATC: 031 1 F 0 - // ATCA: 031 1 F 0 - // TCAA: 100 3 F 0 - // CAAG: 002 4 F 0 - - // back end minimizers - // AAG: 002 4 F 0 - - // All minimizers: ATC(1f), CAA(3f), AAG(4f), ATG(0r) - - // ** AAGCTA ** - - // kmer representation: forward, reverse - // AAG: <002> 133 - // AGC: <021> 213 - // GCT: 213 <021> - // CTA: <130> 302 - - // front end minimizers: representation, position_in_read, direction, read_id - // AAG: 002 0 F 1 - - // central minimizers - // AAGC: 002 0 F 1 - // AGCT: 021 2 R 1 // only the last minimizer is saved - // GCTA: 021 2 R 1 - - // back end minimizers - // CTA: 130 3 F 1 - - // All minimizers: AAG(0f), AGC(1f), CTA(3f) - - // (2r1) means position 2, reverse direction, read 1 - // (1,2) means array block start at element 1 and has 2 elements - - // 0 1 2 3 4 5 6 - // data arrays: AAG(4f0), AAG(0f1), AGC(2r1), ATC(1f0), ATG(0r0), CAA(3f0), CTA(3f1) - // - // read_1(AAG(1,1)(0,2)) means read_1 has "1" minimizer with representation AAG starting at position "1", - // whereas in all reads there are "2" minimizers with representation AAG and they start at position "0" - // read_id_and_representation_to_sketch_elements: read_0(AAG(0,1)(0,2), ATC(3,1)(3,1), ATG(4,1)(4,1), CAA(5,1)(5,1)) - // read_1(AAG(1,1)(0,2), AGC(2,1)(2,1), CTA(6,1)(6,1)) - - std::vector input_representations; - input_representations.push_back(0b000010); // AAG - input_representations.push_back(0b000010); // AAG - input_representations.push_back(0b001001); // AGC - input_representations.push_back(0b001101); // ATC - input_representations.push_back(0b001110); // ATG - 
input_representations.push_back(0b010000); // CAA - input_representations.push_back(0b011100); // CTA - std::vector input_readids_positions_directions; - input_readids_positions_directions.push_back({0, 4, 0}); // AAG - input_readids_positions_directions.push_back({1, 0, 0}); // AAG - input_readids_positions_directions.push_back({1, 2, 1}); // AGC - input_readids_positions_directions.push_back({0, 1, 0}); // ATC - input_readids_positions_directions.push_back({0, 0, 1}); // ATG - input_readids_positions_directions.push_back({0, 3, 0}); // CAA - input_readids_positions_directions.push_back({1, 3, 0}); // CTA - - std::vector> expected_read_id_and_representation_to_sketch_elements(2); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b000010, {0, 1}, {0, 2}}); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b001101, {3, 1}, {3, 1}}); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b001110, {4, 1}, {4, 1}}); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b010000, {5, 1}, {5, 1}}); - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b000010, {1, 1}, {0, 2}}); - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b001001, {2, 1}, {2, 1}}); - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b011100, {6, 1}, {6, 1}}); - - test_build_index(input_representations, - input_readids_positions_directions, - expected_read_id_and_representation_to_sketch_elements); -} - -TEST(TestCudamapperIndexGPU, build_index_AAAACTGAA_GCCAAAG_2_3) -{ - // >read_0 - // AAAACTGAA - // >read_1 - // GCCAAAG - - // ** AAAACTGAA ** - - // kmer representation: forward, reverse - // AA: <00> 33 - // AA: <00> 33 - // AA: <00> 33 - // AC: <01> 23 - // CT: 13 <02> - // TG: 32 <10> - // GA: <20> 31 - // AA: <00> 33 - - // front end minimizers: representation, position_in_read, direction, read_id - // AA : 00 0 F 0 - // AAA: 00 1 F 0 - - // central minimizers - // AAAA: 00 2 F 0 - // AAAC: 00 2 F 0 - // AACT: 00 2 F 0 - // ACTG: 01 3 F 0 - // CTGA: 02 4 R 0 - // TGAA: 00 7 F 0 - - // back end minimizers - // GAA: 00 7 F 0 - // AA : 00 7 F 0 - - // All minimizers: AA(0f), AA(1f), AA(2f), AC(3f), AG(4r), AA (7f) - - // ** GCCAAAG ** - - // kmer representation: forward, reverse - // GC: <21> 21 - // CC: <11> 22 - // CA: <10> 32 - // AA: <00> 33 - // AA: <00> 33 - // AG: <03> 21 - - // front end minimizers: representation, position_in_read, direction, read_id - // GC : 21 0 F 0 - // GCC: 11 1 F 0 - - // central minimizers - // GCCA: 10 2 F 0 - // CCAA: 00 3 F 0 - // CAAA: 00 4 F 0 - // AAAG: 00 4 F 0 - - // back end minimizers - // AAG: 00 4 F 0 - // AG : 03 5 F 0 - - // All minimizers: GC(0f), CC(1f), CA(2f), AA(3f), AA(4f), AG(5f) - - // (2r1) means position 2, reverse direction, read 1 - // (1,2) means array block start at element 1 and has 2 elements - - // 0 1 2 3 4 5 6 7 8 9 10 11 - // data arrays: AA(0f0), AA(1f0), AA(2f0), AA(7f0), AA(3f1), AA(4f1), AC(3f0), AG(4r0), AG(5f1), CA(2f1), CC(1f1), GC(0f1) - // - // read_1(AAG(1,1)(0,2)) means read_1 has "1" minimizer with representation AAG starting at position "1", - // whereas in all reads there are "2" minimizers with representation AAG and they start at position "0" - // read_id_and_representation_to_sketch_elements: read_0(AA(0,4)(0,6), AC(6,1)(6,1), AG(7,1)(7,2) - // read_1(AA(4,2)(0,6), AG(8,1)(7,2), CA(9,1)(9,1), CC(10,1)(10,1), GC(11,1)(11,1) - - std::vector input_representations; - input_representations.push_back(0b0000); 
// AA - input_representations.push_back(0b0000); // AA - input_representations.push_back(0b0000); // AA - input_representations.push_back(0b0000); // AA - input_representations.push_back(0b0000); // AA - input_representations.push_back(0b0000); // AA - input_representations.push_back(0b0001); // AC - input_representations.push_back(0b0010); // AG - input_representations.push_back(0b0010); // AG - input_representations.push_back(0b0100); // CA - input_representations.push_back(0b0101); // CC - input_representations.push_back(0b1001); // GC - std::vector input_readids_positions_directions; - input_readids_positions_directions.push_back({0, 0, 0}); // AA - input_readids_positions_directions.push_back({0, 1, 0}); // AA - input_readids_positions_directions.push_back({0, 2, 0}); // AA - input_readids_positions_directions.push_back({0, 7, 0}); // AA - input_readids_positions_directions.push_back({1, 3, 0}); // AA - input_readids_positions_directions.push_back({1, 4, 0}); // AA - input_readids_positions_directions.push_back({0, 3, 0}); // AC - input_readids_positions_directions.push_back({0, 4, 1}); // AG - input_readids_positions_directions.push_back({1, 5, 0}); // AG - input_readids_positions_directions.push_back({1, 2, 0}); // CA - input_readids_positions_directions.push_back({1, 1, 0}); // CC - input_readids_positions_directions.push_back({1, 0, 0}); // GC - - std::vector> expected_read_id_and_representation_to_sketch_elements(2); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b0000, {0, 4}, {0, 6}}); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b0001, {6, 1}, {6, 1}}); - expected_read_id_and_representation_to_sketch_elements[0].push_back({0b0010, {7, 1}, {7, 2}}); - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b0000, {4, 2}, {0, 6}}); - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b0010, {8, 1}, {7, 2}}); - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b0100, {9, 1}, {9, 1}}); - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b0101, {10, 1}, {10, 1}}); - expected_read_id_and_representation_to_sketch_elements[1].push_back({0b1001, {11, 1}, {11, 1}}); - - test_build_index(input_representations, - input_readids_positions_directions, - expected_read_id_and_representation_to_sketch_elements); -} - -} // namespace index_gpu -} // namespace details -} // namespace cudamapper -} // namespace claragenomics diff --git a/cudamapper/tests/Test_CudamapperMatcher.cu b/cudamapper/tests/Test_CudamapperMatcher.cu deleted file mode 100644 index 064d7474d..000000000 --- a/cudamapper/tests/Test_CudamapperMatcher.cu +++ /dev/null @@ -1,431 +0,0 @@ -/* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. 
-*/ - -#include -#include "gtest/gtest.h" -#include "cudamapper_file_location.hpp" -#include "../src/index_gpu.cuh" -#include "../src/matcher.hpp" -#include "../src/minimizer.hpp" - -namespace claragenomics -{ -namespace cudamapper -{ - -TEST(TestCudamapperMatcher, OneReadOneMinimizer) -{ - // >read_0 - // GATT - - // only one read -> no anchors - std::vector> read_ranges; - const std::pair query_range{0, std::numeric_limits::max()}; - read_ranges.push_back(query_range); - - std::unique_ptr parser = io::create_fasta_parser(std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"); - std::vector parsers; - parsers.push_back(parser.get()); - IndexGPU index(parsers, 4, 1, read_ranges); - Matcher matcher(index, 0); - - const thrust::host_vector& anchors = matcher.anchors(); - ASSERT_EQ(anchors.size(), 0u); -} - -TEST(TestCudamapperMatcher, TwoReadsMultipleMiniminizers) -{ - // >read_0 - // CATCAAG - // >read_1 - // AAGCTA - - // CATCAAG - // Central minimizers: - // CATC: CAT, ATG, , GAT - // ATCA: , GAT, TCA, TGA - // TCAA: TCA, TGA, , TTG - // CAAG: CAA, TTG, , CTT - // front end minimizers: CAT, - // beck end minimizers: none - // All minimizers: ATC(1f), CAA(3f), AAG(4f), ATG(0r) - - // AAGCTA - // Central minimizers: - // AAGC: , CTT, AGC, GCT - // AGCT: , GCT, GCT, - // GCTA: GCT, , CTA, TAG - // Front end minimizers: none - // Back end miniminers: , TAG - // All minimizers: AAG(0f), AGC(1f), AGC(2r), CTA(3f) - - // Anchor r0p4 - r1p0 - - std::vector> read_ranges; - const std::pair query_range{0, std::numeric_limits::max()}; - read_ranges.push_back(query_range); - - std::unique_ptr parser = io::create_fasta_parser(std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/catcaag_aagcta.fasta"); - std::vector parsers; - parsers.push_back(parser.get()); - IndexGPU index(parsers, 3, 2, read_ranges); - Matcher matcher(index, 0); - - const thrust::host_vector& anchors = matcher.anchors(); - ASSERT_EQ(anchors.size(), 1u); - EXPECT_EQ(anchors[0].query_read_id_, 0u); - EXPECT_EQ(anchors[0].target_read_id_, 1u); - EXPECT_EQ(anchors[0].query_position_in_read_, 4u); - EXPECT_EQ(anchors[0].target_position_in_read_, 0u); -} - -class TestIndex : public Index -{ -public: - // getters - const std::vector& representations() const override { return representations_; } - const std::vector& positions_in_reads() const override { return positions_in_reads_; } - const std::vector& read_ids() const override { return read_ids_; } - const std::vector& directions_of_reads() const override { return directions_of_reads_; } - std::uint64_t number_of_reads() const override { return number_of_reads_; } - const std::vector& read_id_to_read_name() const override { return read_id_to_read_name_; } - const std::vector& read_id_to_read_length() const override { return read_id_to_read_length_; } - const std::vector>& read_id_and_representation_to_sketch_elements() const override { return read_id_and_representation_to_sketch_elements_; } - - // setters - void representations(const std::vector& val) { representations_ = val; } - void positions_in_reads(const std::vector& val) { positions_in_reads_ = val; } - void read_ids(const std::vector& val) { read_ids_ = val; } - void directions_of_reads(const std::vector& val) { directions_of_reads_ = val; } - void number_of_reads(std::uint64_t val) { number_of_reads_ = val; } - void read_id_to_read_name(const std::vector& val) { read_id_to_read_name_ = val; } - void read_id_and_representation_to_sketch_elements(const std::vector>& val) { read_id_and_representation_to_sketch_elements_ = val; } - 
std::uint64_t minimum_representation() const override { return 0; }; - std::uint64_t maximum_representation() const override { return 1 << (15 * 2); }; //kmer_size = 15 - bool reached_end_of_input() const override { return false; }; - -private: - std::vector representations_; - std::vector positions_in_reads_; - std::vector read_ids_; - std::vector directions_of_reads_; - std::uint64_t number_of_reads_; - std::vector read_id_to_read_name_; - std::vector read_id_to_read_length_; - std::vector> read_id_and_representation_to_sketch_elements_; -}; - -TEST(TestCudamapperMatcher, CustomIndexTwoReads) -{ - // Two reads, all minimizers have the same representation - TestIndex test_index; - - std::vector positions_in_reads(100); - std::iota(std::begin(positions_in_reads), std::end(positions_in_reads), 0); - test_index.positions_in_reads(positions_in_reads); - - std::vector read_ids(100); - std::fill(std::begin(read_ids), std::next(std::begin(read_ids), 50), 0); - std::fill(std::next(std::begin(read_ids), 50), std::end(read_ids), 1); - test_index.read_ids(read_ids); - - // no need for directions yet - - test_index.number_of_reads(2); - - // no need for read_id_to_read_name - - // pointers - std::vector> read_id_and_representation_to_sketch_elements(2); - read_id_and_representation_to_sketch_elements[0].emplace_back(Index::RepresentationToSketchElements{0x23, {0, 50}, {0, 100}}); - read_id_and_representation_to_sketch_elements[1].emplace_back(Index::RepresentationToSketchElements{0x23, {50, 50}, {0, 100}}); - test_index.read_id_and_representation_to_sketch_elements(read_id_and_representation_to_sketch_elements); - - Matcher matcher(test_index, 0); - - const thrust::host_vector& anchors = matcher.anchors(); - ASSERT_EQ(anchors.size(), 2500u); - - for (std::size_t read_0_sketch_element = 0; read_0_sketch_element < 50; ++read_0_sketch_element) - { - for (std::size_t read_1_sketch_element = 0; read_1_sketch_element < 50; ++read_1_sketch_element) - { - ASSERT_EQ(anchors[read_0_sketch_element * 50 + read_1_sketch_element].query_read_id_, 0u) << read_0_sketch_element << " " << read_1_sketch_element; - ASSERT_EQ(anchors[read_0_sketch_element * 50 + read_1_sketch_element].target_read_id_, 1u) << read_0_sketch_element << " " << read_1_sketch_element; - ASSERT_EQ(anchors[read_0_sketch_element * 50 + read_1_sketch_element].query_position_in_read_, read_0_sketch_element) << read_0_sketch_element << " " << read_1_sketch_element; - ASSERT_EQ(anchors[read_0_sketch_element * 50 + read_1_sketch_element].target_position_in_read_, read_1_sketch_element + 50u) << read_0_sketch_element << " " << read_1_sketch_element; - } - } -} - -TEST(TestCudamapperMatcher, CustomIndexFourReads) -{ - // Read 0: - // representation 0: elems 0 - 49 (50) - // representation 2: elems 50 - 69 (20) - // representation 3: elems 70 - 199 (130) - // representation 5: elems 200 - 269 (70) - // Read 1: - // representation 2: elems 0 - 29 (30) - // representation 3: elems 30 - 99 (70) - // representation 4: elems 100 - 159 (60) - // representation 5: elems 160 - 199 (40) - // Read 2: - // representation 3: elems 0 - 99 (100) - // representation 4: elems 100 - 199 (100) - // representation 5: elems 200 - 299 (100) - // Read 3: - // representation 1: elems 0 - 79 (80) - // representation 3: elems 80 - 159 (80) - // representation 5: elems 160 - 239 (80) - // representation 7: elems 240 - 319 (80) - // - // Total sketch elements: 270 + 200 + 300 + 320 = 1090 - // - // read 0 | read 1 | read 2 | read 3 - // read 0 X | X | X | X - // read 1 2,3,5 | X | 
X | X - // read 2 3,5 | 3,4,5 | X | X - // read 3 3,5 | 3,5 | 3,5 | X - // - // Total anchors: - // 0-1:2 0-1:3 0-1:5 0-2:3 0-2:5 0-3:3 0-3:5 1-2:3 1-2:4 1-2:5 1-3:3 1-3:5 2-3:3 2-3:5 - // 20*30 + 130*70 + 70*40 + 130*100 + 70*100 + 130*80 + 70*80 + 70*100 + 60*100 + 40*100 + 70*80 + 40*80 + 100*80 + 100*80 = 90300 - - TestIndex test_index; - - // positions - std::vector positions_in_reads(1090); - std::iota(std::begin(positions_in_reads), std::next(std::begin(positions_in_reads), 50), 0); // rep 0, read 0 - std::iota(std::next(std::begin(positions_in_reads), 50), std::next(std::begin(positions_in_reads), 50 + 80), 0); // rep 1, read 3 - std::iota(std::next(std::begin(positions_in_reads), 130), std::next(std::begin(positions_in_reads), 130 + 20), 50); // rep 2, read 0 - std::iota(std::next(std::begin(positions_in_reads), 150), std::next(std::begin(positions_in_reads), 150 + 30), 0); // rep 2, read 1 - std::iota(std::next(std::begin(positions_in_reads), 180), std::next(std::begin(positions_in_reads), 180 + 130), 70); // rep 3, read 0 - std::iota(std::next(std::begin(positions_in_reads), 310), std::next(std::begin(positions_in_reads), 310 + 70), 30); // rep 3, read 1 - std::iota(std::next(std::begin(positions_in_reads), 380), std::next(std::begin(positions_in_reads), 380 + 100), 0); // rep 3, read 2 - std::iota(std::next(std::begin(positions_in_reads), 480), std::next(std::begin(positions_in_reads), 480 + 80), 80); // rep 3, read 3 - std::iota(std::next(std::begin(positions_in_reads), 560), std::next(std::begin(positions_in_reads), 560 + 60), 100); // rep 4, read 1 - std::iota(std::next(std::begin(positions_in_reads), 620), std::next(std::begin(positions_in_reads), 620 + 100), 100); // rep 4, read 2 - std::iota(std::next(std::begin(positions_in_reads), 720), std::next(std::begin(positions_in_reads), 720 + 70), 200); // rep 5, read 0 - std::iota(std::next(std::begin(positions_in_reads), 790), std::next(std::begin(positions_in_reads), 790 + 40), 160); // rep 5, read 1 - std::iota(std::next(std::begin(positions_in_reads), 830), std::next(std::begin(positions_in_reads), 830 + 100), 200); // rep 5, read 2 - std::iota(std::next(std::begin(positions_in_reads), 930), std::next(std::begin(positions_in_reads), 930 + 80), 160); // rep 5, read 3 - std::iota(std::next(std::begin(positions_in_reads), 1010), std::next(std::begin(positions_in_reads), 1010 + 80), 240); // rep 7, read 3 - test_index.positions_in_reads(positions_in_reads); - - // read_ids - std::vector read_ids(1090); - std::fill(std::begin(read_ids), std::next(std::begin(read_ids), 50), 0); // rep 0, read 0 - std::fill(std::next(std::begin(read_ids), 50), std::next(std::begin(read_ids), 50 + 80), 3); // rep 1, read 3 - std::fill(std::next(std::begin(read_ids), 130), std::next(std::begin(read_ids), 130 + 20), 0); // rep 2, read 0 - std::fill(std::next(std::begin(read_ids), 150), std::next(std::begin(read_ids), 150 + 30), 1); // rep 2, read 1 - std::fill(std::next(std::begin(read_ids), 180), std::next(std::begin(read_ids), 180 + 130), 0); // rep 3, read 0 - std::fill(std::next(std::begin(read_ids), 310), std::next(std::begin(read_ids), 310 + 70), 1); // rep 3, read 1 - std::fill(std::next(std::begin(read_ids), 380), std::next(std::begin(read_ids), 380 + 100), 2); // rep 3, read 2 - std::fill(std::next(std::begin(read_ids), 480), std::next(std::begin(read_ids), 480 + 80), 3); // rep 3, read 3 - std::fill(std::next(std::begin(read_ids), 560), std::next(std::begin(read_ids), 560 + 60), 1); // rep 4, read 1 - 
std::fill(std::next(std::begin(read_ids), 620), std::next(std::begin(read_ids), 620 + 100), 2); // rep 4, read 2 - std::fill(std::next(std::begin(read_ids), 720), std::next(std::begin(read_ids), 720 + 70), 0); // rep 5, read 0 - std::fill(std::next(std::begin(read_ids), 790), std::next(std::begin(read_ids), 790 + 40), 1); // rep 5, read 1 - std::fill(std::next(std::begin(read_ids), 830), std::next(std::begin(read_ids), 830 + 100), 2); // rep 5, read 2 - std::fill(std::next(std::begin(read_ids), 930), std::next(std::begin(read_ids), 930 + 80), 3); // rep 5, read 3 - std::fill(std::next(std::begin(read_ids), 1010), std::next(std::begin(read_ids), 1010 + 80), 3); // rep 7, read 3 - test_index.read_ids(read_ids); - - // no need for directions yet - - test_index.number_of_reads(4); - - // no need for read_id_to_read_name - - // pointers - std::vector> read_id_and_representation_to_sketch_elements(4); - read_id_and_representation_to_sketch_elements[0].emplace_back(Index::RepresentationToSketchElements{0, {0, 50}, {0, 50}}); - read_id_and_representation_to_sketch_elements[3].emplace_back(Index::RepresentationToSketchElements{1, {50, 80}, {50, 80}}); - read_id_and_representation_to_sketch_elements[0].emplace_back(Index::RepresentationToSketchElements{2, {130, 20}, {130, 50}}); - read_id_and_representation_to_sketch_elements[1].emplace_back(Index::RepresentationToSketchElements{2, {150, 30}, {130, 50}}); - read_id_and_representation_to_sketch_elements[0].emplace_back(Index::RepresentationToSketchElements{3, {180, 130}, {180, 380}}); - read_id_and_representation_to_sketch_elements[1].emplace_back(Index::RepresentationToSketchElements{3, {310, 70}, {180, 380}}); - read_id_and_representation_to_sketch_elements[2].emplace_back(Index::RepresentationToSketchElements{3, {380, 100}, {180, 380}}); - read_id_and_representation_to_sketch_elements[3].emplace_back(Index::RepresentationToSketchElements{3, {480, 80}, {180, 380}}); - read_id_and_representation_to_sketch_elements[1].emplace_back(Index::RepresentationToSketchElements{4, {560, 60}, {560, 160}}); - read_id_and_representation_to_sketch_elements[2].emplace_back(Index::RepresentationToSketchElements{4, {620, 100}, {560, 160}}); - read_id_and_representation_to_sketch_elements[0].emplace_back(Index::RepresentationToSketchElements{5, {720, 70}, {720, 290}}); - read_id_and_representation_to_sketch_elements[1].emplace_back(Index::RepresentationToSketchElements{5, {790, 40}, {720, 290}}); - read_id_and_representation_to_sketch_elements[2].emplace_back(Index::RepresentationToSketchElements{5, {830, 100}, {720, 290}}); - read_id_and_representation_to_sketch_elements[3].emplace_back(Index::RepresentationToSketchElements{5, {930, 80}, {720, 290}}); - read_id_and_representation_to_sketch_elements[3].emplace_back(Index::RepresentationToSketchElements{7, {1010, 80}, {1010, 80}}); - test_index.read_id_and_representation_to_sketch_elements(read_id_and_representation_to_sketch_elements); - - Matcher matcher(test_index, 0); - - const thrust::host_vector& anchors = matcher.anchors(); - ASSERT_EQ(anchors.size(), 90300u); - - // Anchors are grouped by query read id and within that by representation (both in increasing order). - // Assume q0p4t2p8 means anchor of read id 0 at position 4 and read id 2 at position 8. - // Assume read 0 has 30 sketch elements with certain representation, read 1 40 and read 2 50. - // Anchors for read 0 as query and that represtnation looks like this: - // q0p0t1p0, q0p0t1p1 .. q0p0t1p39, q0p0t2p0, q0p0t2p1 ... q0p0t2p49, q0p1t1p0, q0p1t1p1 ... 
q0p1t1p39, q0p1t2p0 .. q0p1t2p49, q0p2p1p0 ... - - // read 0 - rep 2: 20 - // read 1 - rep 2: 30 - for (std::size_t query = 0; query < 20; ++query) - { - for (std::size_t target = 0; target < 30; ++target) - { - ASSERT_EQ(anchors[0 + query * 30 + target].query_read_id_, 0u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[0 + query * 30 + target].target_read_id_, 1u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[0 + query * 30 + target].query_position_in_read_, query + 50u) << "query: " << query << ", target: " << target; // position_in_read for read 0 rep 2 starts from 50 - ASSERT_EQ(anchors[0 + query * 30 + target].target_position_in_read_, target + 0u) << "query: " << query << ", target: " << target; // position_in_read for read 1 rep 2 starts from 0 - } - } - - // read 0 - rep 3: 130 - // read 1 - rep 3: 70 - // read 2 - rep 3: 100 - // read 3 - rep 3: 80 - for (std::size_t query = 0; query < 130; ++query) - { // block starts from 20*30 = 600 - for (std::size_t target = 0; target < 70; ++target) - { // read 1 - no shift - ASSERT_EQ(anchors[600 + query * 250 + target].query_read_id_, 0u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[600 + query * 250 + target].target_read_id_, 1u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[600 + query * 250 + target].query_position_in_read_, query + 70u) << "query: " << query << ", target: " << target; // position_in_read for read 0 rep 3 starts from 70 - ASSERT_EQ(anchors[600 + query * 250 + target].target_position_in_read_, target + 30u) << "query: " << query << ", target: " << target; // position_in_read for read 1 rep 3 starts from 30 - } - for (std::size_t target = 0; target < 100; ++target) - { // read 2 - shift 70 due to read 1 - ASSERT_EQ(anchors[600 + 70 + query * 250 + target].query_read_id_, 0u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[600 + 70 + query * 250 + target].target_read_id_, 2u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[600 + 70 + query * 250 + target].query_position_in_read_, query + 70u) << "query: " << query << ", target: " << target; // position_in_read for read 0 rep 3 starts from 70 - ASSERT_EQ(anchors[600 + 70 + query * 250 + target].target_position_in_read_, target + 0u) << "query: " << query << ", target: " << target; // position_in_read for read 2 rep 3 starts from 0 - } - for (std::size_t target = 0; target < 80; ++target) - { // read 8 - shift 170 due to read 1 and read 2 - ASSERT_EQ(anchors[600 + 170 + query * 250 + target].query_read_id_, 0u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[600 + 170 + query * 250 + target].target_read_id_, 3u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[600 + 170 + query * 250 + target].query_position_in_read_, query + 70u) << "query: " << query << ", target: " << target; // position_in_read for read 0 rep 3 starts from 70 - ASSERT_EQ(anchors[600 + 170 + query * 250 + target].target_position_in_read_, target + 80u) << "query: " << query << ", target: " << target; // position_in_read for read 3 rep 3 starts from 80 - } - } - - // read 0 - rep 5: 70 - // read 1 - rep 5: 40 - // read 2 - rep 5: 100 - // read 3 - rep 5: 80 - for (std::size_t query = 0; query < 70; ++query) - { // block starts from 600 + 130*250 = 33100 - for (std::size_t target = 0; target < 40; ++target) - { // read 1 - no shift - ASSERT_EQ(anchors[33100 + query * 220 + target].query_read_id_, 0u) << "query: " << query << ", target: 
" << target; - ASSERT_EQ(anchors[33100 + query * 220 + target].target_read_id_, 1u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[33100 + query * 220 + target].query_position_in_read_, query + 200u) << "query: " << query << ", target: " << target; // position_in_read for read 0 rep 5 starts from 200 - ASSERT_EQ(anchors[33100 + query * 220 + target].target_position_in_read_, target + 160u) << "query: " << query << ", target: " << target; // position_in_read for read 1 rep 5 starts from 160 - } - for (std::size_t target = 0; target < 100; ++target) - { // read 2 - shift 40 due to read 1 - ASSERT_EQ(anchors[33100 + 40 + query * 220 + target].query_read_id_, 0u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[33100 + 40 + query * 220 + target].target_read_id_, 2u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[33100 + 40 + query * 220 + target].query_position_in_read_, query + 200u) << "query: " << query << ", target: " << target; // position_in_read for read 0 rep 5 starts from 200 - ASSERT_EQ(anchors[33100 + 40 + query * 220 + target].target_position_in_read_, target + 200u) << "query: " << query << ", target: " << target; // position_in_read for read 2 rep 5 starts from 200 - } - for (std::size_t target = 0; target < 80; ++target) - { // read 8 - shift 140 due to read 1 and read 2 - ASSERT_EQ(anchors[33100 + 140 + query * 220 + target].query_read_id_, 0u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[33100 + 140 + query * 220 + target].target_read_id_, 3u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[33100 + 140 + query * 220 + target].query_position_in_read_, query + 200u) << "query: " << query << ", target: " << target; // position_in_read for read 0 rep 5 starts from 200 - ASSERT_EQ(anchors[33100 + 140 + query * 220 + target].target_position_in_read_, target + 160u) << "query: " << query << ", target: " << target; // position_in_read for read 3 rep 5 starts from 160 - } - } - - // read 1 - rep 3: 70 - // read 2 - rep 3: 100 - // read 3 - rep 3: 80 - for (std::size_t query = 0; query < 70; ++query) - { // block starts from 33100 + 70 * 220 = 48500 - for (std::size_t target = 0; target < 100; ++target) - { // read 2 - no shift - ASSERT_EQ(anchors[48500 + query * 180 + target].query_read_id_, 1u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[48500 + query * 180 + target].target_read_id_, 2u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[48500 + query * 180 + target].query_position_in_read_, query + 30u) << "query: " << query << ", target: " << target; // position_in_read for read 1 rep 3 starts from 30 - ASSERT_EQ(anchors[48500 + query * 180 + target].target_position_in_read_, target + 0u) << "query: " << query << ", target: " << target; // position_in_read for read 2 rep 3 starts from 0 - } - for (std::size_t target = 0; target < 80; ++target) - { // read 3 - shift 100 due to read 2 - ASSERT_EQ(anchors[48500 + 100 + query * 180 + target].query_read_id_, 1u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[48500 + 100 + query * 180 + target].target_read_id_, 3u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[48500 + 100 + query * 180 + target].query_position_in_read_, query + 30u) << "query: " << query << ", target: " << target; // position_in_read for read 1 rep 3 starts from 30 - ASSERT_EQ(anchors[48500 + 100 + query * 180 + target].target_position_in_read_, target + 80u) << "query: " << 
query << ", target: " << target; // position_in_read for read 2 rep 3 starts from 80 - } - } - - // read 1 - rep 4: 60 - // read 2 - rep 4: 100 - for (std::size_t query = 0; query < 60; ++query) - { // block starts from 48500 + 70 * 180 = 61100 - for (std::size_t target = 0; target < 100; ++target) - { // read 2 - no shift - ASSERT_EQ(anchors[61100 + query * 100 + target].query_read_id_, 1u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[61100 + query * 100 + target].target_read_id_, 2u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[61100 + query * 100 + target].query_position_in_read_, query + 100u) << "query: " << query << ", target: " << target; // position_in_read for read 1 rep 4 starts from 100 - ASSERT_EQ(anchors[61100 + query * 100 + target].target_position_in_read_, target + 100u) << "query: " << query << ", target: " << target; // position_in_read for read 2 rep 4 starts from 100 - } - } - - // read 1 - rep 5: 40 - // read 2 - rep 5: 100 - // read 3 - rep 5: 80 - for (std::size_t query = 0; query < 40; ++query) - { // block starts from 61100 + 60 * 100 = 67100 - for (std::size_t target = 0; target < 100; ++target) - { // read 2 - no shift - ASSERT_EQ(anchors[67100 + query * 180 + target].query_read_id_, 1u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[67100 + query * 180 + target].target_read_id_, 2u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[67100 + query * 180 + target].query_position_in_read_, query + 160u) << "query: " << query << ", target: " << target; // position_in_read for read 1 rep 5 starts from 160 - ASSERT_EQ(anchors[67100 + query * 180 + target].target_position_in_read_, target + 200u) << "query: " << query << ", target: " << target; // position_in_read for read 2 rep 5 starts from 200 - } - for (std::size_t target = 0; target < 80; ++target) - { // read 3 - shift 100 due to read 2 - ASSERT_EQ(anchors[67100 + 100 + query * 180 + target].query_read_id_, 1u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[67100 + 100 + query * 180 + target].target_read_id_, 3u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[67100 + 100 + query * 180 + target].query_position_in_read_, query + 160u) << "query: " << query << ", target: " << target; // position_in_read for read 1 rep 5 starts from 160 - ASSERT_EQ(anchors[67100 + 100 + query * 180 + target].target_position_in_read_, target + 160u) << "query: " << query << ", target: " << target; // position_in_read for read 2 rep 5 starts from 160 - } - } - - // read 2 - rep 3: 100 - // read 3 - rep 3: 80 - for (std::size_t query = 0; query < 100; ++query) - { // block starts from 67100 + 40 * 180 = 74300 - for (std::size_t target = 0; target < 80; ++target) - { // read 3 - no shift - ASSERT_EQ(anchors[74300 + query * 80 + target].query_read_id_, 2u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[74300 + query * 80 + target].target_read_id_, 3u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[74300 + query * 80 + target].query_position_in_read_, query + 0u) << "query: " << query << ", target: " << target; // position_in_read for read 2 rep 3 starts from 0 - ASSERT_EQ(anchors[74300 + query * 80 + target].target_position_in_read_, target + 80u) << "query: " << query << ", target: " << target; // position_in_read for read 3 rep 3 starts from 80 - } - } - - // read 2 - rep 5: 100 - // read 3 - rep 5: 80 - for (std::size_t query = 0; query < 100; ++query) - { // 
block starts from 74300 + 100*800 = 82300 - for (std::size_t target = 0; target < 80; ++target) - { // read 3 - no shift - ASSERT_EQ(anchors[82300 + query * 80 + target].query_read_id_, 2u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[82300 + query * 80 + target].target_read_id_, 3u) << "query: " << query << ", target: " << target; - ASSERT_EQ(anchors[82300 + query * 80 + target].query_position_in_read_, query + 200u) << "query: " << query << ", target: " << target; // position_in_read for read 2 rep 5 starts from 200 - ASSERT_EQ(anchors[82300 + query * 80 + target].target_position_in_read_, target + 160u) << "query: " << query << ", target: " << target; // position_in_read for read 3 rep 5 starts from 160 - } - } -} -} // namespace cudamapper -} // namespace claragenomics From 95e58d400ba5b49f8e4780ad4aeb2d1c2775b309 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Fri, 22 Nov 2019 10:41:30 +0100 Subject: [PATCH 099/128] Renamed IndexTwoIndices to Index and IndexGPUTwoIndices to IndexGPU --- cudamapper/CMakeLists.txt | 14 ++--- .../{index_two_indices.hpp => index.hpp} | 8 +-- .../cudamapper/matcher_two_indices.hpp | 6 +- .../claragenomics/cudamapper/overlapper.hpp | 6 +- cudamapper/src/index.cu | 38 +++++++++++++ ...{index_gpu_two_indices.cu => index_gpu.cu} | 2 +- ...ndex_gpu_two_indices.cuh => index_gpu.cuh} | 56 +++++++++---------- cudamapper/src/index_two_indices.cu | 38 ------------- cudamapper/src/main.cu | 34 +++++------ cudamapper/src/matcher_gpu.cu | 4 +- cudamapper/src/matcher_gpu.cuh | 5 +- cudamapper/src/matcher_two_indices.cu | 4 +- cudamapper/src/overlapper_triggered.cu | 4 +- cudamapper/src/overlapper_triggered.hpp | 2 +- cudamapper/tests/CMakeLists.txt | 4 +- ...oIndices.cu => Test_CudamapperIndexGPU.cu} | 44 +++++++-------- cudamapper/tests/Test_CudamapperMatcherGPU.cu | 6 +- cudamapper/tests/mock_index.cuh | 16 +++--- 18 files changed, 145 insertions(+), 146 deletions(-) rename cudamapper/include/claragenomics/cudamapper/{index_two_indices.hpp => index.hpp} (96%) create mode 100644 cudamapper/src/index.cu rename cudamapper/src/{index_gpu_two_indices.cu => index_gpu.cu} (99%) rename cudamapper/src/{index_gpu_two_indices.cuh => index_gpu.cuh} (89%) delete mode 100644 cudamapper/src/index_two_indices.cu rename cudamapper/tests/{Test_CudamapperIndexGPUTwoIndices.cu => Test_CudamapperIndexGPU.cu} (97%) diff --git a/cudamapper/CMakeLists.txt b/cudamapper/CMakeLists.txt index 5b41645e8..15787b2ff 100644 --- a/cudamapper/CMakeLists.txt +++ b/cudamapper/CMakeLists.txt @@ -28,13 +28,13 @@ target_include_directories(minimizer PUBLIC include) target_link_libraries(minimizer logging pthread utils) target_compile_options(minimizer PRIVATE -Werror) -cuda_add_library(index_gpu_two_indices - src/index_two_indices.cu - src/index_gpu_two_indices.cu +cuda_add_library(index_gpu + src/index.cu + src/index_gpu.cu src/minimizer.cu) -target_include_directories(index_gpu_two_indices PUBLIC include) -target_link_libraries(index_gpu_two_indices logging minimizer pthread utils cgaio) -target_compile_options(index_gpu_two_indices PRIVATE -Werror) +target_include_directories(index_gpu PUBLIC include) +target_link_libraries(index_gpu logging minimizer pthread utils cgaio) +target_compile_options(index_gpu PRIVATE -Werror) cuda_add_library(matcher_gpu src/matcher_gpu.cu) @@ -69,7 +69,7 @@ target_include_directories(cudamapper $ ) -target_link_libraries(cudamapper utils index_gpu_two_indices matcher_gpu logging overlapper_triggerred cudamapper_utils) 
+target_link_libraries(cudamapper utils index_gpu matcher_gpu logging overlapper_triggerred cudamapper_utils) # Add tests folder add_subdirectory(tests) diff --git a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp b/cudamapper/include/claragenomics/cudamapper/index.hpp similarity index 96% rename from cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp rename to cudamapper/include/claragenomics/cudamapper/index.hpp index 8317ad3cf..a8edaa4e5 100644 --- a/cudamapper/include/claragenomics/cudamapper/index_two_indices.hpp +++ b/cudamapper/include/claragenomics/cudamapper/index.hpp @@ -28,11 +28,11 @@ namespace cudamapper /// \{ /// Index - manages mapping of (k,w)-kmer-representation and all its occurences -class IndexTwoIndices +class Index { public: /// \brief Virtual destructor - virtual ~IndexTwoIndices() = default; + virtual ~Index() = default; /// \brief returns an array of representations of sketch elements /// \return an array of representations of sketch elements @@ -79,8 +79,8 @@ class IndexTwoIndices /// \param kmer_size k - the kmer length /// \param window_size w - the length of the sliding window used to find sketch elements (i.e. the number of adjacent kmers in a window, adjacent = shifted by one basepair) /// \param hash_representations - if true, hash kmer representations - /// \return instance of IndexTwoIndices - static std::unique_ptr + /// \return instance of Index + static std::unique_ptr create_index(const io::FastaParser& parser, const read_id_t first_read_id, const read_id_t past_the_last_read_id, diff --git a/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp b/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp index 8457c45d5..c0da2b4c9 100644 --- a/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp +++ b/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp @@ -12,7 +12,7 @@ #include #include -#include +#include namespace claragenomics { @@ -37,8 +37,8 @@ class MatcherTwoIndices /// \param query_index /// \param target_index /// \return matcher - static std::unique_ptr create_matcher(const IndexTwoIndices& query_index, - const IndexTwoIndices& target_index); + static std::unique_ptr create_matcher(const Index& query_index, + const Index& target_index); }; /// \} diff --git a/cudamapper/include/claragenomics/cudamapper/overlapper.hpp b/cudamapper/include/claragenomics/cudamapper/overlapper.hpp index dd0b2afab..fdc607b34 100644 --- a/cudamapper/include/claragenomics/cudamapper/overlapper.hpp +++ b/cudamapper/include/claragenomics/cudamapper/overlapper.hpp @@ -11,7 +11,7 @@ #pragma once #include -#include "index_two_indices.hpp" +#include "index.hpp" #include "types.hpp" namespace claragenomics @@ -37,8 +37,8 @@ class Overlapper /// \param index_target virtual void get_overlaps(std::vector& overlaps, thrust::device_vector& anchors, - const IndexTwoIndices& index_query, - const IndexTwoIndices& index_target) = 0; + const Index& index_query, + const Index& index_target) = 0; /// \brief prints overlaps to stdout in PAF format static void print_paf(const std::vector& overlaps); diff --git a/cudamapper/src/index.cu b/cudamapper/src/index.cu new file mode 100644 index 000000000..480464138 --- /dev/null +++ b/cudamapper/src/index.cu @@ -0,0 +1,38 @@ +/* +* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. 
+* +* NVIDIA CORPORATION and its licensors retain all intellectual property +* and proprietary rights in and to this software, related documentation +* and any modifications thereto. Any use, reproduction, disclosure or +* distribution of this software and related documentation without an express +* license agreement from NVIDIA CORPORATION is strictly prohibited. +*/ + +#include +#include +#include "index_gpu.cuh" +#include "minimizer.hpp" + +namespace claragenomics +{ +namespace cudamapper +{ + +std::unique_ptr Index::create_index(const io::FastaParser& parser, + const read_id_t first_read_id, + const read_id_t past_the_last_read_id, + const std::uint64_t kmer_size, + const std::uint64_t window_size, + const bool hash_representations) +{ + CGA_NVTX_RANGE(profiler, "create_index"); + return std::make_unique>(parser, + first_read_id, + past_the_last_read_id, + kmer_size, + window_size, + hash_representations); +} + +} // namespace cudamapper +} // namespace claragenomics diff --git a/cudamapper/src/index_gpu_two_indices.cu b/cudamapper/src/index_gpu.cu similarity index 99% rename from cudamapper/src/index_gpu_two_indices.cu rename to cudamapper/src/index_gpu.cu index 0e1a4ddcc..c62c1b5bd 100644 --- a/cudamapper/src/index_gpu_two_indices.cu +++ b/cudamapper/src/index_gpu.cu @@ -8,7 +8,7 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. */ -#include "index_gpu_two_indices.cuh" +#include "index_gpu.cuh" namespace claragenomics { diff --git a/cudamapper/src/index_gpu_two_indices.cuh b/cudamapper/src/index_gpu.cuh similarity index 89% rename from cudamapper/src/index_gpu_two_indices.cuh rename to cudamapper/src/index_gpu.cuh index d501c0ea5..1ed545bb1 100644 --- a/cudamapper/src/index_gpu_two_indices.cuh +++ b/cudamapper/src/index_gpu.cuh @@ -16,8 +16,8 @@ #include #include -#include "claragenomics/cudamapper/index_two_indices.hpp" -#include "claragenomics/cudamapper/types.hpp" +#include +#include #include #include #include @@ -42,7 +42,7 @@ namespace cudamapper /// /// \tparam SketchElementImpl any implementation of SketchElement template -class IndexGPUTwoIndices : public IndexTwoIndices +class IndexGPU : public Index { public: /// \brief Constructor @@ -53,12 +53,12 @@ public: /// \param kmer_size k - the kmer length /// \param window_size w - the length of the sliding window used to find sketch elements (i.e. 
the number of adjacent k-mers in a window, adjacent = shifted by one basepair) /// \param hash_representations - if true, hash kmer representations - IndexGPUTwoIndices(const io::FastaParser& parser, - const read_id_t first_read_id, - const read_id_t past_the_last_read_id, - const std::uint64_t kmer_size, - const std::uint64_t window_size, - const bool hash_representations = true); + IndexGPU(const io::FastaParser& parser, + const read_id_t first_read_id, + const read_id_t past_the_last_read_id, + const std::uint64_t kmer_size, + const std::uint64_t window_size, + const bool hash_representations = true); /// \brief returns an array of representations of sketch elements /// \return an array of representations of sketch elements @@ -221,12 +221,12 @@ __global__ void copy_rest_to_separate_arrays(const ReadidPositionDirection* cons } // namespace details template -IndexGPUTwoIndices::IndexGPUTwoIndices(const io::FastaParser& parser, - const read_id_t first_read_id, - const read_id_t past_the_last_read_id, - const std::uint64_t kmer_size, - const std::uint64_t window_size, - const bool hash_representations) +IndexGPU::IndexGPU(const io::FastaParser& parser, + const read_id_t first_read_id, + const read_id_t past_the_last_read_id, + const std::uint64_t kmer_size, + const std::uint64_t window_size, + const bool hash_representations) : first_read_id_(first_read_id) , kmer_size_(kmer_size) , window_size_(window_size) @@ -239,64 +239,64 @@ IndexGPUTwoIndices::IndexGPUTwoIndices(const io::FastaParser& } template -const thrust::device_vector& IndexGPUTwoIndices::representations() const +const thrust::device_vector& IndexGPU::representations() const { return representations_d_; }; template -const thrust::device_vector& IndexGPUTwoIndices::read_ids() const +const thrust::device_vector& IndexGPU::read_ids() const { return read_ids_d_; } template -const thrust::device_vector& IndexGPUTwoIndices::positions_in_reads() const +const thrust::device_vector& IndexGPU::positions_in_reads() const { return positions_in_reads_d_; } template -const thrust::device_vector& IndexGPUTwoIndices::directions_of_reads() const +const thrust::device_vector& IndexGPU::directions_of_reads() const { return directions_of_reads_d_; } template -const thrust::device_vector& IndexGPUTwoIndices::unique_representations() const +const thrust::device_vector& IndexGPU::unique_representations() const { return unique_representations_d_; } template -const thrust::device_vector& IndexGPUTwoIndices::first_occurrence_of_representations() const +const thrust::device_vector& IndexGPU::first_occurrence_of_representations() const { return first_occurrence_of_representations_d_; } template -const std::string& IndexGPUTwoIndices::read_id_to_read_name(const read_id_t read_id) const +const std::string& IndexGPU::read_id_to_read_name(const read_id_t read_id) const { return read_id_to_read_name_[read_id - first_read_id_]; } template -const std::uint32_t& IndexGPUTwoIndices::read_id_to_read_length(const read_id_t read_id) const +const std::uint32_t& IndexGPU::read_id_to_read_length(const read_id_t read_id) const { return read_id_to_read_length_[read_id - first_read_id_]; } template -std::uint64_t IndexGPUTwoIndices::number_of_reads() const +std::uint64_t IndexGPU::number_of_reads() const { return number_of_reads_; } template -void IndexGPUTwoIndices::generate_index(const io::FastaParser& parser, - const read_id_t first_read_id, - const read_id_t past_the_last_read_id, - const bool hash_representations) +void IndexGPU::generate_index(const io::FastaParser& 
parser, + const read_id_t first_read_id, + const read_id_t past_the_last_read_id, + const bool hash_representations) { // check if there are any reads to process diff --git a/cudamapper/src/index_two_indices.cu b/cudamapper/src/index_two_indices.cu deleted file mode 100644 index 7ac0c58e6..000000000 --- a/cudamapper/src/index_two_indices.cu +++ /dev/null @@ -1,38 +0,0 @@ -/* -* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. -* -* NVIDIA CORPORATION and its licensors retain all intellectual property -* and proprietary rights in and to this software, related documentation -* and any modifications thereto. Any use, reproduction, disclosure or -* distribution of this software and related documentation without an express -* license agreement from NVIDIA CORPORATION is strictly prohibited. -*/ - -#include "claragenomics/cudamapper/index_two_indices.hpp" -#include -#include "index_gpu_two_indices.cuh" -#include "minimizer.hpp" - -namespace claragenomics -{ -namespace cudamapper -{ - -std::unique_ptr IndexTwoIndices::create_index(const io::FastaParser& parser, - const read_id_t first_read_id, - const read_id_t past_the_last_read_id, - const std::uint64_t kmer_size, - const std::uint64_t window_size, - const bool hash_representations) -{ - CGA_NVTX_RANGE(profiler, "create_index"); - return std::make_unique>(parser, - first_read_id, - past_the_last_read_id, - kmer_size, - window_size, - hash_representations); -} - -} // namespace cudamapper -} // namespace claragenomics diff --git a/cudamapper/src/main.cu b/cudamapper/src/main.cu index fd70ad9a4..bfdb4ceca 100644 --- a/cudamapper/src/main.cu +++ b/cudamapper/src/main.cu @@ -22,9 +22,9 @@ #include #include -#include "claragenomics/cudamapper/index_two_indices.hpp" -#include "claragenomics/cudamapper/matcher_two_indices.hpp" -#include "claragenomics/cudamapper/overlapper.hpp" +#include +#include +#include #include "overlapper_triggered.hpp" static struct option options[] = { @@ -110,8 +110,8 @@ int main(int argc, char* argv[]) // Function for adding new overlaps to writer auto add_overlaps_to_write_queue = [&overlaps_to_write, &overlaps_writer_mtx](claragenomics::cudamapper::Overlapper& overlapper, thrust::device_vector& anchors, - const claragenomics::cudamapper::IndexTwoIndices& index_query, - const claragenomics::cudamapper::IndexTwoIndices& index_target) { + const claragenomics::cudamapper::Index& index_query, + const claragenomics::cudamapper::Index& index_target) { CGA_NVTX_RANGE(profiler, "add_overlaps_to_write_queue"); overlaps_writer_mtx.lock(); overlaps_to_write.push_back(std::vector()); @@ -168,18 +168,18 @@ int main(int argc, char* argv[]) std::cerr << "Query range: " << query_start << " - " << query_end << std::endl; - std::unique_ptr query_index(nullptr); - std::unique_ptr target_index(nullptr); + std::unique_ptr query_index(nullptr); + std::unique_ptr target_index(nullptr); std::unique_ptr matcher(nullptr); { CGA_NVTX_RANGE(profiler, "generate_query_index"); auto start_time = std::chrono::high_resolution_clock::now(); - query_index = claragenomics::cudamapper::IndexTwoIndices::create_index(*query_parser, - query_start, - query_end + 1, // <- past the last - k, - w); + query_index = claragenomics::cudamapper::Index::create_index(*query_parser, + query_start, + query_end + 1, // <- past the last + k, + w); index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); std::cerr << "Query index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - 
start_time).count() << "ms" << std::endl; } @@ -201,11 +201,11 @@ int main(int argc, char* argv[]) { CGA_NVTX_RANGE(profiler, "generate_target_index"); auto start_time = std::chrono::high_resolution_clock::now(); - target_index = claragenomics::cudamapper::IndexTwoIndices::create_index(*target_parser, - target_start, - target_end + 1, // <- past the last - k, - w); + target_index = claragenomics::cudamapper::Index::create_index(*target_parser, + target_start, + target_end + 1, // <- past the last + k, + w); index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); std::cerr << "Target index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; } diff --git a/cudamapper/src/matcher_gpu.cu b/cudamapper/src/matcher_gpu.cu index 33d457a6b..056c82a4c 100644 --- a/cudamapper/src/matcher_gpu.cu +++ b/cudamapper/src/matcher_gpu.cu @@ -60,8 +60,8 @@ namespace claragenomics namespace cudamapper { -MatcherGPU::MatcherGPU(const IndexTwoIndices& query_index, - const IndexTwoIndices& target_index) +MatcherGPU::MatcherGPU(const Index& query_index, + const Index& target_index) { CGA_NVTX_RANGE(profile, "matcherGPU"); diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index 5540af00a..c6003f3b0 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -13,7 +13,6 @@ #include #include #include -#include #include namespace claragenomics @@ -25,8 +24,8 @@ namespace cudamapper class MatcherGPU : public MatcherTwoIndices { public: - MatcherGPU(const IndexTwoIndices& query_index, - const IndexTwoIndices& target_index); + MatcherGPU(const Index& query_index, + const Index& target_index); thrust::device_vector& anchors() override; diff --git a/cudamapper/src/matcher_two_indices.cu b/cudamapper/src/matcher_two_indices.cu index 15aa833a0..a7bd6caf9 100644 --- a/cudamapper/src/matcher_two_indices.cu +++ b/cudamapper/src/matcher_two_indices.cu @@ -16,8 +16,8 @@ namespace claragenomics namespace cudamapper { -std::unique_ptr MatcherTwoIndices::create_matcher(const IndexTwoIndices& query_index, - const IndexTwoIndices& target_index) +std::unique_ptr MatcherTwoIndices::create_matcher(const Index& query_index, + const Index& target_index) { return std::make_unique(query_index, target_index); } diff --git a/cudamapper/src/overlapper_triggered.cu b/cudamapper/src/overlapper_triggered.cu index b2b84507d..ff675f690 100644 --- a/cudamapper/src/overlapper_triggered.cu +++ b/cudamapper/src/overlapper_triggered.cu @@ -174,8 +174,8 @@ struct CreateOverlap void OverlapperTriggered::get_overlaps(std::vector& fused_overlaps, thrust::device_vector& d_anchors, - const IndexTwoIndices& index_query, - const IndexTwoIndices& index_target) + const Index& index_query, + const Index& index_target) { CGA_NVTX_RANGE(profiler, "OverlapperTriggered::get_overlaps"); const auto tail_length_for_chain = 3; diff --git a/cudamapper/src/overlapper_triggered.hpp b/cudamapper/src/overlapper_triggered.hpp index fd455ce4a..533d6833b 100644 --- a/cudamapper/src/overlapper_triggered.hpp +++ b/cudamapper/src/overlapper_triggered.hpp @@ -37,7 +37,7 @@ class OverlapperTriggered : public Overlapper /// \param index_query Index /// \param index_target /// \return vector of Overlap objects - void get_overlaps(std::vector& overlaps, thrust::device_vector& anchors, const IndexTwoIndices& index_query, const IndexTwoIndices& index_target) override; + void get_overlaps(std::vector& overlaps, 
thrust::device_vector& anchors, const Index& index_query, const Index& index_target) override; }; } // namespace cudamapper } // namespace claragenomics diff --git a/cudamapper/tests/CMakeLists.txt b/cudamapper/tests/CMakeLists.txt index 10dd21a56..3d874638e 100644 --- a/cudamapper/tests/CMakeLists.txt +++ b/cudamapper/tests/CMakeLists.txt @@ -12,7 +12,7 @@ set(TARGET_NAME cudamappertests) set(SOURCES main.cpp - Test_CudamapperIndexGPUTwoIndices.cu + Test_CudamapperIndexGPU.cu Test_CudamapperMatcherGPU.cu Test_CudamapperMinimizer.cpp Test_CudamapperOverlapperTriggered.cu @@ -23,7 +23,7 @@ include_directories(${cudamapper_data_include_dir}) set(LIBS bioparser - index_gpu_two_indices + index_gpu matcher_gpu overlapper_triggerred cudamapper_utils) diff --git a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu b/cudamapper/tests/Test_CudamapperIndexGPU.cu similarity index 97% rename from cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu rename to cudamapper/tests/Test_CudamapperIndexGPU.cu index 1e39539c1..61b7eed04 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPUTwoIndices.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPU.cu @@ -14,7 +14,7 @@ #include #include "cudamapper_file_location.hpp" -#include "../src/index_gpu_two_indices.cuh" +#include "../src/index_gpu.cuh" #include "../src/minimizer.hpp" #include @@ -68,7 +68,7 @@ void test_find_first_occurrences_of_representations_kernel(const thrust::host_ve } } -TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_kernel_small_example) +TEST(TestCudamapperIndexGPU, test_find_first_occurrences_of_representations_kernel_small_example) { thrust::host_vector representation_index_mask_h; thrust::host_vector input_representations_h; @@ -129,7 +129,7 @@ TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representa number_of_threads); } -TEST(TestCudamapperIndexGPUTwoIndices, test_find_first_occurrences_of_representations_kernel_large_example) +TEST(TestCudamapperIndexGPU, test_find_first_occurrences_of_representations_kernel_large_example) { const std::uint64_t total_sketch_elements = 10000000; const std::uint32_t sketch_elements_with_same_representation = 1000; @@ -187,7 +187,7 @@ void test_find_first_occurrences_of_representations(const thrust::host_vector& rep } } -TEST(TestCudamapperIndexGPUTwoIndices, test_create_new_value_mask_small_example) +TEST(TestCudamapperIndexGPU, test_create_new_value_mask_small_example) { thrust::host_vector representations_h; thrust::host_vector expected_new_value_mask_h; @@ -332,7 +332,7 @@ TEST(TestCudamapperIndexGPUTwoIndices, test_create_new_value_mask_small_example) number_of_threads); } -TEST(TestCudamapperIndexGPUTwoIndices, test_create_new_value_mask_small_data_large_example) +TEST(TestCudamapperIndexGPU, test_create_new_value_mask_small_data_large_example) { const std::uint64_t total_sketch_elements = 10000000; const std::uint32_t sketch_elements_with_same_representation = 1000; @@ -393,7 +393,7 @@ void test_function_copy_rest_to_separate_arrays(const thrust::host_vector rest_h; thrust::host_vector expected_read_ids_h; @@ -509,12 +509,12 @@ void test_function(const std::string& filename, const std::uint64_t expected_number_of_reads) { std::unique_ptr parser = io::create_fasta_parser(filename); - IndexGPUTwoIndices index(*parser, - first_read_id, - past_the_last_read_id, - kmer_size, - window_size, - false); + IndexGPU index(*parser, + first_read_id, + past_the_last_read_id, + kmer_size, + window_size, + false); ASSERT_EQ(index.number_of_reads(), 
expected_number_of_reads); if (0 == expected_number_of_reads) @@ -569,7 +569,7 @@ void test_function(const std::string& filename, EXPECT_EQ(expected_first_occurrence_of_representations.back(), expected_representations.size()); } -TEST(TestCudamapperIndexGPUTwoIndices, GATT_4_1) +TEST(TestCudamapperIndexGPU, GATT_4_1) { // >read_0 // GATT @@ -619,7 +619,7 @@ TEST(TestCudamapperIndexGPUTwoIndices, GATT_4_1) 1); } -TEST(TestCudamapperIndexGPUTwoIndices, GATT_2_3) +TEST(TestCudamapperIndexGPU, GATT_2_3) { // >read_0 // GATT @@ -702,7 +702,7 @@ TEST(TestCudamapperIndexGPUTwoIndices, GATT_2_3) 1); } -TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_2_8) +TEST(TestCudamapperIndexGPU, CCCATACC_2_8) { // *** Read is shorter than one full window, the result should be empty *** @@ -743,7 +743,7 @@ TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_2_8) } // TODO: Cover this case as well -//TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_5) +//TEST(TestCudamapperIndexGPU, CATCAAG_AAGCTA_3_5) //{ // // *** One Read is shorter than one full window, the other is not *** // @@ -828,7 +828,7 @@ TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_2_8) // 1); // <- only one read goes into index, the other is too short //} -TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_3_5) +TEST(TestCudamapperIndexGPU, CCCATACC_3_5) { // >read_0 // CCCATACC @@ -933,7 +933,7 @@ TEST(TestCudamapperIndexGPUTwoIndices, CCCATACC_3_5) 1); } -TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_2) +TEST(TestCudamapperIndexGPU, CATCAAG_AAGCTA_3_2) { // >read_0 // CATCAAG @@ -1068,7 +1068,7 @@ TEST(TestCudamapperIndexGPUTwoIndices, CATCAAG_AAGCTA_3_2) 2); } -TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3) +TEST(TestCudamapperIndexGPU, AAAACTGAA_GCCAAAG_2_3) { // >read_0 // AAAACTGAA @@ -1235,7 +1235,7 @@ TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3) 2); } -TEST(TestCudamapperIndexGPUTwoIndices, AAAACTGAA_GCCAAAG_2_3_only_second_read_in_index) +TEST(TestCudamapperIndexGPU, AAAACTGAA_GCCAAAG_2_3_only_second_read_in_index) { // >read_0 // AAAACTGAA diff --git a/cudamapper/tests/Test_CudamapperMatcherGPU.cu b/cudamapper/tests/Test_CudamapperMatcherGPU.cu index 45d38c05a..a2e35371f 100644 --- a/cudamapper/tests/Test_CudamapperMatcherGPU.cu +++ b/cudamapper/tests/Test_CudamapperMatcherGPU.cu @@ -329,9 +329,9 @@ TEST(TestCudamapperMatcherGPU, test_generate_anchors_small_example) TEST(TestCudamapperMatcherGPU, OneReadOneMinimizer) { - std::unique_ptr parser = io::create_fasta_parser(std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"); - std::unique_ptr query_index = IndexTwoIndices::create_index(*parser, 0, parser->get_num_seqences(), 4, 1); - std::unique_ptr target_index = IndexTwoIndices::create_index(*parser, 0, parser->get_num_seqences(), 4, 1); + std::unique_ptr parser = io::create_fasta_parser(std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"); + std::unique_ptr query_index = Index::create_index(*parser, 0, parser->get_num_seqences(), 4, 1); + std::unique_ptr target_index = Index::create_index(*parser, 0, parser->get_num_seqences(), 4, 1); MatcherGPU matcher(*query_index, *target_index); const thrust::host_vector anchors(matcher.anchors()); diff --git a/cudamapper/tests/mock_index.cuh b/cudamapper/tests/mock_index.cuh index 5f18477e3..cb38ee896 100644 --- a/cudamapper/tests/mock_index.cuh +++ b/cudamapper/tests/mock_index.cuh @@ -12,7 +12,7 @@ #include "gmock/gmock.h" -#include "../src/index_gpu_two_indices.cuh" +#include "../src/index_gpu.cuh" #include "../src/minimizer.hpp" #include 
"cudamapper_file_location.hpp" @@ -21,16 +21,16 @@ namespace claragenomics namespace cudamapper { -class MockIndex : public IndexGPUTwoIndices +class MockIndex : public IndexGPU { public: MockIndex() - : IndexGPUTwoIndices(*(claragenomics::io::create_fasta_parser(std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta")), - 0, - 0, - 0, - 0, - true) + : IndexGPU(*claragenomics::io::create_fasta_parser(std::string(CUDAMAPPER_BENCHMARK_DATA_DIR) + "/gatt.fasta"), + 0, + 0, + 0, + 0, + true) { } From 832a88163ef5bb20ec6742a3fedf95d9ca29672f Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Fri, 22 Nov 2019 10:47:32 +0100 Subject: [PATCH 100/128] Renamed MatcherTwoIndices to Matcher --- cudamapper/CMakeLists.txt | 2 +- .../{matcher_two_indices.hpp => matcher.hpp} | 10 +++++----- cudamapper/src/main.cu | 8 ++++---- cudamapper/src/{matcher_two_indices.cu => matcher.cu} | 6 +++--- cudamapper/src/matcher_gpu.cuh | 4 ++-- 5 files changed, 15 insertions(+), 15 deletions(-) rename cudamapper/include/claragenomics/cudamapper/{matcher_two_indices.hpp => matcher.hpp} (77%) rename cudamapper/src/{matcher_two_indices.cu => matcher.cu} (71%) diff --git a/cudamapper/CMakeLists.txt b/cudamapper/CMakeLists.txt index 15787b2ff..2c9a2216c 100644 --- a/cudamapper/CMakeLists.txt +++ b/cudamapper/CMakeLists.txt @@ -57,7 +57,7 @@ add_doxygen_source_dir(${CMAKE_CURRENT_SOURCE_DIR}/include) cuda_add_executable(cudamapper src/cudamapper.cpp src/main.cu - src/matcher_two_indices.cu + src/matcher.cu src/overlapper.cpp ) diff --git a/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp b/cudamapper/include/claragenomics/cudamapper/matcher.hpp similarity index 77% rename from cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp rename to cudamapper/include/claragenomics/cudamapper/matcher.hpp index c0da2b4c9..bec645207 100644 --- a/cudamapper/include/claragenomics/cudamapper/matcher_two_indices.hpp +++ b/cudamapper/include/claragenomics/cudamapper/matcher.hpp @@ -22,12 +22,12 @@ namespace cudamapper /// \addtogroup cudamapper /// \{ -/// MatcherTwoIndices - base matcher -class MatcherTwoIndices +/// Matcher - base matcher +class Matcher { public: /// \brief Virtual destructor - virtual ~MatcherTwoIndices() = default; + virtual ~Matcher() = default; /// \brief returns anchors /// \return anchors @@ -37,8 +37,8 @@ class MatcherTwoIndices /// \param query_index /// \param target_index /// \return matcher - static std::unique_ptr create_matcher(const Index& query_index, - const Index& target_index); + static std::unique_ptr create_matcher(const Index& query_index, + const Index& target_index); }; /// \} diff --git a/cudamapper/src/main.cu b/cudamapper/src/main.cu index bfdb4ceca..a04101289 100644 --- a/cudamapper/src/main.cu +++ b/cudamapper/src/main.cu @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include "overlapper_triggered.hpp" @@ -170,7 +170,7 @@ int main(int argc, char* argv[]) std::unique_ptr query_index(nullptr); std::unique_ptr target_index(nullptr); - std::unique_ptr matcher(nullptr); + std::unique_ptr matcher(nullptr); { CGA_NVTX_RANGE(profiler, "generate_query_index"); @@ -212,8 +212,8 @@ int main(int argc, char* argv[]) { CGA_NVTX_RANGE(profiler, "generate_matcher"); auto start_time = std::chrono::high_resolution_clock::now(); - matcher = claragenomics::cudamapper::MatcherTwoIndices::create_matcher(*query_index, - *target_index); + matcher = claragenomics::cudamapper::Matcher::create_matcher(*query_index, + *target_index); matcher_time += 
std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); std::cerr << "Matcher generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; } diff --git a/cudamapper/src/matcher_two_indices.cu b/cudamapper/src/matcher.cu similarity index 71% rename from cudamapper/src/matcher_two_indices.cu rename to cudamapper/src/matcher.cu index a7bd6caf9..4843efd0d 100644 --- a/cudamapper/src/matcher_two_indices.cu +++ b/cudamapper/src/matcher.cu @@ -8,7 +8,7 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. */ -#include "claragenomics/cudamapper/matcher_two_indices.hpp" +#include #include "matcher_gpu.cuh" namespace claragenomics @@ -16,8 +16,8 @@ namespace claragenomics namespace cudamapper { -std::unique_ptr MatcherTwoIndices::create_matcher(const Index& query_index, - const Index& target_index) +std::unique_ptr Matcher::create_matcher(const Index& query_index, + const Index& target_index) { return std::make_unique(query_index, target_index); } diff --git a/cudamapper/src/matcher_gpu.cuh b/cudamapper/src/matcher_gpu.cuh index c6003f3b0..85a96a865 100644 --- a/cudamapper/src/matcher_gpu.cuh +++ b/cudamapper/src/matcher_gpu.cuh @@ -12,7 +12,7 @@ #include #include -#include +#include #include namespace claragenomics @@ -21,7 +21,7 @@ namespace claragenomics namespace cudamapper { -class MatcherGPU : public MatcherTwoIndices +class MatcherGPU : public Matcher { public: MatcherGPU(const Index& query_index, From b93851227fa1e5f7fd562483bb5ff0dcb3ff7aa1 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Fri, 22 Nov 2019 10:54:36 +0100 Subject: [PATCH 101/128] [cudamapper] Added maximum_kmer_size to new Index --- cudamapper/include/claragenomics/cudamapper/index.hpp | 7 +++++++ cudamapper/src/main.cu | 7 +++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cudamapper/include/claragenomics/cudamapper/index.hpp b/cudamapper/include/claragenomics/cudamapper/index.hpp index a8edaa4e5..0d9b26db6 100644 --- a/cudamapper/include/claragenomics/cudamapper/index.hpp +++ b/cudamapper/include/claragenomics/cudamapper/index.hpp @@ -72,6 +72,13 @@ class Index /// \return number of reads in input data virtual std::uint64_t number_of_reads() const = 0; + /// \brief Return the maximum kmer length allowable + /// \return Return the maximum kmer length allowable + static uint64_t maximum_kmer_size() + { + return sizeof(representation_t) * 8 / 2; + } + /// \brief generates a mapping of (k,w)-kmer-representation to all of its occurrences for one or more sequences /// \param parser parser for the whole input file (part that goes into this index is determined by first_read_id and past_the_last_read_id) /// \param first_read_id read_id of the first read to the included in this index diff --git a/cudamapper/src/main.cu b/cudamapper/src/main.cu index a04101289..9ec949627 100644 --- a/cudamapper/src/main.cu +++ b/cudamapper/src/main.cu @@ -70,11 +70,11 @@ int main(int argc, char* argv[]) } } - /*if (k > claragenomics::cudamapper::Index::maximum_kmer_size()) + if (k > claragenomics::cudamapper::Index::maximum_kmer_size()) { std::cerr << "kmer of size " << k << " is not allowed, maximum k = " << claragenomics::cudamapper::Index::maximum_kmer_size() << std::endl; exit(1); - }*/ + } // Check remaining argument count. 
if ((argc - optind) < 2) @@ -261,8 +261,7 @@ void help(int32_t exit_code = 0) options: -k, --kmer-size length of kmer to use for minimizers [15] (Max=)" - //<< claragenomics::cudamapper::Index::maximum_kmer_size() - << ")" + << claragenomics::cudamapper::Index::maximum_kmer_size() << ")" << R"( -w, --window-size length of window to use for minimizers [15])" From 022a3ba4e3b46b33551682d940d7286f4b561f1f Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Fri, 22 Nov 2019 10:58:29 +0100 Subject: [PATCH 102/128] [cudamapper] Fixed some signed/unsigned warning in main.cu --- cudamapper/src/main.cu | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cudamapper/src/main.cu b/cudamapper/src/main.cu index 9ec949627..aa1da7983 100644 --- a/cudamapper/src/main.cu +++ b/cudamapper/src/main.cu @@ -155,16 +155,16 @@ int main(int argc, char* argv[]) }; std::future overlap_result(std::async(std::launch::async, overlaps_writer_func)); - auto overlapper = claragenomics::cudamapper::OverlapperTriggered(); + claragenomics::cudamapper::OverlapperTriggered overlapper; // Track overall time std::chrono::milliseconds index_time = std::chrono::duration_values::zero(); std::chrono::milliseconds matcher_time = std::chrono::duration_values::zero(); std::chrono::milliseconds overlapper_time = std::chrono::duration_values::zero(); - for (size_t query_start = 0; query_start < queries; query_start += index_size) + for (std::int32_t query_start = 0; query_start < queries; query_start += index_size) { // outer loop over query - size_t query_end = std::min(query_start + index_size, static_cast(queries) - 1); + std::int32_t query_end = std::min(query_start + index_size, static_cast(queries) - 1); std::cerr << "Query range: " << query_start << " - " << query_end << std::endl; @@ -184,7 +184,7 @@ int main(int argc, char* argv[]) std::cerr << "Query index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; } - size_t target_start = 0; + std::int32_t target_start = 0; // If all_to_all mode, then we can optimzie by starting the target sequences from the same index as // query because all indices before the current query index are guaranteed to have been processed in // a2a mapping. 
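As a rough standalone illustration of the comment above (not part of any patch in this series; the chunk sizes and read counts are made up, and it uses the past-the-end range convention that PATCH 104 later settles on), the query/target chunking logic amounts to the following:

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Toy version of the query/target chunking loop; all values are hypothetical.
int main()
{
    const std::int32_t queries = 10, targets = 10;             // same file on both sides in all-to-all mode
    const std::int32_t index_size = 4, target_index_size = 4;  // reads per index chunk
    const bool all_to_all = true;

    for (std::int32_t query_start = 0; query_start < queries; query_start += index_size)
    {
        // past-the-end convention
        const std::int32_t query_end = std::min(query_start + index_size, queries);

        // A chunk pair (A, B) yields the same overlaps as (B, A), so once a chunk has been
        // the query against all later chunks it never needs to be revisited as a target.
        std::int32_t target_start = all_to_all ? query_start : 0;
        for (; target_start < targets; target_start += target_index_size)
        {
            const std::int32_t target_end = std::min(target_start + target_index_size, targets);
            std::printf("query chunk [%d, %d) vs target chunk [%d, %d)\n",
                        static_cast<int>(query_start), static_cast<int>(query_end),
                        static_cast<int>(target_start), static_cast<int>(target_end));
        }
    }
    return 0;
}

With the made-up sizes above this enumerates each unordered chunk pair exactly once, which is why starting the target loop at the current query chunk is safe in all-to-all mode.
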
@@ -194,7 +194,7 @@ int main(int argc, char* argv[]) } for (; target_start < targets; target_start += target_index_size) { - size_t target_end = std::min(target_start + target_index_size, static_cast(targets) - 1); + std::int32_t target_end = std::min(target_start + target_index_size, static_cast(targets) - 1); std::cerr << "Target range: " << target_start << " - " << target_end << std::endl; From 8d2a95eb97dc0f549fa975f194188aa5801366c3 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Fri, 22 Nov 2019 11:11:31 +0100 Subject: [PATCH 103/128] [cudamapper] Fixed public library headers includes --- .../include/claragenomics/cudamapper/sketch_element.hpp | 2 +- cudamapper/src/cudamapper_utils.hpp | 2 +- cudamapper/src/minimizer.hpp | 4 ++-- cudamapper/src/overlapper.cpp | 2 +- cudamapper/src/overlapper_triggered.cu | 3 +-- cudamapper/src/overlapper_triggered.hpp | 4 ++-- cudamapper/src/sequence.cpp | 2 +- 7 files changed, 9 insertions(+), 10 deletions(-) diff --git a/cudamapper/include/claragenomics/cudamapper/sketch_element.hpp b/cudamapper/include/claragenomics/cudamapper/sketch_element.hpp index d57a303ab..d1e130ba8 100644 --- a/cudamapper/include/claragenomics/cudamapper/sketch_element.hpp +++ b/cudamapper/include/claragenomics/cudamapper/sketch_element.hpp @@ -12,7 +12,7 @@ #include #include -#include "claragenomics/cudamapper/types.hpp" +#include namespace claragenomics { diff --git a/cudamapper/src/cudamapper_utils.hpp b/cudamapper/src/cudamapper_utils.hpp index 37b9e3cd1..7c0ce83b5 100644 --- a/cudamapper/src/cudamapper_utils.hpp +++ b/cudamapper/src/cudamapper_utils.hpp @@ -13,7 +13,7 @@ #include #include -#include "claragenomics/cudamapper/types.hpp" +#include namespace claragenomics { diff --git a/cudamapper/src/minimizer.hpp b/cudamapper/src/minimizer.hpp index 0adff29e0..dc022a0f1 100644 --- a/cudamapper/src/minimizer.hpp +++ b/cudamapper/src/minimizer.hpp @@ -12,8 +12,8 @@ #include #include -#include "claragenomics/cudamapper/sketch_element.hpp" -#include "claragenomics/cudamapper/types.hpp" +#include +#include #include diff --git a/cudamapper/src/overlapper.cpp b/cudamapper/src/overlapper.cpp index c48318c07..3e7f55d29 100644 --- a/cudamapper/src/overlapper.cpp +++ b/cudamapper/src/overlapper.cpp @@ -9,7 +9,7 @@ */ #include -#include "claragenomics/cudamapper/overlapper.hpp" +#include namespace claragenomics { diff --git a/cudamapper/src/overlapper_triggered.cu b/cudamapper/src/overlapper_triggered.cu index ff675f690..bcbe02aad 100644 --- a/cudamapper/src/overlapper_triggered.cu +++ b/cudamapper/src/overlapper_triggered.cu @@ -12,10 +12,9 @@ #include #include -#include "claragenomics/cudamapper/overlapper.hpp" +#include #include "cudamapper_utils.hpp" #include "overlapper_triggered.hpp" -#include #include namespace claragenomics diff --git a/cudamapper/src/overlapper_triggered.hpp b/cudamapper/src/overlapper_triggered.hpp index 533d6833b..c7c4d668a 100644 --- a/cudamapper/src/overlapper_triggered.hpp +++ b/cudamapper/src/overlapper_triggered.hpp @@ -12,8 +12,8 @@ #include -#include "claragenomics/cudamapper/types.hpp" -#include "claragenomics/cudamapper/overlapper.hpp" +#include +#include namespace claragenomics { diff --git a/cudamapper/src/sequence.cpp b/cudamapper/src/sequence.cpp index 3d4b7e45c..7bfa8aea4 100644 --- a/cudamapper/src/sequence.cpp +++ b/cudamapper/src/sequence.cpp @@ -8,7 +8,7 @@ * license agreement from NVIDIA CORPORATION is strictly prohibited. 
*/ -#include "cudamapper/sequence.hpp" +#include #include "bioparser_sequence.hpp" namespace claragenomics From 5fc5dd68482b0f5222c934eb9714dcc4f3a9201a Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Fri, 22 Nov 2019 13:19:46 +0100 Subject: [PATCH 104/128] [cudamapper] Fixed a bug with wrong read ranges in main.cu --- cudamapper/src/main.cu | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/cudamapper/src/main.cu b/cudamapper/src/main.cu index 0f42c3a74..17ca5a6d6 100644 --- a/cudamapper/src/main.cu +++ b/cudamapper/src/main.cu @@ -164,11 +164,11 @@ int main(int argc, char* argv[]) std::chrono::milliseconds matcher_time = std::chrono::duration_values::zero(); std::chrono::milliseconds overlapper_time = std::chrono::duration_values::zero(); - for (size_t query_start = 0; query_start < queries; query_start += index_size) + for (size_t query_start_index = 0; query_start_index < queries; query_start_index += index_size) { // outer loop over query - size_t query_end = std::min(query_start + index_size, static_cast(queries) - 1); + size_t query_one_past_the_end_index = std::min(query_start_index + index_size, static_cast(queries)); - std::cerr << "Query range: " << query_start << " - " << query_end << std::endl; + std::cerr << "Query range: " << query_start_index << " - " << query_one_past_the_end_index - 1 << std::endl; std::unique_ptr query_index(nullptr); std::unique_ptr target_index(nullptr); @@ -178,34 +178,34 @@ int main(int argc, char* argv[]) CGA_NVTX_RANGE(profiler, "generate_query_index"); auto start_time = std::chrono::high_resolution_clock::now(); query_index = claragenomics::cudamapper::IndexTwoIndices::create_index(*query_parser, - query_start, - query_end + 1, // <- past the last + query_start_index, + query_one_past_the_end_index, k, w); index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); std::cerr << "Query index generation time: " << std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time).count() << "ms" << std::endl; } - size_t target_start = 0; + size_t target_start_index = 0; // If all_to_all mode, then we can optimzie by starting the target sequences from the same index as // query because all indices before the current query index are guaranteed to have been processed in // a2a mapping. 
if (all_to_all) { - target_start = query_start; + target_start_index = query_start_index; } - for (; target_start < targets; target_start += target_index_size) + for (; target_start_index < targets; target_start_index += target_index_size) { - size_t target_end = std::min(target_start + target_index_size, static_cast(targets) - 1); + size_t target_one_past_the_end_index = std::min(target_start_index + target_index_size, static_cast(targets)); - std::cerr << "Target range: " << target_start << " - " << target_end << std::endl; + std::cerr << "Target range: " << target_start_index << " - " << target_one_past_the_end_index - 1 << std::endl; { CGA_NVTX_RANGE(profiler, "generate_target_index"); auto start_time = std::chrono::high_resolution_clock::now(); target_index = claragenomics::cudamapper::IndexTwoIndices::create_index(*target_parser, - target_start, - target_end + 1, // <- past the last + target_start_index, + target_one_past_the_end_index, k, w); index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); From ff8230275f3af3e037db2ffc24ef847780a19d31 Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Fri, 22 Nov 2019 13:58:08 +0100 Subject: [PATCH 105/128] [cudamapper] Added a comma to googlemock's macro (apparetly both work, but only the with the comma is documented) --- cudamapper/tests/mock_index.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cudamapper/tests/mock_index.cuh b/cudamapper/tests/mock_index.cuh index 5f18477e3..4157ba20b 100644 --- a/cudamapper/tests/mock_index.cuh +++ b/cudamapper/tests/mock_index.cuh @@ -34,7 +34,7 @@ public: { } - MOCK_METHOD(const std::string&, read_id_to_read_name, (const read_id_t read_id), (const override)); + MOCK_METHOD(const std::string&, read_id_to_read_name, (const read_id_t read_id), (const, override)); MOCK_METHOD(const std::uint32_t&, read_id_to_read_length, (const read_id_t read_id), (const, override)); }; From 06d0b93c604431ce192c3be295521ee1016c858c Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Fri, 22 Nov 2019 14:36:36 +0100 Subject: [PATCH 106/128] [cudamapper] query_one_past_the_end_index and target_one_past_the_end_index renamed to query_end_index and target_end_index --- cudamapper/src/main.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cudamapper/src/main.cu b/cudamapper/src/main.cu index 17ca5a6d6..92e961b1c 100644 --- a/cudamapper/src/main.cu +++ b/cudamapper/src/main.cu @@ -166,9 +166,9 @@ int main(int argc, char* argv[]) for (size_t query_start_index = 0; query_start_index < queries; query_start_index += index_size) { // outer loop over query - size_t query_one_past_the_end_index = std::min(query_start_index + index_size, static_cast(queries)); + size_t query_end_index = std::min(query_start_index + index_size, static_cast(queries)); - std::cerr << "Query range: " << query_start_index << " - " << query_one_past_the_end_index - 1 << std::endl; + std::cerr << "Query range: " << query_start_index << " - " << query_end_index - 1 << std::endl; std::unique_ptr query_index(nullptr); std::unique_ptr target_index(nullptr); @@ -179,7 +179,7 @@ int main(int argc, char* argv[]) auto start_time = std::chrono::high_resolution_clock::now(); query_index = claragenomics::cudamapper::IndexTwoIndices::create_index(*query_parser, query_start_index, - query_one_past_the_end_index, + query_end_index, k, w); index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); @@ -196,16 +196,16 @@ int main(int argc, char* 
argv[]) } for (; target_start_index < targets; target_start_index += target_index_size) { - size_t target_one_past_the_end_index = std::min(target_start_index + target_index_size, static_cast(targets)); + size_t target_end_index = std::min(target_start_index + target_index_size, static_cast(targets)); - std::cerr << "Target range: " << target_start_index << " - " << target_one_past_the_end_index - 1 << std::endl; + std::cerr << "Target range: " << target_start_index << " - " << target_end_index - 1 << std::endl; { CGA_NVTX_RANGE(profiler, "generate_target_index"); auto start_time = std::chrono::high_resolution_clock::now(); target_index = claragenomics::cudamapper::IndexTwoIndices::create_index(*target_parser, target_start_index, - target_one_past_the_end_index, + target_end_index, k, w); index_time += std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - start_time); From f49b6b806fd0e9840f28045cd7047a03861ba93a Mon Sep 17 00:00:00 2001 From: Milos Maric Date: Fri, 22 Nov 2019 15:51:06 +0100 Subject: [PATCH 107/128] [cudamapper] Renamed namespace index_gpu_two_indices to index_gpu --- cudamapper/src/index_gpu.cu | 4 ++-- cudamapper/src/index_gpu.cuh | 20 ++++++++++---------- cudamapper/tests/Test_CudamapperIndexGPU.cu | 4 ++-- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cudamapper/src/index_gpu.cu b/cudamapper/src/index_gpu.cu index c62c1b5bd..46323df64 100644 --- a/cudamapper/src/index_gpu.cu +++ b/cudamapper/src/index_gpu.cu @@ -16,7 +16,7 @@ namespace cudamapper { namespace details { -namespace index_gpu_two_indices +namespace index_gpu { void find_first_occurrences_of_representations(thrust::device_vector& unique_representations_d, thrust::device_vector& first_occurrence_index_d, @@ -123,7 +123,7 @@ __global__ void find_first_occurrences_of_representations_kernel(const std::uint } } } -} // namespace index_gpu_two_indices +} // namespace index_gpu } // namespace details } // namespace cudamapper diff --git a/cudamapper/src/index_gpu.cuh b/cudamapper/src/index_gpu.cuh index 1ed545bb1..ba80d4d74 100644 --- a/cudamapper/src/index_gpu.cuh +++ b/cudamapper/src/index_gpu.cuh @@ -126,7 +126,7 @@ private: namespace details { -namespace index_gpu_two_indices +namespace index_gpu { /// \brief Creates compressed representation of index /// @@ -217,7 +217,7 @@ __global__ void copy_rest_to_separate_arrays(const ReadidPositionDirection* cons directions_of_reads_d[i] = DirectionOfRepresentation(rest_d[i].direction_); } -} // namespace index_gpu_two_indices +} // namespace index_gpu } // namespace details template @@ -419,16 +419,16 @@ void IndexGPU::generate_index(const io::FastaParser& parser, const std::uint32_t threads = 256; const std::uint32_t blocks = ceiling_divide(representations_d_.size(), threads); - details::index_gpu_two_indices::copy_rest_to_separate_arrays<<>>(rest_d.data(), - read_ids_d_.data().get(), - positions_in_reads_d_.data().get(), - directions_of_reads_d_.data().get(), - representations_d_.size()); + details::index_gpu::copy_rest_to_separate_arrays<<>>(rest_d.data(), + read_ids_d_.data().get(), + positions_in_reads_d_.data().get(), + directions_of_reads_d_.data().get(), + representations_d_.size()); // now generate the index elements - details::index_gpu_two_indices::find_first_occurrences_of_representations(unique_representations_d_, - first_occurrence_of_representations_d_, - representations_d_); + details::index_gpu::find_first_occurrences_of_representations(unique_representations_d_, + first_occurrence_of_representations_d_, + 
representations_d_); } } // namespace cudamapper diff --git a/cudamapper/tests/Test_CudamapperIndexGPU.cu b/cudamapper/tests/Test_CudamapperIndexGPU.cu index 61b7eed04..6816f5604 100644 --- a/cudamapper/tests/Test_CudamapperIndexGPU.cu +++ b/cudamapper/tests/Test_CudamapperIndexGPU.cu @@ -26,7 +26,7 @@ namespace cudamapper namespace details { -namespace index_gpu_two_indices +namespace index_gpu { // ************ Test find_first_occurrences_of_representations_kernel ************** @@ -490,7 +490,7 @@ TEST(TestCudamapperIndexGPU, test_function_copy_rest_to_separate_arrays) threads); } -} // namespace index_gpu_two_indices +} // namespace index_gpu } // namespace details void test_function(const std::string& filename, From 7b537ab58d4c971a604729027b54a99914536f8b Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Fri, 22 Nov 2019 18:10:57 +0200 Subject: [PATCH 108/128] [pyclaragenomics] Fix setup --build_output_folder relative path issue Fixes #230 --- pyclaragenomics/setup_pyclaragenomics.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyclaragenomics/setup_pyclaragenomics.py b/pyclaragenomics/setup_pyclaragenomics.py index 9965ab46b..5f19d0a1a 100644 --- a/pyclaragenomics/setup_pyclaragenomics.py +++ b/pyclaragenomics/setup_pyclaragenomics.py @@ -95,11 +95,12 @@ def setup_python_binding(is_develop_mode, pycga_dir, cga_install_dir): if __name__ == "__main__": args = parse_arguments() + cga_build_folder = os.path.realpath(args.build_output_folder) current_dir = os.path.dirname(os.path.realpath(__file__)) - cga_installation_directory = os.path.join(args.build_output_folder, "install") + cga_installation_directory = os.path.join(cga_build_folder, "install") # Build & install Clara Genomics Analysis SDK cmake_proj = CMakeWrapper(cmake_root_dir=os.path.dirname(current_dir), - cmake_build_path=args.build_output_folder, + cmake_build_path=cga_build_folder, cga_install_dir=cga_installation_directory, cmake_extra_args="-Dcga_build_shared=ON") cmake_proj.build() From 6ea8abb007e27f0410350a527a040c735cb4cabb Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Fri, 22 Nov 2019 18:50:33 +0200 Subject: [PATCH 109/128] [CI] Use relative path with --build_output_folder argument Fixes #230 --- ci/common/test-pyclaragenomics.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/common/test-pyclaragenomics.sh b/ci/common/test-pyclaragenomics.sh index e341998d3..3bfedeb31 100644 --- a/ci/common/test-pyclaragenomics.sh +++ b/ci/common/test-pyclaragenomics.sh @@ -19,7 +19,7 @@ cd $PYCLARAGENOMICS_DIR #Install external dependencies. python -m pip install -r requirements.txt -python setup_pyclaragenomics.py +python setup_pyclaragenomics.py --build_output_folder cga_build # Run tests. 
cd test/ From 5d136d4be7a80755e1f35d5e8ef2070a96a59411 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Fri, 22 Nov 2019 14:40:01 -0500 Subject: [PATCH 110/128] [pycga] remove unnecessary imports --- pyclaragenomics/claragenomics/bindings/graph.pxd | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pyclaragenomics/claragenomics/bindings/graph.pxd b/pyclaragenomics/claragenomics/bindings/graph.pxd index c34dfed18..908d12a58 100644 --- a/pyclaragenomics/claragenomics/bindings/graph.pxd +++ b/pyclaragenomics/claragenomics/bindings/graph.pxd @@ -14,9 +14,8 @@ # cython: language_level = 3 from libcpp.pair cimport pair -from libcpp.memory cimport unique_ptr from libcpp.string cimport string -from libc.stdint cimport int8_t, int16_t, uint16_t, int32_t +from libc.stdint cimport int32_t from libcpp.vector cimport vector # This file declares public structs and API calls From 6da834dc0f6afe482c11d46c2fb097e1aeab71a1 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Sun, 24 Nov 2019 13:16:28 +0200 Subject: [PATCH 111/128] [pyclaragenomics] Recator path resolution in setup script --- pyclaragenomics/setup_pyclaragenomics.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/pyclaragenomics/setup_pyclaragenomics.py b/pyclaragenomics/setup_pyclaragenomics.py index 5f19d0a1a..a86f1c947 100644 --- a/pyclaragenomics/setup_pyclaragenomics.py +++ b/pyclaragenomics/setup_pyclaragenomics.py @@ -16,18 +16,11 @@ import subprocess -def get_relative_path(sub_folder_name): - return os.path.join( - os.path.dirname(os.path.realpath(__file__)), - sub_folder_name - ) - - def parse_arguments(): parser = argparse.ArgumentParser(description='build & install Clara Genomics Analysis SDK.') parser.add_argument('--build_output_folder', required=False, - default=get_relative_path("cga_build"), + default="cga_build", help="Choose an output folder for building") parser.add_argument('--develop', required=False, @@ -87,7 +80,7 @@ def setup_python_binding(is_develop_mode, pycga_dir, cga_install_dir): env={ **os.environ, 'PYCGA_DIR': pycga_dir, - 'CGA_INSTALL_DIR': cga_install_dir + 'CGA_INSTALL_DIR': os.path.realpath(cga_install_dir) }, cwd=pycga_dir) @@ -95,12 +88,11 @@ def setup_python_binding(is_develop_mode, pycga_dir, cga_install_dir): if __name__ == "__main__": args = parse_arguments() - cga_build_folder = os.path.realpath(args.build_output_folder) current_dir = os.path.dirname(os.path.realpath(__file__)) - cga_installation_directory = os.path.join(cga_build_folder, "install") + cga_installation_directory = os.path.join(args.build_output_folder, "install") # Build & install Clara Genomics Analysis SDK cmake_proj = CMakeWrapper(cmake_root_dir=os.path.dirname(current_dir), - cmake_build_path=cga_build_folder, + cmake_build_path=args.build_output_folder, cga_install_dir=cga_installation_directory, cmake_extra_args="-Dcga_build_shared=ON") cmake_proj.build() From 77a2e3d87eaa1d3618dec373becd5d6b291382cb Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Mon, 25 Nov 2019 22:39:23 +0200 Subject: [PATCH 112/128] [pyclaragenomics] Create wheel package from pyclaragenomics --- pyclaragenomics/setup.py | 35 ++++++++++++++++-- pyclaragenomics/setup_pyclaragenomics.py | 46 ++++++++++++++++++++---- 2 files changed, 73 insertions(+), 8 deletions(-) diff --git a/pyclaragenomics/setup.py b/pyclaragenomics/setup.py index 6e6bb6e1d..302a4cc00 100755 --- a/pyclaragenomics/setup.py +++ b/pyclaragenomics/setup.py @@ -11,6 +11,7 @@ # import os +import glob from setuptools import setup, find_packages, 
Extension from Cython.Build import cythonize @@ -22,6 +23,13 @@ def get_verified_path(path): return installed_path +def get_installation_requirments(file_path): + with open(file_path, 'r') as file: + requirements_file_content = \ + [line.strip() for line in file if line.strip() and not line.lstrip().startswith('#')] + return requirements_file_content + + # Must be set before calling pip try: pycga_dir = os.environ['PYCGA_DIR'] @@ -30,6 +38,20 @@ def get_verified_path(path): raise EnvironmentError( 'PYCGA_DIR CGA_INSTALL_DIR environment variables must be set').with_traceback(e.__traceback__) +# Classifiers for PyPI +pycga_classifiers = [ + 'Development Status :: 5 - Production/Stable', + 'Operating System :: POSIX :: Linux', + 'Intended Audience :: Science/Research', + 'Topic :: Scientific/Engineering :: Bio-Informatics', + 'License :: OSI Approved :: Apache Software License', + 'Natural Language :: English', + 'Operating System :: POSIX :: Linux', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', +], extensions = [ Extension( @@ -39,8 +61,8 @@ def get_verified_path(path): "/usr/local/cuda/include", get_verified_path(os.path.join(cga_install_dir, "include")), ], - library_dirs=["/usr/local/cuda/lib64", get_verified_path(os.path.join(cga_install_dir, "lib"))], - runtime_library_dirs=["/usr/local/cuda/lib64", get_verified_path(os.path.join(cga_install_dir, "lib"))], + library_dirs=["/usr/local/cuda/lib64", get_verified_path(os.path.join(cga_install_dir, "lib"))], + runtime_library_dirs=["/usr/local/cuda/lib64", "$ORIGIN/../shared_libs/"], libraries=["cudapoa", "cudaaligner", "cudart"], language="c++", extra_compile_args=["-std=c++14"], @@ -51,7 +73,16 @@ def get_verified_path(path): version='0.3.0', description='NVIDIA genomics python libraries and utiliites', author='NVIDIA Corporation', + url="https://github.com/clara-genomics/ClaraGenomicsAnalysis", + package_data={ + 'claragenomics': glob.glob(os.path.join(pycga_dir, 'claragenomics/shared_libs/*.so')) + }, + install_requires=get_installation_requirments(os.path.join(pycga_dir, 'requirements.txt')), packages=find_packages(where=pycga_dir), + python_requires='>=3.6', + license='Apache License 2.0', + long_description='Python libraries and utilities for manipulating genomics data', + classifiers=pycga_classifiers, ext_modules=cythonize(extensions, compiler_directives={'embedsignature': True}), scripts=[get_verified_path(os.path.join(pycga_dir, 'bin', 'genome_simulator')), get_verified_path(os.path.join(pycga_dir, 'bin', 'assembly_evaluator'))], diff --git a/pyclaragenomics/setup_pyclaragenomics.py b/pyclaragenomics/setup_pyclaragenomics.py index 5f19d0a1a..4165d1f6f 100644 --- a/pyclaragenomics/setup_pyclaragenomics.py +++ b/pyclaragenomics/setup_pyclaragenomics.py @@ -14,7 +14,8 @@ import os.path import os import subprocess - +import shutil +import glob def get_relative_path(sub_folder_name): return os.path.join( @@ -23,16 +24,34 @@ def get_relative_path(sub_folder_name): ) +def copy_all_files_in_directory(src, dest, file_ext="*.so"): + files_to_copy = glob.glob(os.path.join(src, file_ext)) + if not files_to_copy: + raise RuntimeError("No {} files under {}".format(src, file_ext)) + os.makedirs(os.path.dirname(dest), exist_ok=True) + try: + for file in files_to_copy: + shutil.copy(file, dest) + print("{} was copied into {}".format(file, dest)) + except (shutil.Error, PermissionError) as err: + print('Could not copy {}. 
Error: {}'.format(file, err)) + raise err + + def parse_arguments(): parser = argparse.ArgumentParser(description='build & install Clara Genomics Analysis SDK.') parser.add_argument('--build_output_folder', required=False, default=get_relative_path("cga_build"), help="Choose an output folder for building") + parser.add_argument('--create_wheel_only', + required=False, + action='store_true', + help="Create ") parser.add_argument('--develop', required=False, action='store_true', - help="CInstall using pip editble mode") + help="Install using pip editble mode") return parser.parse_args() @@ -82,14 +101,25 @@ def build(self): self.run_build_cmd() -def setup_python_binding(is_develop_mode, pycga_dir, cga_install_dir): - subprocess.check_call(['pip', 'install'] + (['-e'] if is_develop_mode else []) + ["."], +def setup_python_binding(is_develop_mode, wheel_output_folder, pycga_dir, cga_install_dir): + if wheel_output_folder: + setup_command = ['python', 'setup.py', 'bdist_wheel', '-d', wheel_output_folder] + completion_message = \ + "A wheel file was create for pyclaragenomics under {}".format(wheel_output_folder) + else: + setup_command = ['pip', 'install'] + (['-e'] if is_develop_mode else []) + ["."] + completion_message = \ + "pyclaragenomics was successfully setup in {} mode!".format( + "development" if args.develop else "installation") + + subprocess.check_call(setup_command, env={ **os.environ, 'PYCGA_DIR': pycga_dir, 'CGA_INSTALL_DIR': cga_install_dir }, cwd=pycga_dir) + print(completion_message) if __name__ == "__main__": @@ -104,9 +134,13 @@ def setup_python_binding(is_develop_mode, pycga_dir, cga_install_dir): cga_install_dir=cga_installation_directory, cmake_extra_args="-Dcga_build_shared=ON") cmake_proj.build() + # Copyies shared libraries into clargenomics package + copy_all_files_in_directory( + os.path.join(cga_installation_directory, "lib"), + os.path.join(current_dir, "claragenomics/shared_libs/"), + ) # Setup pyclaragenomics setup_python_binding(is_develop_mode=args.develop, + wheel_output_folder=cga_build_folder if args.create_wheel_only else None, pycga_dir=current_dir, cga_install_dir=cga_installation_directory) - print("pyclaragenomics was successfully setup in {} mode!" - .format("development" if args.develop else "installation")) From 4a4be886fb21ec9c739131cce79c87a1b31a1608 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Tue, 26 Nov 2019 01:28:52 +0200 Subject: [PATCH 113/128] [pyclaragenomics] Fix Build tests failure and Mike's review comments Fixes #238 --- pyclaragenomics/setup.py | 12 +++++++----- pyclaragenomics/setup_pyclaragenomics.py | 15 +++++++++------ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/pyclaragenomics/setup.py b/pyclaragenomics/setup.py index 302a4cc00..aeba1bc0f 100755 --- a/pyclaragenomics/setup.py +++ b/pyclaragenomics/setup.py @@ -9,10 +9,10 @@ # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. 
# - -import os import glob +import os from setuptools import setup, find_packages, Extension + from Cython.Build import cythonize @@ -34,9 +34,11 @@ def get_installation_requirments(file_path): try: pycga_dir = os.environ['PYCGA_DIR'] cga_install_dir = os.environ['CGA_INSTALL_DIR'] + cga_runtime_lib_dir = os.environ['CGA_RUNTIME_LIB_DIR'] except KeyError as e: raise EnvironmentError( - 'PYCGA_DIR CGA_INSTALL_DIR environment variables must be set').with_traceback(e.__traceback__) + 'PYCGA_DIR CGA_INSTALL_DIR CGA_RUNTIME_LIB_DIR \ + environment variables must be set').with_traceback(e.__traceback__) # Classifiers for PyPI pycga_classifiers = [ @@ -62,7 +64,7 @@ def get_installation_requirments(file_path): get_verified_path(os.path.join(cga_install_dir, "include")), ], library_dirs=["/usr/local/cuda/lib64", get_verified_path(os.path.join(cga_install_dir, "lib"))], - runtime_library_dirs=["/usr/local/cuda/lib64", "$ORIGIN/../shared_libs/"], + runtime_library_dirs=["/usr/local/cuda/lib64", cga_runtime_lib_dir], libraries=["cudapoa", "cudaaligner", "cudart"], language="c++", extra_compile_args=["-std=c++14"], @@ -70,7 +72,7 @@ def get_installation_requirments(file_path): ] setup(name='pyclaragenomics', - version='0.3.0', + version='0.4.0', description='NVIDIA genomics python libraries and utiliites', author='NVIDIA Corporation', url="https://github.com/clara-genomics/ClaraGenomicsAnalysis", diff --git a/pyclaragenomics/setup_pyclaragenomics.py b/pyclaragenomics/setup_pyclaragenomics.py index 4165d1f6f..67d57c6ed 100644 --- a/pyclaragenomics/setup_pyclaragenomics.py +++ b/pyclaragenomics/setup_pyclaragenomics.py @@ -103,20 +103,28 @@ def build(self): def setup_python_binding(is_develop_mode, wheel_output_folder, pycga_dir, cga_install_dir): if wheel_output_folder: + # Copies shared libraries into clargenomics package + copy_all_files_in_directory( + os.path.join(cga_install_dir, "lib"), + os.path.join(pycga_dir, "claragenomics/shared_libs/"), + ) setup_command = ['python', 'setup.py', 'bdist_wheel', '-d', wheel_output_folder] completion_message = \ "A wheel file was create for pyclaragenomics under {}".format(wheel_output_folder) + cga_runtime_lib_dir = os.path.join('$ORIGIN', os.pardir, 'shared_libs') else: setup_command = ['pip', 'install'] + (['-e'] if is_develop_mode else []) + ["."] completion_message = \ "pyclaragenomics was successfully setup in {} mode!".format( "development" if args.develop else "installation") + cga_runtime_lib_dir = os.path.join(cga_install_dir, "lib") subprocess.check_call(setup_command, env={ **os.environ, 'PYCGA_DIR': pycga_dir, - 'CGA_INSTALL_DIR': cga_install_dir + 'CGA_INSTALL_DIR': cga_install_dir, + 'CGA_RUNTIME_LIB_DIR': cga_runtime_lib_dir }, cwd=pycga_dir) print(completion_message) @@ -134,11 +142,6 @@ def setup_python_binding(is_develop_mode, wheel_output_folder, pycga_dir, cga_in cga_install_dir=cga_installation_directory, cmake_extra_args="-Dcga_build_shared=ON") cmake_proj.build() - # Copyies shared libraries into clargenomics package - copy_all_files_in_directory( - os.path.join(cga_installation_directory, "lib"), - os.path.join(current_dir, "claragenomics/shared_libs/"), - ) # Setup pyclaragenomics setup_python_binding(is_develop_mode=args.develop, wheel_output_folder=cga_build_folder if args.create_wheel_only else None, From b26e5c7260043f4ce7c0fbf224e99407afb90d4c Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Tue, 26 Nov 2019 01:30:43 +0200 Subject: [PATCH 114/128] Promote version to 0.4.0 Fixes #238 --- CMakeLists.txt | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cd171cbea..1d66e9a58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ cmake_minimum_required(VERSION 3.10.2) set(CGA_PROJECT_NAME ClaraGenomicsAnalysis) -set(CGA_VERSION 0.3.0) +set(CGA_VERSION 0.4.0) project(${CGA_PROJECT_NAME}) # Process options. From f1a6ab3da21b47debd70c6048572e802f314bc89 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Tue, 26 Nov 2019 01:36:34 +0200 Subject: [PATCH 115/128] [pyclaragenomics] Fix linter style error Fixes #238 --- pyclaragenomics/setup_pyclaragenomics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pyclaragenomics/setup_pyclaragenomics.py b/pyclaragenomics/setup_pyclaragenomics.py index 67d57c6ed..e726e2885 100644 --- a/pyclaragenomics/setup_pyclaragenomics.py +++ b/pyclaragenomics/setup_pyclaragenomics.py @@ -17,6 +17,7 @@ import shutil import glob + def get_relative_path(sub_folder_name): return os.path.join( os.path.dirname(os.path.realpath(__file__)), From 951b171d208c60aa2c1c6088487e8afda689804a Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Tue, 26 Nov 2019 02:49:12 +0200 Subject: [PATCH 116/128] [pyclaragenomics] Fix merge errors Fixes #238 --- pyclaragenomics/setup_pyclaragenomics.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pyclaragenomics/setup_pyclaragenomics.py b/pyclaragenomics/setup_pyclaragenomics.py index 930855db7..b2fcef919 100644 --- a/pyclaragenomics/setup_pyclaragenomics.py +++ b/pyclaragenomics/setup_pyclaragenomics.py @@ -102,7 +102,7 @@ def setup_python_binding(is_develop_mode, wheel_output_folder, pycga_dir, cga_in os.path.join(cga_install_dir, "lib"), os.path.join(pycga_dir, "claragenomics/shared_libs/"), ) - setup_command = ['python', 'setup.py', 'bdist_wheel', '-d', os.path.realpath(wheel_output_folder)] + setup_command = ['python', 'setup.py', 'bdist_wheel', '-d', wheel_output_folder] completion_message = \ "A wheel file was create for pyclaragenomics under {}".format(wheel_output_folder) cga_runtime_lib_dir = os.path.join('$ORIGIN', os.pardir, 'shared_libs') @@ -117,7 +117,7 @@ def setup_python_binding(is_develop_mode, wheel_output_folder, pycga_dir, cga_in env={ **os.environ, 'PYCGA_DIR': pycga_dir, - 'CGA_INSTALL_DIR': os.path.realpath(cga_install_dir), + 'CGA_INSTALL_DIR': cga_install_dir, 'CGA_RUNTIME_LIB_DIR': cga_runtime_lib_dir }, cwd=pycga_dir) @@ -136,7 +136,9 @@ def setup_python_binding(is_develop_mode, wheel_output_folder, pycga_dir, cga_in cmake_extra_args="-Dcga_build_shared=ON") cmake_proj.build() # Setup pyclaragenomics - setup_python_binding(is_develop_mode=args.develop, - wheel_output_folder=args.build_output_folder if args.create_wheel_only else None, - pycga_dir=current_dir, - cga_install_dir=cga_installation_directory) + setup_python_binding( + is_develop_mode=args.develop, + wheel_output_folder=os.path.realpath(args.build_output_folder) if args.create_wheel_only else None, + pycga_dir=current_dir, + cga_install_dir=os.path.realpath(cga_installation_directory) + ) From da6a05941cd4ce2c0c5c37060e57713370337f2d Mon Sep 17 00:00:00 2001 From: Mike Vella Date: Tue, 26 Nov 2019 12:17:28 +0000 Subject: [PATCH 117/128] paf_eval bugfix and lint * Fixed bug whereby if query and target were reversed in order overlaps were not being evaluated * Linted for PEP8 compliance * Added printing of TP,FP and FN absolute numbers --- pyclaragenomics/bin/evaluate_paf | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git 
a/pyclaragenomics/bin/evaluate_paf b/pyclaragenomics/bin/evaluate_paf index 6ecce9dcf..b0a5335e2 100755 --- a/pyclaragenomics/bin/evaluate_paf +++ b/pyclaragenomics/bin/evaluate_paf @@ -16,6 +16,7 @@ from collections import defaultdict from claragenomics.io import pafio + def match_overlaps(query_0, query_1, target_0, target_1, pos_tolerance): """Given two sets of query and target ranges, check if the query and target ranges fall within a specified tolerance of each other. @@ -33,6 +34,7 @@ def match_overlaps(query_0, query_1, target_0, target_1, pos_tolerance): abs(target_0[0] - target_1[0]) < pos_tolerance and \ abs(target_0[1] - target_1[1]) < pos_tolerance + def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400, skip_self_mappings=True): """Given a truth and test set PAF file, count number of in/correctly detected, and non-detected overlaps Args: @@ -53,7 +55,7 @@ def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400, skip_ (truth_overlap.query_sequence_name == truth_overlap.target_sequence_name): continue - key = truth_overlap.query_sequence_name + truth_overlap.target_sequence_name + key = truth_overlap.query_sequence_name + "_" + truth_overlap.target_sequence_name truth_overlaps[key].append(truth_overlap) num_true_overlaps += 1 @@ -62,6 +64,10 @@ def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400, skip_ false_positive_count = 0 false_negative_count = 0 + print("Counted {} true overlaps".format(num_true_overlaps)) + + seen_test_overlap_keys = set() + for test_overlap in pafio.read_paf(test_paf_filepath): if skip_self_mappings and \ (test_overlap.query_sequence_name == test_overlap.target_sequence_name): @@ -70,9 +76,15 @@ def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400, skip_ query_0 = (test_overlap.query_start, test_overlap.query_end) target_0 = (test_overlap.target_start, test_overlap.target_end) - key = test_overlap.query_sequence_name + test_overlap.target_sequence_name + key = test_overlap.query_sequence_name + "_" + test_overlap.target_sequence_name key_reversed = test_overlap.target_sequence_name + "_" + test_overlap.query_sequence_name + if (key in seen_test_overlap_keys) or (key_reversed in seen_test_overlap_keys): + continue + + seen_test_overlap_keys.add(key) + seen_test_overlap_keys.add(key_reversed) + found_match = False if key in truth_overlaps: for truth_overlap in truth_overlaps[key]: @@ -125,8 +137,12 @@ if __name__ == "__main__": args = parser.parse_args() - true_positives, false_positives, false_negatives = evaluate_paf(args.truth_paf, args.test_paf, \ - args.pos_tolerance, args.skip_self_mapping) + true_positives, false_positives, false_negatives = evaluate_paf(args.truth_paf, args.test_paf, + args.pos_tolerance, args.skip_self_mapping) + + print("True positives: ", true_positives) + print("False positives: ", false_positives) + print("False negatives: ", false_negatives) precision = true_positives / (true_positives + false_positives) recall = true_positives / (true_positives + false_negatives) From c1b30faa2b12a9a92779b20ba4a9d38c24bc52be Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Wed, 27 Nov 2019 00:10:48 -0500 Subject: [PATCH 118/128] [pycga] move key generation to separate function --- pyclaragenomics/bin/evaluate_paf | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/pyclaragenomics/bin/evaluate_paf b/pyclaragenomics/bin/evaluate_paf index b0a5335e2..acf9a7b42 100755 --- a/pyclaragenomics/bin/evaluate_paf +++ 
b/pyclaragenomics/bin/evaluate_paf @@ -34,6 +34,16 @@ def match_overlaps(query_0, query_1, target_0, target_1, pos_tolerance): abs(target_0[0] - target_1[0]) < pos_tolerance and \ abs(target_0[1] - target_1[1]) < pos_tolerance +def generate_key(name_1, name_2): + """Given two read names, return key for indexing overlaps. + + Args: + name_1 (str) : Name of first read + name_2 (str): Name of second read + + Returns: a key of concatenated names. + """ + return "{}_{}".format(name_1, name_2) def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400, skip_self_mappings=True): """Given a truth and test set PAF file, count number of in/correctly detected, and non-detected overlaps @@ -55,7 +65,7 @@ def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400, skip_ (truth_overlap.query_sequence_name == truth_overlap.target_sequence_name): continue - key = truth_overlap.query_sequence_name + "_" + truth_overlap.target_sequence_name + key = generate_key(truth_overlap.query_sequence_name, truth_overlap.target_sequence_name) truth_overlaps[key].append(truth_overlap) num_true_overlaps += 1 @@ -76,8 +86,8 @@ def evaluate_paf(truth_paf_filepath, test_paf_filepath, pos_tolerance=400, skip_ query_0 = (test_overlap.query_start, test_overlap.query_end) target_0 = (test_overlap.target_start, test_overlap.target_end) - key = test_overlap.query_sequence_name + "_" + test_overlap.target_sequence_name - key_reversed = test_overlap.target_sequence_name + "_" + test_overlap.query_sequence_name + key = generate_key(test_overlap.query_sequence_name, test_overlap.target_sequence_name) + key_reversed = generate_key(test_overlap.target_sequence_name, test_overlap.query_sequence_name) if (key in seen_test_overlap_keys) or (key_reversed in seen_test_overlap_keys): continue From 74c300af6edf5b54304c59890073f4c473fe2954 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Wed, 27 Nov 2019 16:34:36 +0100 Subject: [PATCH 119/128] Revert "[cudaaligner] Hirschberg+Myers performance through sleep" This reverts commit 8545a44732bc59b0e095a042efcdb62ec3a165e3. --- cudaaligner/src/hirschberg_myers_gpu.cu | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cudaaligner/src/hirschberg_myers_gpu.cu b/cudaaligner/src/hirschberg_myers_gpu.cu index 8bd64677f..a87b3cac5 100644 --- a/cudaaligner/src/hirschberg_myers_gpu.cu +++ b/cudaaligner/src/hirschberg_myers_gpu.cu @@ -580,9 +580,6 @@ public: } __syncwarp(); release_mutex(); -#if __CUDA_ARCH__ >= 700 - asm("nanosleep.u32 100000;"); -#endif } __syncwarp(); release_mutex(); @@ -605,9 +602,6 @@ private: { while (0 != atomicCAS(&(data_->mutex_), 0, 1)) { -#if __CUDA_ARCH__ >= 700 - asm("nanosleep.u32 10000;"); -#endif }; } __threadfence_block(); @@ -620,9 +614,6 @@ private: atomicOr(&(data_->mutex_), 0x0000'0002u); // reserve mutex while (2 != atomicCAS(&(data_->mutex_), 2, 1)) { -#if __CUDA_ARCH__ >= 700 - asm("nanosleep.u32 1000;"); -#endif atomicOr(&(data_->mutex_), 0x0000'0002u); // reserve mutex }; } From 5753034b11ce6df5756459dcb6879db190cbe1e1 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Wed, 27 Nov 2019 16:35:34 +0100 Subject: [PATCH 120/128] Revert "[cudaaligner] Added a high-priority lock for the mutex to prevent starvation" This reverts commit 97eb976d67cdffcebaddc4d337574d89f869738f. 
--- cudaaligner/src/hirschberg_myers_gpu.cu | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/cudaaligner/src/hirschberg_myers_gpu.cu b/cudaaligner/src/hirschberg_myers_gpu.cu index a87b3cac5..0260f163c 100644 --- a/cudaaligner/src/hirschberg_myers_gpu.cu +++ b/cudaaligner/src/hirschberg_myers_gpu.cu @@ -488,7 +488,7 @@ __device__ void hirschberg_myers_single_char_warp(int8_t* path, char query_char, template struct parallel_warp_shared_stack_state { - uint32_t mutex_; + int32_t mutex_; int32_t active_warps_; T* buffer_begin_; T* cur_end_; @@ -521,7 +521,7 @@ public: __device__ bool inline push(T const& t) { - lock_mutex_high_priority(); + lock_mutex(); bool success = false; __syncwarp(); @@ -592,7 +592,7 @@ private: __threadfence_block(); if (threadIdx.x % warp_size == 0) { - atomicAnd(&(data_->mutex_), 0x0000'0002u); + atomicExch(&(data_->mutex_), 0); } }; @@ -607,19 +607,6 @@ private: __threadfence_block(); } - __device__ inline void lock_mutex_high_priority() const - { - if (threadIdx.x % warp_size == 0) - { - atomicOr(&(data_->mutex_), 0x0000'0002u); // reserve mutex - while (2 != atomicCAS(&(data_->mutex_), 2, 1)) - { - atomicOr(&(data_->mutex_), 0x0000'0002u); // reserve mutex - }; - } - __threadfence_block(); - } - parallel_warp_shared_stack_state* const data_; }; From 15510b687435c15e6a0bb9e59a188c994573f291 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Wed, 27 Nov 2019 17:26:51 +0100 Subject: [PATCH 121/128] Revert "[cudaaligner] Hirschberg+Myers use multiple warps per alignment" This reverts commit ea5cb4e695297479fcffb5f7dce544f11d4a5f68. --- .../src/aligner_global_hirschberg_myers.cpp | 13 +- cudaaligner/src/hirschberg_myers_gpu.cu | 245 +++++------------- cudaaligner/src/hirschberg_myers_gpu.cuh | 1 - 3 files changed, 76 insertions(+), 183 deletions(-) diff --git a/cudaaligner/src/aligner_global_hirschberg_myers.cpp b/cudaaligner/src/aligner_global_hirschberg_myers.cpp index 814aed478..25ab2c8ec 100644 --- a/cudaaligner/src/aligner_global_hirschberg_myers.cpp +++ b/cudaaligner/src/aligner_global_hirschberg_myers.cpp @@ -21,16 +21,15 @@ namespace cudaaligner { static constexpr int32_t hirschberg_myers_stackbuffer_size = 64; -static constexpr int32_t hirschberg_myers_warps_per_alignment = 4; static constexpr int32_t hirschberg_myers_switch_to_myers_size = 63; // ideally a value 16*n-1, since memory allocation will require one more element. 
struct AlignerGlobalHirschbergMyers::Workspace { - Workspace(int32_t max_alignments, int32_t max_n_words, int32_t max_target_length, int32_t warps_per_alignment, int32_t switch_to_myers_size, cudaStream_t stream) + Workspace(int32_t max_alignments, int32_t max_n_words, int32_t max_target_length, int32_t switch_to_myers_size, cudaStream_t stream) : stackbuffer(max_alignments * hirschberg_myers_stackbuffer_size) - , pvs(max_alignments * warps_per_alignment, max_n_words * (switch_to_myers_size + 1), stream) - , mvs(max_alignments * warps_per_alignment, max_n_words * (switch_to_myers_size + 1), stream) - , scores(max_alignments * warps_per_alignment, std::max(max_n_words * (switch_to_myers_size + 1), (max_target_length + 1) * 2), stream) + , pvs(max_alignments, max_n_words * (switch_to_myers_size + 1), stream) + , mvs(max_alignments, max_n_words * (switch_to_myers_size + 1), stream) + , scores(max_alignments, std::max(max_n_words * (switch_to_myers_size + 1), (max_target_length + 1) * 2), stream) , query_patterns(max_alignments, max_n_words * 8, stream) { assert(switch_to_myers_size >= 1); @@ -46,7 +45,7 @@ AlignerGlobalHirschbergMyers::AlignerGlobalHirschbergMyers(int32_t max_query_len : AlignerGlobal(max_query_length, max_target_length, max_alignments, stream, device_id) { scoped_device_switch dev(device_id); - workspace_ = std::make_unique(max_alignments, ceiling_divide(max_query_length, sizeof(hirschbergmyers::WordType)), max_target_length, hirschberg_myers_warps_per_alignment, hirschberg_myers_switch_to_myers_size, stream); + workspace_ = std::make_unique(max_alignments, ceiling_divide(max_query_length, sizeof(hirschbergmyers::WordType)), max_target_length, hirschberg_myers_switch_to_myers_size, stream); } AlignerGlobalHirschbergMyers::~AlignerGlobalHirschbergMyers() @@ -62,7 +61,7 @@ void AlignerGlobalHirschbergMyers::run_alignment(int8_t* results_d, int32_t* res hirschberg_myers_gpu(workspace_->stackbuffer, hirschberg_myers_stackbuffer_size, results_d, result_lengths_d, max_result_length, sequences_d, sequence_lengths_d, max_sequence_length, num_alignments, workspace_->pvs, workspace_->mvs, workspace_->scores, workspace_->query_patterns, - hirschberg_myers_switch_to_myers_size, hirschberg_myers_warps_per_alignment, + hirschberg_myers_switch_to_myers_size, stream); } diff --git a/cudaaligner/src/hirschberg_myers_gpu.cu b/cudaaligner/src/hirschberg_myers_gpu.cu index 0260f163c..2a99c9f00 100644 --- a/cudaaligner/src/hirschberg_myers_gpu.cu +++ b/cudaaligner/src/hirschberg_myers_gpu.cu @@ -169,7 +169,7 @@ __device__ int32_t append_myers_backtrace(int8_t* path, device_matrix_view& query_pattern, char const* query, int32_t query_size) { - const int32_t n_words = ceiling_divide(query_size, word_size); - int32_t idx = threadIdx.y * blockDim.x + threadIdx.x; - const int32_t inc = blockDim.x * blockDim.y; - while (idx < n_words) + const int32_t n_words = ceiling_divide(query_size, word_size); + for (int32_t idx = threadIdx.x; idx < n_words; idx += warp_size) { // TODO query load is inefficient query_pattern(idx, 0) = myers_generate_query_pattern('A', query, query_size, idx * word_size); @@ -228,7 +227,6 @@ __device__ void myers_preprocess(device_matrix_view& query_pattern, ch query_pattern(idx, 5) = myers_generate_query_pattern_reverse('C', query, query_size, idx * word_size); query_pattern(idx, 6) = myers_generate_query_pattern_reverse('T', query, query_size, idx * word_size); query_pattern(idx, 7) = myers_generate_query_pattern_reverse('G', query, query_size, idx * word_size); - idx += inc; } 
} @@ -357,7 +355,8 @@ myers_compute_scores( } __device__ void hirschberg_myers_compute_path( - int8_t* path, + int8_t*& path, + int32_t* path_length, batched_device_matrices::device_interface* pvi, batched_device_matrices::device_interface* mvi, batched_device_matrices::device_interface* scorei, @@ -371,14 +370,16 @@ __device__ void hirschberg_myers_compute_path( { assert(query_begin < query_end); const int32_t n_words = ceiling_divide(query_end - query_begin, word_size); - device_matrix_view score = scorei->get_matrix_view(blockDim.y * alignment_idx + threadIdx.y, n_words, target_end - target_begin + 1); - device_matrix_view pv = pvi->get_matrix_view(blockDim.y * alignment_idx + threadIdx.y, n_words, target_end - target_begin + 1); - device_matrix_view mv = mvi->get_matrix_view(blockDim.y * alignment_idx + threadIdx.y, n_words, target_end - target_begin + 1); + device_matrix_view score = scorei->get_matrix_view(alignment_idx, n_words, target_end - target_begin + 1); + device_matrix_view pv = pvi->get_matrix_view(alignment_idx, n_words, target_end - target_begin + 1); + device_matrix_view mv = mvi->get_matrix_view(alignment_idx, n_words, target_end - target_begin + 1); myers_compute_scores(pv, mv, score, query_patterns, target_begin, target_end, query_begin, query_end, query_begin - query_begin_absolute, true, false); __syncwarp(); if (threadIdx.x == 0) { - append_myers_backtrace(path, pv, mv, score, query_end - query_begin); + int32_t len = append_myers_backtrace(path, pv, mv, score, query_end - query_begin); + path += len; + *path_length += len; } } @@ -400,13 +401,13 @@ __device__ const char* hirschberg_myers_compute_target_mid_warp( assert(query_mid < query_end); assert(target_begin < target_end); - device_matrix_view score = scorei->get_matrix_view(blockDim.y * alignment_idx + threadIdx.y, target_end - target_begin + 1, 2); + device_matrix_view score = scorei->get_matrix_view(alignment_idx, target_end - target_begin + 1, 2); if (query_begin < query_mid) { const int32_t n_words = ceiling_divide(query_mid - query_begin, word_size); - device_matrix_view pv = pvi->get_matrix_view(blockDim.y * alignment_idx + threadIdx.y, n_words, 1); - device_matrix_view mv = mvi->get_matrix_view(blockDim.y * alignment_idx + threadIdx.y, n_words, 1); + device_matrix_view pv = pvi->get_matrix_view(alignment_idx, n_words, 2); + device_matrix_view mv = mvi->get_matrix_view(alignment_idx, n_words, 2); myers_compute_scores(pv, mv, score, query_patterns, target_begin, target_end, query_begin, query_mid, query_begin - query_begin_absolute, false, false); } else @@ -421,8 +422,8 @@ __device__ const char* hirschberg_myers_compute_target_mid_warp( { const int32_t n_words = ceiling_divide(query_end - query_mid, word_size); - device_matrix_view pv = pvi->get_matrix_view(blockDim.y * alignment_idx + threadIdx.y, n_words, 1); - device_matrix_view mv = mvi->get_matrix_view(blockDim.y * alignment_idx + threadIdx.y, n_words, 1); + device_matrix_view pv = pvi->get_matrix_view(alignment_idx, n_words, 2); + device_matrix_view mv = mvi->get_matrix_view(alignment_idx, n_words, 2); myers_compute_scores(pv, mv, score, query_patterns, target_begin, target_end, query_mid, query_end, query_end_absolute - query_end, false, true); } @@ -453,7 +454,7 @@ __device__ const char* hirschberg_myers_compute_target_mid_warp( return target_begin + midpoint; } -__device__ void hirschberg_myers_single_char_warp(int8_t* path, char query_char, char const* target_begin, char const* target_end) +__device__ void 
hirschberg_myers_single_char_warp(int8_t*& path, int32_t* path_length, char query_char, char const* target_begin, char const* target_end) { // TODO parallelize if (threadIdx.x == 0) @@ -482,135 +483,68 @@ __device__ void hirschberg_myers_single_char_warp(int8_t* path, char query_char, ++path; --t; } + *path_length += target_end - target_begin; } } template -struct parallel_warp_shared_stack_state -{ - int32_t mutex_; - int32_t active_warps_; - T* buffer_begin_; - T* cur_end_; - T* buffer_end_; -}; - -template -class parallel_warp_shared_stack +class warp_shared_stack { public: - __device__ parallel_warp_shared_stack(parallel_warp_shared_stack_state* data, T* buffer_begin, T* buffer_end) - : data_(data) + __device__ warp_shared_stack(T* buffer_begin, T* buffer_end) + : buffer_begin_(buffer_begin), cur_end_(buffer_begin), buffer_end_(buffer_end) { - assert(buffer_begin < buffer_end); - if (threadIdx.x == 0 && threadIdx.y == 0 && threadIdx.z == 0) - { - data_->mutex_ = 0; - data_->active_warps_ = 0; - data_->buffer_begin_ = buffer_begin; - data_->cur_end_ = buffer_begin; - data_->buffer_end_ = buffer_end; - } - __syncthreads(); - if (threadIdx.x % warp_size == 0) - { - atomicAdd(&data_->active_warps_, 1); - } - __syncthreads(); + assert(buffer_begin_ < buffer_end_); } - __device__ bool inline push(T const& t) + __device__ bool inline push(T const& t, unsigned warp_mask = 0xffff'ffff) { - lock_mutex(); - bool success = false; - __syncwarp(); - - if (data_->buffer_end_ - data_->cur_end_ >= 1) + if (buffer_end_ - cur_end_ >= 1) { - __syncwarp(); - if (threadIdx.x % warp_size == 0) + __syncwarp(warp_mask); + if (threadIdx.x == 0) { - *(data_->cur_end_) = t; - ++(data_->cur_end_); + *cur_end_ = t; } - success = true; + __syncwarp(warp_mask); + ++cur_end_; + return true; } else { - if (threadIdx.x % warp_size == 0) + if (threadIdx.x == 0) { printf("ERROR: stack full!"); } + return false; } - __syncwarp(); - release_mutex(); - return success; } - __device__ inline bool pop(T& element) + __device__ inline void pop() { - lock_mutex(); - if (threadIdx.x % warp_size == 0) - { - assert(data_->active_warps_ > 0); - --(data_->active_warps_); - } - release_mutex(); - bool result = true; - while (1) - { - lock_mutex(); - __syncwarp(); - if (data_->buffer_begin_ < data_->cur_end_) - { - element = *(data_->cur_end_ - 1); - __syncwarp(); - if (threadIdx.x % warp_size == 0) - { - --(data_->cur_end_); - ++(data_->active_warps_); - } - break; - } - else if (data_->active_warps_ <= 0) - { - assert(data_->buffer_begin_ == data_->cur_end_); - result = false; - break; - } - __syncwarp(); - release_mutex(); - } - __syncwarp(); - release_mutex(); - return result; + assert(cur_end_ > buffer_begin_); + if (cur_end_ - 1 >= buffer_begin_) + --cur_end_; } -private: - __device__ inline void release_mutex() + __device__ inline T back() const { - __threadfence_block(); - if (threadIdx.x % warp_size == 0) - { - atomicExch(&(data_->mutex_), 0); - } - }; + assert(cur_end_ - 1 >= buffer_begin_); + return *(cur_end_ - 1); + } - __device__ inline void lock_mutex() + __device__ inline bool empty() const { - if (threadIdx.x % warp_size == 0) - { - while (0 != atomicCAS(&(data_->mutex_), 0, 1)) - { - }; - } - __threadfence_block(); + return buffer_begin_ == cur_end_; } - parallel_warp_shared_stack_state* const data_; +private: + T* buffer_begin_; + T* cur_end_; + T* buffer_end_; }; -__device__ bool hirschberg_myers( +__device__ void hirschberg_myers( query_target_range* stack_buffer_begin, query_target_range* stack_buffer_end, int8_t*& 
path, @@ -631,33 +565,31 @@ __device__ bool hirschberg_myers( assert(query_begin_absolute <= query_end_absolute); assert(target_begin_absolute <= target_end_absolute); - __shared__ parallel_warp_shared_stack_state stack_data; - parallel_warp_shared_stack stack(&stack_data, stack_buffer_begin, stack_buffer_end); - - if (threadIdx.y == 0) - stack.push({query_begin_absolute, query_end_absolute, target_begin_absolute, target_end_absolute}); + warp_shared_stack stack(stack_buffer_begin, stack_buffer_end); + stack.push({query_begin_absolute, query_end_absolute, target_begin_absolute, target_end_absolute}); assert(pvi->get_max_elements_per_matrix() == mvi->get_max_elements_per_matrix()); assert(scorei->get_max_elements_per_matrix() >= pvi->get_max_elements_per_matrix()); - bool success = true; - query_target_range e; - while (success && stack.pop(e)) + bool success = true; + int32_t length = 0; + while (success && !stack.empty()) { + query_target_range e = stack.back(); + stack.pop(); assert(e.query_begin <= e.query_end); assert(e.target_begin <= e.target_end); - int32_t path_pos = e.query_begin - query_begin_absolute + e.target_begin - target_begin_absolute; if (e.target_begin == e.target_end) { - hirschberg_myers_fill_path_warp(path + path_pos, e.query_end - e.query_begin, static_cast(AlignmentState::deletion)); + hirschberg_myers_fill_path_warp(path, &length, e.query_end - e.query_begin, static_cast(AlignmentState::deletion)); } else if (e.query_begin == e.query_end) { - hirschberg_myers_fill_path_warp(path + path_pos, e.target_end - e.target_begin, static_cast(AlignmentState::insertion)); + hirschberg_myers_fill_path_warp(path, &length, e.target_end - e.target_begin, static_cast(AlignmentState::insertion)); } else if (e.query_begin + 1 == e.query_end) { - hirschberg_myers_single_char_warp(path + path_pos, *e.query_begin, e.target_begin, e.target_end); + hirschberg_myers_single_char_warp(path, &length, *e.query_begin, e.target_begin, e.target_end); } else { @@ -666,7 +598,7 @@ __device__ bool hirschberg_myers( const int32_t n_words = ceiling_divide(e.query_end - e.query_begin, word_size); if ((e.target_end - e.target_begin + 1) * n_words <= pvi->get_max_elements_per_matrix()) { - hirschberg_myers_compute_path(path + path_pos, pvi, mvi, scorei, query_patterns, e.target_begin, e.target_end, e.query_begin, e.query_end, query_begin_absolute, alignment_idx); + hirschberg_myers_compute_path(path, &length, pvi, mvi, scorei, query_patterns, e.target_begin, e.target_end, e.query_begin, e.query_end, query_begin_absolute, alignment_idx); continue; } } @@ -677,36 +609,10 @@ __device__ bool hirschberg_myers( success = success && stack.push({query_mid, e.query_end, target_mid, e.target_end}); } } - __syncthreads(); - return success; -} - -__device__ void initialize_path(int8_t* path, int32_t max_path_length) -{ - int i = blockDim.x * threadIdx.y + threadIdx.x; - while (i < max_path_length) - { - path[i] = static_cast(-1); - i += blockDim.x * blockDim.y; - } -} - -__device__ void compactify_path(int8_t* path, int32_t* path_length, int32_t max_path_length) -{ - if (threadIdx.x == 0 && threadIdx.y == 0) - { - int pos = 0; - for (int i = 0; i < max_path_length; ++i) - { - int8_t p = path[i]; - if (p >= 0) - { - path[pos] = p; - ++pos; - } - } - *path_length = pos; - } + if (!success) + length = 0; + if (threadIdx.x == 0) + *path_length = length; } __global__ void hirschberg_myers_compute_alignment( @@ -731,25 +637,15 @@ __global__ void hirschberg_myers_compute_alignment( if (alignment_idx >= n_alignments) 
return; - const char* const query_begin = sequences_d + 2 * alignment_idx * max_sequence_length; - const char* const target_begin = sequences_d + (2 * alignment_idx + 1) * max_sequence_length; - const char* const query_end = query_begin + sequence_lengths_d[2 * alignment_idx]; - const char* const target_end = target_begin + sequence_lengths_d[2 * alignment_idx + 1]; - int8_t* path = paths_base + alignment_idx * max_path_length; - initialize_path(path, max_path_length); + const char* const query_begin = sequences_d + 2 * alignment_idx * max_sequence_length; + const char* const target_begin = sequences_d + (2 * alignment_idx + 1) * max_sequence_length; + const char* const query_end = query_begin + sequence_lengths_d[2 * alignment_idx]; + const char* const target_end = target_begin + sequence_lengths_d[2 * alignment_idx + 1]; + int8_t* path = paths_base + alignment_idx * max_path_length; query_target_range* stack_buffer_begin = stack_buffer_base + alignment_idx * stack_buffer_size_per_alignment; device_matrix_view query_patterns = query_patternsi->get_matrix_view(alignment_idx, ceiling_divide(query_end - query_begin, word_size), 8); myers_preprocess(query_patterns, query_begin, query_end - query_begin); - bool success = hirschberg_myers(stack_buffer_begin, stack_buffer_begin + stack_buffer_size_per_alignment, path, path_lengths + alignment_idx, full_myers_threshold, pvi, mvi, scorei, query_patterns, target_begin, target_end, query_begin, query_end, alignment_idx); - if (success) - { - compactify_path(path, path_lengths + alignment_idx, max_path_length); - } - else - { - if (threadIdx.x == 0 && threadIdx.y == 0) - *(path_lengths + alignment_idx) = 0; - } + hirschberg_myers(stack_buffer_begin, stack_buffer_begin + stack_buffer_size_per_alignment, path, path_lengths + alignment_idx, full_myers_threshold, pvi, mvi, scorei, query_patterns, target_begin, target_end, query_begin, query_end, alignment_idx); } } // namespace hirschbergmyers @@ -766,12 +662,11 @@ void hirschberg_myers_gpu(device_buffer& st batched_device_matrices& score, batched_device_matrices& query_patterns, int32_t switch_to_myers_threshold, - int32_t warps_per_alignment, cudaStream_t stream) { using hirschbergmyers::warp_size; - const dim3 threads(warp_size, warps_per_alignment, 1); + const dim3 threads(warp_size, 1, 1); const dim3 blocks(1, 1, ceiling_divide(n_alignments, threads.z)); hirschbergmyers::hirschberg_myers_compute_alignment<<>>(stack_buffer.data(), stack_buffer_size_per_alignment, switch_to_myers_threshold, paths_d, path_lengths_d, max_path_length, pv.get_device_interface(), mv.get_device_interface(), score.get_device_interface(), query_patterns.get_device_interface(), sequences_d, sequence_lengths_d, max_sequence_length, n_alignments); } diff --git a/cudaaligner/src/hirschberg_myers_gpu.cuh b/cudaaligner/src/hirschberg_myers_gpu.cuh index 11b12f2f3..1038d997f 100644 --- a/cudaaligner/src/hirschberg_myers_gpu.cuh +++ b/cudaaligner/src/hirschberg_myers_gpu.cuh @@ -44,7 +44,6 @@ void hirschberg_myers_gpu(device_buffer& st batched_device_matrices& score, batched_device_matrices& query_patterns, int32_t switch_to_myers_threshold, - int32_t warps_per_alignment, cudaStream_t stream); } // end namespace cudaaligner From 9c4d75099f97f58c42cd50056db9beb81d225b05 Mon Sep 17 00:00:00 2001 From: Andreas Hehn Date: Wed, 27 Nov 2019 18:49:12 +0100 Subject: [PATCH 122/128] [cudaaligner] formatting fixes --- cudaaligner/src/hirschberg_myers_gpu.cu | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/cudaaligner/src/hirschberg_myers_gpu.cu b/cudaaligner/src/hirschberg_myers_gpu.cu index 2a99c9f00..a307dc539 100644 --- a/cudaaligner/src/hirschberg_myers_gpu.cu +++ b/cudaaligner/src/hirschberg_myers_gpu.cu @@ -215,7 +215,7 @@ __device__ WordType myers_generate_query_pattern_reverse(char x, char const* que __device__ void myers_preprocess(device_matrix_view& query_pattern, char const* query, int32_t query_size) { - const int32_t n_words = ceiling_divide(query_size, word_size); + const int32_t n_words = ceiling_divide(query_size, word_size); for (int32_t idx = threadIdx.x; idx < n_words; idx += warp_size) { // TODO query load is inefficient @@ -492,7 +492,9 @@ class warp_shared_stack { public: __device__ warp_shared_stack(T* buffer_begin, T* buffer_end) - : buffer_begin_(buffer_begin), cur_end_(buffer_begin), buffer_end_(buffer_end) + : buffer_begin_(buffer_begin) + , cur_end_(buffer_begin) + , buffer_end_(buffer_end) { assert(buffer_begin_ < buffer_end_); } From fc7e1710a5590646e6de4e608610d38ff68c5e79 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Tue, 26 Nov 2019 18:35:46 +0200 Subject: [PATCH 123/128] [pyclaragenomics] Add CI testing for wheel package - fix Joyjit's review comments Fixes #237 [pyclaragenomics] Fix pip install command for CI Fixes #237 [pyclaragenomics] Fix style error Fixes #237 --- ci/common/test-pyclaragenomics.sh | 26 ++++++++++++++++-------- pyclaragenomics/setup_pyclaragenomics.py | 5 ++--- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/ci/common/test-pyclaragenomics.sh b/ci/common/test-pyclaragenomics.sh index 3bfedeb31..d45a3d491 100644 --- a/ci/common/test-pyclaragenomics.sh +++ b/ci/common/test-pyclaragenomics.sh @@ -14,17 +14,27 @@ ###################################### set -e +run_tests() { + cd test/ + if [ "${TEST_ON_GPU}" == '1' ]; then + python -m pytest -m gpu -s + else + python -m pytest -m cpu -s + fi +} + PYCLARAGENOMICS_DIR=$1 cd $PYCLARAGENOMICS_DIR -#Install external dependencies. +# Install external dependencies. python -m pip install -r requirements.txt python setup_pyclaragenomics.py --build_output_folder cga_build +run_tests -# Run tests. 
-cd test/ -if [ "${TEST_ON_GPU}" == '1' ]; then - python -m pytest -m gpu -s -else - python -m pytest -m cpu -s -fi +cd $PYCLARAGENOMICS_DIR +# Uninstall pyclaragenomics +pip uninstall -y pyclaragenomics +# Test wheel package creation +python setup_pyclaragenomics.py --build_output_folder cga_build_wheel --create_wheel_only +yes | pip install $PYCLARAGENOMICS_DIR/cga_build_wheel/pyclaragenomics-*.whl +run_tests diff --git a/pyclaragenomics/setup_pyclaragenomics.py b/pyclaragenomics/setup_pyclaragenomics.py index b2fcef919..657d0cba4 100644 --- a/pyclaragenomics/setup_pyclaragenomics.py +++ b/pyclaragenomics/setup_pyclaragenomics.py @@ -11,11 +11,10 @@ # import argparse -import os.path +import glob import os -import subprocess import shutil -import glob +import subprocess def copy_all_files_in_directory(src, dest, file_ext="*.so"): From 5bebd4d7e82fa36b2849837b7ff163edf89d1057 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Wed, 27 Nov 2019 03:34:40 +0200 Subject: [PATCH 124/128] [pyclaragenomics] Use data_files instead of package_data - Move shared libraries copying to setup.py Fixes #240 --- pyclaragenomics/setup.py | 51 +++++++++++++++++------- pyclaragenomics/setup_pyclaragenomics.py | 26 +----------- 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/pyclaragenomics/setup.py b/pyclaragenomics/setup.py index aeba1bc0f..254577789 100755 --- a/pyclaragenomics/setup.py +++ b/pyclaragenomics/setup.py @@ -9,14 +9,16 @@ # distribution of this software and related documentation without an express # license agreement from NVIDIA CORPORATION is strictly prohibited. # + import glob import os +import shutil from setuptools import setup, find_packages, Extension from Cython.Build import cythonize -def get_verified_path(path): +def get_verified_absolute_path(path): installed_path = os.path.abspath(path) if not os.path.exists(installed_path): raise RuntimeError("No valid path for requested component exists") @@ -30,15 +32,33 @@ def get_installation_requirments(file_path): return requirements_file_content +def copy_all_files_in_directory(src, dest, file_ext="*.so"): + files_to_copy = glob.glob(os.path.join(src, file_ext)) + if not files_to_copy: + raise RuntimeError("No {} files under {}".format(src, file_ext)) + os.makedirs(os.path.dirname(dest), exist_ok=True) + try: + for file in files_to_copy: + shutil.copy(file, dest) + print("{} was copied into {}".format(file, dest)) + except (shutil.Error, PermissionError) as err: + print('Could not copy {}. 
Error: {}'.format(file, err)) + raise err + + # Must be set before calling pip try: pycga_dir = os.environ['PYCGA_DIR'] cga_install_dir = os.environ['CGA_INSTALL_DIR'] - cga_runtime_lib_dir = os.environ['CGA_RUNTIME_LIB_DIR'] except KeyError as e: raise EnvironmentError( - 'PYCGA_DIR CGA_INSTALL_DIR CGA_RUNTIME_LIB_DIR \ - environment variables must be set').with_traceback(e.__traceback__) + 'PYCGA_DIR CGA_INSTALL_DIR environment variables must be set').with_traceback(e.__traceback__) + +# Copies shared libraries into clargenomics package +copy_all_files_in_directory( + get_verified_absolute_path(os.path.join(cga_install_dir, "lib")), + os.path.join(pycga_dir, "claragenomics/shared_libs/"), +) # Classifiers for PyPI pycga_classifiers = [ @@ -58,13 +78,13 @@ def get_installation_requirments(file_path): extensions = [ Extension( "*", - sources=[os.path.join(pycga_dir, "claragenomics/**/*.pyx")], + sources=[os.path.join("claragenomics/**/*.pyx")], include_dirs=[ "/usr/local/cuda/include", - get_verified_path(os.path.join(cga_install_dir, "include")), + get_verified_absolute_path(os.path.join(cga_install_dir, "include")), ], - library_dirs=["/usr/local/cuda/lib64", get_verified_path(os.path.join(cga_install_dir, "lib"))], - runtime_library_dirs=["/usr/local/cuda/lib64", cga_runtime_lib_dir], + library_dirs=["/usr/local/cuda/lib64", get_verified_absolute_path(os.path.join(cga_install_dir, "lib"))], + runtime_library_dirs=["/usr/local/cuda/lib64", os.path.join('$ORIGIN', os.pardir, 'shared_libs')], libraries=["cudapoa", "cudaaligner", "cudart"], language="c++", extra_compile_args=["-std=c++14"], @@ -76,16 +96,19 @@ def get_installation_requirments(file_path): description='NVIDIA genomics python libraries and utiliites', author='NVIDIA Corporation', url="https://github.com/clara-genomics/ClaraGenomicsAnalysis", - package_data={ - 'claragenomics': glob.glob(os.path.join(pycga_dir, 'claragenomics/shared_libs/*.so')) - }, - install_requires=get_installation_requirments(os.path.join(pycga_dir, 'requirements.txt')), + include_package_data=True, + data_files=[ + ('claragenomics', glob.glob('claragenomics/shared_libs/*.so')) + ], + install_requires=get_installation_requirments( + get_verified_absolute_path(os.path.join(pycga_dir, 'requirements.txt')) + ), packages=find_packages(where=pycga_dir), python_requires='>=3.6', license='Apache License 2.0', long_description='Python libraries and utilities for manipulating genomics data', classifiers=pycga_classifiers, ext_modules=cythonize(extensions, compiler_directives={'embedsignature': True}), - scripts=[get_verified_path(os.path.join(pycga_dir, 'bin', 'genome_simulator')), - get_verified_path(os.path.join(pycga_dir, 'bin', 'assembly_evaluator'))], + scripts=[os.path.join('bin', 'genome_simulator'), + os.path.join('bin', 'assembly_evaluator')], ) diff --git a/pyclaragenomics/setup_pyclaragenomics.py b/pyclaragenomics/setup_pyclaragenomics.py index 657d0cba4..277843a19 100644 --- a/pyclaragenomics/setup_pyclaragenomics.py +++ b/pyclaragenomics/setup_pyclaragenomics.py @@ -11,26 +11,10 @@ # import argparse -import glob import os -import shutil import subprocess -def copy_all_files_in_directory(src, dest, file_ext="*.so"): - files_to_copy = glob.glob(os.path.join(src, file_ext)) - if not files_to_copy: - raise RuntimeError("No {} files under {}".format(src, file_ext)) - os.makedirs(os.path.dirname(dest), exist_ok=True) - try: - for file in files_to_copy: - shutil.copy(file, dest) - print("{} was copied into {}".format(file, dest)) - except (shutil.Error, 
PermissionError) as err: - print('Could not copy {}. Error: {}'.format(file, err)) - raise err - - def parse_arguments(): parser = argparse.ArgumentParser(description='build & install Clara Genomics Analysis SDK.') parser.add_argument('--build_output_folder', @@ -96,28 +80,20 @@ def build(self): def setup_python_binding(is_develop_mode, wheel_output_folder, pycga_dir, cga_install_dir): if wheel_output_folder: - # Copies shared libraries into clargenomics package - copy_all_files_in_directory( - os.path.join(cga_install_dir, "lib"), - os.path.join(pycga_dir, "claragenomics/shared_libs/"), - ) setup_command = ['python', 'setup.py', 'bdist_wheel', '-d', wheel_output_folder] completion_message = \ "A wheel file was create for pyclaragenomics under {}".format(wheel_output_folder) - cga_runtime_lib_dir = os.path.join('$ORIGIN', os.pardir, 'shared_libs') else: setup_command = ['pip', 'install'] + (['-e'] if is_develop_mode else []) + ["."] completion_message = \ "pyclaragenomics was successfully setup in {} mode!".format( "development" if args.develop else "installation") - cga_runtime_lib_dir = os.path.join(cga_install_dir, "lib") subprocess.check_call(setup_command, env={ **os.environ, 'PYCGA_DIR': pycga_dir, - 'CGA_INSTALL_DIR': cga_install_dir, - 'CGA_RUNTIME_LIB_DIR': cga_runtime_lib_dir + 'CGA_INSTALL_DIR': cga_install_dir }, cwd=pycga_dir) print(completion_message) From 0239352600d9fcf16436fac91a5e6a37cc29fbc9 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Wed, 27 Nov 2019 05:07:47 +0200 Subject: [PATCH 125/128] [pyclaragenomics] remove pycga from being an env variable to setup.py Fixes #240 --- pyclaragenomics/setup.py | 13 ++++++++----- pyclaragenomics/setup_pyclaragenomics.py | 1 - 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pyclaragenomics/setup.py b/pyclaragenomics/setup.py index 254577789..6790433ef 100755 --- a/pyclaragenomics/setup.py +++ b/pyclaragenomics/setup.py @@ -48,16 +48,19 @@ def copy_all_files_in_directory(src, dest, file_ext="*.so"): # Must be set before calling pip try: - pycga_dir = os.environ['PYCGA_DIR'] cga_install_dir = os.environ['CGA_INSTALL_DIR'] except KeyError as e: raise EnvironmentError( - 'PYCGA_DIR CGA_INSTALL_DIR environment variables must be set').with_traceback(e.__traceback__) + 'CGA_INSTALL_DIR environment variables must be set').with_traceback(e.__traceback__) + +# Get current dir (pyclaragenomics folder is copied into a temp directory created by pip) +current_dir = os.path.dirname(os.path.realpath(__file__)) + # Copies shared libraries into clargenomics package copy_all_files_in_directory( get_verified_absolute_path(os.path.join(cga_install_dir, "lib")), - os.path.join(pycga_dir, "claragenomics/shared_libs/"), + os.path.join(current_dir, "claragenomics/shared_libs/"), ) # Classifiers for PyPI @@ -101,9 +104,9 @@ def copy_all_files_in_directory(src, dest, file_ext="*.so"): ('claragenomics', glob.glob('claragenomics/shared_libs/*.so')) ], install_requires=get_installation_requirments( - get_verified_absolute_path(os.path.join(pycga_dir, 'requirements.txt')) + get_verified_absolute_path(os.path.join(current_dir, 'requirements.txt')) ), - packages=find_packages(where=pycga_dir), + packages=find_packages(where=current_dir), python_requires='>=3.6', license='Apache License 2.0', long_description='Python libraries and utilities for manipulating genomics data', diff --git a/pyclaragenomics/setup_pyclaragenomics.py b/pyclaragenomics/setup_pyclaragenomics.py index 277843a19..8fd709f66 100644 --- a/pyclaragenomics/setup_pyclaragenomics.py 
+++ b/pyclaragenomics/setup_pyclaragenomics.py @@ -92,7 +92,6 @@ def setup_python_binding(is_develop_mode, wheel_output_folder, pycga_dir, cga_in subprocess.check_call(setup_command, env={ **os.environ, - 'PYCGA_DIR': pycga_dir, 'CGA_INSTALL_DIR': cga_install_dir }, cwd=pycga_dir) From 41c6e203960ab650f151744a541fef74964b63a3 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Wed, 27 Nov 2019 12:20:48 +0200 Subject: [PATCH 126/128] [pyclaragenomics] Use 'pip wheel' Fixes #240 [pyclaragenomics] Fix classifiers for PyPI Fixes #240 --- ci/common/test-pyclaragenomics.sh | 2 +- pyclaragenomics/setup.py | 26 ++++++++++++------------ pyclaragenomics/setup_pyclaragenomics.py | 10 ++++++--- 3 files changed, 21 insertions(+), 17 deletions(-) diff --git a/ci/common/test-pyclaragenomics.sh b/ci/common/test-pyclaragenomics.sh index d45a3d491..b237ca93d 100644 --- a/ci/common/test-pyclaragenomics.sh +++ b/ci/common/test-pyclaragenomics.sh @@ -36,5 +36,5 @@ cd $PYCLARAGENOMICS_DIR pip uninstall -y pyclaragenomics # Test wheel package creation python setup_pyclaragenomics.py --build_output_folder cga_build_wheel --create_wheel_only -yes | pip install $PYCLARAGENOMICS_DIR/cga_build_wheel/pyclaragenomics-*.whl +yes | pip install $PYCLARAGENOMICS_DIR/pyclaragenomics_wheel/pyclaragenomics-*.whl run_tests diff --git a/pyclaragenomics/setup.py b/pyclaragenomics/setup.py index 6790433ef..66d4b9cfa 100755 --- a/pyclaragenomics/setup.py +++ b/pyclaragenomics/setup.py @@ -65,18 +65,17 @@ def copy_all_files_in_directory(src, dest, file_ext="*.so"): # Classifiers for PyPI pycga_classifiers = [ - 'Development Status :: 5 - Production/Stable', - 'Operating System :: POSIX :: Linux', - 'Intended Audience :: Science/Research', - 'Topic :: Scientific/Engineering :: Bio-Informatics', - 'License :: OSI Approved :: Apache Software License', - 'Natural Language :: English', - 'Operating System :: POSIX :: Linux', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', -], + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Bio-Informatics", + "License :: OSI Approved :: Apache Software License", + "Natural Language :: English", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9" +] extensions = [ Extension( @@ -101,7 +100,7 @@ def copy_all_files_in_directory(src, dest, file_ext="*.so"): url="https://github.com/clara-genomics/ClaraGenomicsAnalysis", include_package_data=True, data_files=[ - ('claragenomics', glob.glob('claragenomics/shared_libs/*.so')) + ('cga_shared_objects', glob.glob('claragenomics/shared_libs/*.so')) ], install_requires=get_installation_requirments( get_verified_absolute_path(os.path.join(current_dir, 'requirements.txt')) @@ -111,6 +110,7 @@ def copy_all_files_in_directory(src, dest, file_ext="*.so"): license='Apache License 2.0', long_description='Python libraries and utilities for manipulating genomics data', classifiers=pycga_classifiers, + platforms=['any'], ext_modules=cythonize(extensions, compiler_directives={'embedsignature': True}), scripts=[os.path.join('bin', 'genome_simulator'), os.path.join('bin', 'assembly_evaluator')], diff --git a/pyclaragenomics/setup_pyclaragenomics.py b/pyclaragenomics/setup_pyclaragenomics.py index 8fd709f66..bd1e88be1 100644 
--- a/pyclaragenomics/setup_pyclaragenomics.py +++ b/pyclaragenomics/setup_pyclaragenomics.py @@ -24,7 +24,7 @@ def parse_arguments(): parser.add_argument('--create_wheel_only', required=False, action='store_true', - help="Create ") + help="Creates a python wheel package from pyclaragenomics (no installation)") parser.add_argument('--develop', required=False, action='store_true', @@ -80,7 +80,11 @@ def build(self): def setup_python_binding(is_develop_mode, wheel_output_folder, pycga_dir, cga_install_dir): if wheel_output_folder: - setup_command = ['python', 'setup.py', 'bdist_wheel', '-d', wheel_output_folder] + setup_command = [ + 'pip', 'wheel', '.', + '--global-option', 'sdist', + '--wheel-dir', wheel_output_folder, '--no-deps' + ] completion_message = \ "A wheel file was create for pyclaragenomics under {}".format(wheel_output_folder) else: @@ -112,7 +116,7 @@ def setup_python_binding(is_develop_mode, wheel_output_folder, pycga_dir, cga_in # Setup pyclaragenomics setup_python_binding( is_develop_mode=args.develop, - wheel_output_folder=os.path.realpath(args.build_output_folder) if args.create_wheel_only else None, + wheel_output_folder='pyclaragenomics_wheel/' if args.create_wheel_only else None, pycga_dir=current_dir, cga_install_dir=os.path.realpath(cga_installation_directory) ) From 375709d1f6a9bcb04b59c9de8a6fd6a1bfdfc387 Mon Sep 17 00:00:00 2001 From: Ohad Mosafi Date: Wed, 27 Nov 2019 23:54:39 +0200 Subject: [PATCH 127/128] [pyclaragenomics] Update README with wheel package usage information and add LICENSE Fixes #240 --- pyclaragenomics/LICENSE | 7 +++++++ pyclaragenomics/README.md | 10 ++++++++++ 2 files changed, 17 insertions(+) create mode 100644 pyclaragenomics/LICENSE diff --git a/pyclaragenomics/LICENSE b/pyclaragenomics/LICENSE new file mode 100644 index 000000000..cd938cef5 --- /dev/null +++ b/pyclaragenomics/LICENSE @@ -0,0 +1,7 @@ +Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. + +NVIDIA CORPORATION and its licensors retain all intellectual property +and proprietary rights in and to this software, related documentation +and any modifications thereto. Any use, reproduction, disclosure or +distribution of this software and related documentation without an express +license agreement from NVIDIA CORPORATION is strictly prohibited. diff --git a/pyclaragenomics/README.md b/pyclaragenomics/README.md index dcc34aac4..2d4b2288c 100644 --- a/pyclaragenomics/README.md +++ b/pyclaragenomics/README.md @@ -30,6 +30,16 @@ To test the installation execute: cd test/ python -m pytest ``` +### Create a Wheel package + +Use the following command in order to package pyclaragenomics into a wheel. 
(without installing) +``` +pip install -r requirements.txt +python setup_pyclaragenomics.py --create_wheel_only +``` +Compiled CPython 3.5 & 3.6 packages are available at: +- https://pypi.org/project/pyclaragenomics-cuda10-0/#description +- https://pypi.org/project/pyclaragenomics-cuda10-1/#description ## Generating a simulated genome From 7b0bdb5a49adfc48785fd6b7be260926eb579f34 Mon Sep 17 00:00:00 2001 From: Joyjit Daw Date: Wed, 27 Nov 2019 18:36:09 -0500 Subject: [PATCH 128/128] [README] restructuring README --- pyclaragenomics/README.md | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/pyclaragenomics/README.md b/pyclaragenomics/README.md index 2d4b2288c..d1fdbea3e 100644 --- a/pyclaragenomics/README.md +++ b/pyclaragenomics/README.md @@ -4,18 +4,25 @@ Python libraries and utilities for manipulating genomics data ## Installation +### Install from PyPI + +A stable release of pyclaragenomics can be installed from PyPI. Currently only CUDA 10.0 and CUDA 10.1 based packages are supported. +Both of those packages are available for CPython 3.5 and 3.6. + ``` -pip install -r requirements.txt -python setup_pyclaragenomics.py --build_output_folder BUILD_FOLDER +pip install pyclaragenomics-cuda10-0 ``` -*Note* if you are developing pyclaragenomics you should do a develop build instead, changes you make to the source code will then be picked up on immediately: +or ``` -pip install -r requirements.txt -python setup_pyclaragenomics.py --build_output_folder BUILD_FOLDER --develop +pip install pyclaragenomics-cuda10-1 ``` +Details of the packages are available here - +- https://pypi.org/project/pyclaragenomics-cuda10-0/#description +- https://pypi.org/project/pyclaragenomics-cuda10-1/#description + ### Testing installation The following binaries should be on the `PATH` in order for the tests to pass: @@ -30,6 +37,20 @@ To test the installation execute: cd test/ python -m pytest ``` + +### Install from source +``` +pip install -r requirements.txt +python setup_pyclaragenomics.py --build_output_folder BUILD_FOLDER +``` + +*Note* if you are developing pyclaragenomics you should do a develop build instead, changes you make to the source code will then be picked up on immediately: + +``` +pip install -r requirements.txt +python setup_pyclaragenomics.py --build_output_folder BUILD_FOLDER --develop +``` + ### Create a Wheel package Use the following command in order to package pyclaragenomics into a wheel. (without installing) @@ -37,9 +58,6 @@ Use the following command in order to package pyclaragenomics into a wheel. (wit pip install -r requirements.txt python setup_pyclaragenomics.py --create_wheel_only ``` -Compiled CPython 3.5 & 3.6 packages are available at: -- https://pypi.org/project/pyclaragenomics-cuda10-0/#description -- https://pypi.org/project/pyclaragenomics-cuda10-1/#description ## Generating a simulated genome