diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml new file mode 100644 index 00000000..d3c45fe8 --- /dev/null +++ b/.github/workflows/documentation.yml @@ -0,0 +1,44 @@ +name: generate documentation + +# triggers +on: + push: + branches: + - master + - documentation + pull_request: + branches: + - master + +jobs: + build-documentation: + runs-on: ubuntu-latest + steps: + # checkout repository + - name: Checkout cppuddle + uses: actions/checkout@v4 + with: + path: cppuddle + # install dependencies + - name: Dependencies + run: | + sudo apt update + sudo apt-get install -y doxygen graphviz + # CMake + - name: Configure + run: | + cd cppuddle + mkdir build + cd build + cmake -DCPPUDDLE_WITH_DOCUMENTATION=ON .. + # make + - name: Generate + run: | + cd cppuddle/build + make doc + # deploy to github pages + - name: Deploy + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + publish_dir: ./cppuddle/docs/html diff --git a/CMakeLists.txt b/CMakeLists.txt index 2212d40b..d88a3d27 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -39,6 +39,7 @@ option(CPPUDDLE_WITH_EXECUTOR_RECYCLING "Enables the default executor recycling # Tooling options option(CPPUDDLE_WITH_CLANG_TIDY "Enable clang tidy warnings" OFF) option(CPPUDDLE_WITH_CLANG_FORMAT "Enable clang format target" OFF) +option(CPPUDDLE_WITH_DOCUMENTATION "Build documentation using doxygen." 
OFF) #------------------------------------------------------------------------------------------------------------ # Define dependencies and conflicts/incompatibilities @@ -146,6 +147,10 @@ if (CPPUDDLE_WITH_CLANG_FORMAT) endif() endif() +if (CPPUDDLE_WITH_DOCUMENTATION) + add_subdirectory(docs) +endif () + #------------------------------------------------------------------------------------------------------------ # Define library targets and installation # (also includes various warnings for non-optimal build configurations) diff --git a/README.md b/README.md index 2af91449..288f6332 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,8 @@ In this use-case, allocating GPU buffers for all sub-grids in advance would have - Executor pools and various scheduling policies (round robin, priority queue, multi-gpu), which rely on reference counting to gauge the current load of a executor instead of querying the device itself. Tested with CUDA, HIP and Kokkos executors provided by HPX / HPX-Kokkos. - Special Executors/Allocators for on-the-fly work GPU aggregation (using HPX). +The documentation of the current master branch is available [here](https://sc-sgs.github.io/CPPuddle/). In particular, the public functionality for the memory recycling is available in the namespace [memory_recycling](https://sc-sgs.github.io/CPPuddle/namespacecppuddle_1_1memory__recycling.html), for the executor pools it is available in the namespace [executor_recycling](https://sc-sgs.github.io/CPPuddle/namespacecppuddle_1_1executor__recycling.html) and the work aggregation (kernel fusion) functionality is available in the namespace [kernel_aggregation](https://sc-sgs.github.io/CPPuddle/namespacecppuddle_1_1kernel__aggregation.html). 
+ #### Requirements - C++17 diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt new file mode 100644 index 00000000..660b3647 --- /dev/null +++ b/docs/CMakeLists.txt @@ -0,0 +1,36 @@ +find_package(Doxygen REQUIRED OPTIONAL_COMPONENTS dot) + +## configure doxygen +set(DOXYGEN_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/docs") +set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "README.md") +set(DOXYGEN_FILE_PATTERNS "*.hpp;*cpp;*.cuh;*.cl;*.dox") +set(DOXYGEN_EXTENSION_MAPPING "cu=c++;cuh=c++;cl=c++") +set(DOXYGEN_STRIP_FROM_PATH "${PROJECT_SOURCE_DIR}") +set(DOXYGEN_EXCLUDE "") +set(DOXYGEN_ABBREVIATE_BRIEF "") +set(DOXYGEN_QUIET "YES") +set(DOXYGEN_HTML_TIMESTAMP "YES") +set(DOXYGEN_NUM_PROC_THREADS 0) +set(DOXYGEN_WARN_NO_PARAMDOC "YES") +set(DOXYGEN_SORT_MEMBER_DOCS "YES") +set(DOXYGEN_INLINE_INHERITED_MEMB "YES") +set(DOXYGEN_EXCLUDE_SYMBOLS "*_HPP_") + +set(DOXYGEN_DOT_IMAGE_FORMAT "svg") +set(DOXYGEN_INTERACTIVE_SVG "YES") +set(DOXYGEN_INCLUDE_GRAPH "YES") +set(DOXYGEN_EXTRACT_ALL "YES") + +## enable processing of specific attributes and macros +set(DOXYGEN_ENABLE_PREPROCESSING "YES") +set(DOXYGEN_MACRO_EXPANSION "YES") +set(DOXYGEN_EXPAND_ONLY_PREDEF "YES") +set(DOXYGEN_EXPAND_AS_DEFINED "YES") + +## add doxygen as target +doxygen_add_docs( + doc + "${PROJECT_SOURCE_DIR}/include;${PROJECT_SOURCE_DIR}/tests;${PROJECT_SOURCE_DIR}/README.md" + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" + COMMENT "Generating API documentation with Doxygen" +) diff --git a/include/buffer_manager.hpp b/include/buffer_manager.hpp index 69020e5b..51d54fd3 100644 --- a/include/buffer_manager.hpp +++ b/include/buffer_manager.hpp @@ -15,6 +15,7 @@ #include "cppuddle/memory_recycling/detail/buffer_management.hpp" #include "cppuddle/memory_recycling/std_recycling_allocators.hpp" +/// Deprecated LEGACY namespace. 
Kept around for compatibility with old code for now namespace recycler { namespace detail { diff --git a/include/cppuddle/executor_recycling/executor_pools_interface.hpp b/include/cppuddle/executor_recycling/executor_pools_interface.hpp index 49a6d42d..16b7873f 100644 --- a/include/cppuddle/executor_recycling/executor_pools_interface.hpp +++ b/include/cppuddle/executor_recycling/executor_pools_interface.hpp @@ -6,22 +6,31 @@ #ifndef EXECUTOR_POOLS_INTERFACE_HPP #define EXECUTOR_POOLS_INTERFACE_HPP +/// \file +/// Executor recycling public interface + #include "cppuddle/executor_recycling/detail/executor_pools_management.hpp" +/// main CPPuddle namespace namespace cppuddle { +/// CPPuddle namespace containing the executor pool functionality namespace executor_recycling { +/// Round robin pool strategy implementation template using round_robin_pool_impl = detail::round_robin_pool_impl; +/// Priority pool strategy implementation template using priority_pool_impl = detail::priority_pool_impl; +/// Main access to all executor pools using executor_pool = detail::executor_pool; +/// RAII wrapper for executors template using executor_interface = detail::executor_interface; diff --git a/include/cppuddle/kernel_aggregation/kernel_aggregation_interface.hpp b/include/cppuddle/kernel_aggregation/kernel_aggregation_interface.hpp index c7a3b633..b2597be6 100644 --- a/include/cppuddle/kernel_aggregation/kernel_aggregation_interface.hpp +++ b/include/cppuddle/kernel_aggregation/kernel_aggregation_interface.hpp @@ -9,20 +9,35 @@ #include "cppuddle/kernel_aggregation/detail/aggregation_executors_and_allocators.hpp" #include "cppuddle/kernel_aggregation/detail/aggregation_executor_pools.hpp" +/// \file +/// Kernel aggregation public interface + namespace cppuddle { +/// CPPuddle namespace containing the kernel aggregation functionality namespace kernel_aggregation { +/// Possible launch modes: +/// EAGER = launch either when enough kernels aggregated or executor becomes idle +/// 
STRICT = launch only when enough kernels aggregated (be aware of deadlocks when not +/// enough kernels are available!) +/// ENDLESS = launch only when executor becomes idle using aggregated_executor_modes = cppuddle::kernel_aggregation::detail::aggregated_executor_modes; +/// Allocator to get a buffer slice of a buffer shared with other +/// tasks in the same aggregation region template using allocator_slice = cppuddle::kernel_aggregation::detail::allocator_slice; +/// Executor facilitating the kernel aggregation +/// Contains the executor_slice subclass which is intended to be used +/// by the individual tasks template using aggregated_executor = cppuddle::kernel_aggregation::detail::aggregated_executor; +/// Pool to get an aggregation executor for the desired code region (kernelname) template using aggregation_pool = cppuddle::kernel_aggregation::detail::aggregation_pool #include "buffer_management_interface.hpp" +/// \file +/// Contains the recycling allocators (in the form of type aliases) +/// using the boost aligned_allocator as an underlying allocator + namespace cppuddle { namespace memory_recycling { diff --git a/include/cppuddle/memory_recycling/buffer_management_interface.hpp b/include/cppuddle/memory_recycling/buffer_management_interface.hpp index c5fa44cd..d6bc18f9 100644 --- a/include/cppuddle/memory_recycling/buffer_management_interface.hpp +++ b/include/cppuddle/memory_recycling/buffer_management_interface.hpp @@ -8,7 +8,11 @@ #include "detail/buffer_management.hpp" +/// \file +/// Memory recycling public interface + namespace cppuddle { +/// CPPuddle namespace containing the memory recycling functionality namespace memory_recycling { /// Print performance counters of all buffer managers to stdout diff --git a/include/cppuddle/memory_recycling/cuda_recycling_allocators.hpp b/include/cppuddle/memory_recycling/cuda_recycling_allocators.hpp index b47a4fe2..6cc9c565 100644 --- a/include/cppuddle/memory_recycling/cuda_recycling_allocators.hpp +++ 
b/include/cppuddle/memory_recycling/cuda_recycling_allocators.hpp @@ -10,6 +10,11 @@ // import cuda_pinned_allocator and cuda_device_allocator #include "detail/cuda_underlying_allocators.hpp" +/// \file +/// Contains the CUDA recycling allocators (in the form of type aliases) +/// for both pinned host memory and device memory. Also contains the required +/// device selector for MultiGPU setups with these allocators. + namespace cppuddle { namespace memory_recycling { diff --git a/include/cppuddle/memory_recycling/hip_recycling_allocators.hpp b/include/cppuddle/memory_recycling/hip_recycling_allocators.hpp index 13b5241b..77f4b618 100644 --- a/include/cppuddle/memory_recycling/hip_recycling_allocators.hpp +++ b/include/cppuddle/memory_recycling/hip_recycling_allocators.hpp @@ -10,6 +10,11 @@ // import hip_pinned_allocator and hip_device_allocator #include "detail/hip_underlying_allocators.hpp" +/// \file +/// Contains the HIP recycling allocators (in the form of type aliases) +/// for both pinned host memory and device memory. Also contains the required +/// device selector for MultiGPU setups with these allocators. 
+ namespace cppuddle { namespace memory_recycling { diff --git a/include/cppuddle/memory_recycling/std_recycling_allocators.hpp b/include/cppuddle/memory_recycling/std_recycling_allocators.hpp index 21fd5c2c..6681c67f 100644 --- a/include/cppuddle/memory_recycling/std_recycling_allocators.hpp +++ b/include/cppuddle/memory_recycling/std_recycling_allocators.hpp @@ -8,6 +8,10 @@ #include "buffer_management_interface.hpp" +/// \file +/// Contains the recycling allocators (in the form of type aliases) +/// using the std memory allocator + namespace cppuddle { namespace memory_recycling { diff --git a/include/cppuddle/memory_recycling/sycl_recycling_allocators.hpp b/include/cppuddle/memory_recycling/sycl_recycling_allocators.hpp index fd494bca..f01e9598 100644 --- a/include/cppuddle/memory_recycling/sycl_recycling_allocators.hpp +++ b/include/cppuddle/memory_recycling/sycl_recycling_allocators.hpp @@ -9,6 +9,10 @@ #include "buffer_management_interface.hpp" #include "detail/sycl_underlying_allocators.hpp" +/// \file +/// Contains the SYCL recycling allocators (in the form of type aliases) +/// for both pinned host memory and device memory. + namespace cppuddle { namespace memory_recycling { diff --git a/include/cppuddle/memory_recycling/util/cuda_recycling_device_buffer.hpp b/include/cppuddle/memory_recycling/util/cuda_recycling_device_buffer.hpp index dbd7e4c8..2a687041 100644 --- a/include/cppuddle/memory_recycling/util/cuda_recycling_device_buffer.hpp +++ b/include/cppuddle/memory_recycling/util/cuda_recycling_device_buffer.hpp @@ -9,11 +9,16 @@ // import recycle_allocator_cuda_device #include "cppuddle/memory_recycling/cuda_recycling_allocators.hpp"" +/// \file +/// Contains RAII wrappers for CUDA device buffers. 
Intended to be used with +/// the recycling allocators but technically any allocator should work + namespace cppuddle { namespace memory_recycling { /// RAII wrapper for CUDA device memory +/// (ideally used with a recycling allocator) template ::value, int> = 0> struct cuda_device_buffer { recycle_allocator_cuda_device allocator; @@ -38,6 +43,7 @@ struct cuda_device_buffer { }; /// RAII wrapper for CUDA device memory using a passed aggregated allocator +/// (which ideally should be an allocator_slice from the work aggregation) template ::value, int> = 0> struct cuda_aggregated_device_buffer { T *device_side_buffer; diff --git a/include/cppuddle/memory_recycling/util/hip_recycling_device_buffer.hpp b/include/cppuddle/memory_recycling/util/hip_recycling_device_buffer.hpp index 7f04e3f7..ffc7fd0c 100644 --- a/include/cppuddle/memory_recycling/util/hip_recycling_device_buffer.hpp +++ b/include/cppuddle/memory_recycling/util/hip_recycling_device_buffer.hpp @@ -9,10 +9,15 @@ // import recycle_allocator_hip_device #include "cppuddle/memory_recycling/hip_recycling_allocators.hpp" +/// \file +/// Contains RAII wrappers for HIP device buffers. 
Intended to be used with +/// the recycling allocators but technically any allocator should work + namespace cppuddle { namespace memory_recycling { /// RAII wrapper for HIP device memory +/// (ideally used with a recycling allocator) template ::value, int> = 0> struct hip_device_buffer { recycle_allocator_hip_device allocator; @@ -37,6 +42,7 @@ struct hip_device_buffer { }; /// RAII wrapper for CUDA device memory using a passed aggregated allocator +/// (which ideally should be an allocator_slice from the work aggregation) template ::value, int> = 0> struct hip_aggregated_device_buffer { T *device_side_buffer; diff --git a/include/cppuddle/memory_recycling/util/recycling_kokkos_view.hpp b/include/cppuddle/memory_recycling/util/recycling_kokkos_view.hpp index b8ca526c..97626ebb 100644 --- a/include/cppuddle/memory_recycling/util/recycling_kokkos_view.hpp +++ b/include/cppuddle/memory_recycling/util/recycling_kokkos_view.hpp @@ -11,11 +11,16 @@ #include "cppuddle/memory_recycling/buffer_management_interface.hpp" +/// \file +/// Contains a Kokkos View Wrapper which automatically uses +/// recycled memory. Also contains an aggregated version for +/// usage with the kernel aggregation namespace cppuddle { namespace memory_recycling { - +/// Convenience struct to delete the view once the reference counting +/// reaches 0 template struct view_deleter { alloc_type allocator; @@ -27,6 +32,9 @@ struct view_deleter { } }; +/// Kokkos View that automatically uses a recycling allocator using +/// alloc_type as an underlying allocator. 
Must be passed an existing allocator object +/// (which should be an allocator_slice from the kernel aggregation functionality) template class aggregated_recycling_view : public kokkos_type { private: @@ -85,6 +93,8 @@ class aggregated_recycling_view : public kokkos_type { }; +/// Kokkos View that automatically uses a recycling allocator using +/// alloc_type as an underlying allocator template class recycling_view : public kokkos_type { private: