diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
new file mode 100644
index 00000000..d3c45fe8
--- /dev/null
+++ b/.github/workflows/documentation.yml
@@ -0,0 +1,44 @@
+name: generate documentation
+
+# triggers: pushes to master/documentation and pull requests against master
+on:
+  push:
+    branches:
+      - master
+      - documentation
+  pull_request:
+    branches:
+      - master
+
+jobs:
+  build-documentation:
+    runs-on: ubuntu-latest
+    steps:
+      # checkout repository into ./cppuddle
+      - name: Checkout cppuddle
+        uses: actions/checkout@v4
+        with:
+          path: cppuddle
+      # install dependencies (doxygen, plus graphviz for the dot graphs)
+      - name: Dependencies
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y doxygen graphviz
+      # CMake: configure with the documentation target enabled
+      - name: Configure
+        run: |
+          cd cppuddle
+          mkdir build
+          cd build
+          cmake -DCPPUDDLE_WITH_DOCUMENTATION=ON ..
+      # make: run the "doc" target inside the build directory
+      - name: Generate
+        run: make -C cppuddle/build doc
+      # deploy to github pages; restricted to push events so that pull request
+      # builds only verify the documentation and never overwrite the published site
+      - name: Deploy
+        if: github.event_name == 'push'
+        uses: peaceiris/actions-gh-pages@v3
+        with:
+          github_token: ${{ secrets.GITHUB_TOKEN }}
+          publish_dir: ./cppuddle/docs/html
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 2212d40b..d88a3d27 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,6 +39,7 @@ option(CPPUDDLE_WITH_EXECUTOR_RECYCLING "Enables the default executor recycling
 # Tooling options
 option(CPPUDDLE_WITH_CLANG_TIDY "Enable clang tidy warnings" OFF)
 option(CPPUDDLE_WITH_CLANG_FORMAT "Enable clang format target" OFF)
+option(CPPUDDLE_WITH_DOCUMENTATION "Build documentation using doxygen." OFF)
 
 #------------------------------------------------------------------------------------------------------------
 # Define dependencies and conflicts/incompatibilities
@@ -146,6 +147,10 @@ if (CPPUDDLE_WITH_CLANG_FORMAT)
   endif()
 endif()
 
+if (CPPUDDLE_WITH_DOCUMENTATION)
+    add_subdirectory(docs)
+endif ()
+
 #------------------------------------------------------------------------------------------------------------
 # Define library targets and installation
 # (also includes various warnings for non-optimal build configurations)
diff --git a/README.md b/README.md
index 2af91449..288f6332 100644
--- a/README.md
+++ b/README.md
@@ -18,6 +18,8 @@ In this use-case, allocating GPU buffers for all sub-grids in advance would have
 - Executor pools and various scheduling policies (round robin, priority queue, multi-gpu), which rely on reference counting to gauge the current load of a executor instead of querying the device itself. Tested with CUDA, HIP and Kokkos executors provided by HPX / HPX-Kokkos.
 - Special Executors/Allocators for on-the-fly work GPU aggregation (using HPX).
 
+The documentation of the current master branch is available [here](https://sc-sgs.github.io/CPPuddle/). In particular, the public functionality for the memory recycling is available in the namespace [memory_recycling](https://sc-sgs.github.io/CPPuddle/namespacecppuddle_1_1memory__recycling.html), for the executor pools it is available in the namespace [executor_recycling](https://sc-sgs.github.io/CPPuddle/namespacecppuddle_1_1executor__recycling.html), and the work aggregation (kernel fusion) functionality is available in the namespace [kernel_aggregation](https://sc-sgs.github.io/CPPuddle/namespacecppuddle_1_1kernel__aggregation.html).
+
 #### Requirements
 
 - C++17
diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
new file mode 100644
index 00000000..660b3647
--- /dev/null
+++ b/docs/CMakeLists.txt
@@ -0,0 +1,36 @@
+find_package(Doxygen REQUIRED OPTIONAL_COMPONENTS dot)
+
+## configure doxygen (generated output is written into the source tree under docs/)
+set(DOXYGEN_OUTPUT_DIRECTORY "${PROJECT_SOURCE_DIR}/docs")
+set(DOXYGEN_USE_MDFILE_AS_MAINPAGE "README.md")
+set(DOXYGEN_FILE_PATTERNS "*.hpp;*.cpp;*.cuh;*.cl;*.dox")
+set(DOXYGEN_EXTENSION_MAPPING "cu=c++;cuh=c++;cl=c++")
+set(DOXYGEN_STRIP_FROM_PATH "${PROJECT_SOURCE_DIR}")
+set(DOXYGEN_EXCLUDE "")
+set(DOXYGEN_ABBREVIATE_BRIEF "")
+set(DOXYGEN_QUIET "YES")
+set(DOXYGEN_HTML_TIMESTAMP "YES")
+set(DOXYGEN_NUM_PROC_THREADS 0)
+set(DOXYGEN_WARN_NO_PARAMDOC "YES")
+set(DOXYGEN_SORT_MEMBER_DOCS "YES")
+set(DOXYGEN_INLINE_INHERITED_MEMB "YES")
+set(DOXYGEN_EXCLUDE_SYMBOLS "*_HPP_")
+
+set(DOXYGEN_DOT_IMAGE_FORMAT "svg")
+set(DOXYGEN_INTERACTIVE_SVG "YES")
+set(DOXYGEN_INCLUDE_GRAPH "YES")
+set(DOXYGEN_EXTRACT_ALL "YES")
+
+## enable preprocessing; EXPAND_AS_DEFINED takes a list of macro names (none needed so far)
+set(DOXYGEN_ENABLE_PREPROCESSING "YES")
+set(DOXYGEN_MACRO_EXPANSION "YES")
+set(DOXYGEN_EXPAND_ONLY_PREDEF "YES")
+set(DOXYGEN_EXPAND_AS_DEFINED "")
+
+## add doxygen as target ("doc"), covering the headers, the tests and the README
+doxygen_add_docs(
+        doc
+        "${PROJECT_SOURCE_DIR}/include;${PROJECT_SOURCE_DIR}/tests;${PROJECT_SOURCE_DIR}/README.md"
+        WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+        COMMENT "Generating API documentation with Doxygen"
+)
diff --git a/include/buffer_manager.hpp b/include/buffer_manager.hpp
index 69020e5b..51d54fd3 100644
--- a/include/buffer_manager.hpp
+++ b/include/buffer_manager.hpp
@@ -15,6 +15,7 @@
 #include "cppuddle/memory_recycling/detail/buffer_management.hpp"
 #include "cppuddle/memory_recycling/std_recycling_allocators.hpp"
 
+/// Deprecated LEGACY namespace. Kept around for compatibility with old code for now
 namespace recycler {
 
 namespace detail {
diff --git a/include/cppuddle/executor_recycling/executor_pools_interface.hpp b/include/cppuddle/executor_recycling/executor_pools_interface.hpp
index 49a6d42d..16b7873f 100644
--- a/include/cppuddle/executor_recycling/executor_pools_interface.hpp
+++ b/include/cppuddle/executor_recycling/executor_pools_interface.hpp
@@ -6,22 +6,31 @@
 #ifndef EXECUTOR_POOLS_INTERFACE_HPP
 #define EXECUTOR_POOLS_INTERFACE_HPP
 
+/// \file
+/// Executor recycling public interface
+
 #include "cppuddle/executor_recycling/detail/executor_pools_management.hpp"
 
+/// main CPPuddle namespace
 namespace cppuddle {
+/// CPPuddle namespace containing the executor pool functionality
 namespace executor_recycling {
 
+/// Round robin pool strategy implementation
 template <typename Interface>
 using round_robin_pool_impl =
         detail::round_robin_pool_impl<Interface>;
 
+/// Priority pool strategy implementation
 template <typename Interface>
 using priority_pool_impl =
         detail::priority_pool_impl<Interface>;
 
+/// Main access to all executor pools
 using executor_pool =
         detail::executor_pool;
 
+/// RAII wrapper for executors
 template <typename Interface, typename Pool>
 using executor_interface =
         detail::executor_interface<Interface, Pool>;
diff --git a/include/cppuddle/kernel_aggregation/kernel_aggregation_interface.hpp b/include/cppuddle/kernel_aggregation/kernel_aggregation_interface.hpp
index c7a3b633..b2597be6 100644
--- a/include/cppuddle/kernel_aggregation/kernel_aggregation_interface.hpp
+++ b/include/cppuddle/kernel_aggregation/kernel_aggregation_interface.hpp
@@ -9,20 +9,35 @@
 #include "cppuddle/kernel_aggregation/detail/aggregation_executors_and_allocators.hpp"
 #include "cppuddle/kernel_aggregation/detail/aggregation_executor_pools.hpp"
 
+/// \file
+/// Kernel aggregation public interface
+
 namespace cppuddle {
+/// CPPuddle namespace containing the kernel aggregation functionality
 namespace kernel_aggregation {
 
+/// Possible launch modes:
+/// - EAGER   = launch either when enough kernels are aggregated or the executor becomes idle
+/// - STRICT  = launch only when enough kernels are aggregated (be aware of deadlocks when not
+///             enough kernels are available!)
+/// - ENDLESS = launch only when the executor becomes idle
 using aggregated_executor_modes =
     cppuddle::kernel_aggregation::detail::aggregated_executor_modes;
 
+/// Allocator to get a buffer slice of a buffer shared with other
+/// tasks in the same aggregation region
 template <typename T, typename Host_Allocator, typename Executor>
 using allocator_slice =
     cppuddle::kernel_aggregation::detail::allocator_slice<T, Host_Allocator, Executor>;
 
+/// Executor facilitating the kernel aggregation
+/// Contains the executor_slice subclass which is intended to be used
+/// by the individual tasks
 template <typename Executor>
 using aggregated_executor =
     cppuddle::kernel_aggregation::detail::aggregated_executor<Executor>;
 
+/// Pool to get an aggregation executor for the desired code region (kernelname)
 template <const char *kernelname, class Interface, class Pool>
 using aggregation_pool =
     cppuddle::kernel_aggregation::detail::aggregation_pool<kernelname, Interface,
diff --git a/include/cppuddle/memory_recycling/aligned_recycling_allocators.hpp b/include/cppuddle/memory_recycling/aligned_recycling_allocators.hpp
index a824e7e0..d609c0c1 100644
--- a/include/cppuddle/memory_recycling/aligned_recycling_allocators.hpp
+++ b/include/cppuddle/memory_recycling/aligned_recycling_allocators.hpp
@@ -9,6 +9,10 @@
 #include <boost/align/aligned_allocator.hpp>
 #include "buffer_management_interface.hpp"
 
+/// \file
+/// Contains the recycling allocators (in the form of type aliases)
+/// using the boost aligned_allocator as an underlying allocator
+
 namespace cppuddle {
 namespace memory_recycling {
 
diff --git a/include/cppuddle/memory_recycling/buffer_management_interface.hpp b/include/cppuddle/memory_recycling/buffer_management_interface.hpp
index c5fa44cd..d6bc18f9 100644
--- a/include/cppuddle/memory_recycling/buffer_management_interface.hpp
+++ b/include/cppuddle/memory_recycling/buffer_management_interface.hpp
@@ -8,7 +8,11 @@
 
 #include "detail/buffer_management.hpp"
 
+/// \file
+/// Memory recycling public interface
+
 namespace cppuddle {
+/// CPPuddle namespace containing the memory recycling functionality
 namespace memory_recycling {
 
 /// Print performance counters of all buffer managers to stdout
diff --git a/include/cppuddle/memory_recycling/cuda_recycling_allocators.hpp b/include/cppuddle/memory_recycling/cuda_recycling_allocators.hpp
index b47a4fe2..6cc9c565 100644
--- a/include/cppuddle/memory_recycling/cuda_recycling_allocators.hpp
+++ b/include/cppuddle/memory_recycling/cuda_recycling_allocators.hpp
@@ -10,6 +10,11 @@
 // import cuda_pinned_allocator and cuda_device_allocator
 #include "detail/cuda_underlying_allocators.hpp"
 
+/// \file
+/// Contains the CUDA recycling allocators (in the form of type aliases)
+/// for both pinned host memory and device memory. Also contains the required
+/// device selector for MultiGPU setups with these allocators.
+
 namespace cppuddle {
 namespace memory_recycling {
 
diff --git a/include/cppuddle/memory_recycling/hip_recycling_allocators.hpp b/include/cppuddle/memory_recycling/hip_recycling_allocators.hpp
index 13b5241b..77f4b618 100644
--- a/include/cppuddle/memory_recycling/hip_recycling_allocators.hpp
+++ b/include/cppuddle/memory_recycling/hip_recycling_allocators.hpp
@@ -10,6 +10,11 @@
 // import hip_pinned_allocator and hip_device_allocator
 #include "detail/hip_underlying_allocators.hpp"
 
+/// \file
+/// Contains the HIP recycling allocators (in the form of type aliases)
+/// for both pinned host memory and device memory. Also contains the required
+/// device selector for MultiGPU setups with these allocators.
+
 namespace cppuddle {
 namespace memory_recycling {
 
diff --git a/include/cppuddle/memory_recycling/std_recycling_allocators.hpp b/include/cppuddle/memory_recycling/std_recycling_allocators.hpp
index 21fd5c2c..6681c67f 100644
--- a/include/cppuddle/memory_recycling/std_recycling_allocators.hpp
+++ b/include/cppuddle/memory_recycling/std_recycling_allocators.hpp
@@ -8,6 +8,10 @@
 
 #include "buffer_management_interface.hpp"
 
+/// \file
+/// Contains the recycling allocators (in the form of type aliases)
+/// using the std memory allocator 
+
 namespace cppuddle {
 namespace memory_recycling {
 
diff --git a/include/cppuddle/memory_recycling/sycl_recycling_allocators.hpp b/include/cppuddle/memory_recycling/sycl_recycling_allocators.hpp
index fd494bca..f01e9598 100644
--- a/include/cppuddle/memory_recycling/sycl_recycling_allocators.hpp
+++ b/include/cppuddle/memory_recycling/sycl_recycling_allocators.hpp
@@ -9,6 +9,10 @@
 #include "buffer_management_interface.hpp"
 #include "detail/sycl_underlying_allocators.hpp"
 
+/// \file
+/// Contains the SYCL recycling allocators (in the form of type aliases)
+/// for both pinned host memory and device memory. 
+
 namespace cppuddle {
 namespace memory_recycling {
 
diff --git a/include/cppuddle/memory_recycling/util/cuda_recycling_device_buffer.hpp b/include/cppuddle/memory_recycling/util/cuda_recycling_device_buffer.hpp
index dbd7e4c8..2a687041 100644
--- a/include/cppuddle/memory_recycling/util/cuda_recycling_device_buffer.hpp
+++ b/include/cppuddle/memory_recycling/util/cuda_recycling_device_buffer.hpp
@@ -9,11 +9,16 @@
 // import recycle_allocator_cuda_device
 #include "cppuddle/memory_recycling/cuda_recycling_allocators.hpp""
 
+/// \file
+/// Contains RAII wrappers for CUDA device buffers. Intended to be used with
+/// the recycling allocators, but technically any allocator should work
+
 namespace cppuddle {
 namespace memory_recycling {
 
 
 /// RAII wrapper for CUDA device memory
+/// (ideally used with a recycling allocator)
 template <typename T, std::enable_if_t<std::is_trivial<T>::value, int> = 0>
 struct cuda_device_buffer {
   recycle_allocator_cuda_device<T> allocator;
@@ -38,6 +43,7 @@ struct cuda_device_buffer {
 };
 
 /// RAII wrapper for CUDA device memory using a passed aggregated allocator
+/// (which ideally should be an allocator_slice from the work aggregation)
 template <typename T, typename Host_Allocator, std::enable_if_t<std::is_trivial<T>::value, int> = 0>
 struct cuda_aggregated_device_buffer {
   T *device_side_buffer;
diff --git a/include/cppuddle/memory_recycling/util/hip_recycling_device_buffer.hpp b/include/cppuddle/memory_recycling/util/hip_recycling_device_buffer.hpp
index 7f04e3f7..ffc7fd0c 100644
--- a/include/cppuddle/memory_recycling/util/hip_recycling_device_buffer.hpp
+++ b/include/cppuddle/memory_recycling/util/hip_recycling_device_buffer.hpp
@@ -9,10 +9,15 @@
 // import recycle_allocator_hip_device
 #include "cppuddle/memory_recycling/hip_recycling_allocators.hpp"
 
+/// \file
+/// Contains RAII wrappers for HIP device buffers. Intended to be used with
+/// the recycling allocators, but technically any allocator should work
+
 namespace cppuddle {
 namespace memory_recycling {
 
 /// RAII wrapper for HIP device memory
+/// (ideally used with a recycling allocator)
 template <typename T, std::enable_if_t<std::is_trivial<T>::value, int> = 0>
 struct hip_device_buffer {
   recycle_allocator_hip_device<T> allocator;
@@ -37,6 +42,7 @@ struct hip_device_buffer {
 };
 
 /// RAII wrapper for CUDA device memory using a passed aggregated allocator
+/// (which ideally should be an allocator_slice from the work aggregation)
 template <typename T, typename Host_Allocator, std::enable_if_t<std::is_trivial<T>::value, int> = 0>
 struct hip_aggregated_device_buffer {
   T *device_side_buffer;
diff --git a/include/cppuddle/memory_recycling/util/recycling_kokkos_view.hpp b/include/cppuddle/memory_recycling/util/recycling_kokkos_view.hpp
index b8ca526c..97626ebb 100644
--- a/include/cppuddle/memory_recycling/util/recycling_kokkos_view.hpp
+++ b/include/cppuddle/memory_recycling/util/recycling_kokkos_view.hpp
@@ -11,11 +11,16 @@
 
 #include "cppuddle/memory_recycling/buffer_management_interface.hpp"
 
+/// \file
+/// Contains a Kokkos View wrapper which automatically uses
+/// recycled memory. Also contains an aggregated version for
+/// usage with the kernel aggregation
 
 namespace cppuddle {
 namespace memory_recycling {
 
-
+/// Convenience struct to delete the view once the reference counting
+/// reaches 0
 template<typename element_type, typename alloc_type>
 struct view_deleter {
   alloc_type allocator;
@@ -27,6 +32,9 @@ struct view_deleter {
   }
 };
 
+/// Kokkos View that automatically uses a recycling allocator using
+/// alloc_type as an underlying allocator. Must be passed an existing allocator object
+/// (which should be an allocator_slice from the kernel aggregation functionality)
 template <typename kokkos_type, typename alloc_type, typename element_type>
 class aggregated_recycling_view : public kokkos_type {
 private:
@@ -85,6 +93,8 @@ class aggregated_recycling_view : public kokkos_type {
 };
 
 
+/// Kokkos View that automatically uses a recycling allocator using
+/// alloc_type as an underlying allocator
 template <typename kokkos_type, typename alloc_type, typename element_type>
 class recycling_view : public kokkos_type {
 private: