From 1519a8953c0ac84e73689727b8867f626f81c9cc Mon Sep 17 00:00:00 2001 From: Patrick Stotko Date: Mon, 9 Sep 2024 08:51:44 +0200 Subject: [PATCH] algorithm,memory,numeric: Improve thrust compatibility via ADL barriers --- src/stdgpu/algorithm.h | 43 +++++++++++++++++++++++ src/stdgpu/impl/algorithm_detail.h | 6 ++++ src/stdgpu/impl/memory_detail.h | 3 ++ src/stdgpu/impl/numeric_detail.h | 3 ++ src/stdgpu/memory.h | 56 ++++++++++++++++++++++++++++++ src/stdgpu/numeric.h | 20 +++++++++++ 6 files changed, 131 insertions(+) diff --git a/src/stdgpu/algorithm.h b/src/stdgpu/algorithm.h index bde5e07e9..be73a65d7 100644 --- a/src/stdgpu/algorithm.h +++ b/src/stdgpu/algorithm.h @@ -86,6 +86,8 @@ template >)> +void +fill(ExecutionPolicy&& policy, Iterator begin, Iterator end, const T& value); + +template >)> +Iterator +fill_n(ExecutionPolicy&& policy, Iterator begin, Size n, const T& value); + +template >)> +OutputIt +copy(ExecutionPolicy&& policy, InputIt begin, InputIt end, OutputIt output_begin); + +template >)> +OutputIt +copy_n(ExecutionPolicy&& policy, InputIt begin, Size n, OutputIt output_begin); + +} // namespace adl_barrier + +using namespace adl_barrier; +//! @endcond + } // namespace stdgpu #include diff --git a/src/stdgpu/impl/algorithm_detail.h b/src/stdgpu/impl/algorithm_detail.h index 1507c7a9f..fd816d328 100644 --- a/src/stdgpu/impl/algorithm_detail.h +++ b/src/stdgpu/impl/algorithm_detail.h @@ -85,6 +85,8 @@ class fill_functor }; } // namespace detail +namespace adl_barrier +{ template (policy), n, detail::fill_functor(begin, value)); return begin + n; } +} // namespace adl_barrier namespace detail { @@ -131,6 +134,8 @@ class copy_functor }; } // namespace detail +namespace adl_barrier +{ template (begin, output_begin)); return output_begin + n; } +} // namespace adl_barrier } // namespace stdgpu diff --git a/src/stdgpu/impl/memory_detail.h b/src/stdgpu/impl/memory_detail.h index 3ad2f1d9d..e2cc86911 100644 --- a/src/stdgpu/impl/memory_detail.h +++ b/src/stdgpu/impl/memory_detail.h @@ -705,6 +705,8 @@ destroy_at(T* p) p->~T(); } +namespace adl_barrier +{ template dynamic_memory_type diff --git a/src/stdgpu/impl/numeric_detail.h b/src/stdgpu/impl/numeric_detail.h index 2b04832b5..c99117444 100644 --- a/src/stdgpu/impl/numeric_detail.h +++ b/src/stdgpu/impl/numeric_detail.h @@ -49,6 +49,8 @@ class iota_functor }; } // namespace detail +namespace adl_barrier +{ template (end - begin), detail::iota_functor(begin, value)); } +} // namespace adl_barrier template STDGPU_HOST_DEVICE void destroy_at(T* p); +#ifdef STDGPU_RUN_DOXYGEN + /** * \ingroup memory * \brief Writes the given value to into the given range using the copy constructor @@ -882,6 +884,60 @@ template >)> +void +uninitialized_fill(ExecutionPolicy&& policy, Iterator begin, Iterator end, const T& value); + +template >)> +Iterator +uninitialized_fill_n(ExecutionPolicy&& policy, Iterator begin, Size n, const T& value); + +template >)> +OutputIt +uninitialized_copy(ExecutionPolicy&& policy, InputIt begin, InputIt end, OutputIt output_begin); + +template >)> +OutputIt +uninitialized_copy_n(ExecutionPolicy&& policy, InputIt begin, Size n, OutputIt output_begin); + +template >)> +void +destroy(ExecutionPolicy&& policy, Iterator first, Iterator last); + +template >)> +Iterator +destroy_n(ExecutionPolicy&& policy, Iterator first, Size n); + +} // namespace adl_barrier + +using namespace adl_barrier; +//! @endcond + /** * \ingroup memory * \brief Registers the given memory block into the internal memory size manger diff --git a/src/stdgpu/numeric.h b/src/stdgpu/numeric.h index 3df1f341d..ba1dd0187 100644 --- a/src/stdgpu/numeric.h +++ b/src/stdgpu/numeric.h @@ -30,6 +30,8 @@ namespace stdgpu { +#ifdef STDGPU_RUN_DOXYGEN + /** * \ingroup numeric * \brief Writes ascending values {values + i} to the i-th position of the given range @@ -48,6 +50,24 @@ template >)> +void +iota(ExecutionPolicy&& policy, Iterator begin, Iterator end, T value); + +} // namespace adl_barrier + +using namespace adl_barrier; +//! @endcond + /** * \ingroup numeric * \brief Calls the given unary function with an index from the range [0, size) and performs a reduction afterwards