From f4dd2896e37e27fb767b6eed337cc7c5e0e17cfb Mon Sep 17 00:00:00 2001 From: Patrick Stotko Date: Tue, 19 Nov 2024 15:17:27 +0100 Subject: [PATCH] vector: Extend support for custom execution policies --- src/stdgpu/impl/deque_detail.cuh | 2 +- src/stdgpu/impl/vector_detail.cuh | 166 ++++++++++++++++++++++++++---- src/stdgpu/vector.cuh | 125 +++++++++++++++++++++- tests/stdgpu/vector.inc | 32 +++--- 4 files changed, 286 insertions(+), 39 deletions(-) diff --git a/src/stdgpu/impl/deque_detail.cuh b/src/stdgpu/impl/deque_detail.cuh index 04f365b34..fbda4d320 100644 --- a/src/stdgpu/impl/deque_detail.cuh +++ b/src/stdgpu/impl/deque_detail.cuh @@ -77,7 +77,7 @@ deque::destroyDeviceObject(ExecutionPolicy&& policy, deque()) { - device_object.clear(); + device_object.clear(std::forward(policy)); } allocator_traits::deallocate(device_object._allocator, diff --git a/src/stdgpu/impl/vector_detail.cuh b/src/stdgpu/impl/vector_detail.cuh index d89a0da94..159645f2d 100644 --- a/src/stdgpu/impl/vector_detail.cuh +++ b/src/stdgpu/impl/vector_detail.cuh @@ -73,7 +73,7 @@ vector::destroyDeviceObject(ExecutionPolicy&& policy, vector()) { - device_object.clear(); + device_object.clear(std::forward(policy)); } allocator_traits::deallocate(device_object._allocator, @@ -346,7 +346,7 @@ vector_clear_iota(ExecutionPolicy&& policy, vector& v, const T& va { iota(std::forward(policy), device_begin(v.data()), device_end(v.data()), value); v._occupied.set(std::forward(policy)); - v._size.store(static_cast(v.capacity())); + v._size.store(std::forward(policy), static_cast(v.capacity())); } } // namespace detail @@ -370,14 +370,14 @@ vector::insert(ExecutionPolicy&& policy, ValueIterator begin, ValueIterator end) { - if (position != device_end()) + if (position != device_end(std::forward(policy))) { printf("stdgpu::vector::insert : Position not equal to device_end()\n"); return; } index_t N = static_cast(end - begin); - index_t new_size = size() + N; + index_t new_size = size(std::forward(policy)) + N; if (new_size > capacity()) { @@ -392,7 +392,7 @@ vector::insert(ExecutionPolicy&& policy, N, detail::vector_insert(*this, size(), begin)); - _size.store(static_cast(new_size)); + _size.store(std::forward(policy), static_cast(new_size)); } template @@ -408,14 +408,14 @@ template ::erase(ExecutionPolicy&& policy, device_ptr begin, device_ptr end) { - if (end != device_end()) + if (end != device_end(std::forward(policy))) { printf("stdgpu::vector::erase : End iterator not equal to device_end()\n"); return; } index_t N = static_cast(end - begin); - index_t new_size = size() - N; + index_t new_size = size(std::forward(policy)) - N; if (new_size < 0) { @@ -426,7 +426,7 @@ vector::erase(ExecutionPolicy&& policy, device_ptr begin, for_each_index(std::forward(policy), N, detail::vector_erase(*this, new_size)); - _size.store(static_cast(new_size)); + _size.store(std::forward(policy), static_cast(new_size)); } template @@ -436,6 +436,15 @@ vector::empty() const return (size() == 0); } +template +template >)> +inline bool +vector::empty(ExecutionPolicy&& policy) const +{ + return (size(std::forward(policy)) == 0); +} + template inline STDGPU_HOST_DEVICE bool vector::full() const @@ -443,6 +452,15 @@ vector::full() const return (size() == max_size()); } +template +template >)> +inline bool +vector::full(ExecutionPolicy&& policy) const +{ + return (size(std::forward(policy)) == max_size()); +} + template inline STDGPU_HOST_DEVICE index_t vector::size() const @@ -472,6 +490,37 @@ vector::size() const return current_size; } +template +template >)> +inline index_t +vector::size(ExecutionPolicy&& policy) const +{ + index_t current_size = static_cast(_size.load(std::forward(policy))); + + // Check boundary cases where the push/pop caused the pointers to be overful/underful + if (current_size < 0) + { + printf("stdgpu::vector::size : Size out of bounds: %" STDGPU_PRIINDEX " not in [0, %" STDGPU_PRIINDEX + "]. Clamping to 0\n", + current_size, + capacity()); + return 0; + } + if (current_size > capacity()) + { + printf("stdgpu::vector::size : Size out of bounds: %" STDGPU_PRIINDEX " not in [0, %" STDGPU_PRIINDEX + "]. Clamping to %" STDGPU_PRIINDEX "\n", + current_size, + capacity(), + capacity()); + return capacity(); + } + + STDGPU_ENSURES(current_size <= capacity()); + return current_size; +} + template inline STDGPU_HOST_DEVICE index_t vector::max_size() const noexcept @@ -520,14 +569,14 @@ template ::clear(ExecutionPolicy&& policy) { - if (empty()) + if (empty(std::forward(policy))) { return; } if (!detail::is_destroy_optimizable()) { - const index_t current_size = size(); + const index_t current_size = size(std::forward(policy)); detail::unoptimized_destroy(std::forward(policy), stdgpu::device_begin(_data), @@ -536,9 +585,9 @@ vector::clear(ExecutionPolicy&& policy) _occupied.reset(std::forward(policy)); - _size.store(static_cast(0)); + _size.store(std::forward(policy), static_cast(0)); - STDGPU_ENSURES(empty()); + STDGPU_ENSURES(empty(std::forward(policy))); STDGPU_ENSURES(valid(std::forward(policy))); } @@ -561,13 +610,23 @@ vector::valid(ExecutionPolicy&& policy) const return true; } - return (size_valid() && occupied_count_valid(std::forward(policy)) && + return (size_valid(std::forward(policy)) && + occupied_count_valid(std::forward(policy)) && _locks.valid(std::forward(policy))); } template device_ptr vector::device_begin() +{ + return device_begin(execution::device); +} + +template +template >)> +device_ptr +vector::device_begin([[maybe_unused]] ExecutionPolicy&& policy) { return stdgpu::device_begin(_data); } @@ -576,12 +635,30 @@ template device_ptr vector::device_end() { - return device_begin() + size(); + return device_end(execution::device); +} + +template +template >)> +device_ptr +vector::device_end(ExecutionPolicy&& policy) +{ + return stdgpu::device_begin(_data) + size(std::forward(policy)); } template device_ptr vector::device_begin() const +{ + return device_begin(execution::device); +} + +template +template >)> +device_ptr +vector::device_begin([[maybe_unused]] ExecutionPolicy&& policy) const { return stdgpu::device_begin(_data); } @@ -590,12 +667,30 @@ template device_ptr vector::device_end() const { - return device_begin() + size(); + return device_end(execution::device); +} + +template +template >)> +device_ptr +vector::device_end(ExecutionPolicy&& policy) const +{ + return stdgpu::device_begin(_data) + size(std::forward(policy)); } template device_ptr vector::device_cbegin() const +{ + return device_cbegin(execution::device); +} + +template +template >)> +device_ptr +vector::device_cbegin([[maybe_unused]] ExecutionPolicy&& policy) const { return stdgpu::device_cbegin(_data); } @@ -604,21 +699,48 @@ template device_ptr vector::device_cend() const { - return device_cbegin() + size(); + return device_cend(execution::device); +} + +template +template >)> +device_ptr +vector::device_cend(ExecutionPolicy&& policy) const +{ + return stdgpu::device_cbegin(_data) + size(std::forward(policy)); } template stdgpu::device_range vector::device_range() { - return stdgpu::device_range(_data, size()); + return device_range(execution::device); +} + +template +template >)> +stdgpu::device_range +vector::device_range(ExecutionPolicy&& policy) +{ + return stdgpu::device_range(_data, size(std::forward(policy))); } template stdgpu::device_range vector::device_range() const { - return stdgpu::device_range(_data, size()); + return device_range(execution::device); +} + +template +template >)> +stdgpu::device_range +vector::device_range(ExecutionPolicy&& policy) const +{ + return stdgpu::device_range(_data, size(std::forward(policy))); } template @@ -636,17 +758,19 @@ template bool vector::occupied_count_valid(ExecutionPolicy&& policy) const { - index_t size_count = size(); + index_t size_count = size(std::forward(policy)); index_t size_sum = _occupied.count(std::forward(policy)); return (size_count == size_sum); } template +template >)> bool -vector::size_valid() const +vector::size_valid(ExecutionPolicy&& policy) const { - index_t current_size = static_cast(_size.load()); + index_t current_size = static_cast(_size.load(std::forward(policy))); return (0 <= current_size && current_size <= capacity()); } diff --git a/src/stdgpu/vector.cuh b/src/stdgpu/vector.cuh index 71a74c8c8..21262c396 100644 --- a/src/stdgpu/vector.cuh +++ b/src/stdgpu/vector.cuh @@ -290,6 +290,17 @@ public: [[nodiscard]] STDGPU_HOST_DEVICE bool empty() const; + /** + * \brief Checks if the object is empty + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return True if the object is empty, false otherwise + */ + template >)> + [[nodiscard]] bool + empty(ExecutionPolicy&& policy) const; + /** * \brief Checks if the object is full * \return True if the object is full, false otherwise @@ -297,6 +308,17 @@ public: STDGPU_HOST_DEVICE bool full() const; + /** + * \brief Checks if the object is full + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return True if the object is full, false otherwise + */ + template >)> + bool + full(ExecutionPolicy&& policy) const; + /** * \brief Returns the current size * \return The size @@ -304,6 +326,17 @@ public: STDGPU_HOST_DEVICE index_t size() const; + /** + * \brief Returns the current size + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return The size + */ + template >)> + index_t + size(ExecutionPolicy&& policy) const; + /** * \brief Returns the maximal size * \return The maximal size @@ -380,6 +413,17 @@ public: device_ptr device_begin(); + /** + * \brief Creates a pointer to the begin of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A pointer to the begin of the object + */ + template >)> + device_ptr + device_begin(ExecutionPolicy&& policy); + /** * \brief Creates a pointer to the end of the device container * \return A pointer to the end of the object @@ -387,6 +431,17 @@ public: device_ptr device_end(); + /** + * \brief Creates a pointer to the end of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A pointer to the end of the object + */ + template >)> + device_ptr + device_end(ExecutionPolicy&& policy); + /** * \brief Creates a pointer to the begin of the device container * \return A const pointer to the begin of the object @@ -394,6 +449,17 @@ public: device_ptr device_begin() const; + /** + * \brief Creates a pointer to the begin of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A const pointer to the begin of the object + */ + template >)> + device_ptr + device_begin(ExecutionPolicy&& policy) const; + /** * \brief Creates a pointer to the end of the device container * \return A const pointer to the end of the object @@ -401,6 +467,17 @@ public: device_ptr device_end() const; + /** + * \brief Creates a pointer to the end of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A const pointer to the end of the object + */ + template >)> + device_ptr + device_end(ExecutionPolicy&& policy) const; + /** * \brief Creates a pointer to the begin of the device container * \return A const pointer to the begin of the object @@ -408,6 +485,17 @@ public: device_ptr device_cbegin() const; + /** + * \brief Creates a pointer to the begin of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A const pointer to the begin of the object + */ + template >)> + device_ptr + device_cbegin(ExecutionPolicy&& policy) const; + /** * \brief Creates a pointer to the end of the device container * \return A const pointer to the end of the object @@ -415,6 +503,17 @@ public: device_ptr device_cend() const; + /** + * \brief Creates a pointer to the end of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A const pointer to the end of the object + */ + template >)> + device_ptr + device_cend(ExecutionPolicy&& policy) const; + /** * \brief Creates a range of the device container * \return A range of the object @@ -422,6 +521,17 @@ public: stdgpu::device_range device_range(); + /** + * \brief Creates a range of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A range of the object + */ + template >)> + stdgpu::device_range + device_range(ExecutionPolicy&& policy); + /** * \brief Creates a range of the device container * \return A const range of the object @@ -429,6 +539,17 @@ public: stdgpu::device_range device_range() const; + /** + * \brief Creates a range of the device container + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return A const range of the object + */ + template >)> + stdgpu::device_range + device_range(ExecutionPolicy&& policy) const; + private: template friend class detail::vector_insert; @@ -447,8 +568,10 @@ private: bool occupied_count_valid(ExecutionPolicy&& policy) const; + template >)> bool - size_valid() const; + size_valid(ExecutionPolicy&& policy) const; using mutex_array_allocator_type = typename stdgpu::allocator_traits::template rebind_alloc; diff --git a/tests/stdgpu/vector.inc b/tests/stdgpu/vector.inc index ac5f81856..df4b9c3db 100644 --- a/tests/stdgpu/vector.inc +++ b/tests/stdgpu/vector.inc @@ -754,14 +754,14 @@ TEST_F(stdgpu_vector, insert_custom_execution_policy) int* values = createDeviceArray(N_insert); stdgpu::iota(policy, stdgpu::device_begin(values), stdgpu::device_end(values), static_cast(N_init) + 1); - pool.insert(policy, pool.device_end(), stdgpu::device_begin(values), stdgpu::device_end(values)); + pool.insert(policy, pool.device_end(policy), stdgpu::device_begin(values), stdgpu::device_end(values)); - ASSERT_EQ(pool.size(), N_init + N_insert); - ASSERT_FALSE(pool.empty()); - ASSERT_FALSE(pool.full()); - ASSERT_TRUE(pool.valid()); + ASSERT_EQ(pool.size(policy), N_init + N_insert); + ASSERT_FALSE(pool.empty(policy)); + ASSERT_FALSE(pool.full(policy)); + ASSERT_TRUE(pool.valid(policy)); - int* host_numbers = copyCreateDevice2HostArray(pool.data(), pool.size()); + int* host_numbers = copyCreateDevice2HostArray(pool.data(), pool.size(policy)); for (stdgpu::index_t i = 0; i < pool.size(); ++i) { EXPECT_EQ(host_numbers[i], i + 1); @@ -865,14 +865,14 @@ TEST_F(stdgpu_vector, erase_custom_execution_policy) fill_vector(pool, N_init); - pool.erase(policy, pool.device_end() - N_erase, pool.device_end()); + pool.erase(policy, pool.device_end(policy) - N_erase, pool.device_end(policy)); - ASSERT_EQ(pool.size(), N_init - N_erase); - ASSERT_FALSE(pool.empty()); - ASSERT_FALSE(pool.full()); - ASSERT_TRUE(pool.valid()); + ASSERT_EQ(pool.size(policy), N_init - N_erase); + ASSERT_FALSE(pool.empty(policy)); + ASSERT_FALSE(pool.full(policy)); + ASSERT_TRUE(pool.valid(policy)); - int* host_numbers = copyCreateDevice2HostArray(pool.data(), pool.size()); + int* host_numbers = copyCreateDevice2HostArray(pool.data(), pool.size(policy)); for (stdgpu::index_t i = 0; i < pool.size(); ++i) { EXPECT_EQ(host_numbers[i], i + 1); @@ -952,10 +952,10 @@ TEST_F(stdgpu_vector, clear_custom_execution_policy) pool.clear(policy); - ASSERT_EQ(pool.size(), 0); - ASSERT_TRUE(pool.empty()); - ASSERT_FALSE(pool.full()); - ASSERT_TRUE(pool.valid()); + ASSERT_EQ(pool.size(policy), 0); + ASSERT_TRUE(pool.empty(policy)); + ASSERT_FALSE(pool.full(policy)); + ASSERT_TRUE(pool.valid(policy)); stdgpu::vector::destroyDeviceObject(pool); }