From a2e132caa5c89cef4bbbdf894ccc64e9137d04e6 Mon Sep 17 00:00:00 2001 From: Patrick Stotko Date: Wed, 20 Nov 2024 08:23:03 +0100 Subject: [PATCH] unordered_map,unordered_set: Extend support for custom execution policies --- src/stdgpu/impl/unordered_base.cuh | 44 +++++++++++++ src/stdgpu/impl/unordered_base_detail.cuh | 76 +++++++++++++++++++---- src/stdgpu/impl/unordered_map_detail.cuh | 36 +++++++++++ src/stdgpu/impl/unordered_set_detail.cuh | 36 +++++++++++ src/stdgpu/unordered_map.cuh | 44 +++++++++++++ src/stdgpu/unordered_set.cuh | 44 +++++++++++++ tests/stdgpu/unordered_datastructure.inc | 28 ++++++--- 7 files changed, 288 insertions(+), 20 deletions(-) diff --git a/src/stdgpu/impl/unordered_base.cuh b/src/stdgpu/impl/unordered_base.cuh index ca6b538e4..db859582b 100644 --- a/src/stdgpu/impl/unordered_base.cuh +++ b/src/stdgpu/impl/unordered_base.cuh @@ -408,6 +408,17 @@ public: [[nodiscard]] STDGPU_HOST_DEVICE bool empty() const; + /** + * \brief Checks if the object is empty + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return True if the object is empty, false otherwise + */ + template >)> + [[nodiscard]] bool + empty(ExecutionPolicy&& policy) const; + /** * \brief Checks if the object is full * \return True if the object is full, false otherwise @@ -415,6 +426,17 @@ public: STDGPU_HOST_DEVICE bool full() const; + /** + * \brief Checks if the object is full + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return True if the object is full, false otherwise + */ + template >)> + bool + full(ExecutionPolicy&& policy) const; + /** * \brief The size * \return The size of the object @@ -422,6 +444,17 @@ public: STDGPU_HOST_DEVICE index_t size() const; + /** + * \brief The size + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return The size of the object + */ + template >)> + index_t + size(ExecutionPolicy&& policy) const; + /** * \brief The maximum size * \return The maximum size @@ -443,6 +476,17 @@ public: STDGPU_HOST_DEVICE float load_factor() const; + /** + * \brief The average number of elements per bucket + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return The average number of elements per bucket + */ + template >)> + float + load_factor(ExecutionPolicy&& policy) const; + /** * \brief The maximum number of elements per bucket * \return The maximum number of elements per bucket diff --git a/src/stdgpu/impl/unordered_base_detail.cuh b/src/stdgpu/impl/unordered_base_detail.cuh index dfc54ceeb..e5e350053 100644 --- a/src/stdgpu/impl/unordered_base_detail.cuh +++ b/src/stdgpu/impl/unordered_base_detail.cuh @@ -168,14 +168,16 @@ template ::value_type> unordered_base::device_range(ExecutionPolicy&& policy) { - _range_indices_end.store(0); + _range_indices_end.store(std::forward(policy), 0); for_each_index(std::forward(policy), total_count(), unordered_base_collect_positions(*this)); - return device_indexed_range(stdgpu::device_range(_range_indices, _range_indices_end.load()), - _values); + return device_indexed_range( + stdgpu::device_range(_range_indices, + _range_indices_end.load(std::forward(policy))), + _values); } template @@ -191,14 +193,15 @@ template ::value_type> unordered_base::device_range(ExecutionPolicy&& policy) const { - _range_indices_end.store(0); + _range_indices_end.store(std::forward(policy), 0); for_each_index(std::forward(policy), total_count(), unordered_base_collect_positions(*this)); return device_indexed_range( - stdgpu::device_range(_range_indices, _range_indices_end.load()), + stdgpu::device_range(_range_indices, + _range_indices_end.load(std::forward(policy))), _values); } @@ -318,7 +321,13 @@ template & base) { - int* flags = createDeviceArray(base.total_count(), 0); + using flags_allocator_type = typename stdgpu::allocator_traits::template rebind_alloc; + flags_allocator_type flags_allocator = flags_allocator_type(base.get_allocator()); + + int* flags = allocator_traits::allocate_filled(std::forward(policy), + flags_allocator, + base.total_count(), + 0); for_each_index(std::forward(policy), base.bucket_count(), @@ -330,7 +339,10 @@ loop_free(ExecutionPolicy&& policy, const unordered_base(), less_equal_one(flags)); - destroyDeviceArray(flags); + allocator_traits::deallocate_filled(std::forward(policy), + flags_allocator, + flags, + base.total_count()); return result; } @@ -447,7 +459,7 @@ inline bool occupied_count_valid(ExecutionPolicy&& policy, const unordered_base& base) { - index_t size_count = base.size(); + index_t size_count = base.size(std::forward(policy)); index_t size_sum = base._occupied.count(std::forward(policy)); return (size_count == size_sum); @@ -1059,6 +1071,15 @@ unordered_base::empty() con return (size() == 0); } +template +template >)> +inline bool +unordered_base::empty(ExecutionPolicy&& policy) const +{ + return (size(std::forward(policy)) == 0); +} + template inline STDGPU_HOST_DEVICE bool unordered_base::full() const @@ -1066,6 +1087,15 @@ unordered_base::full() cons return (size() == total_count()); } +template +template >)> +inline bool +unordered_base::full(ExecutionPolicy&& policy) const +{ + return (size(std::forward(policy)) == total_count()); +} + template inline STDGPU_HOST_DEVICE index_t unordered_base::size() const @@ -1077,6 +1107,19 @@ unordered_base::size() cons return current_size; } +template +template >)> +inline index_t +unordered_base::size(ExecutionPolicy&& policy) const +{ + index_t current_size = _occupied_count.load(std::forward(policy)); + + STDGPU_ENSURES(0 <= current_size); + STDGPU_ENSURES(current_size <= total_count()); + return current_size; +} + template inline STDGPU_HOST_DEVICE index_t unordered_base::max_size() const noexcept @@ -1105,6 +1148,15 @@ unordered_base::load_factor return static_cast(size()) / static_cast(bucket_count()); } +template +template >)> +inline float +unordered_base::load_factor(ExecutionPolicy&& policy) const +{ + return static_cast(size(std::forward(policy))) / static_cast(bucket_count()); +} + template inline STDGPU_HOST_DEVICE float unordered_base::max_load_factor() const @@ -1167,7 +1219,7 @@ template ::clear(ExecutionPolicy&& policy) { - if (empty()) + if (empty(std::forward(policy))) { return; } @@ -1183,7 +1235,7 @@ unordered_base::clear(Execu _occupied.reset(std::forward(policy)); - _occupied_count.store(0); + _occupied_count.store(std::forward(policy), 0); detail::vector_clear_iota(std::forward(policy), _excess_list_positions, bucket_count()); } @@ -1239,7 +1291,7 @@ unordered_base::createDevic detail::vector_clear_iota(std::forward(policy), result._excess_list_positions, bucket_count); - STDGPU_ENSURES(result._excess_list_positions.full()); + STDGPU_ENSURES(result._excess_list_positions.full(std::forward(policy))); return result; } @@ -1254,7 +1306,7 @@ unordered_base::destroyDevi { if (!detail::is_destroy_optimizable()) { - device_object.clear(); + device_object.clear(std::forward(policy)); } device_object._bucket_count = 0; diff --git a/src/stdgpu/impl/unordered_map_detail.cuh b/src/stdgpu/impl/unordered_map_detail.cuh index 5ff3247eb..5af92c43a 100644 --- a/src/stdgpu/impl/unordered_map_detail.cuh +++ b/src/stdgpu/impl/unordered_map_detail.cuh @@ -270,6 +270,15 @@ unordered_map::empty() const return _base.empty(); } +template +template >)> +inline bool +unordered_map::empty(ExecutionPolicy&& policy) const +{ + return _base.empty(std::forward(policy)); +} + template inline STDGPU_HOST_DEVICE bool unordered_map::full() const @@ -277,6 +286,15 @@ unordered_map::full() const return _base.full(); } +template +template >)> +inline bool +unordered_map::full(ExecutionPolicy&& policy) const +{ + return _base.full(std::forward(policy)); +} + template inline STDGPU_HOST_DEVICE index_t unordered_map::size() const @@ -284,6 +302,15 @@ unordered_map::size() const return _base.size(); } +template +template >)> +inline index_t +unordered_map::size(ExecutionPolicy&& policy) const +{ + return _base.size(std::forward(policy)); +} + template inline STDGPU_HOST_DEVICE index_t unordered_map::max_size() const noexcept @@ -305,6 +332,15 @@ unordered_map::load_factor() const return _base.load_factor(); } +template +template >)> +inline float +unordered_map::load_factor(ExecutionPolicy&& policy) const +{ + return _base.load_factor(std::forward(policy)); +} + template inline STDGPU_HOST_DEVICE float unordered_map::max_load_factor() const diff --git a/src/stdgpu/impl/unordered_set_detail.cuh b/src/stdgpu/impl/unordered_set_detail.cuh index 56989f4aa..c6ba8a3cc 100644 --- a/src/stdgpu/impl/unordered_set_detail.cuh +++ b/src/stdgpu/impl/unordered_set_detail.cuh @@ -252,6 +252,15 @@ unordered_set::empty() const return _base.empty(); } +template +template >)> +inline bool +unordered_set::empty(ExecutionPolicy&& policy) const +{ + return _base.empty(std::forward(policy)); +} + template inline STDGPU_HOST_DEVICE bool unordered_set::full() const @@ -259,6 +268,15 @@ unordered_set::full() const return _base.full(); } +template +template >)> +inline bool +unordered_set::full(ExecutionPolicy&& policy) const +{ + return _base.full(std::forward(policy)); +} + template inline STDGPU_HOST_DEVICE index_t unordered_set::size() const @@ -266,6 +284,15 @@ unordered_set::size() const return _base.size(); } +template +template >)> +inline index_t +unordered_set::size(ExecutionPolicy&& policy) const +{ + return _base.size(std::forward(policy)); +} + template inline STDGPU_HOST_DEVICE index_t unordered_set::max_size() const noexcept @@ -287,6 +314,15 @@ unordered_set::load_factor() const return _base.load_factor(); } +template +template >)> +inline float +unordered_set::load_factor(ExecutionPolicy&& policy) const +{ + return _base.load_factor(std::forward(policy)); +} + template inline STDGPU_HOST_DEVICE float unordered_set::max_load_factor() const diff --git a/src/stdgpu/unordered_map.cuh b/src/stdgpu/unordered_map.cuh index a019f5ebb..d29cb91c6 100644 --- a/src/stdgpu/unordered_map.cuh +++ b/src/stdgpu/unordered_map.cuh @@ -428,6 +428,17 @@ public: [[nodiscard]] STDGPU_HOST_DEVICE bool empty() const; + /** + * \brief Checks if the object is empty + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return True if the object is empty, false otherwise + */ + template >)> + [[nodiscard]] bool + empty(ExecutionPolicy&& policy) const; + /** * \brief Checks if the object is full * \return True if the object is full, false otherwise @@ -435,6 +446,17 @@ public: STDGPU_HOST_DEVICE bool full() const; + /** + * \brief Checks if the object is full + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return True if the object is full, false otherwise + */ + template >)> + bool + full(ExecutionPolicy&& policy) const; + /** * \brief The size * \return The size of the object @@ -442,6 +464,17 @@ public: STDGPU_HOST_DEVICE index_t size() const; + /** + * \brief The size + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return The size of the object + */ + template >)> + index_t + size(ExecutionPolicy&& policy) const; + /** * \brief The maximum size * \return The maximum size @@ -463,6 +496,17 @@ public: STDGPU_HOST_DEVICE float load_factor() const; + /** + * \brief The average number of elements per bucket + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return The average number of elements per bucket + */ + template >)> + float + load_factor(ExecutionPolicy&& policy) const; + /** * \brief The maximum number of elements per bucket * \return The maximum number of elements per bucket diff --git a/src/stdgpu/unordered_set.cuh b/src/stdgpu/unordered_set.cuh index 5a2277f77..d8990cd90 100644 --- a/src/stdgpu/unordered_set.cuh +++ b/src/stdgpu/unordered_set.cuh @@ -417,6 +417,17 @@ public: [[nodiscard]] STDGPU_HOST_DEVICE bool empty() const; + /** + * \brief Checks if the object is empty + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return True if the object is empty, false otherwise + */ + template >)> + [[nodiscard]] bool + empty(ExecutionPolicy&& policy) const; + /** * \brief Checks if the object is full * \return True if the object is full, false otherwise @@ -424,6 +435,17 @@ public: STDGPU_HOST_DEVICE bool full() const; + /** + * \brief Checks if the object is full + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return True if the object is full, false otherwise + */ + template >)> + bool + full(ExecutionPolicy&& policy) const; + /** * \brief The size * \return The size of the object @@ -431,6 +453,17 @@ public: STDGPU_HOST_DEVICE index_t size() const; + /** + * \brief The size + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return The size of the object + */ + template >)> + index_t + size(ExecutionPolicy&& policy) const; + /** * \brief The maximum size * \return The maximum size @@ -452,6 +485,17 @@ public: STDGPU_HOST_DEVICE float load_factor() const; + /** + * \brief The average number of elements per bucket + * \tparam ExecutionPolicy The type of the execution policy + * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator + * \return The average number of elements per bucket + */ + template >)> + float + load_factor(ExecutionPolicy&& policy) const; + /** * \brief The maximum number of elements per bucket * \return The maximum number of elements per bucket diff --git a/tests/stdgpu/unordered_datastructure.inc b/tests/stdgpu/unordered_datastructure.inc index fe4b9c425..c0f1c6436 100644 --- a/tests/stdgpu/unordered_datastructure.inc +++ b/tests/stdgpu/unordered_datastructure.inc @@ -248,10 +248,22 @@ TEST_F(STDGPU_UNORDERED_DATASTRUCTURE_TEST_CLASS, bucket_number_collisions) TEST_F(STDGPU_UNORDERED_DATASTRUCTURE_TEST_CLASS, empty_size_limits) { + EXPECT_TRUE(hash_datastructure.empty()); + EXPECT_FALSE(hash_datastructure.full()); EXPECT_LE(hash_datastructure.size(), hash_datastructure.max_size()); EXPECT_LE(hash_datastructure.load_factor(), hash_datastructure.max_load_factor()); } +TEST_F(STDGPU_UNORDERED_DATASTRUCTURE_TEST_CLASS, empty_size_limits_custom_execution_policy) +{ + stdgpu::execution::device_policy policy; + + EXPECT_TRUE(hash_datastructure.empty(policy)); + EXPECT_FALSE(hash_datastructure.full(policy)); + EXPECT_LE(hash_datastructure.size(policy), hash_datastructure.max_size()); + EXPECT_LE(hash_datastructure.load_factor(policy), hash_datastructure.max_load_factor()); +} + TEST_F(STDGPU_UNORDERED_DATASTRUCTURE_TEST_CLASS, hash_objects) { test_unordered_datastructure::key_equal key_equals = hash_datastructure.key_eq(); @@ -1935,8 +1947,8 @@ TEST_F(STDGPU_UNORDERED_DATASTRUCTURE_TEST_CLASS, insert_range_unique_parallel_c stdgpu::device_ptr values_end = stdgpu::device_end(values); hash_datastructure.insert(policy, values_begin, values_end); - EXPECT_FALSE(hash_datastructure.empty()); - EXPECT_EQ(hash_datastructure.size(), N); + EXPECT_FALSE(hash_datastructure.empty(policy)); + EXPECT_EQ(hash_datastructure.size(policy), N); EXPECT_TRUE(hash_datastructure.valid(policy)); destroyDeviceArray(values); @@ -2017,16 +2029,16 @@ TEST_F(STDGPU_UNORDERED_DATASTRUCTURE_TEST_CLASS, erase_range_unique_parallel_cu stdgpu::device_ptr values_end = stdgpu::device_end(values); hash_datastructure.insert(policy, values_begin, values_end); - EXPECT_FALSE(hash_datastructure.empty()); - EXPECT_EQ(hash_datastructure.size(), N); - EXPECT_TRUE(hash_datastructure.valid()); + EXPECT_FALSE(hash_datastructure.empty(policy)); + EXPECT_EQ(hash_datastructure.size(policy), N); + EXPECT_TRUE(hash_datastructure.valid(policy)); stdgpu::device_ptr positions_begin = stdgpu::device_begin(positions); stdgpu::device_ptr positions_end = stdgpu::device_end(positions); hash_datastructure.erase(policy, positions_begin, positions_end); - EXPECT_TRUE(hash_datastructure.empty()); - EXPECT_EQ(hash_datastructure.size(), 0); + EXPECT_TRUE(hash_datastructure.empty(policy)); + EXPECT_EQ(hash_datastructure.size(policy), 0); EXPECT_TRUE(hash_datastructure.valid(policy)); destroyDeviceArray(values); @@ -2489,7 +2501,7 @@ TEST_F(STDGPU_UNORDERED_DATASTRUCTURE_TEST_CLASS, clear_custom_execution_policy) hash_datastructure.clear(policy); - EXPECT_EQ(hash_datastructure.size(), 0); + EXPECT_EQ(hash_datastructure.size(policy), 0); EXPECT_TRUE(hash_datastructure.valid(policy)); destroyHostArray(host_positions);