Skip to content

Commit

Permalink
unordered_map,unordered_set: Extend support for custom execution policies
Browse files Browse the repository at this point in the history
  • Loading branch information
stotko committed Nov 20, 2024
1 parent 4907180 commit a2e132c
Show file tree
Hide file tree
Showing 7 changed files with 288 additions and 20 deletions.
44 changes: 44 additions & 0 deletions src/stdgpu/impl/unordered_base.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -408,20 +408,53 @@ public:
[[nodiscard]] STDGPU_HOST_DEVICE bool
empty() const;

/**
* \brief Checks if the object is empty
* \tparam ExecutionPolicy The type of the execution policy
* \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
* \return True if the object is empty, false otherwise
* \note The definition evaluates this as `size(policy) == 0`
* \note Unlike the overload without a policy, this overload is not annotated with STDGPU_HOST_DEVICE
*/
template <typename ExecutionPolicy,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
[[nodiscard]] bool
empty(ExecutionPolicy&& policy) const;

/**
* \brief Checks if the object is full
* \return True if the object is full, false otherwise
* \note The definition evaluates this as `size() == total_count()`
* \note An overload taking an execution policy is declared alongside this one
*/
STDGPU_HOST_DEVICE bool
full() const;

/**
* \brief Checks if the object is full
* \tparam ExecutionPolicy The type of the execution policy
* \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
* \return True if the object is full, false otherwise
* \note The definition evaluates this as `size(policy) == total_count()`
* \note Unlike the overload without a policy, this overload is not annotated with STDGPU_HOST_DEVICE
*/
template <typename ExecutionPolicy,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
bool
full(ExecutionPolicy&& policy) const;

/**
* \brief The size
* \return The size of the object
* \note An overload taking an execution policy is declared alongside this one
*/
STDGPU_HOST_DEVICE index_t
size() const;

/**
* \brief The size
* \tparam ExecutionPolicy The type of the execution policy
* \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
* \return The size of the object
* \note The definition loads the occupied-element counter through the given policy and checks with
*       STDGPU_ENSURES that the result lies in [0, total_count()]
* \note Unlike the overload without a policy, this overload is not annotated with STDGPU_HOST_DEVICE
*/
template <typename ExecutionPolicy,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
index_t
size(ExecutionPolicy&& policy) const;

/**
* \brief The maximum size
* \return The maximum size
Expand All @@ -443,6 +476,17 @@ public:
STDGPU_HOST_DEVICE float
load_factor() const;

/**
* \brief The average number of elements per bucket
* \tparam ExecutionPolicy The type of the execution policy
* \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
* \return The average number of elements per bucket
* \note The definition computes `size(policy)` divided by `bucket_count()`, both converted to float
* \note Unlike the overload without a policy, this overload is not annotated with STDGPU_HOST_DEVICE
*/
template <typename ExecutionPolicy,
STDGPU_DETAIL_OVERLOAD_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
float
load_factor(ExecutionPolicy&& policy) const;

/**
* \brief The maximum number of elements per bucket
* \return The maximum number of elements per bucket
Expand Down
76 changes: 64 additions & 12 deletions src/stdgpu/impl/unordered_base_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -168,14 +168,16 @@ template <typename ExecutionPolicy,
device_indexed_range<typename unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::value_type>
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::device_range(ExecutionPolicy&& policy)
{
_range_indices_end.store(0);
_range_indices_end.store(std::forward<ExecutionPolicy>(policy), 0);

for_each_index(std::forward<ExecutionPolicy>(policy),
total_count(),
unordered_base_collect_positions<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>(*this));

return device_indexed_range<value_type>(stdgpu::device_range<index_t>(_range_indices, _range_indices_end.load()),
_values);
return device_indexed_range<value_type>(
stdgpu::device_range<index_t>(_range_indices,
_range_indices_end.load(std::forward<ExecutionPolicy>(policy))),
_values);
}

template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
Expand All @@ -191,14 +193,15 @@ template <typename ExecutionPolicy,
device_indexed_range<const typename unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::value_type>
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::device_range(ExecutionPolicy&& policy) const
{
_range_indices_end.store(0);
_range_indices_end.store(std::forward<ExecutionPolicy>(policy), 0);

for_each_index(std::forward<ExecutionPolicy>(policy),
total_count(),
unordered_base_collect_positions<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>(*this));

return device_indexed_range<const value_type>(
stdgpu::device_range<index_t>(_range_indices, _range_indices_end.load()),
stdgpu::device_range<index_t>(_range_indices,
_range_indices_end.load(std::forward<ExecutionPolicy>(policy))),
_values);
}

Expand Down Expand Up @@ -318,7 +321,13 @@ template <typename ExecutionPolicy,
inline bool
loop_free(ExecutionPolicy&& policy, const unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>& base)
{
int* flags = createDeviceArray<int>(base.total_count(), 0);
using flags_allocator_type = typename stdgpu::allocator_traits<Allocator>::template rebind_alloc<int>;
flags_allocator_type flags_allocator = flags_allocator_type(base.get_allocator());

int* flags = allocator_traits<flags_allocator_type>::allocate_filled(std::forward<ExecutionPolicy>(policy),
flags_allocator,
base.total_count(),
0);

for_each_index(std::forward<ExecutionPolicy>(policy),
base.bucket_count(),
Expand All @@ -330,7 +339,10 @@ loop_free(ExecutionPolicy&& policy, const unordered_base<Key, Value, KeyFromValu
logical_and<>(),
less_equal_one(flags));

destroyDeviceArray<int>(flags);
allocator_traits<flags_allocator_type>::deallocate_filled(std::forward<ExecutionPolicy>(policy),
flags_allocator,
flags,
base.total_count());

return result;
}
Expand Down Expand Up @@ -447,7 +459,7 @@ inline bool
occupied_count_valid(ExecutionPolicy&& policy,
const unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>& base)
{
index_t size_count = base.size();
index_t size_count = base.size(std::forward<ExecutionPolicy>(policy));
index_t size_sum = base._occupied.count(std::forward<ExecutionPolicy>(policy));

return (size_count == size_sum);
Expand Down Expand Up @@ -1059,13 +1071,31 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::empty() con
return (size() == 0);
}

// Policy-aware emptiness check: queries the size through the forwarded policy and compares it to zero.
template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline bool
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::empty(ExecutionPolicy&& policy) const
{
    const auto element_count = size(std::forward<ExecutionPolicy>(policy));
    return element_count == 0;
}

// Reports whether the current size has reached total_count().
template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE bool
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::full() const
{
    const auto element_count = size();
    const auto capacity = total_count();
    return element_count == capacity;
}

// Policy-aware fullness check: the size is queried through the forwarded policy and compared to total_count().
template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline bool
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::full(ExecutionPolicy&& policy) const
{
    const auto element_count = size(std::forward<ExecutionPolicy>(policy));
    const auto capacity = total_count();
    return element_count == capacity;
}

template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE index_t
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::size() const
Expand All @@ -1077,6 +1107,19 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::size() cons
return current_size;
}

// Policy-aware size query: loads the occupied-element counter through the supplied execution policy.
template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline index_t
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::size(ExecutionPolicy&& policy) const
{
// The policy is forwarded to the load so the read is performed via that policy rather than the default one
index_t current_size = _occupied_count.load(std::forward<ExecutionPolicy>(policy));

// Sanity bounds on the loaded value: it must lie in [0, total_count()]
// NOTE(review): STDGPU_ENSURES is presumably a postcondition check that may be compiled out - confirm
STDGPU_ENSURES(0 <= current_size);
STDGPU_ENSURES(current_size <= total_count());
return current_size;
}

template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE index_t
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::max_size() const noexcept
Expand Down Expand Up @@ -1105,6 +1148,15 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::load_factor
return static_cast<float>(size()) / static_cast<float>(bucket_count());
}

// Policy-aware load factor: size (queried through the forwarded policy) divided by the bucket count,
// with both operands converted to float before the division.
template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline float
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::load_factor(ExecutionPolicy&& policy) const
{
    const float occupied = static_cast<float>(size(std::forward<ExecutionPolicy>(policy)));
    const float buckets = static_cast<float>(bucket_count());
    return occupied / buckets;
}

template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE float
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::max_load_factor() const
Expand Down Expand Up @@ -1167,7 +1219,7 @@ template <typename ExecutionPolicy,
void
unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::clear(ExecutionPolicy&& policy)
{
if (empty())
if (empty(std::forward<ExecutionPolicy>(policy)))
{
return;
}
Expand All @@ -1183,7 +1235,7 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::clear(Execu

_occupied.reset(std::forward<ExecutionPolicy>(policy));

_occupied_count.store(0);
_occupied_count.store(std::forward<ExecutionPolicy>(policy), 0);

detail::vector_clear_iota(std::forward<ExecutionPolicy>(policy), _excess_list_positions, bucket_count());
}
Expand Down Expand Up @@ -1239,7 +1291,7 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::createDevic

detail::vector_clear_iota(std::forward<ExecutionPolicy>(policy), result._excess_list_positions, bucket_count);

STDGPU_ENSURES(result._excess_list_positions.full());
STDGPU_ENSURES(result._excess_list_positions.full(std::forward<ExecutionPolicy>(policy)));

return result;
}
Expand All @@ -1254,7 +1306,7 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::destroyDevi
{
if (!detail::is_destroy_optimizable<value_type>())
{
device_object.clear();
device_object.clear(std::forward<ExecutionPolicy>(policy));
}

device_object._bucket_count = 0;
Expand Down
36 changes: 36 additions & 0 deletions src/stdgpu/impl/unordered_map_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -270,20 +270,47 @@ unordered_map<Key, T, Hash, KeyEqual, Allocator>::empty() const
return _base.empty();
}

// Policy-aware emptiness check; delegates to `_base.empty` with the forwarded policy.
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline bool
unordered_map<Key, T, Hash, KeyEqual, Allocator>::empty(ExecutionPolicy&& policy) const
{
    const bool is_empty = _base.empty(std::forward<ExecutionPolicy>(policy));
    return is_empty;
}

// Fullness check; delegates to `_base.full()`.
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE bool
unordered_map<Key, T, Hash, KeyEqual, Allocator>::full() const
{
    const bool is_full = _base.full();
    return is_full;
}

// Policy-aware fullness check; delegates to `_base.full` with the forwarded policy.
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline bool
unordered_map<Key, T, Hash, KeyEqual, Allocator>::full(ExecutionPolicy&& policy) const
{
    const bool is_full = _base.full(std::forward<ExecutionPolicy>(policy));
    return is_full;
}

// Size query; delegates to `_base.size()`.
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE index_t
unordered_map<Key, T, Hash, KeyEqual, Allocator>::size() const
{
    const index_t element_count = _base.size();
    return element_count;
}

// Policy-aware size query; delegates to `_base.size` with the forwarded policy.
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline index_t
unordered_map<Key, T, Hash, KeyEqual, Allocator>::size(ExecutionPolicy&& policy) const
{
    const index_t element_count = _base.size(std::forward<ExecutionPolicy>(policy));
    return element_count;
}

template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE index_t
unordered_map<Key, T, Hash, KeyEqual, Allocator>::max_size() const noexcept
Expand All @@ -305,6 +332,15 @@ unordered_map<Key, T, Hash, KeyEqual, Allocator>::load_factor() const
return _base.load_factor();
}

// Policy-aware load factor; delegates to `_base.load_factor` with the forwarded policy.
template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline float
unordered_map<Key, T, Hash, KeyEqual, Allocator>::load_factor(ExecutionPolicy&& policy) const
{
    const float factor = _base.load_factor(std::forward<ExecutionPolicy>(policy));
    return factor;
}

template <typename Key, typename T, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE float
unordered_map<Key, T, Hash, KeyEqual, Allocator>::max_load_factor() const
Expand Down
36 changes: 36 additions & 0 deletions src/stdgpu/impl/unordered_set_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -252,20 +252,47 @@ unordered_set<Key, Hash, KeyEqual, Allocator>::empty() const
return _base.empty();
}

// Policy-aware emptiness check; delegates to `_base.empty` with the forwarded policy.
template <typename Key, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline bool
unordered_set<Key, Hash, KeyEqual, Allocator>::empty(ExecutionPolicy&& policy) const
{
    const bool is_empty = _base.empty(std::forward<ExecutionPolicy>(policy));
    return is_empty;
}

// Fullness check; delegates to `_base.full()`.
template <typename Key, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE bool
unordered_set<Key, Hash, KeyEqual, Allocator>::full() const
{
    const bool is_full = _base.full();
    return is_full;
}

// Policy-aware fullness check; delegates to `_base.full` with the forwarded policy.
template <typename Key, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline bool
unordered_set<Key, Hash, KeyEqual, Allocator>::full(ExecutionPolicy&& policy) const
{
    const bool is_full = _base.full(std::forward<ExecutionPolicy>(policy));
    return is_full;
}

// Size query; delegates to `_base.size()`.
template <typename Key, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE index_t
unordered_set<Key, Hash, KeyEqual, Allocator>::size() const
{
    const index_t element_count = _base.size();
    return element_count;
}

// Policy-aware size query; delegates to `_base.size` with the forwarded policy.
template <typename Key, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline index_t
unordered_set<Key, Hash, KeyEqual, Allocator>::size(ExecutionPolicy&& policy) const
{
    const index_t element_count = _base.size(std::forward<ExecutionPolicy>(policy));
    return element_count;
}

template <typename Key, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE index_t
unordered_set<Key, Hash, KeyEqual, Allocator>::max_size() const noexcept
Expand All @@ -287,6 +314,15 @@ unordered_set<Key, Hash, KeyEqual, Allocator>::load_factor() const
return _base.load_factor();
}

// Policy-aware load factor; delegates to `_base.load_factor` with the forwarded policy.
template <typename Key, typename Hash, typename KeyEqual, typename Allocator>
template <typename ExecutionPolicy,
          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(is_execution_policy_v<remove_cvref_t<ExecutionPolicy>>)>
inline float
unordered_set<Key, Hash, KeyEqual, Allocator>::load_factor(ExecutionPolicy&& policy) const
{
    const float factor = _base.load_factor(std::forward<ExecutionPolicy>(policy));
    return factor;
}

template <typename Key, typename Hash, typename KeyEqual, typename Allocator>
inline STDGPU_HOST_DEVICE float
unordered_set<Key, Hash, KeyEqual, Allocator>::max_load_factor() const
Expand Down
Loading

0 comments on commit a2e132c

Please sign in to comment.