Skip to content

Commit

Permalink
WIP, add gpu_id to stream managers
Browse files Browse the repository at this point in the history
  • Loading branch information
G-071 committed Aug 18, 2023
1 parent 7880d4b commit f3d71c8
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 6 deletions.
24 changes: 21 additions & 3 deletions include/aggregation_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#ifndef WORK_AGGREGATION_MANAGER
#define WORK_AGGREGATION_MANAGER

#include <stdexcept>
#define DEBUG_AGGREGATION_CALLS 1

#include <stdio.h>
Expand Down Expand Up @@ -1014,8 +1015,23 @@ class aggregation_pool {
/// interface
template <typename... Ts>
static void init(size_t number_of_executors, size_t slices_per_executor,
Aggregated_Executor_Modes mode) {
for (size_t gpu_id = 0; gpu_id < max_number_gpus; gpu_id++) {
Aggregated_Executor_Modes mode, std::optional<size_t> overwrite_number_devices) {
if (is_initialized) {
throw std::runtime_error(
std::string("Trying to initialize cppuddle aggregation pool twice") +
" Kernel: " + std::string(kernelname));
}
if (number_devices) {
if (*overwrite_number_devices > max_number_gpus) {
throw std::runtime_error(
std::string(
"Trying to initialize aggregation with more devices than the "
"maximum number of GPUs given at compiletime") +
" Kernel: " + std::string(kernelname));
}
number_devices = *overwrite_number_devices;
}
for (size_t gpu_id = 0; gpu_id < number_devices; gpu_id++) {
std::lock_guard<aggregation_mutex_t> guard(instance()[gpu_id].pool_mutex);
assert(instance()[gpu_id].aggregation_executor_pool.empty());
for (int i = 0; i < number_of_executors; i++) {
Expand All @@ -1029,7 +1045,7 @@ class aggregation_pool {

/// Will always return a valid executor slice
static decltype(auto) request_executor_slice(void) {
const size_t gpu_id = get_device_id();
const size_t gpu_id = get_device_id(number_devices);
/* const size_t gpu_id = 1; */
std::lock_guard<aggregation_mutex_t> guard(instance()[gpu_id].pool_mutex);
assert(!instance()[gpu_id].aggregation_executor_pool.empty());
Expand Down Expand Up @@ -1090,6 +1106,8 @@ class aggregation_pool {
new aggregation_pool[max_number_gpus]};
return pool_instances;
}
static inline size_t number_devices = max_number_gpus;
static inline bool is_initialized = false;
aggregation_pool() = default;

public:
Expand Down
6 changes: 3 additions & 3 deletions include/detail/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@ static_assert(number_instances > 0);
//constexpr size_t instances_per_gpu = number_instances / max_number_gpus;

/// Uses HPX thread information to determine which GPU should be used
inline size_t get_device_id(void) {
inline size_t get_device_id(const size_t number_gpus) {
#if defined(CPPUDDLE_HAVE_HPX)
//return hpx::get_worker_thread_num() / max_num_gpus;
return 0;
assert(number_gpus < max_number_gpus);
return hpx::get_worker_thread_num() / number_gpus;
#else
return 0;
#endif
Expand Down

0 comments on commit f3d71c8

Please sign in to comment.