Fix aggregation test

SC-SGS · Aug 23, 2023 · ced646f · ced646f
1 parent de53e04
commit ced646f
Show file tree

Hide file tree

Showing 4 changed files with 7 additions and 8 deletions.
diff --git a/include/aggregation_manager.hpp b/include/aggregation_manager.hpp
@@ -777,7 +777,7 @@ template <typename Executor> class Aggregated_Executor {
 #ifndef NDEBUG
         for (const auto &buffer_entry : buffer_allocations) {
           const auto &[buffer_pointer_any, buffer_size,
-                       buffer_allocation_counter, valid, location_id] =
+                       buffer_allocation_counter, valid, location_id, device_id] =
               buffer_entry;
           assert(!valid);
         }
@@ -908,7 +908,7 @@ template <typename Executor> class Aggregated_Executor {
 #ifndef NDEBUG
     for (const auto &buffer_entry : buffer_allocations) {
       const auto &[buffer_pointer_any, buffer_size, buffer_allocation_counter,
-                   valid, location_id] = buffer_entry;
+                   valid, location_id, device_id] = buffer_entry;
       assert(!valid);
     }
 #endif

diff --git a/include/detail/config.hpp b/include/detail/config.hpp
@@ -62,7 +62,7 @@ static_assert(max_number_gpus > 0);
 /// Uses HPX thread information to determine which GPU should be used
 inline size_t get_device_id(const size_t number_gpus) {
 #if defined(CPPUDDLE_HAVE_HPX) 
-    assert(number_gpus < max_number_gpus);
+    assert(number_gpus <= max_number_gpus);
     return hpx::get_worker_thread_num() % number_gpus; 
 #else
     return 0;

diff --git a/tests/work_aggregation_cuda_triad.cpp b/tests/work_aggregation_cuda_triad.cpp
@@ -28,7 +28,6 @@ __global__ void __launch_bounds__(1024, 2) triad_kernel(float_t *A, const float_
 //===============================================================================
 //===============================================================================
 int hpx_main(int argc, char *argv[]) {
-  static_assert(max_number_gpus == 1, "This test currently does not support MultiGPU builds!");
   // Init parameters
   size_t problem_size{0};
   size_t kernel_size{0};
@@ -209,23 +208,23 @@ int hpx_main(int argc, char *argv[]) {
 
                   recycler::cuda_aggregated_device_buffer<float_t,
                                                           decltype(alloc_device)>
-                      device_A(slice_exec.number_slices * kernel_size, 0,
+                      device_A(slice_exec.number_slices * kernel_size, 
                                alloc_device);
 
                   std::vector<float_t, decltype(alloc_host)> local_B(
                       slice_exec.number_slices * kernel_size, float_t{},
                       alloc_host);
                   recycler::cuda_aggregated_device_buffer<float_t,
                                                           decltype(alloc_device)>
-                      device_B(slice_exec.number_slices * kernel_size, 0,
+                      device_B(slice_exec.number_slices * kernel_size, 
                                alloc_device);
 
                   std::vector<float_t, decltype(alloc_host)> local_C(
                       slice_exec.number_slices * kernel_size, float_t{},
                       alloc_host);
                   recycler::cuda_aggregated_device_buffer<float_t,
                                                           decltype(alloc_device)>
-                      device_C(slice_exec.number_slices * kernel_size, 0,
+                      device_C(slice_exec.number_slices * kernel_size,
                                alloc_device);
 
                   for (size_t i = task_id * kernel_size, j = 0;

diff --git a/tests/work_aggregation_test.cpp b/tests/work_aggregation_test.cpp
@@ -605,7 +605,7 @@ void references_add_test(void) {
     auto &agg_exec =
         std::get<0>(stream_pool::get_interface<
                     Aggregated_Executor<Dummy_Executor>,
-                    round_robin_pool<Aggregated_Executor<Dummy_Executor>>>());
+                    round_robin_pool<Aggregated_Executor<Dummy_Executor>>>(0));
     std::vector<float> erg(512);
     std::vector<hpx::lcos::future<void>> slices_done_futs;