diff --git a/src/interface/swarm.cpp b/src/interface/swarm.cpp index 294338dee0d2..83acaa2aaeb6 100644 --- a/src/interface/swarm.cpp +++ b/src/interface/swarm.cpp @@ -33,6 +33,7 @@ SwarmDeviceContext Swarm::GetDeviceContext() const { context.block_index_ = block_index_; context.neighbor_indices_ = neighbor_indices_; context.cell_sorted_ = cell_sorted_; + context.buffer_sorted_ = buffer_sorted_; context.cell_sorted_begin_ = cell_sorted_begin_; context.cell_sorted_number_ = cell_sorted_number_; @@ -73,9 +74,10 @@ Swarm::Swarm(const std::string &label, const Metadata &metadata, const int nmax_ new_indices_("new_indices_", nmax_pool_), scratch_a_("scratch_a_", nmax_pool_), scratch_b_("scratch_b_", nmax_pool_), num_particles_to_send_("num_particles_to_send_", NMAX_NEIGHBORS), - buffer_counters_("buffer_counters_", NMAX_NEIGHBORS), + buffer_start_("buffer_start_", NMAX_NEIGHBORS), neighbor_received_particles_("neighbor_received_particles_", NMAX_NEIGHBORS), - cell_sorted_("cell_sorted_", nmax_pool_), mpiStatus(true) { + cell_sorted_("cell_sorted_", nmax_pool_), + buffer_sorted_("buffer_sorted_", nmax_pool_), mpiStatus(true) { PARTHENON_REQUIRE_THROWS(typeid(Coordinates_t) == typeid(UniformCartesian), "SwarmDeviceContext only supports a uniform Cartesian mesh!"); @@ -209,6 +211,9 @@ void Swarm::SetPoolMax(const std::int64_t nmax_pool) { Kokkos::resize(cell_sorted_, nmax_pool); pmb->LogMemUsage(n_new * sizeof(SwarmKey)); + Kokkos::resize(buffer_sorted_, nmax_pool); + pmb->LogMemUsage(n_new * sizeof(SwarmKey)); + block_index_.Resize(nmax_pool); pmb->LogMemUsage(n_new * sizeof(int)); @@ -490,35 +495,35 @@ void Swarm::SortParticlesByCell() { break; } - if (cell_sorted(start_index).cell_idx_1d_ == cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ == cell_idx_1d) { if (start_index == 0) { break; - } else if (cell_sorted(start_index - 1).cell_idx_1d_ != cell_idx_1d) { + } else if (cell_sorted(start_index - 1).sort_idx_ != cell_idx_1d) { break; } else { start_index--; 
continue; } } - if (cell_sorted(start_index).cell_idx_1d_ >= cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ >= cell_idx_1d) { start_index--; if (start_index < 0) { start_index = -1; break; } - if (cell_sorted(start_index).cell_idx_1d_ < cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ < cell_idx_1d) { start_index = -1; break; } continue; } - if (cell_sorted(start_index).cell_idx_1d_ < cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ < cell_idx_1d) { start_index++; if (start_index > max_active_index) { start_index = -1; break; } - if (cell_sorted(start_index).cell_idx_1d_ > cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ > cell_idx_1d) { start_index = -1; break; } @@ -532,7 +537,7 @@ void Swarm::SortParticlesByCell() { int number = 0; int current_index = start_index; while (current_index <= max_active_index && - cell_sorted(current_index).cell_idx_1d_ == cell_idx_1d) { + cell_sorted(current_index).sort_idx_ == cell_idx_1d) { current_index++; number++; cell_sorted_number(k, j, i) = number; diff --git a/src/interface/swarm.hpp b/src/interface/swarm.hpp index 2058dc58f894..49cd906c2a15 100644 --- a/src/interface/swarm.hpp +++ b/src/interface/swarm.hpp @@ -289,7 +289,7 @@ class Swarm { constexpr static int unset_index_ = -1; ParArray1D num_particles_to_send_; - ParArray1D buffer_counters_; + ParArray1D buffer_start_; ParArray1D neighbor_received_particles_; int total_received_particles_; @@ -298,6 +298,9 @@ class Swarm { ParArray1D cell_sorted_; // 1D per-cell sorted array of key-value swarm memory indices + ParArray1D + buffer_sorted_; // 1D per-buffer sorted array of key-value swarm memory indices + ParArrayND cell_sorted_begin_; // Per-cell array of starting indices in cell_sorted_ diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 107f0e2f1ccf..3ae7781f1d51 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -156,67 +156,6 @@ void Swarm::SetupPersistentMPI() { } } -void 
Swarm::CountParticlesToSend_() { - auto mask_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), mask_); - auto swarm_d = GetDeviceContext(); - auto pmb = GetBlockPointer(); - const int nbmax = vbswarm->bd_var_.nbmax; - - // Fence to make sure particles aren't currently being transported locally - // TODO(BRR) do this operation on device. - pmb->exec_space.fence(); - const int particle_size = GetParticleDataSize(); - vbswarm->particle_size = particle_size; - - // TODO(BRR) This kernel launch should be folded into the subsequent logic once we - // convert that to kernel-based reductions - auto &x = Get(swarm_position::x::name()).Get(); - auto &y = Get(swarm_position::y::name()).Get(); - auto &z = Get(swarm_position::z::name()).Get(); - const int max_active_index = GetMaxActiveIndex(); - pmb->par_for( - PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) { - if (swarm_d.IsActive(n)) { - bool on_current_mesh_block = true; - swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); - } - }); - - // Facilitate lambda captures - auto &block_index = block_index_; - auto &num_particles_to_send = num_particles_to_send_; - - // Zero out number of particles to send before accumulating - pmb->par_for( - PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, - KOKKOS_LAMBDA(const int n) { num_particles_to_send[n] = 0; }); - - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) { - if (swarm_d.IsActive(n)) { - bool on_current_mesh_block = true; - swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); - - if (block_index(n) >= 0) { - Kokkos::atomic_add(&num_particles_to_send(block_index(n)), 1); - } - } - }); - - auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy(); - - // Resize send buffers if too small - for (int n = 0; n < pmb->neighbors.size(); n++) { - const int bufid = pmb->neighbors[n].bufid; - auto sendbuf = vbswarm->bd_var_.send[bufid]; - if 
(sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) { - sendbuf = BufArray1D("Buffer", num_particles_to_send_h(n) * particle_size); - vbswarm->bd_var_.send[bufid] = sendbuf; - } - vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size; - } -} - void Swarm::LoadBuffers_() { auto swarm_d = GetDeviceContext(); auto pmb = GetBlockPointer(); @@ -240,25 +179,22 @@ void Swarm::LoadBuffers_() { auto &y = Get(swarm_position::y::name()).Get(); auto &z = Get(swarm_position::z::name()).Get(); - if(max_active_index_ >= 0) { - // Make an n particle sized array of index, buffer pairs (with SwarmKey struct) - ParArray1D buffer_sorted("buffer_sorted", max_active_index_+1); - ParArray1D buffer_start("buffer_start", nneighbor); + if (max_active_index_ >= 0) { + auto &buffer_sorted = buffer_sorted_; + auto &buffer_start = buffer_start_; pmb->par_for( PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { - if(swarm_d.IsActive(n)) { + if (swarm_d.IsActive(n)) { bool on_current_mesh_block = true; const int m = - swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); + swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); buffer_sorted(n) = SwarmKey(m, n); - } - else { - buffer_sorted(n) = SwarmKey(-1, n); + } else { + buffer_sorted(n) = SwarmKey(this_block_, n); } }); - // sort by buffer index sort(buffer_sorted, SwarmKeyComparator(), 0, max_active_index_); @@ -268,28 +204,27 @@ void Swarm::LoadBuffers_() { // Zero out number of particles to send before accumulating pmb->par_for( - PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, - KOKKOS_LAMBDA(const int n) { num_particles_to_send[n] = 0; }); + PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, KOKKOS_LAMBDA(const int n) { + num_particles_to_send[n] = 0; + buffer_start[n] = 0; + }); pmb->par_for( PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { - auto m = buffer_sorted(n).cell_idx_1d_; + auto m = buffer_sorted(n).sort_idx_; // start 
checks (used for index of particle in buffer) - if (m >= 0 && n ==0 ) { + if (m >= 0 && n == 0) { buffer_start(m) = 0; - } - else if (m >= 0 && m != buffer_sorted(n-1).cell_idx_1d_) { + } else if (m >= 0 && m != buffer_sorted(n - 1).sort_idx_) { buffer_start(m) = n; } - // end checks (used to to size particle buffers) - if (m >= 0 && n == max_active_index ) { - num_particles_to_send(m) = n +1; + if (m >= 0 && n == max_active_index) { + num_particles_to_send(m) = n + 1; + } else if (m >= 0 && m != buffer_sorted(n + 1).sort_idx_) { + num_particles_to_send(m) = n + 1; } - else if (m >= 0 && m != buffer_sorted(n+1).cell_idx_1d_ ) { - num_particles_to_send(m) = n +1; - } - }); + }); // copy values back to host for buffer sizing auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy(); @@ -315,7 +250,7 @@ void Swarm::LoadBuffers_() { PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { auto p_index = buffer_sorted(n).swarm_idx_; if (swarm_d.IsActive(p_index)) { - const int m = buffer_sorted(n).cell_idx_1d_; + const int m = buffer_sorted(n).sort_idx_; const int bufid = neighbor_buffer_index(m); if (m >= 0) { const int bid = n - buffer_start[m]; @@ -343,10 +278,8 @@ void Swarm::Send(BoundaryCommSubset phase) { const int nneighbor = pmb->neighbors.size(); auto swarm_d = GetDeviceContext(); - // Query particles for those to be sent - //CountParticlesToSend_(); - - // Prepare buffers for send operations + // Potentially resize buffer, get consistent index from particle array, get ready to + // send LoadBuffers_(); // Send buffer data @@ -507,4 +440,3 @@ void Swarm::AllocateComms(std::weak_ptr wpmb) { } } // namespace parthenon - diff --git a/src/interface/swarm_device_context.hpp b/src/interface/swarm_device_context.hpp index 936d2d56ad35..c19d7679787d 100644 --- a/src/interface/swarm_device_context.hpp +++ b/src/interface/swarm_device_context.hpp @@ -25,16 +25,16 @@ struct SwarmKey { SwarmKey() {} KOKKOS_INLINE_FUNCTION SwarmKey(const 
int cell_idx_1d, const int swarm_idx_1d) - : cell_idx_1d_(cell_idx_1d), swarm_idx_(swarm_idx_1d) {} + : sort_idx_(cell_idx_1d), swarm_idx_(swarm_idx_1d) {} - int cell_idx_1d_; + int sort_idx_; int swarm_idx_; }; struct SwarmKeyComparator { KOKKOS_INLINE_FUNCTION bool operator()(const SwarmKey &s1, const SwarmKey &s2) { - return s1.cell_idx_1d_ < s2.cell_idx_1d_; + return s1.sort_idx_ < s2.sort_idx_; } };