Skip to content

Commit

Permalink
neighborhood ats5
Browse files Browse the repository at this point in the history
  • Loading branch information
MoraruMaxim committed Sep 30, 2024
1 parent 9d5a909 commit d68b19e
Show file tree
Hide file tree
Showing 12 changed files with 450 additions and 6 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ endif()
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
find_package(Filesystem REQUIRED COMPONENTS Experimental Final)

# Use Neighborhood collectives
set(WITH_NEIGHBORHOOD_COLLECTIVES OFF CACHE BOOL "Build with Neighborhood collectives")

set(ENABLE_MPI OFF)
set(NUM_MPI_PROC_TESTING "4" CACHE STRING "Number of mpi processors to use when running tests with MPI")
if (NOT PARTHENON_DISABLE_MPI)
Expand Down
6 changes: 5 additions & 1 deletion benchmarks/burgers/burgers_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,11 @@ TaskCollection BurgersDriver::MakeTaskCollection(BlockList_t &blocks, const int

const auto any = parthenon::BoundaryType::any;

auto start_bnd = tl.AddTask(none, parthenon::StartReceiveBoundBufs<any>, mc1);
#ifdef USE_NEIGHBORHOOD_COLLECTIVES
auto start_bnd = tl.AddTask(none, parthenon::StartReceiveBoundBufs<parthenon::BoundaryType::local>, mc1); //any
#else
auto start_bnd = tl.AddTask(none, parthenon::StartReceiveBoundBufs<parthenon::BoundaryType::any>, mc1); //any
#endif
auto start_flx_recv = tl.AddTask(none, parthenon::StartReceiveFluxCorrections, mc0);

// this is the main task where most of the real work is done
Expand Down
7 changes: 7 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ else ()
set(MPI_OPTION NOT_MPI_PARALLEL)
endif()

# Configure config.hpp
if (WITH_NEIGHBORHOOD_COLLECTIVES)
set(ENABLE_NEIGHBORHOOD_COLLECTIVES USE_NEIGHBORHOOD_COLLECTIVES)
else ()
set(ENABLE_NEIGHBORHOOD_COLLECTIVES DO_NOT_USE_NEIGHBORHOOD_COLLECTIVES)
endif()

# The following lines determine the default loop pattern by setting the
# default defines to the appropriate "tags" in the config.hpp file.
# See `kokkos_abstraction.hpp` for available tags and what they translate to.
Expand Down
38 changes: 38 additions & 0 deletions src/bvals/comms/bnd_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,44 @@ BndInfo BndInfo::GetSetBndInfo(MeshBlock *pmb, const NeighborBlock &nb,
return out;
}

#ifdef USE_NEIGHBORHOOD_COLLECTIVES
BndInfo BndInfo::NeighCommGetSetBndInfo(MeshBlock *pmb, const NeighborBlock &nb,
std::shared_ptr<Variable<Real>> v,
CommBuffer<buf_pool_t<Real>::owner_t> *buf) {
BndInfo out;
buf->Allocate();
out.buf = buf->buffer();
auto buf_state = buf->GetState();
out.buf_allocated = true;

out.allocated = v->IsAllocated();
out.alloc_status = v->GetAllocationStatus();

int Nv = v->GetDim(4);
int Nu = v->GetDim(5);
int Nt = v->GetDim(6);

int mylevel = pmb->loc.level();

auto elements = v->GetTopologicalElements();
out.ntopological_elements = elements.size();
auto idx_range_type = IndexRangeType::BoundaryExteriorRecv;
if (std::abs(nb.ni.ox1) + std::abs(nb.ni.ox2) + std::abs(nb.ni.ox3) == 0)
idx_range_type = IndexRangeType::InteriorRecv;
for (auto el : elements) {
int idx = static_cast<int>(el) % 3;
out.idxer[idx] = CalcIndices(nb, pmb, el, idx_range_type, false, {Nt, Nu, Nv});
}
if (nb.snb.level < mylevel) {
out.var = v->coarse_s.Get();
} else {
out.var = v->data.Get();
}

return out;
}
#endif

ProResInfo ProResInfo::GetInteriorRestrict(MeshBlock *pmb, const NeighborBlock & /*nb*/,
std::shared_ptr<Variable<Real>> v) {
ProResInfo out;
Expand Down
5 changes: 5 additions & 0 deletions src/bvals/comms/bnd_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ struct BndInfo {
static BndInfo GetSetBndInfo(MeshBlock *pmb, const NeighborBlock &nb,
std::shared_ptr<Variable<Real>> v,
CommBuffer<buf_pool_t<Real>::owner_t> *buf);
#ifdef USE_NEIGHBORHOOD_COLLECTIVES
static BndInfo NeighCommGetSetBndInfo(MeshBlock *pmb, const NeighborBlock &nb,
std::shared_ptr<Variable<Real>> v,
CommBuffer<buf_pool_t<Real>::owner_t> *buf);
#endif
static BndInfo GetSendCCFluxCor(MeshBlock *pmb, const NeighborBlock &nb,
std::shared_ptr<Variable<Real>> v,
CommBuffer<buf_pool_t<Real>::owner_t> *buf);
Expand Down
55 changes: 53 additions & 2 deletions src/bvals/comms/boundary_communication.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,15 +141,28 @@ TaskStatus SendBoundBufs(std::shared_ptr<MeshData<Real>> &md) {
if (bound_type == BoundaryType::any || bound_type == BoundaryType::nonlocal)
Kokkos::fence();
#endif


#ifdef USE_NEIGHBORHOOD_COLLECTIVES
if(bound_type == BoundaryType::nonlocal)
pmesh->neigh_token.start_data_exchange_neigh_alltoallv();
else{
for (int ibuf = 0; ibuf < cache.buf_vec.size(); ++ibuf) {
auto &buf = *cache.buf_vec[ibuf];
if (sending_nonzero_flags_h(ibuf) || !Globals::sparse_config.enabled)
buf.Send();
else
buf.SendNull();
}
}
#else
for (int ibuf = 0; ibuf < cache.buf_vec.size(); ++ibuf) {
auto &buf = *cache.buf_vec[ibuf];
if (sending_nonzero_flags_h(ibuf) || !Globals::sparse_config.enabled)
buf.Send();
else
buf.SendNull();
}

#endif // USE_NEIGHBORHOOD_COLLECTIVES
return TaskStatus::complete;
}

Expand Down Expand Up @@ -199,9 +212,28 @@ TaskStatus ReceiveBoundBufs(std::shared_ptr<MeshData<Real>> &md) {
false);

bool all_received = true;
#ifdef USE_NEIGHBORHOOD_COLLECTIVES
if(bound_type == BoundaryType::nonlocal){
all_received = pmesh->neigh_token.test_data_exchange_neigh_alltoallv();

if(all_received){
std::for_each(
std::begin(cache.buf_vec), std::end(cache.buf_vec),
[&all_received](auto pbuf) {
*(pbuf->state_) = BufferState::received;
});
}
}
else{
std::for_each(
std::begin(cache.buf_vec), std::end(cache.buf_vec),
[&all_received](auto pbuf) { all_received = pbuf->TryReceive() && all_received; });
}
#else
std::for_each(
std::begin(cache.buf_vec), std::end(cache.buf_vec),
[&all_received](auto pbuf) { all_received = pbuf->TryReceive() && all_received; });
#endif // USE_NEIGHBORHOOD_COLLECTIVES

int ibound = 0;
if (Globals::sparse_config.enabled) {
Expand Down Expand Up @@ -253,8 +285,18 @@ TaskStatus SetBounds(std::shared_ptr<MeshData<Real>> &md) {
RebuildBufferCache<bound_type, false>(md, nbound, BndInfo::GetSetBndInfo,
ProResInfo::GetNull);
} else {

#ifdef USE_NEIGHBORHOOD_COLLECTIVES
if(bound_type == BoundaryType::nonlocal)
RebuildBufferCache<bound_type, false>(md, nbound, BndInfo::NeighCommGetSetBndInfo,
ProResInfo::GetSet);
else
RebuildBufferCache<bound_type, false>(md, nbound, BndInfo::GetSetBndInfo,
ProResInfo::GetSet);
#else
RebuildBufferCache<bound_type, false>(md, nbound, BndInfo::GetSetBndInfo,
ProResInfo::GetSet);
#endif
}
}
// const Real threshold = Globals::sparse_config.allocation_threshold;
Expand Down Expand Up @@ -346,8 +388,17 @@ TaskStatus ProlongateBounds(std::shared_ptr<MeshData<Real>> &md) {
RebuildBufferCache<bound_type, false>(md, nbound, BndInfo::GetSetBndInfo,
ProResInfo::GetNull);
} else {
#ifdef USE_NEIGHBORHOOD_COLLECTIVES
if(bound_type == BoundaryType::nonlocal)
RebuildBufferCache<bound_type, false>(md, nbound, BndInfo::NeighCommGetSetBndInfo,
ProResInfo::GetSet);
else
RebuildBufferCache<bound_type, false>(md, nbound, BndInfo::GetSetBndInfo,
ProResInfo::GetSet);
#else
RebuildBufferCache<bound_type, false>(md, nbound, BndInfo::GetSetBndInfo,
ProResInfo::GetSet);
#endif
}
}

Expand Down
112 changes: 111 additions & 1 deletion src/bvals/comms/build_boundary_buffers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,41 @@ template <BoundaryType BTYPE>
void BuildBoundaryBufferSubset(std::shared_ptr<MeshData<Real>> &md,
Mesh::comm_buf_map_t &buf_map) {
Mesh *pmesh = md->GetMeshPointer();
#ifdef USE_NEIGHBORHOOD_COLLECTIVES
if(BTYPE == BoundaryType::nonlocal){
ForEachBoundary<BTYPE>(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) {
int receiver_rank = nb.snb.rank;
pmesh->neigh_token.add_buff_info(nb.snb.rank,GetBufferSize(pmb, nb, v),pmesh->tag_map.GetTag(pmb, nb));
int tagg = pmesh->tag_map.GetTag(pmb, nb);
auto comm_label = v->label();
mpi_comm_t comm = pmesh->GetMPIComm(comm_label);
});
pmesh->neigh_token.calculate_off_prefix_sum();
pmesh->neigh_token.alloc_comm_buffers();
}
#endif //USE_NEIGHBORHOOD_COLLECTIVES

ForEachBoundary<BTYPE>(md, [&](auto pmb, sp_mbd_t /*rc*/, nb_t &nb, const sp_cv_t v) {
// Calculate the required size of the buffer for this boundary
int buf_size = GetBufferSize(pmb, nb, v);

// Add a buffer pool if one does not exist for this size
#ifdef USE_NEIGHBORHOOD_COLLECTIVES
if (BTYPE != BoundaryType::nonlocal && pmesh->pool_map.count(buf_size) == 0) {
pmesh->pool_map.emplace(std::make_pair(
buf_size, buf_pool_t<Real>([buf_size](buf_pool_t<Real> *pool) {
using buf_t = buf_pool_t<Real>::base_t;
// TODO(LFR): Make nbuf a user settable parameter
const int nbuf = 200;
buf_t chunk("pool buffer", buf_size * nbuf);
for (int i = 1; i < nbuf; ++i) {
pool->AddFreeObjectToPool(
buf_t(chunk, std::make_pair(i * buf_size, (i + 1) * buf_size)));
}
return buf_t(chunk, std::make_pair(0, buf_size));
})));
}
#else
if (pmesh->pool_map.count(buf_size) == 0) {
pmesh->pool_map.emplace(std::make_pair(
buf_size, buf_pool_t<Real>([buf_size](buf_pool_t<Real> *pool) {
Expand All @@ -63,6 +93,7 @@ void BuildBoundaryBufferSubset(std::shared_ptr<MeshData<Real>> &md,
return buf_t(chunk, std::make_pair(0, buf_size));
})));
}
#endif

const int receiver_rank = nb.snb.rank;
const int sender_rank = Globals::my_rank;
Expand All @@ -87,6 +118,42 @@ void BuildBoundaryBufferSubset(std::shared_ptr<MeshData<Real>> &md,
return buf_pool_t<Real>::owner_t(pmesh->pool_map.at(buf_size).Get());
};

#ifdef USE_NEIGHBORHOOD_COLLECTIVES
int neigh_offset = -1;
int end_neigh_offset = -1;
if(BTYPE == BoundaryType::nonlocal){
auto offset_info = pmesh->neigh_token.per_tag_offsets[receiver_rank][tag];
neigh_offset = offset_info.first;
end_neigh_offset = offset_info.second;
}
#endif

#ifdef USE_NEIGHBORHOOD_COLLECTIVES
if(BTYPE == BoundaryType::nonlocal){
auto neigh_get_resource_method = [pmesh, neigh_offset, end_neigh_offset]() {
auto send_subview = subview(pmesh->neigh_token.send_comm_buffer,std::pair<size_t, size_t>(neigh_offset,end_neigh_offset));
return buf_pool_t<Real>::owner_t( buf_pool_t<Real>::weak_t(std::move(send_subview)));
};

if constexpr (IsSender(BTYPE)) {
auto s_key = SendKey(pmb, nb, v);
if (buf_map.count(s_key) == 0)
buf_map[s_key] = CommBuffer<buf_pool_t<Real>::owner_t>(
tag, sender_rank, receiver_rank, comm, neigh_get_resource_method,
use_sparse_buffers);
}
}
else{
if constexpr (IsSender(BTYPE)) {
auto s_key = SendKey(pmb, nb, v);
if (buf_map.count(s_key) == 0)
buf_map[s_key] = CommBuffer<buf_pool_t<Real>::owner_t>(
tag, sender_rank, receiver_rank, comm, get_resource_method,
use_sparse_buffers);
}
}

#else
// Build send buffer (unless this is a receiving flux boundary)
if constexpr (IsSender(BTYPE)) {
auto s_key = SendKey(pmb, nb, v);
Expand All @@ -95,8 +162,39 @@ void BuildBoundaryBufferSubset(std::shared_ptr<MeshData<Real>> &md,
tag, sender_rank, receiver_rank, comm, get_resource_method,
use_sparse_buffers);
}
#endif // USE_NEIGHBORHOOD_COLLECTIVES (sender)

// Also build the non-local receive buffers here
#ifdef USE_NEIGHBORHOOD_COLLECTIVES
if(BTYPE == BoundaryType::nonlocal){
auto neigh_recv_get_resource_method = [pmesh, neigh_offset, end_neigh_offset]() {
auto recv_subview = subview(pmesh->neigh_token.recv_comm_buffer,std::pair<size_t, size_t>(neigh_offset,end_neigh_offset));
return buf_pool_t<Real>::owner_t( buf_pool_t<Real>::weak_t(recv_subview));
};

if constexpr (IsReceiver(BTYPE)) {
if (sender_rank != receiver_rank) {
auto r_key = ReceiveKey(pmb, nb, v);
if (buf_map.count(r_key) == 0)
buf_map[r_key] = CommBuffer<buf_pool_t<Real>::owner_t>(
tag, receiver_rank, sender_rank, comm, neigh_recv_get_resource_method,
use_sparse_buffers);
}
}

}
else{
if constexpr (IsReceiver(BTYPE)) {
if (sender_rank != receiver_rank) {
auto r_key = ReceiveKey(pmb, nb, v);
if (buf_map.count(r_key) == 0)
buf_map[r_key] = CommBuffer<buf_pool_t<Real>::owner_t>(
tag, receiver_rank, sender_rank, comm, get_resource_method,
use_sparse_buffers);
}
}
}
#else
if constexpr (IsReceiver(BTYPE)) {
if (sender_rank != receiver_rank) {
auto r_key = ReceiveKey(pmb, nb, v);
Expand All @@ -106,6 +204,7 @@ void BuildBoundaryBufferSubset(std::shared_ptr<MeshData<Real>> &md,
use_sparse_buffers);
}
}
#endif // USE_NEIGHBORHOOD_COLLECTIVES (receiver)
});
}
} // namespace
Expand All @@ -121,7 +220,17 @@ TaskStatus BuildBoundaryBuffers(std::shared_ptr<MeshData<Real>> &md) {
// after all MeshData call BuildBoundaryBuffers
all_caches.clear();

BuildBoundaryBufferSubset<BoundaryType::any>(md, pmesh->boundary_comm_map);
#ifdef USE_NEIGHBORHOOD_COLLECTIVES
pmesh->neigh_token.start_building_buffers();
#endif
// Moraru : separate local and nonlocal
BuildBoundaryBufferSubset<BoundaryType::nonlocal>(md, pmesh->boundary_comm_map);
#ifdef USE_NEIGHBORHOOD_COLLECTIVES
pmesh->neigh_token.end_building_buffers();
#endif

BuildBoundaryBufferSubset<BoundaryType::local>(md, pmesh->boundary_comm_map);

BuildBoundaryBufferSubset<BoundaryType::flxcor_send>(md,
pmesh->boundary_comm_flxcor_map);
BuildBoundaryBufferSubset<BoundaryType::flxcor_recv>(md,
Expand All @@ -135,6 +244,7 @@ TaskStatus BuildGMGBoundaryBuffers(std::shared_ptr<MeshData<Real>> &md) {
Mesh *pmesh = md->GetMeshPointer();
auto &all_caches = md->GetBvarsCache();

std::cout<<"-- CALL TO BuildGMGBoundaryBuffers"<<std::endl;
// Clear the fast access vectors for this block since they are no longer valid
// after all MeshData call BuildBoundaryBuffers
all_caches.clear();
Expand Down
Loading

0 comments on commit d68b19e

Please sign in to comment.