diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 4b95511d82f5..9e1cbcfad76f 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -12,9 +12,12 @@ on: jobs: docs: name: build and deploy docs - runs-on: ubuntu-latest + # not using latest due to issues with pip user packages, see + # https://github.com/actions/runner-images/issues/10781 and + # https://github.com/actions/runner-images/issues/10636 + runs-on: ubuntu-22.04 - steps: + steps: - name: Checkout code uses: actions/checkout@v2 with: @@ -23,9 +26,9 @@ jobs: run: export DEBIAN_FRONTEND=noninteractive - name: install dependencies run: | - pip install --break-system-packages sphinx - pip install --break-system-packages sphinx-rtd-theme - pip install --break-system-packages sphinx-multiversion + pip install sphinx + pip install sphinx-rtd-theme + pip install sphinx-multiversion - name: build docs run: | echo "Repo = ${GITHUB_REPOSITORY}" diff --git a/CHANGELOG.md b/CHANGELOG.md index 56dcce3b2177..1d361fc0567f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Current develop ### Added (new features/APIs/variables/...) +- [[PR 1103]](https://github.com/parthenon-hpc-lab/parthenon/pull/1103) Add sparsity to vector wave equation test - [[PR 1185]](https://github.com/parthenon-hpc-lab/parthenon/pull/1185/files) Bugfix to particle defragmentation - [[PR 1184]](https://github.com/parthenon-hpc-lab/parthenon/pull/1184) Fix swarm block neighbor indexing in 1D, 2D - [[PR 1183]](https://github.com/parthenon-hpc-lab/parthenon/pull/1183) Fix particle leapfrog example initialization data @@ -11,6 +12,8 @@ - [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small perfomance upgrades ### Changed (changing behavior/API/variables/...) +- [[PR 1206]](https://github.com/parthenon-hpc-lab/parthenon/pull/1206) Leapfrog fix +- [[PR 1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image - [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag - [[PR 1187]](https://github.com/parthenon-hpc-lab/parthenon/pull/1187) Make DataCollection::Add safer and generalize MeshBlockData::Initialize - [[Issue 1165]](https://github.com/parthenon-hpc-lab/parthenon/issues/1165) Bump Kokkos submodule to 4.4.1 @@ -19,6 +22,7 @@ ### Fixed (not changing behavior/API/variables/...) - [[PR 1188]](https://github.com/parthenon-hpc-lab/parthenon/pull/1188) Fix hdf5 output issue for metadata none variables, update test. +- [[PR 1170]](https://github.com/parthenon-hpc-lab/parthenon/pull/1170) Fixed incorrect initialization of an array by a const (not constexpr) value - [[PR 1189]](https://github.com/parthenon-hpc-lab/parthenon/pull/1189) Address CUDA MPI/ICP issue with Kokkos <=4.4.1 - [[PR 1178]](https://github.com/parthenon-hpc-lab/parthenon/pull/1178) Fix issue with mesh pointer when using relative residual tolerance in BiCGSTAB solver. - [[PR 1173]](https://github.com/parthenon-hpc-lab/parthenon/pull/1173) Make debugging easier by making parthenon throw an error if ParameterInput is different on multiple MPI ranks.
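The sparse machinery that PR 1103 exercises in the fine_advection example (see advection_package.cpp further down) follows a short registration pattern: build a Metadata object that carries Metadata::Sparse, attach allocation/deallocation thresholds, and register a pool of sparse indices with the package. The sketch below restates that pattern in isolation; the thresholds and single-component shape mirror the defaults read from the input block in this patch, while the variable type phi is assumed to be the one the example declares with its VARIABLE macro, and the typed AddSparsePool call is written by analogy with the example's other typed registrations.

// Sketch only: mirrors the sparse-pool registration pattern used in
// advection_package.cpp below; `phi` and the function name are illustrative.
#include <memory>
#include <numeric>
#include <vector>

#include "interface/metadata.hpp"
#include "interface/state_descriptor.hpp"

std::shared_ptr<parthenon::StateDescriptor> RegisterSparseAdvectedField(int sparse_size) {
  using parthenon::Metadata;
  auto pkg = std::make_shared<parthenon::StateDescriptor>("advection_package");

  // Cell-centered, evolved field; the Sparse flag makes per-block allocation optional.
  Metadata m({Metadata::Cell, Metadata::Independent, Metadata::WithFluxes,
              Metadata::FillGhost, Metadata::Sparse},
             std::vector<int>{1});
  // Allocate a block's copy above 1e-6, deallocate below 5e-7, default value 0.0
  // (the same thresholds the example reads from its input file).
  m.SetSparseThresholds(1.e-6, 5.e-7, 0.0);

  // One pool entry per sparse id 0, 1, ..., sparse_size - 1.
  std::vector<int> sparse_idxs(sparse_size);
  std::iota(sparse_idxs.begin(), sparse_idxs.end(), 0);
  pkg->AddSparsePool<phi>(m, sparse_idxs);

  return pkg;
}

With a registration like this, blocks can drop storage for sparse ids whose values stay below the deallocation threshold, which is what the SparseDealloc task added to the driver's task list below relies on.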
diff --git a/example/fine_advection/advection_driver.cpp b/example/fine_advection/advection_driver.cpp index c02b4cd555ff..ca55c04f8278 100644 --- a/example/fine_advection/advection_driver.cpp +++ b/example/fine_advection/advection_driver.cpp @@ -21,6 +21,7 @@ #include "amr_criteria/refinement_package.hpp" #include "bvals/comms/bvals_in_one.hpp" #include "interface/metadata.hpp" +#include "interface/state_descriptor.hpp" #include "interface/update.hpp" #include "mesh/meshblock_pack.hpp" #include "parthenon/driver.hpp" @@ -72,6 +73,12 @@ TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const in TaskCollection tc; TaskID none(0); + std::shared_ptr pkg = + pmesh->packages.Get("advection_package"); + const auto do_regular_advection = pkg->Param("do_regular_advection"); + const auto do_fine_advection = pkg->Param("do_fine_advection"); + const auto do_CT_advection = pkg->Param("do_CT_advection"); + // Build MeshBlockData containers that will be included in MeshData containers. It is // gross that this has to be done by hand. const auto &stage_name = integrator->stage_name; @@ -118,35 +125,49 @@ TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const in auto flx = none; auto flx_fine = none; for (auto face : faces) { - flx = flx | tl.AddTask(none, advection_package::CalculateFluxes, desc, - face, parthenon::CellLevel::same, mc0.get()); - flx_fine = flx_fine | - tl.AddTask(none, advection_package::CalculateFluxes, - desc_fine, face, parthenon::CellLevel::fine, mc0.get()); + if (do_regular_advection) { + flx = flx | tl.AddTask(none, advection_package::CalculateFluxes, + desc, face, parthenon::CellLevel::same, mc0.get()); + } + if (do_fine_advection) { + flx_fine = flx_fine | + tl.AddTask(none, advection_package::CalculateFluxes, + desc_fine, face, parthenon::CellLevel::fine, mc0.get()); + } } auto vf_dep = none; - for (auto edge : std::vector{TE::E1, TE::E2, TE::E3}) { - vf_dep = tl.AddTask(vf_dep, advection_package::CalculateVectorFluxes, edge, - parthenon::CellLevel::same, 1.0, mc0.get()); - vf_dep = tl.AddTask(vf_dep, advection_package::CalculateVectorFluxes, edge, - parthenon::CellLevel::same, -1.0, mc0.get()); + if (do_CT_advection) { + for (auto edge : std::vector{TE::E1, TE::E2, TE::E3}) { + vf_dep = tl.AddTask(vf_dep, advection_package::CalculateVectorFluxes, edge, + parthenon::CellLevel::same, 1.0, mc0.get()); + vf_dep = tl.AddTask(vf_dep, advection_package::CalculateVectorFluxes, edge, + parthenon::CellLevel::same, -1.0, mc0.get()); + } } auto set_flx = parthenon::AddFluxCorrectionTasks( start_flxcor | flx | flx_fine | vf_dep, tl, mc0, pmesh->multilevel); - auto update = - AddUpdateTasks(set_flx, tl, parthenon::CellLevel::same, TT::Cell, beta, dt, desc, - mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + auto update = set_flx; + if (do_regular_advection) { + update = AddUpdateTasks(set_flx, tl, parthenon::CellLevel::same, TT::Cell, beta, dt, + desc, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + } - auto update_fine = - AddUpdateTasks(set_flx, tl, parthenon::CellLevel::fine, TT::Cell, beta, dt, - desc_fine, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + auto update_fine = set_flx; + if (do_fine_advection) { + update_fine = + AddUpdateTasks(set_flx, tl, parthenon::CellLevel::fine, TT::Cell, beta, dt, + desc_fine, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + } - auto update_vec = - AddUpdateTasks(set_flx, tl, parthenon::CellLevel::same, TT::Face, beta, dt, - desc_vec, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + auto 
update_vec = set_flx; + if (do_CT_advection) { + update_vec = + AddUpdateTasks(set_flx, tl, parthenon::CellLevel::same, TT::Face, beta, dt, + desc_vec, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + } auto boundaries = parthenon::AddBoundaryExchangeTasks( update | update_vec | update_fine | start_send, tl, mc1, pmesh->multilevel); @@ -155,7 +176,8 @@ TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const in tl.AddTask(boundaries, parthenon::Update::FillDerived>, mc1.get()); if (stage == integrator->nstages) { - auto new_dt = tl.AddTask(fill_derived, EstimateTimestep>, mc1.get()); + auto dealloc = tl.AddTask(fill_derived, SparseDealloc, mc1.get()); + auto new_dt = tl.AddTask(dealloc, EstimateTimestep>, mc1.get()); if (pmesh->adaptive) { auto tag_refine = tl.AddTask(new_dt, parthenon::Refinement::Tag>, mc1.get()); diff --git a/example/fine_advection/advection_package.cpp b/example/fine_advection/advection_package.cpp index 8ca6c6458b20..428d91b9245f 100644 --- a/example/fine_advection/advection_package.cpp +++ b/example/fine_advection/advection_package.cpp @@ -65,34 +65,58 @@ std::shared_ptr Initialize(ParameterInput *pin) { } pkg->AddParam<>("profile", profile_str); - pkg->AddField( - Metadata({Metadata::Cell, Metadata::Fine, Metadata::Independent, - Metadata::WithFluxes, Metadata::FillGhost})); - - pkg->AddField(Metadata({Metadata::Cell, Metadata::Independent, - Metadata::WithFluxes, Metadata::FillGhost})); - pkg->AddField( - Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); - - Metadata m( - {Metadata::Face, Metadata::Independent, Metadata::WithFluxes, Metadata::FillGhost}); - m.RegisterRefinementOps(); - pkg->AddField(m); - pkg->AddField(m); - pkg->AddField(Metadata( - {Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, std::vector{4})); - pkg->AddField(Metadata( - {Metadata::Face, Metadata::Derived, Metadata::OneCopy}, std::vector{2})); - pkg->AddField(Metadata( - {Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, std::vector{3})); - pkg->AddField(Metadata( - {Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, std::vector{3})); - pkg->AddField( - Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); - pkg->AddField( - Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); + bool do_regular_advection = + pin->GetOrAddBoolean("Advection", "do_regular_advection", true); + pkg->AddParam<>("do_regular_advection", do_regular_advection); + if (do_regular_advection) { + int shape_size = pin->GetOrAddInteger("Advection", "shape_size", 1); + int sparse_size = pin->GetOrAddInteger("Advection", "sparse_size", 1); + pkg->AddParam<>("sparse_size", sparse_size); + Real alloc_threshold = pin->GetOrAddReal("Advection", "alloc_threshold", 1.e-6); + Real dealloc_threshold = pin->GetOrAddReal("Advection", "dealloc_threshold", 5.e-7); + Metadata m({Metadata::Cell, Metadata::Independent, Metadata::WithFluxes, + Metadata::FillGhost, Metadata::Sparse}, + std::vector{shape_size}); + m.SetSparseThresholds(alloc_threshold, dealloc_threshold, 0.0); + std::vector sparse_idxs(sparse_size); + std::iota(sparse_idxs.begin(), sparse_idxs.end(), 0); + pkg->AddSparsePool(m, sparse_idxs); + } + + bool do_fine_advection = pin->GetOrAddBoolean("Advection", "do_fine_advection", true); + pkg->AddParam<>("do_fine_advection", do_fine_advection); + if (do_fine_advection) { + pkg->AddField( + Metadata({Metadata::Cell, Metadata::Fine, Metadata::Independent, + Metadata::WithFluxes, Metadata::FillGhost})); + + pkg->AddField( + Metadata({Metadata::Cell, 
Metadata::Derived, Metadata::OneCopy})); + } + + bool do_CT_advection = pin->GetOrAddBoolean("Advection", "do_CT_advection", true); + pkg->AddParam<>("do_CT_advection", do_CT_advection); + if (do_CT_advection) { + auto m = Metadata({Metadata::Face, Metadata::Independent, Metadata::WithFluxes, + Metadata::FillGhost}); + m.RegisterRefinementOps(); + pkg->AddField(m); + pkg->AddField(m); + pkg->AddField(Metadata( + {Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, std::vector{4})); + pkg->AddField(Metadata( + {Metadata::Face, Metadata::Derived, Metadata::OneCopy}, std::vector{2})); + pkg->AddField(Metadata( + {Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, std::vector{3})); + pkg->AddField(Metadata( + {Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, std::vector{3})); + pkg->AddField( + Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); + pkg->AddField( + Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); + } pkg->CheckRefinementBlock = CheckRefinement; pkg->EstimateTimestepMesh = EstimateTimestep; @@ -101,35 +125,42 @@ std::shared_ptr Initialize(ParameterInput *pin) { } AmrTag CheckRefinement(MeshBlockData *rc) { - // refine on advected, for example. could also be a derived quantity - static auto desc = parthenon::MakePackDescriptor(rc); - auto pack = desc.GetPack(rc); + std::shared_ptr pkg = + rc->GetMeshPointer()->packages.Get("advection_package"); + auto do_regular_advection = pkg->Param("do_regular_advection"); + if (do_regular_advection) { + // refine on advected, for example. could also be a derived quantity + static auto desc = parthenon::MakePackDescriptor(rc); + auto pack = desc.GetPack(rc); - auto pmb = rc->GetBlockPointer(); - IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire); - IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire); - IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire); + auto pmb = rc->GetBlockPointer(); + IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire); + IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire); + IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire); - typename Kokkos::MinMax::value_type minmax; - parthenon::par_reduce( - parthenon::loop_pattern_mdrange_tag, PARTHENON_AUTO_LABEL, DevExecSpace(), - pack.GetLowerBoundHost(0), pack.GetUpperBoundHost(0), kb.s, kb.e, jb.s, jb.e, ib.s, - ib.e, - KOKKOS_LAMBDA(const int n, const int k, const int j, const int i, - typename Kokkos::MinMax::value_type &lminmax) { - lminmax.min_val = - (pack(n, k, j, i) < lminmax.min_val ? pack(n, k, j, i) : lminmax.min_val); - lminmax.max_val = - (pack(n, k, j, i) > lminmax.max_val ? pack(n, k, j, i) : lminmax.max_val); - }, - Kokkos::MinMax(minmax)); + typename Kokkos::MinMax::value_type minmax; + parthenon::par_reduce( + parthenon::loop_pattern_mdrange_tag, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, + pack.GetNBlocks() - 1, // Runs from [0, 0] since pack built from MeshBlockData + pack.GetLowerBoundHost(0), pack.GetUpperBoundHost(0), kb.s, kb.e, jb.s, jb.e, + ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int n, const int k, const int j, const int i, + typename Kokkos::MinMax::value_type &lminmax) { + lminmax.min_val = (pack(b, n, k, j, i) < lminmax.min_val ? pack(b, n, k, j, i) + : lminmax.min_val); + lminmax.max_val = (pack(b, n, k, j, i) > lminmax.max_val ? 
pack(b, n, k, j, i) + : lminmax.max_val); + }, + Kokkos::MinMax(minmax)); - auto pkg = pmb->packages.Get("advection_package"); - const auto &refine_tol = pkg->Param("refine_tol"); - const auto &derefine_tol = pkg->Param("derefine_tol"); + auto pkg = pmb->packages.Get("advection_package"); + const auto &refine_tol = pkg->Param("refine_tol"); + const auto &derefine_tol = pkg->Param("derefine_tol"); - if (minmax.max_val > refine_tol && minmax.min_val < derefine_tol) return AmrTag::refine; - if (minmax.max_val < derefine_tol) return AmrTag::derefine; + if (minmax.max_val > refine_tol && minmax.min_val < derefine_tol) + return AmrTag::refine; + if (minmax.max_val < derefine_tol) return AmrTag::derefine; + } return AmrTag::same; } @@ -175,74 +206,84 @@ TaskStatus FillDerived(MeshData *md) { md); auto pack = desc.GetPack(md); + std::shared_ptr pkg = + md->GetMeshPointer()->packages.Get("advection_package"); + IndexRange ib = md->GetBoundsI(IndexDomain::interior); IndexRange jb = md->GetBoundsJ(IndexDomain::interior); IndexRange kb = md->GetBoundsK(IndexDomain::interior); const int ndim = md->GetMeshPointer()->ndim; const int nghost = parthenon::Globals::nghost; - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - const int kf = (ndim > 2) ? (k - nghost) * 2 + nghost : k; - const int jf = (ndim > 1) ? (j - nghost) * 2 + nghost : j; - const int fi = (ndim > 0) ? (i - nghost) * 2 + nghost : i; - pack(b, Conserved::phi_fine_restricted(), k, j, i) = 0.0; - Real ntot = 0.0; - for (int ioff = 0; ioff <= (ndim > 0); ++ioff) - for (int joff = 0; joff <= (ndim > 1); ++joff) - for (int koff = 0; koff <= (ndim > 2); ++koff) { - ntot += 1.0; - pack(b, Conserved::phi_fine_restricted(), k, j, i) += - pack(b, Conserved::phi_fine(), kf + koff, jf + joff, fi + ioff); - } - pack(b, Conserved::phi_fine_restricted(), k, j, i) /= ntot; - }); - - using TE = parthenon::TopologicalElement; - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - pack(b, Conserved::C_cc(0), k, j, i) = - 0.5 * (pack(b, TE::F1, Conserved::C(), k, j, i) + - pack(b, TE::F1, Conserved::C(), k, j, i + (ndim > 0))); - pack(b, Conserved::C_cc(1), k, j, i) = - 0.5 * (pack(b, TE::F2, Conserved::C(), k, j, i) + - pack(b, TE::F2, Conserved::C(), k, j + (ndim > 1), i)); - pack(b, Conserved::C_cc(2), k, j, i) = - 0.5 * (pack(b, TE::F3, Conserved::C(), k, j, i) + - pack(b, TE::F3, Conserved::C(), k + (ndim > 2), j, i)); - auto &coords = pack.GetCoordinates(b); - pack(b, Conserved::divC(), k, j, i) = - (pack(b, TE::F1, Conserved::C(), k, j, i + (ndim > 0)) - - pack(b, TE::F1, Conserved::C(), k, j, i)) / - coords.Dxc(k, j, i) + - (pack(b, TE::F2, Conserved::C(), k, j + (ndim > 1), i) - - pack(b, TE::F2, Conserved::C(), k, j, i)) / - coords.Dxc(k, j, i) + - (pack(b, TE::F3, Conserved::C(), k + (ndim > 2), j, i) - - pack(b, TE::F3, Conserved::C(), k, j, i)) / - coords.Dxc(k, j, i); - - pack(b, Conserved::D_cc(0), k, j, i) = - 0.5 * (pack(b, TE::F1, Conserved::D(), k, j, i) + - pack(b, TE::F1, Conserved::D(), k, j, i + (ndim > 0))); - pack(b, Conserved::D_cc(1), k, j, i) = - 0.5 * (pack(b, TE::F2, Conserved::D(), k, j, i) + - pack(b, TE::F2, Conserved::D(), k, j + (ndim > 1), i)); - pack(b, Conserved::D_cc(2), k, j, i) = - 0.5 * (pack(b, TE::F3, Conserved::D(), k, j, i) + - pack(b, TE::F3, Conserved::D(), k + 
(ndim > 2), j, i)); - pack(b, Conserved::divD(), k, j, i) = - (pack(b, TE::F1, Conserved::D(), k, j, i + (ndim > 0)) - - pack(b, TE::F1, Conserved::D(), k, j, i)) / - coords.Dxc(k, j, i) + - (pack(b, TE::F2, Conserved::D(), k, j + (ndim > 1), i) - - pack(b, TE::F2, Conserved::D(), k, j, i)) / - coords.Dxc(k, j, i) + - (pack(b, TE::F3, Conserved::D(), k + (ndim > 2), j, i) - - pack(b, TE::F3, Conserved::D(), k, j, i)) / - coords.Dxc(k, j, i); - }); + + auto do_fine_advection = pkg->Param("do_fine_advection"); + if (do_fine_advection) { + parthenon::par_for( + PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, + ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + const int kf = (ndim > 2) ? (k - nghost) * 2 + nghost : k; + const int jf = (ndim > 1) ? (j - nghost) * 2 + nghost : j; + const int fi = (ndim > 0) ? (i - nghost) * 2 + nghost : i; + pack(b, Conserved::phi_fine_restricted(), k, j, i) = 0.0; + Real ntot = 0.0; + for (int koff = 0; koff <= (ndim > 2); ++koff) + for (int joff = 0; joff <= (ndim > 1); ++joff) + for (int ioff = 0; ioff <= (ndim > 0); ++ioff) { + ntot += 1.0; + pack(b, Conserved::phi_fine_restricted(), k, j, i) += + pack(b, Conserved::phi_fine(), kf + koff, jf + joff, fi + ioff); + } + pack(b, Conserved::phi_fine_restricted(), k, j, i) /= ntot; + }); + } + + auto do_CT_advection = pkg->Param("do_CT_advection"); + if (do_CT_advection) { + using TE = parthenon::TopologicalElement; + parthenon::par_for( + PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, + ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + pack(b, Conserved::C_cc(0), k, j, i) = + 0.5 * (pack(b, TE::F1, Conserved::C(), k, j, i) + + pack(b, TE::F1, Conserved::C(), k, j, i + (ndim > 0))); + pack(b, Conserved::C_cc(1), k, j, i) = + 0.5 * (pack(b, TE::F2, Conserved::C(), k, j, i) + + pack(b, TE::F2, Conserved::C(), k, j + (ndim > 1), i)); + pack(b, Conserved::C_cc(2), k, j, i) = + 0.5 * (pack(b, TE::F3, Conserved::C(), k, j, i) + + pack(b, TE::F3, Conserved::C(), k + (ndim > 2), j, i)); + auto &coords = pack.GetCoordinates(b); + pack(b, Conserved::divC(), k, j, i) = + (pack(b, TE::F1, Conserved::C(), k, j, i + (ndim > 0)) - + pack(b, TE::F1, Conserved::C(), k, j, i)) / + coords.Dxc(k, j, i) + + (pack(b, TE::F2, Conserved::C(), k, j + (ndim > 1), i) - + pack(b, TE::F2, Conserved::C(), k, j, i)) / + coords.Dxc(k, j, i) + + (pack(b, TE::F3, Conserved::C(), k + (ndim > 2), j, i) - + pack(b, TE::F3, Conserved::C(), k, j, i)) / + coords.Dxc(k, j, i); + + pack(b, Conserved::D_cc(0), k, j, i) = + 0.5 * (pack(b, TE::F1, Conserved::D(), k, j, i) + + pack(b, TE::F1, Conserved::D(), k, j, i + (ndim > 0))); + pack(b, Conserved::D_cc(1), k, j, i) = + 0.5 * (pack(b, TE::F2, Conserved::D(), k, j, i) + + pack(b, TE::F2, Conserved::D(), k, j + (ndim > 1), i)); + pack(b, Conserved::D_cc(2), k, j, i) = + 0.5 * (pack(b, TE::F3, Conserved::D(), k, j, i) + + pack(b, TE::F3, Conserved::D(), k + (ndim > 2), j, i)); + pack(b, Conserved::divD(), k, j, i) = + (pack(b, TE::F1, Conserved::D(), k, j, i + (ndim > 0)) - + pack(b, TE::F1, Conserved::D(), k, j, i)) / + coords.Dxc(k, j, i) + + (pack(b, TE::F2, Conserved::D(), k, j + (ndim > 1), i) - + pack(b, TE::F2, Conserved::D(), k, j, i)) / + coords.Dxc(k, j, i) + + (pack(b, TE::F3, Conserved::D(), k + (ndim > 2), j, i) - + pack(b, TE::F3, Conserved::D(), k, j, i)) / + coords.Dxc(k, j, i); + }); + } return TaskStatus::complete; } } // namespace advection_package diff --git 
a/example/fine_advection/advection_package.hpp b/example/fine_advection/advection_package.hpp index 87e0c0b2edab..12b6b9b65aad 100644 --- a/example/fine_advection/advection_package.hpp +++ b/example/fine_advection/advection_package.hpp @@ -19,6 +19,7 @@ #include #include +#include #include #define VARIABLE(ns, varname) \ @@ -79,13 +80,20 @@ TaskStatus CalculateFluxes(pack_desc_t &desc, parthenon::TopologicalElement FACE IndexRange ib = md->GetBoundsI(cl, IndexDomain::interior, FACE); IndexRange jb = md->GetBoundsJ(cl, IndexDomain::interior, FACE); IndexRange kb = md->GetBoundsK(cl, IndexDomain::interior, FACE); - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, pack.GetLowerBoundHost(0), - pack.GetUpperBoundHost(0), // Warning: only works for dense variables - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { - // Calculate the flux using upwind donor cell reconstruction - pack.flux(b, FACE, l, k, j, i) = v * pack(b, l, k + koff, j + joff, i + ioff); + constexpr int unused_scratch_size = 0; + constexpr int unused_scratch_level = 1; + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, unused_scratch_size, unused_scratch_level, 0, + pack.GetNBlocks() - 1, kb.s, kb.e, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); + for (int l = pack.GetLowerBound(b); l <= pack.GetUpperBound(b); ++l) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); + // Calculate the flux using upwind donor cell reconstruction + pack.flux(b, FACE, l, k, j, i) = v * pack(b, l, k + koff, j + joff, i + ioff); + }); + } }); return TaskStatus::complete; } @@ -116,23 +124,33 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge, int koff = edge == TE::E3 ? ndim > 2 : 0; int joff = edge == TE::E2 ? ndim > 1 : 0; int ioff = edge == TE::E1 ? 
ndim > 0 : 0; - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s - (ndim > 2), - kb.e + (ndim > 2), jb.s - (ndim > 1), jb.e + (ndim > 1), ib.s - 1, ib.e + 1, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - // Piecewise linear in the orthogonal directions, could do something better here - pack(b, TE::CC, recon(0), k, j, i) = - 0.5 * (pack(b, fe, flux_var(), k, j, i) + - pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); - pack(b, TE::CC, recon(1), k, j, i) = - 0.5 * (pack(b, fe, flux_var(), k, j, i) + - pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); - pack(b, TE::CC, recon(2), k, j, i) = - 0.5 * (pack(b, fe, flux_var(), k, j, i) + - pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); - pack(b, TE::CC, recon(3), k, j, i) = - 0.5 * (pack(b, fe, flux_var(), k, j, i) + - pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); + constexpr int scratch_size = 0; + constexpr int scratch_level = 1; + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, + kb.s - (ndim > 2), kb.e + (ndim > 2), + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s - (ndim > 1), jb.e + (ndim > 1)}, + {ib.s - (ndim > 0), ib.e + (ndim > 0)}); + if (pack.GetLowerBound(b, flux_var()) <= pack.GetUpperBound(b, flux_var())) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); + // Piecewise linear in the orthogonal directions, could do something better + // here + pack(b, TE::CC, recon(0), k, j, i) = + 0.5 * (pack(b, fe, flux_var(), k, j, i) + + pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); + pack(b, TE::CC, recon(1), k, j, i) = + 0.5 * (pack(b, fe, flux_var(), k, j, i) + + pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); + pack(b, TE::CC, recon(2), k, j, i) = + 0.5 * (pack(b, fe, flux_var(), k, j, i) + + pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); + pack(b, TE::CC, recon(3), k, j, i) = + 0.5 * (pack(b, fe, flux_var(), k, j, i) + + pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); + }); + } }); // 2. Calculate the quartant averaged flux @@ -143,20 +161,26 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge, ib = md->GetBoundsI(cl, IndexDomain::interior, edge); jb = md->GetBoundsJ(cl, IndexDomain::interior, edge); kb = md->GetBoundsK(cl, IndexDomain::interior, edge); - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - pack.flux(b, edge, var(), k, j, i) = 0.0; - for (int dk = -koff; dk < 1; ++dk) - for (int dj = -joff; dj < 1; ++dj) - for (int di = -ioff; di < 1; ++di) { - // TODO(LFR): Pick out the correct component of the reconstructed flux, - // current version is not an issue for piecewise constant flux though. 
- pack.flux(b, edge, var(), k, j, i) += - pack(b, TE::CC, recon(0), k + dk, j + dj, i + di); - } - pack.flux(b, edge, var(), k, j, i) /= npoints; - pack.flux(b, edge, var(), k, j, i) *= fac; + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, kb.s, + kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); + if (pack.GetLowerBound(b, var()) <= pack.GetUpperBound(b, var())) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); + pack.flux(b, edge, var(), k, j, i) = 0.0; + for (int dk = -koff; dk < 1; ++dk) + for (int dj = -joff; dj < 1; ++dj) + for (int di = -ioff; di < 1; ++di) { + // TODO(LFR): Pick out the correct component of the reconstructed flux, + // current version is not an issue for piecewise constant flux though. + pack.flux(b, edge, var(), k, j, i) += + pack(b, TE::CC, recon(0), k + dk, j + dj, i + di); + } + pack.flux(b, edge, var(), k, j, i) /= npoints; + pack.flux(b, edge, var(), k, j, i) *= fac; + }); + } }); // 3. Reconstruct the transverse components of the advected field at the edge @@ -167,13 +191,21 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge, ib = md->GetBoundsI(cl, IndexDomain::interior, f); jb = md->GetBoundsJ(cl, IndexDomain::interior, f); kb = md->GetBoundsK(cl, IndexDomain::interior, f); - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s - (ndim > 2), - kb.e + (ndim > 2), jb.s - (ndim > 1), jb.e + (ndim > 1), ib.s - 1, ib.e + 1, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - // Piecewise linear in the orthogonal directions, could do something better here - pack(b, f, recon_f(0), k, j, i) = pack(b, f, var(), k, j, i); - pack(b, f, recon_f(1), k, j, i) = pack(b, f, var(), k, j, i); + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, + kb.s - (ndim > 2), kb.e + (ndim > 2), + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s - (ndim > 1), jb.e + (ndim > 1)}, + {ib.s - (ndim > 0), ib.e + (ndim > 0)}); + if (pack.GetLowerBound(b, var()) <= pack.GetUpperBound(b, var())) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); + // Piecewise linear in the orthogonal directions, could do something better + // here + pack(b, f, recon_f(0), k, j, i) = pack(b, f, var(), k, j, i); + pack(b, f, recon_f(1), k, j, i) = pack(b, f, var(), k, j, i); + }); + } }); } @@ -189,15 +221,21 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge, d1[static_cast(f1) % 3] = 0; d2[static_cast(edge) % 3] = 0; d2[static_cast(f2) % 3] = 0; - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - pack.flux(b, edge, var(), k, j, i) += - 0.5 * (pack(b, f1, recon_f(0), k, j, i) - - pack(b, f1, recon_f(1), k - d1[2], j - d1[1], i - d1[0])); - pack.flux(b, edge, var(), k, j, i) -= - 0.5 * (pack(b, f2, recon_f(0), k, j, i) - - pack(b, f2, recon_f(1), k - d2[2], j - d2[1], i - d2[0])); + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, kb.s, + kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); + if 
(pack.GetLowerBound(b, var()) <= pack.GetUpperBound(b, var())) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); + pack.flux(b, edge, var(), k, j, i) += + 0.5 * (pack(b, f1, recon_f(0), k, j, i) - + pack(b, f1, recon_f(1), k - d1[2], j - d1[1], i - d1[0])); + pack.flux(b, edge, var(), k, j, i) -= + 0.5 * (pack(b, f2, recon_f(0), k, j, i) - + pack(b, f2, recon_f(1), k - d2[2], j - d2[1], i - d2[0])); + }); + } }); // Add in the diffusive component diff --git a/example/fine_advection/parthenon_app_inputs.cpp b/example/fine_advection/parthenon_app_inputs.cpp index ab970175e9cf..8025970f35ca 100644 --- a/example/fine_advection/parthenon_app_inputs.cpp +++ b/example/fine_advection/parthenon_app_inputs.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -50,6 +50,9 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) { auto pkg = pmb->packages.Get("advection_package"); const auto &profile = pkg->Param("profile"); + auto do_regular_advection = pkg->Param("do_regular_advection"); + auto do_fine_advection = pkg->Param("do_fine_advection"); + auto do_CT_advection = pkg->Param("do_CT_advection"); auto cellbounds = pmb->cellbounds; IndexRange ib = cellbounds.GetBoundsI(IndexDomain::interior); @@ -59,6 +62,11 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) { auto coords = pmb->coords; using namespace advection_package::Conserved; + if (do_regular_advection) { + const int sparse_size = pkg->Param("sparse_size"); + for (int s = 0; s < sparse_size; ++s) + pmb->AllocSparseID(phi::name(), s); + } static auto desc = parthenon::MakePackDescriptor(data.get()); auto pack = desc.GetPack(data.get()); @@ -68,78 +76,105 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) { if (profile == "hard_sphere") profile_type = 2; if (profile == "block") profile_type = 3; Real amp = 1.0; - const int b = 0; const int ndim = pmb->pmy_mesh->ndim; const int nghost = parthenon::Globals::nghost; - parthenon::par_for( - PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int k, const int j, const int i) { - const int kf = ndim > 2 ? (k - nghost) * 2 + nghost : k; - const int jf = ndim > 1 ? (j - nghost) * 2 + nghost : j; - const int fi = ndim > 0 ? (i - nghost) * 2 + nghost : i; - if (profile_type == 0) { - PARTHENON_FAIL("Profile type wave not implemented."); - } else if (profile_type == 1) { - Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + - coords.Xc<2>(j) * coords.Xc<2>(j) + - coords.Xc<3>(k) * coords.Xc<3>(k); - pack(b, phi(), k, j, i) = 1. + amp * exp(-100.0 * rsq); - for (int ioff = 0; ioff <= (ndim > 0); ++ioff) - for (int joff = 0; joff <= (ndim > 1); ++joff) - for (int koff = 0; koff <= (ndim > 2); ++koff) { - pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = - 1. + amp * exp(-100.0 * rsq); - } - } else if (profile_type == 2) { - Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + - coords.Xc<2>(j) * coords.Xc<2>(j) + - coords.Xc<3>(k) * coords.Xc<3>(k); - pack(b, phi(), k, j, i) = (rsq < 0.15 * 0.15 ? 
1.0 : 0.0); - for (int ioff = 0; ioff <= (ndim > 0); ++ioff) - for (int joff = 0; joff <= (ndim > 1); ++joff) - for (int koff = 0; koff <= (ndim > 2); ++koff) { - pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = - (rsq < 0.15 * 0.15 ? 1.0 : 0.0); - } - } else { - pack(b, phi(), k, j, i) = 0.0; - for (int ioff = 0; ioff <= (ndim > 0); ++ioff) - for (int joff = 0; joff <= (ndim > 1); ++joff) - for (int koff = 0; koff <= (ndim > 2); ++koff) { - pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = 0.0; - } - } - }); - - ib = cellbounds.GetBoundsI(IndexDomain::interior, TE::F2); - jb = cellbounds.GetBoundsJ(IndexDomain::interior, TE::F2); - kb = cellbounds.GetBoundsK(IndexDomain::interior, TE::F2); - const Real x0 = 0.2; - parthenon::par_for( - PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int k, const int j, const int i) { - auto &coords = pack.GetCoordinates(b); - Real xlo = coords.X(k, j, i); - Real xhi = coords.X(k, j, i + 1); - Real Alo = std::abs(xlo) < x0 ? sign(xlo, xhi) * (x0 - std::abs(xlo)) : 0.0; - Real Ahi = std::abs(xhi) < x0 ? sign(xhi, xlo) * (x0 - std::abs(xhi)) : 0.0; - pack(b, TE::F2, C(), k, j, i) = (Alo - Ahi) / (xhi - xlo); - }); - - ib = cellbounds.GetBoundsI(IndexDomain::interior, TE::F3); - jb = cellbounds.GetBoundsJ(IndexDomain::interior, TE::F3); - kb = cellbounds.GetBoundsK(IndexDomain::interior, TE::F3); - parthenon::par_for( - PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int k, const int j, const int i) { - auto &coords = pack.GetCoordinates(b); - Real xlo = coords.X(k, j, i); - Real xhi = coords.X(k, j, i + 1); - Real Alo = std::abs(xlo) < x0 ? sign(xlo, xhi) * (x0 - std::abs(xlo)) : 0.0; - Real Ahi = std::abs(xhi) < x0 ? sign(xhi, xlo) * (x0 - std::abs(xhi)) : 0.0; - pack(b, TE::F3, D(), k, j, i) = (Alo - Ahi) / (xhi - xlo); - }); + + if (do_regular_advection) { + parthenon::par_for( + PARTHENON_AUTO_LABEL, pack.GetLowerBoundHost(b, phi()), + pack.GetUpperBoundHost(b, phi()), kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int l, const int k, const int j, const int i) { + const int kf = ndim > 2 ? (k - nghost) * 2 + nghost : k; + const int jf = ndim > 1 ? (j - nghost) * 2 + nghost : j; + const int fi = ndim > 0 ? (i - nghost) * 2 + nghost : i; + if (profile_type == 0) { + PARTHENON_FAIL("Profile type wave not implemented."); + } else if (profile_type == 1) { + Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + + coords.Xc<2>(j) * coords.Xc<2>(j) + + coords.Xc<3>(k) * coords.Xc<3>(k); + pack(b, phi(l), k, j, i) = 1. + amp * exp(-100.0 * rsq); + } else if (profile_type == 2) { + Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + + coords.Xc<2>(j) * coords.Xc<2>(j) + + coords.Xc<3>(k) * coords.Xc<3>(k); + pack(b, phi(l), k, j, i) = (rsq < 0.15 * 0.15 ? 1.0 : 0.0); + } else { + pack(b, phi(l), k, j, i) = 0.0; + } + }); + } + + if (do_fine_advection) { + parthenon::par_for( + PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int k, const int j, const int i) { + const int kf = ndim > 2 ? (k - nghost) * 2 + nghost : k; + const int jf = ndim > 1 ? (j - nghost) * 2 + nghost : j; + const int fi = ndim > 0 ? 
(i - nghost) * 2 + nghost : i; + if (profile_type == 0) { + PARTHENON_FAIL("Profile type wave not implemented."); + } else if (profile_type == 1) { + Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + + coords.Xc<2>(j) * coords.Xc<2>(j) + + coords.Xc<3>(k) * coords.Xc<3>(k); + for (int koff = 0; koff <= (ndim > 2); ++koff) + for (int joff = 0; joff <= (ndim > 1); ++joff) + for (int ioff = 0; ioff <= (ndim > 0); ++ioff) { + pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = + 1. + amp * exp(-100.0 * rsq); + } + } else if (profile_type == 2) { + Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + + coords.Xc<2>(j) * coords.Xc<2>(j) + + coords.Xc<3>(k) * coords.Xc<3>(k); + for (int koff = 0; koff <= (ndim > 2); ++koff) + for (int joff = 0; joff <= (ndim > 1); ++joff) + for (int ioff = 0; ioff <= (ndim > 0); ++ioff) { + pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = + (rsq < 0.15 * 0.15 ? 1.0 : 0.0); + } + } else { + for (int koff = 0; koff <= (ndim > 2); ++koff) + for (int joff = 0; joff <= (ndim > 1); ++joff) + for (int ioff = 0; ioff <= (ndim > 0); ++ioff) { + pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = 0.0; + } + } + }); + } + + if (do_CT_advection) { + ib = cellbounds.GetBoundsI(IndexDomain::interior, TE::F2); + jb = cellbounds.GetBoundsJ(IndexDomain::interior, TE::F2); + kb = cellbounds.GetBoundsK(IndexDomain::interior, TE::F2); + const Real x0 = 0.2; + parthenon::par_for( + PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int k, const int j, const int i) { + auto &coords = pack.GetCoordinates(b); + Real xlo = coords.X(k, j, i); + Real xhi = coords.X(k, j, i + 1); + Real Alo = std::abs(xlo) < x0 ? sign(xlo, xhi) * (x0 - std::abs(xlo)) : 0.0; + Real Ahi = std::abs(xhi) < x0 ? sign(xhi, xlo) * (x0 - std::abs(xhi)) : 0.0; + pack(b, TE::F2, C(), k, j, i) = (Alo - Ahi) / (xhi - xlo); + }); + + ib = cellbounds.GetBoundsI(IndexDomain::interior, TE::F3); + jb = cellbounds.GetBoundsJ(IndexDomain::interior, TE::F3); + kb = cellbounds.GetBoundsK(IndexDomain::interior, TE::F3); + parthenon::par_for( + PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int k, const int j, const int i) { + auto &coords = pack.GetCoordinates(b); + Real xlo = coords.X(k, j, i); + Real xhi = coords.X(k, j, i + 1); + Real Alo = std::abs(xlo) < x0 ? sign(xlo, xhi) * (x0 - std::abs(xlo)) : 0.0; + Real Ahi = std::abs(xhi) < x0 ? 
sign(xhi, xlo) * (x0 - std::abs(xhi)) : 0.0; + pack(b, TE::F3, D(), k, j, i) = (Alo - Ahi) / (xhi - xlo); + }); + } } Packages_t ProcessPackages(std::unique_ptr &pin) { diff --git a/example/fine_advection/parthinput.advection b/example/fine_advection/parthinput.advection index 31ee255c5053..b3bf7ca10d0c 100644 --- a/example/fine_advection/parthinput.advection +++ b/example/fine_advection/parthinput.advection @@ -57,6 +57,13 @@ profile = hard_sphere refine_tol = 0.3 # control the package specific refinement tagging function derefine_tol = 0.03 +# Include advection equation for regular resolution CC fields +do_regular_advection = true +# Include advection equation for fine resolution CC fields +do_fine_advection = true +# Include vector wave equation using CT +do_CT_advection = true + file_type = rst dt = 0.05 diff --git a/example/fine_advection/stokes.hpp b/example/fine_advection/stokes.hpp index 27b529def8a5..fdc3ec408402 100644 --- a/example/fine_advection/stokes.hpp +++ b/example/fine_advection/stokes.hpp @@ -19,6 +19,7 @@ #include #include +#include namespace advection_example { using namespace parthenon::driver::prelude; @@ -36,21 +37,27 @@ TaskStatus WeightedSumDataElement(parthenon::CellLevel cl, IndexRange jb = in1->GetBoundsJ(cl, IndexDomain::entire, te); IndexRange kb = in1->GetBoundsK(cl, IndexDomain::entire, te); - PARTHENON_REQUIRE(pack1.GetLowerBoundHost(0) == pack2.GetLowerBoundHost(0), - "Packs are different size."); - PARTHENON_REQUIRE(pack1.GetLowerBoundHost(0) == pack_out.GetLowerBoundHost(0), - "Packs are different size."); - PARTHENON_REQUIRE(pack1.GetUpperBoundHost(0) == pack2.GetUpperBoundHost(0), - "Packs are different size."); - PARTHENON_REQUIRE(pack1.GetUpperBoundHost(0) == pack_out.GetUpperBoundHost(0), - "Packs are different size."); - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack1.GetNBlocks() - 1, pack1.GetLowerBoundHost(0), - pack1.GetUpperBoundHost(0), // This is safe for dense vars - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { - pack_out(b, te, l, k, j, i) = - w1 * pack1(b, te, l, k, j, i) + w2 * pack2(b, te, l, k, j, i); + constexpr int scratch_size = 0; + constexpr int scratch_level = 1; + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack1.GetNBlocks() - 1, kb.s, + kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); + PARTHENON_REQUIRE(pack1.GetLowerBound(b) == pack2.GetLowerBound(b), + "Packs are different size."); + PARTHENON_REQUIRE(pack1.GetLowerBound(b) == pack_out.GetLowerBound(b), + "Packs are different size."); + PARTHENON_REQUIRE(pack1.GetUpperBound(b) == pack2.GetUpperBound(b), + "Packs are different size."); + PARTHENON_REQUIRE(pack1.GetUpperBound(b) == pack_out.GetUpperBound(b), + "Packs are different size."); + for (int l = pack1.GetLowerBound(b); l <= pack1.GetUpperBound(b); ++l) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); + pack_out(b, te, l, k, j, i) = + w1 * pack1(b, te, l, k, j, i) + w2 * pack2(b, te, l, k, j, i); + }); + } }); return TaskStatus::complete; } @@ -73,12 +80,18 @@ void StokesZero(parthenon::CellLevel cl, parthenon::TopologicalElement TeVar, IndexRange jb = out->GetBoundsJ(cl, IndexDomain::interior, TeVar); IndexRange kb = out->GetBoundsK(cl, IndexDomain::interior, TeVar); - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack_out.GetNBlocks() - 1, 
pack_out.GetLowerBoundHost(0), - pack_out.GetUpperBoundHost(0), // This is safe for dense vars only - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { - pack_out(b, TeVar, l, k, j, i) = 0.0; + constexpr int scratch_size = 0; + constexpr int scratch_level = 1; + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack_out.GetNBlocks() - 1, + kb.s, kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); + for (int l = pack_out.GetLowerBound(b); l <= pack_out.GetUpperBound(b); ++l) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); + pack_out(b, TeVar, l, k, j, i) = 0.0; + }); + } }); } @@ -103,22 +116,29 @@ void StokesComponent(Real fac, parthenon::CellLevel cl, koff = ndim > 2 ? koff : 0; joff = ndim > 1 ? joff : 0; - PARTHENON_REQUIRE(pack_in.GetLowerBoundHost(0) == pack_out.GetLowerBoundHost(0), - "Packs are different size."); - PARTHENON_REQUIRE(pack_in.GetUpperBoundHost(0) == pack_out.GetUpperBoundHost(0), - "Packs are different size."); - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack_in.GetNBlocks() - 1, pack_in.GetLowerBoundHost(0), - pack_in.GetUpperBoundHost(0), // This is safe for dense vars only - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { + constexpr int scratch_size = 0; + constexpr int scratch_level = 1; + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack_out.GetNBlocks() - 1, + kb.s, kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { auto &coords = pack_in.GetCoordinates(b); - pack_out(b, TeVar, l, k, j, i) += - fac * - (coords.Volume(cl, TeFlux, k, j, i) * pack_in.flux(b, TeFlux, l, k, j, i) - - coords.Volume(cl, TeFlux, k + koff, j + joff, i + ioff) * - pack_in.flux(b, TeFlux, l, k + koff, j + joff, i + ioff)) / - coords.Volume(cl, TeVar, k, j, i); + parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); + PARTHENON_REQUIRE(pack_in.GetLowerBound(b) == pack_out.GetLowerBound(b), + "Packs are different size."); + PARTHENON_REQUIRE(pack_in.GetUpperBound(b) == pack_out.GetUpperBound(b), + "Packs are different size."); + for (int l = pack_out.GetLowerBound(b); l <= pack_out.GetUpperBound(b); ++l) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); + pack_out(b, TeVar, l, k, j, i) += + fac * + (coords.Volume(cl, TeFlux, k, j, i) * + pack_in.flux(b, TeFlux, l, k, j, i) - + coords.Volume(cl, TeFlux, k + koff, j + joff, i + ioff) * + pack_in.flux(b, TeFlux, l, k + koff, j + joff, i + ioff)) / + coords.Volume(cl, TeVar, k, j, i); + }); + } }); } diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index a01a121f75fa..0f250e9ab02f 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -288,17 +288,18 @@ TaskStatus CreateSomeParticles(MeshBlock *pmb, const double t0) { } while (r > 0.5); // Randomly sample direction perpendicular to origin - Real theta = acos(2. * rng_gen.drand() - 1.); - Real phi = 2. * M_PI * rng_gen.drand(); - v(0, n) = sin(theta) * cos(phi); - v(1, n) = sin(theta) * sin(phi); - v(2, n) = cos(theta); + const Real mu = 2.0 * rng_gen.drand() - 1.0; + const Real phi = 2. 
* M_PI * rng_gen.drand(); + const Real stheta = std::sqrt(1.0 - mu * mu); + v(0, n) = stheta * cos(phi); + v(1, n) = stheta * sin(phi); + v(2, n) = mu; // Project v onto plane normal to sphere Real vdN = v(0, n) * x(n) + v(1, n) * y(n) + v(2, n) * z(n); - Real NdN = r * r; - v(0, n) = v(0, n) - vdN / NdN * x(n); - v(1, n) = v(1, n) - vdN / NdN * y(n); - v(2, n) = v(2, n) - vdN / NdN * z(n); + Real inverse_NdN = 1. / (r * r); + v(0, n) = v(0, n) - vdN * inverse_NdN * x(n); + v(1, n) = v(1, n) - vdN * inverse_NdN * y(n); + v(2, n) = v(2, n) - vdN * inverse_NdN * z(n); // Normalize Real v_tmp = sqrt(v(0, n) * v(0, n) + v(1, n) * v(1, n) + v(2, n) * v(2, n)); @@ -335,11 +336,12 @@ TaskStatus CreateSomeParticles(MeshBlock *pmb, const double t0) { z(n) = minx_k + nx_k * dx_k * rng_gen.drand(); // Randomly sample direction on the unit sphere, fixing speed - Real theta = acos(2. * rng_gen.drand() - 1.); - Real phi = 2. * M_PI * rng_gen.drand(); - v(0, n) = vel * sin(theta) * cos(phi); - v(1, n) = vel * sin(theta) * sin(phi); - v(2, n) = vel * cos(theta); + const Real mu = 2.0 * rng_gen.drand() - 1.0; + const Real phi = 2. * M_PI * rng_gen.drand(); + const Real stheta = std::sqrt(1.0 - mu * mu); + v(0, n) = vel * stheta * cos(phi); + v(1, n) = vel * stheta * sin(phi); + v(2, n) = vel * mu; // Create particles at the beginning of the timestep t(n) = t0; diff --git a/src/interface/swarm.cpp b/src/interface/swarm.cpp index 294338dee0d2..39f1e8a28123 100644 --- a/src/interface/swarm.cpp +++ b/src/interface/swarm.cpp @@ -33,6 +33,7 @@ SwarmDeviceContext Swarm::GetDeviceContext() const { context.block_index_ = block_index_; context.neighbor_indices_ = neighbor_indices_; context.cell_sorted_ = cell_sorted_; + context.buffer_sorted_ = buffer_sorted_; context.cell_sorted_begin_ = cell_sorted_begin_; context.cell_sorted_number_ = cell_sorted_number_; @@ -73,9 +74,10 @@ Swarm::Swarm(const std::string &label, const Metadata &metadata, const int nmax_ new_indices_("new_indices_", nmax_pool_), scratch_a_("scratch_a_", nmax_pool_), scratch_b_("scratch_b_", nmax_pool_), num_particles_to_send_("num_particles_to_send_", NMAX_NEIGHBORS), - buffer_counters_("buffer_counters_", NMAX_NEIGHBORS), + buffer_start_("buffer_start_", NMAX_NEIGHBORS), neighbor_received_particles_("neighbor_received_particles_", NMAX_NEIGHBORS), - cell_sorted_("cell_sorted_", nmax_pool_), mpiStatus(true) { + cell_sorted_("cell_sorted_", nmax_pool_), + buffer_sorted_("buffer_sorted_", nmax_pool_), mpiStatus(true) { PARTHENON_REQUIRE_THROWS(typeid(Coordinates_t) == typeid(UniformCartesian), "SwarmDeviceContext only supports a uniform Cartesian mesh!"); @@ -209,6 +211,9 @@ void Swarm::SetPoolMax(const std::int64_t nmax_pool) { Kokkos::resize(cell_sorted_, nmax_pool); pmb->LogMemUsage(n_new * sizeof(SwarmKey)); + Kokkos::resize(buffer_sorted_, nmax_pool); + pmb->LogMemUsage(n_new * sizeof(SwarmKey)); + block_index_.Resize(nmax_pool); pmb->LogMemUsage(n_new * sizeof(int)); @@ -490,35 +495,35 @@ void Swarm::SortParticlesByCell() { break; } - if (cell_sorted(start_index).cell_idx_1d_ == cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ == cell_idx_1d) { if (start_index == 0) { break; - } else if (cell_sorted(start_index - 1).cell_idx_1d_ != cell_idx_1d) { + } else if (cell_sorted(start_index - 1).sort_idx_ != cell_idx_1d) { break; } else { start_index--; continue; } } - if (cell_sorted(start_index).cell_idx_1d_ >= cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ >= cell_idx_1d) { start_index--; if (start_index < 0) { start_index = -1; 
break; } - if (cell_sorted(start_index).cell_idx_1d_ < cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ < cell_idx_1d) { start_index = -1; break; } continue; } - if (cell_sorted(start_index).cell_idx_1d_ < cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ < cell_idx_1d) { start_index++; if (start_index > max_active_index) { start_index = -1; break; } - if (cell_sorted(start_index).cell_idx_1d_ > cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ > cell_idx_1d) { start_index = -1; break; } @@ -532,7 +537,7 @@ void Swarm::SortParticlesByCell() { int number = 0; int current_index = start_index; while (current_index <= max_active_index && - cell_sorted(current_index).cell_idx_1d_ == cell_idx_1d) { + cell_sorted(current_index).sort_idx_ == cell_idx_1d) { current_index++; number++; cell_sorted_number(k, j, i) = number; diff --git a/src/interface/swarm.hpp b/src/interface/swarm.hpp index 2058dc58f894..49cd906c2a15 100644 --- a/src/interface/swarm.hpp +++ b/src/interface/swarm.hpp @@ -289,7 +289,7 @@ class Swarm { constexpr static int unset_index_ = -1; ParArray1D num_particles_to_send_; - ParArray1D buffer_counters_; + ParArray1D buffer_start_; ParArray1D neighbor_received_particles_; int total_received_particles_; @@ -298,6 +298,9 @@ class Swarm { ParArray1D cell_sorted_; // 1D per-cell sorted array of key-value swarm memory indices + ParArray1D + buffer_sorted_; // 1D per-buffer sorted array of key-value swarm memory indices + ParArrayND cell_sorted_begin_; // Per-cell array of starting indices in cell_sorted_ diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 3ee4e2af7512..054ded34fabb 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -156,72 +156,14 @@ void Swarm::SetupPersistentMPI() { } } -void Swarm::CountParticlesToSend_() { - auto mask_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), mask_); - auto swarm_d = GetDeviceContext(); - auto pmb = GetBlockPointer(); - const int nbmax = vbswarm->bd_var_.nbmax; - - // Fence to make sure particles aren't currently being transported locally - // TODO(BRR) do this operation on device. 
- pmb->exec_space.fence(); - const int particle_size = GetParticleDataSize(); - vbswarm->particle_size = particle_size; - - // TODO(BRR) This kernel launch should be folded into the subsequent logic once we - // convert that to kernel-based reductions - auto &x = Get(swarm_position::x::name()).Get(); - auto &y = Get(swarm_position::y::name()).Get(); - auto &z = Get(swarm_position::z::name()).Get(); - const int max_active_index = GetMaxActiveIndex(); - pmb->par_for( - PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) { - if (swarm_d.IsActive(n)) { - bool on_current_mesh_block = true; - swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); - } - }); - - // Facilitate lambda captures - auto &block_index = block_index_; - auto &num_particles_to_send = num_particles_to_send_; - - // Zero out number of particles to send before accumulating - pmb->par_for( - PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, - KOKKOS_LAMBDA(const int n) { num_particles_to_send[n] = 0; }); - - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) { - if (swarm_d.IsActive(n)) { - bool on_current_mesh_block = true; - swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); - - if (block_index(n) >= 0) { - Kokkos::atomic_add(&num_particles_to_send(block_index(n)), 1); - } - } - }); - - auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy(); - - // Resize send buffers if too small - for (int n = 0; n < pmb->neighbors.size(); n++) { - const int bufid = pmb->neighbors[n].bufid; - auto sendbuf = vbswarm->bd_var_.send[bufid]; - if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) { - sendbuf = BufArray1D("Buffer", num_particles_to_send_h(n) * particle_size); - vbswarm->bd_var_.send[bufid] = sendbuf; - } - vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size; - } -} - void Swarm::LoadBuffers_() { auto swarm_d = GetDeviceContext(); auto pmb = GetBlockPointer(); const int particle_size = GetParticleDataSize(); + vbswarm->particle_size = particle_size; const int nneighbor = pmb->neighbors.size(); + // Fence to make sure particles aren't currently being transported locally + pmb->exec_space.fence(); auto &int_vector = std::get()>(vectors_); auto &real_vector = std::get()>(vectors_); @@ -236,42 +178,103 @@ void Swarm::LoadBuffers_() { auto &y = Get(swarm_position::y::name()).Get(); auto &z = Get(swarm_position::z::name()).Get(); - // Zero buffer index counters - auto &buffer_counters = buffer_counters_; - pmb->par_for( - PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, - KOKKOS_LAMBDA(const int n) { buffer_counters[n] = 0; }); + if (max_active_index_ >= 0) { + auto &buffer_sorted = buffer_sorted_; + auto &buffer_start = buffer_start_; - auto &bdvar = vbswarm->bd_var_; - auto neighbor_buffer_index = neighbor_buffer_index_; - // Loop over active particles and use atomic operations to find indices into buffers if - // this particle will be sent. 
- pmb->par_for( - PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { - if (swarm_d.IsActive(n)) { - bool on_current_mesh_block = true; - const int m = - swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); - const int bufid = neighbor_buffer_index(m); - - if (m >= 0) { - const int bid = Kokkos::atomic_fetch_add(&buffer_counters(m), 1); - int buffer_index = bid * particle_size; - swarm_d.MarkParticleForRemoval(n); - for (int i = 0; i < realPackDim; i++) { - bdvar.send[bufid](buffer_index) = vreal(i, n); - buffer_index++; - } - for (int i = 0; i < intPackDim; i++) { - bdvar.send[bufid](buffer_index) = static_cast(vint(i, n)); - buffer_index++; + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { + if (swarm_d.IsActive(n)) { + bool on_current_mesh_block = true; + const int m = + swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); + buffer_sorted(n) = SwarmKey(m, n); + } else { + buffer_sorted(n) = SwarmKey(this_block_, n); + } + }); + + // sort by buffer index + sort(buffer_sorted, SwarmKeyComparator(), 0, max_active_index_); + + // use discontinuity check to determine start of a buffer in buffer_sorted array + auto &num_particles_to_send = num_particles_to_send_; + auto max_active_index = max_active_index_; + + // Zero out number of particles to send before accumulating + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, KOKKOS_LAMBDA(const int n) { + num_particles_to_send[n] = 0; + buffer_start[n] = 0; + }); + + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { + auto m = buffer_sorted(n).sort_idx_; + // start checks (used for index of particle in buffer) + if (m >= 0 && n == 0) { + buffer_start(m) = 0; + } else if (m >= 0 && m != buffer_sorted(n - 1).sort_idx_) { + buffer_start(m) = n; + } + // end checks (used to size particle buffers) + if (m >= 0 && n == max_active_index) { + num_particles_to_send(m) = n + 1; + } else if (m >= 0 && m != buffer_sorted(n + 1).sort_idx_) { + num_particles_to_send(m) = n + 1; + } + }); + + // copy values back to host for buffer sizing + auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy(); + auto buffer_start_h = buffer_start.GetHostMirrorAndCopy(); + + // Resize send buffers if too small + for (int n = 0; n < pmb->neighbors.size(); n++) { + num_particles_to_send_h(n) -= buffer_start_h(n); + const int bufid = pmb->neighbors[n].bufid; + auto sendbuf = vbswarm->bd_var_.send[bufid]; + if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) { + sendbuf = BufArray1D("Buffer", num_particles_to_send_h(n) * particle_size); + vbswarm->bd_var_.send[bufid] = sendbuf; + } + vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size; + } + + auto &bdvar = vbswarm->bd_var_; + auto neighbor_buffer_index = neighbor_buffer_index_; + // Loop over active particles in buffer_sorted order; use index n and buffer_start to + // get the index in buffer m, then pack that particle into its buffer + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { + auto p_index = buffer_sorted(n).swarm_idx_; + if (swarm_d.IsActive(p_index)) { + const int m = buffer_sorted(n).sort_idx_; + const int bufid = neighbor_buffer_index(m); + if (m >= 0) { + const int bid = n - buffer_start[m]; + int buffer_index = bid * particle_size; + swarm_d.MarkParticleForRemoval(p_index); + for (int i = 0; i < realPackDim; i++) { + bdvar.send[bufid](buffer_index) = vreal(i, p_index); + buffer_index++; 
+    pmb->par_for(
+        PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) {
+          auto m = buffer_sorted(n).sort_idx_;
+          // start checks (used for index of particle in buffer)
+          if (m >= 0 && n == 0) {
+            buffer_start(m) = 0;
+          } else if (m >= 0 && m != buffer_sorted(n - 1).sort_idx_) {
+            buffer_start(m) = n;
+          }
+          // end checks (used to size particle buffers)
+          if (m >= 0 && n == max_active_index) {
+            num_particles_to_send(m) = n + 1;
+          } else if (m >= 0 && m != buffer_sorted(n + 1).sort_idx_) {
+            num_particles_to_send(m) = n + 1;
+          }
+        });
+
+    // copy values back to host for buffer sizing
+    auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy();
+    auto buffer_start_h = buffer_start.GetHostMirrorAndCopy();
+
+    // Resize send buffers if too small
+    for (int n = 0; n < pmb->neighbors.size(); n++) {
+      num_particles_to_send_h(n) -= buffer_start_h(n);
+      const int bufid = pmb->neighbors[n].bufid;
+      auto sendbuf = vbswarm->bd_var_.send[bufid];
+      if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) {
+        sendbuf = BufArray1D<Real>("Buffer", num_particles_to_send_h(n) * particle_size);
+        vbswarm->bd_var_.send[bufid] = sendbuf;
+      }
+      vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size;
+    }
+
+    auto &bdvar = vbswarm->bd_var_;
+    auto neighbor_buffer_index = neighbor_buffer_index_;
+    // Loop over active particles in buffer_sorted; use index n and buffer_start to
+    // get the index within buffer m, then pack that particle into its buffer
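+    // Each particle occupies particle_size consecutive Real slots of its neighbor's
+    // send buffer, starting at (n - buffer_start[m]) * particle_size: real components
+    // are written first, followed by integer components cast to Real.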
+    pmb->par_for(
+        PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) {
+          auto p_index = buffer_sorted(n).swarm_idx_;
+          if (swarm_d.IsActive(p_index)) {
+            const int m = buffer_sorted(n).sort_idx_;
+            if (m >= 0) {
+              const int bufid = neighbor_buffer_index(m);
+              const int bid = n - buffer_start[m];
+              int buffer_index = bid * particle_size;
+              swarm_d.MarkParticleForRemoval(p_index);
+              for (int i = 0; i < realPackDim; i++) {
+                bdvar.send[bufid](buffer_index) = vreal(i, p_index);
+                buffer_index++;
+              }
+              for (int i = 0; i < intPackDim; i++) {
+                bdvar.send[bufid](buffer_index) = static_cast<Real>(vint(i, p_index));
+                buffer_index++;
+              }
             }
           }
-        }
-      });
+        });
 
-  // Remove particles that were loaded to send to another block from this block
-  RemoveMarkedParticles();
+    // Remove particles that were loaded to send to another block from this block
+    RemoveMarkedParticles();
+  } else {
+    for (int n = 0; n < pmb->neighbors.size(); n++) {
+      const int bufid = pmb->neighbors[n].bufid;
+      vbswarm->send_size[bufid] = 0;
+    }
+  }
 }
 
 void Swarm::Send(BoundaryCommSubset phase) {
@@ -279,10 +282,8 @@ void Swarm::Send(BoundaryCommSubset phase) {
   const int nneighbor = pmb->neighbors.size();
   auto swarm_d = GetDeviceContext();
 
-  // Query particles for those to be sent
-  CountParticlesToSend_();
-
-  // Prepare buffers for send operations
+  // Resize send buffers if needed, compute each particle's position in its target
+  // buffer, and pack the buffers for sending
   LoadBuffers_();
 
   // Send buffer data
diff --git a/src/interface/swarm_device_context.hpp b/src/interface/swarm_device_context.hpp
index 936d2d56ad35..b16daf0d0cdf 100644
--- a/src/interface/swarm_device_context.hpp
+++ b/src/interface/swarm_device_context.hpp
@@ -25,16 +25,16 @@ struct SwarmKey {
   SwarmKey() {}
   KOKKOS_INLINE_FUNCTION
   SwarmKey(const int cell_idx_1d, const int swarm_idx_1d)
-      : cell_idx_1d_(cell_idx_1d), swarm_idx_(swarm_idx_1d) {}
+      : sort_idx_(cell_idx_1d), swarm_idx_(swarm_idx_1d) {}
 
-  int cell_idx_1d_;
+  int sort_idx_;
   int swarm_idx_;
 };
 
 struct SwarmKeyComparator {
   KOKKOS_INLINE_FUNCTION
   bool operator()(const SwarmKey &s1, const SwarmKey &s2) {
-    return s1.cell_idx_1d_ < s2.cell_idx_1d_;
+    return s1.sort_idx_ < s2.sort_idx_;
   }
 };
 
@@ -139,6 +139,7 @@ class SwarmDeviceContext {
   ParArrayND<int> block_index_;
   ParArrayND<int> neighbor_indices_; // 4x4x4 array of possible block AMR regions
   ParArray1D<SwarmKey> cell_sorted_;
+  ParArray1D<SwarmKey> buffer_sorted_;
   ParArrayND<int> cell_sorted_begin_;
   ParArrayND<int> cell_sorted_number_;
   int ndim_;
diff --git a/src/outputs/output_utils.hpp b/src/outputs/output_utils.hpp
index 3b7e370cde9d..e9fcee04c986 100644
--- a/src/outputs/output_utils.hpp
+++ b/src/outputs/output_utils.hpp
@@ -50,7 +50,7 @@ namespace OutputUtils {
 // Helper struct containing some information about a variable
 struct VarInfo {
  public:
-  static constexpr int VNDIM = MAX_VARIABLE_DIMENSION;
+  static constexpr const int VNDIM = MAX_VARIABLE_DIMENSION;
   std::string label;
   int num_components;
   int tensor_rank; // 0- to 3-D for cell-centered variables, 0- to 6-D for arbitrary shape
diff --git a/src/outputs/restart_hdf5.cpp b/src/outputs/restart_hdf5.cpp
index cababddbfafe..8078fbd5fa9a 100644
--- a/src/outputs/restart_hdf5.cpp
+++ b/src/outputs/restart_hdf5.cpp
@@ -3,7 +3,7 @@
 // Copyright(C) 2020-2024 The Parthenon collaboration
 // Licensed under the 3-clause BSD License, see LICENSE file for details
 //========================================================================================
-// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved.
+// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved.
 //
 // This program was produced under U.S. Government contract 89233218CNA000001 for Los
 // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
@@ -213,7 +213,7 @@ void RestartReaderHDF5::ReadBlocks(const std::string &name, IndexRange range,
 #else  // HDF5 enabled
   auto hdl = OpenDataset<Real>(name);
 
-  const int VNDIM = info.VNDIM;
+  constexpr int VNDIM = OutputUtils::VarInfo::VNDIM;
 
   /** Select hyperslab in dataset **/
   int total_dim = 0;
diff --git a/src/utils/hash.hpp b/src/utils/hash.hpp
index 77642a64dba3..4f1ff6557c74 100644
--- a/src/utils/hash.hpp
+++ b/src/utils/hash.hpp
@@ -31,9 +31,8 @@ std::size_t hash_combine(std::size_t lhs, const T &v, Rest &&...rest) {
   lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2);
   if constexpr (sizeof...(Rest) > 0) {
     return hash_combine(lhs, std::forward<Rest>(rest)...);
-  } else {
-    return lhs;
   }
+  return lhs;
 }
 
 template <class T, int N = std::tuple_size<T>::value - 1>
diff --git a/src/utils/sort.hpp b/src/utils/sort.hpp
index 97e9c77a88e4..a4ab8139dab3 100644
--- a/src/utils/sort.hpp
+++ b/src/utils/sort.hpp
@@ -61,7 +61,7 @@ void sort(ParArray1D<Key> data, KeyComparator comparator, size_t min_idx,
           size_t max_idx) {
   PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!");
   PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!");
-#ifdef KOKKOS_ENABLE_CUDA
+#if defined(KOKKOS_ENABLE_CUDA)
 #ifdef __clang__
   PARTHENON_FAIL("sort is using thrust and there exists an incompatibility with clang, "
                  "see https://github.com/lanl/parthenon/issues/647 for more details. We "
@@ -74,6 +74,13 @@ void sort(ParArray1D<Key> data, KeyComparator comparator, size_t min_idx,
   thrust::device_ptr<Key> last_d = thrust::device_pointer_cast(data.data()) + max_idx + 1;
   thrust::sort(first_d, last_d, comparator);
 #endif
+#elif defined(KOKKOS_ENABLE_HIP)
+  auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data);
+  std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1, comparator);
+  Kokkos::deep_copy(data, data_h);
+  // TODO(BRR) With Kokkos 4.4, switch to Kokkos::sort
+  // auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1));
+  // Kokkos::sort(sub_data, comparator);
 #else
   if (std::is_same<DevMemSpace, HostMemSpace>::value) {
     std::sort(data.data() + min_idx, data.data() + max_idx + 1, comparator);
@@ -89,7 +96,7 @@ template <class Key>
 void sort(ParArray1D<Key> data, size_t min_idx, size_t max_idx) {
   PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!");
   PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!");
-#ifdef KOKKOS_ENABLE_CUDA
+#if defined(KOKKOS_ENABLE_CUDA)
 #ifdef __clang__
   PARTHENON_FAIL("sort is using thrust and there exists an incompatibility with clang, "
                  "see https://github.com/lanl/parthenon/issues/647 for more details. We "
@@ -102,6 +109,13 @@ void sort(ParArray1D<Key> data, size_t min_idx, size_t max_idx) {
   thrust::device_ptr<Key> last_d = thrust::device_pointer_cast(data.data()) + max_idx + 1;
   thrust::sort(first_d, last_d);
 #endif
+#elif defined(KOKKOS_ENABLE_HIP)
+  auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data);
+  std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1);
+  Kokkos::deep_copy(data, data_h);
+  // TODO(BRR) With Kokkos 4.4, switch to Kokkos::sort
+  // auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1));
+  // Kokkos::sort(sub_data);
 #else
   if (std::is_same<DevMemSpace, HostMemSpace>::value) {
     std::sort(data.data() + min_idx, data.data() + max_idx + 1);