From 8f7da47ec832e71462a18cb03e88407c74c96eeb Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 11 Jun 2024 16:37:00 -0600 Subject: [PATCH 01/29] Make things work for sparse variables --- example/fine_advection/advection_package.hpp | 18 +++--- example/fine_advection/stokes.hpp | 62 ++++++++++++-------- 2 files changed, 48 insertions(+), 32 deletions(-) diff --git a/example/fine_advection/advection_package.hpp b/example/fine_advection/advection_package.hpp index 87e0c0b2edab..9dad47446fc0 100644 --- a/example/fine_advection/advection_package.hpp +++ b/example/fine_advection/advection_package.hpp @@ -79,13 +79,17 @@ TaskStatus CalculateFluxes(pack_desc_t &desc, parthenon::TopologicalElement FACE IndexRange ib = md->GetBoundsI(cl, IndexDomain::interior, FACE); IndexRange jb = md->GetBoundsJ(cl, IndexDomain::interior, FACE); IndexRange kb = md->GetBoundsK(cl, IndexDomain::interior, FACE); - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, pack.GetLowerBoundHost(0), - pack.GetUpperBoundHost(0), // Warning: only works for dense variables - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { - // Calculate the flux using upwind donor cell reconstruction - pack.flux(b, FACE, l, k, j, i) = v * pack(b, l, k + koff, j + joff, i + ioff); + constexpr int scratch_size = 0; + constexpr int scratch_level = 1; + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, kb.s, kb.e, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + for (int l = pack.GetLowerBound(b); l <= pack.GetUpperBound(b); ++l) { + parthenon::par_for_inner(member, jb.s, jb.e, ib.s, ib.e, [&](const int j, const int i) { + // Calculate the flux using upwind donor cell reconstruction + pack.flux(b, FACE, l, k, j, i) = v * pack(b, l, k + koff, j + joff, i + ioff); + }); + } }); return TaskStatus::complete; } diff --git a/example/fine_advection/stokes.hpp b/example/fine_advection/stokes.hpp index 3e2d2f42f54b..b78ca2af83d7 100644 --- a/example/fine_advection/stokes.hpp +++ b/example/fine_advection/stokes.hpp @@ -35,14 +35,18 @@ TaskStatus WeightedSumDataElement(parthenon::CellLevel cl, IndexRange ib = in1->GetBoundsI(cl, IndexDomain::entire, te); IndexRange jb = in1->GetBoundsJ(cl, IndexDomain::entire, te); IndexRange kb = in1->GetBoundsK(cl, IndexDomain::entire, te); - - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack1.GetNBlocks() - 1, pack1.GetLowerBoundHost(0), - pack1.GetUpperBoundHost(0), // This is safe for dense vars - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { - pack_out(b, te, l, k, j, i) = - w1 * pack1(b, te, l, k, j, i) + w2 * pack2(b, te, l, k, j, i); + + constexpr int scratch_size = 0; + constexpr int scratch_level = 1; + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack1.GetNBlocks() - 1, kb.s, kb.e, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + for (int l = pack1.GetLowerBound(b); l <= pack1.GetUpperBound(b); ++l) { + parthenon::par_for_inner(member, jb.s, jb.e, ib.s, ib.e, [&](const int j, const int i) { + pack_out(b, te, l, k, j, i) = + w1 * pack1(b, te, l, k, j, i) + w2 * pack2(b, te, l, k, j, i); + }); + } }); return TaskStatus::complete; } @@ -65,12 +69,16 @@ void StokesZero(parthenon::CellLevel cl, parthenon::TopologicalElement TeVar, IndexRange jb = out->GetBoundsJ(cl, IndexDomain::interior, TeVar); IndexRange kb = out->GetBoundsK(cl, IndexDomain::interior, TeVar); - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack_out.GetNBlocks() - 1, pack_out.GetLowerBoundHost(0), - pack_out.GetUpperBoundHost(0), // This is safe for dense vars only - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { - pack_out(b, TeVar, l, k, j, i) = 0.0; + constexpr int scratch_size = 0; + constexpr int scratch_level = 1; + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack_out.GetNBlocks() - 1, kb.s, kb.e, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + for (int l = pack_out.GetLowerBound(b); l <= pack_out.GetUpperBound(b); ++l) { + parthenon::par_for_inner(member, jb.s, jb.e, ib.s, ib.e, [&](const int j, const int i) { + pack_out(b, TeVar, l, k, j, i) = 0.0; + }); + } }); } @@ -95,18 +103,22 @@ void StokesComponent(Real fac, parthenon::CellLevel cl, koff = ndim > 2 ? koff : 0; joff = ndim > 1 ? joff : 0; - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack_in.GetNBlocks() - 1, pack_in.GetLowerBoundHost(0), - pack_in.GetUpperBoundHost(0), // This is safe for dense vars only - kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int l, const int k, const int j, const int i) { + constexpr int scratch_size = 0; + constexpr int scratch_level = 1; + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack_out.GetNBlocks() - 1, kb.s, kb.e, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { auto &coords = pack_in.GetCoordinates(b); - pack_out(b, TeVar, l, k, j, i) += - fac * - (coords.Volume(cl, TeFlux, k, j, i) * pack_in.flux(b, TeFlux, l, k, j, i) - - coords.Volume(cl, TeFlux, k + koff, j + joff, i + ioff) * - pack_in.flux(b, TeFlux, l, k + koff, j + joff, i + ioff)) / - coords.Volume(cl, TeVar, k, j, i); + for (int l = pack_out.GetLowerBound(b); l <= pack_out.GetUpperBound(b); ++l) { + parthenon::par_for_inner(member, jb.s, jb.e, ib.s, ib.e, [&](const int j, const int i) { + pack_out(b, TeVar, l, k, j, i) += + fac * + (coords.Volume(cl, TeFlux, k, j, i) * pack_in.flux(b, TeFlux, l, k, j, i) - + coords.Volume(cl, TeFlux, k + koff, j + joff, i + ioff) * + pack_in.flux(b, TeFlux, l, k + koff, j + joff, i + ioff)) / + coords.Volume(cl, TeVar, k, j, i); + }); + } }); } From 62c0c7adcf120e05dbe3f91821f8f1b4b567ebb4 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 11 Jun 2024 16:37:14 -0600 Subject: [PATCH 02/29] Make scalar advection sparse --- example/fine_advection/advection_driver.cpp | 3 ++- example/fine_advection/advection_package.cpp | 9 ++++++--- example/fine_advection/parthenon_app_inputs.cpp | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/example/fine_advection/advection_driver.cpp b/example/fine_advection/advection_driver.cpp index 6aa226ff68e5..3c6c43521371 100644 --- a/example/fine_advection/advection_driver.cpp +++ b/example/fine_advection/advection_driver.cpp @@ -162,7 +162,8 @@ TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const in tl.AddTask(boundaries, parthenon::Update::FillDerived>, mc1.get()); if (stage == integrator->nstages) { - auto new_dt = tl.AddTask(fill_derived, EstimateTimestep>, mc1.get()); + auto dealloc = tl.AddTask(fill_derived, SparseDealloc, mc1.get()); + auto new_dt = tl.AddTask(dealloc, EstimateTimestep>, mc1.get()); if (pmesh->adaptive) { auto tag_refine = tl.AddTask(new_dt, parthenon::Refinement::Tag>, mc1.get()); diff --git a/example/fine_advection/advection_package.cpp b/example/fine_advection/advection_package.cpp index d44dc65a3425..b6376263b8f5 100644 --- a/example/fine_advection/advection_package.cpp +++ b/example/fine_advection/advection_package.cpp @@ -69,13 +69,16 @@ std::shared_ptr Initialize(ParameterInput *pin) { pkg->AddField( Metadata({Metadata::Cell, Metadata::Fine, Metadata::Independent, Metadata::WithFluxes, Metadata::FillGhost})); + + Metadata m({Metadata::Cell, Metadata::Independent, + Metadata::WithFluxes, Metadata::FillGhost, Metadata::Sparse}); + m.SetSparseThresholds(1.e-6, 5.e-7, 0.0); + pkg->AddSparsePool(m, std::vector{0}); - pkg->AddField(Metadata({Metadata::Cell, Metadata::Independent, - Metadata::WithFluxes, Metadata::FillGhost})); pkg->AddField( Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); - Metadata m( + m = Metadata( {Metadata::Face, Metadata::Independent, Metadata::WithFluxes, Metadata::FillGhost}); m.RegisterRefinementOpscoords; using namespace advection_package::Conserved; + pmb->AllocSparseID(phi::name(), 0); static auto desc = parthenon::MakePackDescriptor(data.get()); auto pack = desc.GetPack(data.get()); @@ -67,7 +68,6 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) { if (profile == "hard_sphere") profile_type = 2; if (profile == "block") profile_type = 3; Real amp = 1.0; - const int b = 0; const int ndim = pmb->pmy_mesh->ndim; const int nghost = parthenon::Globals::nghost; From cc87aaf985a23fc694aed261a64c98460fbe295b Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 11 Jun 2024 17:27:22 -0600 Subject: [PATCH 03/29] Fix restarts with sparse and fine fields --- src/parthenon_manager.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/parthenon_manager.cpp b/src/parthenon_manager.cpp index cca701ae64be..d3c5cb7f862e 100644 --- a/src/parthenon_manager.cpp +++ b/src/parthenon_manager.cpp @@ -289,7 +289,7 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) { // Allocate space based on largest vector int max_vlen = 1; int num_sparse = 0; - size_t nCells = 1; + size_t max_nCells = 1; for (const auto &v_info : all_vars_info) { const auto &label = v_info.label; @@ -314,7 +314,7 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) { bsize.push_back(out_jb.e - out_jb.s + 1); bsize.push_back(out_kb.e - out_kb.s + 1); - nCells = std::max(nCells, bsize[0] * bsize[1] * bsize[2]); + max_nCells = std::max(max_nCells, bsize[0] * bsize[1] * bsize[2]); } // make sure we have all sparse variables that are in the restart file @@ -322,7 +322,7 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) { num_sparse == sparse_info.num_sparse, "Mismatch between sparse fields in simulation and restart file"); - std::vector tmp(static_cast(nb) * nCells * max_vlen); + std::vector tmp(static_cast(nb) * max_nCells * max_vlen); for (const auto &v_info : all_vars_info) { const auto vlen = v_info.num_components; const auto &label = v_info.label; @@ -352,6 +352,12 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) { pmb->meshblock_data.Get()->GetVarPtr(label)->dealloc_count = dealloc_count; } else { // nothing to read for this block, advance reading index + IndexRange out_ib = v_info.cellbounds.GetBoundsI(theDomain); + IndexRange out_jb = v_info.cellbounds.GetBoundsJ(theDomain); + IndexRange out_kb = v_info.cellbounds.GetBoundsK(theDomain); + int nCells = (out_ib.e - out_ib.s + 1); + nCells *= (out_jb.e - out_jb.s + 1); + nCells *= (out_kb.e - out_kb.s + 1); index += nCells * vlen; continue; } From 6add08be955315dd9f67722f61f51342edd179e7 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 11 Jun 2024 19:07:13 -0600 Subject: [PATCH 04/29] possibly fix some restart stuff --- src/outputs/restart_hdf5.cpp | 2 -- src/parthenon_manager.cpp | 10 ++++------ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/outputs/restart_hdf5.cpp b/src/outputs/restart_hdf5.cpp index 0740b7d74311..de837ff89122 100644 --- a/src/outputs/restart_hdf5.cpp +++ b/src/outputs/restart_hdf5.cpp @@ -227,8 +227,6 @@ void RestartReaderHDF5::ReadBlocks(const std::string &name, IndexRange range, "Buffer (size " + std::to_string(dataVec.size()) + ") is too small for dataset " + name + " (size " + std::to_string(total_count) + ")"); - PARTHENON_HDF5_CHECK( - H5Sselect_hyperslab(hdl.dataspace, H5S_SELECT_SET, offset, NULL, count, NULL)); const H5S memspace = H5S::FromHIDCheck(H5Screate_simple(total_dim, count, NULL)); PARTHENON_HDF5_CHECK( diff --git a/src/parthenon_manager.cpp b/src/parthenon_manager.cpp index d3c5cb7f862e..50070e8856b7 100644 --- a/src/parthenon_manager.cpp +++ b/src/parthenon_manager.cpp @@ -287,9 +287,8 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) { } // Allocate space based on largest vector - int max_vlen = 1; int num_sparse = 0; - size_t max_nCells = 1; + size_t max_size = 1; for (const auto &v_info : all_vars_info) { const auto &label = v_info.label; @@ -304,7 +303,6 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) { "Dense field " + label + " is marked as sparse in restart file"); } - max_vlen = std::max(max_vlen, v_info.num_components); IndexRange out_ib = v_info.cellbounds.GetBoundsI(theDomain); IndexRange out_jb = v_info.cellbounds.GetBoundsJ(theDomain); IndexRange out_kb = v_info.cellbounds.GetBoundsK(theDomain); @@ -314,7 +312,7 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) { bsize.push_back(out_jb.e - out_jb.s + 1); bsize.push_back(out_kb.e - out_kb.s + 1); - max_nCells = std::max(max_nCells, bsize[0] * bsize[1] * bsize[2]); + max_size = std::max(max_size, v_info.TensorSize() * bsize[0] * bsize[1] * bsize[2]); } // make sure we have all sparse variables that are in the restart file @@ -322,9 +320,9 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) { num_sparse == sparse_info.num_sparse, "Mismatch between sparse fields in simulation and restart file"); - std::vector tmp(static_cast(nb) * max_nCells * max_vlen); + std::vector tmp(static_cast(nb) * max_size); for (const auto &v_info : all_vars_info) { - const auto vlen = v_info.num_components; + const auto vlen = v_info.TensorSize(); const auto &label = v_info.label; if (Globals::my_rank == 0) { From 6868e3f1e1848e39535425de96c4a653902551c9 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 12 Jun 2024 11:05:52 -0600 Subject: [PATCH 05/29] work around par_for_inner --- example/fine_advection/advection_package.hpp | 5 ++++- example/fine_advection/stokes.hpp | 13 ++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/example/fine_advection/advection_package.hpp b/example/fine_advection/advection_package.hpp index 9dad47446fc0..776792c30e42 100644 --- a/example/fine_advection/advection_package.hpp +++ b/example/fine_advection/advection_package.hpp @@ -19,6 +19,7 @@ #include #include +#include #include #define VARIABLE(ns, varname) \ @@ -84,8 +85,10 @@ TaskStatus CalculateFluxes(pack_desc_t &desc, parthenon::TopologicalElement FACE parthenon::par_for_outer( PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, kb.s, kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); for (int l = pack.GetLowerBound(b); l <= pack.GetUpperBound(b); ++l) { - parthenon::par_for_inner(member, jb.s, jb.e, ib.s, ib.e, [&](const int j, const int i) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); // Calculate the flux using upwind donor cell reconstruction pack.flux(b, FACE, l, k, j, i) = v * pack(b, l, k + koff, j + joff, i + ioff); }); diff --git a/example/fine_advection/stokes.hpp b/example/fine_advection/stokes.hpp index b78ca2af83d7..6c1f02a16b0e 100644 --- a/example/fine_advection/stokes.hpp +++ b/example/fine_advection/stokes.hpp @@ -19,6 +19,7 @@ #include #include +#include namespace advection_example { using namespace parthenon::driver::prelude; @@ -41,8 +42,10 @@ TaskStatus WeightedSumDataElement(parthenon::CellLevel cl, parthenon::par_for_outer( PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack1.GetNBlocks() - 1, kb.s, kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); for (int l = pack1.GetLowerBound(b); l <= pack1.GetUpperBound(b); ++l) { - parthenon::par_for_inner(member, jb.s, jb.e, ib.s, ib.e, [&](const int j, const int i) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); pack_out(b, te, l, k, j, i) = w1 * pack1(b, te, l, k, j, i) + w2 * pack2(b, te, l, k, j, i); }); @@ -74,8 +77,10 @@ void StokesZero(parthenon::CellLevel cl, parthenon::TopologicalElement TeVar, parthenon::par_for_outer( PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack_out.GetNBlocks() - 1, kb.s, kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); for (int l = pack_out.GetLowerBound(b); l <= pack_out.GetUpperBound(b); ++l) { - parthenon::par_for_inner(member, jb.s, jb.e, ib.s, ib.e, [&](const int j, const int i) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); pack_out(b, TeVar, l, k, j, i) = 0.0; }); } @@ -109,8 +114,10 @@ void StokesComponent(Real fac, parthenon::CellLevel cl, PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack_out.GetNBlocks() - 1, kb.s, kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { auto &coords = pack_in.GetCoordinates(b); + parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); for (int l = pack_out.GetLowerBound(b); l <= pack_out.GetUpperBound(b); ++l) { - parthenon::par_for_inner(member, jb.s, jb.e, ib.s, ib.e, [&](const int j, const int i) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); pack_out(b, TeVar, l, k, j, i) += fac * (coords.Volume(cl, TeFlux, k, j, i) * pack_in.flux(b, TeFlux, l, k, j, i) - From 71d5aeb9c7704c46a9d36d7ea10bc33ecdfe8fe5 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Wed, 12 Jun 2024 11:09:36 -0600 Subject: [PATCH 06/29] format --- example/fine_advection/advection_package.cpp | 6 +++--- example/fine_advection/advection_package.hpp | 4 ++-- example/fine_advection/stokes.hpp | 17 +++++++++-------- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/example/fine_advection/advection_package.cpp b/example/fine_advection/advection_package.cpp index b6376263b8f5..75d2aaa64091 100644 --- a/example/fine_advection/advection_package.cpp +++ b/example/fine_advection/advection_package.cpp @@ -69,9 +69,9 @@ std::shared_ptr Initialize(ParameterInput *pin) { pkg->AddField( Metadata({Metadata::Cell, Metadata::Fine, Metadata::Independent, Metadata::WithFluxes, Metadata::FillGhost})); - - Metadata m({Metadata::Cell, Metadata::Independent, - Metadata::WithFluxes, Metadata::FillGhost, Metadata::Sparse}); + + Metadata m({Metadata::Cell, Metadata::Independent, Metadata::WithFluxes, + Metadata::FillGhost, Metadata::Sparse}); m.SetSparseThresholds(1.e-6, 5.e-7, 0.0); pkg->AddSparsePool(m, std::vector{0}); diff --git a/example/fine_advection/advection_package.hpp b/example/fine_advection/advection_package.hpp index 776792c30e42..b2a1b6aeab8e 100644 --- a/example/fine_advection/advection_package.hpp +++ b/example/fine_advection/advection_package.hpp @@ -83,8 +83,8 @@ TaskStatus CalculateFluxes(pack_desc_t &desc, parthenon::TopologicalElement FACE constexpr int scratch_size = 0; constexpr int scratch_level = 1; parthenon::par_for_outer( - PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, kb.s, kb.e, - KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, kb.s, + kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); for (int l = pack.GetLowerBound(b); l <= pack.GetUpperBound(b); ++l) { parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { diff --git a/example/fine_advection/stokes.hpp b/example/fine_advection/stokes.hpp index 6c1f02a16b0e..93426c26f11f 100644 --- a/example/fine_advection/stokes.hpp +++ b/example/fine_advection/stokes.hpp @@ -36,12 +36,12 @@ TaskStatus WeightedSumDataElement(parthenon::CellLevel cl, IndexRange ib = in1->GetBoundsI(cl, IndexDomain::entire, te); IndexRange jb = in1->GetBoundsJ(cl, IndexDomain::entire, te); IndexRange kb = in1->GetBoundsK(cl, IndexDomain::entire, te); - + constexpr int scratch_size = 0; constexpr int scratch_level = 1; parthenon::par_for_outer( - PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack1.GetNBlocks() - 1, kb.s, kb.e, - KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack1.GetNBlocks() - 1, kb.s, + kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); for (int l = pack1.GetLowerBound(b); l <= pack1.GetUpperBound(b); ++l) { parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { @@ -75,8 +75,8 @@ void StokesZero(parthenon::CellLevel cl, parthenon::TopologicalElement TeVar, constexpr int scratch_size = 0; constexpr int scratch_level = 1; parthenon::par_for_outer( - PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack_out.GetNBlocks() - 1, kb.s, kb.e, - KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack_out.GetNBlocks() - 1, + kb.s, kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); for (int l = pack_out.GetLowerBound(b); l <= pack_out.GetUpperBound(b); ++l) { parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { @@ -111,8 +111,8 @@ void StokesComponent(Real fac, parthenon::CellLevel cl, constexpr int scratch_size = 0; constexpr int scratch_level = 1; parthenon::par_for_outer( - PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack_out.GetNBlocks() - 1, kb.s, kb.e, - KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack_out.GetNBlocks() - 1, + kb.s, kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { auto &coords = pack_in.GetCoordinates(b); parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); for (int l = pack_out.GetLowerBound(b); l <= pack_out.GetUpperBound(b); ++l) { @@ -120,7 +120,8 @@ void StokesComponent(Real fac, parthenon::CellLevel cl, const auto [j, i] = idxer(idx); pack_out(b, TeVar, l, k, j, i) += fac * - (coords.Volume(cl, TeFlux, k, j, i) * pack_in.flux(b, TeFlux, l, k, j, i) - + (coords.Volume(cl, TeFlux, k, j, i) * + pack_in.flux(b, TeFlux, l, k, j, i) - coords.Volume(cl, TeFlux, k + koff, j + joff, i + ioff) * pack_in.flux(b, TeFlux, l, k + koff, j + joff, i + ioff)) / coords.Volume(cl, TeVar, k, j, i); From 005292e1fa114354bf9098fa1ea9483b93a7ce7a Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Mon, 17 Jun 2024 16:10:04 -0600 Subject: [PATCH 07/29] make vec loops work with sparse (partial) --- example/fine_advection/advection_package.hpp | 124 ++++++++++++------- 1 file changed, 77 insertions(+), 47 deletions(-) diff --git a/example/fine_advection/advection_package.hpp b/example/fine_advection/advection_package.hpp index b2a1b6aeab8e..62fcb3b76e05 100644 --- a/example/fine_advection/advection_package.hpp +++ b/example/fine_advection/advection_package.hpp @@ -123,23 +123,32 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge, int koff = edge == TE::E3 ? ndim > 2 : 0; int joff = edge == TE::E2 ? ndim > 1 : 0; int ioff = edge == TE::E1 ? ndim > 0 : 0; - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s - (ndim > 2), - kb.e + (ndim > 2), jb.s - (ndim > 1), jb.e + (ndim > 1), ib.s - 1, ib.e + 1, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - // Piecewise linear in the orthogonal directions, could do something better here - pack(b, TE::CC, recon(0), k, j, i) = - 0.5 * (pack(b, fe, flux_var(), k, j, i) + - pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); - pack(b, TE::CC, recon(1), k, j, i) = - 0.5 * (pack(b, fe, flux_var(), k, j, i) + - pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); - pack(b, TE::CC, recon(2), k, j, i) = - 0.5 * (pack(b, fe, flux_var(), k, j, i) + - pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); - pack(b, TE::CC, recon(3), k, j, i) = - 0.5 * (pack(b, fe, flux_var(), k, j, i) + - pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); + constexpr int scratch_size = 0; + constexpr int scratch_level = 1; + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, + kb.s - (ndim > 2), kb.e + (ndim > 2), + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s - (ndim > 1), jb.e + (ndim > 1)}, + {ib.s - (ndim > 0), ib.e + (ndim > 0)}); + if (pack.GetLowerBound(b, flux_var()) <= pack.GetUpperBound(b, flux_var())) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); + // Piecewise linear in the orthogonal directions, could do something better here + pack(b, TE::CC, recon(0), k, j, i) = + 0.5 * (pack(b, fe, flux_var(), k, j, i) + + pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); + pack(b, TE::CC, recon(1), k, j, i) = + 0.5 * (pack(b, fe, flux_var(), k, j, i) + + pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); + pack(b, TE::CC, recon(2), k, j, i) = + 0.5 * (pack(b, fe, flux_var(), k, j, i) + + pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); + pack(b, TE::CC, recon(3), k, j, i) = + 0.5 * (pack(b, fe, flux_var(), k, j, i) + + pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); + }); + } }); // 2. Calculate the quartant averaged flux @@ -150,20 +159,27 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge, ib = md->GetBoundsI(cl, IndexDomain::interior, edge); jb = md->GetBoundsJ(cl, IndexDomain::interior, edge); kb = md->GetBoundsK(cl, IndexDomain::interior, edge); - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - pack.flux(b, edge, var(), k, j, i) = 0.0; - for (int dk = -koff; dk < 1; ++dk) - for (int dj = -joff; dj < 1; ++dj) - for (int di = -ioff; di < 1; ++di) { - // TODO(LFR): Pick out the correct component of the reconstructed flux, - // current version is not an issue for piecewise constant flux though. - pack.flux(b, edge, var(), k, j, i) += - pack(b, TE::CC, recon(0), k + dk, j + dj, i + di); - } - pack.flux(b, edge, var(), k, j, i) /= npoints; - pack.flux(b, edge, var(), k, j, i) *= fac; + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, + kb.s, kb.e, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); + if (pack.GetLowerBound(b, var()) <= pack.GetUpperBound(b, var())) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); + pack.flux(b, edge, var(), k, j, i) = 0.0; + for (int dk = -koff; dk < 1; ++dk) + for (int dj = -joff; dj < 1; ++dj) + for (int di = -ioff; di < 1; ++di) { + // TODO(LFR): Pick out the correct component of the reconstructed flux, + // current version is not an issue for piecewise constant flux though. + pack.flux(b, edge, var(), k, j, i) += + pack(b, TE::CC, recon(0), k + dk, j + dj, i + di); + } + pack.flux(b, edge, var(), k, j, i) /= npoints; + pack.flux(b, edge, var(), k, j, i) *= fac; + }); + } }); // 3. Reconstruct the transverse components of the advected field at the edge @@ -174,13 +190,20 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge, ib = md->GetBoundsI(cl, IndexDomain::interior, f); jb = md->GetBoundsJ(cl, IndexDomain::interior, f); kb = md->GetBoundsK(cl, IndexDomain::interior, f); - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s - (ndim > 2), - kb.e + (ndim > 2), jb.s - (ndim > 1), jb.e + (ndim > 1), ib.s - 1, ib.e + 1, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - // Piecewise linear in the orthogonal directions, could do something better here - pack(b, f, recon_f(0), k, j, i) = pack(b, f, var(), k, j, i); - pack(b, f, recon_f(1), k, j, i) = pack(b, f, var(), k, j, i); + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, + kb.s - (ndim > 2), kb.e + (ndim > 2), + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s - (ndim > 1), jb.e + (ndim > 1)}, + {ib.s - (ndim > 0), ib.e + (ndim > 0)}); + if (pack.GetLowerBound(b, var()) <= pack.GetUpperBound(b, var())) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); + // Piecewise linear in the orthogonal directions, could do something better here + pack(b, f, recon_f(0), k, j, i) = pack(b, f, var(), k, j, i); + pack(b, f, recon_f(1), k, j, i) = pack(b, f, var(), k, j, i); + }); + } }); } @@ -196,15 +219,22 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge, d1[static_cast(f1) % 3] = 0; d2[static_cast(edge) % 3] = 0; d2[static_cast(f2) % 3] = 0; - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - pack.flux(b, edge, var(), k, j, i) += - 0.5 * (pack(b, f1, recon_f(0), k, j, i) - - pack(b, f1, recon_f(1), k - d1[2], j - d1[1], i - d1[0])); - pack.flux(b, edge, var(), k, j, i) -= - 0.5 * (pack(b, f2, recon_f(0), k, j, i) - - pack(b, f2, recon_f(1), k - d2[2], j - d2[1], i - d2[0])); + parthenon::par_for_outer( + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, + kb.s, kb.e, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); + if (pack.GetLowerBound(b, var()) <= pack.GetUpperBound(b, var())) { + parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { + const auto [j, i] = idxer(idx); + pack.flux(b, edge, var(), k, j, i) += + 0.5 * (pack(b, f1, recon_f(0), k, j, i) - + pack(b, f1, recon_f(1), k - d1[2], j - d1[1], i - d1[0])); + pack.flux(b, edge, var(), k, j, i) -= + 0.5 * (pack(b, f2, recon_f(0), k, j, i) - + pack(b, f2, recon_f(1), k - d2[2], j - d2[1], i - d2[0])); + }); + } }); // Add in the diffusive component From acc3bce540f921bf98f78d2f5c37da058ea5cd51 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Mon, 14 Oct 2024 19:19:56 -0600 Subject: [PATCH 08/29] fix pack bug --- example/fine_advection/advection_package.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/example/fine_advection/advection_package.cpp b/example/fine_advection/advection_package.cpp index 1e50505cc42d..c597af6284b7 100644 --- a/example/fine_advection/advection_package.cpp +++ b/example/fine_advection/advection_package.cpp @@ -116,14 +116,15 @@ AmrTag CheckRefinement(MeshBlockData *rc) { typename Kokkos::MinMax::value_type minmax; parthenon::par_reduce( parthenon::loop_pattern_mdrange_tag, PARTHENON_AUTO_LABEL, DevExecSpace(), + 0, pack.GetNBlocks() - 1, // Runs from [0, 0] since pack built from MeshBlockData pack.GetLowerBoundHost(0), pack.GetUpperBoundHost(0), kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int n, const int k, const int j, const int i, + KOKKOS_LAMBDA(const int b, const int n, const int k, const int j, const int i, typename Kokkos::MinMax::value_type &lminmax) { lminmax.min_val = - (pack(n, k, j, i) < lminmax.min_val ? pack(n, k, j, i) : lminmax.min_val); + (pack(b, n, k, j, i) < lminmax.min_val ? pack(b, n, k, j, i) : lminmax.min_val); lminmax.max_val = - (pack(n, k, j, i) > lminmax.max_val ? pack(n, k, j, i) : lminmax.max_val); + (pack(b, n, k, j, i) > lminmax.max_val ? pack(b, n, k, j, i) : lminmax.max_val); }, Kokkos::MinMax(minmax)); From 1dd0e5c5d29f2709dda9b254cd88bf84bdd0ca11 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 15 Oct 2024 14:39:08 -0600 Subject: [PATCH 09/29] fix bug and add options for sizing regular advection fields --- example/fine_advection/advection_driver.cpp | 56 ++-- example/fine_advection/advection_package.cpp | 277 ++++++++++-------- .../fine_advection/parthenon_app_inputs.cpp | 111 +++---- 3 files changed, 255 insertions(+), 189 deletions(-) diff --git a/example/fine_advection/advection_driver.cpp b/example/fine_advection/advection_driver.cpp index c960bba0045f..4f3ec5a50213 100644 --- a/example/fine_advection/advection_driver.cpp +++ b/example/fine_advection/advection_driver.cpp @@ -21,6 +21,7 @@ #include "amr_criteria/refinement_package.hpp" #include "bvals/comms/bvals_in_one.hpp" #include "interface/metadata.hpp" +#include "interface/state_descriptor.hpp" #include "interface/update.hpp" #include "mesh/meshblock_pack.hpp" #include "parthenon/driver.hpp" @@ -71,6 +72,11 @@ TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const in using namespace parthenon::Update; TaskCollection tc; TaskID none(0); + + std::shared_ptr pkg = pmesh->packages.Get("advection_package"); + const auto do_regular_advection = pkg->Param("do_regular_advection"); + const auto do_fine_advection = pkg->Param("do_fine_advection"); + const auto do_CT_advection = pkg->Param("do_CT_advection"); // Build MeshBlockData containers that will be included in MeshData containers. It is // gross that this has to be done by hand. @@ -118,35 +124,47 @@ TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const in auto flx = none; auto flx_fine = none; for (auto face : faces) { - flx = flx | tl.AddTask(none, advection_package::CalculateFluxes, desc, - face, parthenon::CellLevel::same, mc0.get()); - flx_fine = flx_fine | - tl.AddTask(none, advection_package::CalculateFluxes, - desc_fine, face, parthenon::CellLevel::fine, mc0.get()); + if (do_regular_advection) { + flx = flx | tl.AddTask(none, advection_package::CalculateFluxes, desc, + face, parthenon::CellLevel::same, mc0.get()); + } + if (do_fine_advection) { + flx_fine = flx_fine | + tl.AddTask(none, advection_package::CalculateFluxes, + desc_fine, face, parthenon::CellLevel::fine, mc0.get()); + } } auto vf_dep = none; - for (auto edge : std::vector{TE::E1, TE::E2, TE::E3}) { - vf_dep = tl.AddTask(vf_dep, advection_package::CalculateVectorFluxes, edge, - parthenon::CellLevel::same, 1.0, mc0.get()); - vf_dep = tl.AddTask(vf_dep, advection_package::CalculateVectorFluxes, edge, - parthenon::CellLevel::same, -1.0, mc0.get()); + if (do_CT_advection) { + for (auto edge : std::vector{TE::E1, TE::E2, TE::E3}) { + vf_dep = tl.AddTask(vf_dep, advection_package::CalculateVectorFluxes, edge, + parthenon::CellLevel::same, 1.0, mc0.get()); + vf_dep = tl.AddTask(vf_dep, advection_package::CalculateVectorFluxes, edge, + parthenon::CellLevel::same, -1.0, mc0.get()); + } } auto set_flx = parthenon::AddFluxCorrectionTasks( start_flxcor | flx | flx_fine | vf_dep, tl, mc0, pmesh->multilevel); - auto update = - AddUpdateTasks(set_flx, tl, parthenon::CellLevel::same, TT::Cell, beta, dt, desc, - mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + auto update = set_flx; + if (do_regular_advection) { + update = AddUpdateTasks(set_flx, tl, parthenon::CellLevel::same, TT::Cell, beta, dt, desc, + mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + } - auto update_fine = - AddUpdateTasks(set_flx, tl, parthenon::CellLevel::fine, TT::Cell, beta, dt, - desc_fine, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + auto update_fine = set_flx; + if (do_fine_advection) { + update_fine = AddUpdateTasks(set_flx, tl, parthenon::CellLevel::fine, TT::Cell, beta, dt, + desc_fine, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + } - auto update_vec = - AddUpdateTasks(set_flx, tl, parthenon::CellLevel::same, TT::Face, beta, dt, - desc_vec, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + auto update_vec = set_flx; + if (do_CT_advection) { + update_vec = AddUpdateTasks(set_flx, tl, parthenon::CellLevel::same, TT::Face, beta, dt, + desc_vec, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + } auto boundaries = parthenon::AddBoundaryExchangeTasks( update | update_vec | update_fine | start_send, tl, mc1, pmesh->multilevel); diff --git a/example/fine_advection/advection_package.cpp b/example/fine_advection/advection_package.cpp index c597af6284b7..2d5dc62795d7 100644 --- a/example/fine_advection/advection_package.cpp +++ b/example/fine_advection/advection_package.cpp @@ -65,37 +65,57 @@ std::shared_ptr Initialize(ParameterInput *pin) { } pkg->AddParam<>("profile", profile_str); - pkg->AddField( - Metadata({Metadata::Cell, Metadata::Fine, Metadata::Independent, - Metadata::WithFluxes, Metadata::FillGhost})); - - Metadata m({Metadata::Cell, Metadata::Independent, Metadata::WithFluxes, - Metadata::FillGhost, Metadata::Sparse}); - m.SetSparseThresholds(1.e-6, 5.e-7, 0.0); - pkg->AddSparsePool(m, std::vector{0}); - - pkg->AddField( - Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); - - m = Metadata( - {Metadata::Face, Metadata::Independent, Metadata::WithFluxes, Metadata::FillGhost}); - m.RegisterRefinementOps(); - pkg->AddField(m); - pkg->AddField(m); - pkg->AddField(Metadata( - {Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, std::vector{4})); - pkg->AddField(Metadata( - {Metadata::Face, Metadata::Derived, Metadata::OneCopy}, std::vector{2})); - pkg->AddField(Metadata( - {Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, std::vector{3})); - pkg->AddField(Metadata( - {Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, std::vector{3})); - pkg->AddField( - Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); - pkg->AddField( - Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); + + bool do_regular_advection = pin->GetOrAddBoolean("Advection", "do_regular_advection", true); + pkg->AddParam<>("do_regular_advection", do_regular_advection); + if (do_regular_advection) { + int shape_size = pin->GetOrAddInteger("Advection", "shape_size", 1); + int sparse_size = pin->GetOrAddInteger("Advection", "sparse_size", 1); + Real alloc_threshold = pin->GetOrAddReal("Advection", "alloc_threshold", 1.e-6); + Real dealloc_threshold = pin->GetOrAddReal("Advection", "dealloc_threshold", 5.e-7); + Metadata m({Metadata::Cell, Metadata::Independent, Metadata::WithFluxes, + Metadata::FillGhost, Metadata::Sparse}, std::vector{shape_size}); + m.SetSparseThresholds(alloc_threshold, dealloc_threshold, 0.0); + std::vector sparse_idxs(sparse_size); + std::iota(sparse_idxs.begin(), sparse_idxs.end(), 0); + pkg->AddSparsePool(m, sparse_idxs); + } + + + bool do_fine_advection = pin->GetOrAddBoolean("Advection", "do_fine_advection", true); + pkg->AddParam<>("do_fine_advection", do_fine_advection); + if (do_fine_advection) { + pkg->AddField( + Metadata({Metadata::Cell, Metadata::Fine, Metadata::Independent, + Metadata::WithFluxes, Metadata::FillGhost})); + + pkg->AddField( + Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); + } + + bool do_CT_advection = pin->GetOrAddBoolean("Advection", "do_CT_advection", true); + pkg->AddParam<>("do_CT_advection", do_CT_advection); + if (do_CT_advection) { + auto m = Metadata( + {Metadata::Face, Metadata::Independent, Metadata::WithFluxes, Metadata::FillGhost}); + m.RegisterRefinementOps(); + pkg->AddField(m); + pkg->AddField(m); + pkg->AddField(Metadata( + {Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, std::vector{4})); + pkg->AddField(Metadata( + {Metadata::Face, Metadata::Derived, Metadata::OneCopy}, std::vector{2})); + pkg->AddField(Metadata( + {Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, std::vector{3})); + pkg->AddField(Metadata( + {Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, std::vector{3})); + pkg->AddField( + Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); + pkg->AddField( + Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); + } pkg->CheckRefinementBlock = CheckRefinement; pkg->EstimateTimestepMesh = EstimateTimestep; @@ -104,36 +124,41 @@ std::shared_ptr Initialize(ParameterInput *pin) { } AmrTag CheckRefinement(MeshBlockData *rc) { - // refine on advected, for example. could also be a derived quantity - static auto desc = parthenon::MakePackDescriptor(rc); - auto pack = desc.GetPack(rc); + std::shared_ptr pkg = + rc->GetMeshPointer()->packages.Get("advection_package"); + auto do_regular_advection = pkg->Param("do_regular_advection"); + if (do_regular_advection) { + // refine on advected, for example. could also be a derived quantity + static auto desc = parthenon::MakePackDescriptor(rc); + auto pack = desc.GetPack(rc); - auto pmb = rc->GetBlockPointer(); - IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire); - IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire); - IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire); + auto pmb = rc->GetBlockPointer(); + IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire); + IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire); + IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire); - typename Kokkos::MinMax::value_type minmax; - parthenon::par_reduce( - parthenon::loop_pattern_mdrange_tag, PARTHENON_AUTO_LABEL, DevExecSpace(), - 0, pack.GetNBlocks() - 1, // Runs from [0, 0] since pack built from MeshBlockData - pack.GetLowerBoundHost(0), pack.GetUpperBoundHost(0), kb.s, kb.e, jb.s, jb.e, ib.s, - ib.e, - KOKKOS_LAMBDA(const int b, const int n, const int k, const int j, const int i, - typename Kokkos::MinMax::value_type &lminmax) { - lminmax.min_val = - (pack(b, n, k, j, i) < lminmax.min_val ? pack(b, n, k, j, i) : lminmax.min_val); - lminmax.max_val = - (pack(b, n, k, j, i) > lminmax.max_val ? pack(b, n, k, j, i) : lminmax.max_val); - }, - Kokkos::MinMax(minmax)); + typename Kokkos::MinMax::value_type minmax; + parthenon::par_reduce( + parthenon::loop_pattern_mdrange_tag, PARTHENON_AUTO_LABEL, DevExecSpace(), + 0, pack.GetNBlocks() - 1, // Runs from [0, 0] since pack built from MeshBlockData + pack.GetLowerBoundHost(0), pack.GetUpperBoundHost(0), kb.s, kb.e, jb.s, jb.e, ib.s, + ib.e, + KOKKOS_LAMBDA(const int b, const int n, const int k, const int j, const int i, + typename Kokkos::MinMax::value_type &lminmax) { + lminmax.min_val = + (pack(b, n, k, j, i) < lminmax.min_val ? pack(b, n, k, j, i) : lminmax.min_val); + lminmax.max_val = + (pack(b, n, k, j, i) > lminmax.max_val ? pack(b, n, k, j, i) : lminmax.max_val); + }, + Kokkos::MinMax(minmax)); - auto pkg = pmb->packages.Get("advection_package"); - const auto &refine_tol = pkg->Param("refine_tol"); - const auto &derefine_tol = pkg->Param("derefine_tol"); + auto pkg = pmb->packages.Get("advection_package"); + const auto &refine_tol = pkg->Param("refine_tol"); + const auto &derefine_tol = pkg->Param("derefine_tol"); - if (minmax.max_val > refine_tol && minmax.min_val < derefine_tol) return AmrTag::refine; - if (minmax.max_val < derefine_tol) return AmrTag::derefine; + if (minmax.max_val > refine_tol && minmax.min_val < derefine_tol) return AmrTag::refine; + if (minmax.max_val < derefine_tol) return AmrTag::derefine; + } return AmrTag::same; } @@ -178,75 +203,85 @@ TaskStatus FillDerived(MeshData *md) { Conserved::D_cc, Conserved::divC, Conserved::divD>( md); auto pack = desc.GetPack(md); - + + std::shared_ptr pkg = + md->GetMeshPointer()->packages.Get("advection_package"); + IndexRange ib = md->GetBoundsI(IndexDomain::interior); IndexRange jb = md->GetBoundsJ(IndexDomain::interior); IndexRange kb = md->GetBoundsK(IndexDomain::interior); const int ndim = md->GetMeshPointer()->ndim; const int nghost = parthenon::Globals::nghost; - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - const int kf = (ndim > 2) ? (k - nghost) * 2 + nghost : k; - const int jf = (ndim > 1) ? (j - nghost) * 2 + nghost : j; - const int fi = (ndim > 0) ? (i - nghost) * 2 + nghost : i; - pack(b, Conserved::phi_fine_restricted(), k, j, i) = 0.0; - Real ntot = 0.0; - for (int ioff = 0; ioff <= (ndim > 0); ++ioff) - for (int joff = 0; joff <= (ndim > 1); ++joff) - for (int koff = 0; koff <= (ndim > 2); ++koff) { - ntot += 1.0; - pack(b, Conserved::phi_fine_restricted(), k, j, i) += - pack(b, Conserved::phi_fine(), kf + koff, jf + joff, fi + ioff); - } - pack(b, Conserved::phi_fine_restricted(), k, j, i) /= ntot; - }); - - using TE = parthenon::TopologicalElement; - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { - pack(b, Conserved::C_cc(0), k, j, i) = - 0.5 * (pack(b, TE::F1, Conserved::C(), k, j, i) + - pack(b, TE::F1, Conserved::C(), k, j, i + (ndim > 0))); - pack(b, Conserved::C_cc(1), k, j, i) = - 0.5 * (pack(b, TE::F2, Conserved::C(), k, j, i) + - pack(b, TE::F2, Conserved::C(), k, j + (ndim > 1), i)); - pack(b, Conserved::C_cc(2), k, j, i) = - 0.5 * (pack(b, TE::F3, Conserved::C(), k, j, i) + - pack(b, TE::F3, Conserved::C(), k + (ndim > 2), j, i)); - auto &coords = pack.GetCoordinates(b); - pack(b, Conserved::divC(), k, j, i) = - (pack(b, TE::F1, Conserved::C(), k, j, i + (ndim > 0)) - - pack(b, TE::F1, Conserved::C(), k, j, i)) / - coords.Dxc(k, j, i) + - (pack(b, TE::F2, Conserved::C(), k, j + (ndim > 1), i) - - pack(b, TE::F2, Conserved::C(), k, j, i)) / - coords.Dxc(k, j, i) + - (pack(b, TE::F3, Conserved::C(), k + (ndim > 2), j, i) - - pack(b, TE::F3, Conserved::C(), k, j, i)) / - coords.Dxc(k, j, i); - - pack(b, Conserved::D_cc(0), k, j, i) = - 0.5 * (pack(b, TE::F1, Conserved::D(), k, j, i) + - pack(b, TE::F1, Conserved::D(), k, j, i + (ndim > 0))); - pack(b, Conserved::D_cc(1), k, j, i) = - 0.5 * (pack(b, TE::F2, Conserved::D(), k, j, i) + - pack(b, TE::F2, Conserved::D(), k, j + (ndim > 1), i)); - pack(b, Conserved::D_cc(2), k, j, i) = - 0.5 * (pack(b, TE::F3, Conserved::D(), k, j, i) + - pack(b, TE::F3, Conserved::D(), k + (ndim > 2), j, i)); - pack(b, Conserved::divD(), k, j, i) = - (pack(b, TE::F1, Conserved::D(), k, j, i + (ndim > 0)) - - pack(b, TE::F1, Conserved::D(), k, j, i)) / - coords.Dxc(k, j, i) + - (pack(b, TE::F2, Conserved::D(), k, j + (ndim > 1), i) - - pack(b, TE::F2, Conserved::D(), k, j, i)) / - coords.Dxc(k, j, i) + - (pack(b, TE::F3, Conserved::D(), k + (ndim > 2), j, i) - - pack(b, TE::F3, Conserved::D(), k, j, i)) / - coords.Dxc(k, j, i); - }); + + auto do_fine_advection = pkg->Param("do_fine_advection"); + if (do_fine_advection) { + parthenon::par_for( + PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + const int kf = (ndim > 2) ? (k - nghost) * 2 + nghost : k; + const int jf = (ndim > 1) ? (j - nghost) * 2 + nghost : j; + const int fi = (ndim > 0) ? (i - nghost) * 2 + nghost : i; + pack(b, Conserved::phi_fine_restricted(), k, j, i) = 0.0; + Real ntot = 0.0; + for (int ioff = 0; ioff <= (ndim > 0); ++ioff) + for (int joff = 0; joff <= (ndim > 1); ++joff) + for (int koff = 0; koff <= (ndim > 2); ++koff) { + ntot += 1.0; + pack(b, Conserved::phi_fine_restricted(), k, j, i) += + pack(b, Conserved::phi_fine(), kf + koff, jf + joff, fi + ioff); + } + pack(b, Conserved::phi_fine_restricted(), k, j, i) /= ntot; + }); + } + + auto do_CT_advection = pkg->Param("do_CT_advection"); + if (do_CT_advection) { + using TE = parthenon::TopologicalElement; + parthenon::par_for( + PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + pack(b, Conserved::C_cc(0), k, j, i) = + 0.5 * (pack(b, TE::F1, Conserved::C(), k, j, i) + + pack(b, TE::F1, Conserved::C(), k, j, i + (ndim > 0))); + pack(b, Conserved::C_cc(1), k, j, i) = + 0.5 * (pack(b, TE::F2, Conserved::C(), k, j, i) + + pack(b, TE::F2, Conserved::C(), k, j + (ndim > 1), i)); + pack(b, Conserved::C_cc(2), k, j, i) = + 0.5 * (pack(b, TE::F3, Conserved::C(), k, j, i) + + pack(b, TE::F3, Conserved::C(), k + (ndim > 2), j, i)); + auto &coords = pack.GetCoordinates(b); + pack(b, Conserved::divC(), k, j, i) = + (pack(b, TE::F1, Conserved::C(), k, j, i + (ndim > 0)) - + pack(b, TE::F1, Conserved::C(), k, j, i)) / + coords.Dxc(k, j, i) + + (pack(b, TE::F2, Conserved::C(), k, j + (ndim > 1), i) - + pack(b, TE::F2, Conserved::C(), k, j, i)) / + coords.Dxc(k, j, i) + + (pack(b, TE::F3, Conserved::C(), k + (ndim > 2), j, i) - + pack(b, TE::F3, Conserved::C(), k, j, i)) / + coords.Dxc(k, j, i); + + pack(b, Conserved::D_cc(0), k, j, i) = + 0.5 * (pack(b, TE::F1, Conserved::D(), k, j, i) + + pack(b, TE::F1, Conserved::D(), k, j, i + (ndim > 0))); + pack(b, Conserved::D_cc(1), k, j, i) = + 0.5 * (pack(b, TE::F2, Conserved::D(), k, j, i) + + pack(b, TE::F2, Conserved::D(), k, j + (ndim > 1), i)); + pack(b, Conserved::D_cc(2), k, j, i) = + 0.5 * (pack(b, TE::F3, Conserved::D(), k, j, i) + + pack(b, TE::F3, Conserved::D(), k + (ndim > 2), j, i)); + pack(b, Conserved::divD(), k, j, i) = + (pack(b, TE::F1, Conserved::D(), k, j, i + (ndim > 0)) - + pack(b, TE::F1, Conserved::D(), k, j, i)) / + coords.Dxc(k, j, i) + + (pack(b, TE::F2, Conserved::D(), k, j + (ndim > 1), i) - + pack(b, TE::F2, Conserved::D(), k, j, i)) / + coords.Dxc(k, j, i) + + (pack(b, TE::F3, Conserved::D(), k + (ndim > 2), j, i) - + pack(b, TE::F3, Conserved::D(), k, j, i)) / + coords.Dxc(k, j, i); + }); + } return TaskStatus::complete; } } // namespace advection_package diff --git a/example/fine_advection/parthenon_app_inputs.cpp b/example/fine_advection/parthenon_app_inputs.cpp index 626607a15e1a..6900c67bb152 100644 --- a/example/fine_advection/parthenon_app_inputs.cpp +++ b/example/fine_advection/parthenon_app_inputs.cpp @@ -49,6 +49,9 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) { auto pkg = pmb->packages.Get("advection_package"); const auto &profile = pkg->Param("profile"); + auto do_regular_advection = pkg->Param("do_regular_advection"); + auto do_fine_advection = pkg->Param("do_fine_advection"); + auto do_CT_advection = pkg->Param("do_CT_advection"); auto cellbounds = pmb->cellbounds; IndexRange ib = cellbounds.GetBoundsI(IndexDomain::interior); @@ -83,62 +86,72 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) { Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + coords.Xc<2>(j) * coords.Xc<2>(j) + coords.Xc<3>(k) * coords.Xc<3>(k); - pack(b, phi(), k, j, i) = 1. + amp * exp(-100.0 * rsq); - for (int ioff = 0; ioff <= (ndim > 0); ++ioff) - for (int joff = 0; joff <= (ndim > 1); ++joff) - for (int koff = 0; koff <= (ndim > 2); ++koff) { - pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = - 1. + amp * exp(-100.0 * rsq); - } + if (do_regular_advection) + pack(b, phi(), k, j, i) = 1. + amp * exp(-100.0 * rsq); + if (do_fine_advection) { + for (int ioff = 0; ioff <= (ndim > 0); ++ioff) + for (int joff = 0; joff <= (ndim > 1); ++joff) + for (int koff = 0; koff <= (ndim > 2); ++koff) { + pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = + 1. + amp * exp(-100.0 * rsq); + } + } } else if (profile_type == 2) { Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + coords.Xc<2>(j) * coords.Xc<2>(j) + coords.Xc<3>(k) * coords.Xc<3>(k); - pack(b, phi(), k, j, i) = (rsq < 0.15 * 0.15 ? 1.0 : 0.0); - for (int ioff = 0; ioff <= (ndim > 0); ++ioff) - for (int joff = 0; joff <= (ndim > 1); ++joff) - for (int koff = 0; koff <= (ndim > 2); ++koff) { - pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = - (rsq < 0.15 * 0.15 ? 1.0 : 0.0); - } + if (do_regular_advection) + pack(b, phi(), k, j, i) = (rsq < 0.15 * 0.15 ? 1.0 : 0.0); + if (do_fine_advection) { + for (int ioff = 0; ioff <= (ndim > 0); ++ioff) + for (int joff = 0; joff <= (ndim > 1); ++joff) + for (int koff = 0; koff <= (ndim > 2); ++koff) { + pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = + (rsq < 0.15 * 0.15 ? 1.0 : 0.0); + } + } } else { - pack(b, phi(), k, j, i) = 0.0; - for (int ioff = 0; ioff <= (ndim > 0); ++ioff) - for (int joff = 0; joff <= (ndim > 1); ++joff) - for (int koff = 0; koff <= (ndim > 2); ++koff) { - pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = 0.0; - } + if (do_regular_advection) + pack(b, phi(), k, j, i) = 0.0; + if (do_fine_advection) { + for (int ioff = 0; ioff <= (ndim > 0); ++ioff) + for (int joff = 0; joff <= (ndim > 1); ++joff) + for (int koff = 0; koff <= (ndim > 2); ++koff) { + pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = 0.0; + } + } } }); - - ib = cellbounds.GetBoundsI(IndexDomain::interior, TE::F2); - jb = cellbounds.GetBoundsJ(IndexDomain::interior, TE::F2); - kb = cellbounds.GetBoundsK(IndexDomain::interior, TE::F2); - const Real x0 = 0.2; - parthenon::par_for( - PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int k, const int j, const int i) { - auto &coords = pack.GetCoordinates(b); - Real xlo = coords.X(k, j, i); - Real xhi = coords.X(k, j, i + 1); - Real Alo = std::abs(xlo) < x0 ? sign(xlo, xhi) * (x0 - std::abs(xlo)) : 0.0; - Real Ahi = std::abs(xhi) < x0 ? sign(xhi, xlo) * (x0 - std::abs(xhi)) : 0.0; - pack(b, TE::F2, C(), k, j, i) = (Alo - Ahi) / (xhi - xlo); - }); - - ib = cellbounds.GetBoundsI(IndexDomain::interior, TE::F3); - jb = cellbounds.GetBoundsJ(IndexDomain::interior, TE::F3); - kb = cellbounds.GetBoundsK(IndexDomain::interior, TE::F3); - parthenon::par_for( - PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int k, const int j, const int i) { - auto &coords = pack.GetCoordinates(b); - Real xlo = coords.X(k, j, i); - Real xhi = coords.X(k, j, i + 1); - Real Alo = std::abs(xlo) < x0 ? sign(xlo, xhi) * (x0 - std::abs(xlo)) : 0.0; - Real Ahi = std::abs(xhi) < x0 ? sign(xhi, xlo) * (x0 - std::abs(xhi)) : 0.0; - pack(b, TE::F3, D(), k, j, i) = (Alo - Ahi) / (xhi - xlo); - }); + if (do_CT_advection) { + ib = cellbounds.GetBoundsI(IndexDomain::interior, TE::F2); + jb = cellbounds.GetBoundsJ(IndexDomain::interior, TE::F2); + kb = cellbounds.GetBoundsK(IndexDomain::interior, TE::F2); + const Real x0 = 0.2; + parthenon::par_for( + PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int k, const int j, const int i) { + auto &coords = pack.GetCoordinates(b); + Real xlo = coords.X(k, j, i); + Real xhi = coords.X(k, j, i + 1); + Real Alo = std::abs(xlo) < x0 ? sign(xlo, xhi) * (x0 - std::abs(xlo)) : 0.0; + Real Ahi = std::abs(xhi) < x0 ? sign(xhi, xlo) * (x0 - std::abs(xhi)) : 0.0; + pack(b, TE::F2, C(), k, j, i) = (Alo - Ahi) / (xhi - xlo); + }); + + ib = cellbounds.GetBoundsI(IndexDomain::interior, TE::F3); + jb = cellbounds.GetBoundsJ(IndexDomain::interior, TE::F3); + kb = cellbounds.GetBoundsK(IndexDomain::interior, TE::F3); + parthenon::par_for( + PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int k, const int j, const int i) { + auto &coords = pack.GetCoordinates(b); + Real xlo = coords.X(k, j, i); + Real xhi = coords.X(k, j, i + 1); + Real Alo = std::abs(xlo) < x0 ? sign(xlo, xhi) * (x0 - std::abs(xlo)) : 0.0; + Real Ahi = std::abs(xhi) < x0 ? sign(xhi, xlo) * (x0 - std::abs(xhi)) : 0.0; + pack(b, TE::F3, D(), k, j, i) = (Alo - Ahi) / (xhi - xlo); + }); + } } Packages_t ProcessPackages(std::unique_ptr &pin) { From 3911b424a35444c2dfcc1184954bbccd495425a0 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 15 Oct 2024 14:40:39 -0600 Subject: [PATCH 10/29] format --- example/fine_advection/advection_driver.cpp | 21 ++++--- example/fine_advection/advection_package.cpp | 57 ++++++++++--------- example/fine_advection/advection_package.hpp | 24 ++++---- .../fine_advection/parthenon_app_inputs.cpp | 3 +- 4 files changed, 54 insertions(+), 51 deletions(-) diff --git a/example/fine_advection/advection_driver.cpp b/example/fine_advection/advection_driver.cpp index 4f3ec5a50213..ca55c04f8278 100644 --- a/example/fine_advection/advection_driver.cpp +++ b/example/fine_advection/advection_driver.cpp @@ -72,8 +72,9 @@ TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const in using namespace parthenon::Update; TaskCollection tc; TaskID none(0); - - std::shared_ptr pkg = pmesh->packages.Get("advection_package"); + + std::shared_ptr pkg = + pmesh->packages.Get("advection_package"); const auto do_regular_advection = pkg->Param("do_regular_advection"); const auto do_fine_advection = pkg->Param("do_fine_advection"); const auto do_CT_advection = pkg->Param("do_CT_advection"); @@ -125,8 +126,8 @@ TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const in auto flx_fine = none; for (auto face : faces) { if (do_regular_advection) { - flx = flx | tl.AddTask(none, advection_package::CalculateFluxes, desc, - face, parthenon::CellLevel::same, mc0.get()); + flx = flx | tl.AddTask(none, advection_package::CalculateFluxes, + desc, face, parthenon::CellLevel::same, mc0.get()); } if (do_fine_advection) { flx_fine = flx_fine | @@ -150,20 +151,22 @@ TaskCollection AdvectionDriver::MakeTaskCollection(BlockList_t &blocks, const in auto update = set_flx; if (do_regular_advection) { - update = AddUpdateTasks(set_flx, tl, parthenon::CellLevel::same, TT::Cell, beta, dt, desc, - mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + update = AddUpdateTasks(set_flx, tl, parthenon::CellLevel::same, TT::Cell, beta, dt, + desc, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); } auto update_fine = set_flx; if (do_fine_advection) { - update_fine = AddUpdateTasks(set_flx, tl, parthenon::CellLevel::fine, TT::Cell, beta, dt, + update_fine = + AddUpdateTasks(set_flx, tl, parthenon::CellLevel::fine, TT::Cell, beta, dt, desc_fine, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); } auto update_vec = set_flx; if (do_CT_advection) { - update_vec = AddUpdateTasks(set_flx, tl, parthenon::CellLevel::same, TT::Face, beta, dt, - desc_vec, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); + update_vec = + AddUpdateTasks(set_flx, tl, parthenon::CellLevel::same, TT::Face, beta, dt, + desc_vec, mbase.get(), mc0.get(), mdudt.get(), mc1.get()); } auto boundaries = parthenon::AddBoundaryExchangeTasks( diff --git a/example/fine_advection/advection_package.cpp b/example/fine_advection/advection_package.cpp index 2d5dc62795d7..6a20eb777e54 100644 --- a/example/fine_advection/advection_package.cpp +++ b/example/fine_advection/advection_package.cpp @@ -65,39 +65,39 @@ std::shared_ptr Initialize(ParameterInput *pin) { } pkg->AddParam<>("profile", profile_str); - - bool do_regular_advection = pin->GetOrAddBoolean("Advection", "do_regular_advection", true); + bool do_regular_advection = + pin->GetOrAddBoolean("Advection", "do_regular_advection", true); pkg->AddParam<>("do_regular_advection", do_regular_advection); - if (do_regular_advection) { + if (do_regular_advection) { int shape_size = pin->GetOrAddInteger("Advection", "shape_size", 1); int sparse_size = pin->GetOrAddInteger("Advection", "sparse_size", 1); Real alloc_threshold = pin->GetOrAddReal("Advection", "alloc_threshold", 1.e-6); Real dealloc_threshold = pin->GetOrAddReal("Advection", "dealloc_threshold", 5.e-7); Metadata m({Metadata::Cell, Metadata::Independent, Metadata::WithFluxes, - Metadata::FillGhost, Metadata::Sparse}, std::vector{shape_size}); + Metadata::FillGhost, Metadata::Sparse}, + std::vector{shape_size}); m.SetSparseThresholds(alloc_threshold, dealloc_threshold, 0.0); - std::vector sparse_idxs(sparse_size); + std::vector sparse_idxs(sparse_size); std::iota(sparse_idxs.begin(), sparse_idxs.end(), 0); pkg->AddSparsePool(m, sparse_idxs); } - bool do_fine_advection = pin->GetOrAddBoolean("Advection", "do_fine_advection", true); pkg->AddParam<>("do_fine_advection", do_fine_advection); - if (do_fine_advection) { + if (do_fine_advection) { pkg->AddField( Metadata({Metadata::Cell, Metadata::Fine, Metadata::Independent, Metadata::WithFluxes, Metadata::FillGhost})); pkg->AddField( Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); - } + } bool do_CT_advection = pin->GetOrAddBoolean("Advection", "do_CT_advection", true); pkg->AddParam<>("do_CT_advection", do_CT_advection); - if (do_CT_advection) { - auto m = Metadata( - {Metadata::Face, Metadata::Independent, Metadata::WithFluxes, Metadata::FillGhost}); + if (do_CT_advection) { + auto m = Metadata({Metadata::Face, Metadata::Independent, Metadata::WithFluxes, + Metadata::FillGhost}); m.RegisterRefinementOps(); @@ -115,7 +115,7 @@ std::shared_ptr Initialize(ParameterInput *pin) { Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); pkg->AddField( Metadata({Metadata::Cell, Metadata::Derived, Metadata::OneCopy})); - } + } pkg->CheckRefinementBlock = CheckRefinement; pkg->EstimateTimestepMesh = EstimateTimestep; @@ -139,16 +139,16 @@ AmrTag CheckRefinement(MeshBlockData *rc) { typename Kokkos::MinMax::value_type minmax; parthenon::par_reduce( - parthenon::loop_pattern_mdrange_tag, PARTHENON_AUTO_LABEL, DevExecSpace(), - 0, pack.GetNBlocks() - 1, // Runs from [0, 0] since pack built from MeshBlockData - pack.GetLowerBoundHost(0), pack.GetUpperBoundHost(0), kb.s, kb.e, jb.s, jb.e, ib.s, - ib.e, + parthenon::loop_pattern_mdrange_tag, PARTHENON_AUTO_LABEL, DevExecSpace(), 0, + pack.GetNBlocks() - 1, // Runs from [0, 0] since pack built from MeshBlockData + pack.GetLowerBoundHost(0), pack.GetUpperBoundHost(0), kb.s, kb.e, jb.s, jb.e, + ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int n, const int k, const int j, const int i, typename Kokkos::MinMax::value_type &lminmax) { - lminmax.min_val = - (pack(b, n, k, j, i) < lminmax.min_val ? pack(b, n, k, j, i) : lminmax.min_val); - lminmax.max_val = - (pack(b, n, k, j, i) > lminmax.max_val ? pack(b, n, k, j, i) : lminmax.max_val); + lminmax.min_val = (pack(b, n, k, j, i) < lminmax.min_val ? pack(b, n, k, j, i) + : lminmax.min_val); + lminmax.max_val = (pack(b, n, k, j, i) > lminmax.max_val ? pack(b, n, k, j, i) + : lminmax.max_val); }, Kokkos::MinMax(minmax)); @@ -156,7 +156,8 @@ AmrTag CheckRefinement(MeshBlockData *rc) { const auto &refine_tol = pkg->Param("refine_tol"); const auto &derefine_tol = pkg->Param("derefine_tol"); - if (minmax.max_val > refine_tol && minmax.min_val < derefine_tol) return AmrTag::refine; + if (minmax.max_val > refine_tol && minmax.min_val < derefine_tol) + return AmrTag::refine; if (minmax.max_val < derefine_tol) return AmrTag::derefine; } return AmrTag::same; @@ -203,21 +204,21 @@ TaskStatus FillDerived(MeshData *md) { Conserved::D_cc, Conserved::divC, Conserved::divD>( md); auto pack = desc.GetPack(md); - + std::shared_ptr pkg = md->GetMeshPointer()->packages.Get("advection_package"); - + IndexRange ib = md->GetBoundsI(IndexDomain::interior); IndexRange jb = md->GetBoundsJ(IndexDomain::interior); IndexRange kb = md->GetBoundsK(IndexDomain::interior); const int ndim = md->GetMeshPointer()->ndim; const int nghost = parthenon::Globals::nghost; - + auto do_fine_advection = pkg->Param("do_fine_advection"); if (do_fine_advection) { parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, + ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { const int kf = (ndim > 2) ? (k - nghost) * 2 + nghost : k; const int jf = (ndim > 1) ? (j - nghost) * 2 + nghost : j; const int fi = (ndim > 0) ? (i - nghost) * 2 + nghost : i; @@ -238,8 +239,8 @@ TaskStatus FillDerived(MeshData *md) { if (do_CT_advection) { using TE = parthenon::TopologicalElement; parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, + ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { pack(b, Conserved::C_cc(0), k, j, i) = 0.5 * (pack(b, TE::F1, Conserved::C(), k, j, i) + pack(b, TE::F1, Conserved::C(), k, j, i + (ndim > 0))); diff --git a/example/fine_advection/advection_package.hpp b/example/fine_advection/advection_package.hpp index 62fcb3b76e05..6e3f8e03f232 100644 --- a/example/fine_advection/advection_package.hpp +++ b/example/fine_advection/advection_package.hpp @@ -126,15 +126,16 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge, constexpr int scratch_size = 0; constexpr int scratch_level = 1; parthenon::par_for_outer( - PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, - kb.s - (ndim > 2), kb.e + (ndim > 2), + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, + kb.s - (ndim > 2), kb.e + (ndim > 2), KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { parthenon::Indexer2D idxer({jb.s - (ndim > 1), jb.e + (ndim > 1)}, {ib.s - (ndim > 0), ib.e + (ndim > 0)}); if (pack.GetLowerBound(b, flux_var()) <= pack.GetUpperBound(b, flux_var())) { parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { const auto [j, i] = idxer(idx); - // Piecewise linear in the orthogonal directions, could do something better here + // Piecewise linear in the orthogonal directions, could do something better + // here pack(b, TE::CC, recon(0), k, j, i) = 0.5 * (pack(b, fe, flux_var(), k, j, i) + pack(b, fe, flux_var(), k + koff, j + joff, i + ioff)); @@ -160,9 +161,8 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge, jb = md->GetBoundsJ(cl, IndexDomain::interior, edge); kb = md->GetBoundsK(cl, IndexDomain::interior, edge); parthenon::par_for_outer( - PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, - kb.s, kb.e, - KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, kb.s, + kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); if (pack.GetLowerBound(b, var()) <= pack.GetUpperBound(b, var())) { parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { @@ -191,15 +191,16 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge, jb = md->GetBoundsJ(cl, IndexDomain::interior, f); kb = md->GetBoundsK(cl, IndexDomain::interior, f); parthenon::par_for_outer( - PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, - kb.s - (ndim > 2), kb.e + (ndim > 2), + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, + kb.s - (ndim > 2), kb.e + (ndim > 2), KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { parthenon::Indexer2D idxer({jb.s - (ndim > 1), jb.e + (ndim > 1)}, {ib.s - (ndim > 0), ib.e + (ndim > 0)}); if (pack.GetLowerBound(b, var()) <= pack.GetUpperBound(b, var())) { parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { const auto [j, i] = idxer(idx); - // Piecewise linear in the orthogonal directions, could do something better here + // Piecewise linear in the orthogonal directions, could do something better + // here pack(b, f, recon_f(0), k, j, i) = pack(b, f, var(), k, j, i); pack(b, f, recon_f(1), k, j, i) = pack(b, f, var(), k, j, i); }); @@ -220,9 +221,8 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge, d2[static_cast(edge) % 3] = 0; d2[static_cast(f2) % 3] = 0; parthenon::par_for_outer( - PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, - kb.s, kb.e, - KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, kb.s, + kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); if (pack.GetLowerBound(b, var()) <= pack.GetUpperBound(b, var())) { parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { diff --git a/example/fine_advection/parthenon_app_inputs.cpp b/example/fine_advection/parthenon_app_inputs.cpp index 6900c67bb152..b856011202f8 100644 --- a/example/fine_advection/parthenon_app_inputs.cpp +++ b/example/fine_advection/parthenon_app_inputs.cpp @@ -111,8 +111,7 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) { } } } else { - if (do_regular_advection) - pack(b, phi(), k, j, i) = 0.0; + if (do_regular_advection) pack(b, phi(), k, j, i) = 0.0; if (do_fine_advection) { for (int ioff = 0; ioff <= (ndim > 0); ++ioff) for (int joff = 0; joff <= (ndim > 1); ++joff) From 14fa3a4fbe6b34036819450c833e18d621190103 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 15 Oct 2024 15:48:27 -0600 Subject: [PATCH 11/29] working with abitrary number of fields --- example/fine_advection/advection_package.cpp | 1 + .../fine_advection/parthenon_app_inputs.cpp | 83 ++++++++++++------- 2 files changed, 54 insertions(+), 30 deletions(-) diff --git a/example/fine_advection/advection_package.cpp b/example/fine_advection/advection_package.cpp index 6a20eb777e54..648dfe4ac83f 100644 --- a/example/fine_advection/advection_package.cpp +++ b/example/fine_advection/advection_package.cpp @@ -71,6 +71,7 @@ std::shared_ptr Initialize(ParameterInput *pin) { if (do_regular_advection) { int shape_size = pin->GetOrAddInteger("Advection", "shape_size", 1); int sparse_size = pin->GetOrAddInteger("Advection", "sparse_size", 1); + pkg->AddParam<>("sparse_size", sparse_size); Real alloc_threshold = pin->GetOrAddReal("Advection", "alloc_threshold", 1.e-6); Real dealloc_threshold = pin->GetOrAddReal("Advection", "dealloc_threshold", 5.e-7); Metadata m({Metadata::Cell, Metadata::Independent, Metadata::WithFluxes, diff --git a/example/fine_advection/parthenon_app_inputs.cpp b/example/fine_advection/parthenon_app_inputs.cpp index b856011202f8..335be6a65e02 100644 --- a/example/fine_advection/parthenon_app_inputs.cpp +++ b/example/fine_advection/parthenon_app_inputs.cpp @@ -61,7 +61,11 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) { auto coords = pmb->coords; using namespace advection_package::Conserved; - pmb->AllocSparseID(phi::name(), 0); + if (do_regular_advection) { + const int sparse_size = pkg->Param("sparse_size"); + for (int s = 0; s < sparse_size; ++s) + pmb->AllocSparseID(phi::name(), s); + } static auto desc = parthenon::MakePackDescriptor(data.get()); auto pack = desc.GetPack(data.get()); @@ -74,53 +78,72 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) { const int b = 0; const int ndim = pmb->pmy_mesh->ndim; const int nghost = parthenon::Globals::nghost; - parthenon::par_for( - PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int k, const int j, const int i) { - const int kf = ndim > 2 ? (k - nghost) * 2 + nghost : k; - const int jf = ndim > 1 ? (j - nghost) * 2 + nghost : j; - const int fi = ndim > 0 ? (i - nghost) * 2 + nghost : i; - if (profile_type == 0) { - PARTHENON_FAIL("Profile type wave not implemented."); - } else if (profile_type == 1) { - Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + - coords.Xc<2>(j) * coords.Xc<2>(j) + - coords.Xc<3>(k) * coords.Xc<3>(k); - if (do_regular_advection) - pack(b, phi(), k, j, i) = 1. + amp * exp(-100.0 * rsq); - if (do_fine_advection) { + + if (do_regular_advection) { + parthenon::par_for( + PARTHENON_AUTO_LABEL, pack.GetLowerBoundHost(b, phi()), + pack.GetUpperBoundHost(b, phi()), kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int l, const int k, const int j, const int i) { + const int kf = ndim > 2 ? (k - nghost) * 2 + nghost : k; + const int jf = ndim > 1 ? (j - nghost) * 2 + nghost : j; + const int fi = ndim > 0 ? (i - nghost) * 2 + nghost : i; + if (profile_type == 0) { + PARTHENON_FAIL("Profile type wave not implemented."); + } else if (profile_type == 1) { + Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + + coords.Xc<2>(j) * coords.Xc<2>(j) + + coords.Xc<3>(k) * coords.Xc<3>(k); + pack(b, phi(l), k, j, i) = 1. + amp * exp(-100.0 * rsq); + } else if (profile_type == 2) { + Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + + coords.Xc<2>(j) * coords.Xc<2>(j) + + coords.Xc<3>(k) * coords.Xc<3>(k); + pack(b, phi(l), k, j, i) = (rsq < 0.15 * 0.15 ? 1.0 : 0.0); + } else { + pack(b, phi(l), k, j, i) = 0.0; + } + }); + } + + if (do_fine_advection) { + parthenon::par_for( + PARTHENON_AUTO_LABEL, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int k, const int j, const int i) { + const int kf = ndim > 2 ? (k - nghost) * 2 + nghost : k; + const int jf = ndim > 1 ? (j - nghost) * 2 + nghost : j; + const int fi = ndim > 0 ? (i - nghost) * 2 + nghost : i; + if (profile_type == 0) { + PARTHENON_FAIL("Profile type wave not implemented."); + } else if (profile_type == 1) { + Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + + coords.Xc<2>(j) * coords.Xc<2>(j) + + coords.Xc<3>(k) * coords.Xc<3>(k); for (int ioff = 0; ioff <= (ndim > 0); ++ioff) for (int joff = 0; joff <= (ndim > 1); ++joff) for (int koff = 0; koff <= (ndim > 2); ++koff) { pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = 1. + amp * exp(-100.0 * rsq); } - } - } else if (profile_type == 2) { - Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + - coords.Xc<2>(j) * coords.Xc<2>(j) + - coords.Xc<3>(k) * coords.Xc<3>(k); - if (do_regular_advection) - pack(b, phi(), k, j, i) = (rsq < 0.15 * 0.15 ? 1.0 : 0.0); - if (do_fine_advection) { + } else if (profile_type == 2) { + Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + + coords.Xc<2>(j) * coords.Xc<2>(j) + + coords.Xc<3>(k) * coords.Xc<3>(k); for (int ioff = 0; ioff <= (ndim > 0); ++ioff) for (int joff = 0; joff <= (ndim > 1); ++joff) for (int koff = 0; koff <= (ndim > 2); ++koff) { pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = (rsq < 0.15 * 0.15 ? 1.0 : 0.0); } - } - } else { - if (do_regular_advection) pack(b, phi(), k, j, i) = 0.0; - if (do_fine_advection) { + } else { for (int ioff = 0; ioff <= (ndim > 0); ++ioff) for (int joff = 0; joff <= (ndim > 1); ++joff) for (int koff = 0; koff <= (ndim > 2); ++koff) { pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = 0.0; } } - } - }); + }); + } + if (do_CT_advection) { ib = cellbounds.GetBoundsI(IndexDomain::interior, TE::F2); jb = cellbounds.GetBoundsJ(IndexDomain::interior, TE::F2); From b77c052071ebde2af1e348ff1b7987a279b2ddb4 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 15 Oct 2024 16:16:36 -0600 Subject: [PATCH 12/29] small --- src/parthenon_manager.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parthenon_manager.cpp b/src/parthenon_manager.cpp index e0de1fd9ccc3..26b5909e8775 100644 --- a/src/parthenon_manager.cpp +++ b/src/parthenon_manager.cpp @@ -304,6 +304,7 @@ void ParthenonManager::RestartPackages(Mesh &rm, RestartReader &resfile) { "Dense field " + label + " is marked as sparse in restart file"); } + max_fillsize = std::max(max_fillsize, static_cast(v_info.FillSize(theDomain))); } From 6a0097a519c235d9487a1af2f0050a79fd8a804f Mon Sep 17 00:00:00 2001 From: Adam Dempsey Date: Thu, 31 Oct 2024 07:38:34 -0600 Subject: [PATCH 13/29] Remove else from if constexpr when there are returns --- src/utils/hash.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/utils/hash.hpp b/src/utils/hash.hpp index 77642a64dba3..4f1ff6557c74 100644 --- a/src/utils/hash.hpp +++ b/src/utils/hash.hpp @@ -31,9 +31,8 @@ std::size_t hash_combine(std::size_t lhs, const T &v, Rest &&...rest) { lhs ^= rhs + 0x9e3779b9 + (lhs << 6) + (lhs >> 2); if constexpr (sizeof...(Rest) > 0) { return hash_combine(lhs, std::forward(rest)...); - } else { - return lhs; } + return lhs; } template ::value - 1> From f397d14c3f80481c6cd6452748148f52c9bf1de6 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 31 Oct 2024 10:54:09 -0600 Subject: [PATCH 14/29] Jonah's fix for this CI issue --- .github/workflows/docs.yml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 4b95511d82f5..9e1cbcfad76f 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -12,9 +12,12 @@ on: jobs: docs: name: build and deploy docs - runs-on: ubuntu-latest + # not using latest due to issues with pip user packages, see + # https://github.com/actions/runner-images/issues/10781 and + # https://github.com/actions/runner-images/issues/10636 + runs-on: ubuntu-22.04 - steps: + steps: - name: Checkout code uses: actions/checkout@v2 with: @@ -23,9 +26,9 @@ jobs: run: export DEBIAN_FRONTEND=noninteractive - name: install dependencies run: | - pip install --break-system-packages sphinx - pip install --break-system-packages sphinx-rtd-theme - pip install --break-system-packages sphinx-multiversion + pip install sphinx + pip install sphinx-rtd-theme + pip install sphinx-multiversion - name: build docs run: | echo "Repo = ${GITHUB_REPOSITORY}" From 9ec0cf8fc6c485da6bb404a7a7d6879333530a84 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 31 Oct 2024 10:55:43 -0600 Subject: [PATCH 15/29] CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7405fd170490..bbd37785d197 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small perfomance upgrades ### Changed (changing behavior/API/variables/...) +- [[PR1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image - [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag - [[PR 1187]](https://github.com/parthenon-hpc-lab/parthenon/pull/1187) Make DataCollection::Add safer and generalize MeshBlockData::Initialize - [[Issue 1165]](https://github.com/parthenon-hpc-lab/parthenon/issues/1165) Bump Kokkos submodule to 4.4.1 From b5364b7e2777fff9850b1c2823ccd136d0ad4c0b Mon Sep 17 00:00:00 2001 From: Alex Long Date: Thu, 31 Oct 2024 22:35:00 -0600 Subject: [PATCH 16/29] Consolidate buffer packing functions with less atomics (#1199) * Consolidate buffer packing functions with less atomics + Fold in buffer sizing to the load buffer function + Use a sort and a discontinuity check instead of an atomic_fetch_add inside of LoadBuffer_ to get particle index into buffer + Reduce transcendental functions call in particle sourcing in particle example * Address PR comments + Call the member variable in SwarmKey the sort_key + Remove CountParticlesInBuffer function + Add buffer_start and buffer_sorted as swarm member variables * Update example/particles/particles.cpp Co-authored-by: Ben Ryan * Update src/interface/swarm_comms.cpp Co-authored-by: Ben Ryan --------- Co-authored-by: Ben Ryan Co-authored-by: Ben Ryan --- example/particles/particles.cpp | 30 ++-- src/interface/swarm.cpp | 23 +-- src/interface/swarm.hpp | 5 +- src/interface/swarm_comms.cpp | 190 ++++++++++++------------- src/interface/swarm_device_context.hpp | 7 +- 5 files changed, 131 insertions(+), 124 deletions(-) diff --git a/example/particles/particles.cpp b/example/particles/particles.cpp index a01a121f75fa..0f250e9ab02f 100644 --- a/example/particles/particles.cpp +++ b/example/particles/particles.cpp @@ -288,17 +288,18 @@ TaskStatus CreateSomeParticles(MeshBlock *pmb, const double t0) { } while (r > 0.5); // Randomly sample direction perpendicular to origin - Real theta = acos(2. * rng_gen.drand() - 1.); - Real phi = 2. * M_PI * rng_gen.drand(); - v(0, n) = sin(theta) * cos(phi); - v(1, n) = sin(theta) * sin(phi); - v(2, n) = cos(theta); + const Real mu = 2.0 * rng_gen.drand() - 1.0; + const Real phi = 2. * M_PI * rng_gen.drand(); + const Real stheta = std::sqrt(1.0 - mu * mu); + v(0, n) = stheta * cos(phi); + v(1, n) = stheta * sin(phi); + v(2, n) = mu; // Project v onto plane normal to sphere Real vdN = v(0, n) * x(n) + v(1, n) * y(n) + v(2, n) * z(n); - Real NdN = r * r; - v(0, n) = v(0, n) - vdN / NdN * x(n); - v(1, n) = v(1, n) - vdN / NdN * y(n); - v(2, n) = v(2, n) - vdN / NdN * z(n); + Real inverse_NdN = 1. / (r * r); + v(0, n) = v(0, n) - vdN * inverse_NdN * x(n); + v(1, n) = v(1, n) - vdN * inverse_NdN * y(n); + v(2, n) = v(2, n) - vdN * inverse_NdN * z(n); // Normalize Real v_tmp = sqrt(v(0, n) * v(0, n) + v(1, n) * v(1, n) + v(2, n) * v(2, n)); @@ -335,11 +336,12 @@ TaskStatus CreateSomeParticles(MeshBlock *pmb, const double t0) { z(n) = minx_k + nx_k * dx_k * rng_gen.drand(); // Randomly sample direction on the unit sphere, fixing speed - Real theta = acos(2. * rng_gen.drand() - 1.); - Real phi = 2. * M_PI * rng_gen.drand(); - v(0, n) = vel * sin(theta) * cos(phi); - v(1, n) = vel * sin(theta) * sin(phi); - v(2, n) = vel * cos(theta); + const Real mu = 2.0 * rng_gen.drand() - 1.0; + const Real phi = 2. * M_PI * rng_gen.drand(); + const Real stheta = std::sqrt(1.0 - mu * mu); + v(0, n) = vel * stheta * cos(phi); + v(1, n) = vel * stheta * sin(phi); + v(2, n) = vel * mu; // Create particles at the beginning of the timestep t(n) = t0; diff --git a/src/interface/swarm.cpp b/src/interface/swarm.cpp index 294338dee0d2..39f1e8a28123 100644 --- a/src/interface/swarm.cpp +++ b/src/interface/swarm.cpp @@ -33,6 +33,7 @@ SwarmDeviceContext Swarm::GetDeviceContext() const { context.block_index_ = block_index_; context.neighbor_indices_ = neighbor_indices_; context.cell_sorted_ = cell_sorted_; + context.buffer_sorted_ = buffer_sorted_; context.cell_sorted_begin_ = cell_sorted_begin_; context.cell_sorted_number_ = cell_sorted_number_; @@ -73,9 +74,10 @@ Swarm::Swarm(const std::string &label, const Metadata &metadata, const int nmax_ new_indices_("new_indices_", nmax_pool_), scratch_a_("scratch_a_", nmax_pool_), scratch_b_("scratch_b_", nmax_pool_), num_particles_to_send_("num_particles_to_send_", NMAX_NEIGHBORS), - buffer_counters_("buffer_counters_", NMAX_NEIGHBORS), + buffer_start_("buffer_start_", NMAX_NEIGHBORS), neighbor_received_particles_("neighbor_received_particles_", NMAX_NEIGHBORS), - cell_sorted_("cell_sorted_", nmax_pool_), mpiStatus(true) { + cell_sorted_("cell_sorted_", nmax_pool_), + buffer_sorted_("buffer_sorted_", nmax_pool_), mpiStatus(true) { PARTHENON_REQUIRE_THROWS(typeid(Coordinates_t) == typeid(UniformCartesian), "SwarmDeviceContext only supports a uniform Cartesian mesh!"); @@ -209,6 +211,9 @@ void Swarm::SetPoolMax(const std::int64_t nmax_pool) { Kokkos::resize(cell_sorted_, nmax_pool); pmb->LogMemUsage(n_new * sizeof(SwarmKey)); + Kokkos::resize(buffer_sorted_, nmax_pool); + pmb->LogMemUsage(n_new * sizeof(SwarmKey)); + block_index_.Resize(nmax_pool); pmb->LogMemUsage(n_new * sizeof(int)); @@ -490,35 +495,35 @@ void Swarm::SortParticlesByCell() { break; } - if (cell_sorted(start_index).cell_idx_1d_ == cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ == cell_idx_1d) { if (start_index == 0) { break; - } else if (cell_sorted(start_index - 1).cell_idx_1d_ != cell_idx_1d) { + } else if (cell_sorted(start_index - 1).sort_idx_ != cell_idx_1d) { break; } else { start_index--; continue; } } - if (cell_sorted(start_index).cell_idx_1d_ >= cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ >= cell_idx_1d) { start_index--; if (start_index < 0) { start_index = -1; break; } - if (cell_sorted(start_index).cell_idx_1d_ < cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ < cell_idx_1d) { start_index = -1; break; } continue; } - if (cell_sorted(start_index).cell_idx_1d_ < cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ < cell_idx_1d) { start_index++; if (start_index > max_active_index) { start_index = -1; break; } - if (cell_sorted(start_index).cell_idx_1d_ > cell_idx_1d) { + if (cell_sorted(start_index).sort_idx_ > cell_idx_1d) { start_index = -1; break; } @@ -532,7 +537,7 @@ void Swarm::SortParticlesByCell() { int number = 0; int current_index = start_index; while (current_index <= max_active_index && - cell_sorted(current_index).cell_idx_1d_ == cell_idx_1d) { + cell_sorted(current_index).sort_idx_ == cell_idx_1d) { current_index++; number++; cell_sorted_number(k, j, i) = number; diff --git a/src/interface/swarm.hpp b/src/interface/swarm.hpp index 2058dc58f894..49cd906c2a15 100644 --- a/src/interface/swarm.hpp +++ b/src/interface/swarm.hpp @@ -289,7 +289,7 @@ class Swarm { constexpr static int unset_index_ = -1; ParArray1D num_particles_to_send_; - ParArray1D buffer_counters_; + ParArray1D buffer_start_; ParArray1D neighbor_received_particles_; int total_received_particles_; @@ -298,6 +298,9 @@ class Swarm { ParArray1D cell_sorted_; // 1D per-cell sorted array of key-value swarm memory indices + ParArray1D + buffer_sorted_; // 1D per-buffer sorted array of key-value swarm memory indices + ParArrayND cell_sorted_begin_; // Per-cell array of starting indices in cell_sorted_ diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 3ee4e2af7512..7b236b6e4e1f 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -156,72 +156,14 @@ void Swarm::SetupPersistentMPI() { } } -void Swarm::CountParticlesToSend_() { - auto mask_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), mask_); - auto swarm_d = GetDeviceContext(); - auto pmb = GetBlockPointer(); - const int nbmax = vbswarm->bd_var_.nbmax; - - // Fence to make sure particles aren't currently being transported locally - // TODO(BRR) do this operation on device. - pmb->exec_space.fence(); - const int particle_size = GetParticleDataSize(); - vbswarm->particle_size = particle_size; - - // TODO(BRR) This kernel launch should be folded into the subsequent logic once we - // convert that to kernel-based reductions - auto &x = Get(swarm_position::x::name()).Get(); - auto &y = Get(swarm_position::y::name()).Get(); - auto &z = Get(swarm_position::z::name()).Get(); - const int max_active_index = GetMaxActiveIndex(); - pmb->par_for( - PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) { - if (swarm_d.IsActive(n)) { - bool on_current_mesh_block = true; - swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); - } - }); - - // Facilitate lambda captures - auto &block_index = block_index_; - auto &num_particles_to_send = num_particles_to_send_; - - // Zero out number of particles to send before accumulating - pmb->par_for( - PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, - KOKKOS_LAMBDA(const int n) { num_particles_to_send[n] = 0; }); - - parthenon::par_for( - PARTHENON_AUTO_LABEL, 0, max_active_index, KOKKOS_LAMBDA(const int n) { - if (swarm_d.IsActive(n)) { - bool on_current_mesh_block = true; - swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); - - if (block_index(n) >= 0) { - Kokkos::atomic_add(&num_particles_to_send(block_index(n)), 1); - } - } - }); - - auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy(); - - // Resize send buffers if too small - for (int n = 0; n < pmb->neighbors.size(); n++) { - const int bufid = pmb->neighbors[n].bufid; - auto sendbuf = vbswarm->bd_var_.send[bufid]; - if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) { - sendbuf = BufArray1D("Buffer", num_particles_to_send_h(n) * particle_size); - vbswarm->bd_var_.send[bufid] = sendbuf; - } - vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size; - } -} - void Swarm::LoadBuffers_() { auto swarm_d = GetDeviceContext(); auto pmb = GetBlockPointer(); const int particle_size = GetParticleDataSize(); + vbswarm->particle_size = particle_size; const int nneighbor = pmb->neighbors.size(); + // Fence to make sure particles aren't currently being transported locally + pmb->exec_space.fence(); auto &int_vector = std::get()>(vectors_); auto &real_vector = std::get()>(vectors_); @@ -236,42 +178,98 @@ void Swarm::LoadBuffers_() { auto &y = Get(swarm_position::y::name()).Get(); auto &z = Get(swarm_position::z::name()).Get(); - // Zero buffer index counters - auto &buffer_counters = buffer_counters_; - pmb->par_for( - PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, - KOKKOS_LAMBDA(const int n) { buffer_counters[n] = 0; }); + if (max_active_index_ >= 0) { + auto &buffer_sorted = buffer_sorted_; + auto &buffer_start = buffer_start_; - auto &bdvar = vbswarm->bd_var_; - auto neighbor_buffer_index = neighbor_buffer_index_; - // Loop over active particles and use atomic operations to find indices into buffers if - // this particle will be sent. - pmb->par_for( - PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { - if (swarm_d.IsActive(n)) { - bool on_current_mesh_block = true; - const int m = - swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); - const int bufid = neighbor_buffer_index(m); - - if (m >= 0) { - const int bid = Kokkos::atomic_fetch_add(&buffer_counters(m), 1); - int buffer_index = bid * particle_size; - swarm_d.MarkParticleForRemoval(n); - for (int i = 0; i < realPackDim; i++) { - bdvar.send[bufid](buffer_index) = vreal(i, n); - buffer_index++; - } - for (int i = 0; i < intPackDim; i++) { - bdvar.send[bufid](buffer_index) = static_cast(vint(i, n)); - buffer_index++; + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { + if (swarm_d.IsActive(n)) { + bool on_current_mesh_block = true; + const int m = + swarm_d.GetNeighborBlockIndex(n, x(n), y(n), z(n), on_current_mesh_block); + buffer_sorted(n) = SwarmKey(m, n); + } else { + buffer_sorted(n) = SwarmKey(this_block_, n); + } + }); + + // sort by buffer index + sort(buffer_sorted, SwarmKeyComparator(), 0, max_active_index_); + + // use discontinuity check to determine start of a buffer in buffer_sorted array + auto &num_particles_to_send = num_particles_to_send_; + auto max_active_index = max_active_index_; + + // Zero out number of particles to send before accumulating + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, NMAX_NEIGHBORS - 1, KOKKOS_LAMBDA(const int n) { + num_particles_to_send[n] = 0; + buffer_start[n] = 0; + }); + + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { + auto m = buffer_sorted(n).sort_idx_; + // start checks (used for index of particle in buffer) + if (m >= 0 && n == 0) { + buffer_start(m) = 0; + } else if (m >= 0 && m != buffer_sorted(n - 1).sort_idx_) { + buffer_start(m) = n; + } + // end checks (used to to size particle buffers) + if (m >= 0 && n == max_active_index) { + num_particles_to_send(m) = n + 1; + } else if (m >= 0 && m != buffer_sorted(n + 1).sort_idx_) { + num_particles_to_send(m) = n + 1; + } + }); + + // copy values back to host for buffer sizing + auto num_particles_to_send_h = num_particles_to_send_.GetHostMirrorAndCopy(); + auto buffer_start_h = buffer_start.GetHostMirrorAndCopy(); + + // Resize send buffers if too small + for (int n = 0; n < pmb->neighbors.size(); n++) { + num_particles_to_send_h(n) -= buffer_start_h(n); + const int bufid = pmb->neighbors[n].bufid; + auto sendbuf = vbswarm->bd_var_.send[bufid]; + if (sendbuf.extent(0) < num_particles_to_send_h(n) * particle_size) { + sendbuf = BufArray1D("Buffer", num_particles_to_send_h(n) * particle_size); + vbswarm->bd_var_.send[bufid] = sendbuf; + } + vbswarm->send_size[bufid] = num_particles_to_send_h(n) * particle_size; + } + + auto &bdvar = vbswarm->bd_var_; + auto neighbor_buffer_index = neighbor_buffer_index_; + // Loop over active particles buffer_sorted, use index n and buffer_start to + /// get index in buffer m, pack that particle in buffer + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, max_active_index_, KOKKOS_LAMBDA(const int n) { + auto p_index = buffer_sorted(n).swarm_idx_; + if (swarm_d.IsActive(p_index)) { + const int m = buffer_sorted(n).sort_idx_; + const int bufid = neighbor_buffer_index(m); + if (m >= 0) { + const int bid = n - buffer_start[m]; + int buffer_index = bid * particle_size; + swarm_d.MarkParticleForRemoval(p_index); + for (int i = 0; i < realPackDim; i++) { + bdvar.send[bufid](buffer_index) = vreal(i, p_index); + buffer_index++; + } + for (int i = 0; i < intPackDim; i++) { + bdvar.send[bufid](buffer_index) = static_cast(vint(i, p_index)); + buffer_index++; + } } } - } - }); + }); - // Remove particles that were loaded to send to another block from this block - RemoveMarkedParticles(); + // Remove particles that were loaded to send to another block from this block + RemoveMarkedParticles(); + } } void Swarm::Send(BoundaryCommSubset phase) { @@ -279,10 +277,8 @@ void Swarm::Send(BoundaryCommSubset phase) { const int nneighbor = pmb->neighbors.size(); auto swarm_d = GetDeviceContext(); - // Query particles for those to be sent - CountParticlesToSend_(); - - // Prepare buffers for send operations + // Potentially resize buffer, get consistent index from particle array, get ready to + // send LoadBuffers_(); // Send buffer data diff --git a/src/interface/swarm_device_context.hpp b/src/interface/swarm_device_context.hpp index 936d2d56ad35..b16daf0d0cdf 100644 --- a/src/interface/swarm_device_context.hpp +++ b/src/interface/swarm_device_context.hpp @@ -25,16 +25,16 @@ struct SwarmKey { SwarmKey() {} KOKKOS_INLINE_FUNCTION SwarmKey(const int cell_idx_1d, const int swarm_idx_1d) - : cell_idx_1d_(cell_idx_1d), swarm_idx_(swarm_idx_1d) {} + : sort_idx_(cell_idx_1d), swarm_idx_(swarm_idx_1d) {} - int cell_idx_1d_; + int sort_idx_; int swarm_idx_; }; struct SwarmKeyComparator { KOKKOS_INLINE_FUNCTION bool operator()(const SwarmKey &s1, const SwarmKey &s2) { - return s1.cell_idx_1d_ < s2.cell_idx_1d_; + return s1.sort_idx_ < s2.sort_idx_; } }; @@ -139,6 +139,7 @@ class SwarmDeviceContext { ParArrayND block_index_; ParArrayND neighbor_indices_; // 4x4x4 array of possible block AMR regions ParArray1D cell_sorted_; + ParArray1D buffer_sorted_; ParArrayND cell_sorted_begin_; ParArrayND cell_sorted_number_; int ndim_; From ddc6500d438fa8adbd522cdb2c3a2f0b4f0c2525 Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Mon, 4 Nov 2024 18:55:01 +0100 Subject: [PATCH 17/29] [Trivial] Fix type used for array init (#1170) * Fix type used for array init * init array with constexpr expression * CC --------- Co-authored-by: Jonah Miller --- CHANGELOG.md | 1 + src/outputs/output_utils.hpp | 2 +- src/outputs/restart_hdf5.cpp | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bbd37785d197..17f8a009242d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ - [[PR 1172]](https://github.com/parthenon-hpc-lab/parthenon/pull/1172) Make parthenon manager robust against external MPI init and finalize calls ### Fixed (not changing behavior/API/variables/...) +- [[PR 1170]](https://github.com/parthenon-hpc-lab/parthenon/pull/1170) Fixed incorrect initialization of array by a const not constexpr - [[PR 1189]](https://github.com/parthenon-hpc-lab/parthenon/pull/1189) Address CUDA MPI/ICP issue with Kokkos <=4.4.1 - [[PR 1178]](https://github.com/parthenon-hpc-lab/parthenon/pull/1178) Fix issue with mesh pointer when using relative residual tolerance in BiCGSTAB solver. - [[PR 1173]](https://github.com/parthenon-hpc-lab/parthenon/pull/1173) Make debugging easier by making parthenon throw an error if ParameterInput is different on multiple MPI ranks. diff --git a/src/outputs/output_utils.hpp b/src/outputs/output_utils.hpp index 5f95fbbaedc2..5c094ef688d7 100644 --- a/src/outputs/output_utils.hpp +++ b/src/outputs/output_utils.hpp @@ -50,7 +50,7 @@ namespace OutputUtils { // Helper struct containing some information about a variable struct VarInfo { public: - static constexpr int VNDIM = MAX_VARIABLE_DIMENSION; + static constexpr const int VNDIM = MAX_VARIABLE_DIMENSION; std::string label; int num_components; int tensor_rank; // 0- to 3-D for cell-centered variables, 0- to 6-D for arbitrary shape diff --git a/src/outputs/restart_hdf5.cpp b/src/outputs/restart_hdf5.cpp index cababddbfafe..8078fbd5fa9a 100644 --- a/src/outputs/restart_hdf5.cpp +++ b/src/outputs/restart_hdf5.cpp @@ -3,7 +3,7 @@ // Copyright(C) 2020-2024 The Parthenon collaboration // Licensed under the 3-clause BSD License, see LICENSE file for details //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC @@ -213,7 +213,7 @@ void RestartReaderHDF5::ReadBlocks(const std::string &name, IndexRange range, #else // HDF5 enabled auto hdl = OpenDataset(name); - const int VNDIM = info.VNDIM; + constexpr int VNDIM = OutputUtils::VarInfo::VNDIM; /** Select hyperslab in dataset **/ int total_dim = 0; From f2171ff36d9f3c627d9702b794a965963d23e790 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Thu, 7 Nov 2024 20:43:27 -0700 Subject: [PATCH 18/29] Leapfrog fix (#1206) * Missing send size init * cleanup, CHANGELOG * verbose CI * further CI debugging * This should be working... * This should be fixed... but I get a segfault on GPU * Is it my AMD GPU thats wrong? * Missing a return statement * retest * Oops missing statement * Revert test * revert workflow --- CHANGELOG.md | 1 + src/interface/swarm_comms.cpp | 5 +++++ src/utils/sort.hpp | 17 +++++++++++++++-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17f8a009242d..c29941247e43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small perfomance upgrades ### Changed (changing behavior/API/variables/...) +- [[PR 1206]](https://github.com/parthenon-hpc-lab/parthenon/pull/1206) Leapfrog fix - [[PR1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image - [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag - [[PR 1187]](https://github.com/parthenon-hpc-lab/parthenon/pull/1187) Make DataCollection::Add safer and generalize MeshBlockData::Initialize diff --git a/src/interface/swarm_comms.cpp b/src/interface/swarm_comms.cpp index 7b236b6e4e1f..054ded34fabb 100644 --- a/src/interface/swarm_comms.cpp +++ b/src/interface/swarm_comms.cpp @@ -269,6 +269,11 @@ void Swarm::LoadBuffers_() { // Remove particles that were loaded to send to another block from this block RemoveMarkedParticles(); + } else { + for (int n = 0; n < pmb->neighbors.size(); n++) { + const int bufid = pmb->neighbors[n].bufid; + vbswarm->send_size[bufid] = 0; + } } } diff --git a/src/utils/sort.hpp b/src/utils/sort.hpp index 97e9c77a88e4..a4ab8139dab3 100644 --- a/src/utils/sort.hpp +++ b/src/utils/sort.hpp @@ -61,7 +61,7 @@ void sort(ParArray1D data, KeyComparator comparator, size_t min_idx, size_t max_idx) { PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!"); PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!"); -#ifdef KOKKOS_ENABLE_CUDA +#if defined(KOKKOS_ENABLE_CUDA) #ifdef __clang__ PARTHENON_FAIL("sort is using thrust and there exists an incompatibility with clang, " "see https://github.com/lanl/parthenon/issues/647 for more details. We " @@ -74,6 +74,13 @@ void sort(ParArray1D data, KeyComparator comparator, size_t min_idx, thrust::device_ptr last_d = thrust::device_pointer_cast(data.data()) + max_idx + 1; thrust::sort(first_d, last_d, comparator); #endif +#elif defined(KOKKOS_ENABLE_HIP) + auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data); + std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1, comparator); + Kokkos::deep_copy(data, data_h); + // TODO(BRR) With Kokkos 4.4, switch to Kokkos::sort + // auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1)); + // Kokkos::sort(sub_data, comparator); #else if (std::is_same::value) { std::sort(data.data() + min_idx, data.data() + max_idx + 1, comparator); @@ -89,7 +96,7 @@ template void sort(ParArray1D data, size_t min_idx, size_t max_idx) { PARTHENON_DEBUG_REQUIRE(min_idx < data.extent(0), "Invalid minimum sort index!"); PARTHENON_DEBUG_REQUIRE(max_idx < data.extent(0), "Invalid maximum sort index!"); -#ifdef KOKKOS_ENABLE_CUDA +#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP) #ifdef __clang__ PARTHENON_FAIL("sort is using thrust and there exists an incompatibility with clang, " "see https://github.com/lanl/parthenon/issues/647 for more details. We " @@ -102,6 +109,12 @@ void sort(ParArray1D data, size_t min_idx, size_t max_idx) { thrust::device_ptr last_d = thrust::device_pointer_cast(data.data()) + max_idx + 1; thrust::sort(first_d, last_d); #endif + auto data_h = Kokkos::create_mirror_view_and_copy(HostMemSpace(), data); + std::sort(data_h.data() + min_idx, data_h.data() + max_idx + 1); + Kokkos::deep_copy(data, data_h); + // TODO(BRR) With Kokkos 4.4, switch to Kokkos::sort + // auto sub_data = Kokkos::subview(data, std::make_pair(min_idx, max_idx + 1)); + // Kokkos::sort(sub_data); #else if (std::is_same::value) { std::sort(data.data() + min_idx, data.data() + max_idx + 1); From 64473d70df5461fcf10ab52fd603aad310cb46b1 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 09:55:59 -0700 Subject: [PATCH 19/29] CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c29941247e43..554debf3c568 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## Current develop ### Added (new features/APIs/variables/...) +- [[PR 1103]](https://github.com/parthenon-hpc-lab/parthenon/pull/1103) Add sparsity to vector wave equation test - [[PR 1185]](https://github.com/parthenon-hpc-lab/parthenon/pull/1185/files) Bugfix to particle defragmentation - [[PR 1184]](https://github.com/parthenon-hpc-lab/parthenon/pull/1184) Fix swarm block neighbor indexing in 1D, 2D - [[PR 1183]](https://github.com/parthenon-hpc-lab/parthenon/pull/1183) Fix particle leapfrog example initialization data From 0f6cec4e113cc0c8241497521e633fadf3734791 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Fri, 8 Nov 2024 09:57:08 -0700 Subject: [PATCH 20/29] copyright --- example/fine_advection/parthenon_app_inputs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/fine_advection/parthenon_app_inputs.cpp b/example/fine_advection/parthenon_app_inputs.cpp index 9f8c7f80e5c6..35236015e391 100644 --- a/example/fine_advection/parthenon_app_inputs.cpp +++ b/example/fine_advection/parthenon_app_inputs.cpp @@ -1,5 +1,5 @@ //======================================================================================== -// (C) (or copyright) 2020-2021. Triad National Security, LLC. All rights reserved. +// (C) (or copyright) 2020-2024. Triad National Security, LLC. All rights reserved. // // This program was produced under U.S. Government contract 89233218CNA000001 for Los // Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC From 5272708ee9ea031455c4e742bd2962d956e924ce Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 12 Nov 2024 13:19:16 -0700 Subject: [PATCH 21/29] Philipp comments --- example/fine_advection/advection_package.cpp | 4 ++-- example/fine_advection/advection_package.hpp | 9 +++++---- example/fine_advection/parthenon_app_inputs.cpp | 12 ++++++------ 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/example/fine_advection/advection_package.cpp b/example/fine_advection/advection_package.cpp index 648dfe4ac83f..428d91b9245f 100644 --- a/example/fine_advection/advection_package.cpp +++ b/example/fine_advection/advection_package.cpp @@ -225,9 +225,9 @@ TaskStatus FillDerived(MeshData *md) { const int fi = (ndim > 0) ? (i - nghost) * 2 + nghost : i; pack(b, Conserved::phi_fine_restricted(), k, j, i) = 0.0; Real ntot = 0.0; - for (int ioff = 0; ioff <= (ndim > 0); ++ioff) + for (int koff = 0; koff <= (ndim > 2); ++koff) for (int joff = 0; joff <= (ndim > 1); ++joff) - for (int koff = 0; koff <= (ndim > 2); ++koff) { + for (int ioff = 0; ioff <= (ndim > 0); ++ioff) { ntot += 1.0; pack(b, Conserved::phi_fine_restricted(), k, j, i) += pack(b, Conserved::phi_fine(), kf + koff, jf + joff, fi + ioff); diff --git a/example/fine_advection/advection_package.hpp b/example/fine_advection/advection_package.hpp index 6e3f8e03f232..12b6b9b65aad 100644 --- a/example/fine_advection/advection_package.hpp +++ b/example/fine_advection/advection_package.hpp @@ -80,11 +80,12 @@ TaskStatus CalculateFluxes(pack_desc_t &desc, parthenon::TopologicalElement FACE IndexRange ib = md->GetBoundsI(cl, IndexDomain::interior, FACE); IndexRange jb = md->GetBoundsJ(cl, IndexDomain::interior, FACE); IndexRange kb = md->GetBoundsK(cl, IndexDomain::interior, FACE); - constexpr int scratch_size = 0; - constexpr int scratch_level = 1; + constexpr int unused_scratch_size = 0; + constexpr int unused_scratch_level = 1; parthenon::par_for_outer( - PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1, kb.s, - kb.e, KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { + PARTHENON_AUTO_LABEL, unused_scratch_size, unused_scratch_level, 0, + pack.GetNBlocks() - 1, kb.s, kb.e, + KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) { parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e}); for (int l = pack.GetLowerBound(b); l <= pack.GetUpperBound(b); ++l) { parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) { diff --git a/example/fine_advection/parthenon_app_inputs.cpp b/example/fine_advection/parthenon_app_inputs.cpp index 35236015e391..8025970f35ca 100644 --- a/example/fine_advection/parthenon_app_inputs.cpp +++ b/example/fine_advection/parthenon_app_inputs.cpp @@ -119,9 +119,9 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) { Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + coords.Xc<2>(j) * coords.Xc<2>(j) + coords.Xc<3>(k) * coords.Xc<3>(k); - for (int ioff = 0; ioff <= (ndim > 0); ++ioff) + for (int koff = 0; koff <= (ndim > 2); ++koff) for (int joff = 0; joff <= (ndim > 1); ++joff) - for (int koff = 0; koff <= (ndim > 2); ++koff) { + for (int ioff = 0; ioff <= (ndim > 0); ++ioff) { pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = 1. + amp * exp(-100.0 * rsq); } @@ -129,16 +129,16 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) { Real rsq = coords.Xc<1>(i) * coords.Xc<1>(i) + coords.Xc<2>(j) * coords.Xc<2>(j) + coords.Xc<3>(k) * coords.Xc<3>(k); - for (int ioff = 0; ioff <= (ndim > 0); ++ioff) + for (int koff = 0; koff <= (ndim > 2); ++koff) for (int joff = 0; joff <= (ndim > 1); ++joff) - for (int koff = 0; koff <= (ndim > 2); ++koff) { + for (int ioff = 0; ioff <= (ndim > 0); ++ioff) { pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = (rsq < 0.15 * 0.15 ? 1.0 : 0.0); } } else { - for (int ioff = 0; ioff <= (ndim > 0); ++ioff) + for (int koff = 0; koff <= (ndim > 2); ++koff) for (int joff = 0; joff <= (ndim > 1); ++joff) - for (int koff = 0; koff <= (ndim > 2); ++koff) { + for (int ioff = 0; ioff <= (ndim > 0); ++ioff) { pack(b, phi_fine(), kf + koff, jf + joff, fi + ioff) = 0.0; } } From 84fc520f7360ff41402d10363dbd3d951da35a57 Mon Sep 17 00:00:00 2001 From: Luke Roberts Date: Tue, 12 Nov 2024 13:22:47 -0700 Subject: [PATCH 22/29] more Philipp comments --- example/fine_advection/parthinput.advection | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/example/fine_advection/parthinput.advection b/example/fine_advection/parthinput.advection index 31ee255c5053..b3bf7ca10d0c 100644 --- a/example/fine_advection/parthinput.advection +++ b/example/fine_advection/parthinput.advection @@ -57,6 +57,13 @@ profile = hard_sphere refine_tol = 0.3 # control the package specific refinement tagging function derefine_tol = 0.03 +# Include advection equation for regular resolution CC fields +do_regular_advection = true +# Include advection equation for fine resolution CC fields +do_fine_advection = true +# Include vector wave equation using CT +do_CT_advection = true + file_type = rst dt = 0.05 From b3a2b8da99b036fa75fa6c6f4aaa0047f438399c Mon Sep 17 00:00:00 2001 From: Brandon Barker Date: Wed, 13 Nov 2024 10:53:16 -0500 Subject: [PATCH 23/29] Fix: Bounds in output for Metadata::None variables (#1188) * fix: metadata::none output bounds * tst: add option in advection example to test metadata::none output. Add to hdf5 regression tests * changelog * gold file SHA * update metadata_none_Var in advection example to fit block sizes. update golds. * updates to metadata none var test * update SHA for golds --- CHANGELOG.md | 1 + CMakeLists.txt | 4 +-- example/advection/advection_package.cpp | 33 +++++++++++++++++++ src/outputs/output_utils.hpp | 6 ++-- .../test_suites/output_hdf5/output_hdf5.py | 12 +++++++ .../output_hdf5/parthinput.advection | 4 ++- 6 files changed, 54 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 554debf3c568..1d361fc0567f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - [[PR 1172]](https://github.com/parthenon-hpc-lab/parthenon/pull/1172) Make parthenon manager robust against external MPI init and finalize calls ### Fixed (not changing behavior/API/variables/...) +- [[PR 1188]](https://github.com/parthenon-hpc-lab/parthenon/pull/1188) Fix hdf5 output issue for metadata none variables, update test. - [[PR 1170]](https://github.com/parthenon-hpc-lab/parthenon/pull/1170) Fixed incorrect initialization of array by a const not constexpr - [[PR 1189]](https://github.com/parthenon-hpc-lab/parthenon/pull/1189) Address CUDA MPI/ICP issue with Kokkos <=4.4.1 - [[PR 1178]](https://github.com/parthenon-hpc-lab/parthenon/pull/1178) Fix issue with mesh pointer when using relative residual tolerance in BiCGSTAB solver. diff --git a/CMakeLists.txt b/CMakeLists.txt index 61fd4fd3cfff..590f277ee8d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -75,9 +75,9 @@ include(cmake/Format.cmake) include(cmake/Lint.cmake) # regression test reference data -set(REGRESSION_GOLD_STANDARD_VER 24 CACHE STRING "Version of gold standard to download and use") +set(REGRESSION_GOLD_STANDARD_VER 25 CACHE STRING "Version of gold standard to download and use") set(REGRESSION_GOLD_STANDARD_HASH - "SHA512=e220df92a335131131e42ddb52dc221a6dbd6bb56361483b4af0292620eeb82ffb21ef3b95fd9a7c5cc158fb754da0bf1a1015bec98b5bbad05f4bceb1ee99bc" + "SHA512=314dc8312366d81ba33d1fde25812e9a7697b2f529de29e22662df0d458f1c4bc5b5bb4e649888170f66ffec0df1be20a9cf401944531c1c1ad835e26eaad28f" CACHE STRING "Hash of default gold standard file to download") option(REGRESSION_GOLD_STANDARD_SYNC "Automatically sync gold standard files." ON) diff --git a/example/advection/advection_package.cpp b/example/advection/advection_package.cpp index f799a2909262..64e4ea30712d 100644 --- a/example/advection/advection_package.cpp +++ b/example/advection/advection_package.cpp @@ -196,6 +196,20 @@ std::shared_ptr Initialize(ParameterInput *pin) { m = Metadata({Metadata::Cell, Metadata::OneCopy}, std::vector({1})); pkg->AddField("my_derived_var", m); + // Create a Metadata::None variable for IO testing purposes. + // Only load if test_metadata_none is specified in the Advection block + auto test_metadata_none = + pin->GetOrAddBoolean("Advection", "test_metadata_none", false); + pkg->AddParam("test_metadata_none", test_metadata_none); + if (test_metadata_none) { + const int nx1 = pin->GetOrAddInteger("parthenon/meshblock", "nx1", 1); + const int nx2 = pin->GetOrAddInteger("parthenon/meshblock", "nx2", 1); + const int nx3 = pin->GetOrAddInteger("parthenon/meshblock", "nx3", 1); + std::vector test_shape = {nx1 + 1, nx2 + 1, nx3 + 1, 3}; + m = Metadata({Metadata::OneCopy, Metadata::None}, test_shape); + pkg->AddField("metadata_none_var", m); + } + // List (vector) of HistoryOutputVar that will all be enrolled as output variables parthenon::HstVar_list hst_vars = {}; // Now we add a couple of callback functions @@ -281,6 +295,7 @@ AmrTag CheckRefinement(MeshBlockData *rc) { void PreFill(MeshBlockData *rc) { auto pmb = rc->GetBlockPointer(); auto pkg = pmb->packages.Get("advection_package"); + const bool test_metadata_none = pkg->Param("test_metadata_none"); bool fill_derived = pkg->Param("fill_derived"); if (fill_derived) { @@ -302,6 +317,24 @@ void PreFill(MeshBlockData *rc) { v(out + n, k, j, i) = 1.0 - v(in + n, k, j, i); }); } + + // Fill the metadata::None var with index gymnastics. + if (test_metadata_none) { + const int nx1 = pmb->cellbounds.ncellsi(IndexDomain::interior); + const int nx2 = pmb->cellbounds.ncellsj(IndexDomain::interior); + const int nx3 = pmb->cellbounds.ncellsk(IndexDomain::interior); + + // packing in principle unnecessary/convoluted here and just done for demonstration + std::vector vars({"metadata_none_var"}); + PackIndexMap imap; + const auto &v = rc->PackVariables(vars, imap); + + pmb->par_for( + PARTHENON_AUTO_LABEL, 0, 2, 0, nx3, 0, nx2, 0, nx1, + KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) { + v(n, k, j, i) = n + k + j + i; + }); + } } // this is the package registered function to fill derived diff --git a/src/outputs/output_utils.hpp b/src/outputs/output_utils.hpp index 5c094ef688d7..e9fcee04c986 100644 --- a/src/outputs/output_utils.hpp +++ b/src/outputs/output_utils.hpp @@ -312,11 +312,11 @@ void PackOrUnpackVar(const VarInfo &info, bool do_ghosts, idx_t &idx, Function_t auto [kb, jb, ib] = info.GetPaddedBoundsKJI(domain); if (info.where == MetadataFlag({Metadata::None})) { kb.s = 0; - kb.e = shape[4]; + kb.e = std::max(0, shape[4] - 1); jb.s = 0; - jb.e = shape[5]; + jb.e = std::max(0, shape[5] - 1); ib.s = 0; - ib.e = shape[6]; + ib.e = std::max(0, shape[6] - 1); } for (int topo = 0; topo < shape[0]; ++topo) { for (int t = 0; t < shape[1]; ++t) { diff --git a/tst/regression/test_suites/output_hdf5/output_hdf5.py b/tst/regression/test_suites/output_hdf5/output_hdf5.py index cbef0aba2aa7..fd68c8e17851 100644 --- a/tst/regression/test_suites/output_hdf5/output_hdf5.py +++ b/tst/regression/test_suites/output_hdf5/output_hdf5.py @@ -190,6 +190,18 @@ def Analyse(self, parameters): ) analyze_status = False + # check contents of matadata_none_var + for dim in [2, 3]: + data = phdf.phdf(f"advection_{dim}d.out0.final.phdf") + profile = data.Get("metadata_none_var", flatten=False) + for index in np.ndindex(profile.shape): + ib, j, k, l, m = index + expected_value = l + k + j + m + actual_value = profile[ib, j, k, l, m] + if expected_value != actual_value: + print("metadata_none_var is incorrect") + analyze_status = False + # Checking Parthenon histograms versus numpy ones for dim in [2, 3]: # 1D histogram with binning of a variable with bins defined by a var diff --git a/tst/regression/test_suites/output_hdf5/parthinput.advection b/tst/regression/test_suites/output_hdf5/parthinput.advection index 3061d2bff961..118a82b58e56 100644 --- a/tst/regression/test_suites/output_hdf5/parthinput.advection +++ b/tst/regression/test_suites/output_hdf5/parthinput.advection @@ -54,6 +54,7 @@ vx = 1.0 vy = 1.0 vz = 1.0 profile = hard_sphere +test_metadata_none = true refine_tol = 0.3 # control the package specific refinement tagging function derefine_tol = 0.03 @@ -64,7 +65,8 @@ file_type = hdf5 dt = 1.0 variables = advected, one_minus_advected, & # comments are ok one_minus_advected_sq, & # on every (& characters are ok in comments) - one_minus_sqrt_one_minus_advected_sq # line + one_minus_sqrt_one_minus_advected_sq, & # line + metadata_none_var file_type = hst From cbc36bdfcfb6e664c93515cfc9536cae33f1b13c Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 09:42:19 -0700 Subject: [PATCH 24/29] Modify how we loop through packages to enroll history outputs --- src/outputs/history.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/outputs/history.cpp b/src/outputs/history.cpp index 2e1310062d48..41ab26a516f8 100644 --- a/src/outputs/history.cpp +++ b/src/outputs/history.cpp @@ -93,8 +93,14 @@ void HistoryOutput::WriteOutputFile(Mesh *pm, ParameterInput *pin, SimTime *tm, md_base->Initialize(pm->block_list, pm); } - // Loop over all packages of the application - for (const auto &pkg : packages) { + // Loop over all packages of the application in alphabetical order + std::vector keys; + for (const auto& pair : packages) { + keys.push_back(pair.first); + } + std::sort(keys.begin(), keys.end()); + for (const auto &key : keys) { + const auto &pkg = packages[key]; const auto ¶ms = pkg.second->AllParams(); // Check if the package has enrolled scalar history functions which are stored in the From 37929e9cfa8edce8d03f4957619196b76bbaa6fb Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 09:45:56 -0700 Subject: [PATCH 25/29] CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d361fc0567f..a4e0a5ca9cd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - [[PR 1161]](https://github.com/parthenon-hpc-lab/parthenon/pull/1161) Make flux field Metadata accessible, add Metadata::CellMemAligned flag, small perfomance upgrades ### Changed (changing behavior/API/variables/...) +- [[PR 1209]](https://github.com/parthenon-hpc-lab/parthenon/pull/1209) Ordered history output - [[PR 1206]](https://github.com/parthenon-hpc-lab/parthenon/pull/1206) Leapfrog fix - [[PR1203]](https://github.com/parthenon-hpc-lab/parthenon/pull/1203) Pin Ubuntu CI image - [[PR1177]](https://github.com/parthenon-hpc-lab/parthenon/pull/1177) Make mesh-level boundary conditions usable without the "user" flag From cf158716c75ea74aec5b325906c23bf912c0c594 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 09:47:36 -0700 Subject: [PATCH 26/29] formatting --- src/outputs/history.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/outputs/history.cpp b/src/outputs/history.cpp index 41ab26a516f8..51b4e523e7f0 100644 --- a/src/outputs/history.cpp +++ b/src/outputs/history.cpp @@ -95,8 +95,8 @@ void HistoryOutput::WriteOutputFile(Mesh *pm, ParameterInput *pin, SimTime *tm, // Loop over all packages of the application in alphabetical order std::vector keys; - for (const auto& pair : packages) { - keys.push_back(pair.first); + for (const auto &pair : packages) { + keys.push_back(pair.first); } std::sort(keys.begin(), keys.end()); for (const auto &key : keys) { From 6939da2a07c34ae2366c45fae5d1d0fa429d38c6 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 09:58:34 -0700 Subject: [PATCH 27/29] Oops key vs pair --- src/outputs/history.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/outputs/history.cpp b/src/outputs/history.cpp index 51b4e523e7f0..2aade658fe00 100644 --- a/src/outputs/history.cpp +++ b/src/outputs/history.cpp @@ -101,7 +101,7 @@ void HistoryOutput::WriteOutputFile(Mesh *pm, ParameterInput *pin, SimTime *tm, std::sort(keys.begin(), keys.end()); for (const auto &key : keys) { const auto &pkg = packages[key]; - const auto ¶ms = pkg.second->AllParams(); + const auto ¶ms = pkg->AllParams(); // Check if the package has enrolled scalar history functions which are stored in the // Params under the `hist_param_key` name. From 0968c3ea8c4e0877041b2c5ee154fa8c15c5ec92 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 10:01:15 -0700 Subject: [PATCH 28/29] Add note to docs about ordering of history outputs h/t AMD --- doc/sphinx/src/outputs.rst | 4 +++- src/outputs/history.cpp | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/sphinx/src/outputs.rst b/doc/sphinx/src/outputs.rst index 7196e4777bb3..fa44e27da298 100644 --- a/doc/sphinx/src/outputs.rst +++ b/doc/sphinx/src/outputs.rst @@ -194,7 +194,9 @@ block might look like This will produce a text file (``.hst``) output file every 1 units of simulation time. The content of the file is determined by the functions -enrolled by specific packages, see :ref:`state history output`. +enrolled by specific packages, see :ref:`state history output`. Per-package history +outputs will always be in alphabetical order by package name, which may not match +the order in which packages were added to a simulation. Histograms ---------- diff --git a/src/outputs/history.cpp b/src/outputs/history.cpp index 2aade658fe00..913a722496a7 100644 --- a/src/outputs/history.cpp +++ b/src/outputs/history.cpp @@ -93,7 +93,8 @@ void HistoryOutput::WriteOutputFile(Mesh *pm, ParameterInput *pin, SimTime *tm, md_base->Initialize(pm->block_list, pm); } - // Loop over all packages of the application in alphabetical order + // Loop over all packages of the application in alphabetical order to ensure consistency + // of ordering of data in columns. std::vector keys; for (const auto &pair : packages) { keys.push_back(pair.first); From 817ea62fefd9bb31dd1a62d9e8ccf4111ad3ce16 Mon Sep 17 00:00:00 2001 From: Ben Ryan Date: Wed, 13 Nov 2024 11:16:33 -0700 Subject: [PATCH 29/29] Oops include_what_you_use --- src/outputs/history.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/outputs/history.cpp b/src/outputs/history.cpp index 913a722496a7..074bea4feb0c 100644 --- a/src/outputs/history.cpp +++ b/src/outputs/history.cpp @@ -18,6 +18,7 @@ // \brief writes history output data, volume-averaged quantities that are output // frequently in time to trace their history. +#include #include #include #include