Skip to content

Commit

Permalink
make vec loops work with sparse (partial)
Browse files Browse the repository at this point in the history
  • Loading branch information
lroberts36 committed Oct 15, 2024
1 parent 34e3605 commit 005292e
Showing 1 changed file with 77 additions and 47 deletions.
124 changes: 77 additions & 47 deletions example/fine_advection/advection_package.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,23 +123,32 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge,
int koff = edge == TE::E3 ? ndim > 2 : 0;
int joff = edge == TE::E2 ? ndim > 1 : 0;
int ioff = edge == TE::E1 ? ndim > 0 : 0;
parthenon::par_for(
PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s - (ndim > 2),
kb.e + (ndim > 2), jb.s - (ndim > 1), jb.e + (ndim > 1), ib.s - 1, ib.e + 1,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
// Piecewise linear in the orthogonal directions, could do something better here
pack(b, TE::CC, recon(0), k, j, i) =
0.5 * (pack(b, fe, flux_var(), k, j, i) +
pack(b, fe, flux_var(), k + koff, j + joff, i + ioff));
pack(b, TE::CC, recon(1), k, j, i) =
0.5 * (pack(b, fe, flux_var(), k, j, i) +
pack(b, fe, flux_var(), k + koff, j + joff, i + ioff));
pack(b, TE::CC, recon(2), k, j, i) =
0.5 * (pack(b, fe, flux_var(), k, j, i) +
pack(b, fe, flux_var(), k + koff, j + joff, i + ioff));
pack(b, TE::CC, recon(3), k, j, i) =
0.5 * (pack(b, fe, flux_var(), k, j, i) +
pack(b, fe, flux_var(), k + koff, j + joff, i + ioff));
constexpr int scratch_size = 0;
constexpr int scratch_level = 1;
parthenon::par_for_outer(
PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1,
kb.s - (ndim > 2), kb.e + (ndim > 2),
KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) {
parthenon::Indexer2D idxer({jb.s - (ndim > 1), jb.e + (ndim > 1)},
{ib.s - (ndim > 0), ib.e + (ndim > 0)});
if (pack.GetLowerBound(b, flux_var()) <= pack.GetUpperBound(b, flux_var())) {
parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) {
const auto [j, i] = idxer(idx);
// Piecewise linear in the orthogonal directions, could do something better here
pack(b, TE::CC, recon(0), k, j, i) =
0.5 * (pack(b, fe, flux_var(), k, j, i) +
pack(b, fe, flux_var(), k + koff, j + joff, i + ioff));
pack(b, TE::CC, recon(1), k, j, i) =
0.5 * (pack(b, fe, flux_var(), k, j, i) +
pack(b, fe, flux_var(), k + koff, j + joff, i + ioff));
pack(b, TE::CC, recon(2), k, j, i) =
0.5 * (pack(b, fe, flux_var(), k, j, i) +
pack(b, fe, flux_var(), k + koff, j + joff, i + ioff));
pack(b, TE::CC, recon(3), k, j, i) =
0.5 * (pack(b, fe, flux_var(), k, j, i) +
pack(b, fe, flux_var(), k + koff, j + joff, i + ioff));
});
}
});

// 2. Calculate the quartant averaged flux
Expand All @@ -150,20 +159,27 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge,
ib = md->GetBoundsI(cl, IndexDomain::interior, edge);
jb = md->GetBoundsJ(cl, IndexDomain::interior, edge);
kb = md->GetBoundsK(cl, IndexDomain::interior, edge);
parthenon::par_for(
PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
pack.flux(b, edge, var(), k, j, i) = 0.0;
for (int dk = -koff; dk < 1; ++dk)
for (int dj = -joff; dj < 1; ++dj)
for (int di = -ioff; di < 1; ++di) {
// TODO(LFR): Pick out the correct component of the reconstructed flux,
// current version is not an issue for piecewise constant flux though.
pack.flux(b, edge, var(), k, j, i) +=
pack(b, TE::CC, recon(0), k + dk, j + dj, i + di);
}
pack.flux(b, edge, var(), k, j, i) /= npoints;
pack.flux(b, edge, var(), k, j, i) *= fac;
parthenon::par_for_outer(
PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1,
kb.s, kb.e,
KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) {
parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e});
if (pack.GetLowerBound(b, var()) <= pack.GetUpperBound(b, var())) {
parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) {
const auto [j, i] = idxer(idx);
pack.flux(b, edge, var(), k, j, i) = 0.0;
for (int dk = -koff; dk < 1; ++dk)
for (int dj = -joff; dj < 1; ++dj)
for (int di = -ioff; di < 1; ++di) {
// TODO(LFR): Pick out the correct component of the reconstructed flux,
// current version is not an issue for piecewise constant flux though.
pack.flux(b, edge, var(), k, j, i) +=
pack(b, TE::CC, recon(0), k + dk, j + dj, i + di);
}
pack.flux(b, edge, var(), k, j, i) /= npoints;
pack.flux(b, edge, var(), k, j, i) *= fac;
});
}
});

// 3. Reconstruct the transverse components of the advected field at the edge
Expand All @@ -174,13 +190,20 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge,
ib = md->GetBoundsI(cl, IndexDomain::interior, f);
jb = md->GetBoundsJ(cl, IndexDomain::interior, f);
kb = md->GetBoundsK(cl, IndexDomain::interior, f);
parthenon::par_for(
PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s - (ndim > 2),
kb.e + (ndim > 2), jb.s - (ndim > 1), jb.e + (ndim > 1), ib.s - 1, ib.e + 1,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
// Piecewise linear in the orthogonal directions, could do something better here
pack(b, f, recon_f(0), k, j, i) = pack(b, f, var(), k, j, i);
pack(b, f, recon_f(1), k, j, i) = pack(b, f, var(), k, j, i);
parthenon::par_for_outer(
PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1,
kb.s - (ndim > 2), kb.e + (ndim > 2),
KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) {
parthenon::Indexer2D idxer({jb.s - (ndim > 1), jb.e + (ndim > 1)},
{ib.s - (ndim > 0), ib.e + (ndim > 0)});
if (pack.GetLowerBound(b, var()) <= pack.GetUpperBound(b, var())) {
parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) {
const auto [j, i] = idxer(idx);
// Piecewise linear in the orthogonal directions, could do something better here
pack(b, f, recon_f(0), k, j, i) = pack(b, f, var(), k, j, i);
pack(b, f, recon_f(1), k, j, i) = pack(b, f, var(), k, j, i);
});
}
});
}

Expand All @@ -196,15 +219,22 @@ TaskStatus CalculateVectorFluxes(parthenon::TopologicalElement edge,
d1[static_cast<int>(f1) % 3] = 0;
d2[static_cast<int>(edge) % 3] = 0;
d2[static_cast<int>(f2) % 3] = 0;
parthenon::par_for(
PARTHENON_AUTO_LABEL, 0, pack.GetNBlocks() - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
pack.flux(b, edge, var(), k, j, i) +=
0.5 * (pack(b, f1, recon_f(0), k, j, i) -
pack(b, f1, recon_f(1), k - d1[2], j - d1[1], i - d1[0]));
pack.flux(b, edge, var(), k, j, i) -=
0.5 * (pack(b, f2, recon_f(0), k, j, i) -
pack(b, f2, recon_f(1), k - d2[2], j - d2[1], i - d2[0]));
parthenon::par_for_outer(
PARTHENON_AUTO_LABEL, scratch_size, scratch_level, 0, pack.GetNBlocks() - 1,
kb.s, kb.e,
KOKKOS_LAMBDA(parthenon::team_mbr_t member, const int b, const int k) {
parthenon::Indexer2D idxer({jb.s, jb.e}, {ib.s, ib.e});
if (pack.GetLowerBound(b, var()) <= pack.GetUpperBound(b, var())) {
parthenon::par_for_inner(member, 0, idxer.size() - 1, [&](const int idx) {
const auto [j, i] = idxer(idx);
pack.flux(b, edge, var(), k, j, i) +=
0.5 * (pack(b, f1, recon_f(0), k, j, i) -
pack(b, f1, recon_f(1), k - d1[2], j - d1[1], i - d1[0]));
pack.flux(b, edge, var(), k, j, i) -=
0.5 * (pack(b, f2, recon_f(0), k, j, i) -
pack(b, f2, recon_f(1), k - d2[2], j - d2[1], i - d2[0]));
});
}
});

// Add in the diffusive component
Expand Down

0 comments on commit 005292e

Please sign in to comment.