Skip to content

Commit

Permalink
Enable parthenon::par_reduce for MD loops with Kokkos 1D Range (#1130)
Browse files Browse the repository at this point in the history
* wrap 3D flat loop abstractions

* add 4D loop and test

* add specialization for const int &

* added mdrange loops to par_reduce tests

* refactor flatloop specialization

* clean up

* formatting

* linting

* templating functor index types

* moved to a single functor

* Update CHANGELOG.md

* remove unused Nj

---------

Co-authored-by: Cloud User <ec2-user@ip-10-0-2-191.us-east-2.compute.internal>
Co-authored-by: Luke Roberts <lfroberts@lanl.gov>
  • Loading branch information
3 people authored Jul 4, 2024
1 parent 2e0e981 commit 801766e
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 30 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Current develop

### Added (new features/APIs/variables/...)
- [[PR 1130]](https://github.com/parthenon-hpc-lab/parthenon/pull/1130) Enable `parthenon::par_reduce` for MD loops with Kokkos 1D Range
- [[PR 1119]](https://github.com/parthenon-hpc-lab/parthenon/pull/1119) Formalize MeshData partitioning.
- [[PR 1128]](https://github.com/parthenon-hpc-lab/parthenon/pull/1128) Add cycle and nbtotal to hst
- [[PR 1099]](https://github.com/parthenon-hpc-lab/parthenon/pull/1099) Functionality for outputting task graphs in GraphViz format.
Expand Down
85 changes: 59 additions & 26 deletions src/kokkos_abstraction.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,35 @@ par_dispatch(LoopPatternMDRange, const std::string &name, DevExecSpace exec_spac
function, std::forward<Args>(args)...);
}

template <typename, typename>
class FlatFunctor;

template <typename F, typename... Args>
auto MakeFlatFunctor(F &function, Args... args) {
return FlatFunctor<F, decltype(&F::operator())>(function, std::forward<Args>(args)...);
}

template <typename Function, typename R, typename T, typename Index, typename... FArgs>
class FlatFunctor<Function, R (T::*)(Index, Index, Index, FArgs...) const> {
int NjNi, Ni, kl, jl, il;
Function function;

public:
FlatFunctor(const Function _function, const int _NjNi, const int _Ni, const int _kl,
const int _jl, const int _il)
: function(_function), NjNi(_NjNi), Ni(_Ni), kl(_kl), jl(_jl), il(_il) {}
KOKKOS_INLINE_FUNCTION
void operator()(const int &idx, FArgs &&...fargs) const {
int k = idx / NjNi;
int j = (idx - k * NjNi) / Ni;
int i = idx - k * NjNi - j * Ni;
k += kl;
j += jl;
i += il;
function(k, j, i, std::forward<FArgs>(fargs)...);
}
};

// 3D loop using Kokkos 1D Range
template <typename Tag, typename Function, class... Args>
inline typename std::enable_if<sizeof...(Args) <= 1, void>::type
Expand All @@ -270,18 +299,9 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp
const int Ni = iu - il + 1;
const int NkNjNi = Nk * Nj * Ni;
const int NjNi = Nj * Ni;
kokkos_dispatch(
tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi),
KOKKOS_LAMBDA(const int &idx) {
int k = idx / NjNi;
int j = (idx - k * NjNi) / Ni;
int i = idx - k * NjNi - j * Ni;
k += kl;
j += jl;
i += il;
function(k, j, i);
},
std::forward<Args>(args)...);
kokkos_dispatch(tag, name, Kokkos::RangePolicy<>(exec_space, 0, NkNjNi),
MakeFlatFunctor(function, NjNi, Ni, kl, jl, il),
std::forward<Args>(args)...);
}

// 3D loop using MDRange loops
Expand Down Expand Up @@ -372,6 +392,30 @@ inline void par_dispatch(LoopPatternSimdFor, const std::string &name,
function(k, j, i);
}

template <typename Function, typename R, typename T, typename Index, typename... FArgs>
class FlatFunctor<Function, R (T::*)(Index, Index, Index, Index, FArgs...) const> {
int NkNjNi, NjNi, Ni, nl, kl, jl, il;
Function function;

public:
FlatFunctor(const Function _function, const int _NkNjNi, const int _NjNi, const int _Ni,
const int _nl, const int _kl, const int _jl, const int _il)
: function(_function), NkNjNi(_NkNjNi), NjNi(_NjNi), Ni(_Ni), nl(_nl), kl(_kl),
jl(_jl), il(_il) {}
KOKKOS_INLINE_FUNCTION
void operator()(const int &idx, FArgs &&...fargs) const {
int n = idx / NkNjNi;
int k = (idx - n * NkNjNi) / NjNi;
int j = (idx - n * NkNjNi - k * NjNi) / Ni;
int i = idx - n * NkNjNi - k * NjNi - j * Ni;
n += nl;
k += kl;
j += jl;
i += il;
function(n, k, j, i, std::forward<FArgs>(fargs)...);
}
};

// 4D loop using Kokkos 1D Range
template <typename Tag, typename Function, class... Args>
inline typename std::enable_if<sizeof...(Args) <= 1, void>::type
Expand All @@ -387,20 +431,9 @@ par_dispatch(LoopPatternFlatRange, const std::string &name, DevExecSpace exec_sp
const int NnNkNjNi = Nn * Nk * Nj * Ni;
const int NkNjNi = Nk * Nj * Ni;
const int NjNi = Nj * Ni;
kokkos_dispatch(
tag, name, Kokkos::RangePolicy<>(exec_space, 0, NnNkNjNi),
KOKKOS_LAMBDA(const int &idx) {
int n = idx / NkNjNi;
int k = (idx - n * NkNjNi) / NjNi;
int j = (idx - n * NkNjNi - k * NjNi) / Ni;
int i = idx - n * NkNjNi - k * NjNi - j * Ni;
n += nl;
k += kl;
j += jl;
i += il;
function(n, k, j, i);
},
std::forward<Args>(args)...);
kokkos_dispatch(tag, name, Kokkos::RangePolicy<>(exec_space, 0, NnNkNjNi),
MakeFlatFunctor(function, NkNjNi, NjNi, Ni, nl, kl, jl, il),
std::forward<Args>(args)...);
}

// 4D loop using MDRange loops
Expand Down
81 changes: 77 additions & 4 deletions tst/unit/kokkos_abstraction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,12 +500,85 @@ bool test_wrapper_reduce_1d(T loop_pattern, DevExecSpace exec_space) {
return total == test_tot;
}

template <class T>
bool test_wrapper_reduce_3d(T loop_pattern, DevExecSpace exec_space) {
constexpr int N = 10;
parthenon::ParArray3D<int> buffer("Testing buffer", N, N, N);
// Initialize data
parthenon::par_for(
loop_pattern, "Initialize parallel reduce array", exec_space, 0, N - 1, 0, N - 1, 0,
N - 1, KOKKOS_LAMBDA(const int k, const int j, const int i) {
buffer(k, j, i) = i + j + k;
});
int tot = 0;
for (int k = 0; k < N; ++k) {
for (int j = 0; j < N; ++j) {
for (int i = 0; i < N; ++i) {
tot += i + j + k;
}
}
}
int test_tot = 0;
parthenon::par_reduce(
loop_pattern, "Sum via par reduce", exec_space, 0, N - 1, 0, N - 1, 0, N - 1,
KOKKOS_LAMBDA(const int k, const int j, const int i, int &t) { t += i + j + k; },
Kokkos::Sum<int>(test_tot));
return tot == test_tot;
}

template <class T>
bool test_wrapper_reduce_4d(T loop_pattern, DevExecSpace exec_space) {
constexpr int N = 10;
parthenon::ParArray4D<int> buffer("Testing buffer", N, N, N, N);
// Initialize data
parthenon::par_for(
loop_pattern, "Initialize parallel reduce array", exec_space, 0, N - 1, 0, N - 1, 0,
N - 1, 0, N - 1, KOKKOS_LAMBDA(const int n, const int k, const int j, const int i) {
buffer(n, k, j, i) = i + j + k + n;
});
int tot = 0;
for (int n = 0; n < N; ++n) {
for (int k = 0; k < N; ++k) {
for (int j = 0; j < N; ++j) {
for (int i = 0; i < N; ++i) {
tot += i + j + k + n;
}
}
}
}
int test_tot = 0;
parthenon::par_reduce(
loop_pattern, "Sum via par reduce", exec_space, 0, N - 1, 0, N - 1, 0, N - 1, 0,
N - 1,
KOKKOS_LAMBDA(const int n, const int k, const int j, const int i, int &t) {
t += i + j + k + n;
},
Kokkos::Sum<int>(test_tot));
return tot == test_tot;
}

TEST_CASE("Parallel reduce", "[par_reduce]") {
auto default_exec_space = DevExecSpace();
REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_flatrange_tag,
default_exec_space) == true);
if constexpr (std::is_same<DevExecSpace, Kokkos::Serial>::value) {
REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_simdfor_tag,
SECTION("1D loops") {
REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_flatrange_tag,
default_exec_space) == true);
if constexpr (std::is_same<DevExecSpace, Kokkos::Serial>::value) {
REQUIRE(test_wrapper_reduce_1d(parthenon::loop_pattern_simdfor_tag,
default_exec_space) == true);
}
}

SECTION("3D loops") {
REQUIRE(test_wrapper_reduce_3d(parthenon::loop_pattern_flatrange_tag,
default_exec_space) == true);
REQUIRE(test_wrapper_reduce_3d(parthenon::loop_pattern_mdrange_tag,
default_exec_space) == true);
}

SECTION("4D loops") {
REQUIRE(test_wrapper_reduce_4d(parthenon::loop_pattern_flatrange_tag,
default_exec_space) == true);
REQUIRE(test_wrapper_reduce_4d(parthenon::loop_pattern_mdrange_tag,
default_exec_space) == true);
}
}

0 comments on commit 801766e

Please sign in to comment.