diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 7b422f98c..2a7b75583 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -9,19 +9,11 @@ * SPDX-License-Identifier: BSD-3-Clause * ****************************************************************************/ -// Example taken from DataTransferKit -// (https://github.com/ORNL-CEES/DataTransferKit) -// with MLS resolution from -// (http://dx.doi.org/10.1016/j.jcp.2015.11.055) -// and -// (A conservative mesh-free approach for fluid-structure interface problems) - #include #include #include -#include #include #include @@ -33,137 +25,160 @@ using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; +using HostExecutionSpace = Kokkos::DefaultHostExecutionSpace; +using HostMemorySpace = HostExecutionSpace::memory_space; + // Function to approximate -KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) +struct Step { - return Kokkos::cos(5 * p[2]) * p[0] + p[1] + 1; -} + KOKKOS_INLINE_FUNCTION static float eval(ArborX::Point const &p) + { + return !Kokkos::signbit(p[0]) * 1.f; + } -int main(int argc, char *argv[]) -{ - MPI_Init(&argc, &argv); - Kokkos::ScopeGuard guard(argc, argv); + template + static Kokkos::View + map(ExecutionSpace const &space, + Kokkos::View const &ps) + { + Kokkos::View evals("Example::evals", ps.extent(0)); + Kokkos::parallel_for( + "Example::evaluation", + Kokkos::RangePolicy(space, 0, ps.extent(0)), + KOKKOS_LAMBDA(int const i) { evals(i) = eval(ps(i)); }); + return evals; + } +}; - ExecutionSpace space{}; - MPI_Comm mpi_comm = MPI_COMM_WORLD; +Kokkos::Array, 2> +createPointClouds(HostExecutionSpace const &hspace, ExecutionSpace const &space, + MPI_Comm comm, std::size_t points_num) +{ int mpi_size, mpi_rank; - MPI_Comm_size(mpi_comm, &mpi_size); - MPI_Comm_rank(mpi_comm, &mpi_rank); + MPI_Comm_size(comm, &mpi_size); + MPI_Comm_rank(comm, &mpi_rank); + + Kokkos::Array, 2> + point_clouds_host{Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::points_cloud_0"), + points_num), + Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::points_cloud_1"), + points_num)}; - static constexpr std::size_t total_source_points = 1024 * 512; - std::size_t local_source_points_num = total_source_points / mpi_size; - static constexpr std::size_t total_target_points = 1024; - std::size_t local_target_points_num = total_target_points / mpi_size; - static constexpr double cube_side = 5; - - Kokkos::View source_points( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::source_points"), - local_source_points_num); - auto source_points_host = Kokkos::create_mirror_view(source_points); - Kokkos::View target_points( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::target_points"), - local_source_points_num); - auto target_points_host = Kokkos::create_mirror_view(target_points); - - // source and target points are within a 5x5x5 cube if (mpi_rank == 0) { - Kokkos::View all_source_points( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::all_source_points"), - total_source_points); - filledBoxCloud(cube_side / 2, all_source_points); - MPI_Scatter( - all_source_points.data(), local_source_points_num * 3 * sizeof(float), - MPI_BYTE, source_points_host.data(), - local_source_points_num * 3 * sizeof(float), MPI_BYTE, 0, mpi_comm); - - Kokkos::View all_target_points( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::all_target_points"), - total_target_points); - filledBoxCloud(cube_side / 2, all_target_points); - MPI_Scatter( - all_target_points.data(), local_target_points_num * 3 * sizeof(float), - MPI_BYTE, target_points_host.data(), - local_target_points_num * 3 * sizeof(float), MPI_BYTE, 0, mpi_comm); + Kokkos::Array, 2> + all_point_clouds{Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::all_points_cloud_0"), + points_num * mpi_size), + Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::all_points_cloud_1"), + points_num * mpi_size)}; + + filledBoxCloud(.5, all_point_clouds[0]); + filledBoxCloud(.5, all_point_clouds[1]); + + MPI_Scatter(all_point_clouds[0].data(), points_num * 3 * sizeof(float), + MPI_BYTE, point_clouds_host[0].data(), + points_num * 3 * sizeof(float), MPI_BYTE, 0, comm); + MPI_Scatter(all_point_clouds[1].data(), points_num * 3 * sizeof(float), + MPI_BYTE, point_clouds_host[1].data(), + points_num * 3 * sizeof(float), MPI_BYTE, 0, comm); } else { - MPI_Scatter(nullptr, local_source_points_num * 3 * sizeof(float), MPI_BYTE, - source_points_host.data(), - local_source_points_num * 3 * sizeof(float), MPI_BYTE, 0, - mpi_comm); - - MPI_Scatter(nullptr, local_target_points_num * 3 * sizeof(float), MPI_BYTE, - target_points_host.data(), - local_target_points_num * 3 * sizeof(float), MPI_BYTE, 0, - mpi_comm); + MPI_Scatter(nullptr, 0, MPI_BYTE, point_clouds_host[0].data(), + points_num * 3 * sizeof(float), MPI_BYTE, 0, comm); + MPI_Scatter(nullptr, 0, MPI_BYTE, point_clouds_host[1].data(), + points_num * 3 * sizeof(float), MPI_BYTE, 0, comm); } - Kokkos::deep_copy(space, source_points, source_points_host); - Kokkos::deep_copy(space, target_points, target_points_host); - - // Create the transform from a point cloud to another - MovingLeastSquares mls(mpi_comm, space, source_points, - target_points, Details::degree<2>, - Details::wendland<0>); - - // Compute source values - Kokkos::View source_values("Example::source_values", - local_source_points_num); Kokkos::parallel_for( - "Example::source_evaluation", - Kokkos::RangePolicy(space, 0, local_source_points_num), + "Example::flatten_points", + Kokkos::RangePolicy(hspace, 0, points_num), KOKKOS_LAMBDA(int const i) { - source_values(i) = manufactured_solution(source_points(i)); + point_clouds_host[0](i)[2] = 0; + point_clouds_host[1](i)[2] = 0; }); - // Compute target values from source ones - auto target_values = mls.apply(space, source_values); - auto target_values_host = Kokkos::create_mirror_view(target_values); - Kokkos::deep_copy(space, target_values_host, target_values); + Kokkos::Array, 2> point_clouds{ + Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::points_cloud_0"), + points_num), + Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::points_cloud_1"), + points_num)}; + Kokkos::deep_copy(space, point_clouds[0], point_clouds_host[0]); + Kokkos::deep_copy(space, point_clouds[1], point_clouds_host[1]); - // Compute target values via evaluation - Kokkos::View target_values_exact( - "Example::target_values_exact", local_target_points_num); - Kokkos::parallel_for( - "Example::target_evaluation", - Kokkos::RangePolicy(space, 0, local_target_points_num), - KOKKOS_LAMBDA(int const i) { - target_values_exact(i) = manufactured_solution(target_points(i)); - }); + return point_clouds; +} + +template +Kokkos::Array, 2> createMLSObjects( + MPI_Comm comm, ExecutionSpace const &space, + Kokkos::View const &point_clouds_0, + Kokkos::View const &point_clouds_1, + Deg const °, RBF const &rbf) +{ + return {MovingLeastSquares(comm, space, point_clouds_0, + point_clouds_1, deg, rbf), + MovingLeastSquares(comm, space, point_clouds_1, + point_clouds_0, deg, rbf)}; +} + +void doError(MPI_Comm comm, ExecutionSpace const &space, + Kokkos::View const &points, + Kokkos::View const &approx, + Kokkos::View const &values) +{ + int mpi_size, mpi_rank; + MPI_Comm_size(comm, &mpi_size); + MPI_Comm_rank(comm, &mpi_rank); // Compute local error - static constexpr float epsilon = std::numeric_limits::epsilon(); using ErrType = typename Kokkos::MaxLoc::value_type; ErrType error{0, 0}; + float error_sum = 0; Kokkos::parallel_reduce( "Example::error_computation", - Kokkos::RangePolicy(space, 0, local_target_points_num), - KOKKOS_LAMBDA(int const i, ErrType &loc_error) { - float abs_error = - Kokkos::abs(target_values(i) - target_values_exact(i)); - float abs_value = Kokkos::abs(target_values_exact(i)) + - epsilon; - - if (loc_error.val < abs_error / abs_value) + Kokkos::RangePolicy(space, 0, approx.extent(0)), + KOKKOS_LAMBDA(int const i, ErrType &loc_error, float &loc_error_sum) { + float abs_error = Kokkos::abs(approx(i) - values(i)); + + loc_error_sum += abs_error; + if (loc_error.val < abs_error) { - loc_error.val = abs_error / abs_value; + loc_error.val = abs_error; loc_error.loc = i; } }, - Kokkos::MaxLoc(error)); + Kokkos::MaxLoc(error), Kokkos::Sum(error_sum)); + + auto approx_host = Kokkos::create_mirror_view(approx); + auto points_host = Kokkos::create_mirror_view(points); + Kokkos::deep_copy(space, approx_host, approx); + Kokkos::deep_copy(space, points_host, points); std::tuple error_obj{ - error.val, target_points_host(error.loc), target_values_host(error.loc)}; + error.val, points_host(error.loc), approx_host(error.loc)}; + // Compute global error if (mpi_rank == 0) { + float error_sum_global; std::vector all_error_obj(mpi_size); MPI_Gather(&error_obj, sizeof(decltype(error_obj)), MPI_BYTE, all_error_obj.data(), sizeof(decltype(error_obj)), MPI_BYTE, 0, - mpi_comm); + comm); + MPI_Reduce(&error_sum, &error_sum_global, 1, MPI_FLOAT, MPI_SUM, 0, comm); for (int i = 0; i < mpi_size; i++) if (std::get<0>(error_obj) < std::get<0>(all_error_obj[i])) @@ -171,15 +186,76 @@ int main(int argc, char *argv[]) float error = std::get<0>(error_obj), approx = std::get<2>(error_obj); auto point = std::get<1>(error_obj); - std::cout << "Maximum error: " << error << " at point " << point[0] << ", " - << point[1] << ", " << point[2] - << "\nTrue value: " << manufactured_solution(point) - << "\nComputed: " << approx << std::endl; + std::cout << "Mean error: " + << error_sum_global / (points.extent(0) * mpi_size) + << "\nMaximum error: " << error << " at point " << point[0] + << ", " << point[1] << "\n True value: " << Step::eval(point) + << "\n Computed: " << approx << std::endl; } else { MPI_Gather(&error_obj, sizeof(decltype(error_obj)), MPI_BYTE, nullptr, - sizeof(decltype(error_obj)), MPI_BYTE, 0, mpi_comm); + sizeof(decltype(error_obj)), MPI_BYTE, 0, comm); + MPI_Reduce(&error_sum, nullptr, 1, MPI_FLOAT, MPI_SUM, 0, comm); + } +} + +Kokkos::View +doOne(MPI_Comm comm, ExecutionSpace const &space, + Kokkos::View const &tgt, + Kokkos::View const &values, + Kokkos::View const &true_values, + MovingLeastSquares &mls) +{ + auto tgt_values = mls.apply(space, values); + doError(comm, space, tgt, tgt_values, true_values); + return tgt_values; +} + +int main(int argc, char *argv[]) +{ + static constexpr std::size_t total_points = 1024 * 128; + static constexpr std::size_t num_back_forth = 50; + static constexpr auto deg = Details::degree<4>; + static constexpr auto rbf = Details::wu<2>; + + MPI_Init(&argc, &argv); + Kokkos::ScopeGuard guard(argc, argv); + + ExecutionSpace space{}; + HostExecutionSpace host_space{}; + MPI_Comm mpi_comm = MPI_COMM_WORLD; + int mpi_size, mpi_rank; + MPI_Comm_size(mpi_comm, &mpi_size); + MPI_Comm_rank(mpi_comm, &mpi_rank); + + auto point_clouds = + createPointClouds(host_space, space, mpi_comm, total_points / mpi_size); + + // Create the transform from a point cloud to another + auto mlss = createMLSObjects(mpi_comm, space, point_clouds[0], + point_clouds[1], deg, rbf); + + Kokkos::Array, 2> true_values{ + Step::map(space, point_clouds[0]), Step::map(space, point_clouds[1])}; + + Kokkos::View source_values = true_values[0]; + for (int i = 0; i < num_back_forth * 2; i++) + { + if (mpi_rank == 0) + std::cout << "=== TURN " << i + 1 << std::endl; + + Kokkos::View target = + point_clouds[1 - (i % 2)]; + Kokkos::View tgt_true_values = + true_values[1 - (i % 2)]; + MovingLeastSquares &mls = mlss[i % 2]; + + source_values = + doOne(mpi_comm, space, target, source_values, tgt_true_values, mls); + + if (mpi_rank == 0) + std::cout << "===\n" << std::endl; } MPI_Finalize();