From 12105e86effa76970776821e1e1689e643269bcc Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Fri, 28 Jul 2023 13:38:57 -0400 Subject: [PATCH 01/44] New example program --- .gitignore | 1 + examples/CMakeLists.txt | 1 + examples/moving_least_squares/CMakeLists.txt | 3 +++ .../moving_least_squares/moving_least_squares.cpp | 14 ++++++++++++++ 4 files changed, 19 insertions(+) create mode 100644 examples/moving_least_squares/CMakeLists.txt create mode 100644 examples/moving_least_squares/moving_least_squares.cpp diff --git a/.gitignore b/.gitignore index 48439bce0..488cc1fd5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *.swp .#* /build* +.vscode \ No newline at end of file diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 389a6bcdf..6d486bc72 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -10,6 +10,7 @@ add_subdirectory(callback) add_subdirectory(dbscan) add_subdirectory(molecular_dynamics) add_subdirectory(simple_intersection) +add_subdirectory(moving_least_squares) find_package(Boost COMPONENTS program_options) if(Boost_FOUND) diff --git a/examples/moving_least_squares/CMakeLists.txt b/examples/moving_least_squares/CMakeLists.txt new file mode 100644 index 000000000..d9d9c6e45 --- /dev/null +++ b/examples/moving_least_squares/CMakeLists.txt @@ -0,0 +1,3 @@ +add_executable(ArborX_Example_MovingLeastSquare.exe moving_least_squares.cpp) +target_link_libraries(ArborX_Example_MovingLeastSquare.exe ArborX::ArborX) +add_test(NAME ArborX_Example_MovingLeastSquare COMMAND ArborX_Example_MovingLeastSquare.exe) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp new file mode 100644 index 000000000..fb4fc5368 --- /dev/null +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -0,0 +1,14 @@ +/**************************************************************************** + * Copyright (c) 2017-2023 by the ArborX authors * + * All rights 
reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include + +int main(int argc, char *argv[]) { return 0; } From 9289fc65f2442db21261fa141d01b3fd7f9bdb17 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Fri, 28 Jul 2023 16:29:47 -0400 Subject: [PATCH 02/44] Source and target point creation --- .../moving_least_squares.cpp | 105 +++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index fb4fc5368..9fc54a098 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -9,6 +9,109 @@ * SPDX-License-Identifier: BSD-3-Clause * ****************************************************************************/ +// Example taken from DataTransferKit +// (https://github.com/ORNL-CEES/DataTransferKit) + #include +#include +#include + +using ExecutionSpace = Kokkos::DefaultExecutionSpace; +using MemorySpace = ExecutionSpace::memory_space; + +std::ostream &operator<<(std::ostream &os, ArborX::Point const &p) +{ + os << '(' << p[0] << ',' << p[1] << ',' << p[2] << ')'; + return os; +} + +class RBFWendland_0 +{ +public: + RBFWendland_0(double radius) + : _radius(radius) + {} + + KOKKOS_INLINE_FUNCTION double operator()(double x) + { + x /= _radius; + return (1. - x) * (1. 
- x); + } + +private: + double _radius; +}; + +struct MVPolynomialBasis_Quad_3D +{ + static constexpr std::size_t size = 10; + + template + KOKKOS_INLINE_FUNCTION Kokkos::Array + operator()(Double3D const &p) const + { + return {{1., p[0], p[1], p[2], p[0] * p[0], p[0] * p[1], p[0] * p[2], + p[1] * p[1], p[1] * p[2], p[2] * p[2]}}; + } +}; + +// Func to evaluate +template +KOKKOS_INLINE_FUNCTION double func(Double3D const &p) { + return Kokkos::sin(p[0]) * Kokkos::cos(p[1]) + p[2]; +} + +int main(int argc, char *argv[]) +{ + Kokkos::ScopeGuard guard(argc, argv); + + constexpr float cube_half_side = 10.; // [-10, 10]^3 cube + constexpr float cube_side = 2 * cube_half_side; + constexpr std::size_t source_points_side = 100; // [-10, 10]^3 grid + constexpr std::size_t target_points_num = 10'000; // random [-10, 10]^3 + + constexpr std::size_t source_points_num = + source_points_side * source_points_side * source_points_side; + + auto source_points = Kokkos::View( + "source_points", source_points_num); + auto target_points = Kokkos::View( + "target_points", target_points_num); + + // Generate source points + Kokkos::parallel_for( + "source_fill", + Kokkos::MDRangePolicy>( + {0, 0, 0}, + {source_points_side, source_points_side, source_points_side}), + KOKKOS_LAMBDA (int const i, int const j, int const k) { + source_points( + i * source_points_side * source_points_side + + j * source_points_side + + k + ) = ArborX::Point { + (static_cast(i) / (source_points_side - 1) - .5f) * cube_side, + (static_cast(j) / (source_points_side - 1) - .5f) * cube_side, + (static_cast(k) / (source_points_side - 1) - .5f) * cube_side + }; + }); + + // Generate target points + auto random_pool = + Kokkos::Random_XorShift64_Pool(time(nullptr)); + Kokkos::parallel_for( + "target_fill", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA (const int i) { + auto gen = random_pool.get_state(); + target_points(i) = ArborX::Point { + gen.frand(0., 1.), + gen.frand(0., 1.), + gen.frand(0., 
1.), + }; + }); -int main(int argc, char *argv[]) { return 0; } + // Arrange source points as tree + auto source_tree = + ArborX::BVH(ExecutionSpace{}, source_points); +} From 8c97beb345e7bfbeaa6ddc16b5948371451f6723 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Mon, 31 Jul 2023 16:30:45 -0400 Subject: [PATCH 03/44] Completion of MLS (not tested) --- .../moving_least_squares.cpp | 216 +++++++++++++++++- 1 file changed, 207 insertions(+), 9 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 9fc54a098..63da4f979 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -11,11 +11,16 @@ // Example taken from DataTransferKit // (https://github.com/ORNL-CEES/DataTransferKit) +// with MLS resolution from +// (http://dx.doi.org/10.1016/j.jcp.2015.11.055) #include #include #include +#include +#include + using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; @@ -25,20 +30,14 @@ std::ostream &operator<<(std::ostream &os, ArborX::Point const &p) return os; } -class RBFWendland_0 +struct RBFWendland_0 { -public: - RBFWendland_0(double radius) - : _radius(radius) - {} - KOKKOS_INLINE_FUNCTION double operator()(double x) { x /= _radius; return (1. - x) * (1. - x); } -private: double _radius; }; @@ -65,10 +64,11 @@ int main(int argc, char *argv[]) { Kokkos::ScopeGuard guard(argc, argv); - constexpr float cube_half_side = 10.; // [-10, 10]^3 cube + constexpr float cube_half_side = 10.; // [-10, 10]^3 cube constexpr float cube_side = 2 * cube_half_side; constexpr std::size_t source_points_side = 100; // [-10, 10]^3 grid - constexpr std::size_t target_points_num = 10'000; // random [-10, 10]^3 + constexpr std::size_t target_points_num = 10'000; // random [-10, 10]^3 + constexpr std::size_t num_neighbors = MVPolynomialBasis_Quad_3D::size; // ??? 
constexpr std::size_t source_points_num = source_points_side * source_points_side * source_points_side; @@ -114,4 +114,202 @@ int main(int argc, char *argv[]) // Arrange source points as tree auto source_tree = ArborX::BVH(ExecutionSpace{}, source_points); + + // Create the queries + // For each target point we query the closest source points + auto queries = Kokkos::View*, MemorySpace>( + "queries", target_points_num); + Kokkos::parallel_for( + "make_queries", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA (const int i) { + queries(i) = ArborX::nearest(target_points(i), num_neighbors); + }); + + // Perform the query + auto indices = Kokkos::View("indices", 0); + auto offsets = Kokkos::View("offsets", 0); + source_tree.query(ExecutionSpace{}, queries, indices, offsets); + + // Now that we have the neighbors, we recompute their position using + // their target point as the origin. + // This is used as an optimisation later in the algorithm + auto tr_source_points = Kokkos::View( + "tr_source_points", target_points_num, num_neighbors); + Kokkos::parallel_for( + "transform_source_points", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA (const int i) { + for (int j = offsets(i); j < offsets(i+1); j++) { + tr_source_points(i, j - offsets(i)) = ArborX::Point { + source_points(j)[0] - target_points(i)[0], + source_points(j)[1] - target_points(i)[1], + source_points(j)[2] - target_points(i)[2], + }; + } + }); + + // Compute the radii for the weight (phi) vector + auto radii = Kokkos::View( + "radii", target_points_num); + constexpr double epsilon = std::numeric_limits::epsilon(); + Kokkos::parallel_for( + "radii_computation", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA (const int i) { + double radius = 10. * epsilon; + + for (int j = 0; j < num_neighbors; j++) { + double norm = ArborX::Details::distance( + tr_source_points(i, j), + ArborX::Point{0., 0., 0.}); + radius = (radius < norm) ? 
norm : radius; + } + + radii(i) = 1.1 * radius; + }); + + // Compute the weight (phi) vector + auto phi = Kokkos::View( + "phi", target_points_num, num_neighbors); + Kokkos::parallel_for( + "phi_computation", + Kokkos::RangePolicy(0, phi.extent(0)), + KOKKOS_LAMBDA (const int i) { + auto rbf = RBFWendland_0 { radii(i) }; + + for (int j = 0; j < phi.extent(1); j++) { + double norm = ArborX::Details::distance( + tr_source_points(i, j), + ArborX::Point{0., 0., 0.}); + phi(i, j) = rbf(norm); + } + }); + + // Compute multivariable Vandermonde (P) matrix + auto p = Kokkos::View( + "vandermonde", + target_points_num, + num_neighbors, + MVPolynomialBasis_Quad_3D::size + ); + Kokkos::parallel_for( + "vandermonde_computation", + Kokkos::MDRangePolicy>( + {0, 0}, {target_points_num, num_neighbors}), + KOKKOS_LAMBDA (const int i, const int j) { + auto basis = MVPolynomialBasis_Quad_3D{}(tr_source_points(i, j)); + + for (int k = 0; k < MVPolynomialBasis_Quad_3D::size; k++) { + p(i, j, k) = basis[k]; + } + }); + + // Compute moment (A) matrix + auto a = Kokkos::View( + "A", + target_points_num, + MVPolynomialBasis_Quad_3D::size, + MVPolynomialBasis_Quad_3D::size + ); + Kokkos::parallel_for( + "A_computation", + Kokkos::MDRangePolicy>( + {0, 0, 0}, + { + target_points_num, + MVPolynomialBasis_Quad_3D::size, + MVPolynomialBasis_Quad_3D::size + }), + KOKKOS_LAMBDA (const int i, const int j, const int k) { + double tmp = 0; + for (int l = 0; l < num_neighbors; l++) { + tmp += p(i, l, j) * p(i, l, k) * phi(i, l); + } + + a(i, j, k) = tmp; + }); + + // Inverse moment matrix + // Gaussian inverse method. Both matrix are used and modifications on the + // first one are applied to the second + // Kind of works, errors out quite often. + // A better method should be employed (SVD?) 
+ auto a_inv = Kokkos::View( + "A_inv", + target_points_num, + MVPolynomialBasis_Quad_3D::size, + MVPolynomialBasis_Quad_3D::size + ); + Kokkos::parallel_for( + "A_inv_computation", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA (const int i) { + for (int j = 0; j < MVPolynomialBasis_Quad_3D::size; j++) { + for (int k = 0; k < MVPolynomialBasis_Quad_3D::size; k++) { + a_inv(i, j, k) = (j == k) * 1.; + } + } + + // This needs to be done for every column + for (int j = 0; j < MVPolynomialBasis_Quad_3D::size; j++) { + + // We find the line with a non-negative element on column j + int k = j; + for (; k < MVPolynomialBasis_Quad_3D::size; k++) { + if (a(i, k, j) != 0.0) break; + } + + // We divide the line with said value + double tmp = a(i, k, j); + for (int l = 0; l < MVPolynomialBasis_Quad_3D::size; l++) { + a(i, k, l) /= tmp; + a_inv(i, k, l) /= tmp; + } + + // If line and column are not the same, move the column to the top + if (k != j) { + for (int l = 0; l < MVPolynomialBasis_Quad_3D::size; l++) { + double tmp = a(i, k, l); + a(i, k, l) = a(i, j, l); + a(i, j, l) = tmp; + + tmp = a_inv(i, k, l); + a_inv(i, k, l) = a_inv(i, j, l); + a_inv(i, j, l) = tmp; + } + } + + // Now, set at zero all other elements of the column (Ll <- Ll - a*Lj) + for (int l = 0; l < MVPolynomialBasis_Quad_3D::size; l++) { + if (l == j || a(i, l, j) == 0.0) continue; + double mul = a(i, l, j); + + for (int m = 0; m < MVPolynomialBasis_Quad_3D::size; m++) { + a(i, l, m) -= mul * a(i, j, m); + a_inv(i, l, m) -= mul * a_inv(i, j, m); + } + a(i, l, j) = 0.0; + } + + // Now a_inv should contain the inverse of a + } + }); + + // Compute the coefficients + auto coeffs = Kokkos::View( + "coefficients", target_points_num, MVPolynomialBasis_Quad_3D::size); + Kokkos::parallel_for( + "coefficients_computation", + Kokkos::MDRangePolicy>( + {0, 0}, {target_points_num, MVPolynomialBasis_Quad_3D::size}), + KOKKOS_LAMBDA (const int i, const int j) { + double tmp = 0; + + for (int k = 0; k < 
MVPolynomialBasis_Quad_3D::size; k++) { + tmp += a_inv(i, 0, j) * p(i, k, j) * phi(i, k); + } + + coeffs(i, j) = tmp; + }); } From ea3a2b4d946a09453084ed3eca02fde3ea8f3daf Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 1 Aug 2023 11:38:36 -0400 Subject: [PATCH 04/44] Small fixes and values computation --- .../moving_least_squares.cpp | 170 ++++++++++-------- 1 file changed, 95 insertions(+), 75 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 63da4f979..8c6fc7eb4 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -16,20 +16,12 @@ #include #include -#include #include -#include using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; -std::ostream &operator<<(std::ostream &os, ArborX::Point const &p) -{ - os << '(' << p[0] << ',' << p[1] << ',' << p[2] << ')'; - return os; -} - struct RBFWendland_0 { KOKKOS_INLINE_FUNCTION double operator()(double x) @@ -41,75 +33,57 @@ struct RBFWendland_0 double _radius; }; -struct MVPolynomialBasis_Quad_3D +struct MVPolynomialBasis_3D { - static constexpr std::size_t size = 10; + static constexpr std::size_t size = 4; template KOKKOS_INLINE_FUNCTION Kokkos::Array operator()(Double3D const &p) const { - return {{1., p[0], p[1], p[2], p[0] * p[0], p[0] * p[1], p[0] * p[2], - p[1] * p[1], p[1] * p[2], p[2] * p[2]}}; + return {{1., p[0], p[1], p[2]}}; } }; // Func to evaluate template KOKKOS_INLINE_FUNCTION double func(Double3D const &p) { - return Kokkos::sin(p[0]) * Kokkos::cos(p[1]) + p[2]; + return Kokkos::sin(p[0]) + Kokkos::cos(p[1]) + p[2]; } int main(int argc, char *argv[]) { Kokkos::ScopeGuard guard(argc, argv); - - constexpr float cube_half_side = 10.; // [-10, 10]^3 cube - constexpr float cube_side = 2 * cube_half_side; - constexpr std::size_t source_points_side = 100; // [-10, 10]^3 grid - 
constexpr std::size_t target_points_num = 10'000; // random [-10, 10]^3 - constexpr std::size_t num_neighbors = MVPolynomialBasis_Quad_3D::size; // ??? - - constexpr std::size_t source_points_num = - source_points_side * source_points_side * source_points_side; + constexpr std::size_t num_neighbors = 10; + constexpr std::size_t source_points_num = 10; + constexpr std::size_t target_points_num = 4; auto source_points = Kokkos::View( "source_points", source_points_num); auto target_points = Kokkos::View( "target_points", target_points_num); + auto source_points_host = Kokkos::create_mirror_view(source_points); + auto target_points_host = Kokkos::create_mirror_view(target_points); // Generate source points - Kokkos::parallel_for( - "source_fill", - Kokkos::MDRangePolicy>( - {0, 0, 0}, - {source_points_side, source_points_side, source_points_side}), - KOKKOS_LAMBDA (int const i, int const j, int const k) { - source_points( - i * source_points_side * source_points_side + - j * source_points_side + - k - ) = ArborX::Point { - (static_cast(i) / (source_points_side - 1) - .5f) * cube_side, - (static_cast(j) / (source_points_side - 1) - .5f) * cube_side, - (static_cast(k) / (source_points_side - 1) - .5f) * cube_side - }; - }); + source_points_host(0) = ArborX::Point { 1., 1., 0. }; + source_points_host(1) = ArborX::Point { -1., 1., 0. }; + source_points_host(2) = ArborX::Point { -1., -1., 0. }; + source_points_host(3) = ArborX::Point { 1., -1., 0. }; + source_points_host(4) = ArborX::Point { 0., 0., 1. }; + source_points_host(5) = ArborX::Point { 1., 1., 2. }; + source_points_host(6) = ArborX::Point { -1., 1., 2. }; + source_points_host(7) = ArborX::Point { -1., -1., 2. }; + source_points_host(8) = ArborX::Point { 1., -1., 2. }; + source_points_host(9) = ArborX::Point { 0., 0., -1. 
}; + Kokkos::deep_copy(source_points, source_points_host); // Generate target points - auto random_pool = - Kokkos::Random_XorShift64_Pool(time(nullptr)); - Kokkos::parallel_for( - "target_fill", - Kokkos::RangePolicy(0, target_points_num), - KOKKOS_LAMBDA (const int i) { - auto gen = random_pool.get_state(); - target_points(i) = ArborX::Point { - gen.frand(0., 1.), - gen.frand(0., 1.), - gen.frand(0., 1.), - }; - }); + target_points_host(0) = ArborX::Point { 0., 0., 0. }; + target_points_host(1) = ArborX::Point { .5, .5, 0. }; + target_points_host(2) = ArborX::Point { -.5, .5, 1. }; + target_points_host(3) = ArborX::Point { 0., -.5, 1.5 }; + Kokkos::deep_copy(target_points, target_points_host); // Arrange source points as tree auto source_tree = @@ -142,9 +116,9 @@ int main(int argc, char *argv[]) KOKKOS_LAMBDA (const int i) { for (int j = offsets(i); j < offsets(i+1); j++) { tr_source_points(i, j - offsets(i)) = ArborX::Point { - source_points(j)[0] - target_points(i)[0], - source_points(j)[1] - target_points(i)[1], - source_points(j)[2] - target_points(i)[2], + source_points(indices(j))[0] - target_points(i)[0], + source_points(indices(j))[1] - target_points(i)[1], + source_points(indices(j))[2] - target_points(i)[2], }; } }); @@ -191,16 +165,16 @@ int main(int argc, char *argv[]) "vandermonde", target_points_num, num_neighbors, - MVPolynomialBasis_Quad_3D::size + MVPolynomialBasis_3D::size ); Kokkos::parallel_for( "vandermonde_computation", Kokkos::MDRangePolicy>( {0, 0}, {target_points_num, num_neighbors}), KOKKOS_LAMBDA (const int i, const int j) { - auto basis = MVPolynomialBasis_Quad_3D{}(tr_source_points(i, j)); + auto basis = MVPolynomialBasis_3D{}(tr_source_points(i, j)); - for (int k = 0; k < MVPolynomialBasis_Quad_3D::size; k++) { + for (int k = 0; k < MVPolynomialBasis_3D::size; k++) { p(i, j, k) = basis[k]; } }); @@ -209,8 +183,8 @@ int main(int argc, char *argv[]) auto a = Kokkos::View( "A", target_points_num, - MVPolynomialBasis_Quad_3D::size, - 
MVPolynomialBasis_Quad_3D::size + MVPolynomialBasis_3D::size, + MVPolynomialBasis_3D::size ); Kokkos::parallel_for( "A_computation", @@ -218,8 +192,8 @@ int main(int argc, char *argv[]) {0, 0, 0}, { target_points_num, - MVPolynomialBasis_Quad_3D::size, - MVPolynomialBasis_Quad_3D::size + MVPolynomialBasis_3D::size, + MVPolynomialBasis_3D::size }), KOKKOS_LAMBDA (const int i, const int j, const int k) { double tmp = 0; @@ -238,38 +212,38 @@ int main(int argc, char *argv[]) auto a_inv = Kokkos::View( "A_inv", target_points_num, - MVPolynomialBasis_Quad_3D::size, - MVPolynomialBasis_Quad_3D::size + MVPolynomialBasis_3D::size, + MVPolynomialBasis_3D::size ); Kokkos::parallel_for( "A_inv_computation", Kokkos::RangePolicy(0, target_points_num), KOKKOS_LAMBDA (const int i) { - for (int j = 0; j < MVPolynomialBasis_Quad_3D::size; j++) { - for (int k = 0; k < MVPolynomialBasis_Quad_3D::size; k++) { + for (int j = 0; j < MVPolynomialBasis_3D::size; j++) { + for (int k = 0; k < MVPolynomialBasis_3D::size; k++) { a_inv(i, j, k) = (j == k) * 1.; } } // This needs to be done for every column - for (int j = 0; j < MVPolynomialBasis_Quad_3D::size; j++) { + for (int j = 0; j < MVPolynomialBasis_3D::size; j++) { // We find the line with a non-negative element on column j int k = j; - for (; k < MVPolynomialBasis_Quad_3D::size; k++) { + for (; k < MVPolynomialBasis_3D::size; k++) { if (a(i, k, j) != 0.0) break; } // We divide the line with said value double tmp = a(i, k, j); - for (int l = 0; l < MVPolynomialBasis_Quad_3D::size; l++) { + for (int l = 0; l < MVPolynomialBasis_3D::size; l++) { a(i, k, l) /= tmp; a_inv(i, k, l) /= tmp; } // If line and column are not the same, move the column to the top if (k != j) { - for (int l = 0; l < MVPolynomialBasis_Quad_3D::size; l++) { + for (int l = 0; l < MVPolynomialBasis_3D::size; l++) { double tmp = a(i, k, l); a(i, k, l) = a(i, j, l); a(i, j, l) = tmp; @@ -281,11 +255,11 @@ int main(int argc, char *argv[]) } // Now, set at zero all other 
elements of the column (Ll <- Ll - a*Lj) - for (int l = 0; l < MVPolynomialBasis_Quad_3D::size; l++) { + for (int l = 0; l < MVPolynomialBasis_3D::size; l++) { if (l == j || a(i, l, j) == 0.0) continue; double mul = a(i, l, j); - for (int m = 0; m < MVPolynomialBasis_Quad_3D::size; m++) { + for (int m = 0; m < MVPolynomialBasis_3D::size; m++) { a(i, l, m) -= mul * a(i, j, m); a_inv(i, l, m) -= mul * a_inv(i, j, m); } @@ -298,18 +272,64 @@ int main(int argc, char *argv[]) // Compute the coefficients auto coeffs = Kokkos::View( - "coefficients", target_points_num, MVPolynomialBasis_Quad_3D::size); + "coefficients", target_points_num, num_neighbors); Kokkos::parallel_for( "coefficients_computation", Kokkos::MDRangePolicy>( - {0, 0}, {target_points_num, MVPolynomialBasis_Quad_3D::size}), + {0, 0}, {target_points_num, num_neighbors}), KOKKOS_LAMBDA (const int i, const int j) { double tmp = 0; - for (int k = 0; k < MVPolynomialBasis_Quad_3D::size; k++) { - tmp += a_inv(i, 0, j) * p(i, k, j) * phi(i, k); + for (int k = 0; k < MVPolynomialBasis_3D::size; k++) { + tmp += a_inv(i, 0, k) * p(i, j, k) * phi(i, j); } coeffs(i, j) = tmp; }); + + // Compute source values + auto source_values = Kokkos::View( + "source_values", source_points_num); + Kokkos::parallel_for( + "source_evaluation", + Kokkos::RangePolicy(0, source_points_num), + KOKKOS_LAMBDA (const int i) { + source_values(i) = func(source_points(i)); + }); + + // Compute target values via interpolation + auto target_values = Kokkos::View( + "target_values", target_points_num); + Kokkos::parallel_for( + "target_interpolation", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA (const int i) { + double tmp = 0; + for (int j = offsets(i); j < offsets(i+i); j++) { + tmp += coeffs(i, j - offsets(i)) * source_values(indices(j)); + } + target_values(i) = tmp; + }); + + // Compute target values via evaluation + auto target_values_exact = Kokkos::View( + "target_values_exact", target_points_num); + 
Kokkos::parallel_for( + "target_evaluation", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA (const int i) { + target_values_exact(i) = func(target_points(i)); + }); + + // Show difference + auto target_values_host = Kokkos::create_mirror_view(target_values); + Kokkos::deep_copy(target_values_host, target_values); + auto target_values_exact_host = Kokkos::create_mirror_view(target_values_exact); + Kokkos::deep_copy(target_values_exact_host, target_values_exact); + + for (int i = 0; i < target_points_num; i++) { + std::cout << "====\n" + << target_values_host(i) << '\n' + << target_values_exact_host(i) << "\n====\n"; + } } From 357bda3043757afc71490af1c282a71c23a763e2 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 1 Aug 2023 15:39:20 -0400 Subject: [PATCH 05/44] Small fixup (memory and kernel names, removing templates, ...) --- .../moving_least_squares.cpp | 97 ++++++++++--------- 1 file changed, 51 insertions(+), 46 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 8c6fc7eb4..be10663da 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -1,5 +1,5 @@ /**************************************************************************** - * Copyright (c) 2017-2023 by the ArborX authors * + * Copyright (c) 2023 by the ArborX authors * * All rights reserved. * * * * This file is part of the ArborX library. 
ArborX is * @@ -37,31 +37,32 @@ struct MVPolynomialBasis_3D { static constexpr std::size_t size = 4; - template KOKKOS_INLINE_FUNCTION Kokkos::Array - operator()(Double3D const &p) const + operator()(ArborX::Point const &p) const { return {{1., p[0], p[1], p[2]}}; } }; -// Func to evaluate -template -KOKKOS_INLINE_FUNCTION double func(Double3D const &p) { - return Kokkos::sin(p[0]) + Kokkos::cos(p[1]) + p[2]; +// Function to approximate +KOKKOS_INLINE_FUNCTION double manufactured_solution(ArborX::Point const &p) +{ + return p[2] + p[0]; } int main(int argc, char *argv[]) { Kokkos::ScopeGuard guard(argc, argv); - constexpr std::size_t num_neighbors = 10; + constexpr std::size_t num_neighbors = 5; constexpr std::size_t source_points_num = 10; constexpr std::size_t target_points_num = 4; auto source_points = Kokkos::View( - "source_points", source_points_num); + Kokkos::view_alloc(Kokkos::WithoutInitializing, "MLS_EX::source_points"), + source_points_num); auto target_points = Kokkos::View( - "target_points", target_points_num); + Kokkos::view_alloc(Kokkos::WithoutInitializing, "MLS_EX::target_points"), + target_points_num); auto source_points_host = Kokkos::create_mirror_view(source_points); auto target_points_host = Kokkos::create_mirror_view(target_points); @@ -80,38 +81,37 @@ int main(int argc, char *argv[]) // Generate target points target_points_host(0) = ArborX::Point { 0., 0., 0. }; - target_points_host(1) = ArborX::Point { .5, .5, 0. }; + target_points_host(1) = ArborX::Point { .5, .5, .5 }; target_points_host(2) = ArborX::Point { -.5, .5, 1. 
}; - target_points_host(3) = ArborX::Point { 0., -.5, 1.5 }; + target_points_host(3) = ArborX::Point { .1, -.33, 1.5 }; Kokkos::deep_copy(target_points, target_points_host); - // Arrange source points as tree - auto source_tree = - ArborX::BVH(ExecutionSpace{}, source_points); + // Organize source points as tree + ArborX::BVH source_tree(ExecutionSpace{}, source_points); // Create the queries // For each target point we query the closest source points auto queries = Kokkos::View*, MemorySpace>( - "queries", target_points_num); + "MLS_EX::queries", target_points_num); Kokkos::parallel_for( - "make_queries", + "MLS_EX::make_queries", Kokkos::RangePolicy(0, target_points_num), KOKKOS_LAMBDA (const int i) { queries(i) = ArborX::nearest(target_points(i), num_neighbors); }); // Perform the query - auto indices = Kokkos::View("indices", 0); - auto offsets = Kokkos::View("offsets", 0); + auto indices = Kokkos::View("MLS_EX::indices", 0); + auto offsets = Kokkos::View("MLS_EX::offsets", 0); source_tree.query(ExecutionSpace{}, queries, indices, offsets); // Now that we have the neighbors, we recompute their position using // their target point as the origin. 
// This is used as an optimisation later in the algorithm auto tr_source_points = Kokkos::View( - "tr_source_points", target_points_num, num_neighbors); + "MLS_EX::tr_source_points", target_points_num, num_neighbors); Kokkos::parallel_for( - "transform_source_points", + "MLS_EX::transform_source_points", Kokkos::RangePolicy(0, target_points_num), KOKKOS_LAMBDA (const int i) { for (int j = offsets(i); j < offsets(i+1); j++) { @@ -125,10 +125,10 @@ int main(int argc, char *argv[]) // Compute the radii for the weight (phi) vector auto radii = Kokkos::View( - "radii", target_points_num); + "MLS_EX::radii", target_points_num); constexpr double epsilon = std::numeric_limits::epsilon(); Kokkos::parallel_for( - "radii_computation", + "MLS_EX::radii_computation", Kokkos::RangePolicy(0, target_points_num), KOKKOS_LAMBDA (const int i) { double radius = 10. * epsilon; @@ -145,9 +145,9 @@ int main(int argc, char *argv[]) // Compute the weight (phi) vector auto phi = Kokkos::View( - "phi", target_points_num, num_neighbors); + "MLS_EX::phi", target_points_num, num_neighbors); Kokkos::parallel_for( - "phi_computation", + "MLS_EX::phi_computation", Kokkos::RangePolicy(0, phi.extent(0)), KOKKOS_LAMBDA (const int i) { auto rbf = RBFWendland_0 { radii(i) }; @@ -162,13 +162,13 @@ int main(int argc, char *argv[]) // Compute multivariable Vandermonde (P) matrix auto p = Kokkos::View( - "vandermonde", + "MLS_EX::vandermonde", target_points_num, num_neighbors, MVPolynomialBasis_3D::size ); Kokkos::parallel_for( - "vandermonde_computation", + "MLS_EX::vandermonde_computation", Kokkos::MDRangePolicy>( {0, 0}, {target_points_num, num_neighbors}), KOKKOS_LAMBDA (const int i, const int j) { @@ -181,13 +181,13 @@ int main(int argc, char *argv[]) // Compute moment (A) matrix auto a = Kokkos::View( - "A", + "MLS_EX::A", target_points_num, MVPolynomialBasis_3D::size, MVPolynomialBasis_3D::size ); Kokkos::parallel_for( - "A_computation", + "MLS_EX::A_computation", Kokkos::MDRangePolicy>( {0, 0, 0}, 
{ @@ -210,13 +210,13 @@ int main(int argc, char *argv[]) // Kind of works, errors out quite often. // A better method should be employed (SVD?) auto a_inv = Kokkos::View( - "A_inv", + "MLS_EX::A_inv", target_points_num, MVPolynomialBasis_3D::size, MVPolynomialBasis_3D::size ); Kokkos::parallel_for( - "A_inv_computation", + "MLS_EX::A_inv_computation", Kokkos::RangePolicy(0, target_points_num), KOKKOS_LAMBDA (const int i) { for (int j = 0; j < MVPolynomialBasis_3D::size; j++) { @@ -272,9 +272,9 @@ int main(int argc, char *argv[]) // Compute the coefficients auto coeffs = Kokkos::View( - "coefficients", target_points_num, num_neighbors); + "MLS_EX::coefficients", target_points_num, num_neighbors); Kokkos::parallel_for( - "coefficients_computation", + "MLS_EX::coefficients_computation", Kokkos::MDRangePolicy>( {0, 0}, {target_points_num, num_neighbors}), KOKKOS_LAMBDA (const int i, const int j) { @@ -289,19 +289,19 @@ int main(int argc, char *argv[]) // Compute source values auto source_values = Kokkos::View( - "source_values", source_points_num); + "MLS_EX::source_values", source_points_num); Kokkos::parallel_for( - "source_evaluation", + "MLS_EX::source_evaluation", Kokkos::RangePolicy(0, source_points_num), KOKKOS_LAMBDA (const int i) { - source_values(i) = func(source_points(i)); + source_values(i) = manufactured_solution(source_points(i)); }); // Compute target values via interpolation auto target_values = Kokkos::View( - "target_values", target_points_num); + "MLS_EX::target_values", target_points_num); Kokkos::parallel_for( - "target_interpolation", + "MLS_EX::target_interpolation", Kokkos::RangePolicy(0, target_points_num), KOKKOS_LAMBDA (const int i) { double tmp = 0; @@ -313,23 +313,28 @@ int main(int argc, char *argv[]) // Compute target values via evaluation auto target_values_exact = Kokkos::View( - "target_values_exact", target_points_num); + "MLS_EX::target_values_exact", target_points_num); Kokkos::parallel_for( - "target_evaluation", + 
"MLS_EX::target_evaluation", Kokkos::RangePolicy(0, target_points_num), KOKKOS_LAMBDA (const int i) { - target_values_exact(i) = func(target_points(i)); + target_values_exact(i) = manufactured_solution(target_points(i)); }); // Show difference - auto target_values_host = Kokkos::create_mirror_view(target_values); + auto target_values_host = + Kokkos::create_mirror_view(target_values); Kokkos::deep_copy(target_values_host, target_values); - auto target_values_exact_host = Kokkos::create_mirror_view(target_values_exact); + auto target_values_exact_host = + Kokkos::create_mirror_view(target_values_exact); Kokkos::deep_copy(target_values_exact_host, target_values_exact); for (int i = 0; i < target_points_num; i++) { - std::cout << "====\n" - << target_values_host(i) << '\n' - << target_values_exact_host(i) << "\n====\n"; + double error = + Kokkos::abs(target_values_host(i) - target_values_exact_host(i)); + std::cout << "==== Target " << i << '\n' + << "Interpolation: " << target_values_host(i) << '\n' + << "Real value : " << target_values_exact_host(i) << '\n' + << "Absolute err.: " << error << "\n====\n"; } } From 4c22a2285b0807e2e089de3bdf1e23983cc55434 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 1 Aug 2023 15:51:04 -0400 Subject: [PATCH 06/44] source points generation (20x20x20 cube) --- .../moving_least_squares.cpp | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index be10663da..a30a8d47b 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -54,7 +54,8 @@ int main(int argc, char *argv[]) { Kokkos::ScopeGuard guard(argc, argv); constexpr std::size_t num_neighbors = 5; - constexpr std::size_t source_points_num = 10; + constexpr std::size_t cube_side = 4; + constexpr std::size_t source_points_num = cube_side * cube_side * 
cube_side; constexpr std::size_t target_points_num = 4; auto source_points = Kokkos::View( @@ -63,27 +64,28 @@ int main(int argc, char *argv[]) auto target_points = Kokkos::View( Kokkos::view_alloc(Kokkos::WithoutInitializing, "MLS_EX::target_points"), target_points_num); - auto source_points_host = Kokkos::create_mirror_view(source_points); auto target_points_host = Kokkos::create_mirror_view(target_points); - // Generate source points - source_points_host(0) = ArborX::Point { 1., 1., 0. }; - source_points_host(1) = ArborX::Point { -1., 1., 0. }; - source_points_host(2) = ArborX::Point { -1., -1., 0. }; - source_points_host(3) = ArborX::Point { 1., -1., 0. }; - source_points_host(4) = ArborX::Point { 0., 0., 1. }; - source_points_host(5) = ArborX::Point { 1., 1., 2. }; - source_points_host(6) = ArborX::Point { -1., 1., 2. }; - source_points_host(7) = ArborX::Point { -1., -1., 2. }; - source_points_host(8) = ArborX::Point { 1., -1., 2. }; - source_points_host(9) = ArborX::Point { 0., 0., -1. }; - Kokkos::deep_copy(source_points, source_points_host); + // Generate source points (Organized within a [-10, 10]^3 cube) + Kokkos::parallel_for( + "MLS_EX::source_points_init", + Kokkos::MDRangePolicy>( + {0, 0, 0}, {cube_side, cube_side, cube_side}), + KOKKOS_LAMBDA (const int i, const int j, const int k) { + source_points(i * cube_side * cube_side + + j * cube_side + + k ) = ArborX::Point { + 20.f * (float(i) / (cube_side - 1) - .5f), + 20.f * (float(j) / (cube_side - 1) - .5f), + 20.f * (float(k) / (cube_side - 1) - .5f) + }; + }); // Generate target points - target_points_host(0) = ArborX::Point { 0., 0., 0. }; - target_points_host(1) = ArborX::Point { .5, .5, .5 }; - target_points_host(2) = ArborX::Point { -.5, .5, 1. 
}; - target_points_host(3) = ArborX::Point { .1, -.33, 1.5 }; + target_points_host(0) = ArborX::Point { 0.f, 0.f, 0.f }; + target_points_host(1) = ArborX::Point { 5.f, 5.f, 5.f }; + target_points_host(2) = ArborX::Point { -5.f, 5.f, 3.f }; + target_points_host(3) = ArborX::Point { 1.f, -3.3f, 7.f }; Kokkos::deep_copy(target_points, target_points_host); // Organize source points as tree From 25864002aad4cba9e1dbaffa359b9b5e4ca34173 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 1 Aug 2023 15:59:51 -0400 Subject: [PATCH 07/44] double to float --- .../moving_least_squares.cpp | 48 +++++++++---------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index a30a8d47b..bcec2991c 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -24,20 +24,20 @@ using MemorySpace = ExecutionSpace::memory_space; struct RBFWendland_0 { - KOKKOS_INLINE_FUNCTION double operator()(double x) + KOKKOS_INLINE_FUNCTION float operator()(float x) { x /= _radius; return (1. - x) * (1. 
- x); } - double _radius; + float _radius; }; struct MVPolynomialBasis_3D { static constexpr std::size_t size = 4; - KOKKOS_INLINE_FUNCTION Kokkos::Array + KOKKOS_INLINE_FUNCTION Kokkos::Array operator()(ArborX::Point const &p) const { return {{1., p[0], p[1], p[2]}}; @@ -45,7 +45,7 @@ struct MVPolynomialBasis_3D }; // Function to approximate -KOKKOS_INLINE_FUNCTION double manufactured_solution(ArborX::Point const &p) +KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) { return p[2] + p[0]; } @@ -126,17 +126,17 @@ int main(int argc, char *argv[]) }); // Compute the radii for the weight (phi) vector - auto radii = Kokkos::View( + auto radii = Kokkos::View( "MLS_EX::radii", target_points_num); - constexpr double epsilon = std::numeric_limits::epsilon(); + constexpr float epsilon = std::numeric_limits::epsilon(); Kokkos::parallel_for( "MLS_EX::radii_computation", Kokkos::RangePolicy(0, target_points_num), KOKKOS_LAMBDA (const int i) { - double radius = 10. * epsilon; + float radius = 10. * epsilon; for (int j = 0; j < num_neighbors; j++) { - double norm = ArborX::Details::distance( + float norm = ArborX::Details::distance( tr_source_points(i, j), ArborX::Point{0., 0., 0.}); radius = (radius < norm) ? 
norm : radius; @@ -146,7 +146,7 @@ int main(int argc, char *argv[]) }); // Compute the weight (phi) vector - auto phi = Kokkos::View( + auto phi = Kokkos::View( "MLS_EX::phi", target_points_num, num_neighbors); Kokkos::parallel_for( "MLS_EX::phi_computation", @@ -155,7 +155,7 @@ int main(int argc, char *argv[]) auto rbf = RBFWendland_0 { radii(i) }; for (int j = 0; j < phi.extent(1); j++) { - double norm = ArborX::Details::distance( + float norm = ArborX::Details::distance( tr_source_points(i, j), ArborX::Point{0., 0., 0.}); phi(i, j) = rbf(norm); @@ -163,7 +163,7 @@ int main(int argc, char *argv[]) }); // Compute multivariable Vandermonde (P) matrix - auto p = Kokkos::View( + auto p = Kokkos::View( "MLS_EX::vandermonde", target_points_num, num_neighbors, @@ -182,7 +182,7 @@ int main(int argc, char *argv[]) }); // Compute moment (A) matrix - auto a = Kokkos::View( + auto a = Kokkos::View( "MLS_EX::A", target_points_num, MVPolynomialBasis_3D::size, @@ -198,7 +198,7 @@ int main(int argc, char *argv[]) MVPolynomialBasis_3D::size }), KOKKOS_LAMBDA (const int i, const int j, const int k) { - double tmp = 0; + float tmp = 0; for (int l = 0; l < num_neighbors; l++) { tmp += p(i, l, j) * p(i, l, k) * phi(i, l); } @@ -211,7 +211,7 @@ int main(int argc, char *argv[]) // first one are applied to the second // Kind of works, errors out quite often. // A better method should be employed (SVD?) 
- auto a_inv = Kokkos::View( + auto a_inv = Kokkos::View( "MLS_EX::A_inv", target_points_num, MVPolynomialBasis_3D::size, @@ -237,7 +237,7 @@ int main(int argc, char *argv[]) } // We divide the line with said value - double tmp = a(i, k, j); + float tmp = a(i, k, j); for (int l = 0; l < MVPolynomialBasis_3D::size; l++) { a(i, k, l) /= tmp; a_inv(i, k, l) /= tmp; @@ -246,7 +246,7 @@ int main(int argc, char *argv[]) // If line and column are not the same, move the column to the top if (k != j) { for (int l = 0; l < MVPolynomialBasis_3D::size; l++) { - double tmp = a(i, k, l); + float tmp = a(i, k, l); a(i, k, l) = a(i, j, l); a(i, j, l) = tmp; @@ -259,7 +259,7 @@ int main(int argc, char *argv[]) // Now, set at zero all other elements of the column (Ll <- Ll - a*Lj) for (int l = 0; l < MVPolynomialBasis_3D::size; l++) { if (l == j || a(i, l, j) == 0.0) continue; - double mul = a(i, l, j); + float mul = a(i, l, j); for (int m = 0; m < MVPolynomialBasis_3D::size; m++) { a(i, l, m) -= mul * a(i, j, m); @@ -273,14 +273,14 @@ int main(int argc, char *argv[]) }); // Compute the coefficients - auto coeffs = Kokkos::View( + auto coeffs = Kokkos::View( "MLS_EX::coefficients", target_points_num, num_neighbors); Kokkos::parallel_for( "MLS_EX::coefficients_computation", Kokkos::MDRangePolicy>( {0, 0}, {target_points_num, num_neighbors}), KOKKOS_LAMBDA (const int i, const int j) { - double tmp = 0; + float tmp = 0; for (int k = 0; k < MVPolynomialBasis_3D::size; k++) { tmp += a_inv(i, 0, k) * p(i, j, k) * phi(i, j); @@ -290,7 +290,7 @@ int main(int argc, char *argv[]) }); // Compute source values - auto source_values = Kokkos::View( + auto source_values = Kokkos::View( "MLS_EX::source_values", source_points_num); Kokkos::parallel_for( "MLS_EX::source_evaluation", @@ -300,13 +300,13 @@ int main(int argc, char *argv[]) }); // Compute target values via interpolation - auto target_values = Kokkos::View( + auto target_values = Kokkos::View( "MLS_EX::target_values", target_points_num); 
Kokkos::parallel_for( "MLS_EX::target_interpolation", Kokkos::RangePolicy(0, target_points_num), KOKKOS_LAMBDA (const int i) { - double tmp = 0; + float tmp = 0; for (int j = offsets(i); j < offsets(i+i); j++) { tmp += coeffs(i, j - offsets(i)) * source_values(indices(j)); } @@ -314,7 +314,7 @@ int main(int argc, char *argv[]) }); // Compute target values via evaluation - auto target_values_exact = Kokkos::View( + auto target_values_exact = Kokkos::View( "MLS_EX::target_values_exact", target_points_num); Kokkos::parallel_for( "MLS_EX::target_evaluation", @@ -332,7 +332,7 @@ int main(int argc, char *argv[]) Kokkos::deep_copy(target_values_exact_host, target_values_exact); for (int i = 0; i < target_points_num; i++) { - double error = + float error = Kokkos::abs(target_values_host(i) - target_values_exact_host(i)); std::cout << "==== Target " << i << '\n' << "Interpolation: " << target_values_host(i) << '\n' From da4671bbb5142aa1b59341bbe5f209a6660fa745 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 1 Aug 2023 16:04:47 -0400 Subject: [PATCH 08/44] clang format --- .../moving_least_squares.cpp | 405 +++++++++--------- 1 file changed, 203 insertions(+), 202 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index bcec2991c..af23ba5eb 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -15,6 +15,7 @@ // (http://dx.doi.org/10.1016/j.jcp.2015.11.055) #include + #include #include @@ -48,7 +49,7 @@ struct MVPolynomialBasis_3D KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) { return p[2] + p[0]; -} +} int main(int argc, char *argv[]) { @@ -59,48 +60,45 @@ int main(int argc, char *argv[]) constexpr std::size_t target_points_num = 4; auto source_points = Kokkos::View( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "MLS_EX::source_points"), - source_points_num); + 
Kokkos::view_alloc(Kokkos::WithoutInitializing, "MLS_EX::source_points"), + source_points_num); auto target_points = Kokkos::View( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "MLS_EX::target_points"), - target_points_num); + Kokkos::view_alloc(Kokkos::WithoutInitializing, "MLS_EX::target_points"), + target_points_num); auto target_points_host = Kokkos::create_mirror_view(target_points); // Generate source points (Organized within a [-10, 10]^3 cube) Kokkos::parallel_for( - "MLS_EX::source_points_init", - Kokkos::MDRangePolicy>( - {0, 0, 0}, {cube_side, cube_side, cube_side}), - KOKKOS_LAMBDA (const int i, const int j, const int k) { - source_points(i * cube_side * cube_side + - j * cube_side + - k ) = ArborX::Point { - 20.f * (float(i) / (cube_side - 1) - .5f), - 20.f * (float(j) / (cube_side - 1) - .5f), - 20.f * (float(k) / (cube_side - 1) - .5f) - }; - }); + "MLS_EX::source_points_init", + Kokkos::MDRangePolicy>( + {0, 0, 0}, {cube_side, cube_side, cube_side}), + KOKKOS_LAMBDA(int const i, int const j, int const k) { + source_points(i * cube_side * cube_side + j * cube_side + k) = + ArborX::Point{20.f * (float(i) / (cube_side - 1) - .5f), + 20.f * (float(j) / (cube_side - 1) - .5f), + 20.f * (float(k) / (cube_side - 1) - .5f)}; + }); // Generate target points - target_points_host(0) = ArborX::Point { 0.f, 0.f, 0.f }; - target_points_host(1) = ArborX::Point { 5.f, 5.f, 5.f }; - target_points_host(2) = ArborX::Point { -5.f, 5.f, 3.f }; - target_points_host(3) = ArborX::Point { 1.f, -3.3f, 7.f }; + target_points_host(0) = ArborX::Point{0.f, 0.f, 0.f}; + target_points_host(1) = ArborX::Point{5.f, 5.f, 5.f}; + target_points_host(2) = ArborX::Point{-5.f, 5.f, 3.f}; + target_points_host(3) = ArborX::Point{1.f, -3.3f, 7.f}; Kokkos::deep_copy(target_points, target_points_host); // Organize source points as tree ArborX::BVH source_tree(ExecutionSpace{}, source_points); - + // Create the queries // For each target point we query the closest source points - auto 
queries = Kokkos::View*, MemorySpace>( - "MLS_EX::queries", target_points_num); + auto queries = Kokkos::View *, MemorySpace>( + "MLS_EX::queries", target_points_num); Kokkos::parallel_for( - "MLS_EX::make_queries", - Kokkos::RangePolicy(0, target_points_num), - KOKKOS_LAMBDA (const int i) { - queries(i) = ArborX::nearest(target_points(i), num_neighbors); - }); + "MLS_EX::make_queries", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA(int const i) { + queries(i) = ArborX::nearest(target_points(i), num_neighbors); + }); // Perform the query auto indices = Kokkos::View("MLS_EX::indices", 0); @@ -110,230 +108,233 @@ int main(int argc, char *argv[]) // Now that we have the neighbors, we recompute their position using // their target point as the origin. // This is used as an optimisation later in the algorithm - auto tr_source_points = Kokkos::View( - "MLS_EX::tr_source_points", target_points_num, num_neighbors); + auto tr_source_points = Kokkos::View( + "MLS_EX::tr_source_points", target_points_num, num_neighbors); Kokkos::parallel_for( - "MLS_EX::transform_source_points", - Kokkos::RangePolicy(0, target_points_num), - KOKKOS_LAMBDA (const int i) { - for (int j = offsets(i); j < offsets(i+1); j++) { - tr_source_points(i, j - offsets(i)) = ArborX::Point { - source_points(indices(j))[0] - target_points(i)[0], - source_points(indices(j))[1] - target_points(i)[1], - source_points(indices(j))[2] - target_points(i)[2], - }; - } - }); + "MLS_EX::transform_source_points", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA(int const i) { + for (int j = offsets(i); j < offsets(i + 1); j++) + { + tr_source_points(i, j - offsets(i)) = ArborX::Point{ + source_points(indices(j))[0] - target_points(i)[0], + source_points(indices(j))[1] - target_points(i)[1], + source_points(indices(j))[2] - target_points(i)[2], + }; + } + }); // Compute the radii for the weight (phi) vector - auto radii = Kokkos::View( - "MLS_EX::radii", target_points_num); + auto radii = + 
Kokkos::View("MLS_EX::radii", target_points_num); constexpr float epsilon = std::numeric_limits::epsilon(); Kokkos::parallel_for( - "MLS_EX::radii_computation", - Kokkos::RangePolicy(0, target_points_num), - KOKKOS_LAMBDA (const int i) { - float radius = 10. * epsilon; - - for (int j = 0; j < num_neighbors; j++) { - float norm = ArborX::Details::distance( - tr_source_points(i, j), - ArborX::Point{0., 0., 0.}); - radius = (radius < norm) ? norm : radius; - } + "MLS_EX::radii_computation", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA(int const i) { + float radius = 10. * epsilon; + + for (int j = 0; j < num_neighbors; j++) + { + float norm = ArborX::Details::distance(tr_source_points(i, j), + ArborX::Point{0., 0., 0.}); + radius = (radius < norm) ? norm : radius; + } - radii(i) = 1.1 * radius; - }); + radii(i) = 1.1 * radius; + }); // Compute the weight (phi) vector - auto phi = Kokkos::View( - "MLS_EX::phi", target_points_num, num_neighbors); + auto phi = Kokkos::View( + "MLS_EX::phi", target_points_num, num_neighbors); Kokkos::parallel_for( - "MLS_EX::phi_computation", - Kokkos::RangePolicy(0, phi.extent(0)), - KOKKOS_LAMBDA (const int i) { - auto rbf = RBFWendland_0 { radii(i) }; - - for (int j = 0; j < phi.extent(1); j++) { - float norm = ArborX::Details::distance( - tr_source_points(i, j), - ArborX::Point{0., 0., 0.}); - phi(i, j) = rbf(norm); - } - }); + "MLS_EX::phi_computation", + Kokkos::RangePolicy(0, phi.extent(0)), + KOKKOS_LAMBDA(int const i) { + auto rbf = RBFWendland_0{radii(i)}; + + for (int j = 0; j < phi.extent(1); j++) + { + float norm = ArborX::Details::distance(tr_source_points(i, j), + ArborX::Point{0., 0., 0.}); + phi(i, j) = rbf(norm); + } + }); // Compute multivariable Vandermonde (P) matrix - auto p = Kokkos::View( - "MLS_EX::vandermonde", - target_points_num, - num_neighbors, - MVPolynomialBasis_3D::size - ); + auto p = Kokkos::View( + "MLS_EX::vandermonde", target_points_num, num_neighbors, + MVPolynomialBasis_3D::size); 
Kokkos::parallel_for( - "MLS_EX::vandermonde_computation", - Kokkos::MDRangePolicy>( - {0, 0}, {target_points_num, num_neighbors}), - KOKKOS_LAMBDA (const int i, const int j) { - auto basis = MVPolynomialBasis_3D{}(tr_source_points(i, j)); - - for (int k = 0; k < MVPolynomialBasis_3D::size; k++) { - p(i, j, k) = basis[k]; - } - }); + "MLS_EX::vandermonde_computation", + Kokkos::MDRangePolicy>( + {0, 0}, {target_points_num, num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + auto basis = MVPolynomialBasis_3D{}(tr_source_points(i, j)); + + for (int k = 0; k < MVPolynomialBasis_3D::size; k++) + { + p(i, j, k) = basis[k]; + } + }); // Compute moment (A) matrix - auto a = Kokkos::View( - "MLS_EX::A", - target_points_num, - MVPolynomialBasis_3D::size, - MVPolynomialBasis_3D::size - ); + auto a = Kokkos::View("MLS_EX::A", target_points_num, + MVPolynomialBasis_3D::size, + MVPolynomialBasis_3D::size); Kokkos::parallel_for( - "MLS_EX::A_computation", - Kokkos::MDRangePolicy>( - {0, 0, 0}, - { - target_points_num, - MVPolynomialBasis_3D::size, - MVPolynomialBasis_3D::size - }), - KOKKOS_LAMBDA (const int i, const int j, const int k) { - float tmp = 0; - for (int l = 0; l < num_neighbors; l++) { - tmp += p(i, l, j) * p(i, l, k) * phi(i, l); - } - - a(i, j, k) = tmp; - }); + "MLS_EX::A_computation", + Kokkos::MDRangePolicy>( + {0, 0, 0}, {target_points_num, MVPolynomialBasis_3D::size, + MVPolynomialBasis_3D::size}), + KOKKOS_LAMBDA(int const i, int const j, int const k) { + float tmp = 0; + for (int l = 0; l < num_neighbors; l++) + { + tmp += p(i, l, j) * p(i, l, k) * phi(i, l); + } + + a(i, j, k) = tmp; + }); // Inverse moment matrix // Gaussian inverse method. Both matrix are used and modifications on the // first one are applied to the second // Kind of works, errors out quite often. // A better method should be employed (SVD?) 
- auto a_inv = Kokkos::View( - "MLS_EX::A_inv", - target_points_num, - MVPolynomialBasis_3D::size, - MVPolynomialBasis_3D::size - ); + auto a_inv = Kokkos::View( + "MLS_EX::A_inv", target_points_num, MVPolynomialBasis_3D::size, + MVPolynomialBasis_3D::size); Kokkos::parallel_for( - "MLS_EX::A_inv_computation", - Kokkos::RangePolicy(0, target_points_num), - KOKKOS_LAMBDA (const int i) { - for (int j = 0; j < MVPolynomialBasis_3D::size; j++) { - for (int k = 0; k < MVPolynomialBasis_3D::size; k++) { - a_inv(i, j, k) = (j == k) * 1.; - } - } - - // This needs to be done for every column - for (int j = 0; j < MVPolynomialBasis_3D::size; j++) { - - // We find the line with a non-negative element on column j - int k = j; - for (; k < MVPolynomialBasis_3D::size; k++) { - if (a(i, k, j) != 0.0) break; + "MLS_EX::A_inv_computation", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA(int const i) { + for (int j = 0; j < MVPolynomialBasis_3D::size; j++) + { + for (int k = 0; k < MVPolynomialBasis_3D::size; k++) + { + a_inv(i, j, k) = (j == k) * 1.; + } } - // We divide the line with said value - float tmp = a(i, k, j); - for (int l = 0; l < MVPolynomialBasis_3D::size; l++) { - a(i, k, l) /= tmp; - a_inv(i, k, l) /= tmp; - } + // This needs to be done for every column + for (int j = 0; j < MVPolynomialBasis_3D::size; j++) + { - // If line and column are not the same, move the column to the top - if (k != j) { - for (int l = 0; l < MVPolynomialBasis_3D::size; l++) { - float tmp = a(i, k, l); - a(i, k, l) = a(i, j, l); - a(i, j, l) = tmp; + // We find the line with a non-negative element on column j + int k = j; + for (; k < MVPolynomialBasis_3D::size; k++) + { + if (a(i, k, j) != 0.0) + break; + } - tmp = a_inv(i, k, l); - a_inv(i, k, l) = a_inv(i, j, l); - a_inv(i, j, l) = tmp; + // We divide the line with said value + float tmp = a(i, k, j); + for (int l = 0; l < MVPolynomialBasis_3D::size; l++) + { + a(i, k, l) /= tmp; + a_inv(i, k, l) /= tmp; } - } - // Now, set 
at zero all other elements of the column (Ll <- Ll - a*Lj) - for (int l = 0; l < MVPolynomialBasis_3D::size; l++) { - if (l == j || a(i, l, j) == 0.0) continue; - float mul = a(i, l, j); + // If line and column are not the same, move the column to the top + if (k != j) + { + for (int l = 0; l < MVPolynomialBasis_3D::size; l++) + { + float tmp = a(i, k, l); + a(i, k, l) = a(i, j, l); + a(i, j, l) = tmp; + + tmp = a_inv(i, k, l); + a_inv(i, k, l) = a_inv(i, j, l); + a_inv(i, j, l) = tmp; + } + } - for (int m = 0; m < MVPolynomialBasis_3D::size; m++) { - a(i, l, m) -= mul * a(i, j, m); - a_inv(i, l, m) -= mul * a_inv(i, j, m); + // Now, set at zero all other elements of the column (Ll <- Ll - a*Lj) + for (int l = 0; l < MVPolynomialBasis_3D::size; l++) + { + if (l == j || a(i, l, j) == 0.0) + continue; + float mul = a(i, l, j); + + for (int m = 0; m < MVPolynomialBasis_3D::size; m++) + { + a(i, l, m) -= mul * a(i, j, m); + a_inv(i, l, m) -= mul * a_inv(i, j, m); + } + a(i, l, j) = 0.0; } - a(i, l, j) = 0.0; - } - // Now a_inv should contain the inverse of a - } - }); + // Now a_inv should contain the inverse of a + } + }); // Compute the coefficients - auto coeffs = Kokkos::View( - "MLS_EX::coefficients", target_points_num, num_neighbors); + auto coeffs = Kokkos::View( + "MLS_EX::coefficients", target_points_num, num_neighbors); Kokkos::parallel_for( - "MLS_EX::coefficients_computation", - Kokkos::MDRangePolicy>( - {0, 0}, {target_points_num, num_neighbors}), - KOKKOS_LAMBDA (const int i, const int j) { - float tmp = 0; - - for (int k = 0; k < MVPolynomialBasis_3D::size; k++) { - tmp += a_inv(i, 0, k) * p(i, j, k) * phi(i, j); - } + "MLS_EX::coefficients_computation", + Kokkos::MDRangePolicy>( + {0, 0}, {target_points_num, num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + float tmp = 0; + + for (int k = 0; k < MVPolynomialBasis_3D::size; k++) + { + tmp += a_inv(i, 0, k) * p(i, j, k) * phi(i, j); + } - coeffs(i, j) = tmp; - }); + coeffs(i, j) = tmp; + }); 
// Compute source values - auto source_values = Kokkos::View( - "MLS_EX::source_values", source_points_num); + auto source_values = Kokkos::View( + "MLS_EX::source_values", source_points_num); Kokkos::parallel_for( - "MLS_EX::source_evaluation", - Kokkos::RangePolicy(0, source_points_num), - KOKKOS_LAMBDA (const int i) { - source_values(i) = manufactured_solution(source_points(i)); - }); + "MLS_EX::source_evaluation", + Kokkos::RangePolicy(0, source_points_num), + KOKKOS_LAMBDA(int const i) { + source_values(i) = manufactured_solution(source_points(i)); + }); // Compute target values via interpolation - auto target_values = Kokkos::View( - "MLS_EX::target_values", target_points_num); + auto target_values = Kokkos::View( + "MLS_EX::target_values", target_points_num); Kokkos::parallel_for( - "MLS_EX::target_interpolation", - Kokkos::RangePolicy(0, target_points_num), - KOKKOS_LAMBDA (const int i) { - float tmp = 0; - for (int j = offsets(i); j < offsets(i+i); j++) { - tmp += coeffs(i, j - offsets(i)) * source_values(indices(j)); - } - target_values(i) = tmp; - }); + "MLS_EX::target_interpolation", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA(int const i) { + float tmp = 0; + for (int j = offsets(i); j < offsets(i + i); j++) + { + tmp += coeffs(i, j - offsets(i)) * source_values(indices(j)); + } + target_values(i) = tmp; + }); // Compute target values via evaluation - auto target_values_exact = Kokkos::View( - "MLS_EX::target_values_exact", target_points_num); + auto target_values_exact = Kokkos::View( + "MLS_EX::target_values_exact", target_points_num); Kokkos::parallel_for( - "MLS_EX::target_evaluation", - Kokkos::RangePolicy(0, target_points_num), - KOKKOS_LAMBDA (const int i) { - target_values_exact(i) = manufactured_solution(target_points(i)); - }); + "MLS_EX::target_evaluation", + Kokkos::RangePolicy(0, target_points_num), + KOKKOS_LAMBDA(int const i) { + target_values_exact(i) = manufactured_solution(target_points(i)); + }); // Show difference - 
auto target_values_host = - Kokkos::create_mirror_view(target_values); + auto target_values_host = Kokkos::create_mirror_view(target_values); Kokkos::deep_copy(target_values_host, target_values); auto target_values_exact_host = - Kokkos::create_mirror_view(target_values_exact); + Kokkos::create_mirror_view(target_values_exact); Kokkos::deep_copy(target_values_exact_host, target_values_exact); - for (int i = 0; i < target_points_num; i++) { + for (int i = 0; i < target_points_num; i++) + { float error = - Kokkos::abs(target_values_host(i) - target_values_exact_host(i)); + Kokkos::abs(target_values_host(i) - target_values_exact_host(i)); std::cout << "==== Target " << i << '\n' << "Interpolation: " << target_values_host(i) << '\n' << "Real value : " << target_values_exact_host(i) << '\n' From 4f44e1c453c06cceb9b9161edecb9f5052dfc725 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Wed, 2 Aug 2023 11:46:57 -0400 Subject: [PATCH 09/44] Fixup (memory and kernel names, floats, error, exec space) --- examples/moving_least_squares/CMakeLists.txt | 6 +- .../moving_least_squares.cpp | 119 +++++++++--------- 2 files changed, 65 insertions(+), 60 deletions(-) diff --git a/examples/moving_least_squares/CMakeLists.txt b/examples/moving_least_squares/CMakeLists.txt index d9d9c6e45..a22bdbe30 100644 --- a/examples/moving_least_squares/CMakeLists.txt +++ b/examples/moving_least_squares/CMakeLists.txt @@ -1,3 +1,3 @@ -add_executable(ArborX_Example_MovingLeastSquare.exe moving_least_squares.cpp) -target_link_libraries(ArborX_Example_MovingLeastSquare.exe ArborX::ArborX) -add_test(NAME ArborX_Example_MovingLeastSquare COMMAND ArborX_Example_MovingLeastSquare.exe) +add_executable(ArborX_Example_MovingLeastSquares.exe moving_least_squares.cpp) +target_link_libraries(ArborX_Example_MovingLeastSquares.exe ArborX::ArborX) +add_test(NAME ArborX_Example_MovingLeastSquares COMMAND ArborX_Example_MovingLeastSquares.exe) diff --git a/examples/moving_least_squares/moving_least_squares.cpp 
b/examples/moving_least_squares/moving_least_squares.cpp index af23ba5eb..847a9ac21 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -28,7 +28,7 @@ struct RBFWendland_0 KOKKOS_INLINE_FUNCTION float operator()(float x) { x /= _radius; - return (1. - x) * (1. - x); + return (1.f - x) * (1.f - x); } float _radius; @@ -41,7 +41,7 @@ struct MVPolynomialBasis_3D KOKKOS_INLINE_FUNCTION Kokkos::Array operator()(ArborX::Point const &p) const { - return {{1., p[0], p[1], p[2]}}; + return {{1.f, p[0], p[1], p[2]}}; } }; @@ -59,19 +59,21 @@ int main(int argc, char *argv[]) constexpr std::size_t source_points_num = cube_side * cube_side * cube_side; constexpr std::size_t target_points_num = 4; + ExecutionSpace space{}; + auto source_points = Kokkos::View( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "MLS_EX::source_points"), + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::source_points"), source_points_num); auto target_points = Kokkos::View( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "MLS_EX::target_points"), + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::target_points"), target_points_num); auto target_points_host = Kokkos::create_mirror_view(target_points); // Generate source points (Organized within a [-10, 10]^3 cube) Kokkos::parallel_for( - "MLS_EX::source_points_init", + "Example::source_points_init", Kokkos::MDRangePolicy>( - {0, 0, 0}, {cube_side, cube_side, cube_side}), + space, {0, 0, 0}, {cube_side, cube_side, cube_side}), KOKKOS_LAMBDA(int const i, int const j, int const k) { source_points(i * cube_side * cube_side + j * cube_side + k) = ArborX::Point{20.f * (float(i) / (cube_side - 1) - .5f), @@ -87,32 +89,32 @@ int main(int argc, char *argv[]) Kokkos::deep_copy(target_points, target_points_host); // Organize source points as tree - ArborX::BVH source_tree(ExecutionSpace{}, source_points); + ArborX::BVH source_tree(space, source_points); // 
Create the queries // For each target point we query the closest source points auto queries = Kokkos::View *, MemorySpace>( - "MLS_EX::queries", target_points_num); + "Example::queries", target_points_num); Kokkos::parallel_for( - "MLS_EX::make_queries", - Kokkos::RangePolicy(0, target_points_num), + "Example::make_queries", + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { queries(i) = ArborX::nearest(target_points(i), num_neighbors); }); // Perform the query - auto indices = Kokkos::View("MLS_EX::indices", 0); - auto offsets = Kokkos::View("MLS_EX::offsets", 0); - source_tree.query(ExecutionSpace{}, queries, indices, offsets); + auto indices = Kokkos::View("Example::indices", 0); + auto offsets = Kokkos::View("Example::offsets", 0); + source_tree.query(space, queries, indices, offsets); // Now that we have the neighbors, we recompute their position using // their target point as the origin. // This is used as an optimisation later in the algorithm auto tr_source_points = Kokkos::View( - "MLS_EX::tr_source_points", target_points_num, num_neighbors); + "Example::tr_source_points", target_points_num, num_neighbors); Kokkos::parallel_for( - "MLS_EX::transform_source_points", - Kokkos::RangePolicy(0, target_points_num), + "Example::transform_source_points", + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { for (int j = offsets(i); j < offsets(i + 1); j++) { @@ -126,49 +128,49 @@ int main(int argc, char *argv[]) // Compute the radii for the weight (phi) vector auto radii = - Kokkos::View("MLS_EX::radii", target_points_num); + Kokkos::View("Example::radii", target_points_num); constexpr float epsilon = std::numeric_limits::epsilon(); Kokkos::parallel_for( - "MLS_EX::radii_computation", - Kokkos::RangePolicy(0, target_points_num), + "Example::radii_computation", + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { - float radius = 10. 
* epsilon; + float radius = 10.f * epsilon; for (int j = 0; j < num_neighbors; j++) { float norm = ArborX::Details::distance(tr_source_points(i, j), - ArborX::Point{0., 0., 0.}); + ArborX::Point{0.f, 0.f, 0.f}); radius = (radius < norm) ? norm : radius; } - radii(i) = 1.1 * radius; + radii(i) = 1.1f * radius; }); // Compute the weight (phi) vector auto phi = Kokkos::View( - "MLS_EX::phi", target_points_num, num_neighbors); + "Example::phi", target_points_num, num_neighbors); Kokkos::parallel_for( - "MLS_EX::phi_computation", - Kokkos::RangePolicy(0, phi.extent(0)), + "Example::phi_computation", + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { auto rbf = RBFWendland_0{radii(i)}; - for (int j = 0; j < phi.extent(1); j++) + for (int j = 0; j < num_neighbors; j++) { float norm = ArborX::Details::distance(tr_source_points(i, j), - ArborX::Point{0., 0., 0.}); + ArborX::Point{0.f, 0.f, 0.f}); phi(i, j) = rbf(norm); } }); // Compute multivariable Vandermonde (P) matrix auto p = Kokkos::View( - "MLS_EX::vandermonde", target_points_num, num_neighbors, + "Example::vandermonde", target_points_num, num_neighbors, MVPolynomialBasis_3D::size); Kokkos::parallel_for( - "MLS_EX::vandermonde_computation", + "Example::vandermonde_computation", Kokkos::MDRangePolicy>( - {0, 0}, {target_points_num, num_neighbors}), + space, {0, 0}, {target_points_num, num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { auto basis = MVPolynomialBasis_3D{}(tr_source_points(i, j)); @@ -179,14 +181,15 @@ int main(int argc, char *argv[]) }); // Compute moment (A) matrix - auto a = Kokkos::View("MLS_EX::A", target_points_num, + auto a = Kokkos::View("Example::A", target_points_num, MVPolynomialBasis_3D::size, MVPolynomialBasis_3D::size); Kokkos::parallel_for( - "MLS_EX::A_computation", + "Example::A_computation", Kokkos::MDRangePolicy>( - {0, 0, 0}, {target_points_num, MVPolynomialBasis_3D::size, - MVPolynomialBasis_3D::size}), + space, {0, 0, 0}, + {target_points_num, 
MVPolynomialBasis_3D::size, + MVPolynomialBasis_3D::size}), KOKKOS_LAMBDA(int const i, int const j, int const k) { float tmp = 0; for (int l = 0; l < num_neighbors; l++) @@ -203,17 +206,17 @@ int main(int argc, char *argv[]) // Kind of works, errors out quite often. // A better method should be employed (SVD?) auto a_inv = Kokkos::View( - "MLS_EX::A_inv", target_points_num, MVPolynomialBasis_3D::size, + "Example::A_inv", target_points_num, MVPolynomialBasis_3D::size, MVPolynomialBasis_3D::size); Kokkos::parallel_for( - "MLS_EX::A_inv_computation", - Kokkos::RangePolicy(0, target_points_num), + "Example::A_inv_computation", + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { for (int j = 0; j < MVPolynomialBasis_3D::size; j++) { for (int k = 0; k < MVPolynomialBasis_3D::size; k++) { - a_inv(i, j, k) = (j == k) * 1.; + a_inv(i, j, k) = (j == k) * 1.f; } } @@ -225,7 +228,7 @@ int main(int argc, char *argv[]) int k = j; for (; k < MVPolynomialBasis_3D::size; k++) { - if (a(i, k, j) != 0.0) + if (a(i, k, j) != 0.f) break; } @@ -255,7 +258,7 @@ int main(int argc, char *argv[]) // Now, set at zero all other elements of the column (Ll <- Ll - a*Lj) for (int l = 0; l < MVPolynomialBasis_3D::size; l++) { - if (l == j || a(i, l, j) == 0.0) + if (l == j || a(i, l, j) == 0.f) continue; float mul = a(i, l, j); @@ -264,7 +267,7 @@ int main(int argc, char *argv[]) a(i, l, m) -= mul * a(i, j, m); a_inv(i, l, m) -= mul * a_inv(i, j, m); } - a(i, l, j) = 0.0; + a(i, l, j) = 0.f; } // Now a_inv should contain the inverse of a @@ -273,11 +276,11 @@ int main(int argc, char *argv[]) // Compute the coefficients auto coeffs = Kokkos::View( - "MLS_EX::coefficients", target_points_num, num_neighbors); + "Example::coefficients", target_points_num, num_neighbors); Kokkos::parallel_for( - "MLS_EX::coefficients_computation", + "Example::coefficients_computation", Kokkos::MDRangePolicy>( - {0, 0}, {target_points_num, num_neighbors}), + space, {0, 0}, 
{target_points_num, num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { float tmp = 0; @@ -291,20 +294,20 @@ int main(int argc, char *argv[]) // Compute source values auto source_values = Kokkos::View( - "MLS_EX::source_values", source_points_num); + "Example::source_values", source_points_num); Kokkos::parallel_for( - "MLS_EX::source_evaluation", - Kokkos::RangePolicy(0, source_points_num), + "Example::source_evaluation", + Kokkos::RangePolicy(space, 0, source_points_num), KOKKOS_LAMBDA(int const i) { source_values(i) = manufactured_solution(source_points(i)); }); // Compute target values via interpolation auto target_values = Kokkos::View( - "MLS_EX::target_values", target_points_num); + "Example::target_values", target_points_num); Kokkos::parallel_for( - "MLS_EX::target_interpolation", - Kokkos::RangePolicy(0, target_points_num), + "Example::target_interpolation", + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { float tmp = 0; for (int j = offsets(i); j < offsets(i + i); j++) @@ -316,10 +319,10 @@ int main(int argc, char *argv[]) // Compute target values via evaluation auto target_values_exact = Kokkos::View( - "MLS_EX::target_values_exact", target_points_num); + "Example::target_values_exact", target_points_num); Kokkos::parallel_for( - "MLS_EX::target_evaluation", - Kokkos::RangePolicy(0, target_points_num), + "Example::target_evaluation", + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { target_values_exact(i) = manufactured_solution(target_points(i)); }); @@ -331,13 +334,15 @@ int main(int argc, char *argv[]) Kokkos::create_mirror_view(target_values_exact); Kokkos::deep_copy(target_values_exact_host, target_values_exact); + float error = 0.f; for (int i = 0; i < target_points_num; i++) { - float error = - Kokkos::abs(target_values_host(i) - target_values_exact_host(i)); + error = Kokkos::max( + Kokkos::abs(target_values_host(i) - target_values_exact_host(i)), + error); std::cout << "==== 
Target " << i << '\n' << "Interpolation: " << target_values_host(i) << '\n' - << "Real value : " << target_values_exact_host(i) << '\n' - << "Absolute err.: " << error << "\n====\n"; + << "Real value : " << target_values_exact_host(i) << '\n'; } + std::cout << "====\nMaximum error: " << error << std::endl; } From 15b150bc3b275cef382791a3d48ca42f3bc2d278 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Wed, 2 Aug 2023 13:35:41 -0400 Subject: [PATCH 10/44] Modifies predicates array into AccessTraits --- .../moving_least_squares.cpp | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 847a9ac21..4440e8ef7 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -45,6 +45,28 @@ struct MVPolynomialBasis_3D } }; +struct TargetPoints +{ + Kokkos::View target_points; + std::size_t num_neighbors; +}; + +template <> +struct ArborX::AccessTraits +{ + static KOKKOS_FUNCTION std::size_t size(TargetPoints const &tp) + { + return tp.target_points.extent(0); + } + + static KOKKOS_FUNCTION auto get(TargetPoints const &tp, std::size_t i) + { + return ArborX::nearest(tp.target_points(i), tp.num_neighbors); + } + + using memory_space = MemorySpace; +}; + // Function to approximate KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) { @@ -54,7 +76,7 @@ KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) int main(int argc, char *argv[]) { Kokkos::ScopeGuard guard(argc, argv); - constexpr std::size_t num_neighbors = 5; + constexpr std::size_t num_neighbors = 7; constexpr std::size_t cube_side = 4; constexpr std::size_t source_points_num = cube_side * cube_side * cube_side; constexpr std::size_t target_points_num = 4; @@ -91,21 +113,11 @@ int main(int argc, char *argv[]) // Organize source points as tree ArborX::BVH 
source_tree(space, source_points); - // Create the queries - // For each target point we query the closest source points - auto queries = Kokkos::View *, MemorySpace>( - "Example::queries", target_points_num); - Kokkos::parallel_for( - "Example::make_queries", - Kokkos::RangePolicy(space, 0, target_points_num), - KOKKOS_LAMBDA(int const i) { - queries(i) = ArborX::nearest(target_points(i), num_neighbors); - }); - // Perform the query auto indices = Kokkos::View("Example::indices", 0); auto offsets = Kokkos::View("Example::offsets", 0); - source_tree.query(space, queries, indices, offsets); + source_tree.query(space, TargetPoints{target_points, num_neighbors}, indices, + offsets); // Now that we have the neighbors, we recompute their position using // their target point as the origin. From d69f348ad305907b4014b975fe4f46d34d115cb7 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Wed, 2 Aug 2023 13:42:46 -0400 Subject: [PATCH 11/44] Correct declaration convention --- .../moving_least_squares.cpp | 47 +++++++++---------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 4440e8ef7..182c66e90 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -83,10 +83,10 @@ int main(int argc, char *argv[]) ExecutionSpace space{}; - auto source_points = Kokkos::View( + Kokkos::View source_points( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::source_points"), source_points_num); - auto target_points = Kokkos::View( + Kokkos::View target_points( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::target_points"), target_points_num); auto target_points_host = Kokkos::create_mirror_view(target_points); @@ -114,15 +114,15 @@ int main(int argc, char *argv[]) ArborX::BVH source_tree(space, source_points); // Perform the query - auto indices = 
Kokkos::View("Example::indices", 0); - auto offsets = Kokkos::View("Example::offsets", 0); + Kokkos::View indices("Example::indices", 0); + Kokkos::View offsets("Example::offsets", 0); source_tree.query(space, TargetPoints{target_points, num_neighbors}, indices, offsets); // Now that we have the neighbors, we recompute their position using // their target point as the origin. // This is used as an optimisation later in the algorithm - auto tr_source_points = Kokkos::View( + Kokkos::View tr_source_points( "Example::tr_source_points", target_points_num, num_neighbors); Kokkos::parallel_for( "Example::transform_source_points", @@ -139,8 +139,7 @@ int main(int argc, char *argv[]) }); // Compute the radii for the weight (phi) vector - auto radii = - Kokkos::View("Example::radii", target_points_num); + Kokkos::View radii("Example::radii", target_points_num); constexpr float epsilon = std::numeric_limits::epsilon(); Kokkos::parallel_for( "Example::radii_computation", @@ -159,13 +158,13 @@ int main(int argc, char *argv[]) }); // Compute the weight (phi) vector - auto phi = Kokkos::View( - "Example::phi", target_points_num, num_neighbors); + Kokkos::View phi("Example::phi", target_points_num, + num_neighbors); Kokkos::parallel_for( "Example::phi_computation", Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { - auto rbf = RBFWendland_0{radii(i)}; + RBFWendland_0 rbf{radii(i)}; for (int j = 0; j < num_neighbors; j++) { @@ -176,9 +175,9 @@ int main(int argc, char *argv[]) }); // Compute multivariable Vandermonde (P) matrix - auto p = Kokkos::View( - "Example::vandermonde", target_points_num, num_neighbors, - MVPolynomialBasis_3D::size); + Kokkos::View p("Example::vandermonde", + target_points_num, num_neighbors, + MVPolynomialBasis_3D::size); Kokkos::parallel_for( "Example::vandermonde_computation", Kokkos::MDRangePolicy>( @@ -193,9 +192,9 @@ int main(int argc, char *argv[]) }); // Compute moment (A) matrix - auto a = Kokkos::View("Example::A", 
target_points_num, - MVPolynomialBasis_3D::size, - MVPolynomialBasis_3D::size); + Kokkos::View a("Example::A", target_points_num, + MVPolynomialBasis_3D::size, + MVPolynomialBasis_3D::size); Kokkos::parallel_for( "Example::A_computation", Kokkos::MDRangePolicy>( @@ -217,7 +216,7 @@ int main(int argc, char *argv[]) // first one are applied to the second // Kind of works, errors out quite often. // A better method should be employed (SVD?) - auto a_inv = Kokkos::View( + Kokkos::View a_inv( "Example::A_inv", target_points_num, MVPolynomialBasis_3D::size, MVPolynomialBasis_3D::size); Kokkos::parallel_for( @@ -287,8 +286,8 @@ int main(int argc, char *argv[]) }); // Compute the coefficients - auto coeffs = Kokkos::View( - "Example::coefficients", target_points_num, num_neighbors); + Kokkos::View coeffs("Example::coefficients", + target_points_num, num_neighbors); Kokkos::parallel_for( "Example::coefficients_computation", Kokkos::MDRangePolicy>( @@ -305,8 +304,8 @@ int main(int argc, char *argv[]) }); // Compute source values - auto source_values = Kokkos::View( - "Example::source_values", source_points_num); + Kokkos::View source_values("Example::source_values", + source_points_num); Kokkos::parallel_for( "Example::source_evaluation", Kokkos::RangePolicy(space, 0, source_points_num), @@ -315,8 +314,8 @@ int main(int argc, char *argv[]) }); // Compute target values via interpolation - auto target_values = Kokkos::View( - "Example::target_values", target_points_num); + Kokkos::View target_values("Example::target_values", + target_points_num); Kokkos::parallel_for( "Example::target_interpolation", Kokkos::RangePolicy(space, 0, target_points_num), @@ -330,7 +329,7 @@ int main(int argc, char *argv[]) }); // Compute target values via evaluation - auto target_values_exact = Kokkos::View( + Kokkos::View target_values_exact( "Example::target_values_exact", target_points_num); Kokkos::parallel_for( "Example::target_evaluation", From f6cd686882a2d99119a07ecee169237521f104f5 Mon Sep 
17 00:00:00 2001 From: Yohann Bosqued Date: Wed, 2 Aug 2023 15:15:11 -0400 Subject: [PATCH 12/44] Typo fix, execution spaces in deep copies and range policies simplification --- .../moving_least_squares.cpp | 49 ++++++++++--------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 182c66e90..9962c0b21 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -36,12 +36,13 @@ struct RBFWendland_0 struct MVPolynomialBasis_3D { - static constexpr std::size_t size = 4; + static constexpr std::size_t size = 10; KOKKOS_INLINE_FUNCTION Kokkos::Array operator()(ArborX::Point const &p) const { - return {{1.f, p[0], p[1], p[2]}}; + return {{1.f, p[0], p[1], p[2], p[0] * p[0], p[0] * p[1], p[0] * p[2], + p[1] * p[1], p[1] * p[2], p[2] * p[2]}}; } }; @@ -70,13 +71,13 @@ struct ArborX::AccessTraits // Function to approximate KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) { - return p[2] + p[0]; + return p[2] * p[1] + p[0]; } int main(int argc, char *argv[]) { Kokkos::ScopeGuard guard(argc, argv); - constexpr std::size_t num_neighbors = 7; + constexpr std::size_t num_neighbors = 10; constexpr std::size_t cube_side = 4; constexpr std::size_t source_points_num = cube_side * cube_side * cube_side; constexpr std::size_t target_points_num = 4; @@ -94,8 +95,8 @@ int main(int argc, char *argv[]) // Generate source points (Organized within a [-10, 10]^3 cube) Kokkos::parallel_for( "Example::source_points_init", - Kokkos::MDRangePolicy>( - space, {0, 0, 0}, {cube_side, cube_side, cube_side}), + Kokkos::MDRangePolicy>(space, {0, 0, 0}, + {cube_side, cube_side, cube_side}), KOKKOS_LAMBDA(int const i, int const j, int const k) { source_points(i * cube_side * cube_side + j * cube_side + k) = ArborX::Point{20.f * (float(i) / (cube_side - 1) - .5f), @@ -104,11 
+105,11 @@ int main(int argc, char *argv[]) }); // Generate target points - target_points_host(0) = ArborX::Point{0.f, 0.f, 0.f}; + target_points_host(0) = ArborX::Point{1.f, 0.f, 1.f}; target_points_host(1) = ArborX::Point{5.f, 5.f, 5.f}; target_points_host(2) = ArborX::Point{-5.f, 5.f, 3.f}; target_points_host(3) = ArborX::Point{1.f, -3.3f, 7.f}; - Kokkos::deep_copy(target_points, target_points_host); + Kokkos::deep_copy(space, target_points, target_points_host); // Organize source points as tree ArborX::BVH source_tree(space, source_points); @@ -126,7 +127,7 @@ int main(int argc, char *argv[]) "Example::tr_source_points", target_points_num, num_neighbors); Kokkos::parallel_for( "Example::transform_source_points", - Kokkos::RangePolicy(space, 0, target_points_num), + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { for (int j = offsets(i); j < offsets(i + 1); j++) { @@ -143,7 +144,7 @@ int main(int argc, char *argv[]) constexpr float epsilon = std::numeric_limits::epsilon(); Kokkos::parallel_for( "Example::radii_computation", - Kokkos::RangePolicy(space, 0, target_points_num), + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { float radius = 10.f * epsilon; @@ -162,7 +163,7 @@ int main(int argc, char *argv[]) num_neighbors); Kokkos::parallel_for( "Example::phi_computation", - Kokkos::RangePolicy(space, 0, target_points_num), + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { RBFWendland_0 rbf{radii(i)}; @@ -180,7 +181,7 @@ int main(int argc, char *argv[]) MVPolynomialBasis_3D::size); Kokkos::parallel_for( "Example::vandermonde_computation", - Kokkos::MDRangePolicy>( + Kokkos::MDRangePolicy>( space, {0, 0}, {target_points_num, num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { auto basis = MVPolynomialBasis_3D{}(tr_source_points(i, j)); @@ -197,10 +198,10 @@ int main(int argc, char *argv[]) MVPolynomialBasis_3D::size); Kokkos::parallel_for( "Example::A_computation", - 
Kokkos::MDRangePolicy>( - space, {0, 0, 0}, - {target_points_num, MVPolynomialBasis_3D::size, - MVPolynomialBasis_3D::size}), + Kokkos::MDRangePolicy>(space, {0, 0, 0}, + {target_points_num, + MVPolynomialBasis_3D::size, + MVPolynomialBasis_3D::size}), KOKKOS_LAMBDA(int const i, int const j, int const k) { float tmp = 0; for (int l = 0; l < num_neighbors; l++) @@ -221,7 +222,7 @@ int main(int argc, char *argv[]) MVPolynomialBasis_3D::size); Kokkos::parallel_for( "Example::A_inv_computation", - Kokkos::RangePolicy(space, 0, target_points_num), + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { for (int j = 0; j < MVPolynomialBasis_3D::size; j++) { @@ -290,7 +291,7 @@ int main(int argc, char *argv[]) target_points_num, num_neighbors); Kokkos::parallel_for( "Example::coefficients_computation", - Kokkos::MDRangePolicy>( + Kokkos::MDRangePolicy>( space, {0, 0}, {target_points_num, num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { float tmp = 0; @@ -308,7 +309,7 @@ int main(int argc, char *argv[]) source_points_num); Kokkos::parallel_for( "Example::source_evaluation", - Kokkos::RangePolicy(space, 0, source_points_num), + Kokkos::RangePolicy(space, 0, source_points_num), KOKKOS_LAMBDA(int const i) { source_values(i) = manufactured_solution(source_points(i)); }); @@ -318,10 +319,10 @@ int main(int argc, char *argv[]) target_points_num); Kokkos::parallel_for( "Example::target_interpolation", - Kokkos::RangePolicy(space, 0, target_points_num), + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { float tmp = 0; - for (int j = offsets(i); j < offsets(i + i); j++) + for (int j = offsets(i); j < offsets(i + 1); j++) { tmp += coeffs(i, j - offsets(i)) * source_values(indices(j)); } @@ -333,17 +334,17 @@ int main(int argc, char *argv[]) "Example::target_values_exact", target_points_num); Kokkos::parallel_for( "Example::target_evaluation", - Kokkos::RangePolicy(space, 0, target_points_num), + 
Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { target_values_exact(i) = manufactured_solution(target_points(i)); }); // Show difference auto target_values_host = Kokkos::create_mirror_view(target_values); - Kokkos::deep_copy(target_values_host, target_values); + Kokkos::deep_copy(space, target_values_host, target_values); auto target_values_exact_host = Kokkos::create_mirror_view(target_values_exact); - Kokkos::deep_copy(target_values_exact_host, target_values_exact); + Kokkos::deep_copy(space, target_values_exact_host, target_values_exact); float error = 0.f; for (int i = 0; i < target_points_num; i++) From 793a5f228858c972fcb88771063d3250cb5fc4ca Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Thu, 3 Aug 2023 16:26:57 -0400 Subject: [PATCH 13/44] Switching from gaussian inverse to SVD --- .../moving_least_squares.cpp | 167 +++++++++++++----- 1 file changed, 122 insertions(+), 45 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 9962c0b21..8daa56a9e 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -18,6 +18,7 @@ #include +#include #include using ExecutionSpace = Kokkos::DefaultExecutionSpace; @@ -77,8 +78,10 @@ KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) int main(int argc, char *argv[]) { Kokkos::ScopeGuard guard(argc, argv); - constexpr std::size_t num_neighbors = 10; - constexpr std::size_t cube_side = 4; + + constexpr float epsilon = std::numeric_limits::epsilon(); + constexpr std::size_t num_neighbors = 20; + constexpr std::size_t cube_side = 10; constexpr std::size_t source_points_num = cube_side * cube_side * cube_side; constexpr std::size_t target_points_num = 4; @@ -141,7 +144,6 @@ int main(int argc, char *argv[]) // Compute the radii for the weight (phi) vector Kokkos::View radii("Example::radii", 
target_points_num); - constexpr float epsilon = std::numeric_limits::epsilon(); Kokkos::parallel_for( "Example::radii_computation", Kokkos::RangePolicy(space, 0, target_points_num), @@ -212,14 +214,22 @@ int main(int argc, char *argv[]) a(i, j, k) = tmp; }); - // Inverse moment matrix - // Gaussian inverse method. Both matrix are used and modifications on the - // first one are applied to the second - // Kind of works, errors out quite often. - // A better method should be employed (SVD?) + // Pseudo-inverse moment matrix using SVD + // We must find U, E (diagonal and positive) and V such that A = U.E.V^T + // We also know that A is symmetric (by construction), so U = SV where S is + // a sign matrix (only 1 or -1 in the diagonal, 0 elsewhere). + // Thus A = U.E.S.U^T + static constexpr float pi_4 = M_PI_4; Kokkos::View a_inv( "Example::A_inv", target_points_num, MVPolynomialBasis_3D::size, MVPolynomialBasis_3D::size); + Kokkos::View svd_u( + "Example::SVD::U", target_points_num, MVPolynomialBasis_3D::size, + MVPolynomialBasis_3D::size); + Kokkos::View svd_es( + "Example::SVD::E.S", target_points_num, MVPolynomialBasis_3D::size, + MVPolynomialBasis_3D::size); + Kokkos::deep_copy(space, svd_es, a); Kokkos::parallel_for( "Example::A_inv_computation", Kokkos::RangePolicy(space, 0, target_points_num), @@ -228,61 +238,128 @@ int main(int argc, char *argv[]) { for (int k = 0; k < MVPolynomialBasis_3D::size; k++) { - a_inv(i, j, k) = (j == k) * 1.f; + svd_u(i, j, k) = (j == k) * 1.f; } } - // This needs to be done for every column - for (int j = 0; j < MVPolynomialBasis_3D::size; j++) - { - - // We find the line with a non-negative element on column j - int k = j; - for (; k < MVPolynomialBasis_3D::size; k++) + // This finds the biggest off-diagonal value of E.S as well as its + // coordinates. 
Being symmetric, we can always check on the upper + // triangle (and always have q > p) + auto argmax = [=](int &p, int &q) { + float max = 0.f; + p = -1; + q = -1; + for (int j = 0; j < MVPolynomialBasis_3D::size; j++) { - if (a(i, k, j) != 0.f) - break; + for (int k = j + 1; k < MVPolynomialBasis_3D::size; k++) + { + float val = Kokkos::abs(svd_es(i, j, k)); + if (max < val) + { + max = val; + p = j; + q = k; + } + } } - // We divide the line with said value - float tmp = a(i, k, j); - for (int l = 0; l < MVPolynomialBasis_3D::size; l++) + return max; + }; + + // Iterative approach, we will "deconstruct" E.S until only the diagonal + // is relevent inside the matrix + // It is possible to prove that, at each step, the "norm" of the matrix + // is strictly less that of the previous + int p, q; + float norm = argmax(p, q); + while (norm > epsilon) + { + // Our submatrix is now + // +----------+----------+ +---+---+ + // | es(p, p) | es(p, q) | | a | b | + // +----------+----------+ = +---+---+ + // | es(q, p) | es(q, q) | | b | c | + // +----------+----------+ +---+---+ + float a = svd_es(i, p, p); + float b = svd_es(i, p, q); + float c = svd_es(i, q, q); + + float theta, u, v; + if (a == c) { - a(i, k, l) /= tmp; - a_inv(i, k, l) /= tmp; + theta = pi_4; + u = a + b; + v = a - b; } + else + { + theta = .5f * Kokkos::atanf((2.f * b) / (a - c)); + float cos2 = Kokkos::cosf(2.f * theta); + u = .5f * (a + c + (a - c) / cos2); + v = .5f * (a + c - (a - c) / cos2); + } + float cos = Kokkos::cosf(theta); + float sin = Kokkos::sinf(theta); - // If line and column are not the same, move the column to the top - if (k != j) + // We must now apply the rotation matrix to the left + // and right of E.S and on the right of U + + // Left of E.S (mult by R(theta)^T) + for (int j = 0; j < MVPolynomialBasis_3D::size; j++) { - for (int l = 0; l < MVPolynomialBasis_3D::size; l++) - { - float tmp = a(i, k, l); - a(i, k, l) = a(i, j, l); - a(i, j, l) = tmp; + float es_ipj = svd_es(i, p, 
j); + float es_iqj = svd_es(i, q, j); + svd_es(i, p, j) = cos * es_ipj + sin * es_iqj; + svd_es(i, q, j) = -sin * es_ipj + cos * es_iqj; + } - tmp = a_inv(i, k, l); - a_inv(i, k, l) = a_inv(i, j, l); - a_inv(i, j, l) = tmp; - } + // Right of E.S (mult by R(theta)) + for (int j = 0; j < MVPolynomialBasis_3D::size; j++) + { + float es_ijp = svd_es(i, j, p); + float es_ijq = svd_es(i, j, q); + svd_es(i, j, p) = cos * es_ijp + sin * es_ijq; + svd_es(i, j, q) = -sin * es_ijp + cos * es_ijq; } - // Now, set at zero all other elements of the column (Ll <- Ll - a*Lj) - for (int l = 0; l < MVPolynomialBasis_3D::size; l++) + // Right of U (mult by R(theta)) + for (int j = 0; j < MVPolynomialBasis_3D::size; j++) { - if (l == j || a(i, l, j) == 0.f) - continue; - float mul = a(i, l, j); + float u_ijp = svd_u(i, j, p); + float u_ijq = svd_u(i, j, q); + svd_u(i, j, p) = cos * u_ijp + sin * u_ijq; + svd_u(i, j, q) = -sin * u_ijp + cos * u_ijq; + } + + // These should theorically hold but is it ok to force them to their + // real value? 
+ svd_es(i, p, p) = u; + svd_es(i, q, q) = v; + svd_es(i, p, q) = 0.f; + svd_es(i, q, p) = 0.f; + + norm = argmax(p, q); + } - for (int m = 0; m < MVPolynomialBasis_3D::size; m++) + // We should now have a correct U and E.S + // We'll compute the pseudo inverse of A by taking the + // pseudo inverse of E.S which is simply inverting the diagonal of + // E.S + for (int j = 0; j < MVPolynomialBasis_3D::size; j++) + { + for (int k = 0; k < MVPolynomialBasis_3D::size; k++) + { + float value = 0.; + for (int l = 0; l < MVPolynomialBasis_3D::size; l++) { - a(i, l, m) -= mul * a(i, j, m); - a_inv(i, l, m) -= mul * a_inv(i, j, m); + if (Kokkos::abs(svd_es(i, l, l)) >= epsilon) + { + value += svd_u(i, j, l) * svd_u(i, l, k) / svd_es(i, l, l); + } } - a(i, l, j) = 0.f; - } - // Now a_inv should contain the inverse of a + a_inv(i, j, k) = value; + } } }); From 15ca7a18cb4449acec1ff440381c989d122c0131 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Fri, 4 Aug 2023 11:00:06 -0400 Subject: [PATCH 14/44] Specifying ExecutionSpace in RangePolicies --- .../moving_least_squares/moving_least_squares.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 8daa56a9e..ba563753e 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -130,7 +130,7 @@ int main(int argc, char *argv[]) "Example::tr_source_points", target_points_num, num_neighbors); Kokkos::parallel_for( "Example::transform_source_points", - Kokkos::RangePolicy(space, 0, target_points_num), + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { for (int j = offsets(i); j < offsets(i + 1); j++) { @@ -146,7 +146,7 @@ int main(int argc, char *argv[]) Kokkos::View radii("Example::radii", target_points_num); Kokkos::parallel_for( "Example::radii_computation", - 
Kokkos::RangePolicy(space, 0, target_points_num), + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { float radius = 10.f * epsilon; @@ -165,7 +165,7 @@ int main(int argc, char *argv[]) num_neighbors); Kokkos::parallel_for( "Example::phi_computation", - Kokkos::RangePolicy(space, 0, target_points_num), + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { RBFWendland_0 rbf{radii(i)}; @@ -232,7 +232,7 @@ int main(int argc, char *argv[]) Kokkos::deep_copy(space, svd_es, a); Kokkos::parallel_for( "Example::A_inv_computation", - Kokkos::RangePolicy(space, 0, target_points_num), + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { for (int j = 0; j < MVPolynomialBasis_3D::size; j++) { @@ -386,7 +386,7 @@ int main(int argc, char *argv[]) source_points_num); Kokkos::parallel_for( "Example::source_evaluation", - Kokkos::RangePolicy(space, 0, source_points_num), + Kokkos::RangePolicy(space, 0, source_points_num), KOKKOS_LAMBDA(int const i) { source_values(i) = manufactured_solution(source_points(i)); }); @@ -396,7 +396,7 @@ int main(int argc, char *argv[]) target_points_num); Kokkos::parallel_for( "Example::target_interpolation", - Kokkos::RangePolicy(space, 0, target_points_num), + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { float tmp = 0; for (int j = offsets(i); j < offsets(i + 1); j++) @@ -411,7 +411,7 @@ int main(int argc, char *argv[]) "Example::target_values_exact", target_points_num); Kokkos::parallel_for( "Example::target_evaluation", - Kokkos::RangePolicy(space, 0, target_points_num), + Kokkos::RangePolicy(space, 0, target_points_num), KOKKOS_LAMBDA(int const i) { target_values_exact(i) = manufactured_solution(target_points(i)); }); From 62de5ed3c817e9971ee53cd711cd6e3efc3e2d69 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Mon, 7 Aug 2023 11:51:24 -0400 Subject: [PATCH 15/44] Fixing wrong SVD calculation --- 
.../moving_least_squares/moving_least_squares.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index ba563753e..046f17bf7 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -72,7 +72,7 @@ struct ArborX::AccessTraits // Function to approximate KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) { - return p[2] * p[1] + p[0]; + return Kokkos::sin(p[0]) * p[2] + p[1]; } int main(int argc, char *argv[]) @@ -80,7 +80,7 @@ int main(int argc, char *argv[]) Kokkos::ScopeGuard guard(argc, argv); constexpr float epsilon = std::numeric_limits::epsilon(); - constexpr std::size_t num_neighbors = 20; + constexpr std::size_t num_neighbors = MVPolynomialBasis_3D::size; constexpr std::size_t cube_side = 10; constexpr std::size_t source_points_num = cube_side * cube_side * cube_side; constexpr std::size_t target_points_num = 4; @@ -344,17 +344,17 @@ int main(int argc, char *argv[]) // We should now have a correct U and E.S // We'll compute the pseudo inverse of A by taking the // pseudo inverse of E.S which is simply inverting the diagonal of - // E.S + // E.S. 
We have pseudoA = U^T.pseudoES.U for (int j = 0; j < MVPolynomialBasis_3D::size; j++) { for (int k = 0; k < MVPolynomialBasis_3D::size; k++) { - float value = 0.; + float value = 0.f; for (int l = 0; l < MVPolynomialBasis_3D::size; l++) { if (Kokkos::abs(svd_es(i, l, l)) >= epsilon) { - value += svd_u(i, j, l) * svd_u(i, l, k) / svd_es(i, l, l); + value += svd_u(i, j, l) * svd_u(i, k, l) / svd_es(i, l, l); } } @@ -371,7 +371,7 @@ int main(int argc, char *argv[]) Kokkos::MDRangePolicy>( space, {0, 0}, {target_points_num, num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { - float tmp = 0; + float tmp = 0.f; for (int k = 0; k < MVPolynomialBasis_3D::size; k++) { From e7f7918d8a8981278ec60c083162a3afb3b10347 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 8 Aug 2023 15:25:25 -0400 Subject: [PATCH 16/44] Adding MPI (unstable) --- .../moving_least_squares.cpp | 192 ++++++++++++++++-- 1 file changed, 172 insertions(+), 20 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 046f17bf7..72a046320 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -20,9 +20,13 @@ #include #include +#include + +#include using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; +using DeviceSpace = Kokkos::Device; struct RBFWendland_0 { @@ -69,6 +73,23 @@ struct ArborX::AccessTraits using memory_space = MemorySpace; }; +/* +0: ==== Target 0 +0: Interpolation: 0.717408 +0: Real value : 0.841471 +0: ==== Target 1 +0: Interpolation: 0.210617 +0: Real value : 0.205379 +0: ==== Target 2 +0: Interpolation: 7.36529 +0: Real value : 7.87677 +0: ==== Target 3 +0: Interpolation: 1.41947 +0: Real value : 2.5903 +0: ==== +0: Maximum error: 1.17083 +*/ + // Function to approximate KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) { @@ -77,19 +98,26 @@ 
KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) int main(int argc, char *argv[]) { + MPI_Init(&argc, &argv); Kokkos::ScopeGuard guard(argc, argv); constexpr float epsilon = std::numeric_limits::epsilon(); constexpr std::size_t num_neighbors = MVPolynomialBasis_3D::size; - constexpr std::size_t cube_side = 10; + constexpr std::size_t cube_side = 20; constexpr std::size_t source_points_num = cube_side * cube_side * cube_side; constexpr std::size_t target_points_num = 4; ExecutionSpace space{}; + MPI_Comm mpi_comm = MPI_COMM_WORLD; + int mpi_size, mpi_rank; + MPI_Comm_size(mpi_comm, &mpi_size); + MPI_Comm_rank(mpi_comm, &mpi_rank); + + std::size_t local_source_points_num = source_points_num / mpi_size; Kokkos::View source_points( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::source_points"), - source_points_num); + local_source_points_num); Kokkos::View target_points( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::target_points"), target_points_num); @@ -98,13 +126,14 @@ int main(int argc, char *argv[]) // Generate source points (Organized within a [-10, 10]^3 cube) Kokkos::parallel_for( "Example::source_points_init", - Kokkos::MDRangePolicy>(space, {0, 0, 0}, - {cube_side, cube_side, cube_side}), + Kokkos::MDRangePolicy>( + space, {0, 0, 0}, {cube_side, cube_side, cube_side / mpi_size}), KOKKOS_LAMBDA(int const i, int const j, int const k) { source_points(i * cube_side * cube_side + j * cube_side + k) = ArborX::Point{20.f * (float(i) / (cube_side - 1) - .5f), 20.f * (float(j) / (cube_side - 1) - .5f), - 20.f * (float(k) / (cube_side - 1) - .5f)}; + 20.f * (float(k) / (cube_side - 1) - .5f + + (float(mpi_rank) / mpi_size))}; }); // Generate target points @@ -115,13 +144,102 @@ int main(int argc, char *argv[]) Kokkos::deep_copy(space, target_points, target_points_host); // Organize source points as tree - ArborX::BVH source_tree(space, source_points); + ArborX::DistributedTree source_tree(mpi_comm, space, + 
source_points); - // Perform the query - Kokkos::View indices("Example::indices", 0); + // Perform the query and split the indices/ranks + Kokkos::View *, MemorySpace> index_ranks( + "Example::index_ranks", 0); Kokkos::View offsets("Example::offsets", 0); - source_tree.query(space, TargetPoints{target_points, num_neighbors}, indices, - offsets); + source_tree.query(space, TargetPoints{target_points, num_neighbors}, + index_ranks, offsets); + Kokkos::View local_indices( + "Example::local_indices", target_points_num * num_neighbors); + Kokkos::View local_ranks( + "Example::local_ranks", target_points_num * num_neighbors); + Kokkos::parallel_for( + "Example::index_ranks_split", + Kokkos::RangePolicy(space, 0, + target_points_num * num_neighbors), + KOKKOS_LAMBDA(int const i) { + local_indices(i) = index_ranks(i).first; + local_ranks(i) = index_ranks(i).second; + }); + + // Before moving on, we must gather the coordinates of all the requested + // source points. DTK does that by distributing in a "who wants what" matter + // The distribution is done in two phases. A first pass where every process + // receives the information on "who wants what" from them. 
Then a second pass + // is done where values are set up and sent back to processes + + // First pass setup + ArborX::Details::Distributor distributor_first(mpi_comm); + int const local_requests_num = + distributor_first.createFromSends(space, local_ranks); + + // "Middlemen" buffers + // - mpi_mid_in_indices(i) corresponds to an index that zill be used to + // construct the final value + // - mpi_mid_rank(i) corresponds to the request origin for value (i) + // - mpi_mid_indices(i) corresponds to the point's index in the nn query + // from which mpi_mid_points(i) is attached to + Kokkos::View mpi_mid_in_indices( + "Example::mpi_mid_in_indices", local_requests_num); + Kokkos::View mpi_mid_indices("Example::mpi_mid_indices", + local_requests_num); + Kokkos::View mpi_mid_ranks("Example::mpi_mid_ranks", + local_requests_num); + Kokkos::View mpi_mid_points( + "Example::mpi_mid_points", local_requests_num); + + // First pass comms + Kokkos::View mpi_tmp("Example::mpi_tmp", + target_points_num * num_neighbors); + ArborX::iota(space, mpi_tmp); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, distributor_first, mpi_tmp, mpi_mid_in_indices); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, distributor_first, local_indices, mpi_mid_indices); + Kokkos::deep_copy(space, mpi_tmp, mpi_rank); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, distributor_first, mpi_tmp, mpi_mid_ranks); + Kokkos::parallel_for( + "Example::mpi_mid_points_fill", + Kokkos::RangePolicy(space, 0, local_requests_num), + KOKKOS_LAMBDA(int const i) { + mpi_mid_points(i) = source_points(mpi_mid_indices(i)); + }); + + // This process now knows "who wants what" and is ready to send everything + // back + + // Second pass setup + ArborX::Details::Distributor distributor_second(mpi_comm); + int const local_responses_num = + distributor_second.createFromSends(space, mpi_mid_ranks); + Kokkos::View local_untreated_source_points( + 
"Example::local_untreated_source_points", + target_points_num * num_neighbors); + // We have local_responses_num == target_points_num * num_neighbors + + // Temporary buffers + Kokkos::View mpi_tmp_in_indices( + "Examples::mpi_tmp_in_indices", local_responses_num); + Kokkos::View mpi_tmp_points( + "Examples::mpi_tmp_points", local_responses_num); + + // Second pass comms + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, distributor_second, mpi_mid_points, mpi_tmp_points); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, distributor_second, mpi_mid_in_indices, mpi_tmp_in_indices); + Kokkos::parallel_for( + "Example::local_untreated_source_points_fill", + Kokkos::RangePolicy(space, 0, local_responses_num), + KOKKOS_LAMBDA(int const i) { + local_untreated_source_points(mpi_tmp_in_indices(i)) = + mpi_tmp_points(i); + }); // Now that we have the neighbors, we recompute their position using // their target point as the origin. @@ -135,9 +253,9 @@ int main(int argc, char *argv[]) for (int j = offsets(i); j < offsets(i + 1); j++) { tr_source_points(i, j - offsets(i)) = ArborX::Point{ - source_points(indices(j))[0] - target_points(i)[0], - source_points(indices(j))[1] - target_points(i)[1], - source_points(indices(j))[2] - target_points(i)[2], + local_untreated_source_points(j)[0] - target_points(i)[0], + local_untreated_source_points(j)[1] - target_points(i)[1], + local_untreated_source_points(j)[2] - target_points(i)[2], }; } }); @@ -383,14 +501,40 @@ int main(int argc, char *argv[]) // Compute source values Kokkos::View source_values("Example::source_values", - source_points_num); + local_source_points_num); Kokkos::parallel_for( "Example::source_evaluation", - Kokkos::RangePolicy(space, 0, source_points_num), + Kokkos::RangePolicy(space, 0, local_source_points_num), KOKKOS_LAMBDA(int const i) { source_values(i) = manufactured_solution(source_points(i)); }); + // To approximate the function, we have to gather the correct source values 
+ // We have to redo part of the earlier passes + Kokkos::View mpi_mid_values("Example::mpi_mid_values", + local_requests_num); + Kokkos::parallel_for( + "Example::mpi_mid_values_fill", + Kokkos::RangePolicy(space, 0, local_requests_num), + KOKKOS_LAMBDA(int const i) { + mpi_mid_values(i) = source_values(mpi_mid_indices(i)); + }); + + Kokkos::View local_untreated_source_values( + "Example::local_untreated_source_values", + target_points_num * num_neighbors); + Kokkos::View mpi_tmp_values("Examples::mpi_tmp_values", + local_responses_num); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, distributor_second, mpi_mid_values, mpi_tmp_values); + Kokkos::parallel_for( + "Example::local_untreated_source_values_fill", + Kokkos::RangePolicy(space, 0, local_responses_num), + KOKKOS_LAMBDA(int const i) { + local_untreated_source_values(mpi_tmp_in_indices(i)) = + mpi_tmp_values(i); + }); + // Compute target values via interpolation Kokkos::View target_values("Example::target_values", target_points_num); @@ -401,7 +545,7 @@ int main(int argc, char *argv[]) float tmp = 0; for (int j = offsets(i); j < offsets(i + 1); j++) { - tmp += coeffs(i, j - offsets(i)) * source_values(indices(j)); + tmp += coeffs(i, j - offsets(i)) * local_untreated_source_values(j); } target_values(i) = tmp; }); @@ -423,15 +567,23 @@ int main(int argc, char *argv[]) Kokkos::create_mirror_view(target_values_exact); Kokkos::deep_copy(space, target_values_exact_host, target_values_exact); + std::stringstream ss{}; float error = 0.f; for (int i = 0; i < target_points_num; i++) { error = Kokkos::max( Kokkos::abs(target_values_host(i) - target_values_exact_host(i)), error); - std::cout << "==== Target " << i << '\n' - << "Interpolation: " << target_values_host(i) << '\n' - << "Real value : " << target_values_exact_host(i) << '\n'; + ss << mpi_rank << ": ==== Target " << i << '\n' + << mpi_rank << ": Interpolation: " << target_values_host(i) + << '\n' + << mpi_rank << ": Real value : " << 
target_values_exact_host(i) + << '\n'; } - std::cout << "====\nMaximum error: " << error << std::endl; + ss << mpi_rank << ": ====\n" + << mpi_rank << ": Maximum error: " << error << std::endl; + + std::cout << ss.str(); + MPI_Finalize(); + return 0; } From d0932edcc44f93f805c213059cda66afd9fdc54c Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 8 Aug 2023 16:31:53 -0400 Subject: [PATCH 17/44] Relative error and misc fixes --- .../moving_least_squares.cpp | 46 ++++++------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 72a046320..58cf30b91 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -73,23 +73,6 @@ struct ArborX::AccessTraits using memory_space = MemorySpace; }; -/* -0: ==== Target 0 -0: Interpolation: 0.717408 -0: Real value : 0.841471 -0: ==== Target 1 -0: Interpolation: 0.210617 -0: Real value : 0.205379 -0: ==== Target 2 -0: Interpolation: 7.36529 -0: Real value : 7.87677 -0: ==== Target 3 -0: Interpolation: 1.41947 -0: Real value : 2.5903 -0: ==== -0: Maximum error: 1.17083 -*/ - // Function to approximate KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) { @@ -124,16 +107,17 @@ int main(int argc, char *argv[]) auto target_points_host = Kokkos::create_mirror_view(target_points); // Generate source points (Organized within a [-10, 10]^3 cube) + std::size_t thickness = cube_side / mpi_size; Kokkos::parallel_for( "Example::source_points_init", - Kokkos::MDRangePolicy>( - space, {0, 0, 0}, {cube_side, cube_side, cube_side / mpi_size}), + Kokkos::MDRangePolicy>(space, {0, 0, 0}, + {cube_side, cube_side, thickness}), KOKKOS_LAMBDA(int const i, int const j, int const k) { - source_points(i * cube_side * cube_side + j * cube_side + k) = - ArborX::Point{20.f * (float(i) / (cube_side - 1) - .5f), - 20.f * 
(float(j) / (cube_side - 1) - .5f), - 20.f * (float(k) / (cube_side - 1) - .5f + - (float(mpi_rank) / mpi_size))}; + source_points(i * cube_side * cube_side + j * cube_side + + k) = ArborX::Point{ + 20.f * (float(i) / (cube_side - 1) - .5f), + 20.f * (float(j) / (cube_side - 1) - .5f), + 20.f * (float(k + thickness * mpi_rank) / (cube_side - 1) - .5f)}; }); // Generate target points @@ -178,7 +162,7 @@ int main(int argc, char *argv[]) distributor_first.createFromSends(space, local_ranks); // "Middlemen" buffers - // - mpi_mid_in_indices(i) corresponds to an index that zill be used to + // - mpi_mid_in_indices(i) corresponds to an index that will be used to // construct the final value // - mpi_mid_rank(i) corresponds to the request origin for value (i) // - mpi_mid_indices(i) corresponds to the point's index in the nn query @@ -572,16 +556,16 @@ int main(int argc, char *argv[]) for (int i = 0; i < target_points_num; i++) { error = Kokkos::max( - Kokkos::abs(target_values_host(i) - target_values_exact_host(i)), + Kokkos::abs(target_values_host(i) - target_values_exact_host(i)) / + Kokkos::abs(target_values_exact_host(i)), error); ss << mpi_rank << ": ==== Target " << i << '\n' - << mpi_rank << ": Interpolation: " << target_values_host(i) - << '\n' - << mpi_rank << ": Real value : " << target_values_exact_host(i) - << '\n'; + << mpi_rank << ": Interpolation: " << target_values_host(i) << '\n' + << mpi_rank << ": Real value : " << target_values_exact_host(i) + << '\n'; } ss << mpi_rank << ": ====\n" - << mpi_rank << ": Maximum error: " << error << std::endl; + << mpi_rank << ": Maximum relative error: " << error << std::endl; std::cout << ss.str(); MPI_Finalize(); From 67db96a86a1ddcbbb43fe0716332f8812edcaaf4 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Wed, 9 Aug 2023 15:34:25 -0400 Subject: [PATCH 18/44] Separation and templation of SVD inverse --- .../moving_least_squares.cpp | 185 ++------------ .../symmetric_pseudoinverse_svd.hpp | 229 ++++++++++++++++++ 2 
files changed, 252 insertions(+), 162 deletions(-) create mode 100644 examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 58cf30b91..36b069621 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -22,6 +22,7 @@ #include #include +#include "symmetric_pseudoinverse_svd.hpp" #include using ExecutionSpace = Kokkos::DefaultExecutionSpace; @@ -316,154 +317,10 @@ int main(int argc, char *argv[]) a(i, j, k) = tmp; }); - // Pseudo-inverse moment matrix using SVD - // We must find U, E (diagonal and positive) and V such that A = U.E.V^T - // We also know that A is symmetric (by construction), so U = SV where S is - // a sign matrix (only 1 or -1 in the diagonal, 0 elsewhere). - // Thus A = U.E.S.U^T - static constexpr float pi_4 = M_PI_4; - Kokkos::View a_inv( - "Example::A_inv", target_points_num, MVPolynomialBasis_3D::size, - MVPolynomialBasis_3D::size); - Kokkos::View svd_u( - "Example::SVD::U", target_points_num, MVPolynomialBasis_3D::size, - MVPolynomialBasis_3D::size); - Kokkos::View svd_es( - "Example::SVD::E.S", target_points_num, MVPolynomialBasis_3D::size, - MVPolynomialBasis_3D::size); - Kokkos::deep_copy(space, svd_es, a); - Kokkos::parallel_for( - "Example::A_inv_computation", - Kokkos::RangePolicy(space, 0, target_points_num), - KOKKOS_LAMBDA(int const i) { - for (int j = 0; j < MVPolynomialBasis_3D::size; j++) - { - for (int k = 0; k < MVPolynomialBasis_3D::size; k++) - { - svd_u(i, j, k) = (j == k) * 1.f; - } - } - - // This finds the biggest off-diagonal value of E.S as well as its - // coordinates. 
Being symmetric, we can always check on the upper - // triangle (and always have q > p) - auto argmax = [=](int &p, int &q) { - float max = 0.f; - p = -1; - q = -1; - for (int j = 0; j < MVPolynomialBasis_3D::size; j++) - { - for (int k = j + 1; k < MVPolynomialBasis_3D::size; k++) - { - float val = Kokkos::abs(svd_es(i, j, k)); - if (max < val) - { - max = val; - p = j; - q = k; - } - } - } - - return max; - }; - - // Iterative approach, we will "deconstruct" E.S until only the diagonal - // is relevent inside the matrix - // It is possible to prove that, at each step, the "norm" of the matrix - // is strictly less that of the previous - int p, q; - float norm = argmax(p, q); - while (norm > epsilon) - { - // Our submatrix is now - // +----------+----------+ +---+---+ - // | es(p, p) | es(p, q) | | a | b | - // +----------+----------+ = +---+---+ - // | es(q, p) | es(q, q) | | b | c | - // +----------+----------+ +---+---+ - float a = svd_es(i, p, p); - float b = svd_es(i, p, q); - float c = svd_es(i, q, q); - - float theta, u, v; - if (a == c) - { - theta = pi_4; - u = a + b; - v = a - b; - } - else - { - theta = .5f * Kokkos::atanf((2.f * b) / (a - c)); - float cos2 = Kokkos::cosf(2.f * theta); - u = .5f * (a + c + (a - c) / cos2); - v = .5f * (a + c - (a - c) / cos2); - } - float cos = Kokkos::cosf(theta); - float sin = Kokkos::sinf(theta); - - // We must now apply the rotation matrix to the left - // and right of E.S and on the right of U - - // Left of E.S (mult by R(theta)^T) - for (int j = 0; j < MVPolynomialBasis_3D::size; j++) - { - float es_ipj = svd_es(i, p, j); - float es_iqj = svd_es(i, q, j); - svd_es(i, p, j) = cos * es_ipj + sin * es_iqj; - svd_es(i, q, j) = -sin * es_ipj + cos * es_iqj; - } - - // Right of E.S (mult by R(theta)) - for (int j = 0; j < MVPolynomialBasis_3D::size; j++) - { - float es_ijp = svd_es(i, j, p); - float es_ijq = svd_es(i, j, q); - svd_es(i, j, p) = cos * es_ijp + sin * es_ijq; - svd_es(i, j, q) = -sin * es_ijp + cos * 
es_ijq; - } - - // Right of U (mult by R(theta)) - for (int j = 0; j < MVPolynomialBasis_3D::size; j++) - { - float u_ijp = svd_u(i, j, p); - float u_ijq = svd_u(i, j, q); - svd_u(i, j, p) = cos * u_ijp + sin * u_ijq; - svd_u(i, j, q) = -sin * u_ijp + cos * u_ijq; - } - - // These should theorically hold but is it ok to force them to their - // real value? - svd_es(i, p, p) = u; - svd_es(i, q, q) = v; - svd_es(i, p, q) = 0.f; - svd_es(i, q, p) = 0.f; - - norm = argmax(p, q); - } - - // We should now have a correct U and E.S - // We'll compute the pseudo inverse of A by taking the - // pseudo inverse of E.S which is simply inverting the diagonal of - // E.S. We have pseudoA = U^T.pseudoES.U - for (int j = 0; j < MVPolynomialBasis_3D::size; j++) - { - for (int k = 0; k < MVPolynomialBasis_3D::size; k++) - { - float value = 0.f; - for (int l = 0; l < MVPolynomialBasis_3D::size; l++) - { - if (Kokkos::abs(svd_es(i, l, l)) >= epsilon) - { - value += svd_u(i, j, l) * svd_u(i, k, l) / svd_es(i, l, l); - } - } - - a_inv(i, j, k) = value; - } - } - }); + // Compute the pseudo inverse + auto a_inv = + SymmPseudoInverseSVD::compute_pseudo_inverses(space, a); // Compute the coefficients Kokkos::View coeffs("Example::coefficients", @@ -551,23 +408,27 @@ int main(int argc, char *argv[]) Kokkos::create_mirror_view(target_values_exact); Kokkos::deep_copy(space, target_values_exact_host, target_values_exact); - std::stringstream ss{}; - float error = 0.f; - for (int i = 0; i < target_points_num; i++) + if (mpi_rank == 0) { - error = Kokkos::max( - Kokkos::abs(target_values_host(i) - target_values_exact_host(i)) / - Kokkos::abs(target_values_exact_host(i)), - error); - ss << mpi_rank << ": ==== Target " << i << '\n' - << mpi_rank << ": Interpolation: " << target_values_host(i) << '\n' - << mpi_rank << ": Real value : " << target_values_exact_host(i) - << '\n'; + std::stringstream ss{}; + float error = 0.f; + for (int i = 0; i < target_points_num; i++) + { + error = Kokkos::max( + 
Kokkos::abs(target_values_host(i) - target_values_exact_host(i)) / + Kokkos::abs(target_values_exact_host(i)), + error); + ss << mpi_rank << ": ==== Target " << i << '\n' + << mpi_rank << ": Interpolation: " << target_values_host(i) << '\n' + << mpi_rank << ": Real value : " << target_values_exact_host(i) + << '\n'; + } + ss << mpi_rank << ": ====\n" + << mpi_rank << ": Maximum relative error: " << error << std::endl; + + std::cout << ss.str(); } - ss << mpi_rank << ": ====\n" - << mpi_rank << ": Maximum relative error: " << error << std::endl; - std::cout << ss.str(); MPI_Finalize(); return 0; } diff --git a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp new file mode 100644 index 000000000..57f33c665 --- /dev/null +++ b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp @@ -0,0 +1,229 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include +#include +#include + +// Pseudo-inverse moment matrix using SVD +// We must find U, E (diagonal and positive) and V such that A = U.E.V^T +// We also know that A is symmetric (by construction), so U = SV where S is +// a sign matrix (only 1 or -1 in the diagonal, 0 elsewhere). 
+// Thus A = U.E.S.U^T +template +class SymmPseudoInverseSVD +{ +public: + static Kokkos::View + compute_pseudo_inverses(ExecutionSpace const &space, + Kokkos::View const &mats) + { + SymmPseudoInverseSVD spis(space, mats); + + // Iterative approach, we will "deconstruct" E.S until only the diagonal + // is relevent inside the matrix + // It is possible to prove that, at each step, the "norm" of the matrix + // is strictly less that of the previous + Kokkos::parallel_for( + "Example::SVD::compute_U_ES", + Kokkos::RangePolicy(space, 0, spis._num_matrices), + KOKKOS_LAMBDA(std::size_t i) { + std::size_t p, q; + ValueType norm = spis.argmax_off_diagonal(i, p, q); + while (norm > spis._epsilon) + { + spis.compute_u_es_single(i, p, q); + norm = spis.argmax_off_diagonal(i, p, q); + } + }); + + // From the SVD results, the pseudo inverse would be + // U . [ E^-1.S ] . U^T + Kokkos::parallel_for( + "Example::SVD::fill_inv", + Kokkos::MDRangePolicy>( + space, {0, 0, 0}, {spis._num_matrices, spis._size, spis._size}), + KOKKOS_LAMBDA(std::size_t i, std::size_t j, std::size_t k) { + spis.fill_inv(i, j, k); + }); + + return spis._inv; + } + +private: + // U and E.S are computed, we can now build the inverse + // U . [ E^-1.S ] . U^T + KOKKOS_FUNCTION void fill_inv(std::size_t i, std::size_t j, std::size_t k) const + { + ValueType value = _zero; + for (std::size_t l = 0; l < _size; l++) + { + ValueType v = _es(i, l, l); + if (Kokkos::abs(v) > _epsilon) + { + value += _u(i, j, l) * _u(i, k, l) / v; + } + } + + _inv(i, j, k) = value; + } + + // We found the biggest value in our off-diagonal. 
We will remove it by + // computing a "local" svd and update U and E.S + KOKKOS_FUNCTION void compute_u_es_single(std::size_t i, std::size_t p, + std::size_t q) const + { + ValueType a = _es(i, p, p); + ValueType b = _es(i, p, q); + ValueType c = _es(i, q, q); + + // Our submatrix is now + // +----------+----------+ +---+---+ + // | es(p, p) | es(p, q) | | a | b | + // +----------+----------+ = +---+---+ + // | es(q, p) | es(q, q) | | b | c | + // +----------+----------+ +---+---+ + + // Lets compute u, v and theta such that + // +---+---+ +---+---+ + // | a | b | | u | 0 | + // +---+---+ = R(theta) * +---+---+ * R(theta)^T + // | b | c | | 0 | v | + // +---+---+ +---+---+ + + ValueType theta, u, v; + if (a == c) // <-- better to check if |a - c| < epsilon? + { + theta = _pi_4; + u = a + b; + v = a - b; + } + else + { + theta = _half * Kokkos::atan((_two * b) / (a - c)); + ValueType a_c_cos2 = (a - c) / Kokkos::cos(_two * theta); + u = _half * (a + c + a_c_cos2); + v = _half * (a + c - a_c_cos2); + } + ValueType cos = Kokkos::cos(theta); + ValueType sin = Kokkos::sin(theta); + + // Now lets compute the following new values for U amd E.S + // E.S <- R'(theta)^T . E.S . R'(theta) + // U <- U . R'(theta) + + // R'(theta)^T . E.S + for (std::size_t j = 0; j < _size; j++) + { + float es_ipj = _es(i, p, j); + float es_iqj = _es(i, q, j); + _es(i, p, j) = cos * es_ipj + sin * es_iqj; + _es(i, q, j) = -sin * es_ipj + cos * es_iqj; + } + + // [R'(theta)^T . E.S] . R'(theta) + for (std::size_t j = 0; j < _size; j++) + { + float es_ijp = _es(i, j, p); + float es_ijq = _es(i, j, q); + _es(i, j, p) = cos * es_ijp + sin * es_ijq; + _es(i, j, q) = -sin * es_ijp + cos * es_ijq; + } + + // U . 
R'(theta) + for (std::size_t j = 0; j < _size; j++) + { + float u_ijp = _u(i, j, p); + float u_ijq = _u(i, j, q); + _u(i, j, p) = cos * u_ijp + sin * u_ijq; + _u(i, j, q) = -sin * u_ijp + cos * u_ijq; + } + + // These should theorically hold but is it ok to force them to their + // real value? + _es(i, p, p) = u; + _es(i, q, q) = v; + _es(i, p, q) = _zero; + _es(i, q, p) = _zero; + } + + // This finds the biggest off-diagonal value of E.S as well as its + // coordinates. Being symmetric, we can always check on the upper + // triangle (and always have q > p) + KOKKOS_FUNCTION ValueType argmax_off_diagonal(std::size_t i, std::size_t &p, + std::size_t &q) const + { + ValueType max = _zero; + p = q = 0; + for (std::size_t j = 0; j < _size; j++) + { + for (std::size_t k = j + 1; k < _size; k++) + { + ValueType val = Kokkos::abs(_es(i, j, k)); + if (max < val) + { + max = val; + p = j; + q = k; + } + } + } + + return max; + } + + KOKKOS_FUNCTION + SymmPseudoInverseSVD(ExecutionSpace const &space, + Kokkos::View const &mats) + : _num_matrices(mats.extent(0)) + , _size(mats.extent(1)) + { + // mats must be an array of (symmetric) square matrices + assert(mats.extent(1) == mats.extent(2)); + + _es = Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::SVD::ES"), + mats.layout()); + Kokkos::deep_copy(space, _es, mats); + + _u = Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::SVD::U"), + mats.layout()); + Kokkos::parallel_for( + "Example::SVD::U_init", + Kokkos::MDRangePolicy>(space, {0, 0, 0}, + {_num_matrices, _size, _size}), + KOKKOS_LAMBDA(std::size_t i, std::size_t j, std::size_t k) { + _u(i, j, k) = ValueType((j == k)); + }); + + _inv = Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::SVD::inv"), + mats.layout()); + } + + Kokkos::View _es; + Kokkos::View _u; + Kokkos::View _inv; + std::size_t _num_matrices; + std::size_t _size; + + static constexpr ValueType _pi_4 = ValueType(M_PI_4); + static 
constexpr ValueType _epsilon = + std::numeric_limits::epsilon(); + static constexpr ValueType _half = ValueType(0.5); + static constexpr ValueType _two = ValueType(2); + static constexpr ValueType _zero = ValueType(0); +}; \ No newline at end of file From fdf04435b567ab113cce79ce30b93571378b8ae3 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Wed, 9 Aug 2023 15:55:19 -0400 Subject: [PATCH 19/44] MPI fixed --- examples/moving_least_squares/moving_least_squares.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 36b069621..d9ecfd1bd 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -114,7 +114,7 @@ int main(int argc, char *argv[]) Kokkos::MDRangePolicy>(space, {0, 0, 0}, {cube_side, cube_side, thickness}), KOKKOS_LAMBDA(int const i, int const j, int const k) { - source_points(i * cube_side * cube_side + j * cube_side + + source_points(i * cube_side * thickness + j * thickness + k) = ArborX::Point{ 20.f * (float(i) / (cube_side - 1) - .5f), 20.f * (float(j) / (cube_side - 1) - .5f), From 7ccec9e1cb2690182ad8a0773b7982f4ada9fd5c Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Wed, 9 Aug 2023 16:17:04 -0400 Subject: [PATCH 20/44] clang format --- .../moving_least_squares.cpp | 33 +++++++++---------- .../symmetric_pseudoinverse_svd.hpp | 3 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index d9ecfd1bd..b036fcfce 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -408,26 +408,23 @@ int main(int argc, char *argv[]) Kokkos::create_mirror_view(target_values_exact); Kokkos::deep_copy(space, target_values_exact_host, target_values_exact); - if 
(mpi_rank == 0) + std::stringstream ss{}; + float error = 0.f; + for (int i = 0; i < target_points_num; i++) { - std::stringstream ss{}; - float error = 0.f; - for (int i = 0; i < target_points_num; i++) - { - error = Kokkos::max( - Kokkos::abs(target_values_host(i) - target_values_exact_host(i)) / - Kokkos::abs(target_values_exact_host(i)), - error); - ss << mpi_rank << ": ==== Target " << i << '\n' - << mpi_rank << ": Interpolation: " << target_values_host(i) << '\n' - << mpi_rank << ": Real value : " << target_values_exact_host(i) - << '\n'; - } - ss << mpi_rank << ": ====\n" - << mpi_rank << ": Maximum relative error: " << error << std::endl; - - std::cout << ss.str(); + error = Kokkos::max( + Kokkos::abs(target_values_host(i) - target_values_exact_host(i)) / + Kokkos::abs(target_values_exact_host(i)), + error); + /* + ss << mpi_rank << ": ==== Target " << i << '\n' + << mpi_rank << ": Interpolation: " << target_values_host(i) << '\n' + << mpi_rank << ": Real value : " << target_values_exact_host(i) + << '\n'; */ } + ss << mpi_rank << ": Maximum relative error: " << error << std::endl; + + std::cout << ss.str(); MPI_Finalize(); return 0; diff --git a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp index 57f33c665..62d7a08f7 100644 --- a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp +++ b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp @@ -65,7 +65,8 @@ class SymmPseudoInverseSVD private: // U and E.S are computed, we can now build the inverse // U . [ E^-1.S ] . 
U^T - KOKKOS_FUNCTION void fill_inv(std::size_t i, std::size_t j, std::size_t k) const + KOKKOS_FUNCTION void fill_inv(std::size_t i, std::size_t j, + std::size_t k) const { ValueType value = _zero; for (std::size_t l = 0; l < _size; l++) From df15ad8c03025331dcbdc17de340cd74e3cdc2ca Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Thu, 10 Aug 2023 10:36:53 -0400 Subject: [PATCH 21/44] Templation of MPI communication --- .../moving_least_squares.cpp | 120 ++------------- examples/moving_least_squares/mpi_comms.hpp | 145 ++++++++++++++++++ 2 files changed, 158 insertions(+), 107 deletions(-) create mode 100644 examples/moving_least_squares/mpi_comms.hpp diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index b036fcfce..dc5f2459c 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -22,6 +22,7 @@ #include #include +#include "mpi_comms.hpp" #include "symmetric_pseudoinverse_svd.hpp" #include @@ -151,80 +152,9 @@ int main(int argc, char *argv[]) local_ranks(i) = index_ranks(i).second; }); - // Before moving on, we must gather the coordinates of all the requested - // source points. DTK does that by distributing in a "who wants what" matter - // The distribution is done in two phases. A first pass where every process - // receives the information on "who wants what" from them. 
Then a second pass - // is done where values are set up and sent back to processes - - // First pass setup - ArborX::Details::Distributor distributor_first(mpi_comm); - int const local_requests_num = - distributor_first.createFromSends(space, local_ranks); - - // "Middlemen" buffers - // - mpi_mid_in_indices(i) corresponds to an index that will be used to - // construct the final value - // - mpi_mid_rank(i) corresponds to the request origin for value (i) - // - mpi_mid_indices(i) corresponds to the point's index in the nn query - // from which mpi_mid_points(i) is attached to - Kokkos::View mpi_mid_in_indices( - "Example::mpi_mid_in_indices", local_requests_num); - Kokkos::View mpi_mid_indices("Example::mpi_mid_indices", - local_requests_num); - Kokkos::View mpi_mid_ranks("Example::mpi_mid_ranks", - local_requests_num); - Kokkos::View mpi_mid_points( - "Example::mpi_mid_points", local_requests_num); - - // First pass comms - Kokkos::View mpi_tmp("Example::mpi_tmp", - target_points_num * num_neighbors); - ArborX::iota(space, mpi_tmp); - ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( - space, distributor_first, mpi_tmp, mpi_mid_in_indices); - ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( - space, distributor_first, local_indices, mpi_mid_indices); - Kokkos::deep_copy(space, mpi_tmp, mpi_rank); - ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( - space, distributor_first, mpi_tmp, mpi_mid_ranks); - Kokkos::parallel_for( - "Example::mpi_mid_points_fill", - Kokkos::RangePolicy(space, 0, local_requests_num), - KOKKOS_LAMBDA(int const i) { - mpi_mid_points(i) = source_points(mpi_mid_indices(i)); - }); - - // This process now knows "who wants what" and is ready to send everything - // back - - // Second pass setup - ArborX::Details::Distributor distributor_second(mpi_comm); - int const local_responses_num = - distributor_second.createFromSends(space, mpi_mid_ranks); - Kokkos::View local_untreated_source_points( - 
"Example::local_untreated_source_points", - target_points_num * num_neighbors); - // We have local_responses_num == target_points_num * num_neighbors - - // Temporary buffers - Kokkos::View mpi_tmp_in_indices( - "Examples::mpi_tmp_in_indices", local_responses_num); - Kokkos::View mpi_tmp_points( - "Examples::mpi_tmp_points", local_responses_num); - - // Second pass comms - ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( - space, distributor_second, mpi_mid_points, mpi_tmp_points); - ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( - space, distributor_second, mpi_mid_in_indices, mpi_tmp_in_indices); - Kokkos::parallel_for( - "Example::local_untreated_source_points_fill", - Kokkos::RangePolicy(space, 0, local_responses_num), - KOKKOS_LAMBDA(int const i) { - local_untreated_source_points(mpi_tmp_in_indices(i)) = - mpi_tmp_points(i); - }); + MPIComms comms(space, mpi_comm, local_indices, + local_ranks); + auto local_source_points = comms.distribute(space, source_points); // Now that we have the neighbors, we recompute their position using // their target point as the origin. 
@@ -238,9 +168,9 @@ int main(int argc, char *argv[]) for (int j = offsets(i); j < offsets(i + 1); j++) { tr_source_points(i, j - offsets(i)) = ArborX::Point{ - local_untreated_source_points(j)[0] - target_points(i)[0], - local_untreated_source_points(j)[1] - target_points(i)[1], - local_untreated_source_points(j)[2] - target_points(i)[2], + local_source_points(j)[0] - target_points(i)[0], + local_source_points(j)[1] - target_points(i)[1], + local_source_points(j)[2] - target_points(i)[2], }; } }); @@ -350,31 +280,7 @@ int main(int argc, char *argv[]) source_values(i) = manufactured_solution(source_points(i)); }); - // To approximate the function, we have to gather the correct source values - // We have to redo part of the earlier passes - Kokkos::View mpi_mid_values("Example::mpi_mid_values", - local_requests_num); - Kokkos::parallel_for( - "Example::mpi_mid_values_fill", - Kokkos::RangePolicy(space, 0, local_requests_num), - KOKKOS_LAMBDA(int const i) { - mpi_mid_values(i) = source_values(mpi_mid_indices(i)); - }); - - Kokkos::View local_untreated_source_values( - "Example::local_untreated_source_values", - target_points_num * num_neighbors); - Kokkos::View mpi_tmp_values("Examples::mpi_tmp_values", - local_responses_num); - ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( - space, distributor_second, mpi_mid_values, mpi_tmp_values); - Kokkos::parallel_for( - "Example::local_untreated_source_values_fill", - Kokkos::RangePolicy(space, 0, local_responses_num), - KOKKOS_LAMBDA(int const i) { - local_untreated_source_values(mpi_tmp_in_indices(i)) = - mpi_tmp_values(i); - }); + auto local_source_values = comms.distribute(space, source_values); // Compute target values via interpolation Kokkos::View target_values("Example::target_values", @@ -386,7 +292,7 @@ int main(int argc, char *argv[]) float tmp = 0; for (int j = offsets(i); j < offsets(i + 1); j++) { - tmp += coeffs(i, j - offsets(i)) * local_untreated_source_values(j); + tmp += coeffs(i, j - offsets(i)) * 
local_source_values(j); } target_values(i) = tmp; }); @@ -416,11 +322,11 @@ int main(int argc, char *argv[]) Kokkos::abs(target_values_host(i) - target_values_exact_host(i)) / Kokkos::abs(target_values_exact_host(i)), error); - /* + ss << mpi_rank << ": ==== Target " << i << '\n' - << mpi_rank << ": Interpolation: " << target_values_host(i) << '\n' - << mpi_rank << ": Real value : " << target_values_exact_host(i) - << '\n'; */ + << mpi_rank << ": Interpolation: " << target_values_host(i) << '\n' + << mpi_rank << ": Real value : " << target_values_exact_host(i) + << '\n'; } ss << mpi_rank << ": Maximum relative error: " << error << std::endl; diff --git a/examples/moving_least_squares/mpi_comms.hpp b/examples/moving_least_squares/mpi_comms.hpp new file mode 100644 index 000000000..8fc04ffda --- /dev/null +++ b/examples/moving_least_squares/mpi_comms.hpp @@ -0,0 +1,145 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#include + +#include + +template +class MPIComms +{ +public: + MPIComms(ExecutionSpace const &space, MPI_Comm comm, + Kokkos::View indices, + Kokkos::View ranks) + : _distributor_back(comm) + { + assert(indices.extent(0) == ranks.extent(0)); + std::size_t data_len = indices.extent(0); + int rank; + MPI_Comm_rank(comm, &rank); + + Kokkos::View mpi_tmp( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MPI::tmp"), + data_len); + + // Computes what will be common to every exchange. 
Every time + // someone wants to get the value from the same set of elements, + // they will use the same list of recv and send indices. + // The rank data will be saved inside the back distributor, + // as the front one is not relevant once the recv indices + // are computed. + + // This builds for each process a local array indicating how much + // informatiom will be gathered + ArborX::Details::Distributor distributor_forth(comm); + _num_requests = distributor_forth.createFromSends(space, ranks); + + // This creates the temporary buffer that will help when producing the + // array that rebuilds the output + Kokkos::View mpi_rev_indices( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MPI::rev_indices"), + _num_requests); + ArborX::iota(space, mpi_tmp); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, distributor_forth, mpi_tmp, mpi_rev_indices); + + // This retrieves which source index a process wants and gives it to + // the process owning the source + _mpi_send_indices = Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MPI::send_indices"), + _num_requests); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, distributor_forth, indices, _mpi_send_indices); + + // This builds the temporary buffer that will create the reverse + // distributor to dispatch the values + Kokkos::View mpi_rev_ranks( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MPI::rev_ranks"), + _num_requests); + Kokkos::deep_copy(space, mpi_tmp, rank); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, distributor_forth, mpi_tmp, mpi_rev_ranks); + + // This will create the reverse of the previous distributor + _num_responses = _distributor_back.createFromSends(space, mpi_rev_ranks); + + // There should be enough responses to perfectly fill what was requested + assert(_num_responses == data_len); + + // The we send back the requested indices so that each process can rebuild + // the output + 
_mpi_recv_indices = Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MPI::recv_indices"), + _num_responses); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, _distributor_back, mpi_rev_indices, _mpi_recv_indices); + } + + template + Kokkos::View + distribute(ExecutionSpace const &space, + Kokkos::View const &source) + { + // We know what each process want so we prepare the data to be sent + Kokkos::View data_to_send( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MPI::data_to_send"), + _num_requests); + Kokkos::parallel_for( + "Example::MPI::data_to_send_fill", + Kokkos::RangePolicy(space, 0, _num_requests), + KOKKOS_CLASS_LAMBDA(int const i) { + data_to_send(i) = source(_mpi_send_indices(i)); + }); + + // Then we properly send it, and each process has what it wants, but in the + // wrong order + Kokkos::View data_to_recv( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MPI::data_to_recv"), + _num_responses); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, _distributor_back, data_to_send, data_to_recv); + + // So we fix this by moving everything + Kokkos::View output( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MPI::output"), + _num_responses); + Kokkos::parallel_for( + "Example::MPI::output_fill", + Kokkos::RangePolicy(space, 0, _num_responses), + KOKKOS_CLASS_LAMBDA(int const i) { + output(_mpi_recv_indices(i)) = data_to_recv(i); + }); + + return output; + } + +private: + using device = Kokkos::Device; + + Kokkos::View _mpi_send_indices; + Kokkos::View _mpi_recv_indices; + ArborX::Details::Distributor _distributor_back; + std::size_t _num_requests; + std::size_t _num_responses; +}; \ No newline at end of file From 0d3f7239a3cc376923cab1501c8d1908ce7c94c2 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Thu, 10 Aug 2023 12:45:40 -0400 Subject: [PATCH 22/44] clang format --- examples/moving_least_squares/mpi_comms.hpp | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/examples/moving_least_squares/mpi_comms.hpp b/examples/moving_least_squares/mpi_comms.hpp index 8fc04ffda..96aef8c0e 100644 --- a/examples/moving_least_squares/mpi_comms.hpp +++ b/examples/moving_least_squares/mpi_comms.hpp @@ -24,8 +24,8 @@ class MPIComms { public: MPIComms(ExecutionSpace const &space, MPI_Comm comm, - Kokkos::View indices, - Kokkos::View ranks) + Kokkos::View indices, + Kokkos::View ranks) : _distributor_back(comm) { assert(indices.extent(0) == ranks.extent(0)); From 4edbe1972ac9c6cf7a2980ebe7f12e74c2634362 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Thu, 10 Aug 2023 13:36:40 -0400 Subject: [PATCH 23/44] Switching from std::size_t to int const and removing missing floats --- .../symmetric_pseudoinverse_svd.hpp | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp index 62d7a08f7..6be2cf5d7 100644 --- a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp +++ b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp @@ -39,8 +39,8 @@ class SymmPseudoInverseSVD Kokkos::parallel_for( "Example::SVD::compute_U_ES", Kokkos::RangePolicy(space, 0, spis._num_matrices), - KOKKOS_LAMBDA(std::size_t i) { - std::size_t p, q; + KOKKOS_LAMBDA(int const i) { + int p, q; ValueType norm = spis.argmax_off_diagonal(i, p, q); while (norm > spis._epsilon) { @@ -55,7 +55,7 @@ class SymmPseudoInverseSVD "Example::SVD::fill_inv", Kokkos::MDRangePolicy>( space, {0, 0, 0}, {spis._num_matrices, spis._size, spis._size}), - KOKKOS_LAMBDA(std::size_t i, std::size_t j, std::size_t k) { + KOKKOS_LAMBDA(int const i, int const j, int const k) { spis.fill_inv(i, j, k); }); @@ -65,11 +65,10 @@ class SymmPseudoInverseSVD private: // U and E.S are computed, we can now build the inverse // U . [ E^-1.S ] . 
U^T - KOKKOS_FUNCTION void fill_inv(std::size_t i, std::size_t j, - std::size_t k) const + KOKKOS_FUNCTION void fill_inv(int const i, int const j, int const k) const { ValueType value = _zero; - for (std::size_t l = 0; l < _size; l++) + for (int l = 0; l < _size; l++) { ValueType v = _es(i, l, l); if (Kokkos::abs(v) > _epsilon) @@ -83,8 +82,8 @@ class SymmPseudoInverseSVD // We found the biggest value in our off-diagonal. We will remove it by // computing a "local" svd and update U and E.S - KOKKOS_FUNCTION void compute_u_es_single(std::size_t i, std::size_t p, - std::size_t q) const + KOKKOS_FUNCTION void compute_u_es_single(int const i, int const p, + int const q) const { ValueType a = _es(i, p, p); ValueType b = _es(i, p, q); @@ -126,28 +125,28 @@ class SymmPseudoInverseSVD // U <- U . R'(theta) // R'(theta)^T . E.S - for (std::size_t j = 0; j < _size; j++) + for (int j = 0; j < _size; j++) { - float es_ipj = _es(i, p, j); - float es_iqj = _es(i, q, j); + ValueType es_ipj = _es(i, p, j); + ValueType es_iqj = _es(i, q, j); _es(i, p, j) = cos * es_ipj + sin * es_iqj; _es(i, q, j) = -sin * es_ipj + cos * es_iqj; } // [R'(theta)^T . E.S] . R'(theta) - for (std::size_t j = 0; j < _size; j++) + for (int j = 0; j < _size; j++) { - float es_ijp = _es(i, j, p); - float es_ijq = _es(i, j, q); + ValueType es_ijp = _es(i, j, p); + ValueType es_ijq = _es(i, j, q); _es(i, j, p) = cos * es_ijp + sin * es_ijq; _es(i, j, q) = -sin * es_ijp + cos * es_ijq; } // U . R'(theta) - for (std::size_t j = 0; j < _size; j++) + for (int j = 0; j < _size; j++) { - float u_ijp = _u(i, j, p); - float u_ijq = _u(i, j, q); + ValueType u_ijp = _u(i, j, p); + ValueType u_ijq = _u(i, j, q); _u(i, j, p) = cos * u_ijp + sin * u_ijq; _u(i, j, q) = -sin * u_ijp + cos * u_ijq; } @@ -163,14 +162,14 @@ class SymmPseudoInverseSVD // This finds the biggest off-diagonal value of E.S as well as its // coordinates. 
Being symmetric, we can always check on the upper // triangle (and always have q > p) - KOKKOS_FUNCTION ValueType argmax_off_diagonal(std::size_t i, std::size_t &p, - std::size_t &q) const + KOKKOS_FUNCTION ValueType argmax_off_diagonal(int const i, int &p, + int &q) const { ValueType max = _zero; p = q = 0; - for (std::size_t j = 0; j < _size; j++) + for (int j = 0; j < _size; j++) { - for (std::size_t k = j + 1; k < _size; k++) + for (int k = j + 1; k < _size; k++) { ValueType val = Kokkos::abs(_es(i, j, k)); if (max < val) @@ -206,7 +205,7 @@ class SymmPseudoInverseSVD "Example::SVD::U_init", Kokkos::MDRangePolicy>(space, {0, 0, 0}, {_num_matrices, _size, _size}), - KOKKOS_LAMBDA(std::size_t i, std::size_t j, std::size_t k) { + KOKKOS_LAMBDA(int const i, int const j, int const k) { _u(i, j, k) = ValueType((j == k)); }); From 3a9afcf570202d8f272d8252ba812ebc6343752e Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Thu, 10 Aug 2023 14:37:59 -0400 Subject: [PATCH 24/44] Templation of the proper MLS computation --- .../moving_least_squares/mls_computation.hpp | 233 ++++++++++++++++++ .../moving_least_squares.cpp | 137 +--------- 2 files changed, 240 insertions(+), 130 deletions(-) create mode 100644 examples/moving_least_squares/mls_computation.hpp diff --git a/examples/moving_least_squares/mls_computation.hpp b/examples/moving_least_squares/mls_computation.hpp new file mode 100644 index 000000000..02c6c580a --- /dev/null +++ b/examples/moving_least_squares/mls_computation.hpp @@ -0,0 +1,233 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. 
* + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#include + +#include "symmetric_pseudoinverse_svd.hpp" + +template +class MLSComputation +{ +public: + MLSComputation( + ExecutionSpace const &space, + Kokkos::View const &source_points, + Kokkos::View const &target_points) + { + // There must be a list of num_neighbors source points for each + // target point + _num_neighbors = source_points.extent(0) / target_points.extent(0); + assert(source_points.extent(0) == target_points.extent(0) * _num_neighbors); + _num_targets = target_points.extent(0); + + auto source_ref_target = + translate_to_target(space, source_points, target_points); + + auto radii = compute_radii(space, source_ref_target); + auto phi = compute_weight(space, source_ref_target, radii); + auto p = compute_vandermonde(space, source_ref_target); + + auto a = compute_moment(space, phi, p); + auto a_inv = + SymmPseudoInverseSVD::compute_pseudo_inverses(space, a); + + compute_coefficients(space, phi, p, a_inv); + } + + Kokkos::View + eval(ExecutionSpace const &space, + Kokkos::View const &source_values) + { + Kokkos::View target_values( + "Example::MLS::target_values", _num_targets); + Kokkos::parallel_for( + "Example::MLS::target_interpolation", + Kokkos::RangePolicy(space, 0, _num_targets), + KOKKOS_LAMBDA(int const i) { + ValueType tmp = _zero; + for (int j = 0; j < _num_neighbors; j++) + { + tmp += _coeffs(i, j) * source_values(i * _num_neighbors + j); + } + target_values(i) = tmp; + }); + + return target_values; + } + +private: + Kokkos::View translate_to_target( + ExecutionSpace const &space, + Kokkos::View const &source_points, + Kokkos::View const &target_points) + { + // We center each group around the target as it ables you to + // optimize the final computation + Kokkos::View source_ref_target( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + 
"Example::MLS::source_ref_target"), + _num_targets, _num_neighbors); + Kokkos::parallel_for( + "Example::MLS::source_ref_target_fill", + Kokkos::MDRangePolicy>(space, {0, 0}, + {_num_targets, _num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + ArborX::Point src = source_points(i * _num_neighbors + j); + ArborX::Point tgt = target_points(i); + source_ref_target(i, j) = ArborX::Point{ + src[0] - tgt[0], + src[1] - tgt[1], + src[2] - tgt[2], + }; + }); + + return source_ref_target; + } + + Kokkos::View compute_radii( + ExecutionSpace const &space, + Kokkos::View const &source_ref_target) + { + Kokkos::View radii( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLS::radii"), + _num_targets); + Kokkos::parallel_for( + "Example::MLS::radii_computation", + Kokkos::RangePolicy(space, 0, _num_targets), + KOKKOS_LAMBDA(int const i) { + ValueType radius = _ten * _epsilon; + for (int j = 0; j < _num_neighbors; j++) + { + ValueType norm = + ArborX::Details::distance(source_ref_target(i, j), _origin); + radius = (radius < norm) ? norm : radius; + } + radii(i) = _one_extra * radius; + }); + + return radii; + } + + Kokkos::View compute_weight( + ExecutionSpace const &space, + Kokkos::View const &source_ref_target, + Kokkos::View const &radii) + { + Kokkos::View phi( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLS::phi"), + _num_targets, _num_neighbors); + Kokkos::parallel_for( + "Example::MLS::phi_computation", + Kokkos::MDRangePolicy>(space, {0, 0}, + {_num_targets, _num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + RBF rbf{radii(i)}; + ValueType norm = + ArborX::Details::distance(source_ref_target(i, j), _origin); + phi(i, j) = rbf(norm); + }); + + return phi; + } + + Kokkos::View compute_vandermonde( + ExecutionSpace const &space, + Kokkos::View const &source_ref_target) + { + // Instead of relying on an external type, could it be produced + // automatically? 
+ Kokkos::View p( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLS::vandermonde"), + _num_targets, _num_neighbors, PolynomialBasis::size); + Kokkos::parallel_for( + "Example::MLS::vandermonde_computation", + Kokkos::MDRangePolicy>(space, {0, 0}, + {_num_targets, _num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + auto basis = PolynomialBasis::basis(source_ref_target(i, j)); + for (int k = 0; k < PolynomialBasis::size; k++) + { + p(i, j, k) = basis[k]; + } + }); + + return p; + } + + Kokkos::View + compute_moment(ExecutionSpace const &space, + Kokkos::View const &phi, + Kokkos::View const &p) + { + Kokkos::View a( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLS::moment"), + _num_targets, PolynomialBasis::size, PolynomialBasis::size); + Kokkos::parallel_for( + "Example::MLS::moment_computation", + Kokkos::MDRangePolicy>( + space, {0, 0, 0}, + {_num_targets, PolynomialBasis::size, PolynomialBasis::size}), + KOKKOS_LAMBDA(int const i, int const j, int const k) { + ValueType tmp = _zero; + for (int l = 0; l < _num_neighbors; l++) + { + tmp += p(i, l, j) * p(i, l, k) * phi(i, l); + } + a(i, j, k) = tmp; + }); + + return a; + } + + void + compute_coefficients(ExecutionSpace const &space, + Kokkos::View const &phi, + Kokkos::View const &p, + Kokkos::View const &a_inv) + { + _coeffs = Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLS::coefficients"), + _num_targets, _num_neighbors); + Kokkos::parallel_for( + "Example::MLS::coefficients", + Kokkos::MDRangePolicy>(space, {0, 0}, + {_num_targets, _num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + ValueType tmp = _zero; + for (int k = 0; k < PolynomialBasis::size; k++) + { + tmp += a_inv(i, 0, k) * p(i, j, k) * phi(i, j); + } + _coeffs(i, j) = tmp; + }); + } + + Kokkos::View _coeffs; + std::size_t _num_targets; + std::size_t _num_neighbors; + + static constexpr ValueType _zero = ValueType(0); + static constexpr ValueType _ten = 
ValueType(10); + static constexpr ValueType _epsilon = + std::numeric_limits::epsilon(); + static constexpr ValueType _one_extra = ValueType(1.1); + static constexpr ArborX::Point _origin = ArborX::Point{0, 0, 0}; +}; \ No newline at end of file diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index dc5f2459c..a9b7ae8f4 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -22,8 +22,8 @@ #include #include +#include "mls_computation.hpp" #include "mpi_comms.hpp" -#include "symmetric_pseudoinverse_svd.hpp" #include using ExecutionSpace = Kokkos::DefaultExecutionSpace; @@ -45,8 +45,8 @@ struct MVPolynomialBasis_3D { static constexpr std::size_t size = 10; - KOKKOS_INLINE_FUNCTION Kokkos::Array - operator()(ArborX::Point const &p) const + KOKKOS_INLINE_FUNCTION static Kokkos::Array + basis(ArborX::Point const &p) { return {{1.f, p[0], p[1], p[2], p[0] * p[0], p[0] * p[1], p[0] * p[2], p[1] * p[1], p[1] * p[2], p[2] * p[2]}}; @@ -156,119 +156,9 @@ int main(int argc, char *argv[]) local_ranks); auto local_source_points = comms.distribute(space, source_points); - // Now that we have the neighbors, we recompute their position using - // their target point as the origin. 
- // This is used as an optimisation later in the algorithm - Kokkos::View tr_source_points( - "Example::tr_source_points", target_points_num, num_neighbors); - Kokkos::parallel_for( - "Example::transform_source_points", - Kokkos::RangePolicy(space, 0, target_points_num), - KOKKOS_LAMBDA(int const i) { - for (int j = offsets(i); j < offsets(i + 1); j++) - { - tr_source_points(i, j - offsets(i)) = ArborX::Point{ - local_source_points(j)[0] - target_points(i)[0], - local_source_points(j)[1] - target_points(i)[1], - local_source_points(j)[2] - target_points(i)[2], - }; - } - }); - - // Compute the radii for the weight (phi) vector - Kokkos::View radii("Example::radii", target_points_num); - Kokkos::parallel_for( - "Example::radii_computation", - Kokkos::RangePolicy(space, 0, target_points_num), - KOKKOS_LAMBDA(int const i) { - float radius = 10.f * epsilon; - - for (int j = 0; j < num_neighbors; j++) - { - float norm = ArborX::Details::distance(tr_source_points(i, j), - ArborX::Point{0.f, 0.f, 0.f}); - radius = (radius < norm) ? 
norm : radius; - } - - radii(i) = 1.1f * radius; - }); - - // Compute the weight (phi) vector - Kokkos::View phi("Example::phi", target_points_num, - num_neighbors); - Kokkos::parallel_for( - "Example::phi_computation", - Kokkos::RangePolicy(space, 0, target_points_num), - KOKKOS_LAMBDA(int const i) { - RBFWendland_0 rbf{radii(i)}; - - for (int j = 0; j < num_neighbors; j++) - { - float norm = ArborX::Details::distance(tr_source_points(i, j), - ArborX::Point{0.f, 0.f, 0.f}); - phi(i, j) = rbf(norm); - } - }); - - // Compute multivariable Vandermonde (P) matrix - Kokkos::View p("Example::vandermonde", - target_points_num, num_neighbors, - MVPolynomialBasis_3D::size); - Kokkos::parallel_for( - "Example::vandermonde_computation", - Kokkos::MDRangePolicy>( - space, {0, 0}, {target_points_num, num_neighbors}), - KOKKOS_LAMBDA(int const i, int const j) { - auto basis = MVPolynomialBasis_3D{}(tr_source_points(i, j)); - - for (int k = 0; k < MVPolynomialBasis_3D::size; k++) - { - p(i, j, k) = basis[k]; - } - }); - - // Compute moment (A) matrix - Kokkos::View a("Example::A", target_points_num, - MVPolynomialBasis_3D::size, - MVPolynomialBasis_3D::size); - Kokkos::parallel_for( - "Example::A_computation", - Kokkos::MDRangePolicy>(space, {0, 0, 0}, - {target_points_num, - MVPolynomialBasis_3D::size, - MVPolynomialBasis_3D::size}), - KOKKOS_LAMBDA(int const i, int const j, int const k) { - float tmp = 0; - for (int l = 0; l < num_neighbors; l++) - { - tmp += p(i, l, j) * p(i, l, k) * phi(i, l); - } - - a(i, j, k) = tmp; - }); - - // Compute the pseudo inverse - auto a_inv = - SymmPseudoInverseSVD::compute_pseudo_inverses(space, a); - - // Compute the coefficients - Kokkos::View coeffs("Example::coefficients", - target_points_num, num_neighbors); - Kokkos::parallel_for( - "Example::coefficients_computation", - Kokkos::MDRangePolicy>( - space, {0, 0}, {target_points_num, num_neighbors}), - KOKKOS_LAMBDA(int const i, int const j) { - float tmp = 0.f; - - for (int k = 0; k < 
MVPolynomialBasis_3D::size; k++) - { - tmp += a_inv(i, 0, k) * p(i, j, k) * phi(i, j); - } - - coeffs(i, j) = tmp; - }); + MLSComputation + mlsc(space, local_source_points, target_points); // Compute source values Kokkos::View source_values("Example::source_values", @@ -282,20 +172,7 @@ int main(int argc, char *argv[]) auto local_source_values = comms.distribute(space, source_values); - // Compute target values via interpolation - Kokkos::View target_values("Example::target_values", - target_points_num); - Kokkos::parallel_for( - "Example::target_interpolation", - Kokkos::RangePolicy(space, 0, target_points_num), - KOKKOS_LAMBDA(int const i) { - float tmp = 0; - for (int j = offsets(i); j < offsets(i + 1); j++) - { - tmp += coeffs(i, j - offsets(i)) * local_source_values(j); - } - target_values(i) = tmp; - }); + auto target_values = mlsc.eval(space, local_source_values); // Compute target values via evaluation Kokkos::View target_values_exact( From 68e199ac7ea4f33d701fc0c823a75917dd644776 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Thu, 10 Aug 2023 14:41:22 -0400 Subject: [PATCH 25/44] CMake MPI check --- examples/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 6d486bc72..15b7e5b7f 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -10,7 +10,6 @@ add_subdirectory(callback) add_subdirectory(dbscan) add_subdirectory(molecular_dynamics) add_subdirectory(simple_intersection) -add_subdirectory(moving_least_squares) find_package(Boost COMPONENTS program_options) if(Boost_FOUND) @@ -18,3 +17,7 @@ if(Boost_FOUND) add_subdirectory(raytracing) add_subdirectory(brute_force) endif() + +if(ARBORX_ENABLE_MPI) + add_subdirectory(moving_least_squares) +endif() From 894e80f68bcb9cc4bec99ef3aea7a39f60809b62 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Mon, 14 Aug 2023 13:01:32 -0400 Subject: [PATCH 26/44] Templated Moving Least Squares --- 
examples/moving_least_squares/mls.hpp | 115 ++++++++++++++++++ .../moving_least_squares/mls_computation.hpp | 43 ++++--- .../moving_least_squares.cpp | 66 +--------- examples/moving_least_squares/mpi_comms.hpp | 4 + 4 files changed, 149 insertions(+), 79 deletions(-) create mode 100644 examples/moving_least_squares/mls.hpp diff --git a/examples/moving_least_squares/mls.hpp b/examples/moving_least_squares/mls.hpp new file mode 100644 index 000000000..1ffb90c90 --- /dev/null +++ b/examples/moving_least_squares/mls.hpp @@ -0,0 +1,115 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#include + +#include "mls_computation.hpp" +#include "mpi_comms.hpp" + +template +struct TargetPoints +{ + Kokkos::View target_points; + std::size_t num_neighbors; +}; + +template +class MLS +{ +public: + MLS(ExecutionSpace const &space, MPI_Comm comm, std::size_t num_neighbors, + Kokkos::View const &source_points, + Kokkos::View const &target_points) + : _num_neighbors(num_neighbors) + , _src_size(source_points.extent(0)) + , _tgt_size(target_points.extent(0)) + , _comms(comm) + { + // There must be enough source points + assert(_src_size >= _num_neighbors); + + // Organize source points as tree + ArborX::DistributedTree source_tree(comm, space, + source_points); + + // Perform the query + Kokkos::View *, MemorySpace> index_ranks( + "Example::MLS::index_ranks", 0); + Kokkos::View offsets("Example::MLS::offsets", 0); + source_tree.query(space, + TargetPoints{target_points, _num_neighbors}, + index_ranks, offsets); + + // Split 
indices/ranks + Kokkos::View local_indices( + "Example::MLS::local_indices", _tgt_size * _num_neighbors); + Kokkos::View local_ranks("Example::MLS::local_ranks", + _tgt_size * _num_neighbors); + Kokkos::parallel_for( + "Example::MLS::index_ranks_split", + Kokkos::RangePolicy(space, 0, + _tgt_size * _num_neighbors), + KOKKOS_LAMBDA(int const i) { + local_indices(i) = index_ranks(i).first; + local_ranks(i) = index_ranks(i).second; + }); + + // Set up comms and local source points + _comms = MPIComms(space, comm, local_indices, + local_ranks); + auto local_source_points = _comms.distribute(space, source_points); + + // Compute the internal MLS + _mlsc = + MLSComputation(space, local_source_points, target_points); + } + + Kokkos::View + evaluate(ExecutionSpace const &space, + Kokkos::View const &source_values) + { + assert(source_values.extent(0) == _src_size); + return _mlsc.evaluate(space, _comms.distribute(space, source_values)); + } + +private: + MLSComputation + _mlsc; + MPIComms _comms; + std::size_t _num_neighbors; + std::size_t _src_size; + std::size_t _tgt_size; +}; + +template +struct ArborX::AccessTraits, ArborX::PredicatesTag> +{ + static KOKKOS_FUNCTION std::size_t size(TargetPoints const &tp) + { + return tp.target_points.extent(0); + } + + static KOKKOS_FUNCTION auto get(TargetPoints const &tp, + std::size_t i) + { + return ArborX::nearest(tp.target_points(i), tp.num_neighbors); + } + + using memory_space = MemorySpace; +}; \ No newline at end of file diff --git a/examples/moving_least_squares/mls_computation.hpp b/examples/moving_least_squares/mls_computation.hpp index 02c6c580a..ac13df190 100644 --- a/examples/moving_least_squares/mls_computation.hpp +++ b/examples/moving_least_squares/mls_computation.hpp @@ -24,16 +24,18 @@ template const &source_points, Kokkos::View const &target_points) + : _num_neighbors(source_points.extent(0) / target_points.extent(0)) + , _num_targets(target_points.extent(0)) { // There must be a list of num_neighbors source 
points for each // target point - _num_neighbors = source_points.extent(0) / target_points.extent(0); - assert(source_points.extent(0) == target_points.extent(0) * _num_neighbors); - _num_targets = target_points.extent(0); + assert(source_points.extent(0) == _num_targets * _num_neighbors); auto source_ref_target = translate_to_target(space, source_points, target_points); @@ -51,13 +53,15 @@ class MLSComputation } Kokkos::View - eval(ExecutionSpace const &space, - Kokkos::View const &source_values) + evaluate(ExecutionSpace const &space, + Kokkos::View const &source_values) { + assert(source_values.extent(0) == _num_targets * _num_neighbors); + Kokkos::View target_values( - "Example::MLS::target_values", _num_targets); + "Example::MLSC::target_values", _num_targets); Kokkos::parallel_for( - "Example::MLS::target_interpolation", + "Example::MLSC::target_interpolation", Kokkos::RangePolicy(space, 0, _num_targets), KOKKOS_LAMBDA(int const i) { ValueType tmp = _zero; @@ -81,10 +85,10 @@ class MLSComputation // optimize the final computation Kokkos::View source_ref_target( Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MLS::source_ref_target"), + "Example::MLSC::source_ref_target"), _num_targets, _num_neighbors); Kokkos::parallel_for( - "Example::MLS::source_ref_target_fill", + "Example::MLSC::source_ref_target_fill", Kokkos::MDRangePolicy>(space, {0, 0}, {_num_targets, _num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { @@ -105,10 +109,10 @@ class MLSComputation Kokkos::View const &source_ref_target) { Kokkos::View radii( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLS::radii"), + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::radii"), _num_targets); Kokkos::parallel_for( - "Example::MLS::radii_computation", + "Example::MLSC::radii_computation", Kokkos::RangePolicy(space, 0, _num_targets), KOKKOS_LAMBDA(int const i) { ValueType radius = _ten * _epsilon; @@ -130,10 +134,10 @@ class MLSComputation Kokkos::View const 
&radii) { Kokkos::View phi( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLS::phi"), + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::phi"), _num_targets, _num_neighbors); Kokkos::parallel_for( - "Example::MLS::phi_computation", + "Example::MLSC::phi_computation", Kokkos::MDRangePolicy>(space, {0, 0}, {_num_targets, _num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { @@ -154,10 +158,10 @@ class MLSComputation // automatically? Kokkos::View p( Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MLS::vandermonde"), + "Example::MLSC::vandermonde"), _num_targets, _num_neighbors, PolynomialBasis::size); Kokkos::parallel_for( - "Example::MLS::vandermonde_computation", + "Example::MLSC::vandermonde_computation", Kokkos::MDRangePolicy>(space, {0, 0}, {_num_targets, _num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { @@ -177,10 +181,11 @@ class MLSComputation Kokkos::View const &p) { Kokkos::View a( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLS::moment"), + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLSC::moment"), _num_targets, PolynomialBasis::size, PolynomialBasis::size); Kokkos::parallel_for( - "Example::MLS::moment_computation", + "Example::MLSC::moment_computation", Kokkos::MDRangePolicy>( space, {0, 0, 0}, {_num_targets, PolynomialBasis::size, PolynomialBasis::size}), @@ -204,10 +209,10 @@ class MLSComputation { _coeffs = Kokkos::View( Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MLS::coefficients"), + "Example::MLSC::coefficients"), _num_targets, _num_neighbors); Kokkos::parallel_for( - "Example::MLS::coefficients", + "Example::MLSC::coefficients", Kokkos::MDRangePolicy>(space, {0, 0}, {_num_targets, _num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index a9b7ae8f4..f77fa8567 100644 --- 
a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -18,12 +18,9 @@ #include -#include -#include #include -#include "mls_computation.hpp" -#include "mpi_comms.hpp" +#include "mls.hpp" #include using ExecutionSpace = Kokkos::DefaultExecutionSpace; @@ -53,28 +50,6 @@ struct MVPolynomialBasis_3D } }; -struct TargetPoints -{ - Kokkos::View target_points; - std::size_t num_neighbors; -}; - -template <> -struct ArborX::AccessTraits -{ - static KOKKOS_FUNCTION std::size_t size(TargetPoints const &tp) - { - return tp.target_points.extent(0); - } - - static KOKKOS_FUNCTION auto get(TargetPoints const &tp, std::size_t i) - { - return ArborX::nearest(tp.target_points(i), tp.num_neighbors); - } - - using memory_space = MemorySpace; -}; - // Function to approximate KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) { @@ -86,7 +61,6 @@ int main(int argc, char *argv[]) MPI_Init(&argc, &argv); Kokkos::ScopeGuard guard(argc, argv); - constexpr float epsilon = std::numeric_limits::epsilon(); constexpr std::size_t num_neighbors = MVPolynomialBasis_3D::size; constexpr std::size_t cube_side = 20; constexpr std::size_t source_points_num = cube_side * cube_side * cube_side; @@ -129,36 +103,9 @@ int main(int argc, char *argv[]) target_points_host(3) = ArborX::Point{1.f, -3.3f, 7.f}; Kokkos::deep_copy(space, target_points, target_points_host); - // Organize source points as tree - ArborX::DistributedTree source_tree(mpi_comm, space, - source_points); - - // Perform the query and split the indices/ranks - Kokkos::View *, MemorySpace> index_ranks( - "Example::index_ranks", 0); - Kokkos::View offsets("Example::offsets", 0); - source_tree.query(space, TargetPoints{target_points, num_neighbors}, - index_ranks, offsets); - Kokkos::View local_indices( - "Example::local_indices", target_points_num * num_neighbors); - Kokkos::View local_ranks( - "Example::local_ranks", target_points_num * num_neighbors); - 
Kokkos::parallel_for( - "Example::index_ranks_split", - Kokkos::RangePolicy(space, 0, - target_points_num * num_neighbors), - KOKKOS_LAMBDA(int const i) { - local_indices(i) = index_ranks(i).first; - local_ranks(i) = index_ranks(i).second; - }); - - MPIComms comms(space, mpi_comm, local_indices, - local_ranks); - auto local_source_points = comms.distribute(space, source_points); - - MLSComputation - mlsc(space, local_source_points, target_points); + // Create the transform from a point cloud to another + MLS + mls(space, mpi_comm, num_neighbors, source_points, target_points); // Compute source values Kokkos::View source_values("Example::source_values", @@ -170,9 +117,8 @@ int main(int argc, char *argv[]) source_values(i) = manufactured_solution(source_points(i)); }); - auto local_source_values = comms.distribute(space, source_values); - - auto target_values = mlsc.eval(space, local_source_values); + // Compute target values from source ones + auto target_values = mls.evaluate(space, source_values); // Compute target values via evaluation Kokkos::View target_values_exact( diff --git a/examples/moving_least_squares/mpi_comms.hpp b/examples/moving_least_squares/mpi_comms.hpp index 96aef8c0e..754b3db4d 100644 --- a/examples/moving_least_squares/mpi_comms.hpp +++ b/examples/moving_least_squares/mpi_comms.hpp @@ -23,6 +23,10 @@ template class MPIComms { public: + MPIComms(MPI_Comm comm) + : _distributor_back(comm) + {} + MPIComms(ExecutionSpace const &space, MPI_Comm comm, Kokkos::View indices, Kokkos::View ranks) From 1e6d4a5bb9a152dda56678a040b5056ddc654a24 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Mon, 14 Aug 2023 16:17:44 -0400 Subject: [PATCH 27/44] Better RBF --- examples/moving_least_squares/mls_computation.hpp | 3 +-- examples/moving_least_squares/moving_least_squares.cpp | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/examples/moving_least_squares/mls_computation.hpp b/examples/moving_least_squares/mls_computation.hpp index 
ac13df190..cd2358096 100644 --- a/examples/moving_least_squares/mls_computation.hpp +++ b/examples/moving_least_squares/mls_computation.hpp @@ -141,10 +141,9 @@ class MLSComputation Kokkos::MDRangePolicy>(space, {0, 0}, {_num_targets, _num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { - RBF rbf{radii(i)}; ValueType norm = ArborX::Details::distance(source_ref_target(i, j), _origin); - phi(i, j) = rbf(norm); + phi(i, j) = RBF::evaluate(norm / radii(i)); }); return phi; diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index f77fa8567..108c6f841 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -29,13 +29,10 @@ using DeviceSpace = Kokkos::Device; struct RBFWendland_0 { - KOKKOS_INLINE_FUNCTION float operator()(float x) + KOKKOS_INLINE_FUNCTION static float evaluate(float x) { - x /= _radius; return (1.f - x) * (1.f - x); } - - float _radius; }; struct MVPolynomialBasis_3D From 52fe5e11c57304913dcd0c62360346a234a02552 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 15 Aug 2023 09:41:56 -0400 Subject: [PATCH 28/44] Removed DeviceType, duplicated communicators --- examples/moving_least_squares/mls.hpp | 7 ++- .../moving_least_squares/mls_computation.hpp | 6 +-- .../moving_least_squares.cpp | 5 +- examples/moving_least_squares/mpi_comms.hpp | 50 ++++++++++++------- 4 files changed, 41 insertions(+), 27 deletions(-) diff --git a/examples/moving_least_squares/mls.hpp b/examples/moving_least_squares/mls.hpp index 1ffb90c90..39ad86f8a 100644 --- a/examples/moving_least_squares/mls.hpp +++ b/examples/moving_least_squares/mls.hpp @@ -38,7 +38,6 @@ class MLS : _num_neighbors(num_neighbors) , _src_size(source_points.extent(0)) , _tgt_size(target_points.extent(0)) - , _comms(comm) { // There must be enough source points assert(_src_size >= _num_neighbors); @@ -81,11 +80,11 @@ class MLS } Kokkos::View - 
evaluate(ExecutionSpace const &space, - Kokkos::View const &source_values) + apply(ExecutionSpace const &space, + Kokkos::View const &source_values) { assert(source_values.extent(0) == _src_size); - return _mlsc.evaluate(space, _comms.distribute(space, source_values)); + return _mlsc.apply(space, _comms.distribute(space, source_values)); } private: diff --git a/examples/moving_least_squares/mls_computation.hpp b/examples/moving_least_squares/mls_computation.hpp index cd2358096..032fda78f 100644 --- a/examples/moving_least_squares/mls_computation.hpp +++ b/examples/moving_least_squares/mls_computation.hpp @@ -53,8 +53,8 @@ class MLSComputation } Kokkos::View - evaluate(ExecutionSpace const &space, - Kokkos::View const &source_values) + apply(ExecutionSpace const &space, + Kokkos::View const &source_values) { assert(source_values.extent(0) == _num_targets * _num_neighbors); @@ -143,7 +143,7 @@ class MLSComputation KOKKOS_LAMBDA(int const i, int const j) { ValueType norm = ArborX::Details::distance(source_ref_target(i, j), _origin); - phi(i, j) = RBF::evaluate(norm / radii(i)); + phi(i, j) = RBF::apply(norm / radii(i)); }); return phi; diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 108c6f841..425465b2d 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -25,11 +25,10 @@ using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; -using DeviceSpace = Kokkos::Device; struct RBFWendland_0 { - KOKKOS_INLINE_FUNCTION static float evaluate(float x) + KOKKOS_INLINE_FUNCTION static float apply(float x) { return (1.f - x) * (1.f - x); } @@ -115,7 +114,7 @@ int main(int argc, char *argv[]) }); // Compute target values from source ones - auto target_values = mls.evaluate(space, source_values); + auto target_values = mls.apply(space, source_values); // Compute target values 
via evaluation Kokkos::View target_values_exact( diff --git a/examples/moving_least_squares/mpi_comms.hpp b/examples/moving_least_squares/mpi_comms.hpp index 754b3db4d..e83bb1565 100644 --- a/examples/moving_least_squares/mpi_comms.hpp +++ b/examples/moving_least_squares/mpi_comms.hpp @@ -16,6 +16,8 @@ #include #include +#include +#include #include @@ -23,19 +25,31 @@ template class MPIComms { public: - MPIComms(MPI_Comm comm) - : _distributor_back(comm) - {} + MPIComms() = default; MPIComms(ExecutionSpace const &space, MPI_Comm comm, Kokkos::View indices, Kokkos::View ranks) - : _distributor_back(comm) { assert(indices.extent(0) == ranks.extent(0)); std::size_t data_len = indices.extent(0); + + _comm.reset( + [comm]() { + auto p = new MPI_Comm; + MPI_Comm_dup(comm, p); + return p; + }(), + [](MPI_Comm *p) { + int mpi_finalized; + MPI_Finalized(&mpi_finalized); + if (!mpi_finalized) + MPI_Comm_free(p); + delete p; + }); + int rank; - MPI_Comm_rank(comm, &rank); + MPI_Comm_rank(*_comm, &rank); Kokkos::View mpi_tmp( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MPI::tmp"), @@ -50,7 +64,7 @@ class MPIComms // This builds for each process a local array indicating how much // informatiom will be gathered - ArborX::Details::Distributor distributor_forth(comm); + ArborX::Details::Distributor distributor_forth(*_comm); _num_requests = distributor_forth.createFromSends(space, ranks); // This creates the temporary buffer that will help when producing the @@ -60,7 +74,7 @@ class MPIComms "Example::MPI::rev_indices"), _num_requests); ArborX::iota(space, mpi_tmp); - ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( space, distributor_forth, mpi_tmp, mpi_rev_indices); // This retrieves which source index a process wants and gives it to @@ -69,7 +83,7 @@ class MPIComms Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MPI::send_indices"), _num_requests); - 
ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( space, distributor_forth, indices, _mpi_send_indices); // This builds the temporary buffer that will create the reverse @@ -79,11 +93,12 @@ class MPIComms "Example::MPI::rev_ranks"), _num_requests); Kokkos::deep_copy(space, mpi_tmp, rank); - ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( space, distributor_forth, mpi_tmp, mpi_rev_ranks); // This will create the reverse of the previous distributor - _num_responses = _distributor_back.createFromSends(space, mpi_rev_ranks); + _distributor_back = ArborX::Details::Distributor(*_comm); + _num_responses = _distributor_back->createFromSends(space, mpi_rev_ranks); // There should be enough responses to perfectly fill what was requested assert(_num_responses == data_len); @@ -94,8 +109,8 @@ class MPIComms Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MPI::recv_indices"), _num_responses); - ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( - space, _distributor_back, mpi_rev_indices, _mpi_recv_indices); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, *_distributor_back, mpi_rev_indices, _mpi_recv_indices); } template @@ -103,6 +118,8 @@ class MPIComms distribute(ExecutionSpace const &space, Kokkos::View const &source) { + assert(_distributor_back.has_value()); + // We know what each process want so we prepare the data to be sent Kokkos::View data_to_send( Kokkos::view_alloc(Kokkos::WithoutInitializing, @@ -121,8 +138,8 @@ class MPIComms Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MPI::data_to_recv"), _num_responses); - ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( - space, _distributor_back, data_to_send, data_to_recv); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, *_distributor_back, data_to_send, data_to_recv); // So we fix this by moving everything 
Kokkos::View output( @@ -139,11 +156,10 @@ class MPIComms } private: - using device = Kokkos::Device; - + std::shared_ptr _comm; Kokkos::View _mpi_send_indices; Kokkos::View _mpi_recv_indices; - ArborX::Details::Distributor _distributor_back; + std::optional> _distributor_back; std::size_t _num_requests; std::size_t _num_responses; }; \ No newline at end of file From db39716690710bd9031d2503893649ef908624f1 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 15 Aug 2023 09:53:40 -0400 Subject: [PATCH 29/44] Style fixes and num_neighbors as an optional arg --- examples/moving_least_squares/mls.hpp | 5 +-- .../moving_least_squares/mls_computation.hpp | 36 +++++++++---------- .../moving_least_squares.cpp | 3 +- .../symmetric_pseudoinverse_svd.hpp | 21 ++++++----- 4 files changed, 32 insertions(+), 33 deletions(-) diff --git a/examples/moving_least_squares/mls.hpp b/examples/moving_least_squares/mls.hpp index 39ad86f8a..57ad2f961 100644 --- a/examples/moving_least_squares/mls.hpp +++ b/examples/moving_least_squares/mls.hpp @@ -32,9 +32,10 @@ template const &source_points, - Kokkos::View const &target_points) + Kokkos::View const &target_points, + std::size_t num_neighbors = PolynomialBasis::size) : _num_neighbors(num_neighbors) , _src_size(source_points.extent(0)) , _tgt_size(target_points.extent(0)) diff --git a/examples/moving_least_squares/mls_computation.hpp b/examples/moving_least_squares/mls_computation.hpp index 032fda78f..8b1cd97ec 100644 --- a/examples/moving_least_squares/mls_computation.hpp +++ b/examples/moving_least_squares/mls_computation.hpp @@ -38,18 +38,18 @@ class MLSComputation assert(source_points.extent(0) == _num_targets * _num_neighbors); auto source_ref_target = - translate_to_target(space, source_points, target_points); + translateToTarget(space, source_points, target_points); - auto radii = compute_radii(space, source_ref_target); - auto phi = compute_weight(space, source_ref_target, radii); - auto p = compute_vandermonde(space, 
source_ref_target); + auto radii = computeRadii(space, source_ref_target); + auto phi = computeWeight(space, source_ref_target, radii); + auto p = computeVandermonde(space, source_ref_target); - auto a = compute_moment(space, phi, p); + auto a = computeMoment(space, phi, p); auto a_inv = SymmPseudoInverseSVD::compute_pseudo_inverses(space, a); + MemorySpace>::computePseudoInverses(space, a); - compute_coefficients(space, phi, p, a_inv); + computeCoefficients(space, phi, p, a_inv); } Kokkos::View @@ -76,7 +76,7 @@ class MLSComputation } private: - Kokkos::View translate_to_target( + Kokkos::View translateToTarget( ExecutionSpace const &space, Kokkos::View const &source_points, Kokkos::View const &target_points) @@ -104,7 +104,7 @@ class MLSComputation return source_ref_target; } - Kokkos::View compute_radii( + Kokkos::View computeRadii( ExecutionSpace const &space, Kokkos::View const &source_ref_target) { @@ -128,7 +128,7 @@ class MLSComputation return radii; } - Kokkos::View compute_weight( + Kokkos::View computeWeight( ExecutionSpace const &space, Kokkos::View const &source_ref_target, Kokkos::View const &radii) @@ -149,7 +149,7 @@ class MLSComputation return phi; } - Kokkos::View compute_vandermonde( + Kokkos::View computeVandermonde( ExecutionSpace const &space, Kokkos::View const &source_ref_target) { @@ -175,9 +175,9 @@ class MLSComputation } Kokkos::View - compute_moment(ExecutionSpace const &space, - Kokkos::View const &phi, - Kokkos::View const &p) + computeMoment(ExecutionSpace const &space, + Kokkos::View const &phi, + Kokkos::View const &p) { Kokkos::View a( Kokkos::view_alloc(Kokkos::WithoutInitializing, @@ -201,10 +201,10 @@ class MLSComputation } void - compute_coefficients(ExecutionSpace const &space, - Kokkos::View const &phi, - Kokkos::View const &p, - Kokkos::View const &a_inv) + computeCoefficients(ExecutionSpace const &space, + Kokkos::View const &phi, + Kokkos::View const &p, + Kokkos::View const &a_inv) { _coeffs = Kokkos::View( 
Kokkos::view_alloc(Kokkos::WithoutInitializing, diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 425465b2d..8ea6f0bb1 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -57,7 +57,6 @@ int main(int argc, char *argv[]) MPI_Init(&argc, &argv); Kokkos::ScopeGuard guard(argc, argv); - constexpr std::size_t num_neighbors = MVPolynomialBasis_3D::size; constexpr std::size_t cube_side = 20; constexpr std::size_t source_points_num = cube_side * cube_side * cube_side; constexpr std::size_t target_points_num = 4; @@ -101,7 +100,7 @@ int main(int argc, char *argv[]) // Create the transform from a point cloud to another MLS - mls(space, mpi_comm, num_neighbors, source_points, target_points); + mls(space, mpi_comm, source_points, target_points); // Compute source values Kokkos::View source_values("Example::source_values", diff --git a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp index 6be2cf5d7..833d6c5aa 100644 --- a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp +++ b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp @@ -27,8 +27,8 @@ class SymmPseudoInverseSVD { public: static Kokkos::View - compute_pseudo_inverses(ExecutionSpace const &space, - Kokkos::View const &mats) + computePseudoInverses(ExecutionSpace const &space, + Kokkos::View const &mats) { SymmPseudoInverseSVD spis(space, mats); @@ -41,11 +41,11 @@ class SymmPseudoInverseSVD Kokkos::RangePolicy(space, 0, spis._num_matrices), KOKKOS_LAMBDA(int const i) { int p, q; - ValueType norm = spis.argmax_off_diagonal(i, p, q); + ValueType norm = spis.argmaxOffDiagonal(i, p, q); while (norm > spis._epsilon) { - spis.compute_u_es_single(i, p, q); - norm = spis.argmax_off_diagonal(i, p, q); + spis.computeUESSingle(i, p, q); + norm = spis.argmaxOffDiagonal(i, p, 
q); } }); @@ -56,7 +56,7 @@ class SymmPseudoInverseSVD Kokkos::MDRangePolicy>( space, {0, 0, 0}, {spis._num_matrices, spis._size, spis._size}), KOKKOS_LAMBDA(int const i, int const j, int const k) { - spis.fill_inv(i, j, k); + spis.fillInv(i, j, k); }); return spis._inv; @@ -65,7 +65,7 @@ class SymmPseudoInverseSVD private: // U and E.S are computed, we can now build the inverse // U . [ E^-1.S ] . U^T - KOKKOS_FUNCTION void fill_inv(int const i, int const j, int const k) const + KOKKOS_FUNCTION void fillInv(int const i, int const j, int const k) const { ValueType value = _zero; for (int l = 0; l < _size; l++) @@ -82,8 +82,8 @@ class SymmPseudoInverseSVD // We found the biggest value in our off-diagonal. We will remove it by // computing a "local" svd and update U and E.S - KOKKOS_FUNCTION void compute_u_es_single(int const i, int const p, - int const q) const + KOKKOS_FUNCTION void computeUESSingle(int const i, int const p, + int const q) const { ValueType a = _es(i, p, p); ValueType b = _es(i, p, q); @@ -162,8 +162,7 @@ class SymmPseudoInverseSVD // This finds the biggest off-diagonal value of E.S as well as its // coordinates. 
Being symmetric, we can always check on the upper // triangle (and always have q > p) - KOKKOS_FUNCTION ValueType argmax_off_diagonal(int const i, int &p, - int &q) const + KOKKOS_FUNCTION ValueType argmaxOffDiagonal(int const i, int &p, int &q) const { ValueType max = _zero; p = q = 0; From 03b600c94791ea4c0c8906b5654e4ae33d890cff Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 15 Aug 2023 10:03:22 -0400 Subject: [PATCH 30/44] Moving ExecutionSpace templates --- examples/moving_least_squares/mls.hpp | 13 +++++++------ examples/moving_least_squares/mls_computation.hpp | 14 +++++++++++--- .../moving_least_squares/moving_least_squares.cpp | 2 +- examples/moving_least_squares/mpi_comms.hpp | 5 +++-- .../symmetric_pseudoinverse_svd.hpp | 5 +++-- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/examples/moving_least_squares/mls.hpp b/examples/moving_least_squares/mls.hpp index 57ad2f961..05a461933 100644 --- a/examples/moving_least_squares/mls.hpp +++ b/examples/moving_least_squares/mls.hpp @@ -28,10 +28,11 @@ struct TargetPoints }; template + typename MemorySpace> class MLS { public: + template MLS(ExecutionSpace const &space, MPI_Comm comm, Kokkos::View const &source_points, Kokkos::View const &target_points, @@ -70,16 +71,16 @@ class MLS }); // Set up comms and local source points - _comms = MPIComms(space, comm, local_indices, - local_ranks); + _comms = MPIComms(space, comm, local_indices, local_ranks); auto local_source_points = _comms.distribute(space, source_points); // Compute the internal MLS _mlsc = - MLSComputation(space, local_source_points, target_points); } + template Kokkos::View apply(ExecutionSpace const &space, Kokkos::View const &source_values) @@ -89,9 +90,9 @@ class MLS } private: - MLSComputation + MLSComputation _mlsc; - MPIComms _comms; + MPIComms _comms; std::size_t _num_neighbors; std::size_t _src_size; std::size_t _tgt_size; diff --git a/examples/moving_least_squares/mls_computation.hpp 
b/examples/moving_least_squares/mls_computation.hpp index 8b1cd97ec..99e754770 100644 --- a/examples/moving_least_squares/mls_computation.hpp +++ b/examples/moving_least_squares/mls_computation.hpp @@ -20,12 +20,13 @@ #include "symmetric_pseudoinverse_svd.hpp" template + typename MemorySpace> class MLSComputation { public: MLSComputation() = default; + template MLSComputation( ExecutionSpace const &space, Kokkos::View const &source_points, @@ -46,12 +47,13 @@ class MLSComputation auto a = computeMoment(space, phi, p); auto a_inv = - SymmPseudoInverseSVD::computePseudoInverses(space, a); + SymmPseudoInverseSVD::computePseudoInverses( + space, a); computeCoefficients(space, phi, p, a_inv); } + template Kokkos::View apply(ExecutionSpace const &space, Kokkos::View const &source_values) @@ -76,6 +78,7 @@ class MLSComputation } private: + template Kokkos::View translateToTarget( ExecutionSpace const &space, Kokkos::View const &source_points, @@ -104,6 +107,7 @@ class MLSComputation return source_ref_target; } + template Kokkos::View computeRadii( ExecutionSpace const &space, Kokkos::View const &source_ref_target) @@ -128,6 +132,7 @@ class MLSComputation return radii; } + template Kokkos::View computeWeight( ExecutionSpace const &space, Kokkos::View const &source_ref_target, @@ -149,6 +154,7 @@ class MLSComputation return phi; } + template Kokkos::View computeVandermonde( ExecutionSpace const &space, Kokkos::View const &source_ref_target) @@ -174,6 +180,7 @@ class MLSComputation return p; } + template Kokkos::View computeMoment(ExecutionSpace const &space, Kokkos::View const &phi, @@ -200,6 +207,7 @@ class MLSComputation return a; } + template void computeCoefficients(ExecutionSpace const &space, Kokkos::View const &phi, diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 8ea6f0bb1..632ab1dac 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ 
b/examples/moving_least_squares/moving_least_squares.cpp @@ -99,7 +99,7 @@ int main(int argc, char *argv[]) Kokkos::deep_copy(space, target_points, target_points_host); // Create the transform from a point cloud to another - MLS + MLS mls(space, mpi_comm, source_points, target_points); // Compute source values diff --git a/examples/moving_least_squares/mpi_comms.hpp b/examples/moving_least_squares/mpi_comms.hpp index e83bb1565..492d0512d 100644 --- a/examples/moving_least_squares/mpi_comms.hpp +++ b/examples/moving_least_squares/mpi_comms.hpp @@ -21,12 +21,13 @@ #include -template +template class MPIComms { public: MPIComms() = default; + template MPIComms(ExecutionSpace const &space, MPI_Comm comm, Kokkos::View indices, Kokkos::View ranks) @@ -113,7 +114,7 @@ class MPIComms space, *_distributor_back, mpi_rev_indices, _mpi_recv_indices); } - template + template Kokkos::View distribute(ExecutionSpace const &space, Kokkos::View const &source) diff --git a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp index 833d6c5aa..ade92775c 100644 --- a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp +++ b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp @@ -22,10 +22,11 @@ // We also know that A is symmetric (by construction), so U = SV where S is // a sign matrix (only 1 or -1 in the diagonal, 0 elsewhere). 
// Thus A = U.E.S.U^T -template +template class SymmPseudoInverseSVD { public: + template static Kokkos::View computePseudoInverses(ExecutionSpace const &space, Kokkos::View const &mats) @@ -183,7 +184,7 @@ class SymmPseudoInverseSVD return max; } - KOKKOS_FUNCTION + template SymmPseudoInverseSVD(ExecutionSpace const &space, Kokkos::View const &mats) : _num_matrices(mats.extent(0)) From dec46be7e0899fd3eb7a57dd9b024824160b474c Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 15 Aug 2023 11:39:13 -0400 Subject: [PATCH 31/44] Switching to AccessTraits for user inputs (attempt) --- examples/moving_least_squares/common.hpp | 24 +++++++ examples/moving_least_squares/mls.hpp | 71 ++++++++++--------- .../moving_least_squares/mls_computation.hpp | 37 ++++++---- .../moving_least_squares.cpp | 4 +- examples/moving_least_squares/mpi_comms.hpp | 54 +++++++++++--- 5 files changed, 128 insertions(+), 62 deletions(-) create mode 100644 examples/moving_least_squares/common.hpp diff --git a/examples/moving_least_squares/common.hpp b/examples/moving_least_squares/common.hpp new file mode 100644 index 000000000..2e3a32da2 --- /dev/null +++ b/examples/moving_least_squares/common.hpp @@ -0,0 +1,24 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. 
* + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +namespace Details +{ +template +using inner_value_t = std::decay_t::get), T const &, + int>>; +} // namespace Details \ No newline at end of file diff --git a/examples/moving_least_squares/mls.hpp b/examples/moving_least_squares/mls.hpp index 05a461933..99a139b8b 100644 --- a/examples/moving_least_squares/mls.hpp +++ b/examples/moving_least_squares/mls.hpp @@ -20,26 +20,46 @@ #include "mls_computation.hpp" #include "mpi_comms.hpp" -template +template struct TargetPoints { - Kokkos::View target_points; + Points target_points; std::size_t num_neighbors; }; +template +struct ArborX::AccessTraits, + ArborX::PredicatesTag> +{ + static KOKKOS_FUNCTION std::size_t + size(TargetPoints const &tp) + { + return tp.target_points.extent(0); + } + + static KOKKOS_FUNCTION auto get(TargetPoints const &tp, + std::size_t i) + { + return ArborX::nearest(tp.target_points(i), tp.num_neighbors); + } + + using memory_space = MemorySpace; +}; + template class MLS { public: - template - MLS(ExecutionSpace const &space, MPI_Comm comm, - Kokkos::View const &source_points, - Kokkos::View const &target_points, + template + MLS(ExecutionSpace const &space, MPI_Comm comm, Points const &source_points, + Points const &target_points, std::size_t num_neighbors = PolynomialBasis::size) : _num_neighbors(num_neighbors) - , _src_size(source_points.extent(0)) - , _tgt_size(target_points.extent(0)) + , _src_size(ArborX::AccessTraits::size( + source_points)) + , _tgt_size(ArborX::AccessTraits::size( + target_points)) { // There must be enough source points assert(_src_size >= _num_neighbors); @@ -52,9 +72,9 @@ class MLS Kokkos::View *, MemorySpace> index_ranks( "Example::MLS::index_ranks", 0); Kokkos::View offsets("Example::MLS::offsets", 0); - source_tree.query(space, - TargetPoints{target_points, _num_neighbors}, - index_ranks, 
offsets); + source_tree.query( + space, TargetPoints{target_points, _num_neighbors}, + index_ranks, offsets); // Split indices/ranks Kokkos::View local_indices( @@ -72,12 +92,11 @@ class MLS // Set up comms and local source points _comms = MPIComms(space, comm, local_indices, local_ranks); - auto local_source_points = _comms.distribute(space, source_points); + auto local_source_points = _comms.distributeArborX(space, source_points); // Compute the internal MLS - _mlsc = - MLSComputation(space, local_source_points, target_points); + _mlsc = MLSComputation( + space, local_source_points, target_points); } template @@ -86,31 +105,13 @@ class MLS Kokkos::View const &source_values) { assert(source_values.extent(0) == _src_size); - return _mlsc.apply(space, _comms.distribute(space, source_values)); + return _mlsc.apply(space, _comms.distributeView(space, source_values)); } private: - MLSComputation - _mlsc; + MLSComputation _mlsc; MPIComms _comms; std::size_t _num_neighbors; std::size_t _src_size; std::size_t _tgt_size; -}; - -template -struct ArborX::AccessTraits, ArborX::PredicatesTag> -{ - static KOKKOS_FUNCTION std::size_t size(TargetPoints const &tp) - { - return tp.target_points.extent(0); - } - - static KOKKOS_FUNCTION auto get(TargetPoints const &tp, - std::size_t i) - { - return ArborX::nearest(tp.target_points(i), tp.num_neighbors); - } - - using memory_space = MemorySpace; }; \ No newline at end of file diff --git a/examples/moving_least_squares/mls_computation.hpp b/examples/moving_least_squares/mls_computation.hpp index 99e754770..fb1f641d0 100644 --- a/examples/moving_least_squares/mls_computation.hpp +++ b/examples/moving_least_squares/mls_computation.hpp @@ -17,6 +17,7 @@ #include +#include "common.hpp" #include "symmetric_pseudoinverse_svd.hpp" template - MLSComputation( - ExecutionSpace const &space, - Kokkos::View const &source_points, - Kokkos::View const &target_points) - : _num_neighbors(source_points.extent(0) / target_points.extent(0)) - , 
_num_targets(target_points.extent(0)) + template + MLSComputation(ExecutionSpace const &space, + Kokkos::View *, + MemorySpace> const &source_points, + Points const &target_points) + : _num_neighbors( + source_points.extent(0) / + ArborX::AccessTraits::size( + target_points)) + , _num_targets(ArborX::AccessTraits::size( + target_points)) { // There must be a list of num_neighbors source points for each // target point @@ -78,12 +83,16 @@ class MLSComputation } private: - template - Kokkos::View translateToTarget( - ExecutionSpace const &space, - Kokkos::View const &source_points, - Kokkos::View const &target_points) + template + Kokkos::View + translateToTarget(ExecutionSpace const &space, + Kokkos::View *, + MemorySpace> const &source_points, + Points const &target_points) { + using point_t = Details::inner_value_t; + using access = ArborX::AccessTraits; + // We center each group around the target as it ables you to // optimize the final computation Kokkos::View source_ref_target( @@ -95,8 +104,8 @@ class MLSComputation Kokkos::MDRangePolicy>(space, {0, 0}, {_num_targets, _num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { - ArborX::Point src = source_points(i * _num_neighbors + j); - ArborX::Point tgt = target_points(i); + point_t src = source_points(i * _num_neighbors + j); + point_t tgt = access::get(target_points, i); source_ref_target(i, j) = ArborX::Point{ src[0] - tgt[0], src[1] - tgt[1], diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 632ab1dac..fb7262fe5 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -99,8 +99,8 @@ int main(int argc, char *argv[]) Kokkos::deep_copy(space, target_points, target_points_host); // Create the transform from a point cloud to another - MLS - mls(space, mpi_comm, source_points, target_points); + MLS mls( + space, mpi_comm, source_points, target_points); // 
Compute source values Kokkos::View source_values("Example::source_values", diff --git a/examples/moving_least_squares/mpi_comms.hpp b/examples/moving_least_squares/mpi_comms.hpp index 492d0512d..bcd01d98e 100644 --- a/examples/moving_least_squares/mpi_comms.hpp +++ b/examples/moving_least_squares/mpi_comms.hpp @@ -19,6 +19,7 @@ #include #include +#include "common.hpp" #include template @@ -114,10 +115,33 @@ class MPIComms space, *_distributor_back, mpi_rev_indices, _mpi_recv_indices); } + template + Kokkos::View *, MemorySpace> + distributeArborX(ExecutionSpace const &space, Values const &source) + { + using value_t = Details::inner_value_t; + using access = ArborX::AccessTraits; + assert(_distributor_back.has_value()); + + // We know what each process want so we prepare the data to be sent + Kokkos::View data_to_send( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MPI::data_to_send"), + _num_requests); + Kokkos::parallel_for( + "Example::MPI::data_to_send_fill", + Kokkos::RangePolicy(space, 0, _num_requests), + KOKKOS_CLASS_LAMBDA(int const i) { + data_to_send(i) = access::get(source, _mpi_send_indices(i)); + }); + + return distribute(space, data_to_send); + } + template Kokkos::View - distribute(ExecutionSpace const &space, - Kokkos::View const &source) + distributeView(ExecutionSpace const &space, + Kokkos::View const &source) { assert(_distributor_back.has_value()); @@ -133,7 +157,23 @@ class MPIComms data_to_send(i) = source(_mpi_send_indices(i)); }); - // Then we properly send it, and each process has what it wants, but in the + return distribute(space, data_to_send); + } + +private: + std::shared_ptr _comm; + Kokkos::View _mpi_send_indices; + Kokkos::View _mpi_recv_indices; + std::optional> _distributor_back; + std::size_t _num_requests; + std::size_t _num_responses; + + template + Kokkos::View + distribute(ExecutionSpace const &space, + Kokkos::View const &data_to_send) + { + // We properly send the data, and each process has what it wants, 
but in the // wrong order Kokkos::View data_to_recv( Kokkos::view_alloc(Kokkos::WithoutInitializing, @@ -155,12 +195,4 @@ class MPIComms return output; } - -private: - std::shared_ptr _comm; - Kokkos::View _mpi_send_indices; - Kokkos::View _mpi_recv_indices; - std::optional> _distributor_back; - std::size_t _num_requests; - std::size_t _num_responses; }; \ No newline at end of file From 210243db50ed6fb7a371a05b890eabf847a5ff1f Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 15 Aug 2023 16:22:25 -0400 Subject: [PATCH 32/44] Simplification of traits access --- examples/moving_least_squares/common.hpp | 8 +++-- examples/moving_least_squares/mls.hpp | 29 ++++++++----------- .../moving_least_squares/mls_computation.hpp | 12 +++----- examples/moving_least_squares/mpi_comms.hpp | 4 +-- 4 files changed, 23 insertions(+), 30 deletions(-) diff --git a/examples/moving_least_squares/common.hpp b/examples/moving_least_squares/common.hpp index 2e3a32da2..df0c0abdd 100644 --- a/examples/moving_least_squares/common.hpp +++ b/examples/moving_least_squares/common.hpp @@ -18,7 +18,9 @@ namespace Details { template -using inner_value_t = std::decay_t::get), T const &, - int>>; +using access = ArborX::AccessTraits; + +template +using inner_value_t = std::decay_t< + std::invoke_result_t::get), T const &, int>>; } // namespace Details \ No newline at end of file diff --git a/examples/moving_least_squares/mls.hpp b/examples/moving_least_squares/mls.hpp index 99a139b8b..5c264f299 100644 --- a/examples/moving_least_squares/mls.hpp +++ b/examples/moving_least_squares/mls.hpp @@ -17,33 +17,31 @@ #include +#include "common.hpp" #include "mls_computation.hpp" #include "mpi_comms.hpp" -template +template struct TargetPoints { Points target_points; std::size_t num_neighbors; }; -template -struct ArborX::AccessTraits, - ArborX::PredicatesTag> +template +struct ArborX::AccessTraits, ArborX::PredicatesTag> { - static KOKKOS_FUNCTION std::size_t - size(TargetPoints const &tp) + static 
KOKKOS_FUNCTION std::size_t size(TargetPoints const &tp) { return tp.target_points.extent(0); } - static KOKKOS_FUNCTION auto get(TargetPoints const &tp, - std::size_t i) + static KOKKOS_FUNCTION auto get(TargetPoints const &tp, std::size_t i) { return ArborX::nearest(tp.target_points(i), tp.num_neighbors); } - using memory_space = MemorySpace; + using memory_space = typename ::Details::access::memory_space; }; template ::size( - source_points)) - , _tgt_size(ArborX::AccessTraits::size( - target_points)) + , _src_size(Details::access::size(source_points)) + , _tgt_size(Details::access::size(target_points)) { - // There must be enough source points assert(_src_size >= _num_neighbors); // Organize source points as tree @@ -72,9 +67,9 @@ class MLS Kokkos::View *, MemorySpace> index_ranks( "Example::MLS::index_ranks", 0); Kokkos::View offsets("Example::MLS::offsets", 0); - source_tree.query( - space, TargetPoints{target_points, _num_neighbors}, - index_ranks, offsets); + source_tree.query(space, + TargetPoints{target_points, _num_neighbors}, + index_ranks, offsets); // Split indices/ranks Kokkos::View local_indices( diff --git a/examples/moving_least_squares/mls_computation.hpp b/examples/moving_least_squares/mls_computation.hpp index fb1f641d0..7202cf42d 100644 --- a/examples/moving_least_squares/mls_computation.hpp +++ b/examples/moving_least_squares/mls_computation.hpp @@ -32,12 +32,9 @@ class MLSComputation Kokkos::View *, MemorySpace> const &source_points, Points const &target_points) - : _num_neighbors( - source_points.extent(0) / - ArborX::AccessTraits::size( - target_points)) - , _num_targets(ArborX::AccessTraits::size( - target_points)) + : _num_neighbors(source_points.extent(0) / + Details::access::size(target_points)) + , _num_targets(Details::access::size(target_points)) { // There must be a list of num_neighbors source points for each // target point @@ -91,7 +88,6 @@ class MLSComputation Points const &target_points) { using point_t = 
Details::inner_value_t; - using access = ArborX::AccessTraits; // We center each group around the target as it ables you to // optimize the final computation @@ -105,7 +101,7 @@ class MLSComputation {_num_targets, _num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { point_t src = source_points(i * _num_neighbors + j); - point_t tgt = access::get(target_points, i); + point_t tgt = Details::access::get(target_points, i); source_ref_target(i, j) = ArborX::Point{ src[0] - tgt[0], src[1] - tgt[1], diff --git a/examples/moving_least_squares/mpi_comms.hpp b/examples/moving_least_squares/mpi_comms.hpp index bcd01d98e..ad6abb6f2 100644 --- a/examples/moving_least_squares/mpi_comms.hpp +++ b/examples/moving_least_squares/mpi_comms.hpp @@ -120,7 +120,6 @@ class MPIComms distributeArborX(ExecutionSpace const &space, Values const &source) { using value_t = Details::inner_value_t; - using access = ArborX::AccessTraits; assert(_distributor_back.has_value()); // We know what each process want so we prepare the data to be sent @@ -132,7 +131,8 @@ class MPIComms "Example::MPI::data_to_send_fill", Kokkos::RangePolicy(space, 0, _num_requests), KOKKOS_CLASS_LAMBDA(int const i) { - data_to_send(i) = access::get(source, _mpi_send_indices(i)); + data_to_send(i) = + Details::access::get(source, _mpi_send_indices(i)); }); return distribute(space, data_to_send); From b1267dd455a893e24194de16f865de43badf790b Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 15 Aug 2023 16:43:45 -0400 Subject: [PATCH 33/44] Assertions for public interfaces --- examples/moving_least_squares/mls.hpp | 12 ++++++++++++ examples/moving_least_squares/mls_computation.hpp | 11 +++++++++++ examples/moving_least_squares/mpi_comms.hpp | 12 ++++++++++++ .../symmetric_pseudoinverse_svd.hpp | 5 +++++ 4 files changed, 40 insertions(+) diff --git a/examples/moving_least_squares/mls.hpp b/examples/moving_least_squares/mls.hpp index 5c264f299..999e27710 100644 --- a/examples/moving_least_squares/mls.hpp +++ 
b/examples/moving_least_squares/mls.hpp @@ -12,6 +12,7 @@ #pragma once #include +#include #include @@ -57,6 +58,15 @@ class MLS , _src_size(Details::access::size(source_points)) , _tgt_size(Details::access::size(target_points)) { + static_assert( + KokkosExt::is_accessible_from::value); + static_assert(KokkosExt::is_accessible_from< + typename Details::access::memory_space, + ExecutionSpace>::value); + ArborX::Details::check_valid_access_traits(ArborX::PrimitivesTag{}, + source_points); + + // A minimum nuber of source points are needed assert(_src_size >= _num_neighbors); // Organize source points as tree @@ -99,6 +109,8 @@ class MLS apply(ExecutionSpace const &space, Kokkos::View const &source_values) { + static_assert( + KokkosExt::is_accessible_from::value); assert(source_values.extent(0) == _src_size); return _mlsc.apply(space, _comms.distributeView(space, source_values)); } diff --git a/examples/moving_least_squares/mls_computation.hpp b/examples/moving_least_squares/mls_computation.hpp index 7202cf42d..24adc5710 100644 --- a/examples/moving_least_squares/mls_computation.hpp +++ b/examples/moving_least_squares/mls_computation.hpp @@ -12,6 +12,7 @@ #pragma once #include +#include #include @@ -36,6 +37,14 @@ class MLSComputation Details::access::size(target_points)) , _num_targets(Details::access::size(target_points)) { + static_assert( + KokkosExt::is_accessible_from::value); + static_assert(KokkosExt::is_accessible_from< + typename Details::access::memory_space, + ExecutionSpace>::value); + ArborX::Details::check_valid_access_traits(ArborX::PrimitivesTag{}, + target_points); + // There must be a list of num_neighbors source points for each // target point assert(source_points.extent(0) == _num_targets * _num_neighbors); @@ -60,6 +69,8 @@ class MLSComputation apply(ExecutionSpace const &space, Kokkos::View const &source_values) { + static_assert( + KokkosExt::is_accessible_from::value); assert(source_values.extent(0) == _num_targets * _num_neighbors); 
Kokkos::View target_values( diff --git a/examples/moving_least_squares/mpi_comms.hpp b/examples/moving_least_squares/mpi_comms.hpp index ad6abb6f2..f32eddb9a 100644 --- a/examples/moving_least_squares/mpi_comms.hpp +++ b/examples/moving_least_squares/mpi_comms.hpp @@ -12,6 +12,7 @@ #pragma once #include +#include #include @@ -33,6 +34,8 @@ class MPIComms Kokkos::View indices, Kokkos::View ranks) { + static_assert( + KokkosExt::is_accessible_from::value); assert(indices.extent(0) == ranks.extent(0)); std::size_t data_len = indices.extent(0); @@ -120,6 +123,13 @@ class MPIComms distributeArborX(ExecutionSpace const &space, Values const &source) { using value_t = Details::inner_value_t; + static_assert( + KokkosExt::is_accessible_from::value); + static_assert(KokkosExt::is_accessible_from< + typename Details::access::memory_space, + ExecutionSpace>::value); + ArborX::Details::check_valid_access_traits(ArborX::PrimitivesTag{}, source); + assert(_distributor_back.has_value()); // We know what each process want so we prepare the data to be sent @@ -143,6 +153,8 @@ class MPIComms distributeView(ExecutionSpace const &space, Kokkos::View const &source) { + static_assert( + KokkosExt::is_accessible_from::value); assert(_distributor_back.has_value()); // We know what each process want so we prepare the data to be sent diff --git a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp index ade92775c..e901f660c 100644 --- a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp +++ b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp @@ -11,6 +11,8 @@ #pragma once +#include + #include #include @@ -31,6 +33,9 @@ class SymmPseudoInverseSVD computePseudoInverses(ExecutionSpace const &space, Kokkos::View const &mats) { + static_assert( + KokkosExt::is_accessible_from::value); + SymmPseudoInverseSVD spis(space, mats); // Iterative approach, we will "deconstruct" E.S until only the diagonal 
From 9315a4c1a33d7cfe965c5be4342253025fa55fe7 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Thu, 17 Aug 2023 11:57:22 -0400 Subject: [PATCH 34/44] Replacing code using ArborX's code and convention --- examples/moving_least_squares/common.hpp | 26 ---------- examples/moving_least_squares/mls.hpp | 47 ++++++++++++------- .../moving_least_squares/mls_computation.hpp | 45 +++++++++++------- .../moving_least_squares.cpp | 2 +- examples/moving_least_squares/mpi_comms.hpp | 21 +++++---- 5 files changed, 71 insertions(+), 70 deletions(-) delete mode 100644 examples/moving_least_squares/common.hpp diff --git a/examples/moving_least_squares/common.hpp b/examples/moving_least_squares/common.hpp deleted file mode 100644 index df0c0abdd..000000000 --- a/examples/moving_least_squares/common.hpp +++ /dev/null @@ -1,26 +0,0 @@ -/**************************************************************************** - * Copyright (c) 2023 by the ArborX authors * - * All rights reserved. * - * * - * This file is part of the ArborX library. ArborX is * - * distributed under a BSD 3-clause license. For the licensing terms see * - * the LICENSE file in the top-level directory. 
* - * * - * SPDX-License-Identifier: BSD-3-Clause * - ****************************************************************************/ - -#pragma once - -#include - -#include - -namespace Details -{ -template -using access = ArborX::AccessTraits; - -template -using inner_value_t = std::decay_t< - std::invoke_result_t::get), T const &, int>>; -} // namespace Details \ No newline at end of file diff --git a/examples/moving_least_squares/mls.hpp b/examples/moving_least_squares/mls.hpp index 999e27710..0566515cc 100644 --- a/examples/moving_least_squares/mls.hpp +++ b/examples/moving_least_squares/mls.hpp @@ -18,7 +18,6 @@ #include -#include "common.hpp" #include "mls_computation.hpp" #include "mpi_comms.hpp" @@ -34,15 +33,21 @@ struct ArborX::AccessTraits, ArborX::PredicatesTag> { static KOKKOS_FUNCTION std::size_t size(TargetPoints const &tp) { - return tp.target_points.extent(0); + return ArborX::AccessTraits::size( + tp.target_points); } static KOKKOS_FUNCTION auto get(TargetPoints const &tp, std::size_t i) { - return ArborX::nearest(tp.target_points(i), tp.num_neighbors); + return ArborX::nearest( + ArborX::AccessTraits::get( + tp.target_points, i), + tp.num_neighbors); } - using memory_space = typename ::Details::access::memory_space; + using memory_space = + typename ArborX::AccessTraits::memory_space; }; template - MLS(ExecutionSpace const &space, MPI_Comm comm, Points const &source_points, + MLS(MPI_Comm comm, ExecutionSpace const &space, Points const &source_points, Points const &target_points, std::size_t num_neighbors = PolynomialBasis::size) : _num_neighbors(num_neighbors) - , _src_size(Details::access::size(source_points)) - , _tgt_size(Details::access::size(target_points)) + , _src_size(ArborX::AccessTraits::size( + source_points)) + , _tgt_size(ArborX::AccessTraits::size( + target_points)) { static_assert( KokkosExt::is_accessible_from::value); - static_assert(KokkosExt::is_accessible_from< - typename Details::access::memory_space, - 
ExecutionSpace>::value); + static_assert( + KokkosExt::is_accessible_from< + typename ArborX::AccessTraits::memory_space, + ExecutionSpace>::value); ArborX::Details::check_valid_access_traits(ArborX::PrimitivesTag{}, source_points); @@ -74,7 +83,7 @@ class MLS source_points); // Perform the query - Kokkos::View *, MemorySpace> index_ranks( + Kokkos::View index_ranks( "Example::MLS::index_ranks", 0); Kokkos::View offsets("Example::MLS::offsets", 0); source_tree.query(space, @@ -83,20 +92,24 @@ class MLS // Split indices/ranks Kokkos::View local_indices( - "Example::MLS::local_indices", _tgt_size * _num_neighbors); - Kokkos::View local_ranks("Example::MLS::local_ranks", - _tgt_size * _num_neighbors); + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLS::local_indices"), + _tgt_size * _num_neighbors); + Kokkos::View local_ranks( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLS::local_ranks"), + _tgt_size * _num_neighbors); Kokkos::parallel_for( "Example::MLS::index_ranks_split", Kokkos::RangePolicy(space, 0, _tgt_size * _num_neighbors), KOKKOS_LAMBDA(int const i) { - local_indices(i) = index_ranks(i).first; - local_ranks(i) = index_ranks(i).second; + local_indices(i) = index_ranks(i).index; + local_ranks(i) = index_ranks(i).rank; }); // Set up comms and local source points - _comms = MPIComms(space, comm, local_indices, local_ranks); + _comms = MPIComms(comm, space, local_indices, local_ranks); auto local_source_points = _comms.distributeArborX(space, source_points); // Compute the internal MLS diff --git a/examples/moving_least_squares/mls_computation.hpp b/examples/moving_least_squares/mls_computation.hpp index 24adc5710..6734d2f1b 100644 --- a/examples/moving_least_squares/mls_computation.hpp +++ b/examples/moving_least_squares/mls_computation.hpp @@ -18,7 +18,6 @@ #include -#include "common.hpp" #include "symmetric_pseudoinverse_svd.hpp" template - MLSComputation(ExecutionSpace const &space, - Kokkos::View *, - MemorySpace> const 
&source_points, - Points const &target_points) - : _num_neighbors(source_points.extent(0) / - Details::access::size(target_points)) - , _num_targets(Details::access::size(target_points)) + MLSComputation( + ExecutionSpace const &space, + Kokkos::View< + typename ArborX::Details::AccessTraitsHelper< + ArborX::AccessTraits>::type *, + MemorySpace> const &source_points, + Points const &target_points) + : _num_targets(ArborX::AccessTraits::size( + target_points)) { static_assert( KokkosExt::is_accessible_from::value); - static_assert(KokkosExt::is_accessible_from< - typename Details::access::memory_space, - ExecutionSpace>::value); + static_assert( + KokkosExt::is_accessible_from< + typename ArborX::AccessTraits::memory_space, + ExecutionSpace>::value); ArborX::Details::check_valid_access_traits(ArborX::PrimitivesTag{}, target_points); // There must be a list of num_neighbors source points for each // target point + _num_neighbors = source_points.extent(0) / _num_targets; assert(source_points.extent(0) == _num_targets * _num_neighbors); auto source_ref_target = @@ -92,13 +96,16 @@ class MLSComputation private: template - Kokkos::View - translateToTarget(ExecutionSpace const &space, - Kokkos::View *, - MemorySpace> const &source_points, - Points const &target_points) + Kokkos::View translateToTarget( + ExecutionSpace const &space, + Kokkos::View< + typename ArborX::Details::AccessTraitsHelper< + ArborX::AccessTraits>::type *, + MemorySpace> const &source_points, + Points const &target_points) { - using point_t = Details::inner_value_t; + using point_t = typename ArborX::Details::AccessTraitsHelper< + ArborX::AccessTraits>::type; // We center each group around the target as it ables you to // optimize the final computation @@ -112,7 +119,9 @@ class MLSComputation {_num_targets, _num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { point_t src = source_points(i * _num_neighbors + j); - point_t tgt = Details::access::get(target_points, i); + point_t tgt = + 
ArborX::AccessTraits::get( + target_points, i); source_ref_target(i, j) = ArborX::Point{ src[0] - tgt[0], src[1] - tgt[1], diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index fb7262fe5..c1cc293c7 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -100,7 +100,7 @@ int main(int argc, char *argv[]) // Create the transform from a point cloud to another MLS mls( - space, mpi_comm, source_points, target_points); + mpi_comm, space, source_points, target_points); // Compute source values Kokkos::View source_values("Example::source_values", diff --git a/examples/moving_least_squares/mpi_comms.hpp b/examples/moving_least_squares/mpi_comms.hpp index f32eddb9a..e39a22f99 100644 --- a/examples/moving_least_squares/mpi_comms.hpp +++ b/examples/moving_least_squares/mpi_comms.hpp @@ -20,7 +20,6 @@ #include #include -#include "common.hpp" #include template @@ -30,7 +29,7 @@ class MPIComms MPIComms() = default; template - MPIComms(ExecutionSpace const &space, MPI_Comm comm, + MPIComms(MPI_Comm comm, ExecutionSpace const &space, Kokkos::View indices, Kokkos::View ranks) { @@ -119,15 +118,20 @@ class MPIComms } template - Kokkos::View *, MemorySpace> + Kokkos::View>::type *, + MemorySpace> distributeArborX(ExecutionSpace const &space, Values const &source) { - using value_t = Details::inner_value_t; + using value_t = typename ArborX::Details::AccessTraitsHelper< + ArborX::AccessTraits>::type; static_assert( KokkosExt::is_accessible_from::value); - static_assert(KokkosExt::is_accessible_from< - typename Details::access::memory_space, - ExecutionSpace>::value); + static_assert( + KokkosExt::is_accessible_from< + typename ArborX::AccessTraits::memory_space, + ExecutionSpace>::value); ArborX::Details::check_valid_access_traits(ArborX::PrimitivesTag{}, source); assert(_distributor_back.has_value()); @@ -142,7 +146,8 @@ class 
MPIComms Kokkos::RangePolicy(space, 0, _num_requests), KOKKOS_CLASS_LAMBDA(int const i) { data_to_send(i) = - Details::access::get(source, _mpi_send_indices(i)); + ArborX::AccessTraits::get( + source, _mpi_send_indices(i)); }); return distribute(space, data_to_send); From daf9822ffe222c09156b5ca3a6c6e662aabbe87c Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Thu, 17 Aug 2023 15:19:27 -0400 Subject: [PATCH 35/44] Better symmetric pseudo inverse (free function and better template) --- .../DetailsSymmetricPseudoInverseSVD.hpp | 209 ++++++++++++++++ .../moving_least_squares/mls_computation.hpp | 6 +- .../symmetric_pseudoinverse_svd.hpp | 234 ------------------ 3 files changed, 211 insertions(+), 238 deletions(-) create mode 100644 examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp delete mode 100644 examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp diff --git a/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp b/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp new file mode 100644 index 000000000..1ed19be09 --- /dev/null +++ b/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp @@ -0,0 +1,209 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#include +#include +#include + +namespace Details +{ + +// This finds the biggest off-diagonal value of E.S as well as its +// coordinates. 
Being symmetric, we can always check on the upper +// triangle (and always have q > p) +template +KOKKOS_FUNCTION typename Matrices::non_const_value_type +spisvdArgmaxOffDiagonal(Matrices const &es, int const i, int &p, int &q) +{ + using value_t = typename Matrices::non_const_value_type; + + std::size_t const size = es.extent(1); + value_t max = 0; + p = q = 0; + + for (int j = 0; j < size; j++) + { + for (int k = j + 1; k < size; k++) + { + value_t val = Kokkos::abs(es(i, j, k)); + if (max < val) + { + max = val; + p = j; + q = k; + } + } + } + + return max; +} + +// Pseudo-inverse of symmetric matrices using SVD +// We must find U, E (diagonal and positive) and V such that A = U.E.V^T +// We also know that A is symmetric (by construction), so U = SV where S is +// a sign matrix (only 1 or -1 in the diagonal, 0 elsewhere). +// Thus A = U.E.S.U^T and A^-1 = U.[ E^-1.S ].U^T +template +Kokkos::View +symmetricPseudoInverseSVD(ExecutionSpace const &space, Matrices const &mats) +{ + using value_t = typename Matrices::non_const_value_type; + using memory_space = typename Matrices::memory_space; + + std::size_t const num_matrices = mats.extent(0); + std::size_t const size = mats.extent(1); + constexpr value_t epsilon = std::numeric_limits::epsilon(); + constexpr value_t pi_4 = value_t(M_PI_4); + + // ==> Initialisation + // E.S is the input matrix + // U is the identity + Kokkos::View es( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::SPISVD::ES"), + mats.layout()); + Kokkos::View u( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::SPISVD::U"), + mats.layout()); + Kokkos::parallel_for( + "Example::SPISVD::ES_U_init", + Kokkos::MDRangePolicy>(space, {0, 0, 0}, + {num_matrices, size, size}), + KOKKOS_LAMBDA(int const i, int const j, int const k) { + es(i, j, k) = value_t(mats(i, j, k)); + u(i, j, k) = value_t((j == k)); + }); + + // ==> Loop + // Iterative approach, we will "deconstruct" E.S until only the diagonal + // is relevent inside the 
matrix + // It is possible to prove that, at each step, the "norm" of the matrix + // is strictly less that of the previous + // For all the loops, the following equality holds: A = U.E.S.U^T + Kokkos::parallel_for( + "Example::SPISVD::compute_ES_U", + Kokkos::RangePolicy(space, 0, num_matrices), + KOKKOS_LAMBDA(int const i) { + int p, q; + value_t norm = spisvdArgmaxOffDiagonal(es, i, p, q); + while (norm > epsilon) + { + value_t a = es(i, p, p); + value_t b = es(i, p, q); + value_t c = es(i, q, q); + + // Our submatrix is now + // +----------+----------+ +---+---+ + // | es(p, p) | es(p, q) | | a | b | + // +----------+----------+ = +---+---+ + // | es(q, p) | es(q, q) | | b | c | + // +----------+----------+ +---+---+ + + // Lets compute x, y and theta such that + // +---+---+ +---+---+ + // | a | b | | x | 0 | + // +---+---+ = R(theta) * +---+---+ * R(theta)^T + // | b | c | | 0 | y | + // +---+---+ +---+---+ + + value_t theta, x, y; + if (a == c) // <-- better to check if |a - c| < epsilon? + { + theta = pi_4; + x = a + b; + y = a - b; + } + else + { + theta = Kokkos::atan((2 * b) / (a - c)) / 2; + value_t a_c_cos2 = (a - c) / Kokkos::cos(2 * theta); + x = (a + c + a_c_cos2) / 2; + y = (a + c - a_c_cos2) / 2; + } + value_t cos = Kokkos::cos(theta); + value_t sin = Kokkos::sin(theta); + + // Now lets compute the following new values for U amd E.S + // E.S <- R'(theta)^T . E.S . R'(theta) + // U <- U . R'(theta) + + // R'(theta)^T . E.S + for (int j = 0; j < size; j++) + { + value_t es_ipj = es(i, p, j); + value_t es_iqj = es(i, q, j); + es(i, p, j) = cos * es_ipj + sin * es_iqj; + es(i, q, j) = -sin * es_ipj + cos * es_iqj; + } + + // [R'(theta)^T . E.S] . R'(theta) + for (int j = 0; j < size; j++) + { + value_t es_ijp = es(i, j, p); + value_t es_ijq = es(i, j, q); + es(i, j, p) = cos * es_ijp + sin * es_ijq; + es(i, j, q) = -sin * es_ijp + cos * es_ijq; + } + + // U . 
R'(theta) + for (int j = 0; j < size; j++) + { + value_t u_ijp = u(i, j, p); + value_t u_ijq = u(i, j, q); + u(i, j, p) = cos * u_ijp + sin * u_ijq; + u(i, j, q) = -sin * u_ijp + cos * u_ijq; + } + + // These should theorically hold but is it ok to force them to their + // real value? + es(i, p, p) = x; + es(i, q, q) = y; + es(i, p, q) = 0; + es(i, q, p) = 0; + + norm = spisvdArgmaxOffDiagonal(es, i, p, q); + } + }); + + // ==> Output + // U and E.S are computed, we can now build the inverse + // U.[ E^-1.S ].U^T + Kokkos::View inv( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::SPISVD::inv"), + mats.layout()); + Kokkos::parallel_for( + "Example::SPISVD::inv_fill", + Kokkos::MDRangePolicy>(space, {0, 0, 0}, + {num_matrices, size, size}), + KOKKOS_LAMBDA(int const i, int const j, int const k) { + value_t value = 0; + for (int l = 0; l < size; l++) + { + value_t v = es(i, l, l); + if (Kokkos::abs(v) > epsilon) + { + value += u(i, j, l) * u(i, k, l) / v; + } + } + + inv(i, j, k) = value; + }); + + return inv; +} + +} // namespace Details \ No newline at end of file diff --git a/examples/moving_least_squares/mls_computation.hpp b/examples/moving_least_squares/mls_computation.hpp index 6734d2f1b..d79ba0be9 100644 --- a/examples/moving_least_squares/mls_computation.hpp +++ b/examples/moving_least_squares/mls_computation.hpp @@ -18,7 +18,7 @@ #include -#include "symmetric_pseudoinverse_svd.hpp" +#include "DetailsSymmetricPseudoInverseSVD.hpp" template @@ -61,9 +61,7 @@ class MLSComputation auto p = computeVandermonde(space, source_ref_target); auto a = computeMoment(space, phi, p); - auto a_inv = - SymmPseudoInverseSVD::computePseudoInverses( - space, a); + auto a_inv = Details::symmetricPseudoInverseSVD(space, a); computeCoefficients(space, phi, p, a_inv); } diff --git a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp b/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp deleted file mode 100644 index e901f660c..000000000 --- 
a/examples/moving_least_squares/symmetric_pseudoinverse_svd.hpp +++ /dev/null @@ -1,234 +0,0 @@ -/**************************************************************************** - * Copyright (c) 2023 by the ArborX authors * - * All rights reserved. * - * * - * This file is part of the ArborX library. ArborX is * - * distributed under a BSD 3-clause license. For the licensing terms see * - * the LICENSE file in the top-level directory. * - * * - * SPDX-License-Identifier: BSD-3-Clause * - ****************************************************************************/ - -#pragma once - -#include - -#include - -#include -#include -#include - -// Pseudo-inverse moment matrix using SVD -// We must find U, E (diagonal and positive) and V such that A = U.E.V^T -// We also know that A is symmetric (by construction), so U = SV where S is -// a sign matrix (only 1 or -1 in the diagonal, 0 elsewhere). -// Thus A = U.E.S.U^T -template -class SymmPseudoInverseSVD -{ -public: - template - static Kokkos::View - computePseudoInverses(ExecutionSpace const &space, - Kokkos::View const &mats) - { - static_assert( - KokkosExt::is_accessible_from::value); - - SymmPseudoInverseSVD spis(space, mats); - - // Iterative approach, we will "deconstruct" E.S until only the diagonal - // is relevent inside the matrix - // It is possible to prove that, at each step, the "norm" of the matrix - // is strictly less that of the previous - Kokkos::parallel_for( - "Example::SVD::compute_U_ES", - Kokkos::RangePolicy(space, 0, spis._num_matrices), - KOKKOS_LAMBDA(int const i) { - int p, q; - ValueType norm = spis.argmaxOffDiagonal(i, p, q); - while (norm > spis._epsilon) - { - spis.computeUESSingle(i, p, q); - norm = spis.argmaxOffDiagonal(i, p, q); - } - }); - - // From the SVD results, the pseudo inverse would be - // U . [ E^-1.S ] . 
U^T - Kokkos::parallel_for( - "Example::SVD::fill_inv", - Kokkos::MDRangePolicy>( - space, {0, 0, 0}, {spis._num_matrices, spis._size, spis._size}), - KOKKOS_LAMBDA(int const i, int const j, int const k) { - spis.fillInv(i, j, k); - }); - - return spis._inv; - } - -private: - // U and E.S are computed, we can now build the inverse - // U . [ E^-1.S ] . U^T - KOKKOS_FUNCTION void fillInv(int const i, int const j, int const k) const - { - ValueType value = _zero; - for (int l = 0; l < _size; l++) - { - ValueType v = _es(i, l, l); - if (Kokkos::abs(v) > _epsilon) - { - value += _u(i, j, l) * _u(i, k, l) / v; - } - } - - _inv(i, j, k) = value; - } - - // We found the biggest value in our off-diagonal. We will remove it by - // computing a "local" svd and update U and E.S - KOKKOS_FUNCTION void computeUESSingle(int const i, int const p, - int const q) const - { - ValueType a = _es(i, p, p); - ValueType b = _es(i, p, q); - ValueType c = _es(i, q, q); - - // Our submatrix is now - // +----------+----------+ +---+---+ - // | es(p, p) | es(p, q) | | a | b | - // +----------+----------+ = +---+---+ - // | es(q, p) | es(q, q) | | b | c | - // +----------+----------+ +---+---+ - - // Lets compute u, v and theta such that - // +---+---+ +---+---+ - // | a | b | | u | 0 | - // +---+---+ = R(theta) * +---+---+ * R(theta)^T - // | b | c | | 0 | v | - // +---+---+ +---+---+ - - ValueType theta, u, v; - if (a == c) // <-- better to check if |a - c| < epsilon? - { - theta = _pi_4; - u = a + b; - v = a - b; - } - else - { - theta = _half * Kokkos::atan((_two * b) / (a - c)); - ValueType a_c_cos2 = (a - c) / Kokkos::cos(_two * theta); - u = _half * (a + c + a_c_cos2); - v = _half * (a + c - a_c_cos2); - } - ValueType cos = Kokkos::cos(theta); - ValueType sin = Kokkos::sin(theta); - - // Now lets compute the following new values for U amd E.S - // E.S <- R'(theta)^T . E.S . R'(theta) - // U <- U . R'(theta) - - // R'(theta)^T . 
E.S - for (int j = 0; j < _size; j++) - { - ValueType es_ipj = _es(i, p, j); - ValueType es_iqj = _es(i, q, j); - _es(i, p, j) = cos * es_ipj + sin * es_iqj; - _es(i, q, j) = -sin * es_ipj + cos * es_iqj; - } - - // [R'(theta)^T . E.S] . R'(theta) - for (int j = 0; j < _size; j++) - { - ValueType es_ijp = _es(i, j, p); - ValueType es_ijq = _es(i, j, q); - _es(i, j, p) = cos * es_ijp + sin * es_ijq; - _es(i, j, q) = -sin * es_ijp + cos * es_ijq; - } - - // U . R'(theta) - for (int j = 0; j < _size; j++) - { - ValueType u_ijp = _u(i, j, p); - ValueType u_ijq = _u(i, j, q); - _u(i, j, p) = cos * u_ijp + sin * u_ijq; - _u(i, j, q) = -sin * u_ijp + cos * u_ijq; - } - - // These should theorically hold but is it ok to force them to their - // real value? - _es(i, p, p) = u; - _es(i, q, q) = v; - _es(i, p, q) = _zero; - _es(i, q, p) = _zero; - } - - // This finds the biggest off-diagonal value of E.S as well as its - // coordinates. Being symmetric, we can always check on the upper - // triangle (and always have q > p) - KOKKOS_FUNCTION ValueType argmaxOffDiagonal(int const i, int &p, int &q) const - { - ValueType max = _zero; - p = q = 0; - for (int j = 0; j < _size; j++) - { - for (int k = j + 1; k < _size; k++) - { - ValueType val = Kokkos::abs(_es(i, j, k)); - if (max < val) - { - max = val; - p = j; - q = k; - } - } - } - - return max; - } - - template - SymmPseudoInverseSVD(ExecutionSpace const &space, - Kokkos::View const &mats) - : _num_matrices(mats.extent(0)) - , _size(mats.extent(1)) - { - // mats must be an array of (symmetric) square matrices - assert(mats.extent(1) == mats.extent(2)); - - _es = Kokkos::View( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::SVD::ES"), - mats.layout()); - Kokkos::deep_copy(space, _es, mats); - - _u = Kokkos::View( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::SVD::U"), - mats.layout()); - Kokkos::parallel_for( - "Example::SVD::U_init", - Kokkos::MDRangePolicy>(space, {0, 0, 0}, - {_num_matrices, _size, 
_size}), - KOKKOS_LAMBDA(int const i, int const j, int const k) { - _u(i, j, k) = ValueType((j == k)); - }); - - _inv = Kokkos::View( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::SVD::inv"), - mats.layout()); - } - - Kokkos::View _es; - Kokkos::View _u; - Kokkos::View _inv; - std::size_t _num_matrices; - std::size_t _size; - - static constexpr ValueType _pi_4 = ValueType(M_PI_4); - static constexpr ValueType _epsilon = - std::numeric_limits::epsilon(); - static constexpr ValueType _half = ValueType(0.5); - static constexpr ValueType _two = ValueType(2); - static constexpr ValueType _zero = ValueType(0); -}; \ No newline at end of file From 0c6af8134fcef2c308dfb6c5bd01025cb20bdbed Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Fri, 18 Aug 2023 08:53:28 -0400 Subject: [PATCH 36/44] Better MLS computations, more permissive templates --- .../DetailsMovingLeastSquaresComputation.hpp | 206 ++++++++++++++ .../DetailsSymmetricPseudoInverseSVD.hpp | 2 - examples/moving_least_squares/mls.hpp | 8 +- .../moving_least_squares/mls_computation.hpp | 268 ------------------ 4 files changed, 210 insertions(+), 274 deletions(-) create mode 100644 examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp delete mode 100644 examples/moving_least_squares/mls_computation.hpp diff --git a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp new file mode 100644 index 000000000..b789b4351 --- /dev/null +++ b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp @@ -0,0 +1,206 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. 
* + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#include + +#include "DetailsSymmetricPseudoInverseSVD.hpp" + +namespace Details +{ + +template +class MovingLeastSquaresComputation +{ +public: + MovingLeastSquaresComputation() = default; + + template + MovingLeastSquaresComputation(ExecutionSpace const &space, + SourcePoints const &source_points, + TargetPoints const &target_points, + PolynomialBasis const &, + RadialBasisFunction const &) + { + using src_acc = ArborX::AccessTraits; + using tgt_acc = ArborX::AccessTraits; + + _num_targets = tgt_acc::size(target_points); + _num_neighbors = src_acc::size(source_points) / _num_targets; + constexpr CoefficientType epsilon = + std::numeric_limits::epsilon(); + constexpr ArborX::Point origin = ArborX::Point{0, 0, 0}; + + // We center each group of points around the target as it ables us to + // optimize the final computation and transfer point types into ours + // TODO: Use multidimensional points! 
+ Kokkos::View source_ref_target( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLSC::source_ref_target"), + _num_targets, _num_neighbors); + Kokkos::parallel_for( + "Example::MLSC::source_ref_target_fill", + Kokkos::MDRangePolicy>(space, {0, 0}, + {_num_targets, _num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + auto src = src_acc::get(source_points, i * _num_neighbors + j); + auto tgt = tgt_acc::get(target_points, i); + source_ref_target(i, j) = ArborX::Point{ + src[0] - tgt[0], + src[1] - tgt[1], + src[2] - tgt[2], + }; + }); + + // To properly use the RBF, we need to decide for a radius around each + // target point that encapsulates all of the points + Kokkos::View radii( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::radii"), + _num_targets); + Kokkos::parallel_for( + "Example::MLSC::radii_computation", + Kokkos::RangePolicy(space, 0, _num_targets), + KOKKOS_LAMBDA(int const i) { + CoefficientType radius = 10 * epsilon; + + for (int j = 0; j < _num_neighbors; j++) + { + CoefficientType norm = + ArborX::Details::distance(source_ref_target(i, j), origin); + radius = (radius < norm) ? 
norm : radius; + } + + // The one at the limit would be valued at 0 due to how RBF works + radii(i) = 1.1 * radius; + }); + + // Once the radius is computed, the wieght follows by evaluating the RBF at + // each source point with their proper radii + Kokkos::View phi( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::phi"), + _num_targets, _num_neighbors); + Kokkos::parallel_for( + "Example::MLSC::phi_computation", + Kokkos::MDRangePolicy>(space, {0, 0}, + {_num_targets, _num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + CoefficientType norm = + ArborX::Details::distance(source_ref_target(i, j), origin); + phi(i, j) = RadialBasisFunction::apply(norm / radii(i)); + }); + + // We then need to create the Vandermonde matrix for each source point + // Instead of relying on an external type, could it be produced + // automatically? + Kokkos::View p( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLSC::vandermonde"), + _num_targets, _num_neighbors, PolynomialBasis::size); + Kokkos::parallel_for( + "Example::MLSC::vandermonde_computation", + Kokkos::MDRangePolicy>(space, {0, 0}, + {_num_targets, _num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + auto basis = PolynomialBasis::basis(source_ref_target(i, j)); + + for (int k = 0; k < PolynomialBasis::size; k++) + { + p(i, j, k) = basis[k]; + } + }); + + // From the weight and Vandermonde matrices, we can compute the moment + // matrix as A = P^T.PHI.P + Kokkos::View a( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLSC::moment"), + _num_targets, PolynomialBasis::size, PolynomialBasis::size); + Kokkos::parallel_for( + "Example::MLSC::moment_computation", + Kokkos::MDRangePolicy>( + space, {0, 0, 0}, + {_num_targets, PolynomialBasis::size, PolynomialBasis::size}), + KOKKOS_LAMBDA(int const i, int const j, int const k) { + CoefficientType tmp = 0; + + for (int l = 0; l < _num_neighbors; l++) + { + tmp += p(i, l, j) * p(i, l, k) * phi(i, l); + } + + a(i, j, 
k) = tmp; + }); + + // We then take the pseudo-inverse of that moment matrix. + auto a_inv = symmetricPseudoInverseSVD(space, a); + + // We finally build the coefficients as C = [1 0 0 ...].A^-1.P^T.PHI + _coeffs = Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLSC::coefficients"), + _num_targets, _num_neighbors); + Kokkos::parallel_for( + "Example::MLSC::coefficients", + Kokkos::MDRangePolicy>(space, {0, 0}, + {_num_targets, _num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + CoefficientType tmp = 0; + + for (int k = 0; k < PolynomialBasis::size; k++) + { + tmp += a_inv(i, 0, k) * p(i, j, k) * phi(i, j); + } + + _coeffs(i, j) = tmp; + }); + } + + template + Kokkos::View + apply(ExecutionSpace const &space, SourceValues const &source_values) + { + using value_t = typename SourceValues::non_const_value_type; + using memory_space = typename SourceValues::memory_space; + + Kokkos::View target_values( + "Example::MLSC::target_values", _num_targets); + Kokkos::parallel_for( + "Example::MLSC::target_interpolation", + Kokkos::RangePolicy(space, 0, _num_targets), + KOKKOS_LAMBDA(int const i) { + value_t tmp = 0; + + for (int j = 0; j < _num_neighbors; j++) + { + tmp += _coeffs(i, j) * source_values(i * _num_neighbors + j); + } + + target_values(i) = tmp; + }); + + return target_values; + } + +private: + Kokkos::View _coeffs; + std::size_t _num_targets; + std::size_t _num_neighbors; +}; + +} // namespace Details diff --git a/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp b/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp index 1ed19be09..7d697a435 100644 --- a/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp +++ b/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp @@ -11,8 +11,6 @@ #pragma once -#include - #include #include diff --git a/examples/moving_least_squares/mls.hpp b/examples/moving_least_squares/mls.hpp index 0566515cc..d213056c2 100644 --- 
a/examples/moving_least_squares/mls.hpp +++ b/examples/moving_least_squares/mls.hpp @@ -18,7 +18,7 @@ #include -#include "mls_computation.hpp" +#include "DetailsMovingLeastSquaresComputation.hpp" #include "mpi_comms.hpp" template @@ -113,8 +113,8 @@ class MLS auto local_source_points = _comms.distributeArborX(space, source_points); // Compute the internal MLS - _mlsc = MLSComputation( - space, local_source_points, target_points); + _mlsc = Details::MovingLeastSquaresComputation( + space, local_source_points, target_points, PolynomialBasis{}, RBF{}); } template @@ -129,7 +129,7 @@ class MLS } private: - MLSComputation _mlsc; + Details::MovingLeastSquaresComputation _mlsc; MPIComms _comms; std::size_t _num_neighbors; std::size_t _src_size; diff --git a/examples/moving_least_squares/mls_computation.hpp b/examples/moving_least_squares/mls_computation.hpp deleted file mode 100644 index d79ba0be9..000000000 --- a/examples/moving_least_squares/mls_computation.hpp +++ /dev/null @@ -1,268 +0,0 @@ -/**************************************************************************** - * Copyright (c) 2023 by the ArborX authors * - * All rights reserved. * - * * - * This file is part of the ArborX library. ArborX is * - * distributed under a BSD 3-clause license. For the licensing terms see * - * the LICENSE file in the top-level directory. 
* - * * - * SPDX-License-Identifier: BSD-3-Clause * - ****************************************************************************/ - -#pragma once - -#include -#include - -#include - -#include - -#include "DetailsSymmetricPseudoInverseSVD.hpp" - -template -class MLSComputation -{ -public: - MLSComputation() = default; - - template - MLSComputation( - ExecutionSpace const &space, - Kokkos::View< - typename ArborX::Details::AccessTraitsHelper< - ArborX::AccessTraits>::type *, - MemorySpace> const &source_points, - Points const &target_points) - : _num_targets(ArborX::AccessTraits::size( - target_points)) - { - static_assert( - KokkosExt::is_accessible_from::value); - static_assert( - KokkosExt::is_accessible_from< - typename ArborX::AccessTraits::memory_space, - ExecutionSpace>::value); - ArborX::Details::check_valid_access_traits(ArborX::PrimitivesTag{}, - target_points); - - // There must be a list of num_neighbors source points for each - // target point - _num_neighbors = source_points.extent(0) / _num_targets; - assert(source_points.extent(0) == _num_targets * _num_neighbors); - - auto source_ref_target = - translateToTarget(space, source_points, target_points); - - auto radii = computeRadii(space, source_ref_target); - auto phi = computeWeight(space, source_ref_target, radii); - auto p = computeVandermonde(space, source_ref_target); - - auto a = computeMoment(space, phi, p); - auto a_inv = Details::symmetricPseudoInverseSVD(space, a); - - computeCoefficients(space, phi, p, a_inv); - } - - template - Kokkos::View - apply(ExecutionSpace const &space, - Kokkos::View const &source_values) - { - static_assert( - KokkosExt::is_accessible_from::value); - assert(source_values.extent(0) == _num_targets * _num_neighbors); - - Kokkos::View target_values( - "Example::MLSC::target_values", _num_targets); - Kokkos::parallel_for( - "Example::MLSC::target_interpolation", - Kokkos::RangePolicy(space, 0, _num_targets), - KOKKOS_LAMBDA(int const i) { - ValueType tmp = _zero; - 
for (int j = 0; j < _num_neighbors; j++) - { - tmp += _coeffs(i, j) * source_values(i * _num_neighbors + j); - } - target_values(i) = tmp; - }); - - return target_values; - } - -private: - template - Kokkos::View translateToTarget( - ExecutionSpace const &space, - Kokkos::View< - typename ArborX::Details::AccessTraitsHelper< - ArborX::AccessTraits>::type *, - MemorySpace> const &source_points, - Points const &target_points) - { - using point_t = typename ArborX::Details::AccessTraitsHelper< - ArborX::AccessTraits>::type; - - // We center each group around the target as it ables you to - // optimize the final computation - Kokkos::View source_ref_target( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MLSC::source_ref_target"), - _num_targets, _num_neighbors); - Kokkos::parallel_for( - "Example::MLSC::source_ref_target_fill", - Kokkos::MDRangePolicy>(space, {0, 0}, - {_num_targets, _num_neighbors}), - KOKKOS_LAMBDA(int const i, int const j) { - point_t src = source_points(i * _num_neighbors + j); - point_t tgt = - ArborX::AccessTraits::get( - target_points, i); - source_ref_target(i, j) = ArborX::Point{ - src[0] - tgt[0], - src[1] - tgt[1], - src[2] - tgt[2], - }; - }); - - return source_ref_target; - } - - template - Kokkos::View computeRadii( - ExecutionSpace const &space, - Kokkos::View const &source_ref_target) - { - Kokkos::View radii( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::radii"), - _num_targets); - Kokkos::parallel_for( - "Example::MLSC::radii_computation", - Kokkos::RangePolicy(space, 0, _num_targets), - KOKKOS_LAMBDA(int const i) { - ValueType radius = _ten * _epsilon; - for (int j = 0; j < _num_neighbors; j++) - { - ValueType norm = - ArborX::Details::distance(source_ref_target(i, j), _origin); - radius = (radius < norm) ? 
norm : radius; - } - radii(i) = _one_extra * radius; - }); - - return radii; - } - - template - Kokkos::View computeWeight( - ExecutionSpace const &space, - Kokkos::View const &source_ref_target, - Kokkos::View const &radii) - { - Kokkos::View phi( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::phi"), - _num_targets, _num_neighbors); - Kokkos::parallel_for( - "Example::MLSC::phi_computation", - Kokkos::MDRangePolicy>(space, {0, 0}, - {_num_targets, _num_neighbors}), - KOKKOS_LAMBDA(int const i, int const j) { - ValueType norm = - ArborX::Details::distance(source_ref_target(i, j), _origin); - phi(i, j) = RBF::apply(norm / radii(i)); - }); - - return phi; - } - - template - Kokkos::View computeVandermonde( - ExecutionSpace const &space, - Kokkos::View const &source_ref_target) - { - // Instead of relying on an external type, could it be produced - // automatically? - Kokkos::View p( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MLSC::vandermonde"), - _num_targets, _num_neighbors, PolynomialBasis::size); - Kokkos::parallel_for( - "Example::MLSC::vandermonde_computation", - Kokkos::MDRangePolicy>(space, {0, 0}, - {_num_targets, _num_neighbors}), - KOKKOS_LAMBDA(int const i, int const j) { - auto basis = PolynomialBasis::basis(source_ref_target(i, j)); - for (int k = 0; k < PolynomialBasis::size; k++) - { - p(i, j, k) = basis[k]; - } - }); - - return p; - } - - template - Kokkos::View - computeMoment(ExecutionSpace const &space, - Kokkos::View const &phi, - Kokkos::View const &p) - { - Kokkos::View a( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MLSC::moment"), - _num_targets, PolynomialBasis::size, PolynomialBasis::size); - Kokkos::parallel_for( - "Example::MLSC::moment_computation", - Kokkos::MDRangePolicy>( - space, {0, 0, 0}, - {_num_targets, PolynomialBasis::size, PolynomialBasis::size}), - KOKKOS_LAMBDA(int const i, int const j, int const k) { - ValueType tmp = _zero; - for (int l = 0; l < _num_neighbors; l++) - 
{ - tmp += p(i, l, j) * p(i, l, k) * phi(i, l); - } - a(i, j, k) = tmp; - }); - - return a; - } - - template - void - computeCoefficients(ExecutionSpace const &space, - Kokkos::View const &phi, - Kokkos::View const &p, - Kokkos::View const &a_inv) - { - _coeffs = Kokkos::View( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MLSC::coefficients"), - _num_targets, _num_neighbors); - Kokkos::parallel_for( - "Example::MLSC::coefficients", - Kokkos::MDRangePolicy>(space, {0, 0}, - {_num_targets, _num_neighbors}), - KOKKOS_LAMBDA(int const i, int const j) { - ValueType tmp = _zero; - for (int k = 0; k < PolynomialBasis::size; k++) - { - tmp += a_inv(i, 0, k) * p(i, j, k) * phi(i, j); - } - _coeffs(i, j) = tmp; - }); - } - - Kokkos::View _coeffs; - std::size_t _num_targets; - std::size_t _num_neighbors; - - static constexpr ValueType _zero = ValueType(0); - static constexpr ValueType _ten = ValueType(10); - static constexpr ValueType _epsilon = - std::numeric_limits::epsilon(); - static constexpr ValueType _one_extra = ValueType(1.1); - static constexpr ArborX::Point _origin = ArborX::Point{0, 0, 0}; -}; \ No newline at end of file From a1bd2917467c22a0da89d4568f44a7b3abb62b01 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Fri, 18 Aug 2023 09:44:32 -0400 Subject: [PATCH 37/44] Proper usage of AccessTraits for MPI comms --- ... 
DetailsDistributedTreePostQueryComms.hpp} | 151 ++++++++---------- .../DetailsMovingLeastSquaresComputation.hpp | 2 - .../DetailsSymmetricPseudoInverseSVD.hpp | 1 - examples/moving_least_squares/mls.hpp | 29 +--- 4 files changed, 69 insertions(+), 114 deletions(-) rename examples/moving_least_squares/{mpi_comms.hpp => DetailsDistributedTreePostQueryComms.hpp} (61%) diff --git a/examples/moving_least_squares/mpi_comms.hpp b/examples/moving_least_squares/DetailsDistributedTreePostQueryComms.hpp similarity index 61% rename from examples/moving_least_squares/mpi_comms.hpp rename to examples/moving_least_squares/DetailsDistributedTreePostQueryComms.hpp index e39a22f99..6d56aceea 100644 --- a/examples/moving_least_squares/mpi_comms.hpp +++ b/examples/moving_least_squares/DetailsDistributedTreePostQueryComms.hpp @@ -12,31 +12,28 @@ #pragma once #include -#include #include -#include #include #include #include +namespace Details +{ + template -class MPIComms +class DistributedTreePostQueryComms { public: - MPIComms() = default; + DistributedTreePostQueryComms() = default; - template - MPIComms(MPI_Comm comm, ExecutionSpace const &space, - Kokkos::View indices, - Kokkos::View ranks) + template + DistributedTreePostQueryComms(MPI_Comm comm, ExecutionSpace const &space, + IndicesAndRanks const &indices_and_ranks) { - static_assert( - KokkosExt::is_accessible_from::value); - assert(indices.extent(0) == ranks.extent(0)); - std::size_t data_len = indices.extent(0); + std::size_t data_len = indices_and_ranks.extent(0); _comm.reset( [comm]() { @@ -56,8 +53,25 @@ class MPIComms MPI_Comm_rank(*_comm, &rank); Kokkos::View mpi_tmp( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MPI::tmp"), + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::DTPQC::tmp"), + data_len); + + // Split indices/ranks + Kokkos::View indices( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::indices"), data_len); + Kokkos::View ranks( + 
Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::ranks"), + data_len); + Kokkos::parallel_for( + "Example::DTPQC::indices_and_ranks_split", + Kokkos::RangePolicy(space, 0, data_len), + KOKKOS_LAMBDA(int const i) { + indices(i) = indices_and_ranks(i).index; + ranks(i) = indices_and_ranks(i).rank; + }); // Computes what will be common to every exchange. Every time // someone wants to get the value from the same set of elements, @@ -75,7 +89,7 @@ class MPIComms // array that rebuilds the output Kokkos::View mpi_rev_indices( Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MPI::rev_indices"), + "Example::DTPQC::rev_indices"), _num_requests); ArborX::iota(space, mpi_tmp); ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( @@ -85,7 +99,7 @@ class MPIComms // the process owning the source _mpi_send_indices = Kokkos::View( Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MPI::send_indices"), + "Example::DTPQC::send_indices"), _num_requests); ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( space, distributor_forth, indices, _mpi_send_indices); @@ -94,117 +108,68 @@ class MPIComms // distributor to dispatch the values Kokkos::View mpi_rev_ranks( Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MPI::rev_ranks"), + "Example::DTPQC::rev_ranks"), _num_requests); Kokkos::deep_copy(space, mpi_tmp, rank); ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( space, distributor_forth, mpi_tmp, mpi_rev_ranks); // This will create the reverse of the previous distributor - _distributor_back = ArborX::Details::Distributor(*_comm); - _num_responses = _distributor_back->createFromSends(space, mpi_rev_ranks); + _distributor = ArborX::Details::Distributor(*_comm); + _num_responses = _distributor->createFromSends(space, mpi_rev_ranks); // There should be enough responses to perfectly fill what was requested - assert(_num_responses == data_len); + // i.e. 
_num_responses == data_len // The we send back the requested indices so that each process can rebuild - // the output + // their output _mpi_recv_indices = Kokkos::View( Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MPI::recv_indices"), + "Example::DTPQC::recv_indices"), _num_responses); ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( - space, *_distributor_back, mpi_rev_indices, _mpi_recv_indices); + space, *_distributor, mpi_rev_indices, _mpi_recv_indices); } template Kokkos::View>::type *, - MemorySpace> - distributeArborX(ExecutionSpace const &space, Values const &source) + typename ArborX::AccessTraits< + Values, ArborX::PrimitivesTag>::memory_space> + distribute(ExecutionSpace const &space, Values const &source) { - using value_t = typename ArborX::Details::AccessTraitsHelper< - ArborX::AccessTraits>::type; - static_assert( - KokkosExt::is_accessible_from::value); - static_assert( - KokkosExt::is_accessible_from< - typename ArborX::AccessTraits::memory_space, - ExecutionSpace>::value); - ArborX::Details::check_valid_access_traits(ArborX::PrimitivesTag{}, source); - - assert(_distributor_back.has_value()); + using src_acc = ArborX::AccessTraits; + using value_t = typename ArborX::Details::AccessTraitsHelper::type; + using memory_space = typename src_acc::memory_space; // We know what each process want so we prepare the data to be sent Kokkos::View data_to_send( Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MPI::data_to_send"), - _num_requests); - Kokkos::parallel_for( - "Example::MPI::data_to_send_fill", - Kokkos::RangePolicy(space, 0, _num_requests), - KOKKOS_CLASS_LAMBDA(int const i) { - data_to_send(i) = - ArborX::AccessTraits::get( - source, _mpi_send_indices(i)); - }); - - return distribute(space, data_to_send); - } - - template - Kokkos::View - distributeView(ExecutionSpace const &space, - Kokkos::View const &source) - { - static_assert( - KokkosExt::is_accessible_from::value); - assert(_distributor_back.has_value()); 
- - // We know what each process want so we prepare the data to be sent - Kokkos::View data_to_send( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MPI::data_to_send"), + "Example::DTPQC::data_to_send"), _num_requests); Kokkos::parallel_for( - "Example::MPI::data_to_send_fill", + "Example::DTPQC::data_to_send_fill", Kokkos::RangePolicy(space, 0, _num_requests), KOKKOS_CLASS_LAMBDA(int const i) { - data_to_send(i) = source(_mpi_send_indices(i)); + data_to_send(i) = src_acc::get(source, _mpi_send_indices(i)); }); - return distribute(space, data_to_send); - } - -private: - std::shared_ptr _comm; - Kokkos::View _mpi_send_indices; - Kokkos::View _mpi_recv_indices; - std::optional> _distributor_back; - std::size_t _num_requests; - std::size_t _num_responses; - - template - Kokkos::View - distribute(ExecutionSpace const &space, - Kokkos::View const &data_to_send) - { // We properly send the data, and each process has what it wants, but in the // wrong order - Kokkos::View data_to_recv( + Kokkos::View data_to_recv( Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MPI::data_to_recv"), + "Example::DTPQC::data_to_recv"), _num_responses); ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( - space, *_distributor_back, data_to_send, data_to_recv); + space, *_distributor, data_to_send, data_to_recv); // So we fix this by moving everything - Kokkos::View output( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MPI::output"), + Kokkos::View output( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::output"), _num_responses); Kokkos::parallel_for( - "Example::MPI::output_fill", + "Example::DTPQC::output_fill", Kokkos::RangePolicy(space, 0, _num_responses), KOKKOS_CLASS_LAMBDA(int const i) { output(_mpi_recv_indices(i)) = data_to_recv(i); @@ -212,4 +177,14 @@ class MPIComms return output; } -}; \ No newline at end of file + +private: + std::shared_ptr _comm; + Kokkos::View _mpi_send_indices; + Kokkos::View 
_mpi_recv_indices; + std::optional> _distributor; + std::size_t _num_requests; + std::size_t _num_responses; +}; + +} // namespace Details diff --git a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp index b789b4351..0d089297f 100644 --- a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp +++ b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp @@ -15,8 +15,6 @@ #include -#include - #include "DetailsSymmetricPseudoInverseSVD.hpp" namespace Details diff --git a/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp b/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp index 7d697a435..985ab5bf2 100644 --- a/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp +++ b/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp @@ -13,7 +13,6 @@ #include -#include #include #include diff --git a/examples/moving_least_squares/mls.hpp b/examples/moving_least_squares/mls.hpp index d213056c2..9f5560dd7 100644 --- a/examples/moving_least_squares/mls.hpp +++ b/examples/moving_least_squares/mls.hpp @@ -18,8 +18,8 @@ #include +#include "DetailsDistributedTreePostQueryComms.hpp" #include "DetailsMovingLeastSquaresComputation.hpp" -#include "mpi_comms.hpp" template struct TargetPoints @@ -90,27 +90,10 @@ class MLS TargetPoints{target_points, _num_neighbors}, index_ranks, offsets); - // Split indices/ranks - Kokkos::View local_indices( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MLS::local_indices"), - _tgt_size * _num_neighbors); - Kokkos::View local_ranks( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::MLS::local_ranks"), - _tgt_size * _num_neighbors); - Kokkos::parallel_for( - "Example::MLS::index_ranks_split", - Kokkos::RangePolicy(space, 0, - _tgt_size * _num_neighbors), - KOKKOS_LAMBDA(int const i) { - local_indices(i) = index_ranks(i).index; - local_ranks(i) = 
index_ranks(i).rank; - }); - // Set up comms and local source points - _comms = MPIComms(comm, space, local_indices, local_ranks); - auto local_source_points = _comms.distributeArborX(space, source_points); + _comms = Details::DistributedTreePostQueryComms(comm, space, + index_ranks); + auto local_source_points = _comms.distribute(space, source_points); // Compute the internal MLS _mlsc = Details::MovingLeastSquaresComputation( @@ -125,12 +108,12 @@ class MLS static_assert( KokkosExt::is_accessible_from::value); assert(source_values.extent(0) == _src_size); - return _mlsc.apply(space, _comms.distributeView(space, source_values)); + return _mlsc.apply(space, _comms.distribute(space, source_values)); } private: Details::MovingLeastSquaresComputation _mlsc; - MPIComms _comms; + Details::DistributedTreePostQueryComms _comms; std::size_t _num_neighbors; std::size_t _src_size; std::size_t _tgt_size; From a7650db0c379775c9e0cdf963ce779ddcce46f5a Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Fri, 18 Aug 2023 10:24:54 -0400 Subject: [PATCH 38/44] Proper MLS public interface --- .../DetailsMovingLeastSquaresComputation.hpp | 2 +- .../MovingLeastSquares.hpp | 108 ++++++++++++++++ examples/moving_least_squares/mls.hpp | 120 ------------------ .../moving_least_squares.cpp | 9 +- 4 files changed, 115 insertions(+), 124 deletions(-) create mode 100644 examples/moving_least_squares/MovingLeastSquares.hpp delete mode 100644 examples/moving_least_squares/mls.hpp diff --git a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp index 0d089297f..ebe5951f0 100644 --- a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp +++ b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp @@ -20,7 +20,7 @@ namespace Details { -template +template class MovingLeastSquaresComputation { public: diff --git a/examples/moving_least_squares/MovingLeastSquares.hpp 
b/examples/moving_least_squares/MovingLeastSquares.hpp new file mode 100644 index 000000000..fe78e8bce --- /dev/null +++ b/examples/moving_least_squares/MovingLeastSquares.hpp @@ -0,0 +1,108 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#include "DetailsDistributedTreePostQueryComms.hpp" +#include "DetailsMovingLeastSquaresComputation.hpp" + +namespace Details +{ + +// This is done to avoid clashing with another predicate access trait +template +struct TargetPointsPredicateWrapper +{ + Points target_points; + std::size_t num_neighbors; +}; + +} // namespace Details + +template +struct ArborX::AccessTraits, + ArborX::PredicatesTag> +{ + static KOKKOS_FUNCTION std::size_t + size(::Details::TargetPointsPredicateWrapper const &tp) + { + return ArborX::AccessTraits::size( + tp.target_points); + } + + static KOKKOS_FUNCTION auto + get(::Details::TargetPointsPredicateWrapper const &tp, std::size_t i) + { + return ArborX::nearest( + ArborX::AccessTraits::get( + tp.target_points, i), + tp.num_neighbors); + } + + using memory_space = + typename ArborX::AccessTraits::memory_space; +}; + +// Public interface to compute the moving least squares approximation between a +// souce and target point cloud +template +class MovingLeastSquares +{ +public: + template + MovingLeastSquares(MPI_Comm comm, ExecustionSpace const &space, + SourcePoints const &source_points, + TargetPoints const &target_points, + PolynomialBasis const &pb, RadialBasisFunction const &rbf, + std::size_t num_neighbors = PolynomialBasis::size) + { + 
// Organize the source points as a tree and create the predicates + ArborX::DistributedTree source_tree(comm, space, + source_points); + Details::TargetPointsPredicateWrapper predicates{ + target_points, num_neighbors}; + + // Makes the NN query + Kokkos::View indices_and_ranks( + "Example::MLS::indices_and_ranks", 0); + Kokkos::View offsets("Example::MLS::offsets", 0); + source_tree.query(space, predicates, indices_and_ranks, offsets); + + // Set up comms and collect the points for a local MLS + _comms = Details::DistributedTreePostQueryComms( + comm, space, indices_and_ranks); + auto local_source_points = _comms.distribute(space, source_points); + + // Finally, compute the local MLS for the local target points + _mlsc = Details::MovingLeastSquaresComputation( + space, local_source_points, target_points, pb, rbf); + } + + template + auto apply(ExecutionSpace const &space, SourceValues const &source_values) + { + // Distribute and compute the result + return _mlsc.apply(space, _comms.distribute(space, source_values)); + } + +private: + Details::MovingLeastSquaresComputation + _mlsc; + Details::DistributedTreePostQueryComms _comms; +}; \ No newline at end of file diff --git a/examples/moving_least_squares/mls.hpp b/examples/moving_least_squares/mls.hpp deleted file mode 100644 index 9f5560dd7..000000000 --- a/examples/moving_least_squares/mls.hpp +++ /dev/null @@ -1,120 +0,0 @@ -/**************************************************************************** - * Copyright (c) 2023 by the ArborX authors * - * All rights reserved. * - * * - * This file is part of the ArborX library. ArborX is * - * distributed under a BSD 3-clause license. For the licensing terms see * - * the LICENSE file in the top-level directory. 
* - * * - * SPDX-License-Identifier: BSD-3-Clause * - ****************************************************************************/ - -#pragma once - -#include -#include - -#include - -#include - -#include "DetailsDistributedTreePostQueryComms.hpp" -#include "DetailsMovingLeastSquaresComputation.hpp" - -template -struct TargetPoints -{ - Points target_points; - std::size_t num_neighbors; -}; - -template -struct ArborX::AccessTraits, ArborX::PredicatesTag> -{ - static KOKKOS_FUNCTION std::size_t size(TargetPoints const &tp) - { - return ArborX::AccessTraits::size( - tp.target_points); - } - - static KOKKOS_FUNCTION auto get(TargetPoints const &tp, std::size_t i) - { - return ArborX::nearest( - ArborX::AccessTraits::get( - tp.target_points, i), - tp.num_neighbors); - } - - using memory_space = - typename ArborX::AccessTraits::memory_space; -}; - -template -class MLS -{ -public: - template - MLS(MPI_Comm comm, ExecutionSpace const &space, Points const &source_points, - Points const &target_points, - std::size_t num_neighbors = PolynomialBasis::size) - : _num_neighbors(num_neighbors) - , _src_size(ArborX::AccessTraits::size( - source_points)) - , _tgt_size(ArborX::AccessTraits::size( - target_points)) - { - static_assert( - KokkosExt::is_accessible_from::value); - static_assert( - KokkosExt::is_accessible_from< - typename ArborX::AccessTraits::memory_space, - ExecutionSpace>::value); - ArborX::Details::check_valid_access_traits(ArborX::PrimitivesTag{}, - source_points); - - // A minimum nuber of source points are needed - assert(_src_size >= _num_neighbors); - - // Organize source points as tree - ArborX::DistributedTree source_tree(comm, space, - source_points); - - // Perform the query - Kokkos::View index_ranks( - "Example::MLS::index_ranks", 0); - Kokkos::View offsets("Example::MLS::offsets", 0); - source_tree.query(space, - TargetPoints{target_points, _num_neighbors}, - index_ranks, offsets); - - // Set up comms and local source points - _comms = 
Details::DistributedTreePostQueryComms(comm, space, - index_ranks); - auto local_source_points = _comms.distribute(space, source_points); - - // Compute the internal MLS - _mlsc = Details::MovingLeastSquaresComputation( - space, local_source_points, target_points, PolynomialBasis{}, RBF{}); - } - - template - Kokkos::View - apply(ExecutionSpace const &space, - Kokkos::View const &source_values) - { - static_assert( - KokkosExt::is_accessible_from::value); - assert(source_values.extent(0) == _src_size); - return _mlsc.apply(space, _comms.distribute(space, source_values)); - } - -private: - Details::MovingLeastSquaresComputation _mlsc; - Details::DistributedTreePostQueryComms _comms; - std::size_t _num_neighbors; - std::size_t _src_size; - std::size_t _tgt_size; -}; \ No newline at end of file diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index c1cc293c7..f7969731b 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -13,6 +13,8 @@ // (https://github.com/ORNL-CEES/DataTransferKit) // with MLS resolution from // (http://dx.doi.org/10.1016/j.jcp.2015.11.055) +// and +// (A conservative mesh-free approach for fluid-structure interface problems) #include @@ -20,7 +22,7 @@ #include -#include "mls.hpp" +#include "MovingLeastSquares.hpp" #include using ExecutionSpace = Kokkos::DefaultExecutionSpace; @@ -99,8 +101,9 @@ int main(int argc, char *argv[]) Kokkos::deep_copy(space, target_points, target_points_host); // Create the transform from a point cloud to another - MLS mls( - mpi_comm, space, source_points, target_points); + MovingLeastSquares mls( + mpi_comm, space, source_points, target_points, MVPolynomialBasis_3D{}, + RBFWendland_0{}); // Compute source values Kokkos::View source_values("Example::source_values", From 8fc8a75bf0033341c386b2113bd69622c99810af Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Fri, 
18 Aug 2023 11:24:50 -0400 Subject: [PATCH 39/44] Extra RBFs --- .../DetailsRadialBasisFunctions.hpp | 73 +++++++++++++++++++ .../MovingLeastSquares.hpp | 4 +- .../moving_least_squares.cpp | 14 +--- 3 files changed, 79 insertions(+), 12 deletions(-) create mode 100644 examples/moving_least_squares/DetailsRadialBasisFunctions.hpp diff --git a/examples/moving_least_squares/DetailsRadialBasisFunctions.hpp b/examples/moving_least_squares/DetailsRadialBasisFunctions.hpp new file mode 100644 index 000000000..9d0d43551 --- /dev/null +++ b/examples/moving_least_squares/DetailsRadialBasisFunctions.hpp @@ -0,0 +1,73 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#define RBF_DECL(name) \ + template \ + struct name + +#define RBF_DEF(name, n, func) \ + template <> \ + struct name \ + { \ + template \ + KOKKOS_INLINE_FUNCTION static T apply(T x) \ + { \ + return func; \ + } \ + } + +namespace Details +{ + +RBF_DECL(Wendland); +RBF_DEF(Wendland, 0, (1 - x) * (1 - x)); +RBF_DEF(Wendland, 2, (1 - x) * (1 - x) * (1 - x) * (1 - x) * (4 * x + 1)); +RBF_DEF(Wendland, 4, + (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * + (35 * x * x + 18 * x + 3)); +RBF_DEF(Wendland, 6, + (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * + (1 - x) * (32 * x * x * x + 25 * x * x + 8 * x + 1)); + +RBF_DECL(Wu); +RBF_DEF(Wu, 2, + (1 - x) * (1 - x) * (1 - x) * (1 - x) * + (3 * x * x * x + 12 * x + 16 * x + 4)); +RBF_DEF(Wu, 4, + (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * + (5 * x * x * x * x * 
x + 30 * x * x * x * x + 72 * x * x * x + + 82 * x * x + 36 * x + 6)); + +RBF_DECL(Buhmann); +RBF_DEF(Buhmann, 2, + 2 * x * x * x * x * log(x) - T(7) / 2 * x * x * x * x + + T(16) / 3 * x * x * x - 2 * x * x + T(1) / 6); +RBF_DEF(Buhmann, 3, + 1 * x * x * x * x * x * x * x * x - T(84) / 5 * x * x * x * x * x * x + + T(1024) / 5 * x * x * x * x * sqrt(x) - 378 * x * x * x * x + + T(1024) / 5 * x * x * x * sqrt(x) - T(84) / 5 * x * x + 1); +RBF_DEF(Buhmann, 4, + T(99) / 35 * x * x * x * x * x * x * x * x - + 132 * x * x * x * x * x * x + + T(9216) / 35 * x * x * x * x * x * sqrt(x) - + T(11264) / 35 * x * x * x * x * sqrt(x) + 198 * x * x * x * x - + T(396) / 5 * x * x + 1); + +} // namespace Details + +#undef RBF_DECL +#undef RBF_DEF \ No newline at end of file diff --git a/examples/moving_least_squares/MovingLeastSquares.hpp b/examples/moving_least_squares/MovingLeastSquares.hpp index fe78e8bce..ce1382cd0 100644 --- a/examples/moving_least_squares/MovingLeastSquares.hpp +++ b/examples/moving_least_squares/MovingLeastSquares.hpp @@ -62,10 +62,10 @@ template class MovingLeastSquares { public: - template - MovingLeastSquares(MPI_Comm comm, ExecustionSpace const &space, + MovingLeastSquares(MPI_Comm comm, ExecutionSpace const &space, SourcePoints const &source_points, TargetPoints const &target_points, PolynomialBasis const &pb, RadialBasisFunction const &rbf, diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index f7969731b..139e1230d 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -20,22 +20,16 @@ #include +#include #include +#include "DetailsRadialBasisFunctions.hpp" #include "MovingLeastSquares.hpp" #include using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; -struct RBFWendland_0 -{ - KOKKOS_INLINE_FUNCTION static float apply(float x) - { - return (1.f - x) 
* (1.f - x); - } -}; - struct MVPolynomialBasis_3D { static constexpr std::size_t size = 10; @@ -51,7 +45,7 @@ struct MVPolynomialBasis_3D // Function to approximate KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) { - return Kokkos::sin(p[0]) * p[2] + p[1]; + return p[2] + p[1]; } int main(int argc, char *argv[]) @@ -103,7 +97,7 @@ int main(int argc, char *argv[]) // Create the transform from a point cloud to another MovingLeastSquares mls( mpi_comm, space, source_points, target_points, MVPolynomialBasis_3D{}, - RBFWendland_0{}); + Details::Wendland<0>{}); // Compute source values Kokkos::View source_values("Example::source_values", From f3556deb07f131fc458e65d1b3439df24bcfd6ae Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Mon, 21 Aug 2023 10:22:36 -0400 Subject: [PATCH 40/44] NVCC and CUDA compliance (compilation errors) --- .../DetailsDistributedTreePostQueryComms.hpp | 45 ++-- .../DetailsMovingLeastSquaresComputation.hpp | 232 +++++++++++++----- .../DetailsSymmetricPseudoInverseSVD.hpp | 8 +- .../moving_least_squares.cpp | 4 +- 4 files changed, 202 insertions(+), 87 deletions(-) diff --git a/examples/moving_least_squares/DetailsDistributedTreePostQueryComms.hpp b/examples/moving_least_squares/DetailsDistributedTreePostQueryComms.hpp index 6d56aceea..6c4e7ec34 100644 --- a/examples/moving_least_squares/DetailsDistributedTreePostQueryComms.hpp +++ b/examples/moving_least_squares/DetailsDistributedTreePostQueryComms.hpp @@ -57,21 +57,10 @@ class DistributedTreePostQueryComms data_len); // Split indices/ranks - Kokkos::View indices( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::DTPQC::indices"), - data_len); - Kokkos::View ranks( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::DTPQC::ranks"), - data_len); - Kokkos::parallel_for( - "Example::DTPQC::indices_and_ranks_split", - Kokkos::RangePolicy(space, 0, data_len), - KOKKOS_LAMBDA(int const i) { - indices(i) = indices_and_ranks(i).index; - ranks(i) = 
indices_and_ranks(i).rank; - }); + Kokkos::Array, 2> split_indices_ranks = + indicesAndRanksSplit(space, indices_and_ranks, data_len); + Kokkos::View indices = split_indices_ranks[0]; + Kokkos::View ranks = split_indices_ranks[1]; // Computes what will be common to every exchange. Every time // someone wants to get the value from the same set of elements, @@ -178,6 +167,32 @@ class DistributedTreePostQueryComms return output; } + template + static Kokkos::Array, 2> + indicesAndRanksSplit(ExecutionSpace const &space, + IndicesAndRanks const &indices_and_ranks, + std::size_t data_len) + { + Kokkos::View indices( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::indices"), + data_len); + Kokkos::View ranks( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::ranks"), + data_len); + + Kokkos::parallel_for( + "Example::DTPQC::indices_and_ranks_split", + Kokkos::RangePolicy(space, 0, data_len), + KOKKOS_LAMBDA(int const i) { + indices(i) = indices_and_ranks(i).index; + ranks(i) = indices_and_ranks(i).rank; + }); + + return {{indices, ranks}}; + } + private: std::shared_ptr _comm; Kokkos::View _mpi_send_indices; diff --git a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp index ebe5951f0..6d37ff690 100644 --- a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp +++ b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp @@ -32,31 +32,105 @@ class MovingLeastSquaresComputation MovingLeastSquaresComputation(ExecutionSpace const &space, SourcePoints const &source_points, TargetPoints const &target_points, - PolynomialBasis const &, - RadialBasisFunction const &) + PolynomialBasis const &pb, + RadialBasisFunction const &rbf) { using src_acc = ArborX::AccessTraits; using tgt_acc = ArborX::AccessTraits; _num_targets = tgt_acc::size(target_points); _num_neighbors = src_acc::size(source_points) / _num_targets; - 
constexpr CoefficientType epsilon = - std::numeric_limits::epsilon(); - constexpr ArborX::Point origin = ArborX::Point{0, 0, 0}; // We center each group of points around the target as it ables us to // optimize the final computation and transfer point types into ours // TODO: Use multidimensional points! + Kokkos::View source_ref_target = + sourceRefTargetFill(space, source_points, target_points, _num_targets, + _num_neighbors); + + // To properly use the RBF, we need to decide for a radius around each + // target point that encapsulates all of the points + Kokkos::View radii = radiiComputation( + space, source_ref_target, _num_targets, _num_neighbors); + + // Once the radius is computed, the wieght follows by evaluating the RBF at + // each source point with their proper radii + Kokkos::View phi = weightComputation( + space, source_ref_target, radii, _num_targets, _num_neighbors, rbf); + + // We then need to create the Vandermonde matrix for each source point + // Instead of relying on an external type, could it be produced + // automatically? + Kokkos::View p = vandermondeComputation( + space, source_ref_target, _num_targets, _num_neighbors, pb); + + // From the weight and Vandermonde matrices, we can compute the moment + // matrix as A = P^T.PHI.P + Kokkos::View a = + momentComputation(space, phi, p, _num_targets, _num_neighbors, pb); + + // We then take the pseudo-inverse of that moment matrix. 
+ Kokkos::View a_inv = + symmetricPseudoInverseSVD(space, a); + + // We finally build the coefficients as C = [1 0 0 ...].A^-1.P^T.PHI + _coeffs = coefficientsComputation(space, phi, p, a_inv, _num_targets, + _num_neighbors, pb); + } + + template + Kokkos::View + apply(ExecutionSpace const &space, SourceValues const &source_values) + { + using value_t = typename SourceValues::non_const_value_type; + using memory_space = typename SourceValues::memory_space; + + std::size_t num_neighbors = _num_neighbors; + Kokkos::View coeffs = _coeffs; + + Kokkos::View target_values( + "Example::MLSC::target_values", _num_targets); + + Kokkos::parallel_for( + "Example::MLSC::target_interpolation", + Kokkos::RangePolicy(space, 0, _num_targets), + KOKKOS_LAMBDA(int const i) { + value_t tmp = 0; + + for (int j = 0; j < num_neighbors; j++) + { + tmp += coeffs(i, j) * source_values(i * num_neighbors + j); + } + + target_values(i) = tmp; + }); + + return target_values; + } + + template + static Kokkos::View + sourceRefTargetFill(ExecutionSpace const &space, + SourcePoints const &source_points, + TargetPoints const &target_points, + std::size_t num_targets, std::size_t num_neighbors) + { + using src_acc = ArborX::AccessTraits; + using tgt_acc = ArborX::AccessTraits; + Kokkos::View source_ref_target( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::source_ref_target"), - _num_targets, _num_neighbors); + num_targets, num_neighbors); + Kokkos::parallel_for( "Example::MLSC::source_ref_target_fill", - Kokkos::MDRangePolicy>(space, {0, 0}, - {_num_targets, _num_neighbors}), + Kokkos::MDRangePolicy>( + space, {0, 0}, {num_targets, num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { - auto src = src_acc::get(source_points, i * _num_neighbors + j); + auto src = src_acc::get(source_points, i * num_neighbors + j); auto tgt = tgt_acc::get(target_points, i); source_ref_target(i, j) = ArborX::Point{ src[0] - tgt[0], @@ -65,18 +139,30 @@ class MovingLeastSquaresComputation }; }); - 
// To properly use the RBF, we need to decide for a radius around each - // target point that encapsulates all of the points + return source_ref_target; + } + + template + static Kokkos::View radiiComputation( + ExecutionSpace const &space, + Kokkos::View const &source_ref_target, + std::size_t num_targets, std::size_t num_neighbors) + { + constexpr CoefficientType epsilon = + std::numeric_limits::epsilon(); + constexpr ArborX::Point origin = ArborX::Point{0, 0, 0}; + Kokkos::View radii( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::radii"), - _num_targets); + num_targets); + Kokkos::parallel_for( "Example::MLSC::radii_computation", - Kokkos::RangePolicy(space, 0, _num_targets), + Kokkos::RangePolicy(space, 0, num_targets), KOKKOS_LAMBDA(int const i) { CoefficientType radius = 10 * epsilon; - for (int j = 0; j < _num_neighbors; j++) + for (int j = 0; j < num_neighbors; j++) { CoefficientType norm = ArborX::Details::distance(source_ref_target(i, j), origin); @@ -87,32 +173,52 @@ class MovingLeastSquaresComputation radii(i) = 1.1 * radius; }); - // Once the radius is computed, the wieght follows by evaluating the RBF at - // each source point with their proper radii + return radii; + } + + template + static Kokkos::View weightComputation( + ExecutionSpace const &space, + Kokkos::View const &source_ref_target, + Kokkos::View const &radii, + std::size_t num_targets, std::size_t num_neighbors, + RadialBasisFunction const &) + { + constexpr ArborX::Point origin = ArborX::Point{0, 0, 0}; + Kokkos::View phi( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::phi"), - _num_targets, _num_neighbors); + num_targets, num_neighbors); + Kokkos::parallel_for( "Example::MLSC::phi_computation", - Kokkos::MDRangePolicy>(space, {0, 0}, - {_num_targets, _num_neighbors}), + Kokkos::MDRangePolicy>( + space, {0, 0}, {num_targets, num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { CoefficientType norm = ArborX::Details::distance(source_ref_target(i, 
j), origin); phi(i, j) = RadialBasisFunction::apply(norm / radii(i)); }); - // We then need to create the Vandermonde matrix for each source point - // Instead of relying on an external type, could it be produced - // automatically? + return phi; + } + + template + static Kokkos::View vandermondeComputation( + ExecutionSpace const &space, + Kokkos::View const &source_ref_target, + std::size_t num_targets, std::size_t num_neighbors, + PolynomialBasis const &) + { Kokkos::View p( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::vandermonde"), - _num_targets, _num_neighbors, PolynomialBasis::size); + num_targets, num_neighbors, PolynomialBasis::size); + Kokkos::parallel_for( "Example::MLSC::vandermonde_computation", - Kokkos::MDRangePolicy>(space, {0, 0}, - {_num_targets, _num_neighbors}), + Kokkos::MDRangePolicy>( + space, {0, 0}, {num_targets, num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { auto basis = PolynomialBasis::basis(source_ref_target(i, j)); @@ -122,21 +228,31 @@ class MovingLeastSquaresComputation } }); - // From the weight and Vandermonde matrices, we can compute the moment - // matrix as A = P^T.PHI.P + return p; + } + + template + static Kokkos::View + momentComputation(ExecutionSpace const &space, + Kokkos::View const &phi, + Kokkos::View const &p, + std::size_t num_targets, std::size_t num_neighbors, + PolynomialBasis const &) + { Kokkos::View a( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::moment"), - _num_targets, PolynomialBasis::size, PolynomialBasis::size); + num_targets, PolynomialBasis::size, PolynomialBasis::size); + Kokkos::parallel_for( "Example::MLSC::moment_computation", - Kokkos::MDRangePolicy>( + Kokkos::MDRangePolicy>( space, {0, 0, 0}, - {_num_targets, PolynomialBasis::size, PolynomialBasis::size}), + {num_targets, PolynomialBasis::size, PolynomialBasis::size}), KOKKOS_LAMBDA(int const i, int const j, int const k) { CoefficientType tmp = 0; - for (int l = 0; l < _num_neighbors; l++) + for 
(int l = 0; l < num_neighbors; l++) { tmp += p(i, l, j) * p(i, l, k) * phi(i, l); } @@ -144,18 +260,27 @@ class MovingLeastSquaresComputation a(i, j, k) = tmp; }); - // We then take the pseudo-inverse of that moment matrix. - auto a_inv = symmetricPseudoInverseSVD(space, a); + return a; + } - // We finally build the coefficients as C = [1 0 0 ...].A^-1.P^T.PHI - _coeffs = Kokkos::View( + template + static Kokkos::View coefficientsComputation( + ExecutionSpace const &space, + Kokkos::View const &phi, + Kokkos::View const &p, + Kokkos::View const &a_inv, + std::size_t num_targets, std::size_t num_neighbors, + PolynomialBasis const &) + { + Kokkos::View coeffs( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::coefficients"), - _num_targets, _num_neighbors); + num_targets, num_neighbors); + Kokkos::parallel_for( - "Example::MLSC::coefficients", - Kokkos::MDRangePolicy>(space, {0, 0}, - {_num_targets, _num_neighbors}), + "Example::MLSC::coefficients_computation", + Kokkos::MDRangePolicy>( + space, {0, 0}, {num_targets, num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { CoefficientType tmp = 0; @@ -164,35 +289,10 @@ class MovingLeastSquaresComputation tmp += a_inv(i, 0, k) * p(i, j, k) * phi(i, j); } - _coeffs(i, j) = tmp; + coeffs(i, j) = tmp; }); - } - template - Kokkos::View - apply(ExecutionSpace const &space, SourceValues const &source_values) - { - using value_t = typename SourceValues::non_const_value_type; - using memory_space = typename SourceValues::memory_space; - - Kokkos::View target_values( - "Example::MLSC::target_values", _num_targets); - Kokkos::parallel_for( - "Example::MLSC::target_interpolation", - Kokkos::RangePolicy(space, 0, _num_targets), - KOKKOS_LAMBDA(int const i) { - value_t tmp = 0; - - for (int j = 0; j < _num_neighbors; j++) - { - tmp += _coeffs(i, j) * source_values(i * _num_neighbors + j); - } - - target_values(i) = tmp; - }); - - return target_values; + return coeffs; } private: diff --git 
a/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp b/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp index 985ab5bf2..6f55db2f5 100644 --- a/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp +++ b/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp @@ -78,8 +78,8 @@ symmetricPseudoInverseSVD(ExecutionSpace const &space, Matrices const &mats) mats.layout()); Kokkos::parallel_for( "Example::SPISVD::ES_U_init", - Kokkos::MDRangePolicy>(space, {0, 0, 0}, - {num_matrices, size, size}), + Kokkos::MDRangePolicy>( + space, {0, 0, 0}, {num_matrices, size, size}), KOKKOS_LAMBDA(int const i, int const j, int const k) { es(i, j, k) = value_t(mats(i, j, k)); u(i, j, k) = value_t((j == k)); @@ -184,8 +184,8 @@ symmetricPseudoInverseSVD(ExecutionSpace const &space, Matrices const &mats) mats.layout()); Kokkos::parallel_for( "Example::SPISVD::inv_fill", - Kokkos::MDRangePolicy>(space, {0, 0, 0}, - {num_matrices, size, size}), + Kokkos::MDRangePolicy>( + space, {0, 0, 0}, {num_matrices, size, size}), KOKKOS_LAMBDA(int const i, int const j, int const k) { value_t value = 0; for (int l = 0; l < size; l++) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 139e1230d..287103f0e 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -77,8 +77,8 @@ int main(int argc, char *argv[]) std::size_t thickness = cube_side / mpi_size; Kokkos::parallel_for( "Example::source_points_init", - Kokkos::MDRangePolicy>(space, {0, 0, 0}, - {cube_side, cube_side, thickness}), + Kokkos::MDRangePolicy>( + space, {0, 0, 0}, {cube_side, cube_side, thickness}), KOKKOS_LAMBDA(int const i, int const j, int const k) { source_points(i * cube_side * thickness + j * thickness + k) = ArborX::Point{ From db133c898f7f244dca81d0f459d10afe3153eba6 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: 
Tue, 22 Aug 2023 14:59:05 -0400 Subject: [PATCH 41/44] Automatic polynomial basis generation and better rbf interface --- .../DetailsMovingLeastSquaresComputation.hpp | 44 ++++---- .../DetailsPolynomialBasis.hpp | 100 ++++++++++++++++++ .../DetailsRadialBasisFunctions.hpp | 32 +++--- .../MovingLeastSquares.hpp | 18 ++-- .../moving_least_squares.cpp | 18 +--- 5 files changed, 157 insertions(+), 55 deletions(-) create mode 100644 examples/moving_least_squares/DetailsPolynomialBasis.hpp diff --git a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp index 6d37ff690..b16ed4cde 100644 --- a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp +++ b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp @@ -15,6 +15,7 @@ #include +#include "DetailsPolynomialBasis.hpp" #include "DetailsSymmetricPseudoInverseSVD.hpp" namespace Details @@ -26,13 +27,13 @@ class MovingLeastSquaresComputation public: MovingLeastSquaresComputation() = default; - template MovingLeastSquaresComputation(ExecutionSpace const &space, SourcePoints const &source_points, TargetPoints const &target_points, - PolynomialBasis const &pb, + PolynomialDegree const &pd, RadialBasisFunction const &rbf) { using src_acc = ArborX::AccessTraits; @@ -41,6 +42,9 @@ class MovingLeastSquaresComputation _num_targets = tgt_acc::size(target_points); _num_neighbors = src_acc::size(source_points) / _num_targets; + static constexpr std::size_t polynomialBasisSize = + polynomialBasisSizeFromAT; + // We center each group of points around the target as it ables us to // optimize the final computation and transfer point types into ours // TODO: Use multidimensional points! @@ -62,12 +66,12 @@ class MovingLeastSquaresComputation // Instead of relying on an external type, could it be produced // automatically? 
Kokkos::View p = vandermondeComputation( - space, source_ref_target, _num_targets, _num_neighbors, pb); + space, source_ref_target, _num_targets, _num_neighbors, pd); // From the weight and Vandermonde matrices, we can compute the moment // matrix as A = P^T.PHI.P - Kokkos::View a = - momentComputation(space, phi, p, _num_targets, _num_neighbors, pb); + Kokkos::View a = momentComputation( + space, phi, p, _num_targets, _num_neighbors, polynomialBasisSize); // We then take the pseudo-inverse of that moment matrix. Kokkos::View a_inv = @@ -75,7 +79,7 @@ class MovingLeastSquaresComputation // We finally build the coefficients as C = [1 0 0 ...].A^-1.P^T.PHI _coeffs = coefficientsComputation(space, phi, p, a_inv, _num_targets, - _num_neighbors, pb); + _num_neighbors, polynomialBasisSize); } template @@ -203,26 +207,30 @@ class MovingLeastSquaresComputation return phi; } - template + template static Kokkos::View vandermondeComputation( ExecutionSpace const &space, Kokkos::View const &source_ref_target, std::size_t num_targets, std::size_t num_neighbors, - PolynomialBasis const &) + PolynomialDegree const &) { + static constexpr std::size_t polynomialBasisSize = + polynomialBasisSizeFromT; + Kokkos::View p( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::vandermonde"), - num_targets, num_neighbors, PolynomialBasis::size); + num_targets, num_neighbors, polynomialBasisSize); Kokkos::parallel_for( "Example::MLSC::vandermonde_computation", Kokkos::MDRangePolicy>( space, {0, 0}, {num_targets, num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { - auto basis = PolynomialBasis::basis(source_ref_target(i, j)); + auto basis = polynomialBasis( + source_ref_target(i, j)); - for (int k = 0; k < PolynomialBasis::size; k++) + for (int k = 0; k < polynomialBasisSize; k++) { p(i, j, k) = basis[k]; } @@ -231,24 +239,24 @@ class MovingLeastSquaresComputation return p; } - template + template static Kokkos::View momentComputation(ExecutionSpace const &space, 
Kokkos::View const &phi, Kokkos::View const &p, std::size_t num_targets, std::size_t num_neighbors, - PolynomialBasis const &) + std::size_t polynomialBasisSize) { Kokkos::View a( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::moment"), - num_targets, PolynomialBasis::size, PolynomialBasis::size); + num_targets, polynomialBasisSize, polynomialBasisSize); Kokkos::parallel_for( "Example::MLSC::moment_computation", Kokkos::MDRangePolicy>( space, {0, 0, 0}, - {num_targets, PolynomialBasis::size, PolynomialBasis::size}), + {num_targets, polynomialBasisSize, polynomialBasisSize}), KOKKOS_LAMBDA(int const i, int const j, int const k) { CoefficientType tmp = 0; @@ -263,14 +271,14 @@ class MovingLeastSquaresComputation return a; } - template + template static Kokkos::View coefficientsComputation( ExecutionSpace const &space, Kokkos::View const &phi, Kokkos::View const &p, Kokkos::View const &a_inv, std::size_t num_targets, std::size_t num_neighbors, - PolynomialBasis const &) + std::size_t polynomialBasisSize) { Kokkos::View coeffs( Kokkos::view_alloc(Kokkos::WithoutInitializing, @@ -284,7 +292,7 @@ class MovingLeastSquaresComputation KOKKOS_LAMBDA(int const i, int const j) { CoefficientType tmp = 0; - for (int k = 0; k < PolynomialBasis::size; k++) + for (int k = 0; k < polynomialBasisSize; k++) { tmp += a_inv(i, 0, k) * p(i, j, k) * phi(i, j); } diff --git a/examples/moving_least_squares/DetailsPolynomialBasis.hpp b/examples/moving_least_squares/DetailsPolynomialBasis.hpp new file mode 100644 index 000000000..e4383b632 --- /dev/null +++ b/examples/moving_least_squares/DetailsPolynomialBasis.hpp @@ -0,0 +1,100 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. 
* + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#include + +namespace Details +{ + +template +KOKKOS_FUNCTION constexpr Kokkos::Array, Deg> +polynomialBasisColumnSizes() +{ + Kokkos::Array, Deg> arr{}; + + for (std::size_t dim = 0; dim < Dim; dim++) + arr[0][dim] = 1; + for (std::size_t deg = 0; deg < Deg; deg++) + arr[deg][0] = 1; + + for (std::size_t deg = 1; deg < Deg; deg++) + for (std::size_t dim = 1; dim < Dim; dim++) + arr[deg][dim] = arr[deg - 1][dim] + arr[deg][dim - 1]; + + return arr; +} + +template +KOKKOS_FUNCTION constexpr std::size_t polynomialBasisSize() +{ + auto arr = polynomialBasisColumnSizes(); + std::size_t size = 1; + + for (std::size_t deg = 0; deg < Deg; deg++) + for (std::size_t dim = 0; dim < Dim; dim++) + size += arr[deg][dim]; + + return size; +} +template +static constexpr std::size_t polynomialBasisSizeFromT = + polynomialBasisSize, Deg>(); + +template +static constexpr std::size_t polynomialBasisSizeFromAT = + polynomialBasisSizeFromT< + typename ArborX::Details::AccessTraitsHelper< + ArborX::AccessTraits>::type, + Deg>; + +template +KOKKOS_FUNCTION auto polynomialBasis(Point const &p) +{ + static constexpr std::size_t dimension = + ArborX::GeometryTraits::dimension_v; + static constexpr auto column_details = + polynomialBasisColumnSizes(); + using value_t = typename ArborX::GeometryTraits::coordinate_type::type; + + Kokkos::Array()> arr{}; + arr[0] = value_t(1); + + std::size_t prev_col = 0; + std::size_t curr_col = 1; + for (std::size_t deg = 0; deg < Deg; deg++) + { + std::size_t loc_offset = curr_col; + for (std::size_t dim = 0; dim < dimension; dim++) + { + // copy the previous column and multply by p[dim] + for (std::size_t i = 0; i < column_details[deg][dim]; i++) + arr[loc_offset + i] = arr[prev_col + i] * p[dim]; + + loc_offset += column_details[deg][dim]; + } + + prev_col = curr_col; + curr_col = 
loc_offset; + } + + return arr; +} + +template +static constexpr std::integral_constant degree{}; + +} // namespace Details diff --git a/examples/moving_least_squares/DetailsRadialBasisFunctions.hpp b/examples/moving_least_squares/DetailsRadialBasisFunctions.hpp index 9d0d43551..ad357852d 100644 --- a/examples/moving_least_squares/DetailsRadialBasisFunctions.hpp +++ b/examples/moving_least_squares/DetailsRadialBasisFunctions.hpp @@ -17,11 +17,15 @@ #define RBF_DECL(name) \ template \ - struct name + struct __##name; \ + \ + template \ + static constexpr __##name name \ + {} #define RBF_DEF(name, n, func) \ template <> \ - struct name \ + struct __##name \ { \ template \ KOKKOS_INLINE_FUNCTION static T apply(T x) \ @@ -33,34 +37,34 @@ namespace Details { -RBF_DECL(Wendland); -RBF_DEF(Wendland, 0, (1 - x) * (1 - x)); -RBF_DEF(Wendland, 2, (1 - x) * (1 - x) * (1 - x) * (1 - x) * (4 * x + 1)); -RBF_DEF(Wendland, 4, +RBF_DECL(wendland); +RBF_DEF(wendland, 0, (1 - x) * (1 - x)); +RBF_DEF(wendland, 2, (1 - x) * (1 - x) * (1 - x) * (1 - x) * (4 * x + 1)); +RBF_DEF(wendland, 4, (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (35 * x * x + 18 * x + 3)); -RBF_DEF(Wendland, 6, +RBF_DEF(wendland, 6, (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (32 * x * x * x + 25 * x * x + 8 * x + 1)); -RBF_DECL(Wu); -RBF_DEF(Wu, 2, +RBF_DECL(wu); +RBF_DEF(wu, 2, (1 - x) * (1 - x) * (1 - x) * (1 - x) * (3 * x * x * x + 12 * x + 16 * x + 4)); -RBF_DEF(Wu, 4, +RBF_DEF(wu, 4, (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (5 * x * x * x * x * x + 30 * x * x * x * x + 72 * x * x * x + 82 * x * x + 36 * x + 6)); -RBF_DECL(Buhmann); -RBF_DEF(Buhmann, 2, +RBF_DECL(buhmann); +RBF_DEF(buhmann, 2, 2 * x * x * x * x * log(x) - T(7) / 2 * x * x * x * x + T(16) / 3 * x * x * x - 2 * x * x + T(1) / 6); -RBF_DEF(Buhmann, 3, +RBF_DEF(buhmann, 3, 1 * x * x * x * x * x * x * x * x - T(84) / 5 * x * x * x * x * x * x + T(1024) / 5 * x * x * x * x * 
sqrt(x) - 378 * x * x * x * x + T(1024) / 5 * x * x * x * sqrt(x) - T(84) / 5 * x * x + 1); -RBF_DEF(Buhmann, 4, +RBF_DEF(buhmann, 4, T(99) / 35 * x * x * x * x * x * x * x * x - 132 * x * x * x * x * x * x + T(9216) / 35 * x * x * x * x * x * sqrt(x) - diff --git a/examples/moving_least_squares/MovingLeastSquares.hpp b/examples/moving_least_squares/MovingLeastSquares.hpp index ce1382cd0..0f44b25a3 100644 --- a/examples/moving_least_squares/MovingLeastSquares.hpp +++ b/examples/moving_least_squares/MovingLeastSquares.hpp @@ -17,6 +17,7 @@ #include "DetailsDistributedTreePostQueryComms.hpp" #include "DetailsMovingLeastSquaresComputation.hpp" +#include "DetailsPolynomialBasis.hpp" namespace Details { @@ -62,14 +63,15 @@ template class MovingLeastSquares { public: - template - MovingLeastSquares(MPI_Comm comm, ExecutionSpace const &space, - SourcePoints const &source_points, - TargetPoints const &target_points, - PolynomialBasis const &pb, RadialBasisFunction const &rbf, - std::size_t num_neighbors = PolynomialBasis::size) + MovingLeastSquares( + MPI_Comm comm, ExecutionSpace const &space, + SourcePoints const &source_points, TargetPoints const &target_points, + PolynomialDegree const &pd, RadialBasisFunction const &rbf, + std::size_t num_neighbors = Details::polynomialBasisSizeFromAT< + SourcePoints, PolynomialDegree::value>) { // Organize the source points as a tree and create the predicates ArborX::DistributedTree source_tree(comm, space, @@ -91,7 +93,7 @@ class MovingLeastSquares // Finally, compute the local MLS for the local target points _mlsc = Details::MovingLeastSquaresComputation( - space, local_source_points, target_points, pb, rbf); + space, local_source_points, target_points, pd, rbf); } template diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 287103f0e..d4c364e8e 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ 
b/examples/moving_least_squares/moving_least_squares.cpp @@ -30,18 +30,6 @@ using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; -struct MVPolynomialBasis_3D -{ - static constexpr std::size_t size = 10; - - KOKKOS_INLINE_FUNCTION static Kokkos::Array - basis(ArborX::Point const &p) - { - return {{1.f, p[0], p[1], p[2], p[0] * p[0], p[0] * p[1], p[0] * p[2], - p[1] * p[1], p[1] * p[2], p[2] * p[2]}}; - } -}; - // Function to approximate KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) { @@ -95,9 +83,9 @@ int main(int argc, char *argv[]) Kokkos::deep_copy(space, target_points, target_points_host); // Create the transform from a point cloud to another - MovingLeastSquares mls( - mpi_comm, space, source_points, target_points, MVPolynomialBasis_3D{}, - Details::Wendland<0>{}); + MovingLeastSquares mls(mpi_comm, space, source_points, + target_points, Details::degree<2>, + Details::wendland<0>); // Compute source values Kokkos::View source_values("Example::source_values", From bc388f873d1916769d4fea5e329207b38f3f5db1 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Tue, 22 Aug 2023 16:37:59 -0400 Subject: [PATCH 42/44] Hypergeometry (only works in 3D) --- .../DetailsMovingLeastSquaresComputation.hpp | 56 ++++++++++++------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp index b16ed4cde..218dac4b7 100644 --- a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp +++ b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp @@ -21,6 +21,16 @@ namespace Details { +template +using PointEquivalence = ArborX::ExperimentalHyperGeometry::Point< + ArborX::GeometryTraits::dimension_v, + typename ArborX::GeometryTraits::coordinate_type::type>; + +template +using PointEquivalenceFromAT = + PointEquivalence>::type>; 
+ template class MovingLeastSquaresComputation { @@ -38,6 +48,7 @@ class MovingLeastSquaresComputation { using src_acc = ArborX::AccessTraits; using tgt_acc = ArborX::AccessTraits; + using point_t = PointEquivalenceFromAT; _num_targets = tgt_acc::size(target_points); _num_neighbors = src_acc::size(source_points) / _num_targets; @@ -48,7 +59,7 @@ class MovingLeastSquaresComputation // We center each group of points around the target as it ables us to // optimize the final computation and transfer point types into ours // TODO: Use multidimensional points! - Kokkos::View source_ref_target = + Kokkos::View source_ref_target = sourceRefTargetFill(space, source_points, target_points, _num_targets, _num_neighbors); @@ -115,7 +126,7 @@ class MovingLeastSquaresComputation template - static Kokkos::View + static Kokkos::View **, MemorySpace> sourceRefTargetFill(ExecutionSpace const &space, SourcePoints const &source_points, TargetPoints const &target_points, @@ -123,8 +134,9 @@ class MovingLeastSquaresComputation { using src_acc = ArborX::AccessTraits; using tgt_acc = ArborX::AccessTraits; + using point_t = PointEquivalenceFromAT; - Kokkos::View source_ref_target( + Kokkos::View source_ref_target( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::source_ref_target"), num_targets, num_neighbors); @@ -136,25 +148,26 @@ class MovingLeastSquaresComputation KOKKOS_LAMBDA(int const i, int const j) { auto src = src_acc::get(source_points, i * num_neighbors + j); auto tgt = tgt_acc::get(target_points, i); - source_ref_target(i, j) = ArborX::Point{ - src[0] - tgt[0], - src[1] - tgt[1], - src[2] - tgt[2], - }; + point_t t{}; + + for (int k = 0; k < ArborX::GeometryTraits::dimension_v; k++) + t[k] = src[k] - tgt[k]; + + source_ref_target(i, j) = t; }); return source_ref_target; } - template - static Kokkos::View radiiComputation( - ExecutionSpace const &space, - Kokkos::View const &source_ref_target, - std::size_t num_targets, std::size_t num_neighbors) + template + 
static Kokkos::View + radiiComputation(ExecutionSpace const &space, + Kokkos::View const &source_ref_target, + std::size_t num_targets, std::size_t num_neighbors) { constexpr CoefficientType epsilon = std::numeric_limits::epsilon(); - constexpr ArborX::Point origin = ArborX::Point{0, 0, 0}; + constexpr Point origin{}; Kokkos::View radii( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::radii"), @@ -180,15 +193,16 @@ class MovingLeastSquaresComputation return radii; } - template + template static Kokkos::View weightComputation( ExecutionSpace const &space, - Kokkos::View const &source_ref_target, + Kokkos::View const &source_ref_target, Kokkos::View const &radii, std::size_t num_targets, std::size_t num_neighbors, RadialBasisFunction const &) { - constexpr ArborX::Point origin = ArborX::Point{0, 0, 0}; + constexpr Point origin{}; Kokkos::View phi( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::phi"), @@ -207,15 +221,15 @@ class MovingLeastSquaresComputation return phi; } - template + template static Kokkos::View vandermondeComputation( ExecutionSpace const &space, - Kokkos::View const &source_ref_target, + Kokkos::View const &source_ref_target, std::size_t num_targets, std::size_t num_neighbors, PolynomialDegree const &) { static constexpr std::size_t polynomialBasisSize = - polynomialBasisSizeFromT; + polynomialBasisSizeFromT; Kokkos::View p( Kokkos::view_alloc(Kokkos::WithoutInitializing, @@ -227,7 +241,7 @@ class MovingLeastSquaresComputation Kokkos::MDRangePolicy>( space, {0, 0}, {num_targets, num_neighbors}), KOKKOS_LAMBDA(int const i, int const j) { - auto basis = polynomialBasis( + auto basis = polynomialBasis( source_ref_target(i, j)); for (int k = 0; k < polynomialBasisSize; k++) From 220407cce1deb0c5349836a590583ab675801ff4 Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Wed, 23 Aug 2023 12:08:42 -0400 Subject: [PATCH 43/44] Using point clouds creation and gathering of the maximum error --- 
.../DetailsMovingLeastSquaresComputation.hpp | 4 +- .../DetailsPolynomialBasis.hpp | 1 + .../moving_least_squares.cpp | 147 ++++++++++++------ 3 files changed, 101 insertions(+), 51 deletions(-) diff --git a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp index 218dac4b7..f3dad773b 100644 --- a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp +++ b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp @@ -105,7 +105,9 @@ class MovingLeastSquaresComputation Kokkos::View coeffs = _coeffs; Kokkos::View target_values( - "Example::MLSC::target_values", _num_targets); + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLSC::target_values"), + _num_targets); Kokkos::parallel_for( "Example::MLSC::target_interpolation", diff --git a/examples/moving_least_squares/DetailsPolynomialBasis.hpp b/examples/moving_least_squares/DetailsPolynomialBasis.hpp index e4383b632..14dfde948 100644 --- a/examples/moving_least_squares/DetailsPolynomialBasis.hpp +++ b/examples/moving_least_squares/DetailsPolynomialBasis.hpp @@ -50,6 +50,7 @@ KOKKOS_FUNCTION constexpr std::size_t polynomialBasisSize() return size; } + template static constexpr std::size_t polynomialBasisSizeFromT = polynomialBasisSize, Deg>(); diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index d4c364e8e..7b422f98c 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -20,9 +20,12 @@ #include +#include #include -#include +#include +#include +#include "../../benchmarks/point_clouds/point_clouds.hpp" #include "DetailsRadialBasisFunctions.hpp" #include "MovingLeastSquares.hpp" #include @@ -33,7 +36,7 @@ using MemorySpace = ExecutionSpace::memory_space; // Function to approximate KOKKOS_INLINE_FUNCTION float 
manufactured_solution(ArborX::Point const &p) { - return p[2] + p[1]; + return Kokkos::cos(5 * p[2]) * p[0] + p[1] + 1; } int main(int argc, char *argv[]) @@ -41,45 +44,64 @@ int main(int argc, char *argv[]) MPI_Init(&argc, &argv); Kokkos::ScopeGuard guard(argc, argv); - constexpr std::size_t cube_side = 20; - constexpr std::size_t source_points_num = cube_side * cube_side * cube_side; - constexpr std::size_t target_points_num = 4; - ExecutionSpace space{}; MPI_Comm mpi_comm = MPI_COMM_WORLD; int mpi_size, mpi_rank; MPI_Comm_size(mpi_comm, &mpi_size); MPI_Comm_rank(mpi_comm, &mpi_rank); - std::size_t local_source_points_num = source_points_num / mpi_size; + static constexpr std::size_t total_source_points = 1024 * 512; + std::size_t local_source_points_num = total_source_points / mpi_size; + static constexpr std::size_t total_target_points = 1024; + std::size_t local_target_points_num = total_target_points / mpi_size; + static constexpr double cube_side = 5; Kokkos::View source_points( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::source_points"), local_source_points_num); + auto source_points_host = Kokkos::create_mirror_view(source_points); Kokkos::View target_points( Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::target_points"), - target_points_num); + local_source_points_num); auto target_points_host = Kokkos::create_mirror_view(target_points); - // Generate source points (Organized within a [-10, 10]^3 cube) - std::size_t thickness = cube_side / mpi_size; - Kokkos::parallel_for( - "Example::source_points_init", - Kokkos::MDRangePolicy>( - space, {0, 0, 0}, {cube_side, cube_side, thickness}), - KOKKOS_LAMBDA(int const i, int const j, int const k) { - source_points(i * cube_side * thickness + j * thickness + - k) = ArborX::Point{ - 20.f * (float(i) / (cube_side - 1) - .5f), - 20.f * (float(j) / (cube_side - 1) - .5f), - 20.f * (float(k + thickness * mpi_rank) / (cube_side - 1) - .5f)}; - }); + // source and target points are within a 
5x5x5 cube + if (mpi_rank == 0) + { + Kokkos::View all_source_points( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::all_source_points"), + total_source_points); + filledBoxCloud(cube_side / 2, all_source_points); + MPI_Scatter( + all_source_points.data(), local_source_points_num * 3 * sizeof(float), + MPI_BYTE, source_points_host.data(), + local_source_points_num * 3 * sizeof(float), MPI_BYTE, 0, mpi_comm); + + Kokkos::View all_target_points( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::all_target_points"), + total_target_points); + filledBoxCloud(cube_side / 2, all_target_points); + MPI_Scatter( + all_target_points.data(), local_target_points_num * 3 * sizeof(float), + MPI_BYTE, target_points_host.data(), + local_target_points_num * 3 * sizeof(float), MPI_BYTE, 0, mpi_comm); + } + else + { + MPI_Scatter(nullptr, local_source_points_num * 3 * sizeof(float), MPI_BYTE, + source_points_host.data(), + local_source_points_num * 3 * sizeof(float), MPI_BYTE, 0, + mpi_comm); + + MPI_Scatter(nullptr, local_target_points_num * 3 * sizeof(float), MPI_BYTE, + target_points_host.data(), + local_target_points_num * 3 * sizeof(float), MPI_BYTE, 0, + mpi_comm); + } - // Generate target points - target_points_host(0) = ArborX::Point{1.f, 0.f, 1.f}; - target_points_host(1) = ArborX::Point{5.f, 5.f, 5.f}; - target_points_host(2) = ArborX::Point{-5.f, 5.f, 3.f}; - target_points_host(3) = ArborX::Point{1.f, -3.3f, 7.f}; + Kokkos::deep_copy(space, source_points, source_points_host); Kokkos::deep_copy(space, target_points, target_points_host); // Create the transform from a point cloud to another @@ -99,41 +121,66 @@ int main(int argc, char *argv[]) // Compute target values from source ones auto target_values = mls.apply(space, source_values); + auto target_values_host = Kokkos::create_mirror_view(target_values); + Kokkos::deep_copy(space, target_values_host, target_values); // Compute target values via evaluation Kokkos::View target_values_exact( - 
"Example::target_values_exact", target_points_num); + "Example::target_values_exact", local_target_points_num); Kokkos::parallel_for( "Example::target_evaluation", - Kokkos::RangePolicy(space, 0, target_points_num), + Kokkos::RangePolicy(space, 0, local_target_points_num), KOKKOS_LAMBDA(int const i) { target_values_exact(i) = manufactured_solution(target_points(i)); }); - // Show difference - auto target_values_host = Kokkos::create_mirror_view(target_values); - Kokkos::deep_copy(space, target_values_host, target_values); - auto target_values_exact_host = - Kokkos::create_mirror_view(target_values_exact); - Kokkos::deep_copy(space, target_values_exact_host, target_values_exact); - - std::stringstream ss{}; - float error = 0.f; - for (int i = 0; i < target_points_num; i++) + // Compute local error + static constexpr float epsilon = std::numeric_limits::epsilon(); + using ErrType = typename Kokkos::MaxLoc::value_type; + ErrType error{0, 0}; + Kokkos::parallel_reduce( + "Example::error_computation", + Kokkos::RangePolicy(space, 0, local_target_points_num), + KOKKOS_LAMBDA(int const i, ErrType &loc_error) { + float abs_error = + Kokkos::abs(target_values(i) - target_values_exact(i)); + float abs_value = Kokkos::abs(target_values_exact(i)) + + epsilon; + + if (loc_error.val < abs_error / abs_value) + { + loc_error.val = abs_error / abs_value; + loc_error.loc = i; + } + }, + Kokkos::MaxLoc(error)); + + std::tuple error_obj{ + error.val, target_points_host(error.loc), target_values_host(error.loc)}; + + if (mpi_rank == 0) { - error = Kokkos::max( - Kokkos::abs(target_values_host(i) - target_values_exact_host(i)) / - Kokkos::abs(target_values_exact_host(i)), - error); - - ss << mpi_rank << ": ==== Target " << i << '\n' - << mpi_rank << ": Interpolation: " << target_values_host(i) << '\n' - << mpi_rank << ": Real value : " << target_values_exact_host(i) - << '\n'; + std::vector all_error_obj(mpi_size); + MPI_Gather(&error_obj, sizeof(decltype(error_obj)), MPI_BYTE, + 
all_error_obj.data(), sizeof(decltype(error_obj)), MPI_BYTE, 0, + mpi_comm); + + for (int i = 0; i < mpi_size; i++) + if (std::get<0>(error_obj) < std::get<0>(all_error_obj[i])) + error_obj = all_error_obj[i]; + + float error = std::get<0>(error_obj), approx = std::get<2>(error_obj); + auto point = std::get<1>(error_obj); + std::cout << "Maximum error: " << error << " at point " << point[0] << ", " + << point[1] << ", " << point[2] + << "\nTrue value: " << manufactured_solution(point) + << "\nComputed: " << approx << std::endl; + } + else + { + MPI_Gather(&error_obj, sizeof(decltype(error_obj)), MPI_BYTE, nullptr, + sizeof(decltype(error_obj)), MPI_BYTE, 0, mpi_comm); } - ss << mpi_rank << ": Maximum relative error: " << error << std::endl; - - std::cout << ss.str(); MPI_Finalize(); return 0; From dcdedd3077e062b0275a1f32008b842402bdbdaf Mon Sep 17 00:00:00 2001 From: Yohann Bosqued Date: Wed, 23 Aug 2023 16:03:04 -0400 Subject: [PATCH 44/44] Back and forth MLS --- .../moving_least_squares.cpp | 292 +++++++++++------- 1 file changed, 184 insertions(+), 108 deletions(-) diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp index 7b422f98c..2a7b75583 100644 --- a/examples/moving_least_squares/moving_least_squares.cpp +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -9,19 +9,11 @@ * SPDX-License-Identifier: BSD-3-Clause * ****************************************************************************/ -// Example taken from DataTransferKit -// (https://github.com/ORNL-CEES/DataTransferKit) -// with MLS resolution from -// (http://dx.doi.org/10.1016/j.jcp.2015.11.055) -// and -// (A conservative mesh-free approach for fluid-structure interface problems) - #include #include #include -#include #include #include @@ -33,137 +25,160 @@ using ExecutionSpace = Kokkos::DefaultExecutionSpace; using MemorySpace = ExecutionSpace::memory_space; +using HostExecutionSpace = 
Kokkos::DefaultHostExecutionSpace; +using HostMemorySpace = HostExecutionSpace::memory_space; + // Function to approximate -KOKKOS_INLINE_FUNCTION float manufactured_solution(ArborX::Point const &p) +struct Step { - return Kokkos::cos(5 * p[2]) * p[0] + p[1] + 1; -} + KOKKOS_INLINE_FUNCTION static float eval(ArborX::Point const &p) + { + return !Kokkos::signbit(p[0]) * 1.f; + } -int main(int argc, char *argv[]) -{ - MPI_Init(&argc, &argv); - Kokkos::ScopeGuard guard(argc, argv); + template + static Kokkos::View + map(ExecutionSpace const &space, + Kokkos::View const &ps) + { + Kokkos::View evals("Example::evals", ps.extent(0)); + Kokkos::parallel_for( + "Example::evaluation", + Kokkos::RangePolicy(space, 0, ps.extent(0)), + KOKKOS_LAMBDA(int const i) { evals(i) = eval(ps(i)); }); + return evals; + } +}; - ExecutionSpace space{}; - MPI_Comm mpi_comm = MPI_COMM_WORLD; +Kokkos::Array, 2> +createPointClouds(HostExecutionSpace const &hspace, ExecutionSpace const &space, + MPI_Comm comm, std::size_t points_num) +{ int mpi_size, mpi_rank; - MPI_Comm_size(mpi_comm, &mpi_size); - MPI_Comm_rank(mpi_comm, &mpi_rank); + MPI_Comm_size(comm, &mpi_size); + MPI_Comm_rank(comm, &mpi_rank); + + Kokkos::Array, 2> + point_clouds_host{Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::points_cloud_0"), + points_num), + Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::points_cloud_1"), + points_num)}; - static constexpr std::size_t total_source_points = 1024 * 512; - std::size_t local_source_points_num = total_source_points / mpi_size; - static constexpr std::size_t total_target_points = 1024; - std::size_t local_target_points_num = total_target_points / mpi_size; - static constexpr double cube_side = 5; - - Kokkos::View source_points( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::source_points"), - local_source_points_num); - auto source_points_host = Kokkos::create_mirror_view(source_points); - Kokkos::View 
target_points( - Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::target_points"), - local_source_points_num); - auto target_points_host = Kokkos::create_mirror_view(target_points); - - // source and target points are within a 5x5x5 cube if (mpi_rank == 0) { - Kokkos::View all_source_points( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::all_source_points"), - total_source_points); - filledBoxCloud(cube_side / 2, all_source_points); - MPI_Scatter( - all_source_points.data(), local_source_points_num * 3 * sizeof(float), - MPI_BYTE, source_points_host.data(), - local_source_points_num * 3 * sizeof(float), MPI_BYTE, 0, mpi_comm); - - Kokkos::View all_target_points( - Kokkos::view_alloc(Kokkos::WithoutInitializing, - "Example::all_target_points"), - total_target_points); - filledBoxCloud(cube_side / 2, all_target_points); - MPI_Scatter( - all_target_points.data(), local_target_points_num * 3 * sizeof(float), - MPI_BYTE, target_points_host.data(), - local_target_points_num * 3 * sizeof(float), MPI_BYTE, 0, mpi_comm); + Kokkos::Array, 2> + all_point_clouds{Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::all_points_cloud_0"), + points_num * mpi_size), + Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::all_points_cloud_1"), + points_num * mpi_size)}; + + filledBoxCloud(.5, all_point_clouds[0]); + filledBoxCloud(.5, all_point_clouds[1]); + + MPI_Scatter(all_point_clouds[0].data(), points_num * 3 * sizeof(float), + MPI_BYTE, point_clouds_host[0].data(), + points_num * 3 * sizeof(float), MPI_BYTE, 0, comm); + MPI_Scatter(all_point_clouds[1].data(), points_num * 3 * sizeof(float), + MPI_BYTE, point_clouds_host[1].data(), + points_num * 3 * sizeof(float), MPI_BYTE, 0, comm); } else { - MPI_Scatter(nullptr, local_source_points_num * 3 * sizeof(float), MPI_BYTE, - source_points_host.data(), - local_source_points_num * 3 * sizeof(float), MPI_BYTE, 0, - mpi_comm); - - MPI_Scatter(nullptr, 
local_target_points_num * 3 * sizeof(float), MPI_BYTE, - target_points_host.data(), - local_target_points_num * 3 * sizeof(float), MPI_BYTE, 0, - mpi_comm); + MPI_Scatter(nullptr, 0, MPI_BYTE, point_clouds_host[0].data(), + points_num * 3 * sizeof(float), MPI_BYTE, 0, comm); + MPI_Scatter(nullptr, 0, MPI_BYTE, point_clouds_host[1].data(), + points_num * 3 * sizeof(float), MPI_BYTE, 0, comm); } - Kokkos::deep_copy(space, source_points, source_points_host); - Kokkos::deep_copy(space, target_points, target_points_host); - - // Create the transform from a point cloud to another - MovingLeastSquares mls(mpi_comm, space, source_points, - target_points, Details::degree<2>, - Details::wendland<0>); - - // Compute source values - Kokkos::View source_values("Example::source_values", - local_source_points_num); Kokkos::parallel_for( - "Example::source_evaluation", - Kokkos::RangePolicy(space, 0, local_source_points_num), + "Example::flatten_points", + Kokkos::RangePolicy(hspace, 0, points_num), KOKKOS_LAMBDA(int const i) { - source_values(i) = manufactured_solution(source_points(i)); + point_clouds_host[0](i)[2] = 0; + point_clouds_host[1](i)[2] = 0; }); - // Compute target values from source ones - auto target_values = mls.apply(space, source_values); - auto target_values_host = Kokkos::create_mirror_view(target_values); - Kokkos::deep_copy(space, target_values_host, target_values); + Kokkos::Array, 2> point_clouds{ + Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::points_cloud_0"), + points_num), + Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::points_cloud_1"), + points_num)}; + Kokkos::deep_copy(space, point_clouds[0], point_clouds_host[0]); + Kokkos::deep_copy(space, point_clouds[1], point_clouds_host[1]); - // Compute target values via evaluation - Kokkos::View target_values_exact( - "Example::target_values_exact", local_target_points_num); - Kokkos::parallel_for( - "Example::target_evaluation", - 
Kokkos::RangePolicy(space, 0, local_target_points_num), - KOKKOS_LAMBDA(int const i) { - target_values_exact(i) = manufactured_solution(target_points(i)); - }); + return point_clouds; +} + +template +Kokkos::Array, 2> createMLSObjects( + MPI_Comm comm, ExecutionSpace const &space, + Kokkos::View const &point_clouds_0, + Kokkos::View const &point_clouds_1, + Deg const °, RBF const &rbf) +{ + return {MovingLeastSquares(comm, space, point_clouds_0, + point_clouds_1, deg, rbf), + MovingLeastSquares(comm, space, point_clouds_1, + point_clouds_0, deg, rbf)}; +} + +void doError(MPI_Comm comm, ExecutionSpace const &space, + Kokkos::View const &points, + Kokkos::View const &approx, + Kokkos::View const &values) +{ + int mpi_size, mpi_rank; + MPI_Comm_size(comm, &mpi_size); + MPI_Comm_rank(comm, &mpi_rank); // Compute local error - static constexpr float epsilon = std::numeric_limits::epsilon(); using ErrType = typename Kokkos::MaxLoc::value_type; ErrType error{0, 0}; + float error_sum = 0; Kokkos::parallel_reduce( "Example::error_computation", - Kokkos::RangePolicy(space, 0, local_target_points_num), - KOKKOS_LAMBDA(int const i, ErrType &loc_error) { - float abs_error = - Kokkos::abs(target_values(i) - target_values_exact(i)); - float abs_value = Kokkos::abs(target_values_exact(i)) + - epsilon; - - if (loc_error.val < abs_error / abs_value) + Kokkos::RangePolicy(space, 0, approx.extent(0)), + KOKKOS_LAMBDA(int const i, ErrType &loc_error, float &loc_error_sum) { + float abs_error = Kokkos::abs(approx(i) - values(i)); + + loc_error_sum += abs_error; + if (loc_error.val < abs_error) { - loc_error.val = abs_error / abs_value; + loc_error.val = abs_error; loc_error.loc = i; } }, - Kokkos::MaxLoc(error)); + Kokkos::MaxLoc(error), Kokkos::Sum(error_sum)); + + auto approx_host = Kokkos::create_mirror_view(approx); + auto points_host = Kokkos::create_mirror_view(points); + Kokkos::deep_copy(space, approx_host, approx); + Kokkos::deep_copy(space, points_host, points); std::tuple 
error_obj{ - error.val, target_points_host(error.loc), target_values_host(error.loc)}; + error.val, points_host(error.loc), approx_host(error.loc)}; + // Compute global error if (mpi_rank == 0) { + float error_sum_global; std::vector all_error_obj(mpi_size); MPI_Gather(&error_obj, sizeof(decltype(error_obj)), MPI_BYTE, all_error_obj.data(), sizeof(decltype(error_obj)), MPI_BYTE, 0, - mpi_comm); + comm); + MPI_Reduce(&error_sum, &error_sum_global, 1, MPI_FLOAT, MPI_SUM, 0, comm); for (int i = 0; i < mpi_size; i++) if (std::get<0>(error_obj) < std::get<0>(all_error_obj[i])) @@ -171,15 +186,76 @@ int main(int argc, char *argv[]) float error = std::get<0>(error_obj), approx = std::get<2>(error_obj); auto point = std::get<1>(error_obj); - std::cout << "Maximum error: " << error << " at point " << point[0] << ", " - << point[1] << ", " << point[2] - << "\nTrue value: " << manufactured_solution(point) - << "\nComputed: " << approx << std::endl; + std::cout << "Mean error: " + << error_sum_global / (points.extent(0) * mpi_size) + << "\nMaximum error: " << error << " at point " << point[0] + << ", " << point[1] << "\n True value: " << Step::eval(point) + << "\n Computed: " << approx << std::endl; } else { MPI_Gather(&error_obj, sizeof(decltype(error_obj)), MPI_BYTE, nullptr, - sizeof(decltype(error_obj)), MPI_BYTE, 0, mpi_comm); + sizeof(decltype(error_obj)), MPI_BYTE, 0, comm); + MPI_Reduce(&error_sum, nullptr, 1, MPI_FLOAT, MPI_SUM, 0, comm); + } +} + +Kokkos::View +doOne(MPI_Comm comm, ExecutionSpace const &space, + Kokkos::View const &tgt, + Kokkos::View const &values, + Kokkos::View const &true_values, + MovingLeastSquares &mls) +{ + auto tgt_values = mls.apply(space, values); + doError(comm, space, tgt, tgt_values, true_values); + return tgt_values; +} + +int main(int argc, char *argv[]) +{ + static constexpr std::size_t total_points = 1024 * 128; + static constexpr std::size_t num_back_forth = 50; + static constexpr auto deg = Details::degree<4>; + static constexpr 
auto rbf = Details::wu<2>; + + MPI_Init(&argc, &argv); + Kokkos::ScopeGuard guard(argc, argv); + + ExecutionSpace space{}; + HostExecutionSpace host_space{}; + MPI_Comm mpi_comm = MPI_COMM_WORLD; + int mpi_size, mpi_rank; + MPI_Comm_size(mpi_comm, &mpi_size); + MPI_Comm_rank(mpi_comm, &mpi_rank); + + auto point_clouds = + createPointClouds(host_space, space, mpi_comm, total_points / mpi_size); + + // Create the transform from a point cloud to another + auto mlss = createMLSObjects(mpi_comm, space, point_clouds[0], + point_clouds[1], deg, rbf); + + Kokkos::Array, 2> true_values{ + Step::map(space, point_clouds[0]), Step::map(space, point_clouds[1])}; + + Kokkos::View source_values = true_values[0]; + for (int i = 0; i < num_back_forth * 2; i++) + { + if (mpi_rank == 0) + std::cout << "=== TURN " << i + 1 << std::endl; + + Kokkos::View target = + point_clouds[1 - (i % 2)]; + Kokkos::View tgt_true_values = + true_values[1 - (i % 2)]; + MovingLeastSquares &mls = mlss[i % 2]; + + source_values = + doOne(mpi_comm, space, target, source_values, tgt_true_values, mls); + + if (mpi_rank == 0) + std::cout << "===\n" << std::endl; } MPI_Finalize();