diff --git a/.gitignore b/.gitignore index 48439bce0..488cc1fd5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *.swp .#* /build* +.vscode \ No newline at end of file diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 389a6bcdf..15b7e5b7f 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -17,3 +17,7 @@ if(Boost_FOUND) add_subdirectory(raytracing) add_subdirectory(brute_force) endif() + +if(ARBORX_ENABLE_MPI) + add_subdirectory(moving_least_squares) +endif() diff --git a/examples/moving_least_squares/CMakeLists.txt b/examples/moving_least_squares/CMakeLists.txt new file mode 100644 index 000000000..a22bdbe30 --- /dev/null +++ b/examples/moving_least_squares/CMakeLists.txt @@ -0,0 +1,3 @@ +add_executable(ArborX_Example_MovingLeastSquares.exe moving_least_squares.cpp) +target_link_libraries(ArborX_Example_MovingLeastSquares.exe ArborX::ArborX) +add_test(NAME ArborX_Example_MovingLeastSquares COMMAND ArborX_Example_MovingLeastSquares.exe) diff --git a/examples/moving_least_squares/DetailsDistributedTreePostQueryComms.hpp b/examples/moving_least_squares/DetailsDistributedTreePostQueryComms.hpp new file mode 100644 index 000000000..6c4e7ec34 --- /dev/null +++ b/examples/moving_least_squares/DetailsDistributedTreePostQueryComms.hpp @@ -0,0 +1,205 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#include +#include + +#include + +namespace Details +{ + +template +class DistributedTreePostQueryComms +{ +public: + DistributedTreePostQueryComms() = default; + + template + DistributedTreePostQueryComms(MPI_Comm comm, ExecutionSpace const &space, + IndicesAndRanks const &indices_and_ranks) + { + std::size_t data_len = indices_and_ranks.extent(0); + + _comm.reset( + [comm]() { + auto p = new MPI_Comm; + MPI_Comm_dup(comm, p); + return p; + }(), + [](MPI_Comm *p) { + int mpi_finalized; + MPI_Finalized(&mpi_finalized); + if (!mpi_finalized) + MPI_Comm_free(p); + delete p; + }); + + int rank; + MPI_Comm_rank(*_comm, &rank); + + Kokkos::View mpi_tmp( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::DTPQC::tmp"), + data_len); + + // Split indices/ranks + Kokkos::Array, 2> split_indices_ranks = + indicesAndRanksSplit(space, indices_and_ranks, data_len); + Kokkos::View indices = split_indices_ranks[0]; + Kokkos::View ranks = split_indices_ranks[1]; + + // Computes what will be common to every exchange. Every time + // someone wants to get the value from the same set of elements, + // they will use the same list of recv and send indices. + // The rank data will be saved inside the back distributor, + // as the front one is not relevant once the recv indices + // are computed. + + // This builds for each process a local array indicating how much + // informatiom will be gathered + ArborX::Details::Distributor distributor_forth(*_comm); + _num_requests = distributor_forth.createFromSends(space, ranks); + + // This creates the temporary buffer that will help when producing the + // array that rebuilds the output + Kokkos::View mpi_rev_indices( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::rev_indices"), + _num_requests); + ArborX::iota(space, mpi_tmp); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, distributor_forth, mpi_tmp, mpi_rev_indices); + + // This retrieves which source index a process wants and gives it to + // the process owning the source + _mpi_send_indices = Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::send_indices"), + _num_requests); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, distributor_forth, indices, _mpi_send_indices); + + // This builds the temporary buffer that will create the reverse + // distributor to dispatch the values + Kokkos::View mpi_rev_ranks( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::rev_ranks"), + _num_requests); + Kokkos::deep_copy(space, mpi_tmp, rank); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, distributor_forth, mpi_tmp, mpi_rev_ranks); + + // This will create the reverse of the previous distributor + _distributor = ArborX::Details::Distributor(*_comm); + _num_responses = _distributor->createFromSends(space, mpi_rev_ranks); + + // There should be enough responses to perfectly fill what was requested + // i.e. _num_responses == data_len + + // The we send back the requested indices so that each process can rebuild + // their output + _mpi_recv_indices = Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::recv_indices"), + _num_responses); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, *_distributor, mpi_rev_indices, _mpi_recv_indices); + } + + template + Kokkos::View>::type *, + typename ArborX::AccessTraits< + Values, ArborX::PrimitivesTag>::memory_space> + distribute(ExecutionSpace const &space, Values const &source) + { + using src_acc = ArborX::AccessTraits; + using value_t = typename ArborX::Details::AccessTraitsHelper::type; + using memory_space = typename src_acc::memory_space; + + // We know what each process want so we prepare the data to be sent + Kokkos::View data_to_send( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::data_to_send"), + _num_requests); + Kokkos::parallel_for( + "Example::DTPQC::data_to_send_fill", + Kokkos::RangePolicy(space, 0, _num_requests), + KOKKOS_CLASS_LAMBDA(int const i) { + data_to_send(i) = src_acc::get(source, _mpi_send_indices(i)); + }); + + // We properly send the data, and each process has what it wants, but in the + // wrong order + Kokkos::View data_to_recv( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::data_to_recv"), + _num_responses); + ArborX::Details::DistributedTreeImpl::sendAcrossNetwork( + space, *_distributor, data_to_send, data_to_recv); + + // So we fix this by moving everything + Kokkos::View output( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::output"), + _num_responses); + Kokkos::parallel_for( + "Example::DTPQC::output_fill", + Kokkos::RangePolicy(space, 0, _num_responses), + KOKKOS_CLASS_LAMBDA(int const i) { + output(_mpi_recv_indices(i)) = data_to_recv(i); + }); + + return output; + } + + template + static Kokkos::Array, 2> + indicesAndRanksSplit(ExecutionSpace const &space, + IndicesAndRanks const &indices_and_ranks, + std::size_t data_len) + { + Kokkos::View indices( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::indices"), + data_len); + Kokkos::View ranks( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::DTPQC::ranks"), + data_len); + + Kokkos::parallel_for( + "Example::DTPQC::indices_and_ranks_split", + Kokkos::RangePolicy(space, 0, data_len), + KOKKOS_LAMBDA(int const i) { + indices(i) = indices_and_ranks(i).index; + ranks(i) = indices_and_ranks(i).rank; + }); + + return {{indices, ranks}}; + } + +private: + std::shared_ptr _comm; + Kokkos::View _mpi_send_indices; + Kokkos::View _mpi_recv_indices; + std::optional> _distributor; + std::size_t _num_requests; + std::size_t _num_responses; +}; + +} // namespace Details diff --git a/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp new file mode 100644 index 000000000..f3dad773b --- /dev/null +++ b/examples/moving_least_squares/DetailsMovingLeastSquaresComputation.hpp @@ -0,0 +1,328 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#include "DetailsPolynomialBasis.hpp" +#include "DetailsSymmetricPseudoInverseSVD.hpp" + +namespace Details +{ + +template +using PointEquivalence = ArborX::ExperimentalHyperGeometry::Point< + ArborX::GeometryTraits::dimension_v, + typename ArborX::GeometryTraits::coordinate_type::type>; + +template +using PointEquivalenceFromAT = + PointEquivalence>::type>; + +template +class MovingLeastSquaresComputation +{ +public: + MovingLeastSquaresComputation() = default; + + template + MovingLeastSquaresComputation(ExecutionSpace const &space, + SourcePoints const &source_points, + TargetPoints const &target_points, + PolynomialDegree const &pd, + RadialBasisFunction const &rbf) + { + using src_acc = ArborX::AccessTraits; + using tgt_acc = ArborX::AccessTraits; + using point_t = PointEquivalenceFromAT; + + _num_targets = tgt_acc::size(target_points); + _num_neighbors = src_acc::size(source_points) / _num_targets; + + static constexpr std::size_t polynomialBasisSize = + polynomialBasisSizeFromAT; + + // We center each group of points around the target as it ables us to + // optimize the final computation and transfer point types into ours + // TODO: Use multidimensional points! + Kokkos::View source_ref_target = + sourceRefTargetFill(space, source_points, target_points, _num_targets, + _num_neighbors); + + // To properly use the RBF, we need to decide for a radius around each + // target point that encapsulates all of the points + Kokkos::View radii = radiiComputation( + space, source_ref_target, _num_targets, _num_neighbors); + + // Once the radius is computed, the wieght follows by evaluating the RBF at + // each source point with their proper radii + Kokkos::View phi = weightComputation( + space, source_ref_target, radii, _num_targets, _num_neighbors, rbf); + + // We then need to create the Vandermonde matrix for each source point + // Instead of relying on an external type, could it be produced + // automatically? + Kokkos::View p = vandermondeComputation( + space, source_ref_target, _num_targets, _num_neighbors, pd); + + // From the weight and Vandermonde matrices, we can compute the moment + // matrix as A = P^T.PHI.P + Kokkos::View a = momentComputation( + space, phi, p, _num_targets, _num_neighbors, polynomialBasisSize); + + // We then take the pseudo-inverse of that moment matrix. + Kokkos::View a_inv = + symmetricPseudoInverseSVD(space, a); + + // We finally build the coefficients as C = [1 0 0 ...].A^-1.P^T.PHI + _coeffs = coefficientsComputation(space, phi, p, a_inv, _num_targets, + _num_neighbors, polynomialBasisSize); + } + + template + Kokkos::View + apply(ExecutionSpace const &space, SourceValues const &source_values) + { + using value_t = typename SourceValues::non_const_value_type; + using memory_space = typename SourceValues::memory_space; + + std::size_t num_neighbors = _num_neighbors; + Kokkos::View coeffs = _coeffs; + + Kokkos::View target_values( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLSC::target_values"), + _num_targets); + + Kokkos::parallel_for( + "Example::MLSC::target_interpolation", + Kokkos::RangePolicy(space, 0, _num_targets), + KOKKOS_LAMBDA(int const i) { + value_t tmp = 0; + + for (int j = 0; j < num_neighbors; j++) + { + tmp += coeffs(i, j) * source_values(i * num_neighbors + j); + } + + target_values(i) = tmp; + }); + + return target_values; + } + + template + static Kokkos::View **, MemorySpace> + sourceRefTargetFill(ExecutionSpace const &space, + SourcePoints const &source_points, + TargetPoints const &target_points, + std::size_t num_targets, std::size_t num_neighbors) + { + using src_acc = ArborX::AccessTraits; + using tgt_acc = ArborX::AccessTraits; + using point_t = PointEquivalenceFromAT; + + Kokkos::View source_ref_target( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLSC::source_ref_target"), + num_targets, num_neighbors); + + Kokkos::parallel_for( + "Example::MLSC::source_ref_target_fill", + Kokkos::MDRangePolicy>( + space, {0, 0}, {num_targets, num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + auto src = src_acc::get(source_points, i * num_neighbors + j); + auto tgt = tgt_acc::get(target_points, i); + point_t t{}; + + for (int k = 0; k < ArborX::GeometryTraits::dimension_v; k++) + t[k] = src[k] - tgt[k]; + + source_ref_target(i, j) = t; + }); + + return source_ref_target; + } + + template + static Kokkos::View + radiiComputation(ExecutionSpace const &space, + Kokkos::View const &source_ref_target, + std::size_t num_targets, std::size_t num_neighbors) + { + constexpr CoefficientType epsilon = + std::numeric_limits::epsilon(); + constexpr Point origin{}; + + Kokkos::View radii( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::radii"), + num_targets); + + Kokkos::parallel_for( + "Example::MLSC::radii_computation", + Kokkos::RangePolicy(space, 0, num_targets), + KOKKOS_LAMBDA(int const i) { + CoefficientType radius = 10 * epsilon; + + for (int j = 0; j < num_neighbors; j++) + { + CoefficientType norm = + ArborX::Details::distance(source_ref_target(i, j), origin); + radius = (radius < norm) ? norm : radius; + } + + // The one at the limit would be valued at 0 due to how RBF works + radii(i) = 1.1 * radius; + }); + + return radii; + } + + template + static Kokkos::View weightComputation( + ExecutionSpace const &space, + Kokkos::View const &source_ref_target, + Kokkos::View const &radii, + std::size_t num_targets, std::size_t num_neighbors, + RadialBasisFunction const &) + { + constexpr Point origin{}; + + Kokkos::View phi( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::MLSC::phi"), + num_targets, num_neighbors); + + Kokkos::parallel_for( + "Example::MLSC::phi_computation", + Kokkos::MDRangePolicy>( + space, {0, 0}, {num_targets, num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + CoefficientType norm = + ArborX::Details::distance(source_ref_target(i, j), origin); + phi(i, j) = RadialBasisFunction::apply(norm / radii(i)); + }); + + return phi; + } + + template + static Kokkos::View vandermondeComputation( + ExecutionSpace const &space, + Kokkos::View const &source_ref_target, + std::size_t num_targets, std::size_t num_neighbors, + PolynomialDegree const &) + { + static constexpr std::size_t polynomialBasisSize = + polynomialBasisSizeFromT; + + Kokkos::View p( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLSC::vandermonde"), + num_targets, num_neighbors, polynomialBasisSize); + + Kokkos::parallel_for( + "Example::MLSC::vandermonde_computation", + Kokkos::MDRangePolicy>( + space, {0, 0}, {num_targets, num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + auto basis = polynomialBasis( + source_ref_target(i, j)); + + for (int k = 0; k < polynomialBasisSize; k++) + { + p(i, j, k) = basis[k]; + } + }); + + return p; + } + + template + static Kokkos::View + momentComputation(ExecutionSpace const &space, + Kokkos::View const &phi, + Kokkos::View const &p, + std::size_t num_targets, std::size_t num_neighbors, + std::size_t polynomialBasisSize) + { + Kokkos::View a( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLSC::moment"), + num_targets, polynomialBasisSize, polynomialBasisSize); + + Kokkos::parallel_for( + "Example::MLSC::moment_computation", + Kokkos::MDRangePolicy>( + space, {0, 0, 0}, + {num_targets, polynomialBasisSize, polynomialBasisSize}), + KOKKOS_LAMBDA(int const i, int const j, int const k) { + CoefficientType tmp = 0; + + for (int l = 0; l < num_neighbors; l++) + { + tmp += p(i, l, j) * p(i, l, k) * phi(i, l); + } + + a(i, j, k) = tmp; + }); + + return a; + } + + template + static Kokkos::View coefficientsComputation( + ExecutionSpace const &space, + Kokkos::View const &phi, + Kokkos::View const &p, + Kokkos::View const &a_inv, + std::size_t num_targets, std::size_t num_neighbors, + std::size_t polynomialBasisSize) + { + Kokkos::View coeffs( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::MLSC::coefficients"), + num_targets, num_neighbors); + + Kokkos::parallel_for( + "Example::MLSC::coefficients_computation", + Kokkos::MDRangePolicy>( + space, {0, 0}, {num_targets, num_neighbors}), + KOKKOS_LAMBDA(int const i, int const j) { + CoefficientType tmp = 0; + + for (int k = 0; k < polynomialBasisSize; k++) + { + tmp += a_inv(i, 0, k) * p(i, j, k) * phi(i, j); + } + + coeffs(i, j) = tmp; + }); + + return coeffs; + } + +private: + Kokkos::View _coeffs; + std::size_t _num_targets; + std::size_t _num_neighbors; +}; + +} // namespace Details diff --git a/examples/moving_least_squares/DetailsPolynomialBasis.hpp b/examples/moving_least_squares/DetailsPolynomialBasis.hpp new file mode 100644 index 000000000..14dfde948 --- /dev/null +++ b/examples/moving_least_squares/DetailsPolynomialBasis.hpp @@ -0,0 +1,101 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#include + +namespace Details +{ + +template +KOKKOS_FUNCTION constexpr Kokkos::Array, Deg> +polynomialBasisColumnSizes() +{ + Kokkos::Array, Deg> arr{}; + + for (std::size_t dim = 0; dim < Dim; dim++) + arr[0][dim] = 1; + for (std::size_t deg = 0; deg < Deg; deg++) + arr[deg][0] = 1; + + for (std::size_t deg = 1; deg < Deg; deg++) + for (std::size_t dim = 1; dim < Dim; dim++) + arr[deg][dim] = arr[deg - 1][dim] + arr[deg][dim - 1]; + + return arr; +} + +template +KOKKOS_FUNCTION constexpr std::size_t polynomialBasisSize() +{ + auto arr = polynomialBasisColumnSizes(); + std::size_t size = 1; + + for (std::size_t deg = 0; deg < Deg; deg++) + for (std::size_t dim = 0; dim < Dim; dim++) + size += arr[deg][dim]; + + return size; +} + +template +static constexpr std::size_t polynomialBasisSizeFromT = + polynomialBasisSize, Deg>(); + +template +static constexpr std::size_t polynomialBasisSizeFromAT = + polynomialBasisSizeFromT< + typename ArborX::Details::AccessTraitsHelper< + ArborX::AccessTraits>::type, + Deg>; + +template +KOKKOS_FUNCTION auto polynomialBasis(Point const &p) +{ + static constexpr std::size_t dimension = + ArborX::GeometryTraits::dimension_v; + static constexpr auto column_details = + polynomialBasisColumnSizes(); + using value_t = typename ArborX::GeometryTraits::coordinate_type::type; + + Kokkos::Array()> arr{}; + arr[0] = value_t(1); + + std::size_t prev_col = 0; + std::size_t curr_col = 1; + for (std::size_t deg = 0; deg < Deg; deg++) + { + std::size_t loc_offset = curr_col; + for (std::size_t dim = 0; dim < dimension; dim++) + { + // copy the previous column and multply by p[dim] + for (std::size_t i = 0; i < column_details[deg][dim]; i++) + arr[loc_offset + i] = arr[prev_col + i] * p[dim]; + + loc_offset += column_details[deg][dim]; + } + + prev_col = curr_col; + curr_col = loc_offset; + } + + return arr; +} + +template +static constexpr std::integral_constant degree{}; + +} // namespace Details diff --git a/examples/moving_least_squares/DetailsRadialBasisFunctions.hpp b/examples/moving_least_squares/DetailsRadialBasisFunctions.hpp new file mode 100644 index 000000000..ad357852d --- /dev/null +++ b/examples/moving_least_squares/DetailsRadialBasisFunctions.hpp @@ -0,0 +1,77 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#define RBF_DECL(name) \ + template \ + struct __##name; \ + \ + template \ + static constexpr __##name name \ + {} + +#define RBF_DEF(name, n, func) \ + template <> \ + struct __##name \ + { \ + template \ + KOKKOS_INLINE_FUNCTION static T apply(T x) \ + { \ + return func; \ + } \ + } + +namespace Details +{ + +RBF_DECL(wendland); +RBF_DEF(wendland, 0, (1 - x) * (1 - x)); +RBF_DEF(wendland, 2, (1 - x) * (1 - x) * (1 - x) * (1 - x) * (4 * x + 1)); +RBF_DEF(wendland, 4, + (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * + (35 * x * x + 18 * x + 3)); +RBF_DEF(wendland, 6, + (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * + (1 - x) * (32 * x * x * x + 25 * x * x + 8 * x + 1)); + +RBF_DECL(wu); +RBF_DEF(wu, 2, + (1 - x) * (1 - x) * (1 - x) * (1 - x) * + (3 * x * x * x + 12 * x + 16 * x + 4)); +RBF_DEF(wu, 4, + (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * (1 - x) * + (5 * x * x * x * x * x + 30 * x * x * x * x + 72 * x * x * x + + 82 * x * x + 36 * x + 6)); + +RBF_DECL(buhmann); +RBF_DEF(buhmann, 2, + 2 * x * x * x * x * log(x) - T(7) / 2 * x * x * x * x + + T(16) / 3 * x * x * x - 2 * x * x + T(1) / 6); +RBF_DEF(buhmann, 3, + 1 * x * x * x * x * x * x * x * x - T(84) / 5 * x * x * x * x * x * x + + T(1024) / 5 * x * x * x * x * sqrt(x) - 378 * x * x * x * x + + T(1024) / 5 * x * x * x * sqrt(x) - T(84) / 5 * x * x + 1); +RBF_DEF(buhmann, 4, + T(99) / 35 * x * x * x * x * x * x * x * x - + 132 * x * x * x * x * x * x + + T(9216) / 35 * x * x * x * x * x * sqrt(x) - + T(11264) / 35 * x * x * x * x * sqrt(x) + 198 * x * x * x * x - + T(396) / 5 * x * x + 1); + +} // namespace Details + +#undef RBF_DECL +#undef RBF_DEF \ No newline at end of file diff --git a/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp b/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp new file mode 100644 index 000000000..6f55db2f5 --- /dev/null +++ b/examples/moving_least_squares/DetailsSymmetricPseudoInverseSVD.hpp @@ -0,0 +1,206 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include +#include + +namespace Details +{ + +// This finds the biggest off-diagonal value of E.S as well as its +// coordinates. Being symmetric, we can always check on the upper +// triangle (and always have q > p) +template +KOKKOS_FUNCTION typename Matrices::non_const_value_type +spisvdArgmaxOffDiagonal(Matrices const &es, int const i, int &p, int &q) +{ + using value_t = typename Matrices::non_const_value_type; + + std::size_t const size = es.extent(1); + value_t max = 0; + p = q = 0; + + for (int j = 0; j < size; j++) + { + for (int k = j + 1; k < size; k++) + { + value_t val = Kokkos::abs(es(i, j, k)); + if (max < val) + { + max = val; + p = j; + q = k; + } + } + } + + return max; +} + +// Pseudo-inverse of symmetric matrices using SVD +// We must find U, E (diagonal and positive) and V such that A = U.E.V^T +// We also know that A is symmetric (by construction), so U = SV where S is +// a sign matrix (only 1 or -1 in the diagonal, 0 elsewhere). +// Thus A = U.E.S.U^T and A^-1 = U.[ E^-1.S ].U^T +template +Kokkos::View +symmetricPseudoInverseSVD(ExecutionSpace const &space, Matrices const &mats) +{ + using value_t = typename Matrices::non_const_value_type; + using memory_space = typename Matrices::memory_space; + + std::size_t const num_matrices = mats.extent(0); + std::size_t const size = mats.extent(1); + constexpr value_t epsilon = std::numeric_limits::epsilon(); + constexpr value_t pi_4 = value_t(M_PI_4); + + // ==> Initialisation + // E.S is the input matrix + // U is the identity + Kokkos::View es( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::SPISVD::ES"), + mats.layout()); + Kokkos::View u( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::SPISVD::U"), + mats.layout()); + Kokkos::parallel_for( + "Example::SPISVD::ES_U_init", + Kokkos::MDRangePolicy>( + space, {0, 0, 0}, {num_matrices, size, size}), + KOKKOS_LAMBDA(int const i, int const j, int const k) { + es(i, j, k) = value_t(mats(i, j, k)); + u(i, j, k) = value_t((j == k)); + }); + + // ==> Loop + // Iterative approach, we will "deconstruct" E.S until only the diagonal + // is relevent inside the matrix + // It is possible to prove that, at each step, the "norm" of the matrix + // is strictly less that of the previous + // For all the loops, the following equality holds: A = U.E.S.U^T + Kokkos::parallel_for( + "Example::SPISVD::compute_ES_U", + Kokkos::RangePolicy(space, 0, num_matrices), + KOKKOS_LAMBDA(int const i) { + int p, q; + value_t norm = spisvdArgmaxOffDiagonal(es, i, p, q); + while (norm > epsilon) + { + value_t a = es(i, p, p); + value_t b = es(i, p, q); + value_t c = es(i, q, q); + + // Our submatrix is now + // +----------+----------+ +---+---+ + // | es(p, p) | es(p, q) | | a | b | + // +----------+----------+ = +---+---+ + // | es(q, p) | es(q, q) | | b | c | + // +----------+----------+ +---+---+ + + // Lets compute x, y and theta such that + // +---+---+ +---+---+ + // | a | b | | x | 0 | + // +---+---+ = R(theta) * +---+---+ * R(theta)^T + // | b | c | | 0 | y | + // +---+---+ +---+---+ + + value_t theta, x, y; + if (a == c) // <-- better to check if |a - c| < epsilon? + { + theta = pi_4; + x = a + b; + y = a - b; + } + else + { + theta = Kokkos::atan((2 * b) / (a - c)) / 2; + value_t a_c_cos2 = (a - c) / Kokkos::cos(2 * theta); + x = (a + c + a_c_cos2) / 2; + y = (a + c - a_c_cos2) / 2; + } + value_t cos = Kokkos::cos(theta); + value_t sin = Kokkos::sin(theta); + + // Now lets compute the following new values for U amd E.S + // E.S <- R'(theta)^T . E.S . R'(theta) + // U <- U . R'(theta) + + // R'(theta)^T . E.S + for (int j = 0; j < size; j++) + { + value_t es_ipj = es(i, p, j); + value_t es_iqj = es(i, q, j); + es(i, p, j) = cos * es_ipj + sin * es_iqj; + es(i, q, j) = -sin * es_ipj + cos * es_iqj; + } + + // [R'(theta)^T . E.S] . R'(theta) + for (int j = 0; j < size; j++) + { + value_t es_ijp = es(i, j, p); + value_t es_ijq = es(i, j, q); + es(i, j, p) = cos * es_ijp + sin * es_ijq; + es(i, j, q) = -sin * es_ijp + cos * es_ijq; + } + + // U . R'(theta) + for (int j = 0; j < size; j++) + { + value_t u_ijp = u(i, j, p); + value_t u_ijq = u(i, j, q); + u(i, j, p) = cos * u_ijp + sin * u_ijq; + u(i, j, q) = -sin * u_ijp + cos * u_ijq; + } + + // These should theorically hold but is it ok to force them to their + // real value? + es(i, p, p) = x; + es(i, q, q) = y; + es(i, p, q) = 0; + es(i, q, p) = 0; + + norm = spisvdArgmaxOffDiagonal(es, i, p, q); + } + }); + + // ==> Output + // U and E.S are computed, we can now build the inverse + // U.[ E^-1.S ].U^T + Kokkos::View inv( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::SPISVD::inv"), + mats.layout()); + Kokkos::parallel_for( + "Example::SPISVD::inv_fill", + Kokkos::MDRangePolicy>( + space, {0, 0, 0}, {num_matrices, size, size}), + KOKKOS_LAMBDA(int const i, int const j, int const k) { + value_t value = 0; + for (int l = 0; l < size; l++) + { + value_t v = es(i, l, l); + if (Kokkos::abs(v) > epsilon) + { + value += u(i, j, l) * u(i, k, l) / v; + } + } + + inv(i, j, k) = value; + }); + + return inv; +} + +} // namespace Details \ No newline at end of file diff --git a/examples/moving_least_squares/MovingLeastSquares.hpp b/examples/moving_least_squares/MovingLeastSquares.hpp new file mode 100644 index 000000000..0f44b25a3 --- /dev/null +++ b/examples/moving_least_squares/MovingLeastSquares.hpp @@ -0,0 +1,110 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#pragma once + +#include + +#include + +#include "DetailsDistributedTreePostQueryComms.hpp" +#include "DetailsMovingLeastSquaresComputation.hpp" +#include "DetailsPolynomialBasis.hpp" + +namespace Details +{ + +// This is done to avoid clashing with another predicate access trait +template +struct TargetPointsPredicateWrapper +{ + Points target_points; + std::size_t num_neighbors; +}; + +} // namespace Details + +template +struct ArborX::AccessTraits, + ArborX::PredicatesTag> +{ + static KOKKOS_FUNCTION std::size_t + size(::Details::TargetPointsPredicateWrapper const &tp) + { + return ArborX::AccessTraits::size( + tp.target_points); + } + + static KOKKOS_FUNCTION auto + get(::Details::TargetPointsPredicateWrapper const &tp, std::size_t i) + { + return ArborX::nearest( + ArborX::AccessTraits::get( + tp.target_points, i), + tp.num_neighbors); + } + + using memory_space = + typename ArborX::AccessTraits::memory_space; +}; + +// Public interface to compute the moving least squares approximation between a +// souce and target point cloud +template +class MovingLeastSquares +{ +public: + template + MovingLeastSquares( + MPI_Comm comm, ExecutionSpace const &space, + SourcePoints const &source_points, TargetPoints const &target_points, + PolynomialDegree const &pd, RadialBasisFunction const &rbf, + std::size_t num_neighbors = Details::polynomialBasisSizeFromAT< + SourcePoints, PolynomialDegree::value>) + { + // Organize the source points as a tree and create the predicates + ArborX::DistributedTree source_tree(comm, space, + source_points); + Details::TargetPointsPredicateWrapper predicates{ + target_points, num_neighbors}; + + // Makes the NN query + Kokkos::View indices_and_ranks( + "Example::MLS::indices_and_ranks", 0); + Kokkos::View offsets("Example::MLS::offsets", 0); + source_tree.query(space, predicates, indices_and_ranks, offsets); + + // Set up comms and collect the points for a local MLS + _comms = Details::DistributedTreePostQueryComms( + comm, space, indices_and_ranks); + auto local_source_points = _comms.distribute(space, source_points); + + // Finally, compute the local MLS for the local target points + _mlsc = Details::MovingLeastSquaresComputation( + space, local_source_points, target_points, pd, rbf); + } + + template + auto apply(ExecutionSpace const &space, SourceValues const &source_values) + { + // Distribute and compute the result + return _mlsc.apply(space, _comms.distribute(space, source_values)); + } + +private: + Details::MovingLeastSquaresComputation + _mlsc; + Details::DistributedTreePostQueryComms _comms; +}; \ No newline at end of file diff --git a/examples/moving_least_squares/moving_least_squares.cpp b/examples/moving_least_squares/moving_least_squares.cpp new file mode 100644 index 000000000..2a7b75583 --- /dev/null +++ b/examples/moving_least_squares/moving_least_squares.cpp @@ -0,0 +1,263 @@ +/**************************************************************************** + * Copyright (c) 2023 by the ArborX authors * + * All rights reserved. * + * * + * This file is part of the ArborX library. ArborX is * + * distributed under a BSD 3-clause license. For the licensing terms see * + * the LICENSE file in the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include + +#include + +#include +#include +#include + +#include "../../benchmarks/point_clouds/point_clouds.hpp" +#include "DetailsRadialBasisFunctions.hpp" +#include "MovingLeastSquares.hpp" +#include + +using ExecutionSpace = Kokkos::DefaultExecutionSpace; +using MemorySpace = ExecutionSpace::memory_space; + +using HostExecutionSpace = Kokkos::DefaultHostExecutionSpace; +using HostMemorySpace = HostExecutionSpace::memory_space; + +// Function to approximate +struct Step +{ + KOKKOS_INLINE_FUNCTION static float eval(ArborX::Point const &p) + { + return !Kokkos::signbit(p[0]) * 1.f; + } + + template + static Kokkos::View + map(ExecutionSpace const &space, + Kokkos::View const &ps) + { + Kokkos::View evals("Example::evals", ps.extent(0)); + Kokkos::parallel_for( + "Example::evaluation", + Kokkos::RangePolicy(space, 0, ps.extent(0)), + KOKKOS_LAMBDA(int const i) { evals(i) = eval(ps(i)); }); + return evals; + } +}; + +Kokkos::Array, 2> +createPointClouds(HostExecutionSpace const &hspace, ExecutionSpace const &space, + MPI_Comm comm, std::size_t points_num) +{ + int mpi_size, mpi_rank; + MPI_Comm_size(comm, &mpi_size); + MPI_Comm_rank(comm, &mpi_rank); + + Kokkos::Array, 2> + point_clouds_host{Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::points_cloud_0"), + points_num), + Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::points_cloud_1"), + points_num)}; + + if (mpi_rank == 0) + { + Kokkos::Array, 2> + all_point_clouds{Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::all_points_cloud_0"), + points_num * mpi_size), + Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::all_points_cloud_1"), + points_num * mpi_size)}; + + filledBoxCloud(.5, all_point_clouds[0]); + filledBoxCloud(.5, all_point_clouds[1]); + + MPI_Scatter(all_point_clouds[0].data(), points_num * 3 * sizeof(float), + MPI_BYTE, point_clouds_host[0].data(), + points_num * 3 * sizeof(float), MPI_BYTE, 0, comm); + MPI_Scatter(all_point_clouds[1].data(), points_num * 3 * sizeof(float), + MPI_BYTE, point_clouds_host[1].data(), + points_num * 3 * sizeof(float), MPI_BYTE, 0, comm); + } + else + { + MPI_Scatter(nullptr, 0, MPI_BYTE, point_clouds_host[0].data(), + points_num * 3 * sizeof(float), MPI_BYTE, 0, comm); + MPI_Scatter(nullptr, 0, MPI_BYTE, point_clouds_host[1].data(), + points_num * 3 * sizeof(float), MPI_BYTE, 0, comm); + } + + Kokkos::parallel_for( + "Example::flatten_points", + Kokkos::RangePolicy(hspace, 0, points_num), + KOKKOS_LAMBDA(int const i) { + point_clouds_host[0](i)[2] = 0; + point_clouds_host[1](i)[2] = 0; + }); + + Kokkos::Array, 2> point_clouds{ + Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::points_cloud_0"), + points_num), + Kokkos::View( + Kokkos::view_alloc(Kokkos::WithoutInitializing, + "Example::points_cloud_1"), + points_num)}; + Kokkos::deep_copy(space, point_clouds[0], point_clouds_host[0]); + Kokkos::deep_copy(space, point_clouds[1], point_clouds_host[1]); + + return point_clouds; +} + +template +Kokkos::Array, 2> createMLSObjects( + MPI_Comm comm, ExecutionSpace const &space, + Kokkos::View const &point_clouds_0, + Kokkos::View const &point_clouds_1, + Deg const °, RBF const &rbf) +{ + return {MovingLeastSquares(comm, space, point_clouds_0, + point_clouds_1, deg, rbf), + MovingLeastSquares(comm, space, point_clouds_1, + point_clouds_0, deg, rbf)}; +} + +void doError(MPI_Comm comm, ExecutionSpace const &space, + Kokkos::View const &points, + Kokkos::View const &approx, + Kokkos::View const &values) +{ + int mpi_size, mpi_rank; + MPI_Comm_size(comm, &mpi_size); + MPI_Comm_rank(comm, &mpi_rank); + + // Compute local error + using ErrType = typename Kokkos::MaxLoc::value_type; + ErrType error{0, 0}; + float error_sum = 0; + Kokkos::parallel_reduce( + "Example::error_computation", + Kokkos::RangePolicy(space, 0, approx.extent(0)), + KOKKOS_LAMBDA(int const i, ErrType &loc_error, float &loc_error_sum) { + float abs_error = Kokkos::abs(approx(i) - values(i)); + + loc_error_sum += abs_error; + if (loc_error.val < abs_error) + { + loc_error.val = abs_error; + loc_error.loc = i; + } + }, + Kokkos::MaxLoc(error), Kokkos::Sum(error_sum)); + + auto approx_host = Kokkos::create_mirror_view(approx); + auto points_host = Kokkos::create_mirror_view(points); + Kokkos::deep_copy(space, approx_host, approx); + Kokkos::deep_copy(space, points_host, points); + + std::tuple error_obj{ + error.val, points_host(error.loc), approx_host(error.loc)}; + + // Compute global error + if (mpi_rank == 0) + { + float error_sum_global; + std::vector all_error_obj(mpi_size); + MPI_Gather(&error_obj, sizeof(decltype(error_obj)), MPI_BYTE, + all_error_obj.data(), sizeof(decltype(error_obj)), MPI_BYTE, 0, + comm); + MPI_Reduce(&error_sum, &error_sum_global, 1, MPI_FLOAT, MPI_SUM, 0, comm); + + for (int i = 0; i < mpi_size; i++) + if (std::get<0>(error_obj) < std::get<0>(all_error_obj[i])) + error_obj = all_error_obj[i]; + + float error = std::get<0>(error_obj), approx = std::get<2>(error_obj); + auto point = std::get<1>(error_obj); + std::cout << "Mean error: " + << error_sum_global / (points.extent(0) * mpi_size) + << "\nMaximum error: " << error << " at point " << point[0] + << ", " << point[1] << "\n True value: " << Step::eval(point) + << "\n Computed: " << approx << std::endl; + } + else + { + MPI_Gather(&error_obj, sizeof(decltype(error_obj)), MPI_BYTE, nullptr, + sizeof(decltype(error_obj)), MPI_BYTE, 0, comm); + MPI_Reduce(&error_sum, nullptr, 1, MPI_FLOAT, MPI_SUM, 0, comm); + } +} + +Kokkos::View +doOne(MPI_Comm comm, ExecutionSpace const &space, + Kokkos::View const &tgt, + Kokkos::View const &values, + Kokkos::View const &true_values, + MovingLeastSquares &mls) +{ + auto tgt_values = mls.apply(space, values); + doError(comm, space, tgt, tgt_values, true_values); + return tgt_values; +} + +int main(int argc, char *argv[]) +{ + static constexpr std::size_t total_points = 1024 * 128; + static constexpr std::size_t num_back_forth = 50; + static constexpr auto deg = Details::degree<4>; + static constexpr auto rbf = Details::wu<2>; + + MPI_Init(&argc, &argv); + Kokkos::ScopeGuard guard(argc, argv); + + ExecutionSpace space{}; + HostExecutionSpace host_space{}; + MPI_Comm mpi_comm = MPI_COMM_WORLD; + int mpi_size, mpi_rank; + MPI_Comm_size(mpi_comm, &mpi_size); + MPI_Comm_rank(mpi_comm, &mpi_rank); + + auto point_clouds = + createPointClouds(host_space, space, mpi_comm, total_points / mpi_size); + + // Create the transform from a point cloud to another + auto mlss = createMLSObjects(mpi_comm, space, point_clouds[0], + point_clouds[1], deg, rbf); + + Kokkos::Array, 2> true_values{ + Step::map(space, point_clouds[0]), Step::map(space, point_clouds[1])}; + + Kokkos::View source_values = true_values[0]; + for (int i = 0; i < num_back_forth * 2; i++) + { + if (mpi_rank == 0) + std::cout << "=== TURN " << i + 1 << std::endl; + + Kokkos::View target = + point_clouds[1 - (i % 2)]; + Kokkos::View tgt_true_values = + true_values[1 - (i % 2)]; + MovingLeastSquares &mls = mlss[i % 2]; + + source_values = + doOne(mpi_comm, space, target, source_values, tgt_true_values, mls); + + if (mpi_rank == 0) + std::cout << "===\n" << std::endl; + } + + MPI_Finalize(); + return 0; +}