Add Moving Least Squares example #919

Closed
wants to merge 44 commits

Commits (44)
12105e8
New example program
mrlag31 Jul 28, 2023
9289fc6
Source and target point creation
mrlag31 Jul 28, 2023
8c97beb
Completion of MLS (not tested)
mrlag31 Jul 31, 2023
ea3a2b4
Small fixes and values computation
mrlag31 Aug 1, 2023
357bda3
Small fixup (memory and kernel names, removing templates, ...)
mrlag31 Aug 1, 2023
4c22a22
source points generation (20x20x20 cube)
mrlag31 Aug 1, 2023
2586400
double to float
mrlag31 Aug 1, 2023
da4671b
clang format
mrlag31 Aug 1, 2023
4f44e1c
Fixup (memory and kernel names, floats, error, exec space)
mrlag31 Aug 2, 2023
15b150b
Modifies predicates array into AccessTraits
mrlag31 Aug 2, 2023
d69f348
Correct declaration convention
mrlag31 Aug 2, 2023
f6cd686
Typo fix, execution spaces in deep copies and range policies simplifi…
mrlag31 Aug 2, 2023
793a5f2
Switching from gaussian inverse to SVD
mrlag31 Aug 3, 2023
15ca7a1
Specifying ExecutionSpace in RangePolicies
mrlag31 Aug 4, 2023
62de5ed
Fixing wrong SVD calculation
mrlag31 Aug 7, 2023
e7f7918
Adding MPI (unstable)
mrlag31 Aug 8, 2023
d0932ed
Relative error and misc fixes
mrlag31 Aug 8, 2023
67db96a
Separation and templation of SVD inverse
mrlag31 Aug 9, 2023
fdf0443
MPI fixed
mrlag31 Aug 9, 2023
7ccec9e
clang format
mrlag31 Aug 9, 2023
df15ad8
Templation of MPI communication
mrlag31 Aug 10, 2023
0d3f723
clang format
mrlag31 Aug 10, 2023
4edbe19
Switching from std::size_t to int const and removing missing floats
mrlag31 Aug 10, 2023
3a9afcf
Templation of the proper MLS computation
mrlag31 Aug 10, 2023
68e199a
CMake MPI check
mrlag31 Aug 10, 2023
894e80f
Templated Moving Least Squares
mrlag31 Aug 14, 2023
1e6d4a5
Better RBF
mrlag31 Aug 14, 2023
52fe5e1
Removed DeviceType, duplicated communicators
mrlag31 Aug 15, 2023
db39716
Style fixes and num_neighbors as an optional arg
mrlag31 Aug 15, 2023
03b600c
Moving ExecutionSpace templates
mrlag31 Aug 15, 2023
dec46be
Switching to AccessTraits for user inputs (attempt)
mrlag31 Aug 15, 2023
210243d
Simplification of traits access
mrlag31 Aug 15, 2023
b1267dd
Assertions for public interfaces
mrlag31 Aug 15, 2023
9315a4c
Replacing code using ArborX's code and convention
mrlag31 Aug 17, 2023
daf9822
Better symmetric pseudo inverse (free function and better template)
mrlag31 Aug 17, 2023
0c6af81
Better MLS computations, more permissive templates
mrlag31 Aug 18, 2023
a1bd291
Proper usage of AccessTraits for MPI comms
mrlag31 Aug 18, 2023
a7650db
Proper MLS public interface
mrlag31 Aug 18, 2023
8fc8a75
Extra RBFs
mrlag31 Aug 18, 2023
f3556de
NVCC and CUDA compliance (compilation errors)
mrlag31 Aug 21, 2023
db133c8
Automatic polynomial basis generation and better rbf interface
mrlag31 Aug 22, 2023
bc388f8
Hypergeometry (only works in 3D)
mrlag31 Aug 22, 2023
220407c
Using point clouds creation and gathering of the maximum error
mrlag31 Aug 23, 2023
dcdedd3
Back and forth MLS
mrlag31 Aug 23, 2023
1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@
*.swp
.#*
/build*
.vscode
4 changes: 4 additions & 0 deletions examples/CMakeLists.txt
@@ -17,3 +17,7 @@ if(Boost_FOUND)
add_subdirectory(raytracing)
add_subdirectory(brute_force)
endif()

if(ARBORX_ENABLE_MPI)
add_subdirectory(moving_least_squares)
endif()
3 changes: 3 additions & 0 deletions examples/moving_least_squares/CMakeLists.txt
@@ -0,0 +1,3 @@
add_executable(ArborX_Example_MovingLeastSquares.exe moving_least_squares.cpp)
target_link_libraries(ArborX_Example_MovingLeastSquares.exe ArborX::ArborX)
add_test(NAME ArborX_Example_MovingLeastSquares COMMAND ArborX_Example_MovingLeastSquares.exe)
205 changes: 205 additions & 0 deletions examples/moving_least_squares/DetailsDistributedTreePostQueryComms.hpp
@@ -0,0 +1,205 @@
/****************************************************************************
* Copyright (c) 2023 by the ArborX authors *
* All rights reserved. *
* *
* This file is part of the ArborX library. ArborX is *
* distributed under a BSD 3-clause license. For the licensing terms see *
* the LICENSE file in the top-level directory. *
* *
* SPDX-License-Identifier: BSD-3-Clause *
****************************************************************************/

#pragma once

#include <ArborX.hpp>

#include <Kokkos_Core.hpp>

#include <memory>
#include <optional>

#include <mpi.h>

namespace Details
{

template <typename MemorySpace>
class DistributedTreePostQueryComms
{
public:
DistributedTreePostQueryComms() = default;

template <typename ExecutionSpace, typename IndicesAndRanks>
DistributedTreePostQueryComms(MPI_Comm comm, ExecutionSpace const &space,
IndicesAndRanks const &indices_and_ranks)
{
std::size_t data_len = indices_and_ranks.extent(0);

_comm.reset(
[comm]() {
auto p = new MPI_Comm;
MPI_Comm_dup(comm, p);
return p;
}(),
[](MPI_Comm *p) {
int mpi_finalized;
MPI_Finalized(&mpi_finalized);
if (!mpi_finalized)
MPI_Comm_free(p);
delete p;
});
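// Note: the communicator is duplicated and owned through a shared_ptr whose
// deleter frees it only if MPI has not been finalized yet, so copies of this
// object share a single duplicated communicator and can safely be destroyed
// after MPI_Finalize().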

int rank;
MPI_Comm_rank(*_comm, &rank);

Kokkos::View<int *, MemorySpace> mpi_tmp(
Kokkos::view_alloc(Kokkos::WithoutInitializing, "Example::DTPQC::tmp"),
data_len);

// Split indices/ranks
Kokkos::Array<Kokkos::View<int *, MemorySpace>, 2> split_indices_ranks =
indicesAndRanksSplit(space, indices_and_ranks, data_len);
Kokkos::View<int *, MemorySpace> indices = split_indices_ranks[0];
Kokkos::View<int *, MemorySpace> ranks = split_indices_ranks[1];

// Computes what will be common to every exchange. Every time a process
// requests values from the same set of elements, it will reuse the same
// lists of send and receive indices. The rank data is kept by the reverse
// distributor, as the forward one is no longer needed once the receive
// indices are computed.

// This builds for each process a local array indicating how much
// information will be gathered
ArborX::Details::Distributor<MemorySpace> distributor_forth(*_comm);
_num_requests = distributor_forth.createFromSends(space, ranks);

// This creates a temporary buffer that will later help produce the
// array used to rebuild the output
Kokkos::View<int *, MemorySpace> mpi_rev_indices(
Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Example::DTPQC::rev_indices"),
_num_requests);
ArborX::iota(space, mpi_tmp);
ArborX::Details::DistributedTreeImpl<MemorySpace>::sendAcrossNetwork(
space, distributor_forth, mpi_tmp, mpi_rev_indices);

// This retrieves which source index a process wants and gives it to
// the process owning the source
_mpi_send_indices = Kokkos::View<int *, MemorySpace>(
Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Example::DTPQC::send_indices"),
_num_requests);
ArborX::Details::DistributedTreeImpl<MemorySpace>::sendAcrossNetwork(
space, distributor_forth, indices, _mpi_send_indices);

// This builds the temporary buffer that will create the reverse
// distributor to dispatch the values
Kokkos::View<int *, MemorySpace> mpi_rev_ranks(
Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Example::DTPQC::rev_ranks"),
_num_requests);
Kokkos::deep_copy(space, mpi_tmp, rank);
ArborX::Details::DistributedTreeImpl<MemorySpace>::sendAcrossNetwork(
space, distributor_forth, mpi_tmp, mpi_rev_ranks);

// This will create the reverse of the previous distributor
_distributor = ArborX::Details::Distributor<MemorySpace>(*_comm);
_num_responses = _distributor->createFromSends(space, mpi_rev_ranks);

// There should be enough responses to perfectly fill what was requested
// i.e. _num_responses == data_len
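// (For example, a process that issued 3 requests will receive exactly 3
// responses, no matter how many requests it has to serve for the other
// processes, its own included, which is what _num_requests counts.)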

// Then we send back the requested indices so that each process can rebuild
// its output
_mpi_recv_indices = Kokkos::View<int *, MemorySpace>(
Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Example::DTPQC::recv_indices"),
_num_responses);
ArborX::Details::DistributedTreeImpl<MemorySpace>::sendAcrossNetwork(
space, *_distributor, mpi_rev_indices, _mpi_recv_indices);
}

template <typename ExecutionSpace, typename Values>
Kokkos::View<typename ArborX::Details::AccessTraitsHelper<
ArborX::AccessTraits<Values, ArborX::PrimitivesTag>>::type *,
typename ArborX::AccessTraits<
Values, ArborX::PrimitivesTag>::memory_space>
distribute(ExecutionSpace const &space, Values const &source)
{
using src_acc = ArborX::AccessTraits<Values, ArborX::PrimitivesTag>;
using value_t = typename ArborX::Details::AccessTraitsHelper<src_acc>::type;
using memory_space = typename src_acc::memory_space;

// We know what each process wants, so we prepare the data to be sent
Kokkos::View<value_t *, MemorySpace> data_to_send(
Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Example::DTPQC::data_to_send"),
_num_requests);
Kokkos::parallel_for(
"Example::DTPQC::data_to_send_fill",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, _num_requests),
KOKKOS_CLASS_LAMBDA(int const i) {
data_to_send(i) = src_acc::get(source, _mpi_send_indices(i));
});

// We send the data; each process now has everything it asked for, but in the
// wrong order
Kokkos::View<value_t *, MemorySpace> data_to_recv(
Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Example::DTPQC::data_to_recv"),
_num_responses);
ArborX::Details::DistributedTreeImpl<MemorySpace>::sendAcrossNetwork(
space, *_distributor, data_to_send, data_to_recv);

// So we fix the ordering by scattering each received value to its original position
Kokkos::View<value_t *, memory_space> output(
Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Example::DTPQC::output"),
_num_responses);
Kokkos::parallel_for(
"Example::DTPQC::output_fill",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, _num_responses),
KOKKOS_CLASS_LAMBDA(int const i) {
output(_mpi_recv_indices(i)) = data_to_recv(i);
});

return output;
}

template <typename ExecutionSpace, typename IndicesAndRanks>
static Kokkos::Array<Kokkos::View<int *, MemorySpace>, 2>
indicesAndRanksSplit(ExecutionSpace const &space,
IndicesAndRanks const &indices_and_ranks,
std::size_t data_len)
{
Kokkos::View<int *, MemorySpace> indices(
Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Example::DTPQC::indices"),
data_len);
Kokkos::View<int *, MemorySpace> ranks(
Kokkos::view_alloc(Kokkos::WithoutInitializing,
"Example::DTPQC::ranks"),
data_len);

Kokkos::parallel_for(
"Example::DTPQC::indices_and_ranks_split",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, data_len),
KOKKOS_LAMBDA(int const i) {
indices(i) = indices_and_ranks(i).index;
ranks(i) = indices_and_ranks(i).rank;
});

return {{indices, ranks}};
}

private:
std::shared_ptr<MPI_Comm> _comm;
Kokkos::View<int *, MemorySpace> _mpi_send_indices;
Kokkos::View<int *, MemorySpace> _mpi_recv_indices;
std::optional<ArborX::Details::Distributor<MemorySpace>> _distributor;
std::size_t _num_requests;
std::size_t _num_responses;
};

} // namespace Details
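
For reference, below is a minimal, hypothetical sketch of how this helper could be driven from the example's driver. It is not code from this PR: the gatherSourceValues function, its parameters, and the assumption that the distributed query returns values exposing .index and .rank members (e.g. ArborX::PairIndexRank in recent ArborX versions) are illustrative only.

// Hypothetical usage sketch (not part of this PR): gather, for every local
// target point, the values attached to its k nearest source points, using
// DistributedTreePostQueryComms to run the communication.
#include <ArborX.hpp>

#include <Kokkos_Core.hpp>

#include <mpi.h>

#include "DetailsDistributedTreePostQueryComms.hpp"

template <typename ExecutionSpace, typename Points, typename SourceValues>
auto gatherSourceValues(MPI_Comm comm, ExecutionSpace const &space,
                        Points const &source_points,
                        Points const &target_points,
                        SourceValues const &source_values, int num_neighbors)
{
  using MemorySpace = typename ExecutionSpace::memory_space;

  // Index the locally owned source points in a distributed tree
  ArborX::DistributedTree<MemorySpace> tree(comm, space, source_points);

  // One k-nearest-neighbors predicate per local target point
  int const num_targets = target_points.extent(0);
  Kokkos::View<decltype(ArborX::nearest(ArborX::Point{}, 0)) *, MemorySpace>
      predicates(Kokkos::view_alloc(Kokkos::WithoutInitializing,
                                    "Example::predicates"),
                 num_targets);
  Kokkos::parallel_for(
      "Example::predicates_fill",
      Kokkos::RangePolicy<ExecutionSpace>(space, 0, num_targets),
      KOKKOS_LAMBDA(int const i) {
        predicates(i) = ArborX::nearest(target_points(i), num_neighbors);
      });

  // Distributed query: for each target, the (index, rank) pairs of its
  // neighbors (value type assumed to expose .index and .rank)
  Kokkos::View<ArborX::PairIndexRank *, MemorySpace> indices_and_ranks(
      "Example::indices_and_ranks", 0);
  Kokkos::View<int *, MemorySpace> offsets("Example::offsets", 0);
  tree.query(space, predicates, indices_and_ranks, offsets);

  // Build the reusable communication plan once...
  Details::DistributedTreePostQueryComms<MemorySpace> comms(comm, space,
                                                            indices_and_ranks);

  // ...and use it to ship the source values: entry i of the result
  // corresponds to indices_and_ranks(i), and 'offsets' still tells which
  // entries belong to which target point.
  return comms.distribute(space, source_values);
}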