Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pseudo-inverse of symmetric matrices using SVD / Utility for moving least squares #950

Merged
merged 20 commits into from
Sep 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
run: |
mkdir build
cd build
cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="/EHsc /bigobj" -DKokkos_ROOT="C:\kokkos-install" ${{ steps.vcpkg.outputs.vcpkg-cmake-config }} -DARBORX_ENABLE_MPI=OFF -DARBORX_ENABLE_TESTS=ON -DARBORX_ENABLE_EXAMPLES=ON -DARBORX_ENABLE_BENCHMARKS=ON ..
cmake -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS="/EHsc /bigobj" -DKokkos_ROOT="C:\kokkos-install" ${{ steps.vcpkg.outputs.vcpkg-cmake-config }} -DARBORX_ENABLE_MPI=OFF -DARBORX_ENABLE_TESTS=ON -DARBORX_ENABLE_EXAMPLES=ON -DARBORX_ENABLE_BENCHMARKS=ON -DARBORX_ENABLE_HEADER_SELF_CONTAINMENT_TESTS=OFF ..
- name: Build ArborX
shell: bash
run: |
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
*.swp
.#*
/build*
.vscode
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
/****************************************************************************
* Copyright (c) 2023 by the ArborX authors *
* All rights reserved. *
* *
* This file is part of the ArborX library. ArborX is *
* distributed under a BSD 3-clause license. For the licensing terms see *
* the LICENSE file in the top-level directory. *
* *
* SPDX-License-Identifier: BSD-3-Clause *
****************************************************************************/

#ifndef ARBORX_INTERP_DETAILS_SYMMETRIC_PSEUDO_INVERSE_SVD_HPP
#define ARBORX_INTERP_DETAILS_SYMMETRIC_PSEUDO_INVERSE_SVD_HPP

#include <ArborX_DetailsKokkosExtAccessibilityTraits.hpp>
#include <ArborX_Exception.hpp>

#include <Kokkos_Core.hpp>

namespace ArborX::Interpolation::Details
{

template <typename Matrix>
KOKKOS_INLINE_FUNCTION void
ensureIsSquareMatrix([[maybe_unused]] Matrix const &mat)
{
static_assert(Kokkos::is_view_v<Matrix>, "Matrix must be a view");
static_assert(Matrix::rank == 2, "Matrix must be 2D");
KOKKOS_ASSERT(mat.extent(0) == mat.extent(1));
}

template <typename Matrix>
KOKKOS_INLINE_FUNCTION void ensureIsSquareSymmetricMatrix(Matrix const &mat)
{
ensureIsSquareMatrix(mat);

[[maybe_unused]] auto is_symmetric = [&]() {
int const size = mat.extent(0);
for (int i = 0; i < size; i++)
for (int j = i + 1; j < size; j++)
if (mat(i, j) != mat(j, i))
return false;
return true;
};

KOKKOS_ASSERT(is_symmetric());
}

// Gets the argmax from the upper triangle part of a matrix
template <typename Matrix>
KOKKOS_FUNCTION auto argmaxUpperTriangle(Matrix const &mat)
{
ensureIsSquareMatrix(mat);
using value_t = typename Matrix::non_const_value_type;

struct
{
value_t max = 0;
int row = 0;
int col = 0;
} result;
Comment on lines +56 to +61
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't even know you could return an object from a local unnamed class.
Sure why not given that these are implementation details.


int const size = mat.extent(0);
for (int i = 0; i < size; i++)
for (int j = i + 1; j < size; j++)
{
value_t val = Kokkos::abs(mat(i, j));
if (result.max < val)
{
result.max = val;
result.row = i;
result.col = j;
}
}

return result;
}

// Pseudo-inverse of symmetric matrices using SVD
// We must find U, E (diagonal and positive) and V such that A = U.E.V^T
// We also suppose, as the input, that A is symmetric, so U = SV where S is
// a sign matrix (only 1 or -1 on the diagonal, 0 elsewhere).
// Thus A = U.ES.U^T and A^-1 = U.[ ES^-1 ].U^T
template <typename AMatrix, typename ESMatrix, typename UMatrix>
KOKKOS_FUNCTION void
symmetricPseudoInverseSVDSerialKernel(AMatrix &A, ESMatrix &ES, UMatrix &U)
{
ensureIsSquareSymmetricMatrix(A);
static_assert(!std::is_const_v<typename AMatrix::value_type>,
"A must be writable");
ensureIsSquareMatrix(ES);
static_assert(!std::is_const_v<typename ESMatrix::value_type>,
"ES must be writable");
ensureIsSquareMatrix(U);
static_assert(!std::is_const_v<typename UMatrix::value_type>,
"U must be writable");
static_assert(std::is_same_v<typename AMatrix::value_type,
typename ESMatrix::value_type> &&
std::is_same_v<typename ESMatrix::value_type,
typename UMatrix::value_type>,
"All input matrices must have the same value type");
KOKKOS_ASSERT(A.extent(0) == ES.extent(0) && ES.extent(0) == U.extent(0));
using value_t = typename AMatrix::non_const_value_type;
int const size = A.extent(0);

// We first initialize U as the identity matrix and copy A to ES
for (int i = 0; i < size; i++)
for (int j = 0; j < size; j++)
{
U(i, j) = value_t(i == j);
ES(i, j) = A(i, j);
}

static constexpr value_t epsilon = Kokkos::Experimental::epsilon_v<float>;
while (true)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the CI is stuck in this loop?

{
// We have a guarantee that p < q
auto const [max_val, p, q] = argmaxUpperTriangle(ES);
if (max_val <= epsilon)
break;

auto const a = ES(p, p);
auto const b = ES(p, q);
auto const c = ES(q, q);

// Our submatrix is now
// +----------+----------+ +---+---+
// | ES(p, p) | ES(p, q) | | a | b |
// +----------+----------+ = +---+---+
// | ES(q, p) | ES(q, q) | | b | c |
// +----------+----------+ +---+---+

// Let's compute x, y and theta such that
// +---+---+ +---+---+
// | a | b | | x | 0 |
// +---+---+ = R(theta) * +---+---+ * R(theta)^T
// | b | c | | 0 | y |
// +---+---+ +---+---+

value_t cos_theta;
value_t sin_theta;
value_t x;
value_t y;
if (a == c)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't you need to have a tolerance here? What happens if a = 1 and c = 1+1e-15?

Copy link
Contributor Author

@mrlag31 mrlag31 Sep 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A tolerance is not really needed as the algorithm behaves correctly even if the values are very close together. This case is here to avoid a "true" division by zero.

{
cos_theta = Kokkos::sqrt(value_t(2)) / 2;
sin_theta = cos_theta;
x = a + b;
y = a - b;
}
else
{
auto const u = (2 * b) / (a - c);
auto const v = 1 / Kokkos::sqrt(u * u + 1);
cos_theta = Kokkos::sqrt((1 + v) / 2);
sin_theta = Kokkos::copysign(Kokkos::sqrt((1 - v) / 2), u);
x = (a + c + (a - c) / v) / 2;
y = a + c - x;
}

// Now let's compute the following new values for U and ES
// ES <- R'(theta)^T . ES . R'(theta)
// U <- U . R'(theta)

// R'(theta)^T . ES . R'(theta)
for (int i = 0; i < p; i++)
{
auto const es_ip = ES(i, p);
auto const es_iq = ES(i, q);
ES(i, p) = cos_theta * es_ip + sin_theta * es_iq;
ES(i, q) = -sin_theta * es_ip + cos_theta * es_iq;
}
ES(p, p) = x;
for (int i = p + 1; i < q; i++)
{
auto const es_pi = ES(p, i);
auto const es_iq = ES(i, q);
ES(p, i) = cos_theta * es_pi + sin_theta * es_iq;
ES(i, q) = -sin_theta * es_pi + cos_theta * es_iq;
}
ES(q, q) = y;
for (int i = q + 1; i < size; i++)
{
auto const es_pi = ES(p, i);
auto const es_qi = ES(q, i);
ES(p, i) = cos_theta * es_pi + sin_theta * es_qi;
ES(q, i) = -sin_theta * es_pi + cos_theta * es_qi;
}
ES(p, q) = 0;

// U . R'(theta)
for (int i = 0; i < size; i++)
{
auto const u_ip = U(i, p);
auto const u_iq = U(i, q);
U(i, p) = cos_theta * u_ip + sin_theta * u_iq;
U(i, q) = -sin_theta * u_ip + cos_theta * u_iq;
}
Comment on lines +192 to +198
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to use hierarchical parallelism for these for-loops or do we assume size to be small enough that it doesn't matter? Or do we have sufficient parallelism already in the outer loop?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would make a lot of sense to use hierarchical parallelism here (and in other loops) as well as using scratch pads for the auxiliary matrices. However, because this is part of the MLS algorithm, it might not be a bottleneck and it is best to avoid early optimization.

However, the following should be a logical distribution:

  • Each team treats a single matrix
  • Threads/Vectors handle the loops and reductions
  • Use the team scratch pad for ES and U

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough.

}

// We compute the max to get a range of the invertible eigenvalues
auto max_eigen = epsilon;
for (int i = 0; i < size; i++)
max_eigen = Kokkos::max(Kokkos::abs(ES(i, i)), max_eigen);

// We invert the diagonal of ES, except if "0" is found
for (int i = 0; i < size; i++)
ES(i, i) = (Kokkos::abs(ES(i, i)) < max_eigen * epsilon) ? 0 : 1 / ES(i, i);

// Then we fill out A as the pseudo inverse
for (int i = 0; i < size; i++)
for (int j = 0; j < size; j++)
{
value_t tmp = 0;
for (int k = 0; k < size; k++)
tmp += ES(k, k) * U(i, k) * U(j, k);
A(i, j) = tmp;
}
}

template <typename ExecutionSpace, typename InOutMatrices>
void symmetricPseudoInverseSVD(ExecutionSpace const &space,
InOutMatrices &matrices)
{
// InOutMatrices is a list of square symmetric matrices (3D view)
static_assert(Kokkos::is_view_v<InOutMatrices>, "matrices must be a view");
static_assert(!std::is_const_v<typename InOutMatrices::value_type>,
"matrices must be writable");
static_assert(InOutMatrices::rank == 3,
"matrices must be a list of square matrices");
static_assert(
KokkosExt::is_accessible_from<typename InOutMatrices::memory_space,
ExecutionSpace>::value,
"matrices must be accessible from the execution space");

ARBORX_ASSERT(matrices.extent(1) == matrices.extent(2)); // Must be square

InOutMatrices ESs(
Kokkos::view_alloc(space, Kokkos::WithoutInitializing,
"ArborX::SymmetricPseudoInverseSVD::ESs"),
matrices.extent(0), matrices.extent(1), matrices.extent(2));
InOutMatrices Us(Kokkos::view_alloc(space, Kokkos::WithoutInitializing,
"ArborX::SymmetricPseudoInverseSVD::Us"),
matrices.extent(0), matrices.extent(1), matrices.extent(2));

Kokkos::parallel_for(
"ArborX::SymmetricPseudoInverseSVD::computations",
Kokkos::RangePolicy<ExecutionSpace>(space, 0, matrices.extent(0)),
KOKKOS_LAMBDA(int const i) {
auto A = Kokkos::subview(matrices, i, Kokkos::ALL, Kokkos::ALL);
auto ES = Kokkos::subview(ESs, i, Kokkos::ALL, Kokkos::ALL);
auto U = Kokkos::subview(Us, i, Kokkos::ALL, Kokkos::ALL);
symmetricPseudoInverseSVDSerialKernel(A, ES, U);
});
}

} // namespace ArborX::Interpolation::Details

#endif
84 changes: 84 additions & 0 deletions test/ArborX_EnableViewComparison.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,92 @@
#include <Kokkos_Core.hpp>

#include "BoostTest_CUDA_clang_workarounds.hpp"
#include <boost/test/unit_test.hpp>
#include <boost/test/utils/is_forward_iterable.hpp>

template <typename U, typename V>
using CommonValueType =
typename boost::common_type<typename U::non_const_value_type,
typename V::non_const_value_type>::type;

template <typename U, typename V>
void arborxViewCheck(U const &u, V const &v, std::string const &u_name,
std::string const &v_name, CommonValueType<U, V> tol = 0)
{
static constexpr int rank = U::rank;

bool same_dim_size = true;
for (int i = 0; i < rank; i++)
{
int ui = u.extent_int(i);
int vi = v.extent_int(i);
BOOST_TEST(ui == vi, "check " << u_name << " == " << v_name
<< " failed at dimension " << i << " size ["
<< ui << " != " << vi << "]");
same_dim_size = (ui == vi) && same_dim_size;
}

if (!same_dim_size)
return;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that you don't need this. BOOST_TEST_REQUIRE does what you want (to check).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really, as this will abort the current test and not check the other matrices of the same test.


int index[8]{0, 0, 0, 0, 0, 0, 0, 0};
auto make_index = [&]() {
std::stringstream sstr;
sstr << "(";
for (int i = 0; i < rank - 1; i++)
sstr << index[i] << ", ";
sstr << index[rank - 1] << ")";
return sstr.str();
};

while (index[0] != u.extent_int(0))
{
auto uval = u.access(index[0], index[1], index[2], index[3], index[4],
index[5], index[6], index[7]);
auto vval = v.access(index[0], index[1], index[2], index[3], index[4],
index[5], index[6], index[7]);
std::string index_str = make_index();

// Can "tol" be used as a tolerance value? If not, go back to regular
// comparison
if constexpr (boost::math::fpc::tolerance_based<decltype(tol)>::value)
BOOST_TEST(uval == vval, u_name << " == " << v_name << " at " << index_str
<< boost::test_tools::tolerance(tol));
else
BOOST_TEST(uval == vval, "check " << u_name << " == " << v_name
<< " failed at " << index_str << " ["
<< uval << " != " << vval << "]");

index[rank - 1]++;
for (int i = rank - 1; i > 0; i--)
if (index[i] == u.extent_int(i))
{
index[i] = 0;
index[i - 1]++;
}
}
}

#define ARBORX_MDVIEW_TEST_TOL(VIEWA, VIEWB, TOL) \
[](decltype(VIEWA) const &u, decltype(VIEWB) const &v) { \
auto view_a = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, u); \
auto view_b = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, v); \
\
static_assert(unsigned(std::decay_t<decltype(u)>::rank) == \
unsigned(std::decay_t<decltype(v)>::rank), \
"'" #VIEWA "' and '" #VIEWB "' must have the same rank"); \
\
std::string view_a_name(#VIEWA); \
view_a_name += " (" + view_a.label() + ")"; \
\
std::string view_b_name(#VIEWB); \
view_b_name += " (" + view_b.label() + ")"; \
\
arborxViewCheck(view_a, view_b, view_a_name, view_b_name, TOL); \
}(VIEWA, VIEWB)

#define ARBORX_MDVIEW_TEST(VIEWA, VIEWB) ARBORX_MDVIEW_TEST_TOL(VIEWA, VIEWB, 0)

// Enable element-wise comparison for views that are accessible from the host
namespace boost
{
Expand Down
6 changes: 6 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,12 @@ target_link_libraries(ArborX_Test_BoostAdapters.exe PRIVATE ArborX Boost::unit_t
target_compile_definitions(ArborX_Test_BoostAdapters.exe PRIVATE BOOST_TEST_DYN_LINK)
add_test(NAME ArborX_Test_BoostAdapters COMMAND ArborX_Test_BoostAdapters.exe)

add_executable(ArborX_Test_InterpDetailsSVD.exe tstInterpDetailsSVD.cpp utf_main.cpp)
target_link_libraries(ArborX_Test_InterpDetailsSVD.exe PRIVATE ArborX Boost::unit_test_framework)
target_compile_definitions(ArborX_Test_InterpDetailsSVD.exe PRIVATE BOOST_TEST_DYN_LINK)
target_include_directories(ArborX_Test_InterpDetailsSVD.exe PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
add_test(NAME ArborX_Test_InterpDetailsSVD COMMAND ArborX_Test_InterpDetailsSVD.exe)

if(ARBORX_ENABLE_HEADER_SELF_CONTAINMENT_TESTS)
add_subdirectory(headers_self_contained)
endif()
Loading