-
Notifications
You must be signed in to change notification settings - Fork 39
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pseudo-inverse of symmetric matrices using SVD / Utility for moving least squares #950
Changes from all commits
a9b9eea
6f67dd3
8acf635
98c2dd8
a4f7326
79580e3
797fd65
d7c4ee5
d3b6e8d
3d105a1
20f906a
0c5bb0d
e2e5838
4568d88
8af21a9
61fff17
8098fc0
2dfac38
e68f83b
4edd575
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,4 @@ | |
*.swp | ||
.#* | ||
/build* | ||
.vscode |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,259 @@ | ||
/**************************************************************************** | ||
* Copyright (c) 2023 by the ArborX authors * | ||
* All rights reserved. * | ||
* * | ||
* This file is part of the ArborX library. ArborX is * | ||
* distributed under a BSD 3-clause license. For the licensing terms see * | ||
* the LICENSE file in the top-level directory. * | ||
* * | ||
* SPDX-License-Identifier: BSD-3-Clause * | ||
****************************************************************************/ | ||
|
||
#ifndef ARBORX_INTERP_DETAILS_SYMMETRIC_PSEUDO_INVERSE_SVD_HPP | ||
#define ARBORX_INTERP_DETAILS_SYMMETRIC_PSEUDO_INVERSE_SVD_HPP | ||
|
||
#include <ArborX_DetailsKokkosExtAccessibilityTraits.hpp> | ||
#include <ArborX_Exception.hpp> | ||
|
||
#include <Kokkos_Core.hpp> | ||
|
||
namespace ArborX::Interpolation::Details | ||
{ | ||
|
||
template <typename Matrix> | ||
KOKKOS_INLINE_FUNCTION void | ||
ensureIsSquareMatrix([[maybe_unused]] Matrix const &mat) | ||
{ | ||
static_assert(Kokkos::is_view_v<Matrix>, "Matrix must be a view"); | ||
static_assert(Matrix::rank == 2, "Matrix must be 2D"); | ||
KOKKOS_ASSERT(mat.extent(0) == mat.extent(1)); | ||
} | ||
|
||
template <typename Matrix> | ||
KOKKOS_INLINE_FUNCTION void ensureIsSquareSymmetricMatrix(Matrix const &mat) | ||
{ | ||
ensureIsSquareMatrix(mat); | ||
|
||
[[maybe_unused]] auto is_symmetric = [&]() { | ||
int const size = mat.extent(0); | ||
for (int i = 0; i < size; i++) | ||
for (int j = i + 1; j < size; j++) | ||
if (mat(i, j) != mat(j, i)) | ||
return false; | ||
return true; | ||
}; | ||
|
||
KOKKOS_ASSERT(is_symmetric()); | ||
} | ||
|
||
// Gets the argmax from the upper triangle part of a matrix | ||
template <typename Matrix> | ||
KOKKOS_FUNCTION auto argmaxUpperTriangle(Matrix const &mat) | ||
{ | ||
ensureIsSquareMatrix(mat); | ||
using value_t = typename Matrix::non_const_value_type; | ||
|
||
struct | ||
{ | ||
value_t max = 0; | ||
int row = 0; | ||
int col = 0; | ||
} result; | ||
|
||
int const size = mat.extent(0); | ||
for (int i = 0; i < size; i++) | ||
for (int j = i + 1; j < size; j++) | ||
{ | ||
value_t val = Kokkos::abs(mat(i, j)); | ||
if (result.max < val) | ||
{ | ||
result.max = val; | ||
result.row = i; | ||
result.col = j; | ||
} | ||
} | ||
|
||
return result; | ||
} | ||
|
||
// Pseudo-inverse of symmetric matrices using SVD | ||
// We must find U, E (diagonal and positive) and V such that A = U.E.V^T | ||
// We also suppose, as the input, that A is symmetric, so U = SV where S is | ||
// a sign matrix (only 1 or -1 on the diagonal, 0 elsewhere). | ||
// Thus A = U.ES.U^T and A^-1 = U.[ ES^-1 ].U^T | ||
template <typename AMatrix, typename ESMatrix, typename UMatrix> | ||
KOKKOS_FUNCTION void | ||
symmetricPseudoInverseSVDSerialKernel(AMatrix &A, ESMatrix &ES, UMatrix &U) | ||
{ | ||
ensureIsSquareSymmetricMatrix(A); | ||
static_assert(!std::is_const_v<typename AMatrix::value_type>, | ||
"A must be writable"); | ||
ensureIsSquareMatrix(ES); | ||
static_assert(!std::is_const_v<typename ESMatrix::value_type>, | ||
"ES must be writable"); | ||
ensureIsSquareMatrix(U); | ||
static_assert(!std::is_const_v<typename UMatrix::value_type>, | ||
"U must be writable"); | ||
static_assert(std::is_same_v<typename AMatrix::value_type, | ||
typename ESMatrix::value_type> && | ||
std::is_same_v<typename ESMatrix::value_type, | ||
typename UMatrix::value_type>, | ||
"All input matrices must have the same value type"); | ||
KOKKOS_ASSERT(A.extent(0) == ES.extent(0) && ES.extent(0) == U.extent(0)); | ||
using value_t = typename AMatrix::non_const_value_type; | ||
int const size = A.extent(0); | ||
|
||
// We first initialize U as the identity matrix and copy A to ES | ||
for (int i = 0; i < size; i++) | ||
for (int j = 0; j < size; j++) | ||
{ | ||
U(i, j) = value_t(i == j); | ||
ES(i, j) = A(i, j); | ||
} | ||
|
||
static constexpr value_t epsilon = Kokkos::Experimental::epsilon_v<float>; | ||
while (true) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess the CI is stuck in this loop? |
||
{ | ||
// We have a guarantee that p < q | ||
auto const [max_val, p, q] = argmaxUpperTriangle(ES); | ||
if (max_val <= epsilon) | ||
break; | ||
|
||
auto const a = ES(p, p); | ||
auto const b = ES(p, q); | ||
auto const c = ES(q, q); | ||
|
||
// Our submatrix is now | ||
// +----------+----------+ +---+---+ | ||
// | ES(p, p) | ES(p, q) | | a | b | | ||
// +----------+----------+ = +---+---+ | ||
// | ES(q, p) | ES(q, q) | | b | c | | ||
// +----------+----------+ +---+---+ | ||
|
||
// Let's compute x, y and theta such that | ||
// +---+---+ +---+---+ | ||
// | a | b | | x | 0 | | ||
// +---+---+ = R(theta) * +---+---+ * R(theta)^T | ||
// | b | c | | 0 | y | | ||
// +---+---+ +---+---+ | ||
|
||
value_t cos_theta; | ||
value_t sin_theta; | ||
value_t x; | ||
value_t y; | ||
if (a == c) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't you need to have a tolerance here? What happens if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A tolerance is not really needed as the algorithm behaves correctly even if the values are very close together. This case is here to avoid a "true" division by zero. |
||
{ | ||
cos_theta = Kokkos::sqrt(value_t(2)) / 2; | ||
sin_theta = cos_theta; | ||
x = a + b; | ||
y = a - b; | ||
} | ||
else | ||
{ | ||
auto const u = (2 * b) / (a - c); | ||
auto const v = 1 / Kokkos::sqrt(u * u + 1); | ||
cos_theta = Kokkos::sqrt((1 + v) / 2); | ||
sin_theta = Kokkos::copysign(Kokkos::sqrt((1 - v) / 2), u); | ||
x = (a + c + (a - c) / v) / 2; | ||
y = a + c - x; | ||
} | ||
|
||
// Now let's compute the following new values for U and ES | ||
// ES <- R'(theta)^T . ES . R'(theta) | ||
// U <- U . R'(theta) | ||
|
||
// R'(theta)^T . ES . R'(theta) | ||
for (int i = 0; i < p; i++) | ||
{ | ||
auto const es_ip = ES(i, p); | ||
auto const es_iq = ES(i, q); | ||
ES(i, p) = cos_theta * es_ip + sin_theta * es_iq; | ||
ES(i, q) = -sin_theta * es_ip + cos_theta * es_iq; | ||
} | ||
ES(p, p) = x; | ||
for (int i = p + 1; i < q; i++) | ||
{ | ||
auto const es_pi = ES(p, i); | ||
auto const es_iq = ES(i, q); | ||
ES(p, i) = cos_theta * es_pi + sin_theta * es_iq; | ||
ES(i, q) = -sin_theta * es_pi + cos_theta * es_iq; | ||
} | ||
ES(q, q) = y; | ||
for (int i = q + 1; i < size; i++) | ||
{ | ||
auto const es_pi = ES(p, i); | ||
auto const es_qi = ES(q, i); | ||
ES(p, i) = cos_theta * es_pi + sin_theta * es_qi; | ||
ES(q, i) = -sin_theta * es_pi + cos_theta * es_qi; | ||
} | ||
ES(p, q) = 0; | ||
|
||
// U . R'(theta) | ||
for (int i = 0; i < size; i++) | ||
{ | ||
auto const u_ip = U(i, p); | ||
auto const u_iq = U(i, q); | ||
U(i, p) = cos_theta * u_ip + sin_theta * u_iq; | ||
U(i, q) = -sin_theta * u_ip + cos_theta * u_iq; | ||
} | ||
Comment on lines
+192
to
+198
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it make sense to use hierarchical parallelism for these for-loops or do we assume There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would make a lot of sense to use hierarchical parallelism here (and in other loops) as well as using scratch pads for the auxiliary matrices. However, because this is part of the MLS algorithm, it might not be a bottleneck and it is best to avoid early optimization. However, the following should be a logical distribution:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fair enough. |
||
} | ||
|
||
// We compute the max to get a range of the invertible eigenvalues | ||
auto max_eigen = epsilon; | ||
for (int i = 0; i < size; i++) | ||
max_eigen = Kokkos::max(Kokkos::abs(ES(i, i)), max_eigen); | ||
|
||
// We invert the diagonal of ES, except if "0" is found | ||
for (int i = 0; i < size; i++) | ||
ES(i, i) = (Kokkos::abs(ES(i, i)) < max_eigen * epsilon) ? 0 : 1 / ES(i, i); | ||
|
||
// Then we fill out A as the pseudo inverse | ||
for (int i = 0; i < size; i++) | ||
for (int j = 0; j < size; j++) | ||
{ | ||
value_t tmp = 0; | ||
for (int k = 0; k < size; k++) | ||
tmp += ES(k, k) * U(i, k) * U(j, k); | ||
A(i, j) = tmp; | ||
} | ||
} | ||
|
||
template <typename ExecutionSpace, typename InOutMatrices> | ||
void symmetricPseudoInverseSVD(ExecutionSpace const &space, | ||
InOutMatrices &matrices) | ||
{ | ||
// InOutMatrices is a list of square symmetric matrices (3D view) | ||
static_assert(Kokkos::is_view_v<InOutMatrices>, "matrices must be a view"); | ||
static_assert(!std::is_const_v<typename InOutMatrices::value_type>, | ||
"matrices must be writable"); | ||
static_assert(InOutMatrices::rank == 3, | ||
"matrices must be a list of square matrices"); | ||
static_assert( | ||
KokkosExt::is_accessible_from<typename InOutMatrices::memory_space, | ||
ExecutionSpace>::value, | ||
"matrices must be accessible from the execution space"); | ||
|
||
ARBORX_ASSERT(matrices.extent(1) == matrices.extent(2)); // Must be square | ||
|
||
InOutMatrices ESs( | ||
Kokkos::view_alloc(space, Kokkos::WithoutInitializing, | ||
"ArborX::SymmetricPseudoInverseSVD::ESs"), | ||
matrices.extent(0), matrices.extent(1), matrices.extent(2)); | ||
InOutMatrices Us(Kokkos::view_alloc(space, Kokkos::WithoutInitializing, | ||
"ArborX::SymmetricPseudoInverseSVD::Us"), | ||
matrices.extent(0), matrices.extent(1), matrices.extent(2)); | ||
|
||
Kokkos::parallel_for( | ||
"ArborX::SymmetricPseudoInverseSVD::computations", | ||
Kokkos::RangePolicy<ExecutionSpace>(space, 0, matrices.extent(0)), | ||
KOKKOS_LAMBDA(int const i) { | ||
auto A = Kokkos::subview(matrices, i, Kokkos::ALL, Kokkos::ALL); | ||
auto ES = Kokkos::subview(ESs, i, Kokkos::ALL, Kokkos::ALL); | ||
auto U = Kokkos::subview(Us, i, Kokkos::ALL, Kokkos::ALL); | ||
symmetricPseudoInverseSVDSerialKernel(A, ES, U); | ||
}); | ||
} | ||
|
||
} // namespace ArborX::Interpolation::Details | ||
|
||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,8 +17,92 @@ | |
#include <Kokkos_Core.hpp> | ||
|
||
#include "BoostTest_CUDA_clang_workarounds.hpp" | ||
#include <boost/test/unit_test.hpp> | ||
#include <boost/test/utils/is_forward_iterable.hpp> | ||
|
||
template <typename U, typename V> | ||
using CommonValueType = | ||
typename boost::common_type<typename U::non_const_value_type, | ||
typename V::non_const_value_type>::type; | ||
|
||
template <typename U, typename V> | ||
void arborxViewCheck(U const &u, V const &v, std::string const &u_name, | ||
std::string const &v_name, CommonValueType<U, V> tol = 0) | ||
{ | ||
static constexpr int rank = U::rank; | ||
|
||
bool same_dim_size = true; | ||
for (int i = 0; i < rank; i++) | ||
{ | ||
int ui = u.extent_int(i); | ||
int vi = v.extent_int(i); | ||
BOOST_TEST(ui == vi, "check " << u_name << " == " << v_name | ||
<< " failed at dimension " << i << " size [" | ||
<< ui << " != " << vi << "]"); | ||
same_dim_size = (ui == vi) && same_dim_size; | ||
} | ||
|
||
if (!same_dim_size) | ||
return; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that you don't need this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not really, as this will abort the current test and not check the other matrices of the same test. |
||
|
||
int index[8]{0, 0, 0, 0, 0, 0, 0, 0}; | ||
auto make_index = [&]() { | ||
std::stringstream sstr; | ||
sstr << "("; | ||
for (int i = 0; i < rank - 1; i++) | ||
sstr << index[i] << ", "; | ||
sstr << index[rank - 1] << ")"; | ||
return sstr.str(); | ||
}; | ||
|
||
while (index[0] != u.extent_int(0)) | ||
{ | ||
auto uval = u.access(index[0], index[1], index[2], index[3], index[4], | ||
index[5], index[6], index[7]); | ||
auto vval = v.access(index[0], index[1], index[2], index[3], index[4], | ||
index[5], index[6], index[7]); | ||
std::string index_str = make_index(); | ||
|
||
// Can "tol" be used as a tolerance value? If not, go back to regular | ||
// comparison | ||
if constexpr (boost::math::fpc::tolerance_based<decltype(tol)>::value) | ||
BOOST_TEST(uval == vval, u_name << " == " << v_name << " at " << index_str | ||
<< boost::test_tools::tolerance(tol)); | ||
else | ||
BOOST_TEST(uval == vval, "check " << u_name << " == " << v_name | ||
<< " failed at " << index_str << " [" | ||
<< uval << " != " << vval << "]"); | ||
|
||
index[rank - 1]++; | ||
for (int i = rank - 1; i > 0; i--) | ||
if (index[i] == u.extent_int(i)) | ||
{ | ||
index[i] = 0; | ||
index[i - 1]++; | ||
} | ||
} | ||
} | ||
|
||
#define ARBORX_MDVIEW_TEST_TOL(VIEWA, VIEWB, TOL) \ | ||
[](decltype(VIEWA) const &u, decltype(VIEWB) const &v) { \ | ||
auto view_a = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, u); \ | ||
auto view_b = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, v); \ | ||
\ | ||
static_assert(unsigned(std::decay_t<decltype(u)>::rank) == \ | ||
unsigned(std::decay_t<decltype(v)>::rank), \ | ||
"'" #VIEWA "' and '" #VIEWB "' must have the same rank"); \ | ||
\ | ||
std::string view_a_name(#VIEWA); \ | ||
view_a_name += " (" + view_a.label() + ")"; \ | ||
\ | ||
std::string view_b_name(#VIEWB); \ | ||
view_b_name += " (" + view_b.label() + ")"; \ | ||
\ | ||
arborxViewCheck(view_a, view_b, view_a_name, view_b_name, TOL); \ | ||
}(VIEWA, VIEWB) | ||
|
||
#define ARBORX_MDVIEW_TEST(VIEWA, VIEWB) ARBORX_MDVIEW_TEST_TOL(VIEWA, VIEWB, 0) | ||
|
||
// Enable element-wise comparison for views that are accessible from the host | ||
namespace boost | ||
{ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't even know you could return an object from a local unnamed class.
Sure why not given that these are implementation details.