-
Notifications
You must be signed in to change notification settings - Fork 39
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Pseudo-inverse of symmetric matrices using SVD / Utility for moving least squares #950
Changes from 14 commits
a9b9eea
6f67dd3
8acf635
98c2dd8
a4f7326
79580e3
797fd65
d7c4ee5
d3b6e8d
3d105a1
20f906a
0c5bb0d
e2e5838
4568d88
8af21a9
61fff17
8098fc0
2dfac38
e68f83b
4edd575
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,3 +2,4 @@ | |
*.swp | ||
.#* | ||
/build* | ||
.vscode |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,264 @@ | ||||||
/**************************************************************************** | ||||||
* Copyright (c) 2023 by the ArborX authors * | ||||||
* All rights reserved. * | ||||||
* * | ||||||
* This file is part of the ArborX library. ArborX is * | ||||||
* distributed under a BSD 3-clause license. For the licensing terms see * | ||||||
* the LICENSE file in the top-level directory. * | ||||||
* * | ||||||
* SPDX-License-Identifier: BSD-3-Clause * | ||||||
****************************************************************************/ | ||||||
|
||||||
#ifndef ARBORX_INTERP_DETAILS_SYMMETRIC_PSEUDO_INVERSE_SVD_HPP | ||||||
#define ARBORX_INTERP_DETAILS_SYMMETRIC_PSEUDO_INVERSE_SVD_HPP | ||||||
|
||||||
#include <ArborX_DetailsKokkosExtAccessibilityTraits.hpp> | ||||||
#include <ArborX_Exception.hpp> | ||||||
|
||||||
#include <Kokkos_Core.hpp> | ||||||
|
||||||
namespace ArborX::Interpolation::Details | ||||||
{ | ||||||
|
||||||
template <typename Matrix> | ||||||
KOKKOS_INLINE_FUNCTION void ensureIsSquareMatrix(Matrix const &mat) | ||||||
{ | ||||||
static_assert(Kokkos::is_view_v<Matrix>, "Matrix must be a view"); | ||||||
static_assert(Matrix::rank == 2, "Matrix must be 2D"); | ||||||
KOKKOS_ASSERT(mat.extent(0) == mat.extent(1)); | ||||||
} | ||||||
|
||||||
template <typename Matrix> | ||||||
KOKKOS_INLINE_FUNCTION void ensureIsSquareSymmetricMatrix(Matrix const &mat) | ||||||
{ | ||||||
ensureIsSquareMatrix(mat); | ||||||
using value_t = typename Matrix::non_const_value_type; | ||||||
|
||||||
auto is_symmetric = [&]() { | ||||||
int const size = mat.extent(0); | ||||||
for (int i = 0; i < size; i++) | ||||||
for (int j = i + 1; j < size; j++) | ||||||
{ | ||||||
auto const val = Kokkos::abs(mat(i, j) - mat(j, i)); | ||||||
auto const ref = Kokkos::abs(mat(i, j)); | ||||||
static constexpr value_t epsilon = | ||||||
Kokkos::Experimental::epsilon_v<float>; | ||||||
if (ref == value_t(0) && val > epsilon) | ||||||
return false; | ||||||
if (ref != value_t(0) && val / ref > epsilon) | ||||||
return false; | ||||||
} | ||||||
return true; | ||||||
}; | ||||||
|
||||||
KOKKOS_ASSERT(is_symmetric()); | ||||||
} | ||||||
|
||||||
// Gets the argmax from the upper triangle part of a matrix | ||||||
template <typename Matrix> | ||||||
KOKKOS_FUNCTION auto argmaxUpperTriangle(Matrix const &mat) | ||||||
{ | ||||||
ensureIsSquareMatrix(mat); | ||||||
using value_t = typename Matrix::non_const_value_type; | ||||||
|
||||||
struct | ||||||
{ | ||||||
value_t max = 0; | ||||||
int row = 0; | ||||||
int col = 0; | ||||||
} result; | ||||||
Comment on lines
+56
to
+61
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I didn't even know you could return an object from a local unnamed class. |
||||||
|
||||||
int const size = mat.extent(0); | ||||||
for (int i = 0; i < size; i++) | ||||||
for (int j = i + 1; j < size; j++) | ||||||
{ | ||||||
value_t val = Kokkos::abs(mat(i, j)); | ||||||
if (result.max < val) | ||||||
{ | ||||||
result.max = val; | ||||||
result.row = i; | ||||||
result.col = j; | ||||||
} | ||||||
} | ||||||
|
||||||
return result; | ||||||
} | ||||||
|
||||||
// Pseudo-inverse of symmetric matrices using SVD | ||||||
// We must find U, E (diagonal and positive) and V such that A = U.E.V^T | ||||||
// We also suppose, as the input, that A is symmetric, so U = SV where S is | ||||||
// a sign matrix (only 1 or -1 in the diagonal, 0 elsewhere). | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
// Thus A = U.ES.U^T and A^-1 = U.[ ES^-1 ].U^T | ||||||
template <typename AMatrix, typename ESMatrix, typename UMatrix> | ||||||
KOKKOS_FUNCTION void | ||||||
symmetricPseudoInverseSVDSerialKernel(AMatrix &A, ESMatrix &ES, UMatrix &U) | ||||||
{ | ||||||
ensureIsSquareSymmetricMatrix(A); | ||||||
static_assert(!std::is_const_v<typename AMatrix::value_type>, | ||||||
"A must be writable"); | ||||||
ensureIsSquareMatrix(ES); | ||||||
static_assert(!std::is_const_v<typename ESMatrix::value_type>, | ||||||
"ES must be writable"); | ||||||
ensureIsSquareMatrix(U); | ||||||
static_assert(!std::is_const_v<typename UMatrix::value_type>, | ||||||
"U must be writable"); | ||||||
static_assert(std::is_same_v<typename AMatrix::value_type, | ||||||
typename ESMatrix::value_type> && | ||||||
std::is_same_v<typename ESMatrix::value_type, | ||||||
typename UMatrix::value_type>, | ||||||
"Each input matrix must have the same value type"); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
KOKKOS_ASSERT(A.extent(0) == ES.extent(0) && ES.extent(0) == U.extent(0)); | ||||||
using value_t = typename AMatrix::non_const_value_type; | ||||||
int const size = A.extent(0); | ||||||
|
||||||
// We first initialize U as the identity matrix and copy A to ES | ||||||
for (int i = 0; i < size; i++) | ||||||
for (int j = 0; j < size; j++) | ||||||
{ | ||||||
U(i, j) = value_t(i == j); | ||||||
ES(i, j) = A(i, j); | ||||||
} | ||||||
|
||||||
static constexpr value_t epsilon = Kokkos::Experimental::epsilon_v<float>; | ||||||
while (true) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess the CI is stuck in this loop? |
||||||
{ | ||||||
// We have a guarantee that p < q | ||||||
auto const [max_val, p, q] = argmaxUpperTriangle(ES); | ||||||
if (max_val <= epsilon) | ||||||
break; | ||||||
|
||||||
auto const a = ES(p, p); | ||||||
auto const b = ES(p, q); | ||||||
auto const c = ES(q, q); | ||||||
|
||||||
// Our submatrix is now | ||||||
// +----------+----------+ +---+---+ | ||||||
// | ES(p, p) | ES(p, q) | | a | b | | ||||||
// +----------+----------+ = +---+---+ | ||||||
// | ES(q, p) | ES(q, q) | | b | c | | ||||||
// +----------+----------+ +---+---+ | ||||||
|
||||||
// Lets compute x, y and theta such that | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
// +---+---+ +---+---+ | ||||||
// | a | b | | x | 0 | | ||||||
// +---+---+ = R(theta) * +---+---+ * R(theta)^T | ||||||
// | b | c | | 0 | y | | ||||||
// +---+---+ +---+---+ | ||||||
|
||||||
value_t cos_theta, sin_theta, x, y; | ||||||
if (a == c) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't you need to have a tolerance here? What happens if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. A tolerance is not really needed as the algorithm behaves correctly even if the values are very close together. This case is here to avoid a "true" division by zero. |
||||||
{ | ||||||
cos_theta = Kokkos::sqrt(value_t(2)) / 2; | ||||||
sin_theta = cos_theta; | ||||||
x = a + b; | ||||||
y = a - b; | ||||||
} | ||||||
else | ||||||
{ | ||||||
auto const u = (2 * b) / (a - c); | ||||||
auto const v = 1 / Kokkos::sqrt(u * u + 1); | ||||||
cos_theta = Kokkos::sqrt((1 + v) / 2); | ||||||
sin_theta = Kokkos::copysign(Kokkos::sqrt((1 - v) / 2), u); | ||||||
x = (a + c + (a - c) / v) / 2; | ||||||
y = a + c - x; | ||||||
} | ||||||
|
||||||
// Now lets compute the following new values for U and ES | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
// ES <- R'(theta)^T . ES . R'(theta) | ||||||
// U <- U . R'(theta) | ||||||
|
||||||
// R'(theta)^T . ES . R'(theta) | ||||||
for (int i = 0; i < p; i++) | ||||||
{ | ||||||
auto const es_ip = ES(i, p); | ||||||
auto const es_iq = ES(i, q); | ||||||
ES(i, p) = cos_theta * es_ip + sin_theta * es_iq; | ||||||
ES(i, q) = -sin_theta * es_ip + cos_theta * es_iq; | ||||||
} | ||||||
ES(p, p) = x; | ||||||
for (int i = p + 1; i < q; i++) | ||||||
{ | ||||||
auto const es_pi = ES(p, i); | ||||||
auto const es_iq = ES(i, q); | ||||||
ES(p, i) = cos_theta * es_pi + sin_theta * es_iq; | ||||||
ES(i, q) = -sin_theta * es_pi + cos_theta * es_iq; | ||||||
} | ||||||
ES(q, q) = y; | ||||||
for (int i = q + 1; i < size; i++) | ||||||
{ | ||||||
auto const es_pi = ES(p, i); | ||||||
auto const es_qi = ES(q, i); | ||||||
ES(p, i) = cos_theta * es_pi + sin_theta * es_qi; | ||||||
ES(q, i) = -sin_theta * es_pi + cos_theta * es_qi; | ||||||
} | ||||||
ES(p, q) = 0; | ||||||
|
||||||
// U . R'(theta) | ||||||
for (int i = 0; i < size; i++) | ||||||
{ | ||||||
auto const u_ip = U(i, p); | ||||||
auto const u_iq = U(i, q); | ||||||
U(i, p) = cos_theta * u_ip + sin_theta * u_iq; | ||||||
U(i, q) = -sin_theta * u_ip + cos_theta * u_iq; | ||||||
} | ||||||
Comment on lines
+192
to
+198
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it make sense to use hierarchical parallelism for these for-loops or do we assume There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It would make a lot of sense to use hierarchical parallelism here (and in other loops) as well as using scratch pads for the auxiliary matrices. However, because this is part of the MLS algorithm, it might not be a bottleneck and it is best to avoid early optimization. However, the following should be a logical distribution:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fair enough. |
||||||
} | ||||||
|
||||||
// We compute the max to get a range of the invertible eigen values | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
auto max_eigen = epsilon; | ||||||
for (int i = 0; i < size; i++) | ||||||
max_eigen = Kokkos::max(Kokkos::abs(ES(i, i)), max_eigen); | ||||||
|
||||||
// We inverse the diagonal of ES, except if "0" is found | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
for (int i = 0; i < size; i++) | ||||||
ES(i, i) = (Kokkos::abs(ES(i, i)) < max_eigen * epsilon) ? 0 : 1 / ES(i, i); | ||||||
|
||||||
// Then we fill out A as the pseudo inverse | ||||||
for (int i = 0; i < size; i++) | ||||||
for (int j = 0; j < size; j++) | ||||||
{ | ||||||
value_t tmp = 0; | ||||||
for (int k = 0; k < size; k++) | ||||||
tmp += ES(k, k) * U(i, k) * U(j, k); | ||||||
A(i, j) = tmp; | ||||||
} | ||||||
} | ||||||
|
||||||
template <typename ExecutionSpace, typename InOutMatrices> | ||||||
void symmetricPseudoInverseSVD(ExecutionSpace const &space, | ||||||
InOutMatrices &matrices) | ||||||
{ | ||||||
// InOutMatrices is a single or list of matrices (i.e 2 or 3D view) | ||||||
static_assert(Kokkos::is_view_v<InOutMatrices>, "matrices must be a view"); | ||||||
static_assert(!std::is_const_v<typename InOutMatrices::value_type>, | ||||||
"matrices must be writable"); | ||||||
static_assert(InOutMatrices::rank == 3, | ||||||
"matrices must be a list of square matrices"); | ||||||
static_assert( | ||||||
KokkosExt::is_accessible_from<typename InOutMatrices::memory_space, | ||||||
ExecutionSpace>::value, | ||||||
"matrices must be accessible from the execution space"); | ||||||
|
||||||
ARBORX_ASSERT(matrices.extent(1) == matrices.extent(2)); // Must be square | ||||||
|
||||||
InOutMatrices ESs( | ||||||
Kokkos::view_alloc(space, Kokkos::WithoutInitializing, | ||||||
"ArborX::SymmetricPseudoInverseSVD::ESs"), | ||||||
matrices.extent(0), matrices.extent(1), matrices.extent(2)); | ||||||
InOutMatrices Us(Kokkos::view_alloc(space, Kokkos::WithoutInitializing, | ||||||
"ArborX::SymmetricPseudoInverseSVD::Us"), | ||||||
matrices.extent(0), matrices.extent(1), matrices.extent(2)); | ||||||
|
||||||
Kokkos::parallel_for( | ||||||
"ArborX::SymmetricPseudoInverseSVD::computations", | ||||||
Kokkos::RangePolicy<ExecutionSpace>(space, 0, matrices.extent(0)), | ||||||
KOKKOS_LAMBDA(int const i) { | ||||||
auto A = Kokkos::subview(matrices, i, Kokkos::ALL, Kokkos::ALL); | ||||||
auto ES = Kokkos::subview(ESs, i, Kokkos::ALL, Kokkos::ALL); | ||||||
auto U = Kokkos::subview(Us, i, Kokkos::ALL, Kokkos::ALL); | ||||||
symmetricPseudoInverseSVDSerialKernel(A, ES, U); | ||||||
}); | ||||||
} | ||||||
|
||||||
} // namespace ArborX::Interpolation::Details | ||||||
|
||||||
#endif |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,8 +17,88 @@ | |
#include <Kokkos_Core.hpp> | ||
|
||
#include "BoostTest_CUDA_clang_workarounds.hpp" | ||
#include <boost/test/unit_test.hpp> | ||
#include <boost/test/utils/is_forward_iterable.hpp> | ||
|
||
template <typename U, typename V> | ||
using CommonValueType = | ||
typename boost::common_type<typename U::non_const_value_type, | ||
typename V::non_const_value_type>::type; | ||
|
||
template <typename U, typename V> | ||
void arborxViewCheck(U const &u, V const &v, std::string const &u_name, | ||
std::string const &v_name, CommonValueType<U, V> tol = 0) | ||
{ | ||
static constexpr int rank = U::rank; | ||
|
||
bool same_dim_size = true; | ||
for (int i = 0; i < rank; i++) | ||
{ | ||
int ui = u.extent(i), vi = v.extent(i); | ||
BOOST_TEST(ui == vi, "check " << u_name << " == " << v_name | ||
<< " failed at dimension " << i << " size [" | ||
<< ui << " != " << vi << "]"); | ||
same_dim_size = (ui == vi) && same_dim_size; | ||
} | ||
|
||
if (!same_dim_size) | ||
return; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think that you don't need this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not really, as this will abort the current test and not check the other matrices of the same test. |
||
|
||
int index[8]{0, 0, 0, 0, 0, 0, 0, 0}; | ||
auto make_index = [&]() { | ||
std::stringstream sstr; | ||
sstr << "("; | ||
for (int i = 0; i < rank - 1; i++) | ||
sstr << index[i] << ", "; | ||
sstr << index[rank - 1] << ")"; | ||
return sstr.str(); | ||
}; | ||
|
||
while (index[0] != u.extent_int(0)) | ||
{ | ||
auto uval = u.access(index[0], index[1], index[2], index[3], index[4], | ||
index[5], index[6], index[7]); | ||
auto vval = v.access(index[0], index[1], index[2], index[3], index[4], | ||
index[5], index[6], index[7]); | ||
std::string index_str = make_index(); | ||
|
||
// Can "tol" be used as a tolerance value? If not, go back to regular | ||
// comparison | ||
if constexpr (boost::math::fpc::tolerance_based<decltype(tol)>::value) | ||
BOOST_TEST(uval == vval, u_name << " == " << v_name << " at " << index_str | ||
<< boost::test_tools::tolerance(tol)); | ||
else | ||
BOOST_TEST(uval == vval, "check " << u_name << " == " << v_name | ||
<< " failed at " << index_str << " [" | ||
<< uval << " != " << vval << "]"); | ||
|
||
index[rank - 1]++; | ||
for (int i = rank - 1; i > 0; i--) | ||
if (index[i] == u.extent_int(i)) | ||
{ | ||
index[i] = 0; | ||
index[i - 1]++; | ||
} | ||
} | ||
} | ||
|
||
#define ARBORX_MDVIEW_TEST(VIEWA, VIEWB, ...) \ | ||
[](decltype(VIEWA) const &u, decltype(VIEWB) const &v) { \ | ||
auto view_a = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, u); \ | ||
auto view_b = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace{}, v); \ | ||
\ | ||
static_assert(decltype(view_a)::rank == decltype(view_b)::rank, \ | ||
"'" #VIEWA "' and '" #VIEWB "' must have the same rank"); \ | ||
\ | ||
std::string view_a_name(#VIEWA); \ | ||
view_a_name += " (" + view_a.label() + ")"; \ | ||
\ | ||
std::string view_b_name(#VIEWB); \ | ||
view_b_name += " (" + view_b.label() + ")"; \ | ||
\ | ||
arborxViewCheck(view_a, view_b, view_a_name, view_b_name, ##__VA_ARGS__); \ | ||
}(VIEWA, VIEWB) | ||
|
||
// Enable element-wise comparison for views that are accessible from the host | ||
namespace boost | ||
{ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure why the check is using tolerances. I think the input matrix should always be symmetric without tolerances, if constructed correctly. So I would just do
If that's not the case, I would rename the function to include
Tol
in the name, and provide anepsilon
function argument.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will change to the tolerance-free version, as it forces users to have their matrices properly symmetric.