From f93343d75d84a8756690bb6c26526c40e8357f5c Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Wed, 6 Mar 2024 11:32:14 -0800 Subject: [PATCH 01/24] Add code path to apply rcm reordering to streams --- packages/ifpack2/src/Ifpack2_RILUK_decl.hpp | 2 + packages/ifpack2/src/Ifpack2_RILUK_def.hpp | 240 +++++++++++++----- .../sparse/src/KokkosSparse_Utils.hpp | 131 ++++++++-- .../Test_Sparse_extractCrsDiagonalBlocks.hpp | 45 +++- 4 files changed, 334 insertions(+), 84 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_RILUK_decl.hpp b/packages/ifpack2/src/Ifpack2_RILUK_decl.hpp index 0cfb82080984..850c121922df 100644 --- a/packages/ifpack2/src/Ifpack2_RILUK_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_RILUK_decl.hpp @@ -647,6 +647,8 @@ class RILUK: bool isKokkosKernelsStream_; int num_streams_; std::vector exec_space_instances_; + bool hasStreamReordered_; + std::vector perm_v_; }; // NOTE (mfh 11 Feb 2015) This used to exist in order to deal with diff --git a/packages/ifpack2/src/Ifpack2_RILUK_def.hpp b/packages/ifpack2/src/Ifpack2_RILUK_def.hpp index 1e90f9bdf7f3..4a5ba4bc692e 100644 --- a/packages/ifpack2/src/Ifpack2_RILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_RILUK_def.hpp @@ -91,7 +91,8 @@ RILUK::RILUK (const Teuchos::RCP& Matrix_in) Rthresh_ (Teuchos::ScalarTraits::one ()), isKokkosKernelsSpiluk_(false), isKokkosKernelsStream_(false), - num_streams_(0) + num_streams_(0), + hasStreamReordered_(false) { allocateSolvers(); } @@ -116,7 +117,8 @@ RILUK::RILUK (const Teuchos::RCP& Matrix_in) Rthresh_ (Teuchos::ScalarTraits::one ()), isKokkosKernelsSpiluk_(false), isKokkosKernelsStream_(false), - num_streams_(0) + num_streams_(0), + hasStreamReordered_(false) { allocateSolvers(); } @@ -412,7 +414,7 @@ setParameters (const Teuchos::ParameterList& params) getParamTryingTypes (nstreams, params, paramName, prefix); } - + // Forward to trisolvers. L_solver_->setParameters(params); U_solver_->setParameters(params); @@ -427,6 +429,9 @@ setParameters (const Teuchos::ParameterList& params) if (num_streams_ >= 1) { this->isKokkosKernelsStream_ = true; + // Will we do reordering in streams? + if (params.isParameter("fact: kspiluk reordering in streams")) + hasStreamReordered_ = params.get ("fact: kspiluk reordering in streams"); } else { this->isKokkosKernelsStream_ = false; @@ -524,7 +529,7 @@ void RILUK::initialize () "matrix until the matrix is fill complete. If your matrix is a " "Tpetra::CrsMatrix, please call fillComplete on it (with the domain and " "range Maps, if appropriate) before calling this method."); - + Teuchos::Time timer ("RILUK::initialize"); double startTime = timer.wallTime(); { // Start timing @@ -592,8 +597,10 @@ void RILUK::initialize () } else { auto lclMtx = A_local_crs->getLocalMatrixDevice(); - KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks); - + if (!hasStreamReordered_) + KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks); + else + perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks, true); for(int i = 0; i < num_streams_; i++) { Teuchos::RCP A_local_diagblks_RowMap = rcp (new crs_map_type(A_local_diagblks[i].numRows(), A_local_diagblks[i].numRows(), @@ -654,6 +661,7 @@ void RILUK::initialize () #if !defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) || !defined(KOKKOS_ENABLE_CUDA) || (CUDA_VERSION < 11030) L_solver_->compute ();//NOTE: It makes sense to do compute here because only the nonzero pattern is involved in trisolve compute #endif + if (!isKokkosKernelsStream_) { U_solver_->setMatrix (U_); } @@ -1050,7 +1058,11 @@ void RILUK::compute () A_local_values_ = lclMtx.values; } else { - KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks); + if (!hasStreamReordered_) + KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks); + else + perm_v_ = KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(lclMtx, A_local_diagblks, true); + A_local_diagblks_rowmap_v_ = std::vector(num_streams_); A_local_diagblks_entries_v_ = std::vector(num_streams_); A_local_diagblks_values_v_ = std::vector(num_streams_); @@ -1198,77 +1210,187 @@ apply (const Tpetra::MultiVectorgetLocalViewDevice(Tpetra::Access::ReadOnly); + auto ReorderedX_lcl = ReorderedX_j->getLocalViewDevice(Tpetra::Access::ReadWrite); + local_ordinal_type stream_begin = 0; + local_ordinal_type stream_end; + for(int i = 0; i < num_streams_; i++) { + auto perm_i = perm_v_[i]; + stream_end = stream_begin + perm_i.extent(0); + auto X_lcl_sub = Kokkos::subview (X_lcl, Kokkos::make_pair(stream_begin, stream_end), 0); + auto ReorderedX_lcl_sub = Kokkos::subview (ReorderedX_lcl, Kokkos::make_pair(stream_begin, stream_end), 0); + Kokkos::parallel_for( Kokkos::RangePolicy(0, static_cast(perm_i.extent(0))), KOKKOS_LAMBDA ( const int& ii ) { + ReorderedX_lcl_sub(perm_i(ii)) = X_lcl_sub(ii); + }); + stream_begin = stream_end; + } + } + + if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y. #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030) - //NOTE (Nov-15-2022): - //This is a workaround for Cuda >= 11.3 (using cusparseSpSV) - //since cusparseSpSV_solve() does not support in-place computation - MV Y_tmp (Y.getMap (), Y.getNumVectors ()); + //NOTE (Nov-15-2022): + //This is a workaround for Cuda >= 11.3 (using cusparseSpSV) + //since cusparseSpSV_solve() does not support in-place computation + MV Y_tmp (ReorderedY.getMap (), ReorderedY.getNumVectors ()); + + // Start by solving L Y_tmp = X for Y_tmp. + L_solver_->apply (ReorderedX, Y_tmp, mode); + + if (!this->isKokkosKernelsSpiluk_) { + // Solve D Y = Y. The operation lets us do this in place in Y, so we can + // write "solve D Y = Y for Y." + Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero); + } - // Start by solving L Y_tmp = X for Y_tmp. - L_solver_->apply (X, Y_tmp, mode); + U_solver_->apply (Y_tmp, ReorderedY, mode); // Solve U Y = Y_tmp. +#else + // Start by solving L Y = X for Y. + L_solver_->apply (ReorderedX, ReorderedY, mode); - if (!this->isKokkosKernelsSpiluk_) { - // Solve D Y = Y. The operation lets us do this in place in Y, so we can - // write "solve D Y = Y for Y." - Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero); - } + if (!this->isKokkosKernelsSpiluk_) { + // Solve D Y = Y. The operation lets us do this in place in Y, so we can + // write "solve D Y = Y for Y." + Y.elementWiseMultiply (one, *D_, Y, zero); + } - U_solver_->apply (Y_tmp, Y, mode); // Solve U Y = Y_tmp. + U_solver_->apply (ReorderedY, ReorderedY, mode); // Solve U Y = Y. +#endif + } + else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T). +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030) + //NOTE (Nov-15-2022): + //This is a workaround for Cuda >= 11.3 (using cusparseSpSV) + //since cusparseSpSV_solve() does not support in-place computation + MV Y_tmp (ReorderedY.getMap (), ReorderedY.getNumVectors ()); + + // Start by solving U^P Y_tmp = X for Y_tmp. + U_solver_->apply (ReorderedX, Y_tmp, mode); + + if (!this->isKokkosKernelsSpiluk_) { + // Solve D^P Y = Y. + // + // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we + // need to do an elementwise multiply with the conjugate of + // D_, not just with D_ itself. + Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero); + } + + L_solver_->apply (Y_tmp, ReorderedY, mode); // Solve L^P Y = Y_tmp. #else - // Start by solving L Y = X for Y. - L_solver_->apply (X, Y, mode); - - if (!this->isKokkosKernelsSpiluk_) { - // Solve D Y = Y. The operation lets us do this in place in Y, so we can - // write "solve D Y = Y for Y." - Y.elementWiseMultiply (one, *D_, Y, zero); + // Start by solving U^P Y = X for Y. + U_solver_->apply (ReorderedX, ReorderedY, mode); + + if (!this->isKokkosKernelsSpiluk_) { + // Solve D^P Y = Y. + // + // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we + // need to do an elementwise multiply with the conjugate of + // D_, not just with D_ itself. + Y.elementWiseMultiply (one, *D_, Y, zero); + } + + L_solver_->apply (ReorderedY, ReorderedY, mode); // Solve L^P Y = Y. +#endif } - U_solver_->apply (Y, Y, mode); // Solve U Y = Y. -#endif + for (size_t j = 0; j < Y.getNumVectors(); j++) { + auto Y_j = Y.getVectorNonConst(j); + auto ReorderedY_j = ReorderedY.getVector(j); + auto Y_lcl = Y_j->getLocalViewDevice(Tpetra::Access::ReadWrite); + auto ReorderedY_lcl = ReorderedY_j->getLocalViewDevice(Tpetra::Access::ReadOnly); + local_ordinal_type stream_begin = 0; + local_ordinal_type stream_end; + for(int i = 0; i < num_streams_; i++) { + auto perm_i = perm_v_[i]; + stream_end = stream_begin + perm_i.extent(0); + auto Y_lcl_sub = Kokkos::subview (Y_lcl, Kokkos::make_pair(stream_begin, stream_end), 0); + auto ReorderedY_lcl_sub = Kokkos::subview (ReorderedY_lcl, Kokkos::make_pair(stream_begin, stream_end), 0); + Kokkos::parallel_for( Kokkos::RangePolicy(0, static_cast(perm_i.extent(0))), KOKKOS_LAMBDA ( const int& ii ) { + Y_lcl_sub(ii) = ReorderedY_lcl_sub(perm_i(ii)); + }); + stream_begin = stream_end; + } + } } - else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T). + else { + if (mode == Teuchos::NO_TRANS) { // Solve L (D (U Y)) = X for Y. #if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030) - //NOTE (Nov-15-2022): - //This is a workaround for Cuda >= 11.3 (using cusparseSpSV) - //since cusparseSpSV_solve() does not support in-place computation - MV Y_tmp (Y.getMap (), Y.getNumVectors ()); + //NOTE (Nov-15-2022): + //This is a workaround for Cuda >= 11.3 (using cusparseSpSV) + //since cusparseSpSV_solve() does not support in-place computation + MV Y_tmp (Y.getMap (), Y.getNumVectors ()); + + // Start by solving L Y_tmp = X for Y_tmp. + L_solver_->apply (X, Y_tmp, mode); + + if (!this->isKokkosKernelsSpiluk_) { + // Solve D Y = Y. The operation lets us do this in place in Y, so we can + // write "solve D Y = Y for Y." + Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero); + } - // Start by solving U^P Y_tmp = X for Y_tmp. - U_solver_->apply (X, Y_tmp, mode); + U_solver_->apply (Y_tmp, Y, mode); // Solve U Y = Y_tmp. +#else + // Start by solving L Y = X for Y. + L_solver_->apply (X, Y, mode); - if (!this->isKokkosKernelsSpiluk_) { - // Solve D^P Y = Y. - // - // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we - // need to do an elementwise multiply with the conjugate of - // D_, not just with D_ itself. - Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero); - } + if (!this->isKokkosKernelsSpiluk_) { + // Solve D Y = Y. The operation lets us do this in place in Y, so we can + // write "solve D Y = Y for Y." + Y.elementWiseMultiply (one, *D_, Y, zero); + } - L_solver_->apply (Y_tmp, Y, mode); // Solve L^P Y = Y_tmp. + U_solver_->apply (Y, Y, mode); // Solve U Y = Y. +#endif + } + else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T). +#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030) + //NOTE (Nov-15-2022): + //This is a workaround for Cuda >= 11.3 (using cusparseSpSV) + //since cusparseSpSV_solve() does not support in-place computation + MV Y_tmp (Y.getMap (), Y.getNumVectors ()); + + // Start by solving U^P Y_tmp = X for Y_tmp. + U_solver_->apply (X, Y_tmp, mode); + + if (!this->isKokkosKernelsSpiluk_) { + // Solve D^P Y = Y. + // + // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we + // need to do an elementwise multiply with the conjugate of + // D_, not just with D_ itself. + Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero); + } + + L_solver_->apply (Y_tmp, Y, mode); // Solve L^P Y = Y_tmp. #else - // Start by solving U^P Y = X for Y. - U_solver_->apply (X, Y, mode); - - if (!this->isKokkosKernelsSpiluk_) { - // Solve D^P Y = Y. - // - // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we - // need to do an elementwise multiply with the conjugate of - // D_, not just with D_ itself. - Y.elementWiseMultiply (one, *D_, Y, zero); - } - - L_solver_->apply (Y, Y, mode); // Solve L^P Y = Y. + // Start by solving U^P Y = X for Y. + U_solver_->apply (X, Y, mode); + + if (!this->isKokkosKernelsSpiluk_) { + // Solve D^P Y = Y. + // + // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we + // need to do an elementwise multiply with the conjugate of + // D_, not just with D_ itself. + Y.elementWiseMultiply (one, *D_, Y, zero); + } + + L_solver_->apply (Y, Y, mode); // Solve L^P Y = Y. #endif + } } } else { // alpha != 1 or beta != 0 diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp index f3fbec18369f..2b89c1a2f74e 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp @@ -25,6 +25,7 @@ #include "KokkosSparse_CrsMatrix.hpp" #include "KokkosSparse_BsrMatrix.hpp" #include "Kokkos_Bitset.hpp" +#include "KokkosGraph_RCM.hpp" #ifdef KOKKOSKERNELS_HAVE_PARALLEL_GNUSORT #include @@ -2415,15 +2416,23 @@ void kk_extract_subblock_crsmatrix_sequential( * @tparam crsMat_t The type of the CRS matrix. * @param A [in] The square CrsMatrix. It is expected that column indices are * in ascending order + * @param UseRCMReordering [in] Boolean indicating whether applying (true) RCM + * reordering to diagonal blocks or not (false) (default: false) * @param DiagBlk_v [out] The vector of the extracted the CRS diagonal blocks * (1 <= the number of diagonal blocks <= A_nrows) + * @return a vector of lists of vertices in RCM order (a list per a diagonal + * block) if UseRCMReordering is true, or an empty vector if UseRCMReordering is + * false * * Usage Example: - * kk_extract_diagonal_blocks_crsmatrix_sequential(A_in, diagBlk_in_b); + * perm = kk_extract_diagonal_blocks_crsmatrix_sequential(A_in, diagBlk_out, + * UseRCMReordering); */ template -void kk_extract_diagonal_blocks_crsmatrix_sequential( - const crsMat_t &A, std::vector &DiagBlk_v) { +std::vector +kk_extract_diagonal_blocks_crsmatrix_sequential( + const crsMat_t &A, std::vector &DiagBlk_v, + bool UseRCMReordering = false) { using row_map_type = typename crsMat_t::row_map_type; using entries_type = typename crsMat_t::index_type; using values_type = typename crsMat_t::values_type; @@ -2437,6 +2446,7 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( using ordinal_type = typename crsMat_t::non_const_ordinal_type; using size_type = typename crsMat_t::non_const_size_type; + using value_type = typename crsMat_t::non_const_value_type; using offset_view1d_type = Kokkos::View; @@ -2463,8 +2473,12 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( throw std::runtime_error(os.str()); } + std::vector perm_v; + std::vector perm_h_v; + if (n_blocks == 1) { // One block case: simply shallow copy A to DiagBlk_v[0] + // Note: always not applying RCM reordering, for now DiagBlk_v[0] = crsMat_t(A); } else { // n_blocks > 1 @@ -2487,12 +2501,10 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( ? (A_nrows / n_blocks) : (A_nrows / n_blocks + 1); - std::vector row_map_v(n_blocks); - std::vector entries_v(n_blocks); - std::vector values_v(n_blocks); - std::vector row_map_h_v(n_blocks); - std::vector entries_h_v(n_blocks); - std::vector values_h_v(n_blocks); + if (UseRCMReordering) { + perm_v.resize(n_blocks); + perm_h_v.resize(n_blocks); + } ordinal_type blk_row_start = 0; // first row index of i-th diagonal block ordinal_type blk_col_start = 0; // first col index of i-th diagonal block @@ -2509,37 +2521,110 @@ void kk_extract_diagonal_blocks_crsmatrix_sequential( // First round: count i-th non-zeros or size of entries_v[i] and find // the first and last column indices at each row size_type blk_nnz = 0; - offset_view1d_type first("first", blk_nrows); // first position per row - offset_view1d_type last("last", blk_nrows); // last position per row + offset_view1d_type first( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "first"), + blk_nrows); // first position per row + offset_view1d_type last( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "last"), + blk_nrows); // last position per row kk_find_nnz_first_last_indices_subblock_crsmatrix_sequential( A_row_map_h, A_entries_h, blk_row_start, blk_col_start, blk_nrows, blk_ncols, blk_nnz, first, last); // Second round: extract - row_map_v[i] = out_row_map_type("row_map_v", blk_nrows + 1); - entries_v[i] = out_entries_type("entries_v", blk_nnz); - values_v[i] = out_values_type("values_v", blk_nnz); - row_map_h_v[i] = - out_row_map_hostmirror_type("row_map_h_v", blk_nrows + 1); - entries_h_v[i] = out_entries_hostmirror_type("entries_h_v", blk_nnz); - values_h_v[i] = out_values_hostmirror_type("values_h_v", blk_nnz); + out_row_map_type row_map( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_map"), + blk_nrows + 1); + out_entries_type entries( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "entries"), + blk_nnz); + out_values_type values( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "values"), blk_nnz); + out_row_map_hostmirror_type row_map_h( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_map_h"), + blk_nrows + 1); + out_entries_hostmirror_type entries_h( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "entries_h"), + blk_nnz); + out_values_hostmirror_type values_h( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "values_h"), + blk_nnz); kk_extract_subblock_crsmatrix_sequential( A_entries_h, A_values_h, blk_col_start, blk_nrows, blk_nnz, first, - last, row_map_h_v[i], entries_h_v[i], values_h_v[i]); + last, row_map_h, entries_h, values_h); + + if (!UseRCMReordering) { + Kokkos::deep_copy(row_map, row_map_h); + Kokkos::deep_copy(entries, entries_h); + Kokkos::deep_copy(values, values_h); + } else { + perm_h_v[i] = KokkosGraph::Experimental::graph_rcm< + Kokkos::DefaultHostExecutionSpace>(row_map_h, entries_h); + perm_v[i] = out_entries_type( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "perm_v"), + perm_h_v[i].extent(0)); + + out_row_map_hostmirror_type row_map_perm_h( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "row_map_perm_h"), + blk_nrows + 1); + out_entries_hostmirror_type entries_perm_h( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "entries_perm_h"), + blk_nnz); + out_values_hostmirror_type values_perm_h( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "values_perm_h"), + blk_nnz); + + out_entries_hostmirror_type reverseperm_h( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "reverseperm_h"), + blk_nrows); + for (ordinal_type ii = 0; ii < blk_nrows; ii++) + reverseperm_h(perm_h_v[i](ii)) = ii; + + std::map colIdx_Value_rcm; + + // Loop through each row of the reordered matrix + size_type cnt = 0; + for (ordinal_type ii = 0; ii < blk_nrows; ii++) { + colIdx_Value_rcm.clear(); + // ii: reordered index + ordinal_type origRow = reverseperm_h( + ii); // get the original row idx of the reordered row idx, ii + for (size_type j = row_map_h(origRow); j < row_map_h(origRow + 1); + j++) { + ordinal_type origEi = entries_h(j); + value_type origV = values_h(j); + ordinal_type Ei = + perm_h_v[i](origEi); // get the reordered col idx of the + // original col idx, origEi + colIdx_Value_rcm[Ei] = origV; + } + row_map_perm_h(ii) = cnt; + for (typename std::map::iterator it = + colIdx_Value_rcm.begin(); + it != colIdx_Value_rcm.end(); ++it) { + entries_perm_h(cnt) = it->first; + values_perm_h(cnt) = it->second; + cnt++; + } + } + row_map_perm_h(blk_nrows) = cnt; - Kokkos::deep_copy(row_map_v[i], row_map_h_v[i]); - Kokkos::deep_copy(entries_v[i], entries_h_v[i]); - Kokkos::deep_copy(values_v[i], values_h_v[i]); + Kokkos::deep_copy(row_map, row_map_perm_h); + Kokkos::deep_copy(entries, entries_perm_h); + Kokkos::deep_copy(values, values_perm_h); + Kokkos::deep_copy(perm_v[i], perm_h_v[i]); + } DiagBlk_v[i] = crsMat_t("CrsMatrix", blk_nrows, blk_ncols, blk_nnz, - values_v[i], row_map_v[i], entries_v[i]); + values, row_map, entries); blk_row_start += blk_nrows; } // for (ordinal_type i = 0; i < n_blocks; i++) } // A_nrows >= 1 } // n_blocks > 1 + return perm_v; } } // namespace Impl diff --git a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp index 327780dec32d..28674ad353fe 100644 --- a/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp +++ b/packages/kokkos-kernels/sparse/unit_test/Test_Sparse_extractCrsDiagonalBlocks.hpp @@ -15,6 +15,8 @@ //@HEADER #include "KokkosSparse_Utils.hpp" +#include "KokkosSparse_spmv.hpp" +#include "KokkosBlas1_nrm2.hpp" #include "KokkosKernels_TestUtils.hpp" namespace Test { @@ -31,6 +33,7 @@ void run_test_extract_diagonal_blocks(int nrows, int nblocks) { crsMat_t A; std::vector DiagBlks(nblocks); + std::vector DiagBlks_rcm(nblocks); if (nrows != 0) { // Generate test matrix @@ -84,6 +87,10 @@ void run_test_extract_diagonal_blocks(int nrows, int nblocks) { KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential(A, DiagBlks); + auto perm = + KokkosSparse::Impl::kk_extract_diagonal_blocks_crsmatrix_sequential( + A, DiagBlks_rcm, true); + // Checking lno_t numRows = 0; lno_t numCols = 0; @@ -125,6 +132,40 @@ void run_test_extract_diagonal_blocks(int nrows, int nblocks) { col_start += DiagBlks[i].numCols(); } EXPECT_TRUE(flag); + + // Checking RCM + if (!perm.empty()) { + scalar_t one = scalar_t(1.0); + scalar_t zero = scalar_t(0.0); + scalar_t mone = scalar_t(-1.0); + for (int i = 0; i < nblocks; i++) { + ValuesType In("In", DiagBlks[i].numRows()); + ValuesType Out("Out", DiagBlks[i].numRows()); + + ValuesType_hm h_Out = Kokkos::create_mirror_view(Out); + ValuesType_hm h_Out_tmp = Kokkos::create_mirror(Out); + + Kokkos::deep_copy(In, one); + + auto h_perm = + Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), perm[i]); + + KokkosSparse::spmv("N", one, DiagBlks_rcm[i], In, zero, Out); + + Kokkos::deep_copy(h_Out_tmp, Out); + for (lno_t ii = 0; ii < static_cast(DiagBlks[i].numRows()); + ii++) { + lno_t rcm_ii = h_perm(ii); + h_Out(ii) = h_Out_tmp(rcm_ii); + } + Kokkos::deep_copy(Out, h_Out); + + KokkosSparse::spmv("N", one, DiagBlks[i], In, mone, Out); + + double nrm_val = KokkosBlas::nrm2(Out); + EXPECT_LE(nrm_val, 1e-9); + } + } } } } // namespace Test @@ -136,9 +177,9 @@ void test_extract_diagonal_blocks() { Test::run_test_extract_diagonal_blocks( 0, s); Test::run_test_extract_diagonal_blocks( - 12, s); + 153, s); Test::run_test_extract_diagonal_blocks( - 123, s); + 1553, s); } } From 18bc9460a213a7e30b0d43e016b62d95a78c0db8 Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Wed, 6 Mar 2024 12:35:19 -0800 Subject: [PATCH 02/24] Add option and short description to the Ifpack2 userguide for RCM reordering in streams --- packages/ifpack2/doc/UsersGuide/options.tex | 4 + packages/ifpack2/test/belos/build_problem.hpp | 4 +- .../ifpack2/test/belos/build_problem.hpp_ORIG | 240 ++++++++++++++++++ 3 files changed, 247 insertions(+), 1 deletion(-) create mode 100644 packages/ifpack2/test/belos/build_problem.hpp_ORIG diff --git a/packages/ifpack2/doc/UsersGuide/options.tex b/packages/ifpack2/doc/UsersGuide/options.tex index 9800fd4ede33..fc196831ea50 100644 --- a/packages/ifpack2/doc/UsersGuide/options.tex +++ b/packages/ifpack2/doc/UsersGuide/options.tex @@ -502,6 +502,10 @@ \subsection{ILU($k$)}\label{s:ILU} these streams can run concurrently, the total time can be faster. When this option is not set (i.e. not using stream), the entire sub-domain is used instead.} +\ccc{fact: kspiluk reordering in streams} + {bool} + {\false} + {Whether RCM reordering is applied to diagonal blocks in streams.} % All overlap-related code was removed by M. Hoemmen in % % commit 162f64572fbf93e2cac73e3034d76a3db918a494 diff --git a/packages/ifpack2/test/belos/build_problem.hpp b/packages/ifpack2/test/belos/build_problem.hpp index f4e699d0894a..c3a6a1921095 100644 --- a/packages/ifpack2/test/belos/build_problem.hpp +++ b/packages/ifpack2/test/belos/build_problem.hpp @@ -189,7 +189,8 @@ build_problem (Teuchos::ParameterList& test_params, if (b == Teuchos::null) { bool rhs_unit = false; - int rhs_option = 2; + //int rhs_option = 2; // ORIG + int rhs_option = 3; b = Teuchos::rcp (new TMV (rowmap, 1)); if (rhs_option == 0) { // random B @@ -202,6 +203,7 @@ build_problem (Teuchos::ParameterList& test_params, // b = A * random x->randomize (); } else { + if (comm->getRank() == 0) std::cout << "x is set to ones and b = A * ones!!!" << std::endl; // b = A * ones x->putScalar (STS::one ()); } diff --git a/packages/ifpack2/test/belos/build_problem.hpp_ORIG b/packages/ifpack2/test/belos/build_problem.hpp_ORIG new file mode 100644 index 000000000000..f4e699d0894a --- /dev/null +++ b/packages/ifpack2/test/belos/build_problem.hpp_ORIG @@ -0,0 +1,240 @@ +/* +//@HEADER +// *********************************************************************** +// +// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package +// Copyright (2009) Sandia Corporation +// +// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive +// license for use of this work by or on behalf of the U.S. Government. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// *********************************************************************** +//@HEADER +*/ + +#ifndef _build_problem_hpp_ +#define _build_problem_hpp_ + +#include +#include + +#include "Teuchos_ParameterList.hpp" +#include "Teuchos_RefCountPtr.hpp" +#include "Teuchos_Time.hpp" +#include "Teuchos_Comm.hpp" + +#include "Ifpack2_BorderedOperator.hpp" +#include "Ifpack2_Preconditioner.hpp" + +#include "BelosLinearProblem.hpp" +#include "BelosTpetraAdapter.hpp" + +#include "read_matrix.hpp" +#include "build_precond.hpp" + +template +Teuchos::RCP, + Tpetra::Operator > > +build_problem (Teuchos::ParameterList& test_params, + const Teuchos::RCP >& comm) +{ + using Teuchos::ArrayView; + using Teuchos::ParameterList; + using Teuchos::parameterList; + using Teuchos::RCP; + using Teuchos::rcp; + typedef LocalOrdinal LO; + typedef GlobalOrdinal GO; + typedef Tpetra::CrsMatrix crs_matrix_type; + typedef Tpetra::Map map_type; + typedef Tpetra::MultiVector TMV; + typedef Tpetra::MatrixMarket::Reader reader_type; + typedef Tpetra::Operator TOP; + typedef Belos::OperatorTraits BOPT; + typedef Belos::MultiVecTraits BMVT; + typedef Belos::LinearProblem BLinProb; + typedef Ifpack2::BorderedOperator IBOP; + typedef Teuchos::ScalarTraits STS; + + RCP A; + RCP b = Teuchos::null; + RCP nullVec = Teuchos::null; + + std::string mm_file("not specified"); + std::string map_mm_file("not specified"); + std::string rhs_mm_file("not specified"); + std::string nullMvec_mm_file("not specified"); + Ifpack2::getParameter(test_params, "mm_file", mm_file); + Ifpack2::getParameter(test_params, "map_mm_file", map_mm_file); + Ifpack2::getParameter(test_params, "rhs_mm_file", rhs_mm_file); + std::string hb_file("not specified"); + Ifpack2::getParameter(test_params, "hb_file", hb_file); + bool useMatrixWithConstGraph = false; + Ifpack2::getParameter(test_params, "Use matrix with const graph", useMatrixWithConstGraph); + + if (mm_file != "not specified") { + if (comm->getRank() == 0) { + std::cout << "Matrix Market file for sparse matrix A: " << mm_file << std::endl; + } + RCP constructorParams = parameterList ("CrsMatrix"); + RCP fillCompleteParams = parameterList ("fillComplete"); + if (useMatrixWithConstGraph) { + // We need to keep the local graph so that we can create a new + // matrix with a const graph, using the graph of the original + // matrix read in here. + // fillCompleteParams->set ("Optimize Storage", false); + fillCompleteParams->set ("Preserve Local Graph", true); + } + RCP rowMap; + RCP colMap = Teuchos::null; + if (map_mm_file != "not specified") { + if (comm->getRank() == 0) { + std::cout << "Matrix Market file for row Map of the sparse matrix A: " << map_mm_file << std::endl; + } + rowMap = reader_type::readMapFile(map_mm_file, comm); + A = reader_type::readSparseFile (mm_file, rowMap, colMap, rowMap, rowMap); + } + else { + A = reader_type::readSparseFile (mm_file, comm, constructorParams, + fillCompleteParams); + } + + RCP domainMap = A->getDomainMap (); + RCP rangeMap = A->getRangeMap (); + + if (rhs_mm_file != "not specified") { + if (comm->getRank() == 0) { + std::cout << "Matrix Market file for right-hand-side(s) B: " << rhs_mm_file << std::endl; + } + b = reader_type::readDenseFile (rhs_mm_file, comm, rangeMap); + } + + if (nullMvec_mm_file != "not specified") { + if (comm->getRank() == 0) { + std::cout << "Matrix Market file for null multivector: " << nullMvec_mm_file << std::endl; + } + // mfh 31 Jan 2013: I'm not sure what a "null multivector" means + // in this context, so I'm only guessing that it's a domain Map + // multivector. + nullVec = reader_type::readDenseFile (nullMvec_mm_file, comm, domainMap); + } + + } + else if (hb_file != "not specified") { + if (comm->getRank() == 0) { + std::cout << "Harwell-Boeing file: " << hb_file << std::endl; + } + A = read_matrix_hb (hb_file, comm); + } + else { + throw std::runtime_error("No matrix file specified."); + } + + if (useMatrixWithConstGraph) { + // Some Ifpack2 preconditioners that extract diagonal entries have + // a special path for doing so more efficiently when the matrix + // has a const graph (isStaticGraph()). In order to test this, we + // specifically create a matrix with a const graph, by extracting + // the original matrix's graph and copying all the values into the + // new matrix. + RCP A_constGraph (new crs_matrix_type (A->getCrsGraph ())); + // Copy the values row by row from A into A_constGraph. + using lids_type = typename crs_matrix_type::local_inds_host_view_type; + using vals_type = typename crs_matrix_type::values_host_view_type; + lids_type ind; + vals_type val; + const LO numLocalRows = static_cast (A->getLocalNumRows ()); + for (LO localRow = 0; localRow < numLocalRows; ++localRow) { + A->getLocalRowView (localRow, ind, val); + A_constGraph->replaceLocalValues (localRow, ind, val); + } + A_constGraph->fillComplete (A->getDomainMap (), A->getRangeMap ()); + A = A_constGraph; // Replace A with A_constGraph. + } + + Teuchos::RCP rowmap = A->getRowMap(); + + Teuchos::RCP x = Teuchos::rcp(new TMV(rowmap, 1)); + + if (b == Teuchos::null) { + bool rhs_unit = false; + int rhs_option = 2; + b = Teuchos::rcp (new TMV (rowmap, 1)); + if (rhs_option == 0) { + // random B + b->randomize (); + } else if (rhs_option == 1) { + // b = ones + b->putScalar (STS::one ()); + } else { + if (rhs_option == 2) { + // b = A * random + x->randomize (); + } else { + // b = A * ones + x->putScalar (STS::one ()); + } + BOPT::Apply (*A, *x, *b); + } + if (rhs_unit) { + // scale B to unit-norm + Teuchos::Array normsB(b->getNumVectors()); + b->norm2(normsB); + for (size_t j = 0; j < b->getNumVectors(); j++) { + b->getVectorNonConst(j)->scale(STS::one() / normsB[j]); + } + } + // X = zero + BMVT::MvInit (*x, STS::zero ()); + } + else { + x->putScalar (STS::zero ()); + } + + Teuchos::RCP< BLinProb > problem; + Teuchos::RCP borderedA; + if (nullVec == Teuchos::null) { + problem = Teuchos::rcp (new BLinProb (A, x, b)); + } + else { + borderedA = Teuchos::rcp (new IBOP (A)); + problem = Teuchos::rcp (new BLinProb (borderedA, x, b)); + } + + return problem; +} + + +#endif + From f9bf9abd5bd2bfa14189523754b8d1af927cda56 Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Wed, 6 Mar 2024 12:39:52 -0800 Subject: [PATCH 03/24] Remove redundant file --- .../ifpack2/test/belos/build_problem.hpp_ORIG | 240 ------------------ 1 file changed, 240 deletions(-) delete mode 100644 packages/ifpack2/test/belos/build_problem.hpp_ORIG diff --git a/packages/ifpack2/test/belos/build_problem.hpp_ORIG b/packages/ifpack2/test/belos/build_problem.hpp_ORIG deleted file mode 100644 index f4e699d0894a..000000000000 --- a/packages/ifpack2/test/belos/build_problem.hpp_ORIG +++ /dev/null @@ -1,240 +0,0 @@ -/* -//@HEADER -// *********************************************************************** -// -// Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package -// Copyright (2009) Sandia Corporation -// -// Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive -// license for use of this work by or on behalf of the U.S. Government. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact Michael A. Heroux (maherou@sandia.gov) -// -// *********************************************************************** -//@HEADER -*/ - -#ifndef _build_problem_hpp_ -#define _build_problem_hpp_ - -#include -#include - -#include "Teuchos_ParameterList.hpp" -#include "Teuchos_RefCountPtr.hpp" -#include "Teuchos_Time.hpp" -#include "Teuchos_Comm.hpp" - -#include "Ifpack2_BorderedOperator.hpp" -#include "Ifpack2_Preconditioner.hpp" - -#include "BelosLinearProblem.hpp" -#include "BelosTpetraAdapter.hpp" - -#include "read_matrix.hpp" -#include "build_precond.hpp" - -template -Teuchos::RCP, - Tpetra::Operator > > -build_problem (Teuchos::ParameterList& test_params, - const Teuchos::RCP >& comm) -{ - using Teuchos::ArrayView; - using Teuchos::ParameterList; - using Teuchos::parameterList; - using Teuchos::RCP; - using Teuchos::rcp; - typedef LocalOrdinal LO; - typedef GlobalOrdinal GO; - typedef Tpetra::CrsMatrix crs_matrix_type; - typedef Tpetra::Map map_type; - typedef Tpetra::MultiVector TMV; - typedef Tpetra::MatrixMarket::Reader reader_type; - typedef Tpetra::Operator TOP; - typedef Belos::OperatorTraits BOPT; - typedef Belos::MultiVecTraits BMVT; - typedef Belos::LinearProblem BLinProb; - typedef Ifpack2::BorderedOperator IBOP; - typedef Teuchos::ScalarTraits STS; - - RCP A; - RCP b = Teuchos::null; - RCP nullVec = Teuchos::null; - - std::string mm_file("not specified"); - std::string map_mm_file("not specified"); - std::string rhs_mm_file("not specified"); - std::string nullMvec_mm_file("not specified"); - Ifpack2::getParameter(test_params, "mm_file", mm_file); - Ifpack2::getParameter(test_params, "map_mm_file", map_mm_file); - Ifpack2::getParameter(test_params, "rhs_mm_file", rhs_mm_file); - std::string hb_file("not specified"); - Ifpack2::getParameter(test_params, "hb_file", hb_file); - bool useMatrixWithConstGraph = false; - Ifpack2::getParameter(test_params, "Use matrix with const graph", useMatrixWithConstGraph); - - if (mm_file != "not specified") { - if (comm->getRank() == 0) { - std::cout << "Matrix Market file for sparse matrix A: " << mm_file << std::endl; - } - RCP constructorParams = parameterList ("CrsMatrix"); - RCP fillCompleteParams = parameterList ("fillComplete"); - if (useMatrixWithConstGraph) { - // We need to keep the local graph so that we can create a new - // matrix with a const graph, using the graph of the original - // matrix read in here. - // fillCompleteParams->set ("Optimize Storage", false); - fillCompleteParams->set ("Preserve Local Graph", true); - } - RCP rowMap; - RCP colMap = Teuchos::null; - if (map_mm_file != "not specified") { - if (comm->getRank() == 0) { - std::cout << "Matrix Market file for row Map of the sparse matrix A: " << map_mm_file << std::endl; - } - rowMap = reader_type::readMapFile(map_mm_file, comm); - A = reader_type::readSparseFile (mm_file, rowMap, colMap, rowMap, rowMap); - } - else { - A = reader_type::readSparseFile (mm_file, comm, constructorParams, - fillCompleteParams); - } - - RCP domainMap = A->getDomainMap (); - RCP rangeMap = A->getRangeMap (); - - if (rhs_mm_file != "not specified") { - if (comm->getRank() == 0) { - std::cout << "Matrix Market file for right-hand-side(s) B: " << rhs_mm_file << std::endl; - } - b = reader_type::readDenseFile (rhs_mm_file, comm, rangeMap); - } - - if (nullMvec_mm_file != "not specified") { - if (comm->getRank() == 0) { - std::cout << "Matrix Market file for null multivector: " << nullMvec_mm_file << std::endl; - } - // mfh 31 Jan 2013: I'm not sure what a "null multivector" means - // in this context, so I'm only guessing that it's a domain Map - // multivector. - nullVec = reader_type::readDenseFile (nullMvec_mm_file, comm, domainMap); - } - - } - else if (hb_file != "not specified") { - if (comm->getRank() == 0) { - std::cout << "Harwell-Boeing file: " << hb_file << std::endl; - } - A = read_matrix_hb (hb_file, comm); - } - else { - throw std::runtime_error("No matrix file specified."); - } - - if (useMatrixWithConstGraph) { - // Some Ifpack2 preconditioners that extract diagonal entries have - // a special path for doing so more efficiently when the matrix - // has a const graph (isStaticGraph()). In order to test this, we - // specifically create a matrix with a const graph, by extracting - // the original matrix's graph and copying all the values into the - // new matrix. - RCP A_constGraph (new crs_matrix_type (A->getCrsGraph ())); - // Copy the values row by row from A into A_constGraph. - using lids_type = typename crs_matrix_type::local_inds_host_view_type; - using vals_type = typename crs_matrix_type::values_host_view_type; - lids_type ind; - vals_type val; - const LO numLocalRows = static_cast (A->getLocalNumRows ()); - for (LO localRow = 0; localRow < numLocalRows; ++localRow) { - A->getLocalRowView (localRow, ind, val); - A_constGraph->replaceLocalValues (localRow, ind, val); - } - A_constGraph->fillComplete (A->getDomainMap (), A->getRangeMap ()); - A = A_constGraph; // Replace A with A_constGraph. - } - - Teuchos::RCP rowmap = A->getRowMap(); - - Teuchos::RCP x = Teuchos::rcp(new TMV(rowmap, 1)); - - if (b == Teuchos::null) { - bool rhs_unit = false; - int rhs_option = 2; - b = Teuchos::rcp (new TMV (rowmap, 1)); - if (rhs_option == 0) { - // random B - b->randomize (); - } else if (rhs_option == 1) { - // b = ones - b->putScalar (STS::one ()); - } else { - if (rhs_option == 2) { - // b = A * random - x->randomize (); - } else { - // b = A * ones - x->putScalar (STS::one ()); - } - BOPT::Apply (*A, *x, *b); - } - if (rhs_unit) { - // scale B to unit-norm - Teuchos::Array normsB(b->getNumVectors()); - b->norm2(normsB); - for (size_t j = 0; j < b->getNumVectors(); j++) { - b->getVectorNonConst(j)->scale(STS::one() / normsB[j]); - } - } - // X = zero - BMVT::MvInit (*x, STS::zero ()); - } - else { - x->putScalar (STS::zero ()); - } - - Teuchos::RCP< BLinProb > problem; - Teuchos::RCP borderedA; - if (nullVec == Teuchos::null) { - problem = Teuchos::rcp (new BLinProb (A, x, b)); - } - else { - borderedA = Teuchos::rcp (new IBOP (A)); - problem = Teuchos::rcp (new BLinProb (borderedA, x, b)); - } - - return problem; -} - - -#endif - From 8c0608a65eeafb56a4ebc6900a4cba687c59cbfd Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Wed, 6 Mar 2024 12:42:02 -0800 Subject: [PATCH 04/24] Recover original build_problem.hpp --- packages/ifpack2/test/belos/build_problem.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/packages/ifpack2/test/belos/build_problem.hpp b/packages/ifpack2/test/belos/build_problem.hpp index c3a6a1921095..f4e699d0894a 100644 --- a/packages/ifpack2/test/belos/build_problem.hpp +++ b/packages/ifpack2/test/belos/build_problem.hpp @@ -189,8 +189,7 @@ build_problem (Teuchos::ParameterList& test_params, if (b == Teuchos::null) { bool rhs_unit = false; - //int rhs_option = 2; // ORIG - int rhs_option = 3; + int rhs_option = 2; b = Teuchos::rcp (new TMV (rowmap, 1)); if (rhs_option == 0) { // random B @@ -203,7 +202,6 @@ build_problem (Teuchos::ParameterList& test_params, // b = A * random x->randomize (); } else { - if (comm->getRank() == 0) std::cout << "x is set to ones and b = A * ones!!!" << std::endl; // b = A * ones x->putScalar (STS::one ()); } From 7651ded9919dcb493afa99c7bd68a37234879510 Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Wed, 6 Mar 2024 12:57:58 -0800 Subject: [PATCH 05/24] Add tests --- packages/ifpack2/test/belos/CMakeLists.txt | 39 +++++++++++++++++++ .../test_2_RILUK_2streams_rcm_nos1_hb.xml | 22 +++++++++++ .../test_2_RILUK_4streams_rcm_nos1_hb.xml | 22 +++++++++++ .../test_4_RILUK_2streams_rcm_nos1_hb.xml | 23 +++++++++++ .../test_4_RILUK_4streams_rcm_nos1_hb.xml | 23 +++++++++++ 5 files changed, 129 insertions(+) create mode 100644 packages/ifpack2/test/belos/test_2_RILUK_2streams_rcm_nos1_hb.xml create mode 100644 packages/ifpack2/test/belos/test_2_RILUK_4streams_rcm_nos1_hb.xml create mode 100644 packages/ifpack2/test/belos/test_4_RILUK_2streams_rcm_nos1_hb.xml create mode 100644 packages/ifpack2/test/belos/test_4_RILUK_4streams_rcm_nos1_hb.xml diff --git a/packages/ifpack2/test/belos/CMakeLists.txt b/packages/ifpack2/test/belos/CMakeLists.txt index 9e9799eaa643..00fdb64c288f 100644 --- a/packages/ifpack2/test/belos/CMakeLists.txt +++ b/packages/ifpack2/test/belos/CMakeLists.txt @@ -72,11 +72,15 @@ TRIBITS_COPY_FILES_TO_BINARY_DIR(Ifpack2BelosCopyFiles test_2_RILUK_HTS_nos1_hb.xml test_2_RILUK_2streams_nos1_hb.xml test_2_RILUK_4streams_nos1_hb.xml + test_2_RILUK_2streams_rcm_nos1_hb.xml + test_2_RILUK_4streams_rcm_nos1_hb.xml test_4_ILUT_nos1_hb.xml test_4_RILUK_nos1_hb.xml test_4_RILUK_HTS_nos1_hb.xml test_4_RILUK_2streams_nos1_hb.xml test_4_RILUK_4streams_nos1_hb.xml + test_4_RILUK_2streams_rcm_nos1_hb.xml + test_4_RILUK_4streams_rcm_nos1_hb.xml test_SGS_calore1_mm.xml test_MTSGS_calore1_mm.xml calore1.mtx @@ -403,6 +407,41 @@ IF(Kokkos_ENABLE_CUDA) NUM_MPI_PROCS 4 STANDARD_PASS_OUTPUT ) + TRIBITS_ADD_TEST( + tif_belos + NAME RILUK_2streams_rcm_hb_belos + ARGS "--xml_file=test_2_RILUK_2streams_rcm_nos1_hb.xml" + COMM serial mpi + NUM_MPI_PROCS 2 + STANDARD_PASS_OUTPUT + ) + + TRIBITS_ADD_TEST( + tif_belos + NAME RILUK_4streams_rcm_hb_belos + ARGS "--xml_file=test_2_RILUK_4streams_rcm_nos1_hb.xml" + COMM serial mpi + NUM_MPI_PROCS 2 + STANDARD_PASS_OUTPUT + ) + + TRIBITS_ADD_TEST( + tif_belos + NAME RILUK_2streams_rcm_hb_belos + ARGS "--xml_file=test_4_RILUK_2streams_rcm_nos1_hb.xml" + COMM serial mpi + NUM_MPI_PROCS 4 + STANDARD_PASS_OUTPUT + ) + + TRIBITS_ADD_TEST( + tif_belos + NAME RILUK_4streams_rcm_hb_belos + ARGS "--xml_file=test_4_RILUK_4streams_rcm_nos1_hb.xml" + COMM serial mpi + NUM_MPI_PROCS 4 + STANDARD_PASS_OUTPUT + ) ENDIF() ENDIF() diff --git a/packages/ifpack2/test/belos/test_2_RILUK_2streams_rcm_nos1_hb.xml b/packages/ifpack2/test/belos/test_2_RILUK_2streams_rcm_nos1_hb.xml new file mode 100644 index 000000000000..7af01acaa95f --- /dev/null +++ b/packages/ifpack2/test/belos/test_2_RILUK_2streams_rcm_nos1_hb.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/packages/ifpack2/test/belos/test_2_RILUK_4streams_rcm_nos1_hb.xml b/packages/ifpack2/test/belos/test_2_RILUK_4streams_rcm_nos1_hb.xml new file mode 100644 index 000000000000..fc3859820a7d --- /dev/null +++ b/packages/ifpack2/test/belos/test_2_RILUK_4streams_rcm_nos1_hb.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/packages/ifpack2/test/belos/test_4_RILUK_2streams_rcm_nos1_hb.xml b/packages/ifpack2/test/belos/test_4_RILUK_2streams_rcm_nos1_hb.xml new file mode 100644 index 000000000000..60d217541fde --- /dev/null +++ b/packages/ifpack2/test/belos/test_4_RILUK_2streams_rcm_nos1_hb.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + diff --git a/packages/ifpack2/test/belos/test_4_RILUK_4streams_rcm_nos1_hb.xml b/packages/ifpack2/test/belos/test_4_RILUK_4streams_rcm_nos1_hb.xml new file mode 100644 index 000000000000..7ff88b06bfc1 --- /dev/null +++ b/packages/ifpack2/test/belos/test_4_RILUK_4streams_rcm_nos1_hb.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + From 6f3523d21fdf2e42dbf5bf44a1377f7d83bf7463 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Fri, 8 Mar 2024 12:18:49 -0700 Subject: [PATCH 06/24] Tpetra: Deprecate Epetra-related interfaces --- .../tpetra/core/cmake/TpetraCore_config.h.in | 2 ++ packages/tpetra/core/src/CMakeLists.txt | 4 ++++ packages/tpetra/core/src/Epetra_TsqrAdaptor.hpp | 13 +++++++++++-- .../tpetra/core/src/Epetra_TsqrMessenger.cpp | 3 ++- .../tpetra/core/src/Epetra_TsqrMessenger.hpp | 10 ++++++++-- .../tpetra/core/src/Tpetra_EpetraRowMatrix.hpp | 17 +++++++++++++---- packages/tpetra/core/test/CMakeLists.txt | 3 +-- .../core/test/EpetraRowMatrix/CMakeLists.txt | 2 +- .../core/test/ImportExport/CMakeLists.txt | 2 +- 9 files changed, 43 insertions(+), 13 deletions(-) diff --git a/packages/tpetra/core/cmake/TpetraCore_config.h.in b/packages/tpetra/core/cmake/TpetraCore_config.h.in index 2202098bfab6..758c45b52f4a 100644 --- a/packages/tpetra/core/cmake/TpetraCore_config.h.in +++ b/packages/tpetra/core/cmake/TpetraCore_config.h.in @@ -179,4 +179,6 @@ @TPETRA_DEPRECATED_DECLARATIONS@ +#cmakedefine TPETRA_DEPRECATED_DECLARATIONS + #endif // TPETRACORE_CONFIG_H diff --git a/packages/tpetra/core/src/CMakeLists.txt b/packages/tpetra/core/src/CMakeLists.txt index d7a26fa72032..4b09921691b3 100644 --- a/packages/tpetra/core/src/CMakeLists.txt +++ b/packages/tpetra/core/src/CMakeLists.txt @@ -647,6 +647,10 @@ TRIBITS_SET_AND_INC_DIRS(DIR ${CMAKE_CURRENT_SOURCE_DIR}) APPEND_GLOB(HEADERS ${DIR}/*.h) APPEND_GLOB(HEADERS ${DIR}/*.hpp) APPEND_GLOB(SOURCES ${DIR}/*.cpp) +IF (NOT Tpetra_ENABLE_DEPRECATED_CODE OR NOT TpetraCore_ENABLE_Epetra) + LIST(REMOVE_ITEM SOURCES ${DIR}/Epetra_TsqrMessenger.cpp) + LIST(REMOVE_ITEM SOURCES ${DIR}/Tpetra_EpetraRowMatrix.cpp) +ENDIF() TRILINOS_CREATE_CLIENT_TEMPLATE_HEADERS(${DIR}) # Pull in the Kokkos refactor code. diff --git a/packages/tpetra/core/src/Epetra_TsqrAdaptor.hpp b/packages/tpetra/core/src/Epetra_TsqrAdaptor.hpp index f195e912a40b..ef94185b73a9 100644 --- a/packages/tpetra/core/src/Epetra_TsqrAdaptor.hpp +++ b/packages/tpetra/core/src/Epetra_TsqrAdaptor.hpp @@ -54,7 +54,15 @@ #include "Tpetra_ConfigDefs.hpp" -#if defined(HAVE_TPETRA_EPETRA) && defined(HAVE_TPETRA_TSQR) +#if defined(TPETRA_ENABLE_DEPRECATED_CODE) +#if defined(TPETRA_DEPRECATED_DECLARATIONS) +#warning This file is deprecated due to Epetra removal and will be removed +#endif +#else +#error This file is deprecated due to Epetra removal and will be removed +#endif + +#if defined(TPETRA_ENABLE_DEPRECATED_CODE) && defined(HAVE_TPETRA_EPETRA) && defined(HAVE_TPETRA_TSQR) #include "Tsqr_NodeTsqrFactory.hpp" // create intranode TSQR object #include "Tsqr.hpp" // full (internode + intranode) TSQR @@ -93,6 +101,7 @@ namespace Epetra { /// \warning The current implementation of this adaptor requires /// that all Epetra_MultiVector inputs use the same communicator /// object (that is, the same Epetra_Comm) and map. + TPETRA_DEPRECATED_MSG("epetra removal") class TsqrAdaptor : public Teuchos::ParameterListAcceptorDefaultBase { public: typedef Epetra_MultiVector MV; @@ -369,7 +378,7 @@ namespace Epetra { } // namespace Epetra -#endif // defined(HAVE_TPETRA_EPETRA) && defined(HAVE_TPETRA_TSQR) +#endif // defined(TPETRA_ENABLE_DEPRECATED_CODE) && defined(HAVE_TPETRA_EPETRA) && defined(HAVE_TPETRA_TSQR) #endif // EPETRA_TSQRADAPTOR_HPP diff --git a/packages/tpetra/core/src/Epetra_TsqrMessenger.cpp b/packages/tpetra/core/src/Epetra_TsqrMessenger.cpp index f297abdbdf94..01c0debda0e9 100644 --- a/packages/tpetra/core/src/Epetra_TsqrMessenger.cpp +++ b/packages/tpetra/core/src/Epetra_TsqrMessenger.cpp @@ -52,10 +52,11 @@ /// time to fix their build systems later. /// -#include +#include "TpetraCore_config.h" #if defined(HAVE_TPETRA_EPETRA) && defined(HAVE_TPETRA_TSQR) +#include #include // EPETRA_MPI #include // HAVE_TEUCHOS_MPI diff --git a/packages/tpetra/core/src/Epetra_TsqrMessenger.hpp b/packages/tpetra/core/src/Epetra_TsqrMessenger.hpp index d11ec38e29b1..e09e35e1b5ed 100644 --- a/packages/tpetra/core/src/Epetra_TsqrMessenger.hpp +++ b/packages/tpetra/core/src/Epetra_TsqrMessenger.hpp @@ -57,7 +57,11 @@ #include -#if defined(HAVE_TPETRA_EPETRA) && defined(HAVE_TPETRA_TSQR) +#if !defined(TPETRA_ENABLE_DEPRECATED_CODE) +#error This file is deprecated due to Epetra removal and will be removed +#endif + +#if defined(TPETRA_ENABLE_DEPRECATED_CODE) && defined(HAVE_TPETRA_EPETRA) && defined(HAVE_TPETRA_TSQR) // Include Epetra's MPI wrappers. #include @@ -80,11 +84,13 @@ namespace TSQR { /// object. Otherwise, return a Teuchos::SerialComm instance. It /// should be one of these two things, but if it's not, this /// function throws std::invalid_argument. + TPETRA_DEPRECATED_MSG("epetra removal") Teuchos::RCP > extractTeuchosComm (const Teuchos::RCP& epetraComm); //! Wrap the given Epetra_Comm in an object that TSQR understands. template + TPETRA_DEPRECATED_MSG("epetra removal") Teuchos::RCP > makeTsqrMessenger (const Teuchos::RCP& pComm) { @@ -98,7 +104,7 @@ namespace TSQR { } // namespace Epetra } // namespace TSQR -#endif // defined(HAVE_TPETRA_EPETRA) && defined(HAVE_TPETRA_TSQR) +#endif // defined(TPETRA_ENABLE_DEPRECATED_CODE) && defined(HAVE_TPETRA_EPETRA) && defined(HAVE_TPETRA_TSQR) #endif // EPETRA_TSQRMESSENGER_HPP diff --git a/packages/tpetra/core/src/Tpetra_EpetraRowMatrix.hpp b/packages/tpetra/core/src/Tpetra_EpetraRowMatrix.hpp index a513890810ef..a5a10c61d282 100644 --- a/packages/tpetra/core/src/Tpetra_EpetraRowMatrix.hpp +++ b/packages/tpetra/core/src/Tpetra_EpetraRowMatrix.hpp @@ -46,7 +46,11 @@ #include "TpetraCore_config.h" -#if defined(HAVE_TPETRA_EPETRA) +#if !defined(TPETRA_ENABLE_DEPRECATED_CODE) +#error This file is deprecated due to Epetra removal and will be removed +#endif + +#if defined(TPETRA_ENABLE_DEPRECATED_CODE) && defined(HAVE_TPETRA_EPETRA) #include #include @@ -64,6 +68,7 @@ namespace Details { // just like std::shared_ptr. We only return // std::shared_ptr because Epetra_Comm is an abstract // base class, so we must return it by pointer. +TPETRA_DEPRECATED_MSG("epetra removal") std::shared_ptr makeEpetraCommFromTeuchosComm (const Teuchos::Comm& teuchosComm); @@ -72,7 +77,9 @@ makeEpetraCommFromTeuchosComm (const Teuchos::Comm& teuchosComm); namespace { // (anonymous) + template +TPETRA_DEPRECATED_MSG("epetra removal") Epetra_Map tpetraToEpetraMapTmpl (const TpetraMapType& tpetraMap) { @@ -119,9 +126,11 @@ tpetraToEpetraMapTmpl (const TpetraMapType& tpetraMap) namespace Tpetra { //! A class for wrapping a Tpetra::RowMatrix object in the Epetra_RowMatrix interface. -template -class EpetraRowMatrix : public Epetra_BasicRowMatrix { +template class +TPETRA_DEPRECATED_MSG("epetra removal") +EpetraRowMatrix : public Epetra_BasicRowMatrix { public: + EpetraRowMatrix(const Teuchos::RCP &mat, const Epetra_Comm &comm); virtual ~EpetraRowMatrix() {}; @@ -218,7 +227,7 @@ int EpetraRowMatrix::NumMyRowEntries(int MyRow, int & NumEntri }//namespace Tpetra -#endif // defined(HAVE_TPETRA_EPETRA) +#endif // defined(TPETRA_ENABLE_DEPRECATED_CODE) && defined(HAVE_TPETRA_EPETRA) //here is the include-guard #endif: diff --git a/packages/tpetra/core/test/CMakeLists.txt b/packages/tpetra/core/test/CMakeLists.txt index af7b3842edfb..ff0760310286 100644 --- a/packages/tpetra/core/test/CMakeLists.txt +++ b/packages/tpetra/core/test/CMakeLists.txt @@ -38,7 +38,6 @@ ADD_SUBDIRECTORIES( Tsqr ) -ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_Epetra) -IF (${PACKAGE_NAME}_ENABLE_Epetra) +IF (Tpetra_ENABLE_DEPRECATED_CODE AND ${PACKAGE_NAME}_ENABLE_Epetra) ADD_SUBDIRECTORY(EpetraRowMatrix) ENDIF() diff --git a/packages/tpetra/core/test/EpetraRowMatrix/CMakeLists.txt b/packages/tpetra/core/test/EpetraRowMatrix/CMakeLists.txt index db72da25089b..63aaf85e1e4c 100644 --- a/packages/tpetra/core/test/EpetraRowMatrix/CMakeLists.txt +++ b/packages/tpetra/core/test/EpetraRowMatrix/CMakeLists.txt @@ -5,4 +5,4 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( ${TEUCHOS_STD_UNIT_TEST_MAIN} ARGS "" STANDARD_PASS_OUTPUT - ) +) diff --git a/packages/tpetra/core/test/ImportExport/CMakeLists.txt b/packages/tpetra/core/test/ImportExport/CMakeLists.txt index 0d9f61381304..1704adca6e54 100644 --- a/packages/tpetra/core/test/ImportExport/CMakeLists.txt +++ b/packages/tpetra/core/test/ImportExport/CMakeLists.txt @@ -103,7 +103,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( STANDARD_PASS_OUTPUT ) -IF (${PROJECT_NAME}_ENABLE_Epetra) +IF (Tpetra_ENABLE_DEPRECATED_CODE AND ${PROJECT_NAME}_ENABLE_Epetra) IF(NOT Trilinos_NO_32BIT_GLOBAL_INDICES AND Tpetra_INST_INT_INT) # Tpetra bug 5430: # https://software.sandia.gov/bugzilla/show_bug.cgi?id=5430 From 673dfa863e123e35f5bb69b028893ba2014d7df7 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Fri, 8 Mar 2024 12:39:47 -0700 Subject: [PATCH 07/24] Tpetra: remove docs for not-present argument --- packages/tpetra/core/test/BasicPerfTest/BasicPerfTest.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/tpetra/core/test/BasicPerfTest/BasicPerfTest.cpp b/packages/tpetra/core/test/BasicPerfTest/BasicPerfTest.cpp index 9de0b92ca345..25286d965721 100644 --- a/packages/tpetra/core/test/BasicPerfTest/BasicPerfTest.cpp +++ b/packages/tpetra/core/test/BasicPerfTest/BasicPerfTest.cpp @@ -280,7 +280,6 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_DECL( BasicPerfTest, MatrixAndMultiVector, LO, GO, // // nrhs - Number of rhs to generate. (First interface produces vectors, so nrhs is not needed // -// comm (In) - an Epetra_Comm object describing the parallel machine (numProcs and my proc ID) // map (Out) - Map describing distribution of matrix and vectors/multivectors // A (Out) - CrsMatrix constructed for nx by ny grid using prescribed stencil // Off-diagonal values are random between 0 and 1. If diagonal is part of stencil, From b12a5335e620cbfedc147d56d02eb24d8897179e Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Fri, 8 Mar 2024 12:40:32 -0700 Subject: [PATCH 08/24] Tpetra: remove ununsed type alias --- packages/tpetra/core/test/CrsMatrix/CrsMatrix_MatvecH2D.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/tpetra/core/test/CrsMatrix/CrsMatrix_MatvecH2D.cpp b/packages/tpetra/core/test/CrsMatrix/CrsMatrix_MatvecH2D.cpp index 1cca3aa9278d..1e206abc63a4 100644 --- a/packages/tpetra/core/test/CrsMatrix/CrsMatrix_MatvecH2D.cpp +++ b/packages/tpetra/core/test/CrsMatrix/CrsMatrix_MatvecH2D.cpp @@ -120,7 +120,6 @@ namespace { typedef CrsMatrix MAT; typedef MultiVector MV; typedef typename ST::magnitudeType Mag; - typedef ScalarTraits MT; // This code is left in in case people want to debug future issues using the Kokkos profiling // hooks in Tpetra From 9495142284252e286ece7b8f2ef46c037720ce45 Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Tue, 12 Mar 2024 07:48:07 -0700 Subject: [PATCH 09/24] Simplify code path for reordered case --- packages/ifpack2/src/Ifpack2_RILUK_def.hpp | 76 ++++------------------ 1 file changed, 11 insertions(+), 65 deletions(-) diff --git a/packages/ifpack2/src/Ifpack2_RILUK_def.hpp b/packages/ifpack2/src/Ifpack2_RILUK_def.hpp index 4a5ba4bc692e..e6ef53e4e4ea 100644 --- a/packages/ifpack2/src/Ifpack2_RILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_RILUK_def.hpp @@ -1216,7 +1216,7 @@ apply (const Tpetra::MultiVector= 11030) - //NOTE (Nov-15-2022): - //This is a workaround for Cuda >= 11.3 (using cusparseSpSV) - //since cusparseSpSV_solve() does not support in-place computation - MV Y_tmp (ReorderedY.getMap (), ReorderedY.getNumVectors ()); - - // Start by solving L Y_tmp = X for Y_tmp. - L_solver_->apply (ReorderedX, Y_tmp, mode); - - if (!this->isKokkosKernelsSpiluk_) { - // Solve D Y = Y. The operation lets us do this in place in Y, so we can - // write "solve D Y = Y for Y." - Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero); - } - - U_solver_->apply (Y_tmp, ReorderedY, mode); // Solve U Y = Y_tmp. -#else - // Start by solving L Y = X for Y. - L_solver_->apply (ReorderedX, ReorderedY, mode); - - if (!this->isKokkosKernelsSpiluk_) { - // Solve D Y = Y. The operation lets us do this in place in Y, so we can - // write "solve D Y = Y for Y." - Y.elementWiseMultiply (one, *D_, Y, zero); - } - - U_solver_->apply (ReorderedY, ReorderedY, mode); // Solve U Y = Y. -#endif + if (mode == Teuchos::NO_TRANS) { // Solve L (U Y) = X for Y. + // Solve L Y = X for Y. + L_solver_->apply (ReorderedX, Y, mode); + // Solve U Y = Y for Y. + U_solver_->apply (Y, ReorderedY, mode); } - else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T). -#if defined(KOKKOSKERNELS_ENABLE_TPL_CUSPARSE) && defined(KOKKOS_ENABLE_CUDA) && (CUDA_VERSION >= 11030) - //NOTE (Nov-15-2022): - //This is a workaround for Cuda >= 11.3 (using cusparseSpSV) - //since cusparseSpSV_solve() does not support in-place computation - MV Y_tmp (ReorderedY.getMap (), ReorderedY.getNumVectors ()); - - // Start by solving U^P Y_tmp = X for Y_tmp. - U_solver_->apply (ReorderedX, Y_tmp, mode); - - if (!this->isKokkosKernelsSpiluk_) { - // Solve D^P Y = Y. - // - // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we - // need to do an elementwise multiply with the conjugate of - // D_, not just with D_ itself. - Y_tmp.elementWiseMultiply (one, *D_, Y_tmp, zero); - } - - L_solver_->apply (Y_tmp, ReorderedY, mode); // Solve L^P Y = Y_tmp. -#else - // Start by solving U^P Y = X for Y. - U_solver_->apply (ReorderedX, ReorderedY, mode); - - if (!this->isKokkosKernelsSpiluk_) { - // Solve D^P Y = Y. - // - // FIXME (mfh 24 Jan 2014) If mode = Teuchos::CONJ_TRANS, we - // need to do an elementwise multiply with the conjugate of - // D_, not just with D_ itself. - Y.elementWiseMultiply (one, *D_, Y, zero); - } - - L_solver_->apply (ReorderedY, ReorderedY, mode); // Solve L^P Y = Y. -#endif + else { // Solve U^P (L^P Y) = X for Y (where P is * or T). + // Solve U^P Y = X for Y. + U_solver_->apply (ReorderedX, Y, mode); + // Solve L^P Y = Y for Y. + L_solver_->apply (Y, ReorderedY, mode); } for (size_t j = 0; j < Y.getNumVectors(); j++) { From 26060b7e4f071d4774e19bdb5b6bf47fcdbf5fe7 Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Tue, 12 Mar 2024 14:37:48 -0600 Subject: [PATCH 10/24] stokhos: attempted compatibility update of spmv usage Compatibility update corresponding to kokkos/kokkos-kernels#2126 --- .../pce/linalg/Kokkos_CrsMatrix_UQ_PCE.hpp | 38 ++++++++++++++++--- .../linalg/Kokkos_CrsMatrix_MP_Vector.hpp | 38 ++++++++++++++++--- 2 files changed, 66 insertions(+), 10 deletions(-) diff --git a/packages/stokhos/src/sacado/kokkos/pce/linalg/Kokkos_CrsMatrix_UQ_PCE.hpp b/packages/stokhos/src/sacado/kokkos/pce/linalg/Kokkos_CrsMatrix_UQ_PCE.hpp index 6683ee7689bc..bd951d700755 100644 --- a/packages/stokhos/src/sacado/kokkos/pce/linalg/Kokkos_CrsMatrix_UQ_PCE.hpp +++ b/packages/stokhos/src/sacado/kokkos/pce/linalg/Kokkos_CrsMatrix_UQ_PCE.hpp @@ -1481,20 +1481,33 @@ template < typename std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value && Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value +#if KOKKOSKERNELS_VERSION >= 40299 + // TODO what is an alternative compile-time option to determine the rank? + // Is rank appropriate here, or is additional checking based on specialize trait needed? + && (Kokkos::View< OutputType, OutputP... >().rank() == 1) +#endif >::type spmv( #if KOKKOSKERNELS_VERSION >= 40199 const ExecutionSpace& space, #endif +#if KOKKOSKERNELS_VERSION < 40299 KokkosKernels::Experimental::Controls, +#else + KokkosSparse::SPMVHandle, Kokkos::View< OutputType, OutputP... >>* handle, +#endif const char mode[], const AlphaType& a, const MatrixType& A, const Kokkos::View< InputType, InputP... >& x, const BetaType& b, - const Kokkos::View< OutputType, OutputP... >& y, - const RANK_ONE) + const Kokkos::View< OutputType, OutputP... >& y +#if KOKKOSKERNELS_VERSION < 40299 + , const RANK_ONE +#endif + ) { + std::cout << " STOKHOS UQPCE SPMV R1" << std::endl; typedef Kokkos::View< OutputType, OutputP... > OutputVectorType; typedef Kokkos::View< InputType, InputP... > InputVectorType; typedef Stokhos::Multiply >::value && Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value +#if KOKKOSKERNELS_VERSION >= 40299 + // TODO what is an alternative compile-time option to determine the rank? + // Is rank appropriate here, or is additional checking based on specialize trait needed? + && (Kokkos::View< OutputType, OutputP... >().rank() == 2) +#endif >::type spmv( #if KOKKOSKERNELS_VERSION >= 40199 const ExecutionSpace& space, #endif +#if KOKKOSKERNELS_VERSION < 40299 KokkosKernels::Experimental::Controls, +#else + KokkosSparse::SPMVHandle, Kokkos::View< OutputType, OutputP... >>* handle, +#endif const char mode[], const AlphaType& a, const MatrixType& A, const Kokkos::View< InputType, InputP... >& x, const BetaType& b, - const Kokkos::View< OutputType, OutputP... >& y, - const RANK_TWO) + const Kokkos::View< OutputType, OutputP... >& y +#if KOKKOSKERNELS_VERSION < 40299 + , const RANK_TWO +#endif + ) { + std::cout << " STOKHOS UQPCE SPMV R2" << std::endl; #if KOKKOSKERNELS_VERSION >= 40199 if(space != ExecutionSpace()) { Kokkos::Impl::raise_error( @@ -1569,7 +1595,9 @@ spmv( if (y.extent(1) == 1) { auto y_1D = subview(y, Kokkos::ALL(), 0); auto x_1D = subview(x, Kokkos::ALL(), 0); -#if KOKKOSKERNELS_VERSION >= 40199 +#if KOKKOSKERNELS_VERSION >= 40299 + spmv(space, handle, mode, a, A, x_1D, b, y_1D); +#elif (KOKKOSKERNELS_VERSION < 40299) && (KOKKOSKERNELS_VERSION >= 40199) spmv(space, KokkosKernels::Experimental::Controls(), mode, a, A, x_1D, b, y_1D, RANK_ONE()); #else spmv(KokkosKernels::Experimental::Controls(), mode, a, A, x_1D, b, y_1D, RANK_ONE()); diff --git a/packages/stokhos/src/sacado/kokkos/vector/linalg/Kokkos_CrsMatrix_MP_Vector.hpp b/packages/stokhos/src/sacado/kokkos/vector/linalg/Kokkos_CrsMatrix_MP_Vector.hpp index e1c828a60740..c276b9d62757 100644 --- a/packages/stokhos/src/sacado/kokkos/vector/linalg/Kokkos_CrsMatrix_MP_Vector.hpp +++ b/packages/stokhos/src/sacado/kokkos/vector/linalg/Kokkos_CrsMatrix_MP_Vector.hpp @@ -543,20 +543,33 @@ template < typename std::enable_if< Kokkos::is_view_mp_vector< Kokkos::View< InputType, InputP... > >::value && Kokkos::is_view_mp_vector< Kokkos::View< OutputType, OutputP... > >::value +#if KOKKOSKERNELS_VERSION >= 40299 + // TODO what is an alternative compile-time option to determine the rank? + // Is rank appropriate here, or is additional checking based on specialize trait needed? + && (Kokkos::View< OutputType, OutputP... >().rank() == 1) +#endif >::type spmv( #if KOKKOSKERNELS_VERSION >= 40199 const ExecutionSpace& space, #endif +#if KOKKOSKERNELS_VERSION < 40299 KokkosKernels::Experimental::Controls, +#else + KokkosSparse::SPMVHandle, Kokkos::View< OutputType, OutputP... >>* handle, +#endif const char mode[], const AlphaType& a, const MatrixType& A, const Kokkos::View< InputType, InputP... >& x, const BetaType& b, - const Kokkos::View< OutputType, OutputP... >& y, - const RANK_ONE) + const Kokkos::View< OutputType, OutputP... >& y +#if KOKKOSKERNELS_VERSION < 40299 + , const RANK_ONE +#endif + ) { + std::cout << " STOKHOS MPVEC SPMV R1" << std::endl; typedef Kokkos::View< OutputType, OutputP... > OutputVectorType; typedef Kokkos::View< InputType, InputP... > InputVectorType; using input_vector_type = const_type_t; @@ -640,20 +653,33 @@ template < typename std::enable_if< Kokkos::is_view_mp_vector< Kokkos::View< InputType, InputP... > >::value && Kokkos::is_view_mp_vector< Kokkos::View< OutputType, OutputP... > >::value +#if KOKKOSKERNELS_VERSION >= 40299 + // TODO what is an alternative compile-time option to determine the rank? + // Is rank appropriate here, or is additional checking based on specialize trait needed? + && (Kokkos::View< OutputType, OutputP... >().rank() == 2) +#endif >::type spmv( #if KOKKOSKERNELS_VERSION >= 40199 const ExecutionSpace& space, #endif +#if KOKKOSKERNELS_VERSION < 40299 KokkosKernels::Experimental::Controls, +#else + KokkosSparse::SPMVHandle, Kokkos::View< OutputType, OutputP... >>* handle, +#endif const char mode[], const AlphaType& a, const MatrixType& A, const Kokkos::View< InputType, InputP... >& x, const BetaType& b, - const Kokkos::View< OutputType, OutputP... >& y, - const RANK_TWO) + const Kokkos::View< OutputType, OutputP... >& y +#if KOKKOSKERNELS_VERSION < 40299 + , const RANK_TWO +#endif + ) { + std::cout << " STOKHOS MPVEC SPMV R2" << std::endl; #if KOKKOSKERNELS_VERSION >= 40199 if(space != ExecutionSpace()) { Kokkos::Impl::raise_error( @@ -667,7 +693,9 @@ spmv( if (y.extent(1) == 1) { auto y_1D = subview(y, Kokkos::ALL(), 0); auto x_1D = subview(x, Kokkos::ALL(), 0); -#if KOKKOSKERNELS_VERSION >= 40199 +#if KOKKOSKERNELS_VERSION >= 40299 + spmv(space, handle, mode, a, A, x_1D, b, y_1D); +#elif (KOKKOSKERNELS_VERSION < 40299) && (KOKKOSKERNELS_VERSION >= 40199) spmv(space, KokkosKernels::Experimental::Controls(), mode, a, A, x_1D, b, y_1D, RANK_ONE()); #else spmv(KokkosKernels::Experimental::Controls(), mode, a, A, x_1D, b, y_1D, RANK_ONE()); From 5179f162083abb9a7d52874d60c7fb9aaa114ea9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Mar 2024 11:49:21 +0000 Subject: [PATCH 11/24] Bump ossf/scorecard-action from 2.0.6 to 2.3.1 Bumps [ossf/scorecard-action](https://github.com/ossf/scorecard-action) from 2.0.6 to 2.3.1. - [Release notes](https://github.com/ossf/scorecard-action/releases) - [Changelog](https://github.com/ossf/scorecard-action/blob/main/RELEASE.md) - [Commits](https://github.com/ossf/scorecard-action/compare/99c53751e09b9529366343771cc321ec74e9bd3d...0864cf19026789058feabb7e87baa5f140aac736) --- updated-dependencies: - dependency-name: ossf/scorecard-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 85f6e38986b3..7dc4117b2396 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -36,7 +36,7 @@ jobs: persist-credentials: false - name: "Run analysis" - uses: ossf/scorecard-action@99c53751e09b9529366343771cc321ec74e9bd3d # v2.0.6 + uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1 with: results_file: results.sarif results_format: sarif From c3db16877f3e90b268e5056ed1a05e48c17aefca Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Mar 2024 11:49:26 +0000 Subject: [PATCH 12/24] Bump actions/github-script from 3.2.0 to 7.0.1 Bumps [actions/github-script](https://github.com/actions/github-script) from 3.2.0 to 7.0.1. - [Release notes](https://github.com/actions/github-script/releases) - [Commits](https://github.com/actions/github-script/compare/v3.2.0...60a0d83039c74a4aee543508d2ffcb1c3799cdea) --- updated-dependencies: - dependency-name: actions/github-script dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/clang_format.yml | 2 +- .github/workflows/tpetra_muelu_label_to_project.yml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/clang_format.yml b/.github/workflows/clang_format.yml index ca6c86305fee..57760cee24a0 100644 --- a/.github/workflows/clang_format.yml +++ b/.github/workflows/clang_format.yml @@ -38,7 +38,7 @@ jobs: message: | Your PR updated files that did not respect package clang formatting settings. Please apply the patch found [here](${{ steps.upload-artf.outputs.artifact-url }}) - - uses: actions/github-script@ffc2c79a5b2490bd33e0a41c1de74b877714d736 # v3.2.0 + - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 if: ${{ hashFiles('format_patch.txt') != '' }} with: script: | diff --git a/.github/workflows/tpetra_muelu_label_to_project.yml b/.github/workflows/tpetra_muelu_label_to_project.yml index 8f856769c04a..cd3a6b2afb3d 100644 --- a/.github/workflows/tpetra_muelu_label_to_project.yml +++ b/.github/workflows/tpetra_muelu_label_to_project.yml @@ -13,13 +13,13 @@ permissions: jobs: assign_one_project: permissions: - issues: write # for actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410 # v6.4.1 + issues: write # for actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 repository-projects: write # for srggrs/assign-one-project-github-action to assign issues and PRs to repo project name: Assign to MueLu or Tpetra Project runs-on: ubuntu-latest steps: - name: Add MueLu Label - uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410 # v6.4.1 + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 if: contains(github.event.issue.title, 'MueLu') with: script: | @@ -30,7 +30,7 @@ jobs: labels: ["pkg: MueLu"] }) - name: Add Tpetra Label - uses: actions/github-script@d7906e4ad0b1822421a7e6a35d5ca353c962f410 # v6.4.1 + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 if: contains(github.event.issue.title, 'Tpetra') with: script: | From c76fbfa577c5c7c500c4512a92c172cfacffb7a1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Mar 2024 11:49:28 +0000 Subject: [PATCH 13/24] Bump srggrs/assign-one-project-github-action from 1.3.0 to 1.3.1 Bumps [srggrs/assign-one-project-github-action](https://github.com/srggrs/assign-one-project-github-action) from 1.3.0 to 1.3.1. - [Release notes](https://github.com/srggrs/assign-one-project-github-action/releases) - [Changelog](https://github.com/srggrs/assign-one-project-github-action/blob/master/CHANGELOG.md) - [Commits](https://github.com/srggrs/assign-one-project-github-action/compare/37de3321023f8c12ea85372d748ab2017b995bfd...65a8ddab497df42ef268001e67bbf976f8fd39e1) --- updated-dependencies: - dependency-name: srggrs/assign-one-project-github-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/tpetra_muelu_label_to_project.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tpetra_muelu_label_to_project.yml b/.github/workflows/tpetra_muelu_label_to_project.yml index 8f856769c04a..f5eafec543cc 100644 --- a/.github/workflows/tpetra_muelu_label_to_project.yml +++ b/.github/workflows/tpetra_muelu_label_to_project.yml @@ -41,13 +41,13 @@ jobs: labels: ["pkg: Tpetra"] }) - name: Add to MueLu Project - uses: srggrs/assign-one-project-github-action@37de3321023f8c12ea85372d748ab2017b995bfd # 1.3.0 + uses: srggrs/assign-one-project-github-action@65a8ddab497df42ef268001e67bbf976f8fd39e1 # 1.3.1 if: contains(github.event.label.name, 'MueLu') || contains(github.event.issue.title, 'MueLu') with: project: 'https://github.com/trilinos/Trilinos/projects/5' column_name: 'Backlog' - name: Add to Tpetra Project - uses: srggrs/assign-one-project-github-action@37de3321023f8c12ea85372d748ab2017b995bfd # 1.3.0 + uses: srggrs/assign-one-project-github-action@65a8ddab497df42ef268001e67bbf976f8fd39e1 # 1.3.1 if: contains(github.event.label.name, 'Tpetra') || contains(github.event.issue.title, 'Tpetra') with: project: 'https://github.com/trilinos/Trilinos/projects/2' From 1e6e0dd0aeaab12c1fd1e624f5a3aa15d7d0f499 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Mar 2024 11:49:32 +0000 Subject: [PATCH 14/24] Bump peter-evans/create-or-update-comment from 2.1.1 to 4.0.0 Bumps [peter-evans/create-or-update-comment](https://github.com/peter-evans/create-or-update-comment) from 2.1.1 to 4.0.0. - [Release notes](https://github.com/peter-evans/create-or-update-comment/releases) - [Commits](https://github.com/peter-evans/create-or-update-comment/compare/67dcc547d311b736a8e6c5c236542148a47adc3d...71345be0265236311c031f5c7866368bd1eff043) --- updated-dependencies: - dependency-name: peter-evans/create-or-update-comment dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/title_to_mention.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/title_to_mention.yml b/.github/workflows/title_to_mention.yml index f0c0be816763..3c802e6b5831 100644 --- a/.github/workflows/title_to_mention.yml +++ b/.github/workflows/title_to_mention.yml @@ -18,14 +18,14 @@ jobs: steps: - name: Mention MueLu - uses: peter-evans/create-or-update-comment@67dcc547d311b736a8e6c5c236542148a47adc3d # v2.1.1 + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0 if: (contains(github.event.action, 'labeled') && contains(github.event.label.name, 'MueLu')) || (contains(github.event.action, 'opened') && contains(github.event.issue.title, 'MueLu')) with: issue-number: ${{ github.event.issue.number }} body: | Automatic mention of the @trilinos/muelu team - name: Mention Ifpack2 - uses: peter-evans/create-or-update-comment@67dcc547d311b736a8e6c5c236542148a47adc3d # v2.1.1 + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4.0.0 if: (contains(github.event.action, 'labeled') && contains(github.event.label.name, 'Ifpack2')) || (contains(github.event.action, 'opened') && contains(github.event.issue.title, 'Ifpack2')) with: issue-number: ${{ github.event.issue.number }} From 1bb335cc8f551f76a0d1e07525c3d13737d74e27 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Mar 2024 11:49:35 +0000 Subject: [PATCH 15/24] Bump actions/dependency-review-action from 2.5.1 to 4.1.3 Bumps [actions/dependency-review-action](https://github.com/actions/dependency-review-action) from 2.5.1 to 4.1.3. - [Release notes](https://github.com/actions/dependency-review-action/releases) - [Commits](https://github.com/actions/dependency-review-action/compare/0efb1d1d84fc9633afcdaad14c485cbbc90ef46c...9129d7d40b8c12c1ed0f60400d00c92d437adcce) --- updated-dependencies: - dependency-name: actions/dependency-review-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/dependency-review.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 3f3456223b0a..00c327a8f204 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -24,4 +24,4 @@ jobs: - name: 'Checkout Repository' uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 - name: 'Dependency Review' - uses: actions/dependency-review-action@0efb1d1d84fc9633afcdaad14c485cbbc90ef46c # v2.5.1 + uses: actions/dependency-review-action@9129d7d40b8c12c1ed0f60400d00c92d437adcce # v4.1.3 From 3a1cb4661d474e6b49c6cc3d980495ca69709da5 Mon Sep 17 00:00:00 2001 From: Chris Siefert Date: Wed, 13 Mar 2024 08:36:13 -0600 Subject: [PATCH 16/24] Update dependabot.yml --- .github/dependabot.yml | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index d7242a0f42c5..4d94ce08ae8d 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -3,14 +3,6 @@ updates: - package-ecosystem: github-actions directory: / schedule: - interval: daily - - - package-ecosystem: pip - directory: /cmake/tribits/doc/sphinx - schedule: - interval: daily - - - package-ecosystem: pip - directory: /packages/kokkos-kernels/docs - schedule: - interval: daily + interval: weekly + target-branch: develop + From 4e214021ba2315be8d7a57066e6779053e375c67 Mon Sep 17 00:00:00 2001 From: Vinh Dang Date: Wed, 13 Mar 2024 09:42:50 -0700 Subject: [PATCH 17/24] Add fence to make sure reordering is complete --- packages/ifpack2/src/Ifpack2_RILUK_def.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/ifpack2/src/Ifpack2_RILUK_def.hpp b/packages/ifpack2/src/Ifpack2_RILUK_def.hpp index e6ef53e4e4ea..c42873035843 100644 --- a/packages/ifpack2/src/Ifpack2_RILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_RILUK_def.hpp @@ -1237,7 +1237,7 @@ apply (const Tpetra::MultiVectorapply (ReorderedX, Y, mode); From 3904ea84f1a5776ff4ec49a9918d7708c041a562 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Wed, 13 Mar 2024 10:46:29 -0600 Subject: [PATCH 18/24] Finish updating sacado overloads of spmv --- .../pce/linalg/Kokkos_CrsMatrix_UQ_PCE.hpp | 22 +++++++++-------- .../linalg/Kokkos_CrsMatrix_MP_Vector.hpp | 24 ++++++++++--------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/packages/stokhos/src/sacado/kokkos/pce/linalg/Kokkos_CrsMatrix_UQ_PCE.hpp b/packages/stokhos/src/sacado/kokkos/pce/linalg/Kokkos_CrsMatrix_UQ_PCE.hpp index bd951d700755..6031d6fc4d2d 100644 --- a/packages/stokhos/src/sacado/kokkos/pce/linalg/Kokkos_CrsMatrix_UQ_PCE.hpp +++ b/packages/stokhos/src/sacado/kokkos/pce/linalg/Kokkos_CrsMatrix_UQ_PCE.hpp @@ -1470,6 +1470,9 @@ namespace KokkosSparse { template < #if KOKKOSKERNELS_VERSION >= 40199 typename ExecutionSpace, +#endif +#if KOKKOSKERNELS_VERSION >= 40299 + typename Handle, #endif typename AlphaType, typename BetaType, @@ -1482,9 +1485,8 @@ typename std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value && Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value #if KOKKOSKERNELS_VERSION >= 40299 - // TODO what is an alternative compile-time option to determine the rank? - // Is rank appropriate here, or is additional checking based on specialize trait needed? - && (Kokkos::View< OutputType, OutputP... >().rank() == 1) + && KokkosSparse::is_crs_matrix_v + && (Kokkos::View< OutputType, OutputP... >::rank() == 1) #endif >::type spmv( @@ -1494,7 +1496,7 @@ spmv( #if KOKKOSKERNELS_VERSION < 40299 KokkosKernels::Experimental::Controls, #else - KokkosSparse::SPMVHandle, Kokkos::View< OutputType, OutputP... >>* handle, + Handle* handle, #endif const char mode[], const AlphaType& a, @@ -1507,7 +1509,6 @@ spmv( #endif ) { - std::cout << " STOKHOS UQPCE SPMV R1" << std::endl; typedef Kokkos::View< OutputType, OutputP... > OutputVectorType; typedef Kokkos::View< InputType, InputP... > InputVectorType; typedef Stokhos::Multiply= 40199 typename ExecutionSpace, +#endif +#if KOKKOSKERNELS_VERSION >= 40299 + typename Handle, #endif typename AlphaType, typename BetaType, @@ -1556,9 +1560,8 @@ typename std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value && Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value #if KOKKOSKERNELS_VERSION >= 40299 - // TODO what is an alternative compile-time option to determine the rank? - // Is rank appropriate here, or is additional checking based on specialize trait needed? - && (Kokkos::View< OutputType, OutputP... >().rank() == 2) + && KokkosSparse::is_crs_matrix_v + && (Kokkos::View< OutputType, OutputP... >::rank() == 2) #endif >::type spmv( @@ -1568,7 +1571,7 @@ spmv( #if KOKKOSKERNELS_VERSION < 40299 KokkosKernels::Experimental::Controls, #else - KokkosSparse::SPMVHandle, Kokkos::View< OutputType, OutputP... >>* handle, + Handle* handle, #endif const char mode[], const AlphaType& a, @@ -1581,7 +1584,6 @@ spmv( #endif ) { - std::cout << " STOKHOS UQPCE SPMV R2" << std::endl; #if KOKKOSKERNELS_VERSION >= 40199 if(space != ExecutionSpace()) { Kokkos::Impl::raise_error( diff --git a/packages/stokhos/src/sacado/kokkos/vector/linalg/Kokkos_CrsMatrix_MP_Vector.hpp b/packages/stokhos/src/sacado/kokkos/vector/linalg/Kokkos_CrsMatrix_MP_Vector.hpp index c276b9d62757..2fd9472604c8 100644 --- a/packages/stokhos/src/sacado/kokkos/vector/linalg/Kokkos_CrsMatrix_MP_Vector.hpp +++ b/packages/stokhos/src/sacado/kokkos/vector/linalg/Kokkos_CrsMatrix_MP_Vector.hpp @@ -532,6 +532,9 @@ namespace KokkosSparse { template < #if KOKKOSKERNELS_VERSION >= 40199 typename ExecutionSpace, +#endif +#if KOKKOSKERNELS_VERSION >= 40299 + typename Handle, #endif typename AlphaType, typename BetaType, @@ -544,9 +547,8 @@ typename std::enable_if< Kokkos::is_view_mp_vector< Kokkos::View< InputType, InputP... > >::value && Kokkos::is_view_mp_vector< Kokkos::View< OutputType, OutputP... > >::value #if KOKKOSKERNELS_VERSION >= 40299 - // TODO what is an alternative compile-time option to determine the rank? - // Is rank appropriate here, or is additional checking based on specialize trait needed? - && (Kokkos::View< OutputType, OutputP... >().rank() == 1) + && KokkosSparse::is_crs_matrix_v + && (Kokkos::View< OutputType, OutputP... >::rank() == 1) #endif >::type spmv( @@ -556,7 +558,7 @@ spmv( #if KOKKOSKERNELS_VERSION < 40299 KokkosKernels::Experimental::Controls, #else - KokkosSparse::SPMVHandle, Kokkos::View< OutputType, OutputP... >>* handle, + Handle* handle, #endif const char mode[], const AlphaType& a, @@ -567,9 +569,8 @@ spmv( #if KOKKOSKERNELS_VERSION < 40299 , const RANK_ONE #endif - ) +) { - std::cout << " STOKHOS MPVEC SPMV R1" << std::endl; typedef Kokkos::View< OutputType, OutputP... > OutputVectorType; typedef Kokkos::View< InputType, InputP... > InputVectorType; using input_vector_type = const_type_t; @@ -642,6 +643,9 @@ spmv( template < #if KOKKOSKERNELS_VERSION >= 40199 typename ExecutionSpace, +#endif +#if KOKKOSKERNELS_VERSION >= 40299 + typename Handle, #endif typename AlphaType, typename BetaType, @@ -654,9 +658,8 @@ typename std::enable_if< Kokkos::is_view_mp_vector< Kokkos::View< InputType, InputP... > >::value && Kokkos::is_view_mp_vector< Kokkos::View< OutputType, OutputP... > >::value #if KOKKOSKERNELS_VERSION >= 40299 - // TODO what is an alternative compile-time option to determine the rank? - // Is rank appropriate here, or is additional checking based on specialize trait needed? - && (Kokkos::View< OutputType, OutputP... >().rank() == 2) + && KokkosSparse::is_crs_matrix_v + && (Kokkos::View< OutputType, OutputP... >::rank() == 2) #endif >::type spmv( @@ -666,7 +669,7 @@ spmv( #if KOKKOSKERNELS_VERSION < 40299 KokkosKernels::Experimental::Controls, #else - KokkosSparse::SPMVHandle, Kokkos::View< OutputType, OutputP... >>* handle, + Handle* handle, #endif const char mode[], const AlphaType& a, @@ -679,7 +682,6 @@ spmv( #endif ) { - std::cout << " STOKHOS MPVEC SPMV R2" << std::endl; #if KOKKOSKERNELS_VERSION >= 40199 if(space != ExecutionSpace()) { Kokkos::Impl::raise_error( From 905c5909606a39950aa9597d23027d9a70d28f32 Mon Sep 17 00:00:00 2001 From: malphil Date: Wed, 13 Mar 2024 14:46:05 -0600 Subject: [PATCH 19/24] Add missing const qualifiers to getDomainMap and getRangeMap in TpetraHalfPrecisionOperator --- .../src/Operator/Xpetra_TpetraHalfPrecisionOperator.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/xpetra/src/Operator/Xpetra_TpetraHalfPrecisionOperator.hpp b/packages/xpetra/src/Operator/Xpetra_TpetraHalfPrecisionOperator.hpp index 509294a77fdc..b0c87f06d811 100644 --- a/packages/xpetra/src/Operator/Xpetra_TpetraHalfPrecisionOperator.hpp +++ b/packages/xpetra/src/Operator/Xpetra_TpetraHalfPrecisionOperator.hpp @@ -127,12 +127,12 @@ class TpetraHalfPrecisionOperator : public Xpetra::Operator> getDomainMap() const { + const Teuchos::RCP> getDomainMap() const { return Op_->getDomainMap(); } //! Returns the Tpetra::Map object associated with the range of this TpetraOperator. - Teuchos::RCP> getRangeMap() const { + const Teuchos::RCP> getRangeMap() const { return Op_->getRangeMap(); } From 6fc4035b2815b107c1c84e28924148b7c08b469b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Mar 2024 22:32:17 +0000 Subject: [PATCH 20/24] Bump github/codeql-action from 2.24.5 to 3.24.7 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2.24.5 to 3.24.7. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/a56a03b370b87b26fde6d680755f818cfda0372b...3ab4101902695724f9365a384f86c1074d94e18c) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 85f6e38986b3..ed03f8d5dc73 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -66,6 +66,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@a56a03b370b87b26fde6d680755f818cfda0372b # v2.24.5 + uses: github/codeql-action/upload-sarif@3ab4101902695724f9365a384f86c1074d94e18c # v3.24.7 with: sarif_file: results.sarif From 59c6e8c0a5b5dfe57b72a3396f6f0b19f81215dc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Mar 2024 22:32:20 +0000 Subject: [PATCH 21/24] Bump styfle/cancel-workflow-action from 0.11.0 to 0.12.1 Bumps [styfle/cancel-workflow-action](https://github.com/styfle/cancel-workflow-action) from 0.11.0 to 0.12.1. - [Release notes](https://github.com/styfle/cancel-workflow-action/releases) - [Commits](https://github.com/styfle/cancel-workflow-action/compare/b173b6ec0100793626c2d9e6b90435061f4fc3e5...85880fa0301c86cca9da44039ee3bb12d3bedbfa) --- updated-dependencies: - dependency-name: styfle/cancel-workflow-action dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- .github/workflows/PR-gcc-openmpi.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/PR-gcc-openmpi.yml b/.github/workflows/PR-gcc-openmpi.yml index 7eb91f341044..27f850e2b52d 100644 --- a/.github/workflows/PR-gcc-openmpi.yml +++ b/.github/workflows/PR-gcc-openmpi.yml @@ -35,7 +35,7 @@ jobs: bash -l -c "module list" printenv PATH - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@b173b6ec0100793626c2d9e6b90435061f4fc3e5 # 0.11.0 + uses: styfle/cancel-workflow-action@85880fa0301c86cca9da44039ee3bb12d3bedbfa # 0.12.1 with: access_token: ${{ github.token }} - name: make dirs From aa8643cfdfe57230866a1c7b3ab56e0755e188c1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Mar 2024 22:32:23 +0000 Subject: [PATCH 22/24] Bump mshick/add-pr-comment from 2.8.1 to 2.8.2 Bumps [mshick/add-pr-comment](https://github.com/mshick/add-pr-comment) from 2.8.1 to 2.8.2. - [Release notes](https://github.com/mshick/add-pr-comment/releases) - [Commits](https://github.com/mshick/add-pr-comment/compare/v2.8.1...b8f338c590a895d50bcbfa6c5859251edc8952fc) --- updated-dependencies: - dependency-name: mshick/add-pr-comment dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/clang_format.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/clang_format.yml b/.github/workflows/clang_format.yml index ca6c86305fee..c12c6a7843eb 100644 --- a/.github/workflows/clang_format.yml +++ b/.github/workflows/clang_format.yml @@ -32,7 +32,7 @@ jobs: # This does not work for PRs from forks. - name: Post artifact in issue comment - uses: mshick/add-pr-comment@7c0890544fb33b0bdd2e59467fbacb62e028a096 # v2.8.1 + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2.8.2 if: ${{ (hashFiles('format_patch.txt') != '') && (github.event.pull_request.head.repo.full_name == github.repository) }} with: message: | From 02f62eb3ed14940ae6015d537ea8035b643c7995 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Mar 2024 22:32:26 +0000 Subject: [PATCH 23/24] Bump actions/upload-artifact from 3.1.3 to 4.3.1 Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 3.1.3 to 4.3.1. - [Release notes](https://github.com/actions/upload-artifact/releases) - [Commits](https://github.com/actions/upload-artifact/compare/v3.1.3...5d5d22a31266ced268874388b861e4b58bb5c2f3) --- updated-dependencies: - dependency-name: actions/upload-artifact dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/scorecards.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index 85f6e38986b3..a9425b5e26c6 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -58,7 +58,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3 + uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 with: name: SARIF file path: results.sarif From 22eaa480d2dea565df1bca28f758d2b86c0e05c3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 14 Mar 2024 22:45:13 +0000 Subject: [PATCH 24/24] Bump actions/stale from 4.1.1 to 9.0.0 Bumps [actions/stale](https://github.com/actions/stale) from 4.1.1 to 9.0.0. - [Release notes](https://github.com/actions/stale/releases) - [Changelog](https://github.com/actions/stale/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/stale/compare/a20b814fb01b71def3bd6f56e7494d667ddf28da...28ca1036281a5e5922ead5184a1bbf96e5fc984e) --- updated-dependencies: - dependency-name: actions/stale dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/stale.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index bf8e74755e96..3fcfd0d5f391 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -26,7 +26,7 @@ jobs: pull-requests: write # for actions/stale to close stale PRs runs-on: ubuntu-latest steps: - - uses: actions/stale@a20b814fb01b71def3bd6f56e7494d667ddf28da # v4.1.1 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0 with: debug-only: false ascending: true