From 8794cc3fcd5c66b5e1f0509f1328fbe354a6673a Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Thu, 27 Oct 2022 12:37:59 -0600 Subject: [PATCH 1/4] FROSch : move symbolic factorization of kII to symbolic phase, and use Kokkos for "detectLinearDependencies - Push-back" --- .../CoarseSpaces/FROSch_CoarseSpace_def.hpp | 6 +- ...ROSch_AlgebraicOverlappingOperator_def.hpp | 6 +- .../FROSch_CoarseOperator_decl.hpp | 1 + .../FROSch_HarmonicCoarseOperator_decl.hpp | 116 +++++++++++ .../FROSch_HarmonicCoarseOperator_def.hpp | 190 ++++++++++++++---- .../FROSch_SchwarzOperator_decl.hpp | 1 + .../Tools/FROSch_ExtractSubmatrices_decl.hpp | 6 + .../Tools/FROSch_ExtractSubmatrices_def.hpp | 35 ++-- 8 files changed, 308 insertions(+), 53 deletions(-) diff --git a/packages/shylu/shylu_dd/frosch/src/CoarseSpaces/FROSch_CoarseSpace_def.hpp b/packages/shylu/shylu_dd/frosch/src/CoarseSpaces/FROSch_CoarseSpace_def.hpp index 260e5d33af6e..3ce7b3714a59 100644 --- a/packages/shylu/shylu_dd/frosch/src/CoarseSpaces/FROSch_CoarseSpace_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/CoarseSpaces/FROSch_CoarseSpace_def.hpp @@ -238,7 +238,7 @@ namespace FROSch { // count number of nonzeros per row UN numLocalRows = rowMap->getLocalNumElements(); - rowptr_type Rowptr ("Rowptr", numLocalRows+1); + rowptr_type Rowptr (Kokkos::ViewAllocateWithoutInitializing("Rowptr"), numLocalRows+1); Kokkos::deep_copy(Rowptr, 0); Kokkos::parallel_for( "FROSch_CoarseSpace::countGlobalBasisMatrix", policy_row, @@ -268,8 +268,8 @@ namespace FROSch { (1+numLocalRows, Rowptr); // fill into the local matrix - indices_type Indices ("Indices", nnz); - values_type Values ("Values", nnz); + indices_type Indices (Kokkos::ViewAllocateWithoutInitializing("Indices"), nnz); + values_type Values (Kokkos::ViewAllocateWithoutInitializing("Values"), nnz); auto AssembledBasisLocalMap = AssembledBasisMap_->getLocalMap(); Kokkos::parallel_for( "FROSch_CoarseSpace::fillGlobalBasisMatrix", policy_row, diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_AlgebraicOverlappingOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_AlgebraicOverlappingOperator_def.hpp index ebfd489a9ff0..c82bad4affe0 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_AlgebraicOverlappingOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_AlgebraicOverlappingOperator_def.hpp @@ -294,7 +294,7 @@ namespace FROSch { FROSCH_DETAILTIMER_START_LEVELID(updateLocalOverlappingMatricesTime,"AlgebraicOverlappingOperator::updateLocalOverlappingMatrices"); if (this->ExtractLocalSubdomainMatrix_Symbolic_Done_) { // using original K_ as input - ExtractLocalSubdomainMatrix_Compute(this->K_, this->subdomainMatrix_, this->localSubdomainMatrix_); + ExtractLocalSubdomainMatrix_Compute(this->subdomainScatter_, this->K_, this->subdomainMatrix_, this->localSubdomainMatrix_); this->OverlappingMatrix_ = this->localSubdomainMatrix_.getConst(); } else { if (this->IsComputed_) { @@ -322,8 +322,8 @@ namespace FROSch { FROSCH_DETAILTIMER_START_LEVELID(AlgebraicOverlappin_extractLocalSubdomainMatrix_SymbolicTime,"AlgebraicOverlappinOperator::extractLocalSubdomainMatrix_Symbolic"); // buid sudomain matrix this->subdomainMatrix_ = MatrixFactory::Build(this->OverlappingMap_, this->OverlappingMatrix_->getGlobalMaxNumRowEntries()); - RCP > scatter = ImportFactory::Build(this->OverlappingMatrix_->getRowMap(), this->OverlappingMap_); - this->subdomainMatrix_->doImport(*(this->OverlappingMatrix_), *scatter, ADD); + this->subdomainScatter_ = ImportFactory::Build(this->OverlappingMatrix_->getRowMap(), this->OverlappingMap_); + this->subdomainMatrix_->doImport(*(this->OverlappingMatrix_), *(this->subdomainScatter_), ADD); // build local subdomain matrix RCP > SerialComm = rcp(new MpiComm(MPI_COMM_SELF)); diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_decl.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_decl.hpp index 497c818dd557..1ea5213a7f24 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_decl.hpp @@ -204,6 +204,7 @@ namespace FROSch { bool coarseExtractLocalSubdomainMatrix_Symbolic_Done_ = false; XMatrixPtr coarseSubdomainMatrix_; XMatrixPtr coarseLocalSubdomainMatrix_; + XImportPtr coarseScatter_; // Temp Vectors for apply() mutable XMultiVectorPtr XTmp_; diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_decl.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_decl.hpp index fb7b13d12ff4..d66fcce52276 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_decl.hpp @@ -154,6 +154,122 @@ namespace FROSch { UN j_out; SCView data_out; }; + + + struct ScaleTag {}; + struct CountNnzTag {}; + struct TotalNnzTag {}; + struct FillNzEntriesTag {}; + template + struct detectLinearDependenciesFunctor + { + using Real = typename Teuchos::ScalarTraits::magnitudeType; + using STS = Kokkos::ArithTraits; + using RTS = Kokkos::ArithTraits; + + UN numRows; + UN numCols; + SCView scale; + localMVBasisType localMVBasis; + + SC tresholdDropping; + indicesView indicesGammaDofsAll; + localRowMapType localRowMap; + localRowMapType localRepeatedMap; + + RowptrType Rowptr; + IndicesType Indices; + ValuesType Values; + + // Constructor for ScaleTag + detectLinearDependenciesFunctor(UN numRows_, UN numCols_, SCView scale_, localMVBasisType localMVBasis_) : + numRows (numRows_), + numCols (numCols_), + scale (scale_), + localMVBasis (localMVBasis_) + {} + + // Constructor for CountNnzTag + detectLinearDependenciesFunctor(UN numRows_, UN numCols_, localMVBasisType localMVBasis_, SC tresholdDropping_, + indicesView indicesGammaDofsAll_, localRowMapType localRowMap_, localRowMapType localRepeatedMap_, + RowptrType Rowptr_) : + numRows (numRows_), + numCols (numCols_), + scale (), + localMVBasis (localMVBasis_), + tresholdDropping (tresholdDropping_), + indicesGammaDofsAll (indicesGammaDofsAll_), + localRowMap (localRowMap_), + localRepeatedMap (localRepeatedMap_), + Rowptr (Rowptr_) + {} + + // Constructor for FillNzEntriesTag + detectLinearDependenciesFunctor(UN numRows_, UN numCols_, SCView scale_, localMVBasisType localMVBasis_, + SC tresholdDropping_, indicesView indicesGammaDofsAll_, + localRowMapType localRowMap_, localRowMapType localRepeatedMap_, + RowptrType Rowptr_, IndicesType Indices_, ValuesType Values_) : + numRows (numRows_), + numCols (numCols_), + scale (scale_), + localMVBasis (localMVBasis_), + tresholdDropping (tresholdDropping_), + indicesGammaDofsAll (indicesGammaDofsAll_), + localRowMap (localRowMap_), + localRepeatedMap (localRepeatedMap_), + Rowptr (Rowptr_), + Indices (Indices_), + Values (Values_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const ScaleTag &, const int j) const { + scale(j) = STS::zero(); + for (UN i = 0; i < numRows; i++) { + scale(j) += localMVBasis(i,j)*localMVBasis(i,j); + } + scale(j) = RTS::one()/RTS::sqrt(STS::abs(scale(j))); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const CountNnzTag &, const int i) const { + LO rowID = indicesGammaDofsAll[i]; + GO iGlobal = localRepeatedMap.getGlobalElement(rowID); + LO iLocal = localRowMap.getLocalElement(iGlobal); + if (iLocal!=-1) { // This should prevent duplicate entries on the interface + for (UN j=0; jtresholdDropping) { + Rowptr(iLocal+1) ++; + } + } + } + } + + + KOKKOS_INLINE_FUNCTION + void operator()(const TotalNnzTag&, const size_t i, UN &lsum) const { + lsum += Rowptr[i]; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const FillNzEntriesTag &, const int i) const { + LO rowID = indicesGammaDofsAll[i]; + GO iGlobal = localRepeatedMap.getGlobalElement(rowID); + LO iLocal = localRowMap.getLocalElement(iGlobal); + if (iLocal!=-1) { // This should prevent duplicate entries on the interface + UN nnz_i = Rowptr(iLocal); + for (UN j=0; jtresholdDropping) { + Indices(nnz_i) = j; //localBasisMap.getGlobalElement(j); + Values(nnz_i) = valueTmp*scale(j); + nnz_i ++; + } + } + } + } + }; #endif protected: diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp index 3dbd51bcd7c2..e3e6223cb3ae 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp @@ -69,7 +69,8 @@ namespace FROSch { ConstXMapPtr repeatedMap; ConstXMatrixPtr repeatedMatrix; if (this->coarseExtractLocalSubdomainMatrix_Symbolic_Done_) { - ExtractLocalSubdomainMatrix_Compute(this->K_.getConst(), + ExtractLocalSubdomainMatrix_Compute(this->coarseScatter_, + this->K_.getConst(), this->coarseSubdomainMatrix_, this->coarseLocalSubdomainMatrix_); repeatedMap = this->coarseSubdomainMatrix_->getRowMap(); @@ -503,39 +504,125 @@ namespace FROSch { { FROSCH_DETAILTIMER_START_LEVELID(detectLinearDependenciesTime,"HarmonicCoarseOperator::detectLinearDependencies"); LOVecPtr linearDependentVectors(AssembledInterfaceCoarseSpace_->getBasisMap()->getLocalNumElements()); //if (this->Verbose_) cout << AssembledInterfaceCoarseSpace_->getAssembledBasis()->getNumVectors() << " " << AssembledInterfaceCoarseSpace_->getAssembledBasis()->getLocalLength() << " " << indicesGammaDofsAll.size() << endl; - if (AssembledInterfaceCoarseSpace_->getAssembledBasis()->getNumVectors()>0 && AssembledInterfaceCoarseSpace_->getAssembledBasis()->getLocalLength()>0) { - //Construct matrix phiGamma - XMatrixPtr phiGamma = MatrixFactory::Build(rowMap,AssembledInterfaceCoarseSpace_->getBasisMap()->getLocalNumElements()); - // Array for scaling the columns of PhiGamma (1/norm(PhiGamma(:,i))) - SCVec scale(AssembledInterfaceCoarseSpace_->getAssembledBasis()->getNumVectors(),0.0); - for (UN i = 0; i < AssembledInterfaceCoarseSpace_->getAssembledBasis()->getNumVectors(); i++) { - ConstSCVecPtr assembledInterfaceCoarseSpaceData = AssembledInterfaceCoarseSpace_->getAssembledBasis()->getData(i); - for (UN j = 0; j < AssembledInterfaceCoarseSpace_->getAssembledBasis()->getLocalLength(); j++) { - scale[i] += assembledInterfaceCoarseSpaceData[j]*assembledInterfaceCoarseSpaceData[j]; + auto asembledBasis = AssembledInterfaceCoarseSpace_->getAssembledBasis(); + auto basisMap = AssembledInterfaceCoarseSpace_->getBasisMap(); + auto basisMapUnique = AssembledInterfaceCoarseSpace_->getBasisMapUnique(); + UN numMVRows = asembledBasis->getLocalLength(); + UN numCols = asembledBasis->getNumVectors(); + if (numMVRows > 0 && numCols > 0) { + + XMatrixPtr phiGamma; +#if defined(HAVE_XPETRA_KOKKOS_REFACTOR) && defined(HAVE_XPETRA_TPETRA) + if (rowMap->lib() == UseTpetra) + { + using crsmat_type = typename Matrix::local_matrix_type; + using graph_type = typename crsmat_type::StaticCrsGraphType; + using rowptr_type = typename graph_type::row_map_type::non_const_type; + using indices_type = typename graph_type::entries_type::non_const_type; + using values_type = typename crsmat_type::values_type::non_const_type; + using local_mv_type = typename MultiVector::dual_view_type::t_dev_const_um; + using local_map_type = typename Map::local_map_type; + + UN numRows = rowMap->getLocalNumElements(); + + auto localRowMap = rowMap->getLocalMap(); + auto localRepeatedMap = repeatedMap->getLocalMap(); + auto localBasisMap = basisMap->getLocalMap(); + + auto localMVBasis = asembledBasis->getDeviceLocalView(Xpetra::Access::ReadOnly); + + // Array for scaling the columns of PhiGamma (1/norm(PhiGamma(:,i))) + using execution_space = typename Map::local_map_type::execution_space; + using SCView = Kokkos::View; + SCView scale(Kokkos::ViewAllocateWithoutInitializing("FROSch::HarmonicCoarseOperator::detectLinearDependencies::scale"), numCols); + detectLinearDependenciesFunctor + scale_functor(numMVRows, numCols, scale, localMVBasis); + Kokkos::RangePolicy policy_scale (0, numCols); + Kokkos::parallel_for( + "FROSch_HarmonicCoarseOperator::detectLinearDependencies::scale", policy_scale, scale_functor); + + // count nnz + rowptr_type Rowptr (Kokkos::ViewAllocateWithoutInitializing("Rowptr"), numRows+1); + Kokkos::deep_copy(Rowptr, 0); + detectLinearDependenciesFunctor + count_functor(numMVRows, numCols, localMVBasis, tresholdDropping, indicesGammaDofsAll, + localRowMap, localRepeatedMap, Rowptr); + Kokkos::RangePolicy policy_count (0, numMVRows); + Kokkos::parallel_for( + "FROSch_HarmonicCoarseOperator::detectLinearDependencies::countNnz", policy_count, count_functor); + Kokkos::fence(); + + // get Nnz + UN nnz = 0; + Kokkos::RangePolicy policy_nnz (0, 1+numRows); + Kokkos::parallel_reduce( + "FROSch_HarmonicCoarseOperator::detectLinearDependencies::reduceNnz", policy_nnz, count_functor, nnz); + Kokkos::fence(); + + // make it into offsets + KokkosKernels::Impl::kk_inclusive_parallel_prefix_sum + (1+numRows, Rowptr); + Kokkos::fence(); + + // allocate local matrix + indices_type Indices (Kokkos::ViewAllocateWithoutInitializing("Indices"), nnz); + values_type Values (Kokkos::ViewAllocateWithoutInitializing("Values"), nnz); + + // fill in all the nz entries + detectLinearDependenciesFunctor + fill_functor(numMVRows, numCols, scale, localMVBasis, tresholdDropping, indicesGammaDofsAll, + localRowMap, localRepeatedMap, Rowptr, Indices, Values); + Kokkos::RangePolicy policy_fill (0, numMVRows); + Kokkos::parallel_for( + "FROSch_HarmonicCoarseOperator::detectLinearDependencies::fillNz", policy_fill, fill_functor); + Kokkos::fence(); + + Teuchos::RCP params = Teuchos::rcp (new Teuchos::ParameterList()); + params->set("sorted", false); + params->set("No Nonlocal Changes", true); + + graph_type crsgraph (Indices, Rowptr); + crsmat_type crsmat = crsmat_type ("CrsMatrix", numRows, Values, crsgraph); + + phiGamma = MatrixFactory::Build(crsmat, rowMap, basisMap, basisMapUnique, rangeMap, + params); + } else +#endif + { + // Array for scaling the columns of PhiGamma (1/norm(PhiGamma(:,i))) + SCVec scale(numCols, 0.0); + for (UN j = 0; j < numCols; j++) { + ConstSCVecPtr assembledInterfaceCoarseSpaceData = asembledBasis->getData(j); + for (UN i = 0; i < numMVRows; i++) { + scale[j] += assembledInterfaceCoarseSpaceData[i]*assembledInterfaceCoarseSpaceData[i]; + } + scale[j] = 1.0/sqrt(scale[j]); } - scale[i] = 1.0/sqrt(scale[i]); - } - LO iD; - SC valueTmp; - for (UN i=0; igetAssembledBasis()->getLocalLength(); i++) { - GOVec indices; - SCVec values; - iD = repeatedMap->getGlobalElement(indicesGammaDofsAll[i]); - if (rowMap->getLocalElement(iD)!=-1) { // This should prevent duplicate entries on the interface - for (UN j=0; jgetAssembledBasis()->getNumVectors(); j++) { - valueTmp=AssembledInterfaceCoarseSpace_->getAssembledBasis()->getData(j)[i]; - if (fabs(valueTmp)>tresholdDropping) { - indices.push_back(AssembledInterfaceCoarseSpace_->getBasisMap()->getGlobalElement(j)); - values.push_back(valueTmp*scale[j]); + //Construct matrix phiGamma + phiGamma = MatrixFactory::Build(rowMap,basisMap->getLocalNumElements()); + LO iD; + SC valueTmp; + for (UN i=0; igetLocalLength(); i++) { + iD = repeatedMap->getGlobalElement(indicesGammaDofsAll[i]); + if (rowMap->getLocalElement(iD)!=-1) { // This should prevent duplicate entries on the interface + GOVec indices; + SCVec values; + for (UN j=0; jgetNumVectors(); j++) { + valueTmp=asembledBasis->getData(j)[i]; + if (fabs(valueTmp)>tresholdDropping) { + indices.push_back(basisMap->getGlobalElement(j)); + values.push_back(valueTmp*scale[j]); + } } + phiGamma->insertGlobalValues(iD,indices(),values()); } - - phiGamma->insertGlobalValues(iD,indices(),values()); } + RCP fillCompleteParams(new ParameterList); + fillCompleteParams->set("No Nonlocal Changes", true); + phiGamma->fillComplete(basisMapUnique,rangeMap,fillCompleteParams); } - phiGamma->fillComplete(AssembledInterfaceCoarseSpace_->getBasisMapUnique(),rangeMap); //Compute Phi^T * Phi RCP fancy = fancyOStream(rcpFromRef(cout)); @@ -756,10 +843,14 @@ namespace FROSch { // Jetzt der solver für kII if (indicesIDofsAll.size()>0) { - ExtensionSolver_ = SolverFactory::Build(kII, - sublist(this->ParameterList_,"ExtensionSolver"), - string("ExtensionSolver (Level ") + to_string(this->LevelID_) + string(")")); - ExtensionSolver_->initialize(); + if (this->coarseExtractLocalSubdomainMatrix_Symbolic_Done_) { + ExtensionSolver_->updateMatrix(kII, true); + } else { + ExtensionSolver_ = SolverFactory::Build(kII, + sublist(this->ParameterList_,"ExtensionSolver"), + string("ExtensionSolver (Level ") + to_string(this->LevelID_) + string(")")); + ExtensionSolver_->initialize(); + } ExtensionSolver_->compute(); ExtensionSolver_->apply(*mVtmp,*mVPhiI); } @@ -803,7 +894,7 @@ namespace FROSch { using GOIndView = Kokkos::View; UN numIndices = indicesIDofsAll.size(); GOIndViewHost indicesIDofsAllHostData(indicesIDofsAll.getRawPtr(), numIndices); - GOIndView indicesIDofsAllData ("indicesIDofsAllData", numIndices); + GOIndView indicesIDofsAllData (Kokkos::ViewAllocateWithoutInitializing("indicesIDofsAllData"), numIndices); Kokkos::deep_copy(indicesIDofsAllData, indicesIDofsAllHostData); using xTMVector = Xpetra::TpetraMultiVector; @@ -1080,8 +1171,8 @@ namespace FROSch { // buid sudomain matrix this->coarseSubdomainMatrix_ = MatrixFactory::Build(repeatedMap, repeatedMap, this->K_->getGlobalMaxNumRowEntries()); - RCP > scatter = ImportFactory::Build(this->K_->getRowMap(), repeatedMap); - this->coarseSubdomainMatrix_->doImport(*(this->K_.getConst()), *scatter,ADD); + this->coarseScatter_ = ImportFactory::Build(this->K_->getRowMap(), repeatedMap); + this->coarseSubdomainMatrix_->doImport(*(this->K_.getConst()), *(this->coarseScatter_),ADD); // build local subdomain matrix RCP > SerialComm = rcp(new MpiComm(MPI_COMM_SELF)); @@ -1094,6 +1185,37 @@ namespace FROSch { // turn flag on this->coarseExtractLocalSubdomainMatrix_Symbolic_Done_ = true; + + + // ------------------------------------------------------- + // symbolic for computeExtensions to factor kII + GOVec indicesGammaDofsAll(0); + GOVec indicesIDofsAll(0); + LO tmp = 0; + + for (UN i=0; icoarseLocalSubdomainMatrix_; + BuildSubmatrices(repeatedMatrix.getConst(),indicesIDofsAll(),kII,kIGamma,kGammaI,kGammaGamma); + + // perform symbolic on kII + ExtensionSolver_ = SolverFactory::Build(kII, + sublist(this->ParameterList_,"ExtensionSolver"), + string("ExtensionSolver (Level ") + to_string(this->LevelID_) + string(")")); + ExtensionSolver_->initialize(); } } diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SchwarzOperator_decl.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SchwarzOperator_decl.hpp index b303eb74c1d9..0b9ad2652a7c 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SchwarzOperator_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SchwarzOperator_decl.hpp @@ -236,6 +236,7 @@ namespace FROSch { bool ExtractLocalSubdomainMatrix_Symbolic_Done_ = false; XMatrixPtr subdomainMatrix_; XMatrixPtr localSubdomainMatrix_; + XImportPtr subdomainScatter_; bool Verbose_ = false; diff --git a/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_decl.hpp b/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_decl.hpp index 622dc06b7ef6..2e190a686e30 100644 --- a/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_decl.hpp @@ -72,6 +72,12 @@ namespace FROSch { void ExtractLocalSubdomainMatrix_Compute(RCP > globalMatrix, RCP< Matrix > subdomainMatrix, RCP< Matrix > repeatedMatrix); + + template + void ExtractLocalSubdomainMatrix_Compute(RCP< Import > scatter, + RCP > globalMatrix, + RCP< Matrix > subdomainMatrix, + RCP< Matrix > repeatedMatrix); // ----------------------------------------------------------- // template diff --git a/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_def.hpp b/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_def.hpp index 39302b368811..c278e937a8e5 100644 --- a/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_def.hpp @@ -122,12 +122,21 @@ namespace FROSch { void ExtractLocalSubdomainMatrix_Compute(RCP > globalMatrix, RCP< Matrix > subdomainMatrix, RCP< Matrix > localSubdomainMatrix) + { + RCP > scatter = ImportFactory::Build(globalMatrix->getRowMap(), subdomainMatrix->getRowMap()); + ExtractLocalSubdomainMatrix_Compute(scatter, globalMatrix, subdomainMatrix, localSubdomainMatrix); + } + + template + void ExtractLocalSubdomainMatrix_Compute(RCP< Import > scatter, + RCP > globalMatrix, + RCP< Matrix > subdomainMatrix, + RCP< Matrix > localSubdomainMatrix) { FROSCH_DETAILTIMER_START(extractLocalSubdomainMatrixTime_compute, "ExtractLocalSubdomainMatrix_Compute"); const SC zero = ScalarTraits::zero(); auto subdomainRowMap = subdomainMatrix->getRowMap(); - RCP > scatter = ImportFactory::Build(globalMatrix->getRowMap(),subdomainRowMap); subdomainMatrix->setAllToScalar(zero); subdomainMatrix->resumeFill(); subdomainMatrix->doImport(*globalMatrix, *scatter, ADD); @@ -314,10 +323,10 @@ namespace FROSch { UN numRowsI = indI.size(); UN numRowsJ = indJ.size(); - rowptr_type RowptrII ("RowptrII", numRowsI+1); - rowptr_type RowptrIJ ("RowptrIJ", numRowsI+1); - rowptr_type RowptrJI ("RowptrJI", numRowsJ+1); - rowptr_type RowptrJJ ("RowptrJJ", numRowsJ+1); + rowptr_type RowptrII (Kokkos::ViewAllocateWithoutInitializing("RowptrII"), numRowsI+1); + rowptr_type RowptrIJ (Kokkos::ViewAllocateWithoutInitializing("RowptrIJ"), numRowsI+1); + rowptr_type RowptrJI (Kokkos::ViewAllocateWithoutInitializing("RowptrJI"), numRowsJ+1); + rowptr_type RowptrJJ (Kokkos::ViewAllocateWithoutInitializing("RowptrJJ"), numRowsJ+1); // count nnz on each blocks Kokkos::deep_copy(RowptrII, 0); @@ -388,17 +397,17 @@ namespace FROSch { (1+numRowsJ, RowptrJJ); // allocate kII block - indices_type IndicesII ("IndicesII", nnzII); - values_type ValuesII ("ValuesII", nnzII); + indices_type IndicesII (Kokkos::ViewAllocateWithoutInitializing("IndicesII"), nnzII); + values_type ValuesII (Kokkos::ViewAllocateWithoutInitializing("ValuesII"), nnzII); // allocate kIJ block - indices_type IndicesIJ ("IndicesIJ", nnzIJ); - values_type ValuesIJ ("ValuesIJ", nnzIJ); + indices_type IndicesIJ (Kokkos::ViewAllocateWithoutInitializing("IndicesIJ"), nnzIJ); + values_type ValuesIJ (Kokkos::ViewAllocateWithoutInitializing("ValuesIJ"), nnzIJ); // allocate kJI block - indices_type IndicesJI ("IndicesJI", nnzJI); - values_type ValuesJI ("ValuesJI", nnzJI); + indices_type IndicesJI (Kokkos::ViewAllocateWithoutInitializing("IndicesJI"), nnzJI); + values_type ValuesJI (Kokkos::ViewAllocateWithoutInitializing("ValuesJI"), nnzJI); // allocate kJJ block - indices_type IndicesJJ ("IndicesJJ", nnzJJ); - values_type ValuesJJ ("ValuesJJ", nnzJJ); + indices_type IndicesJJ (Kokkos::ViewAllocateWithoutInitializing("IndicesJJ"), nnzJJ); + values_type ValuesJJ (Kokkos::ViewAllocateWithoutInitializing("ValuesJJ"), nnzJJ); // fill in all the blocks Kokkos::parallel_for( From c9854ac41d75fae94e97a3a64f91d2841d8500ff Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Thu, 27 Oct 2022 12:43:22 -0600 Subject: [PATCH 2/4] FROSch : re-allocate auxiliary vectors when the number of columns changes --- .../FROSch_CoarseOperator_def.hpp | 29 ++++++++++++++----- .../FROSch_OverlappingOperator_def.hpp | 10 +++++-- .../FROSch_SumOperator_def.hpp | 4 ++- .../FROSch_Amesos2SolverTpetra_def.hpp | 4 ++- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_def.hpp index ce3107647fcd..94b39ba25964 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_def.hpp @@ -133,14 +133,21 @@ namespace FROSch { FROSCH_TIMER_START_LEVELID(applyTime,"CoarseOperator::apply"); static int i = 0; if (!Phi_.is_null() && this->IsComputed_) { - if (XTmp_.is_null()) XTmp_ = MultiVectorFactory::Build(x.getMap(),x.getNumVectors()); + if (XTmp_.is_null() || XTmp_->getNumVectors() != x.getNumVectors()) { + XTmp_ = MultiVectorFactory::Build(x.getMap(),x.getNumVectors()); + } *XTmp_ = x; if (!usePreconditionerOnly && mode == NO_TRANS) { this->K_->apply(x,*XTmp_,mode,ScalarTraits::one(),ScalarTraits::zero()); } - if (XCoarseSolve_.is_null()) XCoarseSolve_ = MultiVectorFactory::Build(GatheringMaps_[GatheringMaps_.size()-1],x.getNumVectors()); - else XCoarseSolve_->replaceMap(GatheringMaps_[GatheringMaps_.size()-1]); // The map is replaced in applyCoarseSolve(). If we do not build it from scratch, we should at least replace the map here. This may be important since the maps live on different communicators. - if (YCoarseSolve_.is_null()) YCoarseSolve_ = MultiVectorFactory::Build(GatheringMaps_[GatheringMaps_.size()-1],y.getNumVectors()); + if (XCoarseSolve_.is_null() || XCoarseSolve_->getNumVectors() != x.getNumVectors()) { + XCoarseSolve_ = MultiVectorFactory::Build(GatheringMaps_[GatheringMaps_.size()-1],x.getNumVectors()); + } else { + XCoarseSolve_->replaceMap(GatheringMaps_[GatheringMaps_.size()-1]); // The map is replaced in applyCoarseSolve(). If we do not build it from scratch, we should at least replace the map here. This may be important since the maps live on different communicators. + } + if (YCoarseSolve_.is_null() || YCoarseSolve_->getNumVectors() != y.getNumVectors()) { + YCoarseSolve_ = MultiVectorFactory::Build(GatheringMaps_[GatheringMaps_.size()-1],y.getNumVectors()); + } applyPhiT(*XTmp_,*XCoarseSolve_); applyCoarseSolve(*XCoarseSolve_,*YCoarseSolve_,mode); applyPhi(*YCoarseSolve_,*XTmp_); @@ -192,12 +199,18 @@ namespace FROSch { FROSCH_DETAILTIMER_START_LEVELID(applyCoarseSolveTime,"CoarseOperator::applyCoarseSolve"); if (OnCoarseSolveComm_) { x.replaceMap(CoarseSolveMap_); - if (YTmp_.is_null()) YTmp_ = MultiVectorFactory::Build(CoarseSolveMap_,x.getNumVectors()); - else YTmp_->replaceMap(CoarseSolveMap_); // The map is replaced later in this function. If we do not build it from scratch, we should at least replace the map here. This may be important since the maps live on different communicators. + if (YTmp_.is_null() || YTmp_->getNumVectors() != x.getNumVectors()) { + YTmp_ = MultiVectorFactory::Build(CoarseSolveMap_,x.getNumVectors()); + } else { + YTmp_->replaceMap(CoarseSolveMap_); // The map is replaced later in this function. If we do not build it from scratch, we should at least replace the map here. This may be important since the maps live on different communicators. + } CoarseSolver_->apply(x,*YTmp_,mode); } else { - if (YTmp_.is_null()) YTmp_ = MultiVectorFactory::Build(CoarseSolveMap_,x.getNumVectors()); - else YTmp_->replaceMap(CoarseSolveMap_); // The map is replaced later in this function. If we do not build it from scratch, we should at least replace the map here. This may be important since the maps live on different communicators. + if (YTmp_.is_null() || YTmp_->getNumVectors() != x.getNumVectors()) { + YTmp_ = MultiVectorFactory::Build(CoarseSolveMap_,x.getNumVectors()); + } else { + YTmp_->replaceMap(CoarseSolveMap_); // The map is replaced later in this function. If we do not build it from scratch, we should at least replace the map here. This may be important since the maps live on different communicators. + } } YTmp_->replaceMap(GatheringMaps_[GatheringMaps_.size()-1]); y = *YTmp_; diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_OverlappingOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_OverlappingOperator_def.hpp index c415223891d8..1ce7847adf77 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_OverlappingOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_OverlappingOperator_def.hpp @@ -83,13 +83,15 @@ namespace FROSch { { FROSCH_TIMER_START_LEVELID(applyTime,"OverlappingOperator::apply"); FROSCH_ASSERT(this->IsComputed_,"FROSch::OverlappingOperator: OverlappingOperator has to be computed before calling apply()"); - if (XTmp_.is_null()) XTmp_ = MultiVectorFactory::Build(x.getMap(),x.getNumVectors()); + if (XTmp_.is_null() || XTmp_->getNumVectors() != x.getNumVectors()) { + XTmp_ = MultiVectorFactory::Build(x.getMap(),x.getNumVectors()); + } *XTmp_ = x; if (!usePreconditionerOnly && mode == NO_TRANS) { this->K_->apply(x,*XTmp_,mode,ScalarTraits::one(),ScalarTraits::zero()); } // AH 11/28/2018: For Epetra, XOverlap_ will only have a view to the values of XOverlapTmp_. Therefore, xOverlapTmp should not be deleted before XOverlap_ is used. - if (YOverlap_.is_null()) { + if (YOverlap_.is_null() || YOverlap_->getNumVectors() != x.getNumVectors()) { YOverlap_ = MultiVectorFactory::Build(OverlappingMatrix_->getDomainMap(),x.getNumVectors()); } else { YOverlap_->replaceMap(OverlappingMatrix_->getDomainMap()); @@ -97,7 +99,9 @@ namespace FROSch { // AH 11/28/2018: replaceMap does not update the GlobalNumRows. Therefore, we have to create a new MultiVector on the serial Communicator. In Epetra, we can prevent to copy the MultiVector. if (XTmp_->getMap()->lib() == UseEpetra) { #ifdef HAVE_SHYLU_DDFROSCH_EPETRA - if (XOverlapTmp_.is_null()) XOverlapTmp_ = MultiVectorFactory::Build(OverlappingMap_,x.getNumVectors()); + if (XOverlapTmp_.is_null() || XOverlap_->getNumVectors() != x.getNumVectors()) { + XOverlapTmp_ = MultiVectorFactory::Build(OverlappingMap_,x.getNumVectors()); + } XOverlapTmp_->doImport(*XTmp_,*Scatter_,INSERT); const RCP > xEpetraMultiVectorXOverlapTmp = rcp_dynamic_cast >(XOverlapTmp_); RCP epetraMultiVectorXOverlapTmp = xEpetraMultiVectorXOverlapTmp->getEpetra_MultiVector(); diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SumOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SumOperator_def.hpp index 1aaa6910a45e..0f2f94143f2d 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SumOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SumOperator_def.hpp @@ -118,7 +118,9 @@ namespace FROSch { { FROSCH_TIMER_START_LEVELID(applyTime,"SumOperator::apply"); if (OperatorVector_.size()>0) { - if (XTmp_.is_null()) XTmp_ = MultiVectorFactory::Build(x.getMap(),x.getNumVectors()); + if (XTmp_.is_null() || XTmp_->getNumVectors() != x.getNumVectors()) { + XTmp_ = MultiVectorFactory::Build(x.getMap(),x.getNumVectors()); + } *XTmp_ = x; // Das brauche ich für den Fall das x=y UN itmp = 0; for (UN i=0; igetTpetra_MultiVector(); - if (Y_.is_null()) Y_ = XMultiVectorFactory::Build(y.getMap(),x.getNumVectors()); + if (Y_.is_null() || Y_->getNumVectors() != x.getNumVectors()) { + Y_ = XMultiVectorFactory::Build(y.getMap(),x.getNumVectors()); + } const TpetraMultiVector * xTpetraMultiVectorY = dynamic_cast *>(Y_.get()); FROSCH_ASSERT(xTpetraMultiVectorY,"FROSch::Amesos2SolverTpetra: dynamic_cast failed."); TMultiVectorPtr tpetraMultiVectorY = xTpetraMultiVectorY->getTpetra_MultiVector(); From a6eb3111956d85cd68e354dea3ba54b8056ffaac Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Thu, 27 Oct 2022 12:45:18 -0600 Subject: [PATCH 3/4] FROSch : add "using initialize" --- .../SchwarzPreconditioners/FROSch_GDSWPreconditioner_decl.hpp | 1 + .../SchwarzPreconditioners/FROSch_RGDSWPreconditioner_decl.hpp | 1 + .../FROSch_TwoLevelPreconditioner_decl.hpp | 1 + 3 files changed, 3 insertions(+) diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_GDSWPreconditioner_decl.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_GDSWPreconditioner_decl.hpp index eab7c0f962ee..2642a4f46c5d 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_GDSWPreconditioner_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_GDSWPreconditioner_decl.hpp @@ -82,6 +82,7 @@ namespace FROSch { using GOVecPtr = typename SchwarzPreconditioner::GOVecPtr; public: + using AlgebraicOverlappingPreconditioner::initialize; GDSWPreconditioner(ConstXMatrixPtr k, ParameterListPtr parameterList); diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_RGDSWPreconditioner_decl.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_RGDSWPreconditioner_decl.hpp index 13ec9393815a..7fca55b7524e 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_RGDSWPreconditioner_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_RGDSWPreconditioner_decl.hpp @@ -81,6 +81,7 @@ namespace FROSch { using GOVecPtr = typename SchwarzPreconditioner::GOVecPtr; public: + using AlgebraicOverlappingPreconditioner::initialize; RGDSWPreconditioner(ConstXMatrixPtr k, ParameterListPtr parameterList); diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_TwoLevelPreconditioner_decl.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_TwoLevelPreconditioner_decl.hpp index 7a51284eabbe..01306d0d722d 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_TwoLevelPreconditioner_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_TwoLevelPreconditioner_decl.hpp @@ -83,6 +83,7 @@ namespace FROSch { using GOVecPtr = typename SchwarzPreconditioner::GOVecPtr; public: + using OneLevelPreconditioner::initialize; TwoLevelPreconditioner(ConstXMatrixPtr k, ParameterListPtr parameterList); From c3429bdc8e6c90f5a56d9db7e7c68b054e5fe1f6 Mon Sep 17 00:00:00 2001 From: iyamazaki Date: Thu, 14 Mar 2024 19:05:07 -0600 Subject: [PATCH 4/4] FROSch : fix maps --- .../FROSch_AlgebraicOverlappingOperator_def.hpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_AlgebraicOverlappingOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_AlgebraicOverlappingOperator_def.hpp index c82bad4affe0..408e3a3482ba 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_AlgebraicOverlappingOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_AlgebraicOverlappingOperator_def.hpp @@ -322,8 +322,11 @@ namespace FROSch { FROSCH_DETAILTIMER_START_LEVELID(AlgebraicOverlappin_extractLocalSubdomainMatrix_SymbolicTime,"AlgebraicOverlappinOperator::extractLocalSubdomainMatrix_Symbolic"); // buid sudomain matrix this->subdomainMatrix_ = MatrixFactory::Build(this->OverlappingMap_, this->OverlappingMatrix_->getGlobalMaxNumRowEntries()); - this->subdomainScatter_ = ImportFactory::Build(this->OverlappingMatrix_->getRowMap(), this->OverlappingMap_); - this->subdomainMatrix_->doImport(*(this->OverlappingMatrix_), *(this->subdomainScatter_), ADD); + RCP > scatter = ImportFactory::Build(this->OverlappingMatrix_->getRowMap(), this->OverlappingMap_); + this->subdomainMatrix_->doImport(*(this->OverlappingMatrix_), *scatter, ADD); + + // Used to Map original K_ to overlapping suubdomainMatrix + this->subdomainScatter_ = ImportFactory::Build(this->K_->getRowMap(), this->OverlappingMap_); // build local subdomain matrix RCP > SerialComm = rcp(new MpiComm(MPI_COMM_SELF));