diff --git a/packages/shylu/shylu_dd/frosch/src/CoarseSpaces/FROSch_CoarseSpace_def.hpp b/packages/shylu/shylu_dd/frosch/src/CoarseSpaces/FROSch_CoarseSpace_def.hpp index c4be87918dcc..d944c8d745a9 100644 --- a/packages/shylu/shylu_dd/frosch/src/CoarseSpaces/FROSch_CoarseSpace_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/CoarseSpaces/FROSch_CoarseSpace_def.hpp @@ -238,7 +238,7 @@ namespace FROSch { // count number of nonzeros per row UN numLocalRows = rowMap->getLocalNumElements(); - rowptr_type Rowptr ("Rowptr", numLocalRows+1); + rowptr_type Rowptr (Kokkos::ViewAllocateWithoutInitializing("Rowptr"), numLocalRows+1); Kokkos::deep_copy(Rowptr, 0); Kokkos::parallel_for( "FROSch_CoarseSpace::countGlobalBasisMatrix", policy_row, @@ -273,8 +273,8 @@ namespace FROSch { #endif // fill into the local matrix - indices_type Indices ("Indices", nnz); - values_type Values ("Values", nnz); + indices_type Indices (Kokkos::ViewAllocateWithoutInitializing("Indices"), nnz); + values_type Values (Kokkos::ViewAllocateWithoutInitializing("Values"), nnz); auto AssembledBasisLocalMap = AssembledBasisMap_->getLocalMap(); Kokkos::parallel_for( "FROSch_CoarseSpace::fillGlobalBasisMatrix", policy_row, diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_AlgebraicOverlappingOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_AlgebraicOverlappingOperator_def.hpp index ebfd489a9ff0..408e3a3482ba 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_AlgebraicOverlappingOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_AlgebraicOverlappingOperator_def.hpp @@ -294,7 +294,7 @@ namespace FROSch { FROSCH_DETAILTIMER_START_LEVELID(updateLocalOverlappingMatricesTime,"AlgebraicOverlappingOperator::updateLocalOverlappingMatrices"); if (this->ExtractLocalSubdomainMatrix_Symbolic_Done_) { // using original K_ as input - ExtractLocalSubdomainMatrix_Compute(this->K_, this->subdomainMatrix_, this->localSubdomainMatrix_); + ExtractLocalSubdomainMatrix_Compute(this->subdomainScatter_, this->K_, this->subdomainMatrix_, this->localSubdomainMatrix_); this->OverlappingMatrix_ = this->localSubdomainMatrix_.getConst(); } else { if (this->IsComputed_) { @@ -325,6 +325,9 @@ namespace FROSch { RCP > scatter = ImportFactory::Build(this->OverlappingMatrix_->getRowMap(), this->OverlappingMap_); this->subdomainMatrix_->doImport(*(this->OverlappingMatrix_), *scatter, ADD); + // Used to Map original K_ to overlapping suubdomainMatrix + this->subdomainScatter_ = ImportFactory::Build(this->K_->getRowMap(), this->OverlappingMap_); + // build local subdomain matrix RCP > SerialComm = rcp(new MpiComm(MPI_COMM_SELF)); RCP > localSubdomainMap = MapFactory::Build(this->OverlappingMap_->lib(), this->OverlappingMap_->getLocalNumElements(), 0, SerialComm); diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_decl.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_decl.hpp index ca2e2e94b123..37dbb1670d4f 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_decl.hpp @@ -204,6 +204,7 @@ namespace FROSch { bool coarseExtractLocalSubdomainMatrix_Symbolic_Done_ = false; XMatrixPtr coarseSubdomainMatrix_; XMatrixPtr coarseLocalSubdomainMatrix_; + XImportPtr coarseScatter_; // Temp Vectors for apply() mutable XMultiVectorPtr XTmp_; diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_def.hpp index ce3107647fcd..94b39ba25964 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_CoarseOperator_def.hpp @@ -133,14 +133,21 @@ namespace FROSch { FROSCH_TIMER_START_LEVELID(applyTime,"CoarseOperator::apply"); static int i = 0; if (!Phi_.is_null() && this->IsComputed_) { - if (XTmp_.is_null()) XTmp_ = MultiVectorFactory::Build(x.getMap(),x.getNumVectors()); + if (XTmp_.is_null() || XTmp_->getNumVectors() != x.getNumVectors()) { + XTmp_ = MultiVectorFactory::Build(x.getMap(),x.getNumVectors()); + } *XTmp_ = x; if (!usePreconditionerOnly && mode == NO_TRANS) { this->K_->apply(x,*XTmp_,mode,ScalarTraits::one(),ScalarTraits::zero()); } - if (XCoarseSolve_.is_null()) XCoarseSolve_ = MultiVectorFactory::Build(GatheringMaps_[GatheringMaps_.size()-1],x.getNumVectors()); - else XCoarseSolve_->replaceMap(GatheringMaps_[GatheringMaps_.size()-1]); // The map is replaced in applyCoarseSolve(). If we do not build it from scratch, we should at least replace the map here. This may be important since the maps live on different communicators. - if (YCoarseSolve_.is_null()) YCoarseSolve_ = MultiVectorFactory::Build(GatheringMaps_[GatheringMaps_.size()-1],y.getNumVectors()); + if (XCoarseSolve_.is_null() || XCoarseSolve_->getNumVectors() != x.getNumVectors()) { + XCoarseSolve_ = MultiVectorFactory::Build(GatheringMaps_[GatheringMaps_.size()-1],x.getNumVectors()); + } else { + XCoarseSolve_->replaceMap(GatheringMaps_[GatheringMaps_.size()-1]); // The map is replaced in applyCoarseSolve(). If we do not build it from scratch, we should at least replace the map here. This may be important since the maps live on different communicators. + } + if (YCoarseSolve_.is_null() || YCoarseSolve_->getNumVectors() != y.getNumVectors()) { + YCoarseSolve_ = MultiVectorFactory::Build(GatheringMaps_[GatheringMaps_.size()-1],y.getNumVectors()); + } applyPhiT(*XTmp_,*XCoarseSolve_); applyCoarseSolve(*XCoarseSolve_,*YCoarseSolve_,mode); applyPhi(*YCoarseSolve_,*XTmp_); @@ -192,12 +199,18 @@ namespace FROSch { FROSCH_DETAILTIMER_START_LEVELID(applyCoarseSolveTime,"CoarseOperator::applyCoarseSolve"); if (OnCoarseSolveComm_) { x.replaceMap(CoarseSolveMap_); - if (YTmp_.is_null()) YTmp_ = MultiVectorFactory::Build(CoarseSolveMap_,x.getNumVectors()); - else YTmp_->replaceMap(CoarseSolveMap_); // The map is replaced later in this function. If we do not build it from scratch, we should at least replace the map here. This may be important since the maps live on different communicators. + if (YTmp_.is_null() || YTmp_->getNumVectors() != x.getNumVectors()) { + YTmp_ = MultiVectorFactory::Build(CoarseSolveMap_,x.getNumVectors()); + } else { + YTmp_->replaceMap(CoarseSolveMap_); // The map is replaced later in this function. If we do not build it from scratch, we should at least replace the map here. This may be important since the maps live on different communicators. + } CoarseSolver_->apply(x,*YTmp_,mode); } else { - if (YTmp_.is_null()) YTmp_ = MultiVectorFactory::Build(CoarseSolveMap_,x.getNumVectors()); - else YTmp_->replaceMap(CoarseSolveMap_); // The map is replaced later in this function. If we do not build it from scratch, we should at least replace the map here. This may be important since the maps live on different communicators. + if (YTmp_.is_null() || YTmp_->getNumVectors() != x.getNumVectors()) { + YTmp_ = MultiVectorFactory::Build(CoarseSolveMap_,x.getNumVectors()); + } else { + YTmp_->replaceMap(CoarseSolveMap_); // The map is replaced later in this function. If we do not build it from scratch, we should at least replace the map here. This may be important since the maps live on different communicators. + } } YTmp_->replaceMap(GatheringMaps_[GatheringMaps_.size()-1]); y = *YTmp_; diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_decl.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_decl.hpp index a90668481860..02042c540564 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_decl.hpp @@ -154,6 +154,122 @@ namespace FROSch { UN j_out; SCView data_out; }; + + + struct ScaleTag {}; + struct CountNnzTag {}; + struct TotalNnzTag {}; + struct FillNzEntriesTag {}; + template + struct detectLinearDependenciesFunctor + { + using Real = typename Teuchos::ScalarTraits::magnitudeType; + using STS = Kokkos::ArithTraits; + using RTS = Kokkos::ArithTraits; + + UN numRows; + UN numCols; + SCView scale; + localMVBasisType localMVBasis; + + SC tresholdDropping; + indicesView indicesGammaDofsAll; + localRowMapType localRowMap; + localRowMapType localRepeatedMap; + + RowptrType Rowptr; + IndicesType Indices; + ValuesType Values; + + // Constructor for ScaleTag + detectLinearDependenciesFunctor(UN numRows_, UN numCols_, SCView scale_, localMVBasisType localMVBasis_) : + numRows (numRows_), + numCols (numCols_), + scale (scale_), + localMVBasis (localMVBasis_) + {} + + // Constructor for CountNnzTag + detectLinearDependenciesFunctor(UN numRows_, UN numCols_, localMVBasisType localMVBasis_, SC tresholdDropping_, + indicesView indicesGammaDofsAll_, localRowMapType localRowMap_, localRowMapType localRepeatedMap_, + RowptrType Rowptr_) : + numRows (numRows_), + numCols (numCols_), + scale (), + localMVBasis (localMVBasis_), + tresholdDropping (tresholdDropping_), + indicesGammaDofsAll (indicesGammaDofsAll_), + localRowMap (localRowMap_), + localRepeatedMap (localRepeatedMap_), + Rowptr (Rowptr_) + {} + + // Constructor for FillNzEntriesTag + detectLinearDependenciesFunctor(UN numRows_, UN numCols_, SCView scale_, localMVBasisType localMVBasis_, + SC tresholdDropping_, indicesView indicesGammaDofsAll_, + localRowMapType localRowMap_, localRowMapType localRepeatedMap_, + RowptrType Rowptr_, IndicesType Indices_, ValuesType Values_) : + numRows (numRows_), + numCols (numCols_), + scale (scale_), + localMVBasis (localMVBasis_), + tresholdDropping (tresholdDropping_), + indicesGammaDofsAll (indicesGammaDofsAll_), + localRowMap (localRowMap_), + localRepeatedMap (localRepeatedMap_), + Rowptr (Rowptr_), + Indices (Indices_), + Values (Values_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const ScaleTag &, const int j) const { + scale(j) = STS::zero(); + for (UN i = 0; i < numRows; i++) { + scale(j) += localMVBasis(i,j)*localMVBasis(i,j); + } + scale(j) = RTS::one()/RTS::sqrt(STS::abs(scale(j))); + } + + KOKKOS_INLINE_FUNCTION + void operator()(const CountNnzTag &, const int i) const { + LO rowID = indicesGammaDofsAll[i]; + GO iGlobal = localRepeatedMap.getGlobalElement(rowID); + LO iLocal = localRowMap.getLocalElement(iGlobal); + if (iLocal!=-1) { // This should prevent duplicate entries on the interface + for (UN j=0; jtresholdDropping) { + Rowptr(iLocal+1) ++; + } + } + } + } + + + KOKKOS_INLINE_FUNCTION + void operator()(const TotalNnzTag&, const size_t i, UN &lsum) const { + lsum += Rowptr[i]; + } + + KOKKOS_INLINE_FUNCTION + void operator()(const FillNzEntriesTag &, const int i) const { + LO rowID = indicesGammaDofsAll[i]; + GO iGlobal = localRepeatedMap.getGlobalElement(rowID); + LO iLocal = localRowMap.getLocalElement(iGlobal); + if (iLocal!=-1) { // This should prevent duplicate entries on the interface + UN nnz_i = Rowptr(iLocal); + for (UN j=0; jtresholdDropping) { + Indices(nnz_i) = j; //localBasisMap.getGlobalElement(j); + Values(nnz_i) = valueTmp*scale(j); + nnz_i ++; + } + } + } + } + }; #endif protected: diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp index d691bdfa71a2..21e516f854d5 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_HarmonicCoarseOperator_def.hpp @@ -69,7 +69,8 @@ namespace FROSch { ConstXMapPtr repeatedMap; ConstXMatrixPtr repeatedMatrix; if (this->coarseExtractLocalSubdomainMatrix_Symbolic_Done_) { - ExtractLocalSubdomainMatrix_Compute(this->K_.getConst(), + ExtractLocalSubdomainMatrix_Compute(this->coarseScatter_, + this->K_.getConst(), this->coarseSubdomainMatrix_, this->coarseLocalSubdomainMatrix_); repeatedMap = this->coarseSubdomainMatrix_->getRowMap(); @@ -503,39 +504,125 @@ namespace FROSch { { FROSCH_DETAILTIMER_START_LEVELID(detectLinearDependenciesTime,"HarmonicCoarseOperator::detectLinearDependencies"); LOVecPtr linearDependentVectors(AssembledInterfaceCoarseSpace_->getBasisMap()->getLocalNumElements()); //if (this->Verbose_) cout << AssembledInterfaceCoarseSpace_->getAssembledBasis()->getNumVectors() << " " << AssembledInterfaceCoarseSpace_->getAssembledBasis()->getLocalLength() << " " << indicesGammaDofsAll.size() << endl; - if (AssembledInterfaceCoarseSpace_->getAssembledBasis()->getNumVectors()>0 && AssembledInterfaceCoarseSpace_->getAssembledBasis()->getLocalLength()>0) { - //Construct matrix phiGamma - XMatrixPtr phiGamma = MatrixFactory::Build(rowMap,AssembledInterfaceCoarseSpace_->getBasisMap()->getLocalNumElements()); - // Array for scaling the columns of PhiGamma (1/norm(PhiGamma(:,i))) - SCVec scale(AssembledInterfaceCoarseSpace_->getAssembledBasis()->getNumVectors(),0.0); - for (UN i = 0; i < AssembledInterfaceCoarseSpace_->getAssembledBasis()->getNumVectors(); i++) { - ConstSCVecPtr assembledInterfaceCoarseSpaceData = AssembledInterfaceCoarseSpace_->getAssembledBasis()->getData(i); - for (UN j = 0; j < AssembledInterfaceCoarseSpace_->getAssembledBasis()->getLocalLength(); j++) { - scale[i] += assembledInterfaceCoarseSpaceData[j]*assembledInterfaceCoarseSpaceData[j]; + auto asembledBasis = AssembledInterfaceCoarseSpace_->getAssembledBasis(); + auto basisMap = AssembledInterfaceCoarseSpace_->getBasisMap(); + auto basisMapUnique = AssembledInterfaceCoarseSpace_->getBasisMapUnique(); + UN numMVRows = asembledBasis->getLocalLength(); + UN numCols = asembledBasis->getNumVectors(); + if (numMVRows > 0 && numCols > 0) { + + XMatrixPtr phiGamma; +#if defined(HAVE_XPETRA_KOKKOS_REFACTOR) && defined(HAVE_XPETRA_TPETRA) + if (rowMap->lib() == UseTpetra) + { + using crsmat_type = typename Matrix::local_matrix_type; + using graph_type = typename crsmat_type::StaticCrsGraphType; + using rowptr_type = typename graph_type::row_map_type::non_const_type; + using indices_type = typename graph_type::entries_type::non_const_type; + using values_type = typename crsmat_type::values_type::non_const_type; + using local_mv_type = typename MultiVector::dual_view_type::t_dev_const_um; + using local_map_type = typename Map::local_map_type; + + UN numRows = rowMap->getLocalNumElements(); + + auto localRowMap = rowMap->getLocalMap(); + auto localRepeatedMap = repeatedMap->getLocalMap(); + auto localBasisMap = basisMap->getLocalMap(); + + auto localMVBasis = asembledBasis->getDeviceLocalView(Xpetra::Access::ReadOnly); + + // Array for scaling the columns of PhiGamma (1/norm(PhiGamma(:,i))) + using execution_space = typename Map::local_map_type::execution_space; + using SCView = Kokkos::View; + SCView scale(Kokkos::ViewAllocateWithoutInitializing("FROSch::HarmonicCoarseOperator::detectLinearDependencies::scale"), numCols); + detectLinearDependenciesFunctor + scale_functor(numMVRows, numCols, scale, localMVBasis); + Kokkos::RangePolicy policy_scale (0, numCols); + Kokkos::parallel_for( + "FROSch_HarmonicCoarseOperator::detectLinearDependencies::scale", policy_scale, scale_functor); + + // count nnz + rowptr_type Rowptr (Kokkos::ViewAllocateWithoutInitializing("Rowptr"), numRows+1); + Kokkos::deep_copy(Rowptr, 0); + detectLinearDependenciesFunctor + count_functor(numMVRows, numCols, localMVBasis, tresholdDropping, indicesGammaDofsAll, + localRowMap, localRepeatedMap, Rowptr); + Kokkos::RangePolicy policy_count (0, numMVRows); + Kokkos::parallel_for( + "FROSch_HarmonicCoarseOperator::detectLinearDependencies::countNnz", policy_count, count_functor); + Kokkos::fence(); + + // get Nnz + UN nnz = 0; + Kokkos::RangePolicy policy_nnz (0, 1+numRows); + Kokkos::parallel_reduce( + "FROSch_HarmonicCoarseOperator::detectLinearDependencies::reduceNnz", policy_nnz, count_functor, nnz); + Kokkos::fence(); + + // make it into offsets + KokkosKernels::Impl::kk_inclusive_parallel_prefix_sum + (1+numRows, Rowptr); + Kokkos::fence(); + + // allocate local matrix + indices_type Indices (Kokkos::ViewAllocateWithoutInitializing("Indices"), nnz); + values_type Values (Kokkos::ViewAllocateWithoutInitializing("Values"), nnz); + + // fill in all the nz entries + detectLinearDependenciesFunctor + fill_functor(numMVRows, numCols, scale, localMVBasis, tresholdDropping, indicesGammaDofsAll, + localRowMap, localRepeatedMap, Rowptr, Indices, Values); + Kokkos::RangePolicy policy_fill (0, numMVRows); + Kokkos::parallel_for( + "FROSch_HarmonicCoarseOperator::detectLinearDependencies::fillNz", policy_fill, fill_functor); + Kokkos::fence(); + + Teuchos::RCP params = Teuchos::rcp (new Teuchos::ParameterList()); + params->set("sorted", false); + params->set("No Nonlocal Changes", true); + + graph_type crsgraph (Indices, Rowptr); + crsmat_type crsmat = crsmat_type ("CrsMatrix", numRows, Values, crsgraph); + + phiGamma = MatrixFactory::Build(crsmat, rowMap, basisMap, basisMapUnique, rangeMap, + params); + } else +#endif + { + // Array for scaling the columns of PhiGamma (1/norm(PhiGamma(:,i))) + SCVec scale(numCols, 0.0); + for (UN j = 0; j < numCols; j++) { + ConstSCVecPtr assembledInterfaceCoarseSpaceData = asembledBasis->getData(j); + for (UN i = 0; i < numMVRows; i++) { + scale[j] += assembledInterfaceCoarseSpaceData[i]*assembledInterfaceCoarseSpaceData[i]; + } + scale[j] = 1.0/sqrt(scale[j]); } - scale[i] = 1.0/sqrt(scale[i]); - } - LO iD; - SC valueTmp; - for (UN i=0; igetAssembledBasis()->getLocalLength(); i++) { - GOVec indices; - SCVec values; - iD = repeatedMap->getGlobalElement(indicesGammaDofsAll[i]); - if (rowMap->getLocalElement(iD)!=-1) { // This should prevent duplicate entries on the interface - for (UN j=0; jgetAssembledBasis()->getNumVectors(); j++) { - valueTmp=AssembledInterfaceCoarseSpace_->getAssembledBasis()->getData(j)[i]; - if (fabs(valueTmp)>tresholdDropping) { - indices.push_back(AssembledInterfaceCoarseSpace_->getBasisMap()->getGlobalElement(j)); - values.push_back(valueTmp*scale[j]); + //Construct matrix phiGamma + phiGamma = MatrixFactory::Build(rowMap,basisMap->getLocalNumElements()); + LO iD; + SC valueTmp; + for (UN i=0; igetLocalLength(); i++) { + iD = repeatedMap->getGlobalElement(indicesGammaDofsAll[i]); + if (rowMap->getLocalElement(iD)!=-1) { // This should prevent duplicate entries on the interface + GOVec indices; + SCVec values; + for (UN j=0; jgetNumVectors(); j++) { + valueTmp=asembledBasis->getData(j)[i]; + if (fabs(valueTmp)>tresholdDropping) { + indices.push_back(basisMap->getGlobalElement(j)); + values.push_back(valueTmp*scale[j]); + } } + phiGamma->insertGlobalValues(iD,indices(),values()); } - - phiGamma->insertGlobalValues(iD,indices(),values()); } + RCP fillCompleteParams(new ParameterList); + fillCompleteParams->set("No Nonlocal Changes", true); + phiGamma->fillComplete(basisMapUnique,rangeMap,fillCompleteParams); } - phiGamma->fillComplete(AssembledInterfaceCoarseSpace_->getBasisMapUnique(),rangeMap); //Compute Phi^T * Phi RCP fancy = fancyOStream(rcpFromRef(cout)); @@ -756,10 +843,14 @@ namespace FROSch { // Jetzt der solver für kII if (indicesIDofsAll.size()>0) { - ExtensionSolver_ = SolverFactory::Build(kII, - sublist(this->ParameterList_,"ExtensionSolver"), - string("ExtensionSolver (Level ") + to_string(this->LevelID_) + string(")")); - ExtensionSolver_->initialize(); + if (this->coarseExtractLocalSubdomainMatrix_Symbolic_Done_) { + ExtensionSolver_->updateMatrix(kII, true); + } else { + ExtensionSolver_ = SolverFactory::Build(kII, + sublist(this->ParameterList_,"ExtensionSolver"), + string("ExtensionSolver (Level ") + to_string(this->LevelID_) + string(")")); + ExtensionSolver_->initialize(); + } ExtensionSolver_->compute(); ExtensionSolver_->apply(*mVtmp,*mVPhiI); } @@ -803,7 +894,7 @@ namespace FROSch { using GOIndView = Kokkos::View; UN numIndices = indicesIDofsAll.size(); GOIndViewHost indicesIDofsAllHostData(indicesIDofsAll.getRawPtr(), numIndices); - GOIndView indicesIDofsAllData ("indicesIDofsAllData", numIndices); + GOIndView indicesIDofsAllData (Kokkos::ViewAllocateWithoutInitializing("indicesIDofsAllData"), numIndices); Kokkos::deep_copy(indicesIDofsAllData, indicesIDofsAllHostData); using xTMVector = Xpetra::TpetraMultiVector; @@ -1080,8 +1171,8 @@ namespace FROSch { // buid sudomain matrix this->coarseSubdomainMatrix_ = MatrixFactory::Build(repeatedMap, repeatedMap, this->K_->getGlobalMaxNumRowEntries()); - RCP > scatter = ImportFactory::Build(this->K_->getRowMap(), repeatedMap); - this->coarseSubdomainMatrix_->doImport(*(this->K_.getConst()), *scatter,ADD); + this->coarseScatter_ = ImportFactory::Build(this->K_->getRowMap(), repeatedMap); + this->coarseSubdomainMatrix_->doImport(*(this->K_.getConst()), *(this->coarseScatter_),ADD); // build local subdomain matrix RCP > SerialComm = rcp(new MpiComm(MPI_COMM_SELF)); @@ -1094,6 +1185,37 @@ namespace FROSch { // turn flag on this->coarseExtractLocalSubdomainMatrix_Symbolic_Done_ = true; + + + // ------------------------------------------------------- + // symbolic for computeExtensions to factor kII + GOVec indicesGammaDofsAll(0); + GOVec indicesIDofsAll(0); + LO tmp = 0; + + for (UN i=0; icoarseLocalSubdomainMatrix_; + BuildSubmatrices(repeatedMatrix.getConst(),indicesIDofsAll(),kII,kIGamma,kGammaI,kGammaGamma); + + // perform symbolic on kII + ExtensionSolver_ = SolverFactory::Build(kII, + sublist(this->ParameterList_,"ExtensionSolver"), + string("ExtensionSolver (Level ") + to_string(this->LevelID_) + string(")")); + ExtensionSolver_->initialize(); } } diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_OverlappingOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_OverlappingOperator_def.hpp index 7087dc8c0c5d..b194f42786d5 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_OverlappingOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_OverlappingOperator_def.hpp @@ -83,13 +83,15 @@ namespace FROSch { { FROSCH_TIMER_START_LEVELID(applyTime,"OverlappingOperator::apply"); FROSCH_ASSERT(this->IsComputed_,"FROSch::OverlappingOperator: OverlappingOperator has to be computed before calling apply()"); - if (XTmp_.is_null()) XTmp_ = MultiVectorFactory::Build(x.getMap(),x.getNumVectors()); + if (XTmp_.is_null() || XTmp_->getNumVectors() != x.getNumVectors()) { + XTmp_ = MultiVectorFactory::Build(x.getMap(),x.getNumVectors()); + } *XTmp_ = x; if (!usePreconditionerOnly && mode == NO_TRANS) { this->K_->apply(x,*XTmp_,mode,ScalarTraits::one(),ScalarTraits::zero()); } // AH 11/28/2018: For Epetra, XOverlap_ will only have a view to the values of XOverlapTmp_. Therefore, xOverlapTmp should not be deleted before XOverlap_ is used. - if (YOverlap_.is_null()) { + if (YOverlap_.is_null() || YOverlap_->getNumVectors() != x.getNumVectors()) { YOverlap_ = MultiVectorFactory::Build(OverlappingMatrix_->getDomainMap(),x.getNumVectors()); } else { YOverlap_->replaceMap(OverlappingMatrix_->getDomainMap()); @@ -97,7 +99,9 @@ namespace FROSch { // AH 11/28/2018: replaceMap does not update the GlobalNumRows. Therefore, we have to create a new MultiVector on the serial Communicator. In Epetra, we can prevent to copy the MultiVector. if (XTmp_->getMap()->lib() == UseEpetra) { #ifdef HAVE_SHYLU_DDFROSCH_EPETRA - if (XOverlapTmp_.is_null()) XOverlapTmp_ = MultiVectorFactory::Build(OverlappingMap_,x.getNumVectors()); + if (XOverlapTmp_.is_null() || XOverlap_->getNumVectors() != x.getNumVectors()) { + XOverlapTmp_ = MultiVectorFactory::Build(OverlappingMap_,x.getNumVectors()); + } XOverlapTmp_->doImport(*XTmp_,*Scatter_,INSERT); const RCP > xEpetraMultiVectorXOverlapTmp = rcp_dynamic_cast >(XOverlapTmp_); RCP epetraMultiVectorXOverlapTmp = xEpetraMultiVectorXOverlapTmp->getEpetra_MultiVector(); diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SchwarzOperator_decl.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SchwarzOperator_decl.hpp index 7edba1e9ee1a..1664ce675f6c 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SchwarzOperator_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SchwarzOperator_decl.hpp @@ -236,6 +236,7 @@ namespace FROSch { bool ExtractLocalSubdomainMatrix_Symbolic_Done_ = false; XMatrixPtr subdomainMatrix_; XMatrixPtr localSubdomainMatrix_; + XImportPtr subdomainScatter_; bool Verbose_ = false; diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SumOperator_def.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SumOperator_def.hpp index f7435adddf65..623facb65205 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SumOperator_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzOperators/FROSch_SumOperator_def.hpp @@ -118,7 +118,9 @@ namespace FROSch { { FROSCH_TIMER_START_LEVELID(applyTime,"SumOperator::apply"); if (OperatorVector_.size()>0) { - if (XTmp_.is_null()) XTmp_ = MultiVectorFactory::Build(x.getMap(),x.getNumVectors()); + if (XTmp_.is_null() || XTmp_->getNumVectors() != x.getNumVectors()) { + XTmp_ = MultiVectorFactory::Build(x.getMap(),x.getNumVectors()); + } *XTmp_ = x; // Das brauche ich für den Fall das x=y UN itmp = 0; for (UN i=0; i::GOVecPtr; public: + using AlgebraicOverlappingPreconditioner::initialize; GDSWPreconditioner(ConstXMatrixPtr k, ParameterListPtr parameterList); diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_RGDSWPreconditioner_decl.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_RGDSWPreconditioner_decl.hpp index c49296219d02..9bfa54e329c6 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_RGDSWPreconditioner_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_RGDSWPreconditioner_decl.hpp @@ -81,6 +81,7 @@ namespace FROSch { using GOVecPtr = typename SchwarzPreconditioner::GOVecPtr; public: + using AlgebraicOverlappingPreconditioner::initialize; RGDSWPreconditioner(ConstXMatrixPtr k, ParameterListPtr parameterList); diff --git a/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_TwoLevelPreconditioner_decl.hpp b/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_TwoLevelPreconditioner_decl.hpp index 5ab729575920..817d5e94334a 100644 --- a/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_TwoLevelPreconditioner_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SchwarzPreconditioners/FROSch_TwoLevelPreconditioner_decl.hpp @@ -83,6 +83,7 @@ namespace FROSch { using GOVecPtr = typename SchwarzPreconditioner::GOVecPtr; public: + using OneLevelPreconditioner::initialize; TwoLevelPreconditioner(ConstXMatrixPtr k, ParameterListPtr parameterList); diff --git a/packages/shylu/shylu_dd/frosch/src/SolverInterfaces/FROSch_Amesos2SolverTpetra_def.hpp b/packages/shylu/shylu_dd/frosch/src/SolverInterfaces/FROSch_Amesos2SolverTpetra_def.hpp index 925865f8d8c2..b828d0dcaf2b 100644 --- a/packages/shylu/shylu_dd/frosch/src/SolverInterfaces/FROSch_Amesos2SolverTpetra_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/SolverInterfaces/FROSch_Amesos2SolverTpetra_def.hpp @@ -91,7 +91,9 @@ namespace FROSch { FROSCH_ASSERT(xTpetraMultiVectorX,"FROSch::Amesos2SolverTpetra: dynamic_cast failed."); TMultiVectorPtr tpetraMultiVectorX = xTpetraMultiVectorX->getTpetra_MultiVector(); - if (Y_.is_null()) Y_ = XMultiVectorFactory::Build(y.getMap(),x.getNumVectors()); + if (Y_.is_null() || Y_->getNumVectors() != x.getNumVectors()) { + Y_ = XMultiVectorFactory::Build(y.getMap(),x.getNumVectors()); + } const TpetraMultiVector * xTpetraMultiVectorY = dynamic_cast *>(Y_.get()); FROSCH_ASSERT(xTpetraMultiVectorY,"FROSch::Amesos2SolverTpetra: dynamic_cast failed."); TMultiVectorPtr tpetraMultiVectorY = xTpetraMultiVectorY->getTpetra_MultiVector(); diff --git a/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_decl.hpp b/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_decl.hpp index 622dc06b7ef6..2e190a686e30 100644 --- a/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_decl.hpp +++ b/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_decl.hpp @@ -72,6 +72,12 @@ namespace FROSch { void ExtractLocalSubdomainMatrix_Compute(RCP > globalMatrix, RCP< Matrix > subdomainMatrix, RCP< Matrix > repeatedMatrix); + + template + void ExtractLocalSubdomainMatrix_Compute(RCP< Import > scatter, + RCP > globalMatrix, + RCP< Matrix > subdomainMatrix, + RCP< Matrix > repeatedMatrix); // ----------------------------------------------------------- // template diff --git a/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_def.hpp b/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_def.hpp index 6ede2ec78608..0a14388697c2 100644 --- a/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_def.hpp +++ b/packages/shylu/shylu_dd/frosch/src/Tools/FROSch_ExtractSubmatrices_def.hpp @@ -122,12 +122,21 @@ namespace FROSch { void ExtractLocalSubdomainMatrix_Compute(RCP > globalMatrix, RCP< Matrix > subdomainMatrix, RCP< Matrix > localSubdomainMatrix) + { + RCP > scatter = ImportFactory::Build(globalMatrix->getRowMap(), subdomainMatrix->getRowMap()); + ExtractLocalSubdomainMatrix_Compute(scatter, globalMatrix, subdomainMatrix, localSubdomainMatrix); + } + + template + void ExtractLocalSubdomainMatrix_Compute(RCP< Import > scatter, + RCP > globalMatrix, + RCP< Matrix > subdomainMatrix, + RCP< Matrix > localSubdomainMatrix) { FROSCH_DETAILTIMER_START(extractLocalSubdomainMatrixTime_compute, "ExtractLocalSubdomainMatrix_Compute"); const SC zero = ScalarTraits::zero(); auto subdomainRowMap = subdomainMatrix->getRowMap(); - RCP > scatter = ImportFactory::Build(globalMatrix->getRowMap(),subdomainRowMap); subdomainMatrix->setAllToScalar(zero); subdomainMatrix->resumeFill(); subdomainMatrix->doImport(*globalMatrix, *scatter, ADD); @@ -314,10 +323,10 @@ namespace FROSch { UN numRowsI = indI.size(); UN numRowsJ = indJ.size(); - rowptr_type RowptrII ("RowptrII", numRowsI+1); - rowptr_type RowptrIJ ("RowptrIJ", numRowsI+1); - rowptr_type RowptrJI ("RowptrJI", numRowsJ+1); - rowptr_type RowptrJJ ("RowptrJJ", numRowsJ+1); + rowptr_type RowptrII (Kokkos::ViewAllocateWithoutInitializing("RowptrII"), numRowsI+1); + rowptr_type RowptrIJ (Kokkos::ViewAllocateWithoutInitializing("RowptrIJ"), numRowsI+1); + rowptr_type RowptrJI (Kokkos::ViewAllocateWithoutInitializing("RowptrJI"), numRowsJ+1); + rowptr_type RowptrJJ (Kokkos::ViewAllocateWithoutInitializing("RowptrJJ"), numRowsJ+1); // count nnz on each blocks Kokkos::deep_copy(RowptrII, 0); @@ -399,17 +408,17 @@ namespace FROSch { #endif // allocate kII block - indices_type IndicesII ("IndicesII", nnzII); - values_type ValuesII ("ValuesII", nnzII); + indices_type IndicesII (Kokkos::ViewAllocateWithoutInitializing("IndicesII"), nnzII); + values_type ValuesII (Kokkos::ViewAllocateWithoutInitializing("ValuesII"), nnzII); // allocate kIJ block - indices_type IndicesIJ ("IndicesIJ", nnzIJ); - values_type ValuesIJ ("ValuesIJ", nnzIJ); + indices_type IndicesIJ (Kokkos::ViewAllocateWithoutInitializing("IndicesIJ"), nnzIJ); + values_type ValuesIJ (Kokkos::ViewAllocateWithoutInitializing("ValuesIJ"), nnzIJ); // allocate kJI block - indices_type IndicesJI ("IndicesJI", nnzJI); - values_type ValuesJI ("ValuesJI", nnzJI); + indices_type IndicesJI (Kokkos::ViewAllocateWithoutInitializing("IndicesJI"), nnzJI); + values_type ValuesJI (Kokkos::ViewAllocateWithoutInitializing("ValuesJI"), nnzJI); // allocate kJJ block - indices_type IndicesJJ ("IndicesJJ", nnzJJ); - values_type ValuesJJ ("ValuesJJ", nnzJJ); + indices_type IndicesJJ (Kokkos::ViewAllocateWithoutInitializing("IndicesJJ"), nnzJJ); + values_type ValuesJJ (Kokkos::ViewAllocateWithoutInitializing("ValuesJJ"), nnzJJ); // fill in all the blocks Kokkos::parallel_for(