diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index b232051eddf2..c085620db33a 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -60,7 +60,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - name: Repo status @@ -151,7 +151,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - name: Repo status @@ -242,7 +242,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - name: Repo status @@ -334,7 +334,7 @@ jobs: mkdir -p /home/Trilinos/src/Trilinos mkdir -p /home/Trilinos/build - name: Clone trilinos - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 - name: Repo status diff --git a/.github/workflows/clang_format.yml b/.github/workflows/clang_format.yml index a3fd0968ad75..d0b7392226a0 100644 --- a/.github/workflows/clang_format.yml +++ b/.github/workflows/clang_format.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - uses: DoozyX/clang-format-lint-action@c71d0bf4e21876ebec3e5647491186f8797fde31 # v0.18.2 with: source: './packages/muelu ./packages/tempus ./packages/teko ./packages/xpetra' diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 56bbf091adaf..f280dd632cef 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -1,68 +1,51 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. -# -name: "CodeQL: Linear Solvers" +name: "CodeQL Security Scan" on: - #push: - # branches: [ "muelu-sync-workflow" ] pull_request: - branches: [ "develop" ] + branches: + - develop + types: + - opened + - synchronize schedule: - cron: '41 23 * * 2' +# Cancels any in progress workflows associated with this PR +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + permissions: contents: read jobs: analyze: name: Analyze (${{ matrix.language }}) - # Runner size impacts CodeQL analysis time. To learn more, please see: - # - https://gh.io/recommended-hardware-resources-for-running-codeql - # - https://gh.io/supported-runners-and-hardware-resources - # - https://gh.io/using-larger-runners (GitHub.com only) - # Consider using larger runners or machines with greater resources for possible analysis time improvements. - runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} - timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} + runs-on: [self-hosted, gcc-10.3.0_openmpi-4.1.6] + if: ${{ github.event.action == 'synchronize' || github.event.action == 'opened' }} + permissions: # required for all workflows security-events: write - # only required for workflows in private repositories actions: read contents: read - strategy: fail-fast: false matrix: include: - language: c-cpp build-mode: manual - #- language: python - # build-mode: none - # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' - # Use `c-cpp` to analyze code written in C, C++ or both - # Use 'java-kotlin' to analyze code written in Java, Kotlin or both - # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both - # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, - # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. - # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how - # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages + defaults: + run: + shell: bash -l {0} + steps: - name: Checkout repository - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@f779452ac5af1c261dce0346a8f964149f49322b # v3.26.13 + uses: github/codeql-action/init@662472033e021d55d94146f66f6058822b0b39fd # v3.27.0 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} @@ -70,21 +53,61 @@ jobs: query-filters: - exclude: tags: cpp/integer-multiplication-cast-to-long - + + - name: Print environment + env: + GITHUB_CONTEXT: ${{ toJson(github) }} + run: | + env + + - name: Module list + run: | + module list + printenv PATH + - if: matrix.build-mode == 'manual' - name: Configure Trilinos + name: Get dependencies + working-directory: ./packages/framework run: | - mkdir -p trilinos_build - cd trilinos_build - cmake -G 'Unix Makefiles' -DTrilinos_ENABLE_TESTS=OFF -DTrilinos_ENABLE_Epetra=OFF -DTrilinos_ENABLE_AztecOO=OFF -DTrilinos_ENABLE_Ifpack=OFF -DTrilinos_ENABLE_ML=OFF -D Trilinos_ENABLE_Triutils=OFF -DTrilinos_ENABLE_Tpetra=ON -DTrilinos_ENABLE_MueLu=ON -DTrilinos_ENABLE_Krino=OFF .. - + ./get_dependencies.sh --container + - if: matrix.build-mode == 'manual' - name: Build Trilinos + name: Generate CMake fragments run: | - cd trilinos_build - make -j 2 - + git fetch origin ${GITHUB_BASE_REF} + + mkdir -p trilinos_build && cd trilinos_build + + source ${GITHUB_WORKSPACE}/packages/framework/GenConfig/gen-config.sh --force --cmake-fragment genconfig_fragment.cmake rhel8_gcc-openmpi_debug_shared_no-kokkos-arch_no-asan_complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables + ${GITHUB_WORKSPACE}/commonTools/framework/get-changed-trilinos-packages.sh origin/${GITHUB_BASE_REF} HEAD package_enables.cmake package_subprojects.cmake + + - if: matrix.build-mode == 'manual' + name: Configure and build Trilinos + working-directory: ./trilinos_build + run: | + cmake -C genconfig_fragment.cmake -C package_enables.cmake \ + -DTrilinos_ENABLE_ALL_FORWARD_DEP_PACKAGES=OFF \ + -DTrilinos_ENABLE_ALL_OPTIONAL_PACKAGES=OFF \ + -DTrilinos_ENABLE_SECONDARY_TESTED_CODE=OFF \ + -DTrilinos_ENABLE_Amesos=OFF \ + -DTrilinos_ENABLE_AztecOO=OFF \ + -DTrilinos_ENABLE_Epetra=OFF \ + -DTrilinos_ENABLE_EpetraExt=OFF \ + -DTrilinos_ENABLE_Ifpack=OFF \ + -DTrilinos_ENABLE_Intrepid=OFF \ + -DTrilinos_ENABLE_Isorropia=OFF \ + -DTrilinos_ENABLE_ML=OFF \ + -DTrilinos_ENABLE_NewPackage=OFF \ + -DTrilinos_ENABLE_Pliris=OFF \ + -DTrilinos_ENABLE_PyTrilinos=OFF \ + -DTrilinos_ENABLE_ShyLU_DDCore=OFF \ + -DTrilinos_ENABLE_ThyraEpetraAdapters=OFF \ + -DTrilinos_ENABLE_ThyraEpetraExtAdapters=OFF \ + -DTrilinos_ENABLE_Triutils=OFF .. + + ninja -j 16 + - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@f779452ac5af1c261dce0346a8f964149f49322b # v3.26.13 + uses: github/codeql-action/analyze@662472033e021d55d94146f66f6058822b0b39fd # v3.27.0 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/dependency-review.yml b/.github/workflows/dependency-review.yml index 7b0990bcf5ca..955b3b3fb2d0 100644 --- a/.github/workflows/dependency-review.yml +++ b/.github/workflows/dependency-review.yml @@ -22,6 +22,6 @@ jobs: egress-policy: audit - name: 'Checkout Repository' - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - name: 'Dependency Review' - uses: actions/dependency-review-action@5a2ce3f5b92ee19cbb1541a4984c76d921601d7c # v4.3.4 + uses: actions/dependency-review-action@4081bf99e2866ebe428fc0477b69eb4fcda7220a # v4.4.0 diff --git a/.github/workflows/detect-git-lfs.yml b/.github/workflows/detect-git-lfs.yml index ebe778088863..68595577ec7c 100644 --- a/.github/workflows/detect-git-lfs.yml +++ b/.github/workflows/detect-git-lfs.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Check out code - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 diff --git a/.github/workflows/detect-mpi-comm-world.yml b/.github/workflows/detect-mpi-comm-world.yml index 1fd6790c8c86..e85d71db2f6a 100644 --- a/.github/workflows/detect-mpi-comm-world.yml +++ b/.github/workflows/detect-mpi-comm-world.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Check out code - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 diff --git a/.github/workflows/per-commit.yml b/.github/workflows/per-commit.yml index 3f619a7dbbc0..80dfc8b94008 100644 --- a/.github/workflows/per-commit.yml +++ b/.github/workflows/per-commit.yml @@ -12,7 +12,7 @@ jobs: steps: - name: Check out code - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 0 diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index c648a7e9b626..1ac917d3af8a 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -31,7 +31,7 @@ jobs: steps: - name: "Checkout code" - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: persist-credentials: false @@ -66,6 +66,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@f779452ac5af1c261dce0346a8f964149f49322b # v3.26.13 + uses: github/codeql-action/upload-sarif@662472033e021d55d94146f66f6058822b0b39fd # v3.27.0 with: sarif_file: results.sarif diff --git a/.github/workflows/spack.yml b/.github/workflows/spack.yml index 59976c1d9b3e..3c3c01b75849 100644 --- a/.github/workflows/spack.yml +++ b/.github/workflows/spack.yml @@ -24,7 +24,7 @@ jobs: runs-on: [self-hosted, gcc-10.3.0_openmpi-4.1.6] steps: - name: Clone Trilinos - uses: actions/checkout@eef61447b9ff4aafe5dcd4e0bbf5d482be7e7871 # v4.2.1 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: fetch-depth: 1 - name: Spack build diff --git a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp index 0b7e1d89d800..814450ffa710 100644 --- a/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp +++ b/packages/muelu/src/Graph/MatrixTransformation/MueLu_CoalesceDropFactory_def.hpp @@ -25,6 +25,8 @@ #include +#include +#include #include "MueLu_CoalesceDropFactory_decl.hpp" #include "MueLu_AmalgamationFactory.hpp" @@ -79,6 +81,11 @@ struct DropTol { }; } // namespace Details +enum decisionAlgoType { defaultAlgo, + unscaled_cut, + scaled_cut, + scaled_cut_symmetric }; + template RCP CoalesceDropFactory::GetValidParameterList() const { RCP validParamList = rcp(new ParameterList()); @@ -314,11 +321,6 @@ void CoalesceDropFactory::Build(Level #endif //////////////////////////////////////////////////// - enum decisionAlgoType { defaultAlgo, - unscaled_cut, - scaled_cut, - scaled_cut_symmetric }; - decisionAlgoType distanceLaplacianAlgo = defaultAlgo; decisionAlgoType classicalAlgo = defaultAlgo; if (algo == "distance laplacian") { @@ -452,8 +454,10 @@ void CoalesceDropFactory::Build(Level GetOStream(Statistics1) << "Calculating max block off-diagonal" << std::endl; } } else { - ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); - ghostedDiagVals = ghostedDiag->getData(0); + ghostedDiag = MueLu::Utilities::GetMatrixOverlappedDiagonal(*A); + if (classicalAlgo == defaultAlgo) { + ghostedDiagVals = ghostedDiag->getData(0); + } } auto boundaryNodes = MueLu::Utilities::DetectDirichletRows_kokkos_host(*A, dirichletThreshold); if (rowSumTol > 0.) { @@ -474,14 +478,15 @@ void CoalesceDropFactory::Build(Level LO realnnz = 0; rows(0) = 0; - for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { - size_t nnz = A->getNumEntriesInLocalRow(row); - bool rowIsDirichlet = boundaryNodes[row]; - ArrayView indices; - ArrayView vals; - A->getLocalRowView(row, indices, vals); + if (classicalAlgo == defaultAlgo) { + SubFactoryMonitor m1(*this, "Classical RS/SA", currentLevel); + for (LO row = 0; row < Teuchos::as(A->getRowMap()->getLocalNumElements()); ++row) { + size_t nnz = A->getNumEntriesInLocalRow(row); + bool rowIsDirichlet = boundaryNodes[row]; + ArrayView indices; + ArrayView vals; + A->getLocalRowView(row, indices, vals); - if (classicalAlgo == defaultAlgo) { // FIXME the current predrop function uses the following // FIXME if(std::abs(vals[k]) > std::abs(threshold_) || grow == gcid ) // FIXME but the threshold doesn't take into account the rows' diagonal entries @@ -540,109 +545,183 @@ void CoalesceDropFactory::Build(Level } rows(row + 1) = realnnz; } + } // end for row + } else { + /* Cut Algorithm */ + SubFactoryMonitor m1(*this, "Cut Drop", currentLevel); + using ExecSpace = typename Node::execution_space; + using TeamPol = Kokkos::TeamPolicy; + using TeamMem = typename TeamPol::member_type; + using ATS = Kokkos::ArithTraits; + using impl_scalar_type = typename ATS::val_type; + using implATS = Kokkos::ArithTraits; + + // move from host to device + auto ghostedDiagValsView = Kokkos::subview(ghostedDiag->getDeviceLocalView(Xpetra::Access::ReadOnly), Kokkos::ALL(), 0); + auto thresholdKokkos = static_cast(threshold); + auto realThresholdKokkos = implATS::magnitude(thresholdKokkos); + auto columnsDevice = Kokkos::create_mirror_view(ExecSpace(), columns); + + auto A_device = A->getLocalMatrixDevice(); + RCP graph = rcp(new LWGraph(A->getCrsGraph(), "graph of A")); + RCP importer = A->getCrsGraph()->getImporter(); + RCP boundaryNodesVector = Xpetra::VectorFactory::Build(graph->GetDomainMap()); + RCP boundaryColumnVector; + for (size_t i = 0; i < graph->GetNodeNumVertices(); i++) { + boundaryNodesVector->getDataNonConst(0)[i] = boundaryNodes[i]; + } + if (!importer.is_null()) { + boundaryColumnVector = Xpetra::VectorFactory::Build(graph->GetImportMap()); + boundaryColumnVector->doImport(*boundaryNodesVector, *importer, Xpetra::INSERT); } else { - /* Cut Algorithm */ - // CMS - using DropTol = Details::DropTol; - std::vector drop_vec; - drop_vec.reserve(nnz); - const real_type zero = Teuchos::ScalarTraits::zero(); - const real_type one = Teuchos::ScalarTraits::one(); - LO rownnz = 0; - // NOTE: This probably needs to be fixed for rowsum - - // find magnitudes - for (LO colID = 0; colID < (LO)nnz; colID++) { - LO col = indices[colID]; - if (row == col) { - drop_vec.emplace_back(zero, one, colID, false); - continue; - } + boundaryColumnVector = boundaryNodesVector; + } + auto boundaryColumn = boundaryColumnVector->getDeviceLocalView(Xpetra::Access::ReadOnly); + auto boundary = Kokkos::subview(boundaryColumn, Kokkos::ALL(), 0); - // Don't aggregate boundaries - if (boundaryNodes[colID]) continue; - typename STS::magnitudeType aiiajj = STS::magnitude(threshold * threshold * ghostedDiagVals[col] * ghostedDiagVals[row]); // eps^2*|a_ii|*|a_jj| - typename STS::magnitudeType aij = STS::magnitude(vals[colID] * vals[colID]); // |a_ij|^2 - drop_vec.emplace_back(aij, aiiajj, colID, false); - } + Kokkos::View rownnzView("rownnzView", A_device.numRows()); + auto drop_views = Kokkos::View("drop_views", A_device.nnz()); + auto index_views = Kokkos::View("index_views", A_device.nnz()); - const size_t n = drop_vec.size(); + Kokkos::parallel_reduce( + "classical_cut", TeamPol(A_device.numRows(), Kokkos::AUTO), KOKKOS_LAMBDA(const TeamMem& teamMember, LO& globalnnz, GO& totalDropped) { + LO row = teamMember.league_rank(); + auto rowView = A_device.rowConst(row); + size_t nnz = rowView.length; - if (classicalAlgo == unscaled_cut) { - std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { - return a.val > b.val; - }); + auto drop_view = Kokkos::subview(drop_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1))); + auto index_view = Kokkos::subview(index_views, Kokkos::make_pair(A_device.graph.row_map(row), A_device.graph.row_map(row + 1))); - bool drop = false; - for (size_t i = 1; i < n; ++i) { - if (!drop) { - auto const& x = drop_vec[i - 1]; - auto const& y = drop_vec[i]; - auto a = x.val; - auto b = y.val; - if (a > realThreshold * b) { - drop = true; -#ifdef HAVE_MUELU_DEBUG - if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl; - } -#endif + // find magnitudes + Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, (LO)nnz), [&](const LO colID) { + index_view(colID) = colID; + LO col = rowView.colidx(colID); + // ignore diagonals for now, they are checked again later + // Don't aggregate boundaries + if (row == col || boundary(col)) { + drop_view(colID) = true; + } else { + drop_view(colID) = false; } - } - drop_vec[i].drop = drop; - } - } else if (classicalAlgo == scaled_cut) { - std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { - return a.val / a.diag > b.val / b.diag; - }); - bool drop = false; - // printf("[%d] Scaled Cut: ",(int)row); - // printf("%3d(%4s) ",indices[drop_vec[0].col],"keep"); - for (size_t i = 1; i < n; ++i) { - if (!drop) { - auto const& x = drop_vec[i - 1]; - auto const& y = drop_vec[i]; - auto a = x.val / x.diag; - auto b = y.val / y.diag; - if (a > realThreshold * b) { - drop = true; + }); -#ifdef HAVE_MUELU_DEBUG - if (distanceLaplacianCutVerbose) { - std::cout << "DJS: KEEP, N, ROW: " << i + 1 << ", " << n << ", " << row << std::endl; + size_t dropStart = nnz; + if (classicalAlgo == unscaled_cut) { + // push diagonals and boundaries to the right, sort everything else by aij on the left + Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool { + if (drop_view(x) || drop_view(y)) { + return drop_view(x) < drop_view(y); + } else { + auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x)); + auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y)); + return x_aij > y_aij; } -#endif - } - // printf("%3d(%4s) ",indices[drop_vec[i].col],drop?"drop":"keep"); + }); + + // find index where dropping starts + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) { + auto const& x = index_view(i - 1); + auto const& y = index_view(i); + typename implATS::magnitudeType x_aij = 0; + typename implATS::magnitudeType y_aij = 0; + if (!drop_view(x)) { + x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x)); + } + if (!drop_view(y)) { + y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y)); + } + + if (realThresholdKokkos * realThresholdKokkos * x_aij > y_aij) { + if (i < min) { + min = i; + } + } + }, + Kokkos::Min(dropStart)); + } else if (classicalAlgo == scaled_cut) { + // push diagonals and boundaries to the right, sort everything else by aij/aiiajj on the left + Kokkos::Experimental::sort_team(teamMember, index_view, [=](size_t& x, size_t& y) -> bool { + if (drop_view(x) || drop_view(y)) { + return drop_view(x) < drop_view(y); + } else { + auto x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x)); + auto y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y)); + auto x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row)); + auto y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row)); + return (x_aij / x_aiiajj) > (y_aij / y_aiiajj); + } + }); + + // find index where dropping starts + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(teamMember, 1, nnz), [=](size_t i, size_t& min) { + auto const& x = index_view(i - 1); + auto const& y = index_view(i); + typename implATS::magnitudeType x_val = 0; + typename implATS::magnitudeType y_val = 0; + if (!drop_view(x)) { + typename implATS::magnitudeType x_aij = implATS::magnitude(rowView.value(x) * rowView.value(x)); + typename implATS::magnitudeType x_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(x)) * ghostedDiagValsView(row)); + x_val = x_aij / x_aiiajj; + } + if (!drop_view(y)) { + typename implATS::magnitudeType y_aij = implATS::magnitude(rowView.value(y) * rowView.value(y)); + typename implATS::magnitudeType y_aiiajj = implATS::magnitude(ghostedDiagValsView(rowView.colidx(y)) * ghostedDiagValsView(row)); + y_val = y_aij / y_aiiajj; + } + + if (realThresholdKokkos * realThresholdKokkos * x_val > y_val) { + if (i < min) { + min = i; + } + } + }, + Kokkos::Min(dropStart)); } - drop_vec[i].drop = drop; - } - // printf("\n"); - } - std::sort(drop_vec.begin(), drop_vec.end(), [](DropTol const& a, DropTol const& b) { - return a.col < b.col; - }); - - for (LO idxID = 0; idxID < (LO)drop_vec.size(); idxID++) { - LO col = indices[drop_vec[idxID].col]; - // don't drop diagonal - if (row == col) { - columns[realnnz++] = col; - rownnz++; - continue; - } - if (!drop_vec[idxID].drop) { - columns[realnnz++] = col; - rownnz++; - } else { - numDropped++; - } - } - // CMS - rows[row + 1] = realnnz; - } - } // end for row + // drop everything to the right of where values stop passing threshold + if (dropStart < nnz) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember, dropStart, nnz), [=](size_t i) { + drop_view(index_view(i)) = true; + }); + } + + LO rownnz = 0; + GO rowDropped = 0; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(teamMember, nnz), [=](const size_t idxID, LO& keep, GO& drop) { + LO col = rowView.colidx(idxID); + // don't drop diagonal + if (row == col || !drop_view(idxID)) { + columnsDevice(A_device.graph.row_map(row) + idxID) = col; + keep++; + } else { + columnsDevice(A_device.graph.row_map(row) + idxID) = -1; + drop++; + } + }, + rownnz, rowDropped); + + globalnnz += rownnz; + totalDropped += rowDropped; + rownnzView(row) = rownnz; + }, + realnnz, numDropped); + + // update column indices so that kept indices are aligned to the left for subview that happens later on + Kokkos::Experimental::remove(ExecSpace(), columnsDevice, -1); + Kokkos::deep_copy(columns, columnsDevice); + + // update row indices by adding up new # of nnz in each row + auto rowsDevice = Kokkos::create_mirror_view(ExecSpace(), rows); + Kokkos::parallel_scan( + Kokkos::RangePolicy(0, A_device.numRows()), KOKKOS_LAMBDA(const int i, LO& partial_sum, bool is_final) { + partial_sum += rownnzView(i); + if (is_final) rowsDevice(i + 1) = partial_sum; + }); + Kokkos::deep_copy(rows, rowsDevice); + } numTotal = A->getLocalNumEntries(); @@ -1281,7 +1360,7 @@ void CoalesceDropFactory::Build(Level auto const& y = drop_vec[i]; auto a = x.val; auto b = y.val; - if (a > realThreshold * b) { + if (realThreshold * realThreshold * a > b) { drop = true; #ifdef HAVE_MUELU_DEBUG if (distanceLaplacianCutVerbose) { @@ -1304,7 +1383,7 @@ void CoalesceDropFactory::Build(Level auto const& y = drop_vec[i]; auto a = x.val / x.diag; auto b = y.val / y.diag; - if (a > realThreshold * b) { + if (realThreshold * realThreshold * a > b) { drop = true; #ifdef HAVE_MUELU_DEBUG if (distanceLaplacianCutVerbose) { diff --git a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp index b432ffb1d868..40f4635e0b3d 100644 --- a/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/PairwiseAggregation/MueLu_NotayAggregationFactory_def.hpp @@ -55,7 +55,6 @@ RCP NotayAggregationFactorysetEntry(name, MasterList::getEntry(name)) SET_VALID_ENTRY("aggregation: pairwise: size"); SET_VALID_ENTRY("aggregation: pairwise: tie threshold"); - SET_VALID_ENTRY("aggregation: compute aggregate qualities"); SET_VALID_ENTRY("aggregation: Dirichlet threshold"); SET_VALID_ENTRY("aggregation: ordering"); #undef SET_VALID_ENTRY @@ -64,21 +63,15 @@ RCP NotayAggregationFactoryset>("A", null, "Generating factory of the matrix"); validParamList->set>("Graph", null, "Generating factory of the graph"); validParamList->set>("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - validParamList->set>("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); return validParamList; } template void NotayAggregationFactory::DeclareInput(Level& currentLevel) const { - const ParameterList& pL = GetParameterList(); - Input(currentLevel, "A"); Input(currentLevel, "Graph"); Input(currentLevel, "DofsPerNode"); - if (pL.get("aggregation: compute aggregate qualities")) { - Input(currentLevel, "AggregateQualities"); - } } template diff --git a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp index fdbb1106294c..386451d1cfc3 100644 --- a/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp +++ b/packages/muelu/src/Graph/UncoupledAggregation/MueLu_UncoupledAggregationFactory_def.hpp @@ -75,14 +75,12 @@ RCP UncoupledAggregationFactoryset>("Graph", null, "Generating factory of the graph"); validParamList->set>("DofsPerNode", null, "Generating factory for variable \'DofsPerNode\', usually the same as for \'Graph\'"); - validParamList->set>("AggregateQualities", null, "Generating factory for variable \'AggregateQualities\'"); // special variables necessary for OnePtAggregationAlgorithm validParamList->set("OnePt aggregate map name", "", "Name of input map for single node aggregates. (default='')"); @@ -131,10 +129,6 @@ void UncoupledAggregationFactory::DeclareInpu Input(currentLevel, "nodeOnInterface"); } } - - if (pL.get("aggregation: compute aggregate qualities")) { - Input(currentLevel, "AggregateQualities"); - } } template @@ -375,10 +369,6 @@ void UncoupledAggregationFactory::Build(Level aggregates->ComputeAggregateSizes(true /*forceRecompute*/); Set(currentLevel, "Aggregates", aggregates); - - if (pL.get("aggregation: compute aggregate qualities")) { - RCP> aggQualities = Get>>(currentLevel, "AggregateQualities"); - } } template diff --git a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp index e46d286abb90..207791bf5b5b 100644 --- a/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp +++ b/packages/muelu/src/Interface/MueLu_ParameterListInterpreter_def.hpp @@ -1098,7 +1098,6 @@ void ParameterListInterpreter:: MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: preserve Dirichlet points", bool, aggParams); MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: error on nodes with no on-rank neighbors", bool, aggParams); MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: phase3 avoid singletons", bool, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, aggParams); aggFactory->SetParameterList(aggParams); // make sure that the aggregation factory has all necessary data aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); @@ -1180,7 +1179,6 @@ void ParameterListInterpreter:: MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: pairwise: tie threshold", double, aggParams); MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: Dirichlet threshold", double, aggParams); MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: ordering", std::string, aggParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, aggParams); aggFactory->SetParameterList(aggParams); aggFactory->SetFactory("DofsPerNode", manager.GetFactory("Graph")); aggFactory->SetFactory("Graph", manager.GetFactory("Graph")); @@ -1200,25 +1198,6 @@ void ParameterListInterpreter:: coarseMap->SetFactory("Aggregates", manager.GetFactory("Aggregates")); manager.SetFactory("CoarseMap", coarseMap); - // Aggregate qualities - if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, true)) { - RCP aggQualityFact = rcp(new AggregateQualityEstimateFactory()); - ParameterList aggQualityParams; - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: good aggregate threshold", double, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: file output", bool, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: file base", std::string, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: check symmetry", bool, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: algorithm", std::string, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: zero threshold", double, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: percentiles", Teuchos::Array, aggQualityParams); - MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: mode", std::string, aggQualityParams); - aggQualityFact->SetParameterList(aggQualityParams); - manager.SetFactory("AggregateQualities", aggQualityFact); - - assert(aggType == "uncoupled"); - aggFactory->SetFactory("AggregateQualities", aggQualityFact); - } - // Tentative P MUELU_KOKKOS_FACTORY(Ptent, TentativePFactory, TentativePFactory_kokkos); ParameterList ptentParams; @@ -1319,6 +1298,28 @@ void ParameterListInterpreter:: RAPs->SetFactory("R", manager.GetFactory("R")); } + // Aggregate qualities + if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: compute aggregate qualities", bool, true)) { + RCP aggQualityFact = rcp(new AggregateQualityEstimateFactory()); + ParameterList aggQualityParams; + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: good aggregate threshold", double, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: file output", bool, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: file base", std::string, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: check symmetry", bool, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: algorithm", std::string, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: zero threshold", double, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: percentiles", Teuchos::Array, aggQualityParams); + MUELU_TEST_AND_SET_PARAM_2LIST(paramList, defaultList, "aggregate qualities: mode", std::string, aggQualityParams); + aggQualityFact->SetParameterList(aggQualityParams); + aggQualityFact->SetFactory("Aggregates", manager.GetFactory("Aggregates")); + aggQualityFact->SetFactory("CoarseMap", manager.GetFactory("CoarseMap")); + + if (!RAP.is_null()) + RAP->AddTransferFactory(aggQualityFact); + else + RAPs->AddTransferFactory(aggQualityFact); + } + if (MUELU_TEST_PARAM_2LIST(paramList, defaultList, "aggregation: export visualization data", bool, true)) { RCP aggExport = rcp(new AggregationExportFactory()); ParameterList aggExportParams; diff --git a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp b/packages/muelu/src/Utils/MueLu_AggregateQualityEstimateFactory_decl.hpp similarity index 88% rename from packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp rename to packages/muelu/src/Utils/MueLu_AggregateQualityEstimateFactory_decl.hpp index be87ec960139..473ad53ce0bf 100644 --- a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_decl.hpp +++ b/packages/muelu/src/Utils/MueLu_AggregateQualityEstimateFactory_decl.hpp @@ -11,7 +11,7 @@ #define MUELU_AGGREGATEQUALITYESTIMATEFACTORY_DECL_HPP #include "MueLu_ConfigDefs.hpp" -#include "MueLu_SingleLevelFactoryBase.hpp" +#include "MueLu_TwoLevelFactoryBase.hpp" #include "MueLu_AggregateQualityEstimateFactory_fwd.hpp" #include @@ -41,8 +41,11 @@ namespace MueLu { computing, 34(2), A1079-A1109. */ -template -class AggregateQualityEstimateFactory : public SingleLevelFactoryBase { +template +class AggregateQualityEstimateFactory : public TwoLevelFactoryBase { #undef MUELU_AGGREGATEQUALITYESTIMATEFACTORY_SHORT #include "MueLu_UseShortNames.hpp" @@ -70,7 +73,7 @@ class AggregateQualityEstimateFactory : public SingleLevelFactoryBase { If the Build method of this class requires some data, but the generating factory is not specified in DeclareInput, then this class will fall back to the settings in FactoryManager. */ - void DeclareInput(Level& currentLevel) const; + void DeclareInput(Level& fineLevel, Level& coarseLevel) const; //@} @@ -78,7 +81,7 @@ class AggregateQualityEstimateFactory : public SingleLevelFactoryBase { //@{ //! Build aggregate quality esimates with this factory. - void Build(Level& currentLevel) const; + void Build(Level& fineLevel, Level& coarseLevel) const; //@} diff --git a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp b/packages/muelu/src/Utils/MueLu_AggregateQualityEstimateFactory_def.hpp similarity index 96% rename from packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp rename to packages/muelu/src/Utils/MueLu_AggregateQualityEstimateFactory_def.hpp index c2c288192214..e7a2943d9969 100644 --- a/packages/muelu/src/Misc/MueLu_AggregateQualityEstimateFactory_def.hpp +++ b/packages/muelu/src/Utils/MueLu_AggregateQualityEstimateFactory_def.hpp @@ -34,10 +34,10 @@ template AggregateQualityEstimateFactory::~AggregateQualityEstimateFactory() {} template -void AggregateQualityEstimateFactory::DeclareInput(Level& currentLevel) const { - Input(currentLevel, "A"); - Input(currentLevel, "Aggregates"); - Input(currentLevel, "CoarseMap"); +void AggregateQualityEstimateFactory::DeclareInput(Level& fineLevel, Level& coarseLevel) const { + Input(fineLevel, "A"); + Input(fineLevel, "Aggregates"); + Input(fineLevel, "CoarseMap"); } template @@ -64,13 +64,13 @@ RCP AggregateQualityEstimateFactory -void AggregateQualityEstimateFactory::Build(Level& currentLevel) const { - FactoryMonitor m(*this, "Build", currentLevel); +void AggregateQualityEstimateFactory::Build(Level& fineLevel, Level& coarseLevel) const { + FactoryMonitor m(*this, "Build", fineLevel); - RCP A = Get>(currentLevel, "A"); - RCP aggregates = Get>(currentLevel, "Aggregates"); + RCP A = Get>(fineLevel, "A"); + RCP aggregates = Get>(fineLevel, "Aggregates"); - RCP map = Get>(currentLevel, "CoarseMap"); + RCP map = Get>(fineLevel, "CoarseMap"); assert(!aggregates->AggregatesCrossProcessors()); ParameterList pL = GetParameterList(); @@ -81,15 +81,15 @@ void AggregateQualityEstimateFactory: if (mode == "eigenvalue" || mode == "both") { aggregate_qualities = Xpetra::MultiVectorFactory::Build(map, 1); ComputeAggregateQualities(A, aggregates, aggregate_qualities); - OutputAggQualities(currentLevel, aggregate_qualities); + OutputAggQualities(fineLevel, aggregate_qualities); } if (mode == "size" || mode == "both") { RCP aggregate_sizes = Xpetra::VectorFactory::Build(map); ComputeAggregateSizes(A, aggregates, aggregate_sizes); - Set(currentLevel, "AggregateSizes", aggregate_sizes); - OutputAggSizes(currentLevel, aggregate_sizes); + Set(fineLevel, "AggregateSizes", aggregate_sizes); + OutputAggSizes(fineLevel, aggregate_sizes); } - Set(currentLevel, "AggregateQualities", aggregate_qualities); + Set(coarseLevel, "AggregateQualities", aggregate_qualities); } template diff --git a/packages/muelu/test/interface/default/EasyParameterListInterpreter/aggregatequalities.xml b/packages/muelu/test/interface/default/EasyParameterListInterpreter/aggregatequalities.xml new file mode 100644 index 000000000000..f732f2a3c9b5 --- /dev/null +++ b/packages/muelu/test/interface/default/EasyParameterListInterpreter/aggregatequalities.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/packages/muelu/test/interface/default/FactoryParameterListInterpreter/aggregatequalities.xml b/packages/muelu/test/interface/default/FactoryParameterListInterpreter/aggregatequalities.xml index b36abd859cdd..56565e5f4de7 100644 --- a/packages/muelu/test/interface/default/FactoryParameterListInterpreter/aggregatequalities.xml +++ b/packages/muelu/test/interface/default/FactoryParameterListInterpreter/aggregatequalities.xml @@ -47,6 +47,7 @@ + @@ -58,15 +59,14 @@ - - - + + diff --git a/packages/muelu/test/interface/default/Output/aggregatequalities_epetra.gold b/packages/muelu/test/interface/default/Output/aggregatequalities_epetra.gold index 3714e69e8895..5d4a2e452dab 100644 --- a/packages/muelu/test/interface/default/Output/aggregatequalities_epetra.gold +++ b/packages/muelu/test/interface/default/Output/aggregatequalities_epetra.gold @@ -26,13 +26,9 @@ BuildAggregatesNonKokkos (Phase 1 (main)) BuildAggregatesNonKokkos (Phase 2a (secondary)) BuildAggregatesNonKokkos (Phase 2b (expansion)) BuildAggregatesNonKokkos (Phase 3 (cleanup)) -Build (MueLu::AggregateQualityEstimateFactory) -Build (MueLu::CoarseMapFactory) Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace -aggregate qualities: good aggregate threshold = 100 [unused] -aggregate qualities: check symmetry = 0 [unused] -aggregation: compute aggregate qualities = 1 +Build (MueLu::CoarseMapFactory) matrixmatrix: kernel params -> [empty list] matrixmatrix: kernel params -> @@ -41,6 +37,10 @@ Transpose P (MueLu::TransPFactory) matrixmatrix: kernel params -> [empty list] Computing Ac (MueLu::RAPFactory) +RAPFactory: call transfer factory: MueLu::AggregateQualityEstimateFactory +Build (MueLu::AggregateQualityEstimateFactory) +aggregate qualities: good aggregate threshold = 100 [unused] +aggregate qualities: check symmetry = 0 [unused] RAPFactory: call transfer factory: MueLu::CoordinatesTransferFactory Build (MueLu::CoordinatesTransferFactory) Transferring coordinates @@ -71,13 +71,9 @@ BuildAggregatesNonKokkos (Phase 1 (main)) BuildAggregatesNonKokkos (Phase 2a (secondary)) BuildAggregatesNonKokkos (Phase 2b (expansion)) BuildAggregatesNonKokkos (Phase 3 (cleanup)) -Build (MueLu::AggregateQualityEstimateFactory) -Build (MueLu::CoarseMapFactory) Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace -aggregate qualities: good aggregate threshold = 100 [unused] -aggregate qualities: check symmetry = 0 [unused] -aggregation: compute aggregate qualities = 1 +Build (MueLu::CoarseMapFactory) matrixmatrix: kernel params -> [empty list] matrixmatrix: kernel params -> @@ -86,6 +82,10 @@ Transpose P (MueLu::TransPFactory) matrixmatrix: kernel params -> [empty list] Computing Ac (MueLu::RAPFactory) +RAPFactory: call transfer factory: MueLu::AggregateQualityEstimateFactory +Build (MueLu::AggregateQualityEstimateFactory) +aggregate qualities: good aggregate threshold = 100 [unused] +aggregate qualities: check symmetry = 0 [unused] RAPFactory: call transfer factory: MueLu::CoordinatesTransferFactory Build (MueLu::CoordinatesTransferFactory) Transferring coordinates diff --git a/packages/muelu/test/interface/default/Output/aggregatequalities_tpetra.gold b/packages/muelu/test/interface/default/Output/aggregatequalities_tpetra.gold index ef6897802897..4c9b7d57f952 100644 --- a/packages/muelu/test/interface/default/Output/aggregatequalities_tpetra.gold +++ b/packages/muelu/test/interface/default/Output/aggregatequalities_tpetra.gold @@ -27,13 +27,9 @@ BuildAggregatesNonKokkos (Phase 1 (main)) BuildAggregatesNonKokkos (Phase 2a (secondary)) BuildAggregatesNonKokkos (Phase 2b (expansion)) BuildAggregatesNonKokkos (Phase 3 (cleanup)) -Build (MueLu::AggregateQualityEstimateFactory) -Build (MueLu::CoarseMapFactory) Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace -aggregate qualities: good aggregate threshold = 100 [unused] -aggregate qualities: check symmetry = 0 [unused] -aggregation: compute aggregate qualities = 1 +Build (MueLu::CoarseMapFactory) matrixmatrix: kernel params -> [empty list] matrixmatrix: kernel params -> @@ -42,6 +38,10 @@ Transpose P (MueLu::TransPFactory) matrixmatrix: kernel params -> [empty list] Computing Ac (MueLu::RAPFactory) +RAPFactory: call transfer factory: MueLu::AggregateQualityEstimateFactory +Build (MueLu::AggregateQualityEstimateFactory) +aggregate qualities: good aggregate threshold = 100 [unused] +aggregate qualities: check symmetry = 0 [unused] RAPFactory: call transfer factory: MueLu::CoordinatesTransferFactory Build (MueLu::CoordinatesTransferFactory) Transferring coordinates @@ -73,13 +73,9 @@ BuildAggregatesNonKokkos (Phase 1 (main)) BuildAggregatesNonKokkos (Phase 2a (secondary)) BuildAggregatesNonKokkos (Phase 2b (expansion)) BuildAggregatesNonKokkos (Phase 3 (cleanup)) -Build (MueLu::AggregateQualityEstimateFactory) -Build (MueLu::CoarseMapFactory) Nullspace factory (MueLu::NullspaceFactory) Fine level nullspace = Nullspace -aggregate qualities: good aggregate threshold = 100 [unused] -aggregate qualities: check symmetry = 0 [unused] -aggregation: compute aggregate qualities = 1 +Build (MueLu::CoarseMapFactory) matrixmatrix: kernel params -> [empty list] matrixmatrix: kernel params -> @@ -88,6 +84,10 @@ Transpose P (MueLu::TransPFactory) matrixmatrix: kernel params -> [empty list] Computing Ac (MueLu::RAPFactory) +RAPFactory: call transfer factory: MueLu::AggregateQualityEstimateFactory +Build (MueLu::AggregateQualityEstimateFactory) +aggregate qualities: good aggregate threshold = 100 [unused] +aggregate qualities: check symmetry = 0 [unused] RAPFactory: call transfer factory: MueLu::CoordinatesTransferFactory Build (MueLu::CoordinatesTransferFactory) Transferring coordinates diff --git a/packages/muelu/test/unit_tests/AggregateQualityEstimateFactory.cpp b/packages/muelu/test/unit_tests/AggregateQualityEstimateFactory.cpp index 769b47c77c19..dd095e626038 100644 --- a/packages/muelu/test/unit_tests/AggregateQualityEstimateFactory.cpp +++ b/packages/muelu/test/unit_tests/AggregateQualityEstimateFactory.cpp @@ -90,26 +90,40 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(AggregateQualityEstimateFactory, Poisson2D, Sc RCP> comm = Parameters::getDefaultComm(); - Level level; - TestHelpers::TestFactory::createSingleLevelHierarchy(level); + Level fineLevel, coarseLevel; + TestHelpers::TestFactory::createTwoLevelHierarchy(fineLevel, coarseLevel); GO nx = 20 * comm->getSize(); GO ny = nx; RCP Op = TestHelpers::TestFactory::Build2DPoisson(nx, ny); - level.Set("A", Op); + fineLevel.Set("A", Op); - AggregateQualityEstimateFactory aggQualityEstimateFactory; - std::cout << *(aggQualityEstimateFactory.GetValidParameterList()) << std::endl; - aggQualityEstimateFactory.SetParameter("aggregate qualities: check symmetry", Teuchos::ParameterEntry(false)); - aggQualityEstimateFactory.SetParameter("aggregate qualities: good aggregate threshold", Teuchos::ParameterEntry(100.0)); - aggQualityEstimateFactory.SetParameter("aggregate qualities: file output", Teuchos::ParameterEntry(false)); + RCP aggQualityEstimateFactory = rcp(new AggregateQualityEstimateFactory()); + aggQualityEstimateFactory->SetParameter("aggregate qualities: check symmetry", Teuchos::ParameterEntry(false)); + aggQualityEstimateFactory->SetParameter("aggregate qualities: good aggregate threshold", Teuchos::ParameterEntry(100.0)); + aggQualityEstimateFactory->SetParameter("aggregate qualities: file output", Teuchos::ParameterEntry(false)); - level.Request("AggregateQualities", &aggQualityEstimateFactory); - level.Request(aggQualityEstimateFactory); + RCP amalgFact = rcp(new AmalgamationFactory()); + RCP dropFact = rcp(new CoalesceDropFactory()); + dropFact->SetFactory("UnAmalgamationInfo", amalgFact); + RCP aggFact = rcp(new UncoupledAggregationFactory()); + aggFact->SetFactory("Graph", dropFact); + RCP coarsemapFact = Teuchos::rcp(new CoarseMapFactory()); + coarsemapFact->SetFactory("Aggregates", aggFact); + aggQualityEstimateFactory->SetFactory("Aggregates", aggFact); + aggQualityEstimateFactory->SetFactory("CoarseMap", coarsemapFact); + + coarseLevel.Request(*aggQualityEstimateFactory); + fineLevel.Request(*aggFact); + fineLevel.Request(*coarsemapFact); + + aggQualityEstimateFactory->Build(fineLevel, coarseLevel); + + coarseLevel.Request("AggregateQualities", aggQualityEstimateFactory.get()); out << "Getting aggregate qualities...\n\n"; - RCP aggQualities = level.Get>("AggregateQualities", &aggQualityEstimateFactory); + RCP aggQualities = coarseLevel.Get>("AggregateQualities", aggQualityEstimateFactory.get()); out << "Testing aggregate qualities to make sure all aggregates are of good quality...\n\n"; @@ -536,7 +550,6 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(AggregateQualityEstimateFactory, ConvectionDif TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(AggregateQualityEstimateFactory, Constructor, Scalar, LO, GO, Node) \ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(AggregateQualityEstimateFactory, Poisson2D, Scalar, LO, GO, Node) // TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(AggregateQualityEstimateFactory,AnisotropicDiffusion2D,Scalar,LO,GO,Node) - // TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(AggregateQualityEstimateFactory,ConvectionDiffusion2D,Scalar,LO,GO,Node) #include diff --git a/packages/muelu/test/unit_tests/CoalesceDropFactory.cpp b/packages/muelu/test/unit_tests/CoalesceDropFactory.cpp index 99c6062b069b..d6b894a01116 100644 --- a/packages/muelu/test/unit_tests/CoalesceDropFactory.cpp +++ b/packages/muelu/test/unit_tests/CoalesceDropFactory.cpp @@ -1187,7 +1187,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, DistanceLaplacianScaledCu // L_ij = -36 // L_ii = 72 // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| - coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(8.0)); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(sqrt(0.125))); coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); coalesceDropFact.SetParameter("aggregation: distance laplacian algo", Teuchos::ParameterEntry(std::string("scaled cut"))); fineLevel.Request("Graph", &coalesceDropFact); @@ -1253,7 +1253,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, DistanceLaplacianUnscaled // L_ij = -36 // L_ii = 72 // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| - coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(8.0)); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(sqrt(0.125))); coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); coalesceDropFact.SetParameter("aggregation: distance laplacian algo", Teuchos::ParameterEntry(std::string("unscaled cut"))); fineLevel.Request("Graph", &coalesceDropFact); @@ -1319,7 +1319,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, DistanceLaplacianCutSym, // L_ij = -36 // L_ii = 72 // criterion for dropping is |L_ij|^2 <= tol^2 * |L_ii*L_jj| - coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(8.0)); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(sqrt(0.125))); coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("distance laplacian"))); coalesceDropFact.SetParameter("aggregation: distance laplacian algo", Teuchos::ParameterEntry(std::string("scaled cut symmetric"))); fineLevel.Request("Graph", &coalesceDropFact); @@ -1353,6 +1353,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, ClassicalScaledCut, Scala typedef Teuchos::ScalarTraits STS; typedef typename STS::magnitudeType real_type; typedef Xpetra::MultiVector RealValuedMultiVector; + typedef Tpetra::Map map_type; + typedef Tpetra::CrsMatrix crs_matrix_type; MUELU_TESTING_SET_OSTREAM; MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); @@ -1363,11 +1365,36 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, ClassicalScaledCut, Scala Level fineLevel; TestHelpers::TestFactory::createSingleLevelHierarchy(fineLevel); - RCP A = TestHelpers::TestFactory::Build1DPoisson(36); + const global_size_t globalIndices = 12; + const GO indexBase = 0; + RCP map = rcp(new map_type(globalIndices, indexBase, comm)); + RCP A_t(new crs_matrix_type(map, 5)); + const SC two = static_cast(2.0); + const SC one = static_cast(1.0); + const SC negOne = static_cast(-1.0); + for (LO lclRow = 0; lclRow < static_cast(map->getLocalNumElements()); lclRow++) { + const GO gblRow = map->getGlobalElement(lclRow); + if (gblRow == 0) { + A_t->insertGlobalValues(gblRow, Teuchos::tuple(gblRow, gblRow + 1), Teuchos::tuple(two, negOne)); + } else if (static_cast(gblRow) == globalIndices - 1) { + A_t->insertGlobalValues(gblRow, Teuchos::tuple(gblRow - 1, gblRow), Teuchos::tuple(negOne, two)); + } else if (gblRow == 2 || gblRow == 9) { + A_t->insertGlobalValues(gblRow, Teuchos::tuple(gblRow), Teuchos::tuple(one)); + } else if (gblRow == 5) { + A_t->insertGlobalValues(gblRow, Teuchos::tuple(gblRow - 2, gblRow - 1, gblRow, gblRow + 1, gblRow + 2), Teuchos::tuple(negOne, negOne, two, negOne, negOne)); + } else if (gblRow == 6) { + A_t->insertGlobalValues(gblRow, Teuchos::tuple(gblRow - 2, gblRow - 1, gblRow, gblRow + 1, gblRow + 2), Teuchos::tuple(negOne, two, two, two, negOne)); + } else { + A_t->insertGlobalValues(gblRow, Teuchos::tuple(gblRow - 1, gblRow, gblRow + 1), Teuchos::tuple(negOne, two, negOne)); + } + } + A_t->fillComplete(); + RCP A_x = rcp(new TpetraCrsMatrix(A_t)); + RCP A = rcp(new CrsMatrixWrap(A_x)); fineLevel.Set("A", A); Teuchos::ParameterList galeriList; - galeriList.set("nx", Teuchos::as(36)); + galeriList.set("nx", Teuchos::as(globalIndices)); RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); fineLevel.Set("Coordinates", coordinates); @@ -1393,25 +1420,59 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, ClassicalScaledCut, Scala const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! const RCP myDomainMap = graph->GetDomainMap(); - TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), globalIndices - 1); TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); - TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(globalIndices + (comm->getSize() - 1) * 2)); - TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), globalIndices - 1); TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); - TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); - - TEST_EQUALITY(graph->GetGlobalNumEdges(), 72); - -} // SignaledClassical + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), globalIndices); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 28); + + int rows[13] = {0, 2, 4, 5, 7, 10, 15, 18, 21, 23, 24, 26, 28}; + int columns[28] = {0, 1, + 0, 1, + 2, + 3, 4, + 3, 4, 5, + 3, 4, 5, 6, 7, + 5, 6, 7, + 6, 7, 8, + 7, 8, + 9, + 10, 11, + 10, 11}; + auto rowPtrs = graph->getRowPtrs(); + auto entries = graph->getEntries(); + size_t rowID = 0; + TEST_EQUALITY(rowPtrs(0), rowID); + for (size_t i = 0; i < rowPtrs.size() - 1; i++) { + auto gblID = myDomainMap->getGlobalElement(i); + int rownnz = rows[gblID + 1] - rows[gblID]; + rowID += rownnz; + TEST_EQUALITY(rowPtrs(i + 1), rowID); + + std::vector colID; + for (int j = 0; j < rownnz; j++) { + colID.push_back(myImportMap->getGlobalElement(entries(rowPtrs(i) + j))); + } + std::sort(std::begin(colID), std::end(colID)); + for (int j = 0; j < rownnz; j++) { + TEST_EQUALITY(colID[j], columns[rows[gblID] + j]); + } + } +} // ClassicalScaledCut TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, ClassicalUnScaledCut, Scalar, LocalOrdinal, GlobalOrdinal, Node) { #include typedef Teuchos::ScalarTraits STS; typedef typename STS::magnitudeType real_type; typedef Xpetra::MultiVector RealValuedMultiVector; + typedef Tpetra::Map map_type; + typedef Tpetra::CrsMatrix crs_matrix_type; MUELU_TESTING_SET_OSTREAM; MUELU_TESTING_LIMIT_SCOPE(Scalar, GlobalOrdinal, Node); @@ -1422,11 +1483,36 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, ClassicalUnScaledCut, Sca Level fineLevel; TestHelpers::TestFactory::createSingleLevelHierarchy(fineLevel); - RCP A = TestHelpers::TestFactory::Build1DPoisson(36); + const global_size_t globalIndices = 12; + const GO indexBase = 0; + RCP map = rcp(new map_type(globalIndices, indexBase, comm)); + RCP A_t(new crs_matrix_type(map, 5)); + const SC two = static_cast(2.0); + const SC one = static_cast(1.0); + const SC negOne = static_cast(-1.0); + for (LO lclRow = 0; lclRow < static_cast(map->getLocalNumElements()); lclRow++) { + const GO gblRow = map->getGlobalElement(lclRow); + if (gblRow == 0) { + A_t->insertGlobalValues(gblRow, Teuchos::tuple(gblRow, gblRow + 1), Teuchos::tuple(two, negOne)); + } else if (static_cast(gblRow) == globalIndices - 1) { + A_t->insertGlobalValues(gblRow, Teuchos::tuple(gblRow - 1, gblRow), Teuchos::tuple(negOne, two)); + } else if (gblRow == 2 || gblRow == 9) { + A_t->insertGlobalValues(gblRow, Teuchos::tuple(gblRow), Teuchos::tuple(one)); + } else if (gblRow == 5) { + A_t->insertGlobalValues(gblRow, Teuchos::tuple(gblRow - 2, gblRow - 1, gblRow, gblRow + 1, gblRow + 2), Teuchos::tuple(negOne, negOne, two, negOne, negOne)); + } else if (gblRow == 6) { + A_t->insertGlobalValues(gblRow, Teuchos::tuple(gblRow - 2, gblRow - 1, gblRow, gblRow + 1, gblRow + 2), Teuchos::tuple(negOne, two, two, two, negOne)); + } else { + A_t->insertGlobalValues(gblRow, Teuchos::tuple(gblRow - 1, gblRow, gblRow + 1), Teuchos::tuple(negOne, two, negOne)); + } + } + A_t->fillComplete(); + RCP A_x = rcp(new TpetraCrsMatrix(A_t)); + RCP A = rcp(new CrsMatrixWrap(A_x)); fineLevel.Set("A", A); Teuchos::ParameterList galeriList; - galeriList.set("nx", Teuchos::as(36)); + galeriList.set("nx", Teuchos::as(globalIndices)); RCP coordinates = Galeri::Xpetra::Utils::CreateCartesianCoordinates("1D", A->getRowMap(), galeriList); fineLevel.Set("Coordinates", coordinates); @@ -1452,19 +1538,51 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, ClassicalUnScaledCut, Sca const RCP myImportMap = graph->GetImportMap(); // < note that the ImportMap is built from the column map of the matrix A WITHOUT dropping! const RCP myDomainMap = graph->GetDomainMap(); - TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myImportMap->getMaxAllGlobalIndex(), globalIndices - 1); TEST_EQUALITY(myImportMap->getMinAllGlobalIndex(), 0); TEST_EQUALITY(myImportMap->getMinLocalIndex(), 0); - TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(36 + (comm->getSize() - 1) * 2)); + TEST_EQUALITY(myImportMap->getGlobalNumElements(), Teuchos::as(globalIndices + (comm->getSize() - 1) * 2)); - TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), 35); + TEST_EQUALITY(myDomainMap->getMaxAllGlobalIndex(), globalIndices - 1); TEST_EQUALITY(myDomainMap->getMinAllGlobalIndex(), 0); TEST_EQUALITY(myDomainMap->getMinLocalIndex(), 0); - TEST_EQUALITY(myDomainMap->getGlobalNumElements(), 36); - - TEST_EQUALITY(graph->GetGlobalNumEdges(), 72); - -} // SignaledClassical + TEST_EQUALITY(myDomainMap->getGlobalNumElements(), globalIndices); + + TEST_EQUALITY(graph->GetGlobalNumEdges(), 28); + + int rows[13] = {0, 2, 4, 5, 7, 10, 15, 18, 21, 23, 24, 26, 28}; + int columns[28] = {0, 1, + 0, 1, + 2, + 3, 4, + 3, 4, 5, + 3, 4, 5, 6, 7, + 5, 6, 7, + 6, 7, 8, + 7, 8, + 9, + 10, 11, + 10, 11}; + auto rowPtrs = graph->getRowPtrs(); + auto entries = graph->getEntries(); + size_t rowID = 0; + TEST_EQUALITY(rowPtrs(0), rowID); + for (size_t i = 0; i < rowPtrs.size() - 1; i++) { + auto gblID = myDomainMap->getGlobalElement(i); + int rownnz = rows[gblID + 1] - rows[gblID]; + rowID += rownnz; + TEST_EQUALITY(rowPtrs(i + 1), rowID); + + std::vector colID; + for (int j = 0; j < rownnz; j++) { + colID.push_back(myImportMap->getGlobalElement(entries(rowPtrs(i) + j))); + } + std::sort(std::begin(colID), std::end(colID)); + for (int j = 0; j < rownnz; j++) { + TEST_EQUALITY(colID[j], columns[rows[gblID] + j]); + } + } +} // ClassicalUnScaledCut TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, SignaledClassical, Scalar, LocalOrdinal, GlobalOrdinal, Node) { #include @@ -1866,7 +1984,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, BlockDiagonal, Scalar, Lo coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); coalesceDropFact.SetFactory("BlockNumber", ibFact); - coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(8.0)); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(sqrt(0.125))); coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal"))); coalesceDropFact.SetParameter("aggregation: block diagonal: interleaved blocksize", Teuchos::ParameterEntry(3)); coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); @@ -1913,7 +2031,7 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(CoalesceDropFactory, BlockDiagonalClassical, S coalesceDropFact.SetDefaultVerbLevel(MueLu::Extreme); coalesceDropFact.SetFactory("UnAmalgamationInfo", amalgFact); coalesceDropFact.SetFactory("BlockNumber", ibFact); - coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(8.0)); + coalesceDropFact.SetParameter("aggregation: drop tol", Teuchos::ParameterEntry(sqrt(0.125))); coalesceDropFact.SetParameter("aggregation: drop scheme", Teuchos::ParameterEntry(std::string("block diagonal classical"))); coalesceDropFact.SetParameter("aggregation: block diagonal: interleaved blocksize", Teuchos::ParameterEntry(3)); fineLevel.Request("Graph", &coalesceDropFact); diff --git a/packages/panzer/adapters-stk/test/evaluator_tests/CMakeLists.txt b/packages/panzer/adapters-stk/test/evaluator_tests/CMakeLists.txt index 3b2202563a32..d871d0375cb0 100644 --- a/packages/panzer/adapters-stk/test/evaluator_tests/CMakeLists.txt +++ b/packages/panzer/adapters-stk/test/evaluator_tests/CMakeLists.txt @@ -43,6 +43,13 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( NUM_MPI_PROCS 2 ) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + tGatherSolution_BlockedTpetra + SOURCES tpetra_blocked_gather_solution.cpp ${UNIT_TEST_DRIVER} + COMM serial mpi + NUM_MPI_PROCS 2 + ) + TRIBITS_ADD_EXECUTABLE_AND_TEST( tScatterResidual_Tpetra SOURCES tpetra_scatter_residual.cpp ${UNIT_TEST_DRIVER} diff --git a/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_blocked_gather_solution.cpp b/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_blocked_gather_solution.cpp new file mode 100644 index 000000000000..279956f8d6eb --- /dev/null +++ b/packages/panzer/adapters-stk/test/evaluator_tests/tpetra_blocked_gather_solution.cpp @@ -0,0 +1,721 @@ +// @HEADER +// ***************************************************************************** +// Panzer: A partial differential equation assembly +// engine for strongly coupled complex multiphysics systems +// +// Copyright 2011 NTESS and the Panzer contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/////////////////////////////////////////////////////////////////////////////// +// +// Include Files +// +/////////////////////////////////////////////////////////////////////////////// + +// C++ +#include +#include +#include + +// Kokkos +#include "Kokkos_View_Fad.hpp" + +// Panzer +#include "PanzerAdaptersSTK_config.hpp" +#include "Panzer_BasisIRLayout.hpp" +#include "Panzer_BlockedTpetraLinearObjFactory.hpp" +#include "Panzer_BlockedDOFManager.hpp" +#include "Panzer_DOFManager.hpp" +#include "Panzer_Evaluator_WithBaseImpl.hpp" +#include "Panzer_FieldManagerBuilder.hpp" +#include "Panzer_GatherOrientation.hpp" +#include "Panzer_PureBasis.hpp" +#include "Panzer_STKConnManager.hpp" +#include "Panzer_STK_Interface.hpp" +#include "Panzer_STK_SetupUtilities.hpp" +#include "Panzer_STK_SquareQuadMeshFactory.hpp" +#include "Panzer_STK_Version.hpp" +#include "Panzer_Workset.hpp" +#include "Panzer_LOCPair_GlobalEvaluationData.hpp" +#include "Panzer_GlobalEvaluationDataContainer.hpp" + +// Teuchos +#include "Teuchos_DefaultMpiComm.hpp" +#include "Teuchos_GlobalMPISession.hpp" +#include "Teuchos_OpaqueWrapper.hpp" +#include "Teuchos_RCP.hpp" +#include "Teuchos_TimeMonitor.hpp" +#include "Teuchos_UnitTestHarness.hpp" + +// Thyra +#include "Thyra_ProductVectorBase.hpp" +#include "Thyra_VectorStdOps.hpp" + +// Tpetra +#include "Tpetra_Vector.hpp" + +// user_app +#include "user_app_EquationSetFactory.hpp" + +typedef double ScalarT; +using LocalOrdinalT = panzer::LocalOrdinal; +using GlobalOrdinalT = panzer::GlobalOrdinal; + +typedef Tpetra::Vector VectorType; +typedef Tpetra::Operator OperatorType; +typedef Tpetra::CrsMatrix CrsMatrixType; +typedef Tpetra::CrsGraph CrsGraphType; +typedef Tpetra::Map MapType; +typedef Tpetra::Import ImportType; +typedef Tpetra::Export ExportType; + +typedef Thyra::TpetraLinearOp ThyraLinearOp; + +typedef panzer::BlockedTpetraLinearObjFactory BlockedTpetraLinObjFactoryType; +typedef panzer::TpetraLinearObjFactory TpetraLinObjFactoryType; +typedef panzer::BlockedTpetraLinearObjContainer BlockedTpetraLinObjContainerType; +typedef panzer::TpetraLinearObjContainer TpetraLinObjContainerType; + +namespace panzer +{ + + Teuchos::RCP buildBasis(std::size_t worksetSize, const std::string &basisName); + void testInitialization(const Teuchos::RCP &ipb); + Teuchos::RCP buildMesh(int elemX, int elemY); + void testGatherScatter(const bool enable_tangents, Teuchos::FancyOStream &out, bool &success); + + // Test without tangent fields in gather evaluator + TEUCHOS_UNIT_TEST(tpetra_assembly, gather_solution_no_tangents) + { + testGatherScatter(false, out, success); + } + + // Test with tangent fields in gather evaluator + TEUCHOS_UNIT_TEST(tpetra_assembly, gather_solution_tangents) + { + testGatherScatter(true, out, success); + } + + // enable_tangents determines whether tangent fields dx/dp are added to gather evaluator. + // These are used when computing df/dx*dx/dp with the tangent evaluation type + void testGatherScatter(const bool enable_tangents, Teuchos::FancyOStream &out, bool &success) + { +#ifdef HAVE_MPI + Teuchos::RCP> tComm = Teuchos::rcp(new Teuchos::MpiComm(MPI_COMM_WORLD)); +#else + Teuchos::RCP> tComm = Teuchos::rcp(new Teuchos::SerialComm(MPI_COMM_WORLD)); +#endif + + int myRank = tComm->getRank(); + int numProcs = tComm->getSize(); + + const std::size_t workset_size = 4 / numProcs; + const std::string fieldName1_q1 = "U"; + const std::string fieldName2_q1 = "V"; + const std::string fieldName_qedge1 = "B"; + const int num_tangent = enable_tangents ? 5 : 0; + + Teuchos::RCP mesh = buildMesh(2, 2); + + // build input physics block + Teuchos::RCP basis_q1 = buildBasis(workset_size, "Q1"); + Teuchos::RCP basis_qedge1 = buildBasis(workset_size, "QEdge1"); + + Teuchos::RCP ipb = Teuchos::parameterList(); + testInitialization(ipb); + + const int default_int_order = 1; + std::string eBlockID = "eblock-0_0"; + Teuchos::RCP eqset_factory = Teuchos::rcp(new user_app::MyFactory); + panzer::CellData cellData(workset_size, mesh->getCellTopology("eblock-0_0")); + Teuchos::RCP gd = panzer::createGlobalData(); + Teuchos::RCP physicsBlock = + Teuchos::rcp(new PhysicsBlock(ipb, eBlockID, default_int_order, cellData, eqset_factory, gd, false)); + + Teuchos::RCP> work_sets = panzer_stk::buildWorksets(*mesh, physicsBlock->elementBlockID(), + physicsBlock->getWorksetNeeds()); + TEST_EQUALITY(work_sets->size(), 1); + + // build connection manager and field manager + const Teuchos::RCP conn_manager = Teuchos::rcp(new panzer_stk::STKConnManager(mesh)); + Teuchos::RCP blocked_dofManager = Teuchos::rcp(new panzer::BlockedDOFManager(conn_manager, MPI_COMM_WORLD)); + + blocked_dofManager->addField(fieldName1_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + blocked_dofManager->addField(fieldName2_q1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + blocked_dofManager->addField(fieldName_qedge1, Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); + + std::vector > fieldOrder(3); + fieldOrder[0].push_back(fieldName1_q1); + fieldOrder[1].push_back(fieldName_qedge1); + fieldOrder[2].push_back(fieldName2_q1); + blocked_dofManager->setFieldOrder(fieldOrder); + + blocked_dofManager->buildGlobalUnknowns(); + + // setup linear object factory + ///////////////////////////////////////////////////////////// + + Teuchos::RCP t_lof = Teuchos::rcp(new BlockedTpetraLinObjFactoryType(tComm.getConst(), blocked_dofManager)); + Teuchos::RCP> lof = t_lof; + Teuchos::RCP loc = t_lof->buildGhostedLinearObjContainer(); + t_lof->initializeGhostedContainer(LinearObjContainer::X, *loc); + loc->initialize(); + + Teuchos::RCP t_loc = Teuchos::rcp_dynamic_cast(loc); + Teuchos::RCP> x_vec = t_loc->get_x_th(); + Thyra::assign(x_vec.ptr(), 123.0 + myRank); + + // need a place to evaluate the tangent fields, so we create a + // unblocked DOFManager and LOF and set up if needed + std::vector> tangentContainers; + Teuchos::RCP dofManager = Teuchos::rcp(new panzer::DOFManager(conn_manager, MPI_COMM_WORLD)); + Teuchos::RCP tangent_lof = Teuchos::rcp(new TpetraLinObjFactoryType(tComm.getConst(), dofManager)); + Teuchos::RCP> parent_tangent_lof = tangent_lof; + + if (enable_tangents) + { + using Teuchos::RCP; + using Teuchos::rcp; + using Teuchos::rcp_dynamic_cast; + using Thyra::ProductVectorBase; + using LOCPair = panzer::LOCPair_GlobalEvaluationData; + + std::vector tangent_fieldOrder; + for (int i(0); i < num_tangent; ++i) { + std::stringstream ssedge; + ssedge << fieldName_qedge1 << " Tangent " << i; + std::stringstream ss1, ss2; + ss1 << fieldName1_q1 << " Tangent " << i; + ss2 << fieldName2_q1 << " Tangent " << i; + + dofManager->addField(ss1.str(), Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(ss2.str(), Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_q1->getIntrepid2Basis()))); + dofManager->addField(ssedge.str(), Teuchos::rcp(new panzer::Intrepid2FieldPattern(basis_qedge1->getIntrepid2Basis()))); + tangent_fieldOrder.push_back(ss1.str()); + tangent_fieldOrder.push_back(ss2.str()); + tangent_fieldOrder.push_back(ssedge.str()); + } + dofManager->setFieldOrder(tangent_fieldOrder); + dofManager->buildGlobalUnknowns(); + + // generate and evaluate some fields + Teuchos::RCP tangent_loc = tangent_lof->buildGhostedLinearObjContainer(); + tangent_lof->initializeGhostedContainer(LinearObjContainer::X, *tangent_loc); + tangent_loc->initialize(); + + for (int i(0); i < num_tangent; ++i) + { + auto locPair = Teuchos::rcp(new LOCPair(tangent_lof, panzer::LinearObjContainer::X)); + + auto global_t_loc = rcp_dynamic_cast(locPair->getGlobalLOC()); + Teuchos::RCP> global_x_vec = global_t_loc->get_x_th(); + Thyra::assign(global_x_vec.ptr(), 0.123 + myRank + i); + + auto ghosted_t_loc = rcp_dynamic_cast(locPair->getGhostedLOC()); + Teuchos::RCP> ghosted_x_vec = ghosted_t_loc->get_x_th(); + Thyra::assign(ghosted_x_vec.ptr(), 0.123 + myRank + i); + + tangentContainers.push_back(locPair); + } // end loop over the tangents + } // end if (enable_tangents) + + // setup field manager, add evaluator under test + ///////////////////////////////////////////////////////////// + + PHX::FieldManager fm; + + std::vector derivative_dimensions; + derivative_dimensions.push_back(12); + fm.setKokkosExtendedDataTypeDimensions(derivative_dimensions); + + std::vector tan_derivative_dimensions; + if (enable_tangents) + tan_derivative_dimensions.push_back(num_tangent); + else + tan_derivative_dimensions.push_back(0); + fm.setKokkosExtendedDataTypeDimensions(tan_derivative_dimensions); + + Teuchos::RCP evalField_q1, evalField_qedge1; + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 2); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 2); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + if (enable_tangents) + { + RCP>> tangent_names = + rcp(new std::vector>(2)); + for (int i = 0; i < num_tangent; ++i) + { + std::stringstream ss1, ss2; + ss1 << fieldName1_q1 << " Tangent " << i; + ss2 << fieldName2_q1 << " Tangent " << i; + (*tangent_names)[0].push_back(ss1.str()); + (*tangent_names)[1].push_back(ss2.str()); + } + pl.set("Tangent Names", tangent_names); + } + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 2); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", names); + pl.set("Indexer Names", names); + + if (enable_tangents) + { + RCP>> tangent_names = + rcp(new std::vector>(1)); + for (int i = 0; i < num_tangent; ++i) + { + std::stringstream ss; + ss << fieldName_qedge1 << " Tangent " << i; + (*tangent_names)[0].push_back(ss.str()); + } + pl.set("Tangent Names", tangent_names); + } + + Teuchos::RCP> evaluator = lof->buildGather(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + fm.requireField(*evaluator->evaluatedFields()[0]); + } + + if (enable_tangents) + { + for (int i = 0; i < num_tangent; ++i) + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + RCP> tangent_names = rcp(new std::vector); + names->push_back(fieldName1_q1); + names->push_back(fieldName2_q1); + { + std::stringstream ss1, ss2; + ss1 << fieldName1_q1 << " Tangent " << i; + ss2 << fieldName2_q1 << " Tangent " << i; + tangent_names->push_back(ss1.str()); + tangent_names->push_back(ss2.str()); + } + + Teuchos::ParameterList pl; + pl.set("Basis", basis_q1); + pl.set("DOF Names", tangent_names); + pl.set("Indexer Names", tangent_names); + + { + std::stringstream ss; + ss << "Tangent Container " << i; + pl.set("Global Data Key", ss.str()); + } + + Teuchos::RCP> evaluator = + parent_tangent_lof->buildGatherTangent(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 2); + + fm.registerEvaluator(evaluator); + } + for (int i = 0; i < num_tangent; ++i) + { + using Teuchos::RCP; + using Teuchos::rcp; + RCP> names = rcp(new std::vector); + RCP> tangent_names = rcp(new std::vector); + names->push_back(fieldName_qedge1); + { + std::stringstream ss; + ss << fieldName_qedge1 << " Tangent " << i; + tangent_names->push_back(ss.str()); + } + + Teuchos::ParameterList pl; + pl.set("Basis", basis_qedge1); + pl.set("DOF Names", tangent_names); + pl.set("Indexer Names", tangent_names); + + { + std::stringstream ss; + ss << "Tangent Container " << i; + pl.set("Global Data Key", ss.str()); + } + + Teuchos::RCP> evaluator = + parent_tangent_lof->buildGatherTangent(pl); + + TEST_EQUALITY(evaluator->evaluatedFields().size(), 1); + + fm.registerEvaluator(evaluator); + } + } + + panzer::Traits::SD sd; + + panzer::Workset &workset = (*work_sets)[0]; + workset.alpha = 0.0; + workset.beta = 2.0; // derivatives multiplied by 2 + workset.time = 0.0; + workset.evaluate_transient_terms = false; + + sd.worksets_ = work_sets; + + fm.postRegistrationSetup(sd); + + panzer::Traits::PED ped; + ped.gedc->addDataObject("Solution Gather Container", loc); + if (enable_tangents) + { + for (int i(0); i < num_tangent; ++i) + { + std::stringstream ss; + ss << "Tangent Container " << i; + ped.gedc->addDataObject(ss.str(), tangentContainers[i]); + } + } + + fm.preEvaluate(ped); + fm.evaluateFields(workset); + fm.postEvaluate(0); + + fm.preEvaluate(ped); + fm.evaluateFields(workset); + fm.postEvaluate(0); + + fm.preEvaluate(ped); + fm.evaluateFields(workset); + fm.postEvaluate(0); + + // test Residual fields + { + PHX::MDField + fieldData1_q1(fieldName1_q1, basis_q1->functional); + PHX::MDField + fieldData2_q1(fieldName2_q1, basis_qedge1->functional); + + fm.getFieldData(fieldData1_q1); + fm.getFieldData(fieldData2_q1); + + TEST_EQUALITY(fieldData1_q1.extent(0), Teuchos::as(4 / numProcs)); + TEST_EQUALITY(fieldData1_q1.extent(1), 4); + TEST_EQUALITY(fieldData2_q1.extent(0), Teuchos::as(4 / numProcs)); + TEST_EQUALITY(fieldData2_q1.extent(1), 4); + TEST_EQUALITY(fieldData1_q1.size(), Teuchos::as(4 * 4 / numProcs)); + TEST_EQUALITY(fieldData2_q1.size(), Teuchos::as(4 * 4 / numProcs)); + + auto fieldData1_q1_h = Kokkos::create_mirror_view(fieldData1_q1.get_static_view()); + auto fieldData2_q1_h = Kokkos::create_mirror_view(fieldData2_q1.get_static_view()); + Kokkos::deep_copy(fieldData1_q1_h, fieldData1_q1.get_static_view()); + Kokkos::deep_copy(fieldData2_q1_h, fieldData2_q1.get_static_view()); + + for (unsigned int i = 0; i < fieldData1_q1.extent(0); i++) + for (unsigned int j = 0; j < fieldData1_q1.extent(1); j++) + TEST_EQUALITY(fieldData1_q1_h(i, j), 123.0 + myRank); + + for (unsigned int i = 0; i < fieldData2_q1.extent(0); i++) + for (unsigned int j = 0; j < fieldData2_q1.extent(1); j++) + TEST_EQUALITY(fieldData2_q1_h(i, j), 123.0 + myRank); + } + { + PHX::MDField + fieldData_qedge1(fieldName_qedge1, basis_qedge1->functional); + + fm.getFieldData(fieldData_qedge1); + + auto fieldData_qedge1_h = Kokkos::create_mirror_view(fieldData_qedge1.get_static_view()); + Kokkos::deep_copy(fieldData_qedge1_h, fieldData_qedge1.get_static_view()); + + TEST_EQUALITY(fieldData_qedge1.extent(0), Teuchos::as(4 / numProcs)); + TEST_EQUALITY(fieldData_qedge1.extent(1), 4); + TEST_EQUALITY(fieldData_qedge1.size(), Teuchos::as(4 * 4 / numProcs)); + + for (unsigned int cell = 0; cell < fieldData_qedge1.extent(0); ++cell) + for (unsigned int pt = 0; pt < fieldData_qedge1.extent(1); pt++) + TEST_EQUALITY(fieldData_qedge1_h(cell, pt), 123.0 + myRank); + } + + // test Jacobian fields + { + PHX::MDField + fieldData1_q1(fieldName1_q1, basis_q1->functional); + PHX::MDField + fieldData2_q1(fieldName2_q1, basis_qedge1->functional); + + fm.getFieldData(fieldData1_q1); + fm.getFieldData(fieldData2_q1); + + auto fieldData1_q1_h = Kokkos::create_mirror_view(fieldData1_q1.get_static_view()); + auto fieldData2_q1_h = Kokkos::create_mirror_view(fieldData2_q1.get_static_view()); + Kokkos::deep_copy(fieldData1_q1_h, fieldData1_q1.get_static_view()); + Kokkos::deep_copy(fieldData2_q1_h, fieldData2_q1.get_static_view()); + + for (unsigned int cell = 0; cell < fieldData1_q1.extent(0); ++cell) + { + for (unsigned int pt = 0; pt < fieldData1_q1.extent(1); pt++) + { + TEST_EQUALITY(fieldData1_q1_h(cell, pt), 123.0 + myRank); + TEST_EQUALITY(fieldData1_q1_h(cell, pt).availableSize(), 12); + } + } + for (unsigned int cell = 0; cell < fieldData2_q1.extent(0); ++cell) + { + for (unsigned int pt = 0; pt < fieldData2_q1.extent(1); pt++) + { + TEST_EQUALITY(fieldData2_q1_h(cell, pt), 123.0 + myRank); + TEST_EQUALITY(fieldData2_q1_h(cell, pt).availableSize(), 12); + } + } + } + { + PHX::MDField + fieldData_qedge1(fieldName_qedge1, basis_qedge1->functional); + + fm.getFieldData(fieldData_qedge1); + + auto fieldData_qedge1_h = Kokkos::create_mirror_view(fieldData_qedge1.get_static_view()); + Kokkos::deep_copy(fieldData_qedge1_h, fieldData_qedge1.get_static_view()); + + for (unsigned int cell = 0; cell < fieldData_qedge1.extent(0); ++cell) + { + for (unsigned int pt = 0; pt < fieldData_qedge1.extent(1); ++pt) + { + TEST_EQUALITY(fieldData_qedge1_h(cell, pt), 123.0 + myRank); + TEST_EQUALITY(fieldData_qedge1_h(cell, pt).availableSize(), 12); + } + } + } + + // test Tangent fields + { + PHX::MDField + fieldData1_q1(fieldName1_q1, basis_q1->functional); + PHX::MDField + fieldData2_q1(fieldName2_q1, basis_qedge1->functional); + + fm.getFieldData(fieldData1_q1); + fm.getFieldData(fieldData2_q1); + + auto fieldData1_q1_h = Kokkos::create_mirror_view(fieldData1_q1.get_static_view()); + auto fieldData2_q1_h = Kokkos::create_mirror_view(fieldData2_q1.get_static_view()); + Kokkos::deep_copy(fieldData1_q1_h, fieldData1_q1.get_static_view()); + Kokkos::deep_copy(fieldData2_q1_h, fieldData2_q1.get_static_view()); + + for (unsigned int cell = 0; cell < fieldData1_q1.extent(0); ++cell) + { + for (unsigned int pt = 0; pt < fieldData1_q1.extent(1); pt++) + { + if (enable_tangents) + { + TEST_EQUALITY(fieldData1_q1_h(cell, pt).val(), 123.0 + myRank); + TEST_EQUALITY(fieldData1_q1_h(cell, pt).availableSize(), num_tangent); + for (int i = 0; i < num_tangent; ++i) + TEST_EQUALITY(fieldData1_q1_h(cell, pt).dx(i), 0.123 + myRank + i); + } + else + { + TEST_EQUALITY(fieldData1_q1_h(cell, pt), 123.0 + myRank); + TEST_EQUALITY(fieldData1_q1_h(cell, pt).availableSize(), 0); + } + } + } + for (unsigned int cell = 0; cell < fieldData2_q1.extent(0); ++cell) + { + for (unsigned int pt = 0; pt < fieldData2_q1.extent(1); pt++) + { + if (enable_tangents) + { + TEST_EQUALITY(fieldData2_q1_h(cell, pt).val(), 123.0 + myRank); + TEST_EQUALITY(fieldData2_q1_h(cell, pt).availableSize(), num_tangent); + for (int i = 0; i < num_tangent; ++i) + { + TEST_EQUALITY(fieldData2_q1_h(cell, pt).dx(i), 0.123 + myRank + i); + TEST_EQUALITY(fieldData2_q1_h(cell, pt).dx(i), 0.123 + myRank + i); + } + } + else + { + TEST_EQUALITY(fieldData2_q1_h(cell, pt), 123.0 + myRank); + TEST_EQUALITY(fieldData2_q1_h(cell, pt).availableSize(), 0); + } + } + } + } + { + PHX::MDField + fieldData_qedge1(fieldName_qedge1, basis_qedge1->functional); + + fm.getFieldData(fieldData_qedge1); + + auto fieldData_qedge1_h = Kokkos::create_mirror_view(fieldData_qedge1.get_static_view()); + Kokkos::deep_copy(fieldData_qedge1_h, fieldData_qedge1.get_static_view()); + + for (unsigned int cell = 0; cell < fieldData_qedge1.extent(0); ++cell) + { + for (unsigned int pt = 0; pt < fieldData_qedge1.extent(1); ++pt) + { + if (enable_tangents) + { + TEST_EQUALITY(fieldData_qedge1_h(cell, pt).val(), 123.0 + myRank); + TEST_EQUALITY(fieldData_qedge1_h(cell, pt).availableSize(), num_tangent); + for (int i = 0; i < num_tangent; ++i) + TEST_EQUALITY(fieldData_qedge1_h(cell, pt).dx(i), 0.123 + myRank + i); + } + else + { + TEST_EQUALITY(fieldData_qedge1_h(cell, pt), 123.0 + myRank); + TEST_EQUALITY(fieldData_qedge1_h(cell, pt).availableSize(), 0); + } + } + } + } + } + + Teuchos::RCP buildBasis(std::size_t worksetSize, const std::string &basisName) + { + Teuchos::RCP topo = + Teuchos::rcp(new shards::CellTopology(shards::getCellTopologyData>())); + + panzer::CellData cellData(worksetSize, topo); + return Teuchos::rcp(new panzer::PureBasis(basisName, 1, cellData)); + } + + Teuchos::RCP buildMesh(int elemX, int elemY) + { + Teuchos::RCP pl = rcp(new Teuchos::ParameterList); + pl->set("X Blocks", 1); + pl->set("Y Blocks", 1); + pl->set("X Elements", elemX); + pl->set("Y Elements", elemY); + + panzer_stk::SquareQuadMeshFactory factory; + factory.setParameterList(pl); + Teuchos::RCP mesh = factory.buildUncommitedMesh(MPI_COMM_WORLD); + factory.completeMeshConstruction(*mesh, MPI_COMM_WORLD); + + return mesh; + } + + void testInitialization(const Teuchos::RCP &ipb) + { + // Physics block + ipb->setName("test physics"); + { + Teuchos::ParameterList &p = ipb->sublist("a"); + p.set("Type", "Energy"); + p.set("Prefix", ""); + p.set("Model ID", "solid"); + p.set("Basis Type", "HGrad"); + p.set("Basis Order", 1); + p.set("Integration Order", 1); + } + { + Teuchos::ParameterList &p = ipb->sublist("b"); + p.set("Type", "Energy"); + p.set("Prefix", "ION_"); + p.set("Model ID", "solid"); + p.set("Basis Type", "HCurl"); + p.set("Basis Order", 1); + p.set("Integration Order", 1); + } + } + +} diff --git a/packages/panzer/disc-fe/src/evaluators/Panzer_GatherSolution_BlockedTpetra.hpp b/packages/panzer/disc-fe/src/evaluators/Panzer_GatherSolution_BlockedTpetra.hpp index aec43b41dfbc..6d9bde9d1a3b 100644 --- a/packages/panzer/disc-fe/src/evaluators/Panzer_GatherSolution_BlockedTpetra.hpp +++ b/packages/panzer/disc-fe/src/evaluators/Panzer_GatherSolution_BlockedTpetra.hpp @@ -163,7 +163,7 @@ class GatherSolution_BlockedTpetra public: GatherSolution_BlockedTpetra(const Teuchos::RCP & indexer) - : gidIndexer_(indexer) {} + : globalIndexer_(indexer) {} GatherSolution_BlockedTpetra(const Teuchos::RCP & indexer, const Teuchos::ParameterList& p); @@ -176,13 +176,13 @@ class GatherSolution_BlockedTpetra void evaluateFields(typename TRAITS::EvalData d); virtual Teuchos::RCP clone(const Teuchos::ParameterList & pl) const - { return Teuchos::rcp(new GatherSolution_BlockedTpetra(gidIndexer_,pl)); } + { return Teuchos::rcp(new GatherSolution_BlockedTpetra(globalIndexer_,pl)); } private: typedef typename panzer::Traits::Tangent EvalT; typedef typename panzer::Traits::Tangent::ScalarT ScalarT; - //typedef typename panzer::Traits::RealType RealT; + typedef typename panzer::Traits::RealType RealT; typedef BlockedTpetraLinearObjContainer ContainerType; typedef Tpetra::Vector VectorType; @@ -194,10 +194,14 @@ class GatherSolution_BlockedTpetra // maps the local (field,element,basis) triplet to a global ID // for scattering - Teuchos::RCP gidIndexer_; + Teuchos::RCP globalIndexer_; std::vector fieldIds_; // field IDs needing mapping + //! Returns the index into the Thyra ProductVector sub-block. Size + //! of number of fields to scatter + std::vector productVectorBlockIndex_; + std::vector< PHX::MDField > gatherFields_; std::vector indexerNames_; @@ -206,9 +210,16 @@ class GatherSolution_BlockedTpetra Teuchos::RCP > blockedContainer_; + //! Local indices for unknowns + PHX::View worksetLIDs_; + + //! Offset into the cell lids for each field. Size of number of fields to scatter. + std::vector> fieldOffsets_; + // Fields for storing tangent components dx/dp of solution vector x bool has_tangent_fields_; - std::vector< std::vector< PHX::MDField > > tangentFields_; + std::vector< std::vector< PHX::MDField > > tangentFields_; + PHX::ViewOfViews<2,PHX::View> tangentFieldsVoV_; GatherSolution_BlockedTpetra(); }; diff --git a/packages/panzer/disc-fe/src/evaluators/Panzer_GatherSolution_BlockedTpetra_impl.hpp b/packages/panzer/disc-fe/src/evaluators/Panzer_GatherSolution_BlockedTpetra_impl.hpp index b0ef54fdd70b..52488585d37e 100644 --- a/packages/panzer/disc-fe/src/evaluators/Panzer_GatherSolution_BlockedTpetra_impl.hpp +++ b/packages/panzer/disc-fe/src/evaluators/Panzer_GatherSolution_BlockedTpetra_impl.hpp @@ -8,8 +8,8 @@ // ***************************************************************************** // @HEADER -#ifndef PANZER_GATHER_SOLUTION_BLOCKED_EPETRA_IMPL_HPP -#define PANZER_GATHER_SOLUTION_BLOCKED_EPETRA_IMPL_HPP +#ifndef PANZER_GATHER_SOLUTION_BLOCKED_TPETRA_IMPL_HPP +#define PANZER_GATHER_SOLUTION_BLOCKED_TPETRA_IMPL_HPP #include "Teuchos_Assert.hpp" #include "Phalanx_DataLayout.hpp" @@ -216,7 +216,7 @@ panzer::GatherSolution_BlockedTpetra & indexer, const Teuchos::ParameterList& p) - : gidIndexer_(indexer) + : globalIndexer_(indexer) , has_tangent_fields_(false) { typedef std::vector< std::vector > vvstring; @@ -250,7 +250,7 @@ GatherSolution_BlockedTpetra( tangentFields_[fd].resize(tangent_field_names[fd].size()); for (std::size_t i=0; i(tangent_field_names[fd][i],basis->functional); + PHX::MDField(tangent_field_names[fd][i],basis->functional); this->addDependentField(tangentFields_[fd][i]); } } @@ -268,17 +268,60 @@ GatherSolution_BlockedTpetra( // ********************************************************************** template void panzer::GatherSolution_BlockedTpetra:: -postRegistrationSetup(typename TRAITS::SetupData /* d */, +postRegistrationSetup(typename TRAITS::SetupData d, PHX::FieldManager& /* fm */) { TEUCHOS_ASSERT(gatherFields_.size() == indexerNames_.size()); - fieldIds_.resize(gatherFields_.size()); + const Workset & workset_0 = (*d.worksets_)[0]; + const std::string blockId = this->wda(workset_0).block_id; + fieldIds_.resize(gatherFields_.size()); + fieldOffsets_.resize(gatherFields_.size()); + productVectorBlockIndex_.resize(gatherFields_.size()); + int maxElementBlockGIDCount = -1; for (std::size_t fd = 0; fd < gatherFields_.size(); ++fd) { - // get field ID from DOF manager - const std::string& fieldName = indexerNames_[fd]; - fieldIds_[fd] = gidIndexer_->getFieldNum(fieldName); + + const std::string fieldName = indexerNames_[fd]; + const int globalFieldNum = globalIndexer_->getFieldNum(fieldName); // Field number in the aggregate BlockDOFManager + productVectorBlockIndex_[fd] = globalIndexer_->getFieldBlock(globalFieldNum); + const auto& subGlobalIndexer = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fd]]; + fieldIds_[fd] = subGlobalIndexer->getFieldNum(fieldName); // Field number in the sub-global-indexer + + const std::vector& offsets = subGlobalIndexer->getGIDFieldOffsets(blockId,fieldIds_[fd]); + fieldOffsets_[fd] = PHX::View("GatherSolution_BlockedTpetra(Tangent):fieldOffsets",offsets.size()); + auto hostOffsets = Kokkos::create_mirror_view(fieldOffsets_[fd]); + for (std::size_t i=0; i < offsets.size(); ++i) + hostOffsets(i) = offsets[i]; + Kokkos::deep_copy(fieldOffsets_[fd], hostOffsets); + maxElementBlockGIDCount = std::max(subGlobalIndexer->getElementBlockGIDCount(blockId),maxElementBlockGIDCount); + } + + // We will use one workset lid view for all fields, but has to be + // sized big enough to hold the largest elementBlockGIDCount in the + // ProductVector. + worksetLIDs_ = PHX::View("ScatterResidual_BlockedTpetra(Tangent):worksetLIDs", + gatherFields_[0].extent(0), + maxElementBlockGIDCount); + + // Set up storage for tangentFields using view of views + // We also need storage for the number of tangent fields associated with + // each gatherField + + if (has_tangent_fields_) { + + size_t inner_vector_max_size = 0; + for (std::size_t fd = 0; fd < tangentFields_.size(); ++fd) + inner_vector_max_size = std::max(inner_vector_max_size,tangentFields_[fd].size()); + tangentFieldsVoV_.initialize("GatherSolution_BlockedTpetra::tangentFieldsVoV_",gatherFields_.size(),inner_vector_max_size); + + for (std::size_t fd = 0; fd < gatherFields_.size(); ++fd) { + for (std::size_t i=0; i void panzer::GatherSolution_BlockedTpetra:: evaluateFields(typename TRAITS::EvalData workset) { - using Teuchos::RCP; - using Teuchos::ArrayRCP; - using Teuchos::ptrFromRef; - using Teuchos::rcp_dynamic_cast; - - using Thyra::VectorBase; - using Thyra::SpmdVectorBase; - using Thyra::ProductVectorBase; + using Teuchos::RCP; + using Teuchos::ArrayRCP; + using Teuchos::ptrFromRef; + using Teuchos::rcp_dynamic_cast; - Teuchos::FancyOStream out(Teuchos::rcpFromRef(std::cout)); - out.setShowProcRank(true); - out.setOutputToRootOnly(-1); + using Thyra::VectorBase; + using Thyra::SpmdVectorBase; + using Thyra::ProductVectorBase; - std::vector > GIDs; - std::vector LIDs; + Teuchos::FancyOStream out(Teuchos::rcpFromRef(std::cout)); + out.setShowProcRank(true); + out.setOutputToRootOnly(-1); - // for convenience pull out some objects from workset - std::string blockId = this->wda(workset).block_id; - const std::vector & localCellIds = this->wda(workset).cell_local_ids; + const PHX::View & localCellIds = this->wda(workset).getLocalCellIDs(); - Teuchos::RCP > x; - if (useTimeDerivativeSolutionVector_) - x = rcp_dynamic_cast >(blockedContainer_->get_dxdt()); - else - x = rcp_dynamic_cast >(blockedContainer_->get_x()); + Teuchos::RCP > blockedSolution; + if (useTimeDerivativeSolutionVector_) + blockedSolution = rcp_dynamic_cast >(blockedContainer_->get_dxdt()); + else + blockedSolution = rcp_dynamic_cast >(blockedContainer_->get_x()); - // gather operation for each cell in workset - for(std::size_t worksetCellIndex=0;worksetCellIndexgetFieldDOFManagers()[productVectorBlockIndex_[fieldIndex]]; + const std::string blockId = this->wda(workset).block_id; + const int num_dofs = globalIndexer_->getFieldDOFManagers()[productVectorBlockIndex_[fieldIndex]]->getElementBlockGIDCount(blockId); + blockIndexer->getElementLIDs(localCellIds,worksetLIDs_,num_dofs); + currentWorksetLIDSubBlock = productVectorBlockIndex_[fieldIndex]; + } - gidIndexer_->getElementGIDsPair(cellLocalId,GIDs,blockId); + const int blockRowIndex = productVectorBlockIndex_[fieldIndex]; + const auto& subblockSolution = *((rcp_dynamic_cast>(blockedSolution->getNonconstVectorBlock(blockRowIndex),true))->getTpetraVector()); + const auto kokkosSolution = subblockSolution.getLocalViewDevice(Tpetra::Access::ReadOnly); - // caculate the local IDs for this element - LIDs.resize(GIDs.size()); - for(std::size_t i=0;i x_map = blockedContainer_->getMapForBlock(GIDs[i].first); + // Class data fields for lambda capture + const PHX::View fieldOffsets = fieldOffsets_[fieldIndex]; + const PHX::View worksetLIDs = worksetLIDs_; + const PHX::View fieldValues = gatherFields_[fieldIndex].get_static_view(); - LIDs[i] = x_map->getLocalElement(GIDs[i].second); - } + if (has_tangent_fields_) { + const int numTangents = tangentFields_[fieldIndex].size(); + const auto tangentFieldsDevice = tangentFieldsVoV_.getViewDevice(); + const auto kokkosTangents = Kokkos::subview(tangentFieldsDevice,fieldIndex,Kokkos::ALL()); + Kokkos::parallel_for(Kokkos::RangePolicy(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) { + for (int basis=0; basis < static_cast(fieldOffsets.size()); ++basis) { + const int rowLID = worksetLIDs(cell,fieldOffsets(basis)); + fieldValues(cell,basis).zero(); + fieldValues(cell,basis).val() = kokkosSolution(rowLID,0); + for (int i_tangent=0; i_tangent(0,workset.num_cells), KOKKOS_LAMBDA (const int& cell) { + for (int basis=0; basis < static_cast(fieldOffsets.size()); ++basis) { + const int rowLID = worksetLIDs(cell,fieldOffsets(basis)); + fieldValues(cell,basis).zero(); + fieldValues(cell,basis) = kokkosSolution(rowLID,0); + } + }); + } + } - // loop over the fields to be gathered - Teuchos::ArrayRCP local_x; - for (std::size_t fieldIndex=0; fieldIndexgetFieldBlock(fieldNum); - - // grab local data for inputing - RCP > block_x = rcp_dynamic_cast >(x->getNonconstVectorBlock(indexerId)); - block_x->getLocalData(ptrFromRef(local_x)); - - const std::vector & elmtOffset = gidIndexer_->getGIDFieldOffsets(blockId,fieldNum); - - // loop over basis functions and fill the fields - for(std::size_t basis=0;basis::non_const_value_type >::value , "Can only deep copy into non-const type" ); - Kokkos::fence(); - Kokkos::Impl::DynRankViewFill< DynRankView >( view , value ); - Kokkos::fence(); + switch(view.rank()) { + case 0: deep_copy(Impl::as_view_of_rank_n<0>(view), value); break; + case 1: deep_copy(Impl::as_view_of_rank_n<1>(view), value); break; + case 2: deep_copy(Impl::as_view_of_rank_n<2>(view), value); break; + case 3: deep_copy(Impl::as_view_of_rank_n<3>(view), value); break; + case 4: deep_copy(Impl::as_view_of_rank_n<4>(view), value); break; + case 5: deep_copy(Impl::as_view_of_rank_n<5>(view), value); break; + case 6: deep_copy(Impl::as_view_of_rank_n<6>(view), value); break; + case 7: deep_copy(Impl::as_view_of_rank_n<7>(view), value); break; + } } // Overload of deep_copy for Fad views intializing to a constant Fad @@ -1010,9 +1017,16 @@ void deep_copy( typename ViewTraits::non_const_value_type >::value , "Can only deep copy into non-const type" ); - Kokkos::fence(); - Kokkos::Impl::DynRankViewFill< DynRankView >( view , value ); - Kokkos::fence(); + switch(view.rank()) { + case 0: deep_copy(Impl::as_view_of_rank_n<0>(view), value); break; + case 1: deep_copy(Impl::as_view_of_rank_n<1>(view), value); break; + case 2: deep_copy(Impl::as_view_of_rank_n<2>(view), value); break; + case 3: deep_copy(Impl::as_view_of_rank_n<3>(view), value); break; + case 4: deep_copy(Impl::as_view_of_rank_n<4>(view), value); break; + case 5: deep_copy(Impl::as_view_of_rank_n<5>(view), value); break; + case 6: deep_copy(Impl::as_view_of_rank_n<6>(view), value); break; + case 7: deep_copy(Impl::as_view_of_rank_n<7>(view), value); break; + } } template< class DstType , class SrcType >