Skip to content

Commit

Permalink
Merge Pull Request trilinos#12872 from trilinos/Trilinos/master_merge_20240329_175926
Browse files Browse the repository at this point in the history

Automatically Merged using Trilinos Master Merge AutoTester
PR Title: b'Trilinos Master Merge PR Generator: Auto PR created to promote from master_merge_20240329_175926 branch to master'
PR Author: trilinos-autotester
  • Loading branch information
trilinos-autotester authored Mar 30, 2024
2 parents 7001783 + 6c485b0 commit 88b2d6f
Show file tree
Hide file tree
Showing 981 changed files with 33,490 additions and 30,022 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dependency-review.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@ jobs:
- name: 'Checkout Repository'
uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2
- name: 'Dependency Review'
uses: actions/dependency-review-action@9129d7d40b8c12c1ed0f60400d00c92d437adcce # v4.1.3
uses: actions/dependency-review-action@733dd5d4a5203f238c33806593ec0f5fc5343d8c # v4.2.4
2 changes: 1 addition & 1 deletion .github/workflows/scorecards.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,6 @@ jobs:

# Upload the results to GitHub's code scanning dashboard.
- name: "Upload to code-scanning"
uses: github/codeql-action/upload-sarif@05963f47d870e2cb19a537396c1f668a348c7d8f # v3.24.8
uses: github/codeql-action/upload-sarif@1b1aada464948af03b950897e5eb522f92603cc2 # v3.24.9
with:
sarif_file: results.sarif
79 changes: 77 additions & 2 deletions packages/framework/ini-files/config-specs.ini
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,16 @@ opt-set-cmake-var TPL_METIS_LIBRARIES STRING FORCE : ${METIS_LIB|ENV}/libmetis.s
opt-set-cmake-var Zlib_INCLUDE_DIRS PATH FORCE : ${ZLIB_INC|ENV}
opt-set-cmake-var Zlib_LIBRARY_DIRS PATH FORCE : ${ZLIB_LIB|ENV}

[COMMON_AUE_SPACK]
# Option set pulled in via `use COMMON_AUE_SPACK` by the rhel8_aue-gcc-openmpi configuration.
# It inherits everything from COMMON_SPACK_TPLS and then applies the overrides below.
use COMMON_SPACK_TPLS

# Overrides from [COMMON_SPACK_TPLS] to let container handle the values
# (each library list is forced to an empty string here).
opt-set-cmake-var TPL_Netcdf_LIBRARIES STRING FORCE : ""
opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : ""
opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : ""
opt-set-cmake-var TPL_HDF5_LIBRARIES STRING FORCE : ""

# SuperLU library names are given as a two-entry list: superlu and m.
opt-set-cmake-var SuperLU_LIBRARY_NAMES STRING FORCE : superlu;m

[COMMON_USE-MPI|NO]
use COMMON
Expand Down Expand Up @@ -2793,6 +2803,60 @@ use PACKAGE-ENABLES|NO-PACKAGE-ENABLES

use COMMON_SPACK_TPLS

opt-set-cmake-var TPL_BLAS_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib
opt-set-cmake-var TPL_BLAS_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm
opt-set-cmake-var TPL_LAPACK_LIBRARY_DIRS STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib
opt-set-cmake-var TPL_LAPACK_LIBRARIES STRING FORCE : ${OPENBLAS_ROOT|ENV}/lib/libopenblas.a;-L${OPENBLAS_ROOT|ENV}/lib;-lgfortran;-lgomp;-lm

opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self
opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS

opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE : ON
opt-set-cmake-var TPL_ENABLE_ParMETIS BOOL FORCE : ON
opt-set-cmake-var Amesos_ENABLE_SuperLU BOOL FORCE : OFF
opt-set-cmake-var Amesos_ENABLE_SuperLUDist BOOL FORCE : OFF
opt-set-cmake-var Amesos2_ENABLE_SuperLU BOOL FORCE : OFF
opt-set-cmake-var Amesos2_ENABLE_SuperLUDist BOOL FORCE : OFF
opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF

opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON
opt-set-cmake-var KokkosKernels_blas_serial_MPI_1_DISABLE BOOL : ON
opt-set-cmake-var ROL_example_PDE-OPT_helmholtz_example_02_MPI_1_DISABLE BOOL : ON
opt-set-cmake-var ROL_example_PDE-OPT_navier-stokes_example_01_MPI_4_DISABLE BOOL : ON
opt-set-cmake-var Pliris_vector_random_MPI_3_DISABLE BOOL : ON
opt-set-cmake-var Pliris_vector_random_MPI_4_DISABLE BOOL : ON

opt-set-cmake-var Zoltan_ch_simple_parmetis_parallel_DISABLE BOOL : ON
opt-set-cmake-var Belos_bl_gmres_complex_hb_3_MPI_4_DISABLE BOOL : ON
opt-set-cmake-var Belos_hybrid_gmres_complex_hb_0_MPI_4_DISABLE BOOL : ON
opt-set-cmake-var Belos_hybrid_gmres_complex_hb_1_MPI_4_DISABLE BOOL : ON
opt-set-cmake-var Belos_gcrodr_complex_hb_MPI_4_DISABLE BOOL : ON
opt-set-cmake-var Belos_Tpetra_gcrodr_complex_hb_MPI_4_DISABLE BOOL : ON
opt-set-cmake-var Stratimikos_Galeri_xpetra_complex_double_Jacobi_MPI_4_DISABLE BOOL : ON

use GCC_PACKAGE_SPECIFIC_WARNING_FLAGS
use RHEL8_POST

[rhel8_aue-gcc-openmpi_debug_shared_no-kokkos-arch_no-asan_complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables]
use COMPILER|GNU
use NODE-TYPE|SERIAL
use BUILD-TYPE|DEBUG

use RHEL8_LIB-TYPE|SHARED
use KOKKOS-ARCH|NO-KOKKOS-ARCH

use USE-ASAN|NO
use USE-FPIC|NO
use USE-MPI|YES
use USE-PT|NO
use USE-COMPLEX|YES
use USE-RDC|NO
use USE-UVM|NO
use USE-DEPRECATED|YES
use PACKAGE-ENABLES|NO-PACKAGE-ENABLES

use COMMON_AUE_SPACK

opt-set-cmake-var MPI_EXEC_PRE_NUMPROCS_FLAGS STRING : --bind-to;none --mca btl vader,self
opt-set-cmake-var Teko_DISABLE_LSCSTABALIZED_TPETRA_ALPAH_INV_D BOOL : ON
opt-set-cmake-var KokkosKernels_blas_serial_MPI_1_DISABLE BOOL : ON
Expand All @@ -2802,17 +2866,28 @@ opt-set-cmake-var Pliris_vector_random_MPI_3_DISABLE BOO
opt-set-cmake-var Pliris_vector_random_MPI_4_DISABLE BOOL : ON
opt-set-cmake-var CMAKE_CXX_FLAGS STRING FORCE : -Wall -Wno-clobbered -Wno-vla -Wno-pragmas -Wno-unknown-pragmas -Wno-unused-local-typedefs -Wno-literal-suffix -Wno-deprecated-declarations -Wno-misleading-indentation -Wno-int-in-bool-context -Wno-maybe-uninitialized -Wno-nonnull-compare -Wno-address -Wno-inline -DTRILINOS_HIDE_DEPRECATED_HEADER_WARNINGS

opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE: ON
opt-set-cmake-var TPL_ENABLE_SuperLUDist BOOL FORCE: OFF

# Turn off Framework tests due to issues in AUE container
opt-set-cmake-var Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF
opt-set-cmake-var Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF

# Turned off to bypass: ML CONFIGURATION ERROR: SuperLU_5.0 detected - only SuperLU version < 5.0 currently supported for this package.
opt-set-cmake-var ML_ENABLE_SuperLU BOOL FORCE : OFF

use GCC_PACKAGE_SPECIFIC_WARNING_FLAGS

use RHEL8_POST


[rhel8_gcc-openmpi_debug_shared_no-kokkos-arch_no-asan_complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_all]
use rhel8_gcc-openmpi_debug_shared_no-kokkos-arch_no-asan_complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables
use PACKAGE-ENABLES|ALL
opt-set-cmake-var Trilinos_ENABLE_TrilinosFrameworkTests BOOL FORCE : OFF
opt-set-cmake-var Trilinos_ENABLE_TrilinosBuildStats BOOL FORCE : OFF

[rhel8_aue-gcc-openmpi_debug_shared_no-kokkos-arch_no-asan_complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_all]
# "_all" variant: reuses the matching "_no-package-enables" configuration unchanged
# and then applies the PACKAGE-ENABLES|ALL option set on top of it.
use rhel8_aue-gcc-openmpi_debug_shared_no-kokkos-arch_no-asan_complex_no-fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_no-package-enables
use PACKAGE-ENABLES|ALL

[rhel8_oneapi-intelmpi_release-debug_shared_no-kokkos-arch_no-asan_no-complex_fpic_mpi_no-pt_no-rdc_no-uvm_deprecated-on_all]
use BUILD-TYPE|RELEASE-DEBUG
Expand Down
2 changes: 2 additions & 0 deletions packages/framework/ini-files/environment-specs.ini
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ envvar-find-in-path MPIF90 : mpif90

[rhel8_gcc-openmpi]

[rhel8_aue-gcc-openmpi]

[rhel8_oneapi-intelmpi]
use MPI-COMPILER-VARS
envvar-find-in-path I_MPI_CXX : icpx
Expand Down
1 change: 1 addition & 0 deletions packages/framework/ini-files/supported-envs.ini
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ gnu
[rhel8]
oneapi-intelmpi
gcc-openmpi
aue-gcc-openmpi
sems-cuda-11.4.2-sems-gnu-10.1.0-sems-openmpi-4.1.4

[ats2]
Expand Down
100 changes: 76 additions & 24 deletions packages/ifpack2/src/Ifpack2_BlockTriDiContainer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -549,12 +549,25 @@ namespace Ifpack2 {

// 0. post receive async
for (local_ordinal_type i=0,iend=pids.recv.extent(0);i<iend;++i) {
irecv(comm,
reinterpret_cast<char*>(buffer.recv.data() + offset_host.recv[i]*mv_blocksize),
(offset_host.recv[i+1] - offset_host.recv[i])*mv_blocksize*sizeof(impl_scalar_type),
pids.recv[i],
42,
&reqs.recv[i]);
// Pick the receive target according to Tpetra's assumeMpiIsGPUAware() setting.
if(Tpetra::Details::Behavior::assumeMpiIsGPUAware()) {
// Receive directly into buffer.recv at the offset for entry i; the length argument is in bytes.
irecv(comm,
reinterpret_cast<char*>(buffer.recv.data() + offset_host.recv[i]*mv_blocksize),
(offset_host.recv[i+1] - offset_host.recv[i])*mv_blocksize*sizeof(impl_scalar_type),
pids.recv[i],
42,
&reqs.recv[i]);
}
else {
// Receive into a host mirror of buffer.recv, then copy the mirror into buffer.recv.
// NOTE(review): this loop is labelled "post receive async" and irecv is handed a request
// handle, so the message has presumably not arrived yet when irecv returns. If so, the
// deep_copy below copies the mirror before it has been filled, and (when the mirror is a
// separate allocation) the mirror is a loop-local that is released while the request is
// still pending. Confirm whether the copy-back belongs after the requests have completed.
// NOTE(review): the mirror covers the whole of buffer.recv and is created and copied on
// every iteration, although only the slice for entry i is received here.
const auto buffer_recv_host = Kokkos::create_mirror_view(
Kokkos::view_alloc(Kokkos::WithoutInitializing), buffer.recv);
irecv(comm,
reinterpret_cast<char*>(buffer_recv_host.data() + offset_host.recv[i]*mv_blocksize),
(offset_host.recv[i+1] - offset_host.recv[i])*mv_blocksize*sizeof(impl_scalar_type),
pids.recv[i],
42,
&reqs.recv[i]);
Kokkos::deep_copy(buffer.recv, buffer_recv_host);
}
}

/// This is necessary to pass the unit tests: somewhere, work overlaps on the default execution space.
Expand All @@ -576,12 +589,25 @@ namespace Ifpack2 {
for (local_ordinal_type i=0;i<static_cast<local_ordinal_type>(pids.send.extent(0));++i) {
// 1.1. sync the stream and isend
if (i<8) exec_instances[i%8].fence();
isend(comm,
reinterpret_cast<const char*>(buffer.send.data() + offset_host.send[i]*mv_blocksize),
(offset_host.send[i+1] - offset_host.send[i])*mv_blocksize*sizeof(impl_scalar_type),
pids.send[i],
42,
&reqs.send[i]);
// Pick the send source according to Tpetra's assumeMpiIsGPUAware() setting.
if(Tpetra::Details::Behavior::assumeMpiIsGPUAware()) {
// Send directly from buffer.send at the offset for entry i; the length argument is in bytes.
isend(comm,
reinterpret_cast<const char*>(buffer.send.data() + offset_host.send[i]*mv_blocksize),
(offset_host.send[i+1] - offset_host.send[i])*mv_blocksize*sizeof(impl_scalar_type),
pids.send[i],
42,
&reqs.send[i]);
}
else {
// Copy buffer.send into a host mirror and send from the mirror instead.
// NOTE(review): isend is handed a request handle, so the send has presumably not
// completed when it returns. When the mirror is a separate allocation it is local to
// this branch and is released at the closing brace while the request may still be
// reading from it. Confirm the mirror's lifetime covers completion of the request.
// NOTE(review): the whole of buffer.send is mirrored and copied on every iteration,
// while only the stream for entry i was fenced just above and only that entry's slice
// is sent. Confirm this is intended.
const auto buffer_send_host = Kokkos::create_mirror_view(
Kokkos::view_alloc(Kokkos::WithoutInitializing), buffer.send);
Kokkos::deep_copy(buffer_send_host, buffer.send);
isend(comm,
reinterpret_cast<const char*>(buffer_send_host.data() + offset_host.send[i]*mv_blocksize),
(offset_host.send[i+1] - offset_host.send[i])*mv_blocksize*sizeof(impl_scalar_type),
pids.send[i],
42,
&reqs.send[i]);
}
}

// 2. poke communication
Expand Down Expand Up @@ -696,25 +722,51 @@ namespace Ifpack2 {

// receive async
for (local_ordinal_type i=0,iend=pids.recv.extent(0);i<iend;++i) {
irecv(comm,
reinterpret_cast<char*>(buffer.recv.data() + offset_host.recv[i]*mv_blocksize),
(offset_host.recv[i+1] - offset_host.recv[i])*mv_blocksize*sizeof(impl_scalar_type),
pids.recv[i],
42,
&reqs.recv[i]);
// Pick the receive target according to Tpetra's assumeMpiIsGPUAware() setting.
if(Tpetra::Details::Behavior::assumeMpiIsGPUAware()) {
// Receive directly into buffer.recv at the offset for entry i; the length argument is in bytes.
irecv(comm,
reinterpret_cast<char*>(buffer.recv.data() + offset_host.recv[i]*mv_blocksize),
(offset_host.recv[i+1] - offset_host.recv[i])*mv_blocksize*sizeof(impl_scalar_type),
pids.recv[i],
42,
&reqs.recv[i]);
}
else {
// Receive into a host mirror of buffer.recv, then copy the mirror into buffer.recv.
// NOTE(review): this loop is labelled "receive async" and irecv is handed a request
// handle, so the message has presumably not arrived yet when irecv returns. If so, the
// deep_copy below copies the mirror before it has been filled, and (when the mirror is a
// separate allocation) the mirror is a loop-local that is released while the request is
// still pending. Confirm whether the copy-back belongs after the requests have completed.
// NOTE(review): the mirror covers the whole of buffer.recv and is created and copied on
// every iteration, although only the slice for entry i is received here.
const auto buffer_recv_host = Kokkos::create_mirror_view(
Kokkos::view_alloc(Kokkos::WithoutInitializing), buffer.recv);
irecv(comm,
reinterpret_cast<char*>(buffer_recv_host.data() + offset_host.recv[i]*mv_blocksize),
(offset_host.recv[i+1] - offset_host.recv[i])*mv_blocksize*sizeof(impl_scalar_type),
pids.recv[i],
42,
&reqs.recv[i]);
Kokkos::deep_copy(buffer.recv, buffer_recv_host);
}
}

// send async
for (local_ordinal_type i=0,iend=pids.send.extent(0);i<iend;++i) {
copy<ToBuffer>(lids.send, buffer.send, offset_host.send(i), offset_host.send(i+1),
mv, blocksize);
Kokkos::fence();
isend(comm,
reinterpret_cast<const char*>(buffer.send.data() + offset_host.send[i]*mv_blocksize),
(offset_host.send[i+1] - offset_host.send[i])*mv_blocksize*sizeof(impl_scalar_type),
pids.send[i],
42,
&reqs.send[i]);
// Pick the send source according to Tpetra's assumeMpiIsGPUAware() setting.
if(Tpetra::Details::Behavior::assumeMpiIsGPUAware()) {
// Send directly from buffer.send at the offset for entry i; the length argument is in bytes.
isend(comm,
reinterpret_cast<const char*>(buffer.send.data() + offset_host.send[i]*mv_blocksize),
(offset_host.send[i+1] - offset_host.send[i])*mv_blocksize*sizeof(impl_scalar_type),
pids.send[i],
42,
&reqs.send[i]);
}
else {
// Copy buffer.send into a host mirror and send from the mirror instead.
// NOTE(review): this loop is labelled "send async" and isend is handed a request handle,
// so the send has presumably not completed when it returns. When the mirror is a separate
// allocation it is local to this branch and is released at the closing brace while the
// request may still be reading from it. Confirm the mirror's lifetime covers completion.
// NOTE(review): the whole of buffer.send is mirrored and copied on every iteration,
// although only the slice for entry i was packed just above and is sent here.
const auto buffer_send_host = Kokkos::create_mirror_view(
Kokkos::view_alloc(Kokkos::WithoutInitializing), buffer.send);
Kokkos::deep_copy(buffer_send_host, buffer.send);
isend(comm,
reinterpret_cast<const char*>(buffer_send_host.data() + offset_host.send[i]*mv_blocksize),
(offset_host.send[i+1] - offset_host.send[i])*mv_blocksize*sizeof(impl_scalar_type),
pids.send[i],
42,
&reqs.send[i]);
}
}

// I find that issuing an Iprobe seems to nudge some MPIs into action,
Expand Down
Loading

0 comments on commit 88b2d6f

Please sign in to comment.