From 4d3b39ddf9b6949e2040f5c67b37cef178b1dcce Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 29 May 2024 20:19:35 -0500 Subject: [PATCH 1/6] datatype: add MPIR_Typerep_test in typerep_dataloop_pack.c Even though the dataloop does not support nonblocking pack/unpack, we need to add a dummy implementation for the typerep API -- MPIR_Typerep_test. --- src/mpi/datatype/typerep/src/typerep_dataloop_pack.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/mpi/datatype/typerep/src/typerep_dataloop_pack.c b/src/mpi/datatype/typerep/src/typerep_dataloop_pack.c index 8a9b4a01c03..1d2a1d9c909 100644 --- a/src/mpi/datatype/typerep/src/typerep_dataloop_pack.c +++ b/src/mpi/datatype/typerep/src/typerep_dataloop_pack.c @@ -8,6 +8,12 @@ #include "typerep_pre.h" #include "typerep_internal.h" +int MPIR_Typerep_test(MPIR_Typerep_req typerep_req, int *completed) +{ + *completed = 1; + return MPI_SUCCESS; +} + int MPIR_Typerep_icopy(void *outbuf, const void *inbuf, MPI_Aint num_bytes, MPIR_Typerep_req * typerep_req, uint32_t flags) { From bd878374e525764e41700825bb5bab5767fb5f43 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 29 May 2024 20:22:58 -0500 Subject: [PATCH 2/6] datatype/dataloop: fix iov routines Fix the dataloop implementation of MPIR_Typerep_to_iov_offset and MPIR_Typerep_iov_len. Both missed the case of a contiguous type with a count greater than one. 
--- src/mpi/datatype/typerep/src/typerep_dataloop_iov.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mpi/datatype/typerep/src/typerep_dataloop_iov.c b/src/mpi/datatype/typerep/src/typerep_dataloop_iov.c index daea200bdcb..f544983d31f 100644 --- a/src/mpi/datatype/typerep/src/typerep_dataloop_iov.c +++ b/src/mpi/datatype/typerep/src/typerep_dataloop_iov.c @@ -43,7 +43,7 @@ int MPIR_Typerep_to_iov_offset(const void *buf, MPI_Aint count, MPI_Datatype typ typesize = MPIR_Datatype_get_basic_size(type); if (max_iov_len >= 1) { iov[0].iov_base = (char *) buf; - iov[0].iov_len = typesize; + iov[0].iov_len = typesize * count; *actual_iov_len = 1; } else { *actual_iov_len = 0; @@ -84,7 +84,7 @@ int MPIR_Typerep_iov_len(MPI_Aint count, MPI_Datatype type, MPI_Aint max_iov_byt } if (max_iov_bytes >= count * type_size) { - *iov_len = count * num_contig; + *iov_len = is_contig ? 1 : count * num_contig; if (actual_iov_bytes) { *actual_iov_bytes = count * type_size; } From cc098931df6a15caa0033a87de24e43d4bb47628 Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Wed, 29 May 2024 20:31:51 -0500 Subject: [PATCH 3/6] ch4/ofi: fix MPIDI_OFI_gpu_rma_enabled The check only applies to GPU memory pointers. All other cases should return true. For example, host memory certainly should not prevent use of the native RMA path. 
--- src/mpid/ch4/netmod/ofi/ofi_rma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mpid/ch4/netmod/ofi/ofi_rma.h b/src/mpid/ch4/netmod/ofi/ofi_rma.h index 15d2ffba128..c288145ad74 100644 --- a/src/mpid/ch4/netmod/ofi/ofi_rma.h +++ b/src/mpid/ch4/netmod/ofi/ofi_rma.h @@ -54,7 +54,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_gpu_rma_enabled(const void *ptr) return 0; } } - return 0; + return 1; } MPL_STATIC_INLINE_PREFIX void MPIDI_OFI_query_acc_atomic_support(MPI_Datatype dt, int query_type, From f1f8a162d0cc5de023c7aa6dc92a19ec659f4eee Mon Sep 17 00:00:00 2001 From: Hui Zhou Date: Fri, 31 May 2024 16:21:08 -0500 Subject: [PATCH 4/6] fixup! datatype/dataloop: fix iov routines --- src/mpi/datatype/typerep/src/typerep_dataloop_iov.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/mpi/datatype/typerep/src/typerep_dataloop_iov.c b/src/mpi/datatype/typerep/src/typerep_dataloop_iov.c index f544983d31f..a8f715a6989 100644 --- a/src/mpi/datatype/typerep/src/typerep_dataloop_iov.c +++ b/src/mpi/datatype/typerep/src/typerep_dataloop_iov.c @@ -38,9 +38,19 @@ int MPIR_Typerep_to_iov_offset(const void *buf, MPI_Aint count, MPI_Datatype typ { int mpi_errno = MPI_SUCCESS; + int is_contig; + MPI_Aint typesize; if (HANDLE_IS_BUILTIN(type)) { - MPI_Aint typesize; + is_contig = 1; typesize = MPIR_Datatype_get_basic_size(type); + } else { + MPIR_Datatype *dt_ptr; + MPIR_Datatype_get_ptr(type, dt_ptr); + is_contig = dt_ptr->is_contig; + typesize = dt_ptr->size; + } + + if (is_contig) { if (max_iov_len >= 1) { iov[0].iov_base = (char *) buf; iov[0].iov_len = typesize * count; From 350dbaaca18c136ff8cc3cc74d46706a74c09b0f Mon Sep 17 00:00:00 2001 From: Ken Raffenetti Date: Sat, 22 Jun 2024 14:41:44 -0500 Subject: [PATCH 5/6] modules: update libfabric Add ofiwg/libfabric#10096. Fixes pmodels/mpich#7014. 
--- modules/libfabric | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/libfabric b/modules/libfabric index db9ff1bdbdb..f37dbb95da8 160000 --- a/modules/libfabric +++ b/modules/libfabric @@ -1 +1 @@ -Subproject commit db9ff1bdbdb1d6f70fa216a626face5f6a902a7e +Subproject commit f37dbb95da86f40094283972c4ee3323abdc8342 From f30d5da3e2d2c05ae277ad3135ce4dd2ef5239c4 Mon Sep 17 00:00:00 2001 From: Ken Raffenetti Date: Mon, 17 Jun 2024 10:36:39 -0500 Subject: [PATCH 6/6] Update CHANGES and version.m4 for 4.2.2 --- CHANGES | 24 ++++++++++++++++++++++++ maint/version.m4 | 4 ++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/CHANGES b/CHANGES index a19220a382a..844fffde6af 100644 --- a/CHANGES +++ b/CHANGES @@ -1,3 +1,27 @@ +=============================================================================== + Changes in 4.2.2 +=============================================================================== + +# Update embedded libfabric to v1.20.1 and fix compilation with GCC 14. + +# Fix dataloop support for MPIX_Type_iov routines + +# Fix crash in Hydra when system has many local ip addresses + +# Fix RMA fallback check in ch4:ofi netmod + +# Fix MPI_UNDEFINED handling in mpi_f08 module + +# Fix Slurm environment variable inheritance in Hydra + +# Fix multiple issues with large count support in ROMIO + +# Fix potential hang in init using PMIx client and nonstandard keys + +# Fix crash if PMIx client cannot get appnum from server during init + +# Fix other build errors and warnings + =============================================================================== Changes in 4.2.1 =============================================================================== diff --git a/maint/version.m4 b/maint/version.m4 index 84071229f5b..4e4b472c1da 100644 --- a/maint/version.m4 +++ b/maint/version.m4 @@ -14,7 +14,7 @@ # changing this by playing with diversions, but then we would probably be # playing with autotools-fire. 
-m4_define([MPICH_VERSION_m4],[4.2.1])dnl +m4_define([MPICH_VERSION_m4],[4.2.2])dnl m4_define([MPICH_RELEASE_DATE_m4],[unreleased development copy])dnl # For libtool ABI versioning rules see: @@ -36,7 +36,7 @@ m4_define([MPICH_RELEASE_DATE_m4],[unreleased development copy])dnl # standard, and does not include MPIX_ functions and C++ bindings. # Use [0:0:0] for unstable (e.g. alpha and beta) releases. -# last version: 4.2.1 - 16:1:4 +# last version: 4.2.2 - 16:2:4 m4_define([libmpi_so_version_m4],[0:0:0])dnl