Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ch4/shm: fix performance degradation on Sapphire Rapids with Intel Compiler #7150

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 81 additions & 3 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -906,11 +906,14 @@ for option in $enable_fast ; do
;;
alwaysinline) # No op in MPICH. See mpl/configure.ac
;;
sse2) # No op in MPICH. See mpl/configure.ac
sse2)
enable_fast_sse2_instr=yes
;;
avx) # No op in MPICH. See mpl/configure.ac
avx)
enable_fast_avx_instr=yes
;;
avx512f) # No op in MPICH. See mpl/configure.ac
avx512f)
enable_fast_avx512f_instr=yes
;;
all|yes)
enable_fast_ndebug=yes
Expand Down Expand Up @@ -955,6 +958,81 @@ if test -z "$enable_fast_no_strict_alignment" ; then
AC_DEFINE(NEEDS_STRICT_ALIGNMENT,1,[Define if strict alignment memory access is required])
fi

# Handle --enable-fast=sse2: add -msse2 to CFLAGS only when the compiler
# accepts the flag AND a small SSE2 program built with it runs on this host.
if test "$enable_fast_sse2_instr" = "yes" ; then
    # AC_CACHE_CHECK takes exactly three arguments: message, cache variable
    # and the commands that set it. Extra trailing arguments are discarded.
    AC_CACHE_CHECK([whether -msse2 is supported], pac_cv_found_sse2,
        [PAC_C_CHECK_COMPILER_OPTION([-msse2],pac_cv_found_sse2=yes,pac_cv_found_sse2=no)])

    # The run test needs -msse2 in CFLAGS. The append happens between
    # PAC_PUSH_FLAG and PAC_POP_FLAG, presumably so that CFLAGS is restored
    # afterwards. NOTE(review): confirm against the PAC_ macro definitions.
    PAC_PUSH_FLAG([CFLAGS])
    PAC_APPEND_FLAG([-msse2],[CFLAGS])
    AC_CACHE_CHECK([whether SSE2 is supported by the CPU], pac_cv_found_sse2_runnable,[
        AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <emmintrin.h>

int main() {
    __m128i a = _mm_set1_epi32(1);
    __asm__ volatile("" : : "x" (a) : "memory");
    return 0;
}
        ]])], pac_cv_found_sse2_runnable="yes",
              pac_cv_found_sse2_runnable="no",
              pac_cv_found_sse2_runnable="unknown")
    ])
    PAC_POP_FLAG([CFLAGS])

    # The result is unknown when cross compiling, which is treated like no.
    if test "$pac_cv_found_sse2" = "yes" && test "$pac_cv_found_sse2_runnable" = "yes"; then
        PAC_APPEND_FLAG([-msse2],[CFLAGS])
    else
        # Do not silently drop an option the user asked for explicitly.
        AC_MSG_WARN([--enable-fast=sse2 was requested but -msse2 is not usable (compiler accepts flag: $pac_cv_found_sse2, run test: $pac_cv_found_sse2_runnable); not adding -msse2 to CFLAGS])
    fi
fi

# Handle --enable-fast=avx: add -mavx to CFLAGS only when the compiler
# accepts the flag AND a small AVX program built with it runs on this host.
if test "$enable_fast_avx_instr" = "yes" ; then
    # AC_CACHE_CHECK takes exactly three arguments: message, cache variable
    # and the commands that set it. Extra trailing arguments are discarded.
    AC_CACHE_CHECK([whether -mavx is supported], pac_cv_found_avx,
        [PAC_C_CHECK_COMPILER_OPTION([-mavx],pac_cv_found_avx=yes,pac_cv_found_avx=no)])

    # The run test needs -mavx in CFLAGS. The append happens between
    # PAC_PUSH_FLAG and PAC_POP_FLAG, presumably so that CFLAGS is restored
    # afterwards. NOTE(review): confirm against the PAC_ macro definitions.
    PAC_PUSH_FLAG([CFLAGS])
    PAC_APPEND_FLAG([-mavx],[CFLAGS])
    AC_CACHE_CHECK([whether AVX is supported by the CPU], pac_cv_found_avx_runnable,[
        AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <immintrin.h>

int main() {
    __m256i a = _mm256_set1_epi32(1);
    __asm__ volatile("" : : "x" (a) : "memory");
    return 0;
}
        ]])], pac_cv_found_avx_runnable="yes",
              pac_cv_found_avx_runnable="no",
              pac_cv_found_avx_runnable="unknown")
    ])
    PAC_POP_FLAG([CFLAGS])

    # The result is unknown when cross compiling, which is treated like no.
    if test "$pac_cv_found_avx" = "yes" && test "$pac_cv_found_avx_runnable" = "yes"; then
        PAC_APPEND_FLAG([-mavx],[CFLAGS])
    else
        # Do not silently drop an option the user asked for explicitly.
        AC_MSG_WARN([--enable-fast=avx was requested but -mavx is not usable (compiler accepts flag: $pac_cv_found_avx, run test: $pac_cv_found_avx_runnable); not adding -mavx to CFLAGS])
    fi
fi

# Handle --enable-fast=avx512f: add -mavx512f to CFLAGS only when the compiler
# accepts the flag AND a small AVX512F program built with it runs on this host.
if test "$enable_fast_avx512f_instr" = "yes" ; then
    # AC_CACHE_CHECK takes exactly three arguments: message, cache variable
    # and the commands that set it. Extra trailing arguments are discarded.
    AC_CACHE_CHECK([whether -mavx512f is supported], pac_cv_found_avx512f,
        [PAC_C_CHECK_COMPILER_OPTION([-mavx512f],pac_cv_found_avx512f=yes,pac_cv_found_avx512f=no)])

    # The run test needs -mavx512f in CFLAGS. The append happens between
    # PAC_PUSH_FLAG and PAC_POP_FLAG, presumably so that CFLAGS is restored
    # afterwards. NOTE(review): confirm against the PAC_ macro definitions.
    PAC_PUSH_FLAG([CFLAGS])
    PAC_APPEND_FLAG([-mavx512f],[CFLAGS])
    AC_CACHE_CHECK([whether AVX512F is supported by the CPU], pac_cv_found_avx512f_runnable,[
        AC_RUN_IFELSE([AC_LANG_SOURCE([[
#include <immintrin.h>

int main() {
    __m512i a = _mm512_set1_epi32(1);
    __asm__ volatile("" : : "x" (a) : "memory");
    return 0;
}
        ]])], pac_cv_found_avx512f_runnable="yes",
              pac_cv_found_avx512f_runnable="no",
              pac_cv_found_avx512f_runnable="unknown")
    ])
    PAC_POP_FLAG([CFLAGS])

    # The result is unknown when cross compiling, which is treated like no.
    if test "$pac_cv_found_avx512f" = "yes" && test "$pac_cv_found_avx512f_runnable" = "yes"; then
        PAC_APPEND_FLAG([-mavx512f],[CFLAGS])
    else
        # Do not silently drop an option the user asked for explicitly.
        AC_MSG_WARN([--enable-fast=avx512f was requested but -mavx512f is not usable (compiler accepts flag: $pac_cv_found_avx512f, run test: $pac_cv_found_avx512f_runnable); not adding -mavx512f to CFLAGS])
    fi
fi

# error-checking
# Change default into the specific value of the default
if test "$enable_error_checking" = "yes" ; then
Expand Down
2 changes: 1 addition & 1 deletion src/mpid/ch4/shm/posix/posix_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
- name : MPIR_CVAR_CH4_SHM_POSIX_TOPO_ENABLE
category : CH4
type : boolean
default : false
default : true
class : none
verbosity : MPI_T_VERBOSITY_USER_BASIC
scope : MPI_T_SCOPE_ALL_EQ
Expand Down
Loading