From 05bca95f0275578fd7d8eba2ae7f7f432bad3480 Mon Sep 17 00:00:00 2001
From: akolliasAMD <99202231+akolliasAMD@users.noreply.github.com>
Date: Wed, 19 Jun 2024 09:27:29 -0600
Subject: [PATCH] BUILD: add support for specific gpu arch with ROCM (#987)

---
# Review notes (free-form patch commentary; no hunk content is altered).
#
# What this patch changes:
#  * config/m4/rocm.m4
#      - Defines the --offload-arch flag groups ROCM_ARCH_NATIVE, ROCM_ARCH908,
#        ROCM_ARCH90A, ROCM_ARCH94, ROCM_ARCH10 and ROCM_ARCH11.
#      - Adds the configure option --with-rocm-arch (with_rocm_arch, default
#        "all"):
#            all                -> every group above plus --offload-arch=native
#            all-arch-no-native -> every group above, without native
#            any other value    -> copied verbatim into ROCM_ARCH
#      - ROCM_ARCH is only computed when rocm_happy is "yes". It is substituted
#        with that value when ROCM_VERSION_60_OR_GREATER is "1", and with the
#        empty string otherwise.
#      - CHECK_ROCM_VERSION and AC_MSG_CHECKING are moved ahead of the arch
#        selection; the matching AC_MSG_RESULT stays after HIP_BUILD_FLAGS.
#  * configure.ac
#      - Prints ROCM_ARCH in the configuration summary when rocm_happy is yes.
#  * cuda_lt.sh
#      - Drops the hard-coded --offload-arch list from both amdclang command
#        lines (PIC and non-PIC).
#  * src/components/ec/rocm/kernel/Makefile.am
#      - Passes $(ROCM_ARCH) ahead of $(HIPCCFLAGS) in the .cu.o rule and in the
#        .cu.lo rule (the latter forwards its arguments to cuda_lt.sh).
#
# NOTE(review): the --with-rocm-arch help text describes 'all-arch-no-native'
#   but never names the default value 'all'.
# NOTE(review): a user-supplied --with-rocm-arch value is replaced by an empty
#   ROCM_ARCH when ROCM_VERSION_60_OR_GREATER is not "1"; presumably intended
#   for pre-6.0 toolchains -- confirm.
# NOTE(review): AC_MSG_CHECKING is now separated from its AC_MSG_RESULT by the
#   arch selection, the flag restore and HIP_BUILD_FLAGS; confirm none of these
#   print to the configure log in between.
# NOTE(review): the inner AS_IF passes its AC_SUBST branches without m4
#   quoting ([...]); presumably harmless here, but verify against the
#   generated configure script.
# NOTE(review): leading whitespace inside the hunks must match the target
#   tree exactly; verify with `git apply --check` before relying on this copy.
#
 config/m4/rocm.m4                         | 35 ++++++++++++++++++++---
 configure.ac                              |  3 ++
 cuda_lt.sh                                |  4 +--
 src/components/ec/rocm/kernel/Makefile.am |  4 +--
 4 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/config/m4/rocm.m4 b/config/m4/rocm.m4
index c7c4ca141d..8a2bdc2fb9 100644
--- a/config/m4/rocm.m4
+++ b/config/m4/rocm.m4
@@ -4,6 +4,17 @@
 # See file LICENSE for terms.
 #
 
+ROCM_ARCH_NATIVE="--offload-arch=native"
+ROCM_ARCH908="--offload-arch=gfx908"
+ROCM_ARCH90A="--offload-arch=gfx90a"
+ROCM_ARCH94="--offload-arch=gfx940 \
+--offload-arch=gfx941 \
+--offload-arch=gfx942"
+ROCM_ARCH10="--offload-arch=gfx1030"
+ROCM_ARCH11="--offload-arch=gfx1100 \
+--offload-arch=gfx1101 \
+--offload-arch=gfx1102"
+
 # ROCM_PARSE_FLAGS(ARG, VAR_LIBS, VAR_LDFLAGS, VAR_CPPFLAGS)
 # ----------------------------------------------------------
 # Parse whitespace-separated ARG into appropriate LIBS, LDFLAGS, and
@@ -73,7 +84,12 @@ AC_ARG_WITH([rocm],
         [Enable the use of ROCm (default is autodetect).])],
     [],
     [with_rocm=guess])
-
+AC_ARG_WITH([rocm-arch],
+    [AS_HELP_STRING([--with-rocm-arch=arch-code],
+        [Defines target GPU architecture,
+            see rocm documentation for valid --offload-arch options for details
+            'all-arch-no-native' for all default architectures but not native])],
+    [], [with_rocm_arch=all])
 rocm_happy=no
 hip_happy=no
 AS_IF([test "x$with_rocm" != "xno"],
@@ -115,15 +131,26 @@ AS_IF([test "x$with_rocm" != "xno"],
            AC_SUBST([ROCM_ROOT])],
           [AC_MSG_WARN([ROCm not found])])
 
+
+    # Check whether we run on ROCm 6.0 or higher
+    CHECK_ROCM_VERSION(6, ROCM_VERSION_60_OR_GREATER)
+    AC_MSG_CHECKING([if ROCm version is 6.0 or above])
+
+    AS_IF([test "x$rocm_happy" = "xyes"],
+        [AS_IF([test "x$with_rocm_arch" = "xall"],
+            [ROCM_ARCH="${ROCM_ARCH908} ${ROCM_ARCH90A} ${ROCM_ARCH94} ${ROCM_ARCH10} ${ROCM_ARCH11} ${ROCM_ARCH_NATIVE}"],
+            [AS_IF([test "x$with_rocm_arch" = "xall-arch-no-native"],
+                [ROCM_ARCH="${ROCM_ARCH908} ${ROCM_ARCH90A} ${ROCM_ARCH94} ${ROCM_ARCH10} ${ROCM_ARCH11}"],
+                [ROCM_ARCH="$with_rocm_arch"])])
+        AS_IF([test "$ROCM_VERSION_60_OR_GREATER" = "1"],
+            AC_SUBST([ROCM_ARCH], ["$ROCM_ARCH"]),
+            AC_SUBST([ROCM_ARCH], [""]))])
     CPPFLAGS="$SAVE_CPPFLAGS"
     LDFLAGS="$SAVE_LDFLAGS"
     LIBS="$SAVE_LIBS"
 
     HIP_BUILD_FLAGS([$with_rocm], [HIP_LIBS], [HIP_LDFLAGS], [HIP_CPPFLAGS])
 
-    # Check whether we run on ROCm 6.0 or higher
-    CHECK_ROCM_VERSION(6, ROCM_VERSION_60_OR_GREATER)
-    AC_MSG_CHECKING([if ROCm version is 6.0 or above])
     if test "$ROCM_VERSION_60_OR_GREATER" = "1" ; then
         AC_MSG_RESULT([yes])
     else
diff --git a/configure.ac b/configure.ac
index 246e297c90..0022b147fb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -265,6 +265,9 @@ AC_MSG_NOTICE([      C++ compiler:   ${CXX} ${CXXFLAGS} ${BASE_CXXFLAGS}])
 AS_IF([test "x$cuda_happy" = "xyes"],[
 AC_MSG_NOTICE([     NVCC gencodes:   ${NVCC_ARCH}])
 ])
+AS_IF([test "x$rocm_happy" = xyes],[
+AC_MSG_NOTICE([ROCM architectures:   ${ROCM_ARCH}])
+])
 AC_MSG_NOTICE([          Perftest:   ${mpi_enable}])
 AC_MSG_NOTICE([             Gtest:   ${gtest_enable}])
 AC_MSG_NOTICE([        MC modules:   <$(echo ${mc_modules}|tr ':' ' ') >])
diff --git a/cuda_lt.sh b/cuda_lt.sh
index 6601e3edcb..9b7b505d6f 100755
--- a/cuda_lt.sh
+++ b/cuda_lt.sh
@@ -28,7 +28,7 @@ mkdir -p $pic_dir
 
 tmpcmd="${@:3}"
 if [[ "$tmpcmd" == *"amdclang"* ]]; then
-    cmd="${@:3:2} -x hip -target x86_64-unknown-linux-gnu --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx940 --offload-arch=gfx941 --offload-arch=gfx942 --offload-arch=gfx1030 --offload-arch=gfx1100 --offload-arch=gfx1101 --offload-arch=gfx1102 --offload-arch=native ${@:5} -fPIC -O3 -o ${pic_filepath}"
+    cmd="${@:3:2} -x hip -target x86_64-unknown-linux-gnu ${@:5} -fPIC -O3 -o ${pic_filepath}"
 elif [[ "$tmpcmd" == *"hipcc"* ]]; then
     cmd="${@:3} -fPIC -o ${pic_filepath}"
 else
@@ -38,7 +38,7 @@ echo $cmd
 $cmd
 
 if [[ "$tmpcmd" == *"amdclang"* ]]; then
-    cmd="${@:3:2} -x hip -target x86_64-unknown-linux-gnu --offload-arch=gfx908 --offload-arch=gfx90a --offload-arch=gfx940 --offload-arch=gfx941 --offload-arch=gfx942 --offload-arch=gfx1030 --offload-arch=gfx1100 --offload-arch=gfx1101 --offload-arch=gfx1102 --offload-arch=native ${@:5} -O3 -o ${npic_filepath}"
+    cmd="${@:3:2} -x hip -target x86_64-unknown-linux-gnu ${@:5} -O3 -o ${npic_filepath}"
 else
     cmd="${@:3} -o ${npic_filepath}"
 fi
diff --git a/src/components/ec/rocm/kernel/Makefile.am b/src/components/ec/rocm/kernel/Makefile.am
index 6f95d2b33b..17c00bf95d 100644
--- a/src/components/ec/rocm/kernel/Makefile.am
+++ b/src/components/ec/rocm/kernel/Makefile.am
@@ -17,10 +17,10 @@ HIPCCFLAGS = \
 LINK = $(LIBTOOL) --mode=link $(CC) -o $@
 
 .cu.o:
-	$(HIPCC) -c $< -o $@ $(HIPCCFLAGS)
+	$(HIPCC) -c $< -o $@ $(ROCM_ARCH) $(HIPCCFLAGS)
 
 .cu.lo:
-	/bin/bash $(top_srcdir)/cuda_lt.sh "$(LIBTOOL)" $@ $(HIPCC) -c $< $(HIPCCFLAGS)
+	/bin/bash $(top_srcdir)/cuda_lt.sh "$(LIBTOOL)" $@ $(HIPCC) -c $< $(ROCM_ARCH) $(HIPCCFLAGS)
 
 comp_noinst = libucc_ec_rocm_kernels.la
 