From f9012b6ffdf1293148dc41f599a4fa65e05b3e5c Mon Sep 17 00:00:00 2001 From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com> Date: Fri, 29 Sep 2023 08:25:44 -0700 Subject: [PATCH] Re-enable LL128 for gfx90a (#855) --- src/collectives/device/common.h | 6 +++--- src/graph/tuning.cc | 4 ---- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/collectives/device/common.h b/src/collectives/device/common.h index dfda0292d..bf3e33d1f 100644 --- a/src/collectives/device/common.h +++ b/src/collectives/device/common.h @@ -28,7 +28,7 @@ { __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST); } #endif -#ifdef ENABLE_LL128 +#ifdef __gfx90a__ #define NCCL_FUNC5(func, algo, devredop, type, nullify) \ MACRO_IF(nullify, nullptr, NCCL_FUNC_NAME(func, algo, LL, devredop, type)), \ MACRO_IF(nullify, nullptr, NCCL_FUNC_NAME(func, algo, LL128, devredop, type)), \ @@ -549,7 +549,7 @@ __forceinline__ __device__ void ncclKernel( #ifdef USE_INDIRECT_FUNCTION_CALL ncclFuncs[ncclShmem.work.header.funcIndex](); #else -#ifdef ENABLE_LL128 +#ifdef __gfx90a__ NCCL_CALL_FUNCTIONS<1>(ncclShmem.work.header.funcIndex); #else NCCL_CALL_FUNCTIONS<0>(ncclShmem.work.header.funcIndex); @@ -618,7 +618,7 @@ __device__ __attribute__((noinline)) void NCCL_FUNC_NAME(func, algo, proto, dev #endif // Only generate inline kernels for LL -#ifdef ENABLE_LL128 +#ifdef __gfx90a__ #define IMPL_COLL4(func, algo, devredop, type) \ IMPL_COLL_FUNC(func, algo, LL, devredop, type) \ IMPL_COLL_FUNC(func, algo, LL128, devredop, type) \ diff --git a/src/graph/tuning.cc b/src/graph/tuning.cc index 2a8a3d245..d20fd98ad 100644 --- a/src/graph/tuning.cc +++ b/src/graph/tuning.cc @@ -409,13 +409,9 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom int pEnable = protoEnable[p]; if (pEnable == 2 && p == NCCL_PROTO_LL128) { #if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__) -#if defined(ENABLE_LL128) // Enable LL128 by default only on gfx90a with available 
tuning table pEnable = (graphs[a]->typeInter <= PATH_PXB) && graphs[a]->typeIntra <= PATH_NVL && (comm->topo->nodes[GPU].nodes[0].gpu.gcn == 910 && comm->topo->ll128Enabled) ? 1 : 0; -#else - pEnable = 0; -#endif #else // Enable LL128 by default only on Volta/Ampere/Hopper+NVLink. Other cases are not tested and may cause silent data corruption. pEnable = 1;