Skip to content

Commit

Permalink
Re-enable LL128 for gfx90a (#855)
Browse files Browse the repository at this point in the history
  • Loading branch information
wenkaidu authored Sep 29, 2023
1 parent 0491ffb commit f9012b6
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 7 deletions.
6 changes: 3 additions & 3 deletions src/collectives/device/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
{ __atomic_store_n((DST), (SRC), __ATOMIC_SEQ_CST); }
#endif

#ifdef ENABLE_LL128
#ifdef __gfx90a__
#define NCCL_FUNC5(func, algo, devredop, type, nullify) \
MACRO_IF(nullify, nullptr, NCCL_FUNC_NAME(func, algo, LL, devredop, type)), \
MACRO_IF(nullify, nullptr, NCCL_FUNC_NAME(func, algo, LL128, devredop, type)), \
Expand Down Expand Up @@ -549,7 +549,7 @@ __forceinline__ __device__ void ncclKernel(
#ifdef USE_INDIRECT_FUNCTION_CALL
ncclFuncs[ncclShmem.work.header.funcIndex]();
#else
#ifdef ENABLE_LL128
#ifdef __gfx90a__
NCCL_CALL_FUNCTIONS<1>(ncclShmem.work.header.funcIndex);
#else
NCCL_CALL_FUNCTIONS<0>(ncclShmem.work.header.funcIndex);
Expand Down Expand Up @@ -618,7 +618,7 @@ __device__ __attribute__((noinline)) void NCCL_FUNC_NAME(func, algo, proto, dev
#endif

// Only generate inline kernels for LL
#ifdef ENABLE_LL128
#ifdef __gfx90a__
#define IMPL_COLL4(func, algo, devredop, type) \
IMPL_COLL_FUNC(func, algo, LL, devredop, type) \
IMPL_COLL_FUNC(func, algo, LL128, devredop, type) \
Expand Down
4 changes: 0 additions & 4 deletions src/graph/tuning.cc
Original file line number Diff line number Diff line change
Expand Up @@ -409,13 +409,9 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom
int pEnable = protoEnable[p];
if (pEnable == 2 && p == NCCL_PROTO_LL128) {
#if defined(__HIP_PLATFORM_HCC__) || defined(__HCC__) || defined(__HIPCC__)
#if defined(ENABLE_LL128)
// Enable LL128 by default only on gfx90a with available tuning table
pEnable = (graphs[a]->typeInter <= PATH_PXB) && graphs[a]->typeIntra <= PATH_NVL &&
(comm->topo->nodes[GPU].nodes[0].gpu.gcn == 910 && comm->topo->ll128Enabled) ? 1 : 0;
#else
pEnable = 0;
#endif
#else
// Enable LL128 by default only on Volta/Ampere/Hopper+NVLink. Other cases are not tested and may cause silent data corruption.
pEnable = 1;
Expand Down

0 comments on commit f9012b6

Please sign in to comment.