Skip to content

Commit

Permalink
Merge pull request #1264 from nileshnegi/rocm-6.2-revert/consistent-c…
Browse files Browse the repository at this point in the history
…hannel-shuffling

[ROCm 6.2.0] Reverts to ensure consistent channel shuffling for MI300X multi-node
  • Loading branch information
mamaydeo authored Jul 22, 2024
2 parents 43b9844 + 816b391 commit 45b618a
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 166 deletions.
26 changes: 4 additions & 22 deletions src/graph/connect.cc
Original file line number Diff line number Diff line change
Expand Up @@ -624,29 +624,11 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePa
NCCLCHECK(connectRings(comm, ringRecv, ringSend, ringPrev, ringNext));
NCCLCHECK(connectTrees(comm, treeToParent, treeToChild0, treeToChild1, treePatterns));

// Define channels for non-gfx94 GPU architectures
int maxChannels = 2*CHANNEL_LIMIT;
int multiNodeNchannels = maxChannels;

// Define channels for gfx94 GPU architectures
if (IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx94")) {
// Only use full MAXCHANNELS for gfx94x
maxChannels = MAXCHANNELS;

// Define channels=64 for gfx94 multi-node systems
multiNodeNchannels = 64;

// Check if NCCL_IB_GID_INDEX=3 -- needed for RoCE systems
const char* ncclIbGidIndex = ncclGetEnv("NCCL_IB_GID_INDEX");
int gid_index = 0;
if (ncclIbGidIndex) gid_index = atoi(ncclIbGidIndex);

// Limit channels=48 for RoCE gfx94 multi-node systems
multiNodeNchannels = gid_index == 3 ? 48 : multiNodeNchannels;
}
// Only use full MAXCHANNELS for gfx94x
int maxChannels = IsArchMatch(comm->topo->nodes[GPU].nodes[0].gpu.gcn, "gfx94") ? MAXCHANNELS : 2*CHANNEL_LIMIT;

if (graphs[NCCL_ALGO_RING]->nIntraChannels > 0 || comm->nNodes > 1) {
maxChannels = std::min(multiNodeNchannels, maxChannels);
maxChannels = std::min(64, maxChannels);
}

// Duplicate ringPrev/ringNext for ncclBuildRing
Expand Down Expand Up @@ -692,7 +674,7 @@ ncclResult_t ncclTopoPostset(struct ncclComm* comm, int* firstRanks, int* treePa

int minNchannels = ncclMinNchannels();
if (comm->nNodes > 1) {
minNchannels = std::min(multiNodeNchannels, minNchannels);
minNchannels = std::min(64, minNchannels);
}

if (mscclEnabled() && (comm->topo->mscclEnabled || mscclForceEnabled())) {
Expand Down
145 changes: 1 addition & 144 deletions src/graph/rome_models.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ THE SOFTWARE.
#include <algorithm>
#include <string.h>
#include "rome_models.h"
#include "param.h"

struct rcclRomeModel {
int nGpus;
Expand Down Expand Up @@ -813,7 +812,7 @@ static struct rcclRomeModel rome_model_81 = {
"N7 7 3 2 6 0 4 1 5 N5|"
"N1 1 0 2 4 3 5 7 6 N6|",

.options = "noCpuCheck=1,tuning=5,disableNumaMatching=1,isRoCE=0",
.options = "noCpuCheck=1,tuning=5,disableNumaMatching=1",
};

static struct rcclRomeModel rome_model_84 = {
Expand Down Expand Up @@ -842,114 +841,6 @@ static struct rcclRomeModel rome_model_85 = {
.options = "tuning=2",
};

// Topology model entry describing a system with 8 GPUs, 2 CPUs, 8 NICs and
// 7 links. Its option string carries "isRoCE=1": the model-matching loop in
// parseRome4P2H reads that option through checkOptionValue and skips this
// entry unless NCCL_IB_GID_INDEX is set to 3.
static struct rcclRomeModel rome_model_86 = {
.nGpus = 8, .nCpus = 2, .nNics = 8, .nLinks = 7,
// One identifier per GPU / per NIC.
// NOTE(review): presumably PCI bus ids -- confirm against the rcclRomeModel definition.
.gpuIds = { 0xc000, 0x22000, 0x38000, 0x5c000, 0x9f000, 0xaf000, 0xbf000, 0xdf000, },
.nicIds = { 0x7000, 0x1d000, 0x33000, 0x57000, 0x9a000, 0xaa000, 0xba000, 0xda000, },
// First four GPUs/NICs are assigned to NUMA node 0, last four to NUMA node 1.
.gpuNuma = { 0, 0, 0, 0, 1, 1, 1, 1, },
.nicNuma = { 0, 0, 0, 0, 1, 1, 1, 1, },
// 8x8 matrix: 0 on the diagonal, 1 for every other pair.
// NOTE(review): presumably the link count between each GPU pair -- confirm.
.connMatrix = { 0, 1, 1, 1, 1, 1, 1, 1,
1, 0, 1, 1, 1, 1, 1, 1,
1, 1, 0, 1, 1, 1, 1, 1,
1, 1, 1, 0, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 1,
1, 1, 1, 1, 1, 0, 1, 1,
1, 1, 1, 1, 1, 1, 0, 1,
1, 1, 1, 1, 1, 1, 1, 0, },
// 8x8 matrix: PATH_PXB on the diagonal, PATH_PHB for the other entries within
// the same group of four, PATH_SYS between the two groups of four.
// NOTE(review): presumably indexed NIC x GPU -- confirm which axis is which.
.gdrLevel = {PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
PATH_PHB, PATH_PXB, PATH_PHB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
PATH_PHB, PATH_PHB, PATH_PXB, PATH_PHB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS,
PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PXB, PATH_PHB, PATH_PHB, PATH_PHB,
PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PXB, PATH_PHB, PATH_PHB,
PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PXB, PATH_PHB,
PATH_SYS, PATH_SYS, PATH_SYS, PATH_SYS, PATH_PHB, PATH_PHB, PATH_PHB, PATH_PXB, },
.pattern = "4444",
// Ring tables: each of the three strings below is one group of eight
// '|'-separated ring entries written out three times (24 entries per string).
// Every entry is eight indices bracketed by "N<k>" markers.
// NOTE(review): presumably the N<k> markers name the NIC used to enter/leave
// the ring -- confirm against the ring parser.
.ringBase = "N0 0 1 2 3 4 5 6 7 N7|"
"N1 1 0 2 4 3 5 7 6 N6|"
"N2 2 5 0 3 6 1 7 4 N4|"
"N3 3 7 0 4 2 1 6 5 N5|"
"N4 4 6 2 7 3 0 5 1 N1|"
"N5 5 4 7 1 3 2 6 0 N0|"
"N6 6 3 1 4 0 7 5 2 N2|"
"N7 7 2 0 6 4 1 5 3 N3|"

"N0 0 1 2 3 4 5 6 7 N7|"
"N1 1 0 2 4 3 5 7 6 N6|"
"N2 2 5 0 3 6 1 7 4 N4|"
"N3 3 7 0 4 2 1 6 5 N5|"
"N4 4 6 2 7 3 0 5 1 N1|"
"N5 5 4 7 1 3 2 6 0 N0|"
"N6 6 3 1 4 0 7 5 2 N2|"
"N7 7 2 0 6 4 1 5 3 N3|"

"N0 0 1 2 3 4 5 6 7 N7|"
"N1 1 0 2 4 3 5 7 6 N6|"
"N2 2 5 0 3 6 1 7 4 N4|"
"N3 3 7 0 4 2 1 6 5 N5|"
"N4 4 6 2 7 3 0 5 1 N1|"
"N5 5 4 7 1 3 2 6 0 N0|"
"N6 6 3 1 4 0 7 5 2 N2|"
"N7 7 2 0 6 4 1 5 3 N3",

.ringTail2 = "N7 7 4 1 3 2 0 6 5 N5|"
"N6 6 3 0 7 5 1 4 2 N2|"
"N4 4 6 2 1 7 0 5 3 N3|"
"N5 5 2 7 3 1 6 0 4 N4|"
"N1 1 0 2 4 3 5 7 6 N6|"
"N0 0 1 2 3 4 5 6 7 N7|"
"N2 2 5 0 3 6 4 7 1 N1|"
"N3 3 7 2 6 1 5 4 0 N0|"

"N7 7 4 1 3 2 0 6 5 N5|"
"N6 6 3 0 7 5 1 4 2 N2|"
"N4 4 6 2 1 7 0 5 3 N3|"
"N5 5 2 7 3 1 6 0 4 N4|"
"N1 1 0 2 4 3 5 7 6 N6|"
"N0 0 1 2 3 4 5 6 7 N7|"
"N2 2 5 0 3 6 4 7 1 N1|"
"N3 3 7 2 6 1 5 4 0 N0|"

"N7 7 4 1 3 2 0 6 5 N5|"
"N6 6 3 0 7 5 1 4 2 N2|"
"N4 4 6 2 1 7 0 5 3 N3|"
"N5 5 2 7 3 1 6 0 4 N4|"
"N1 1 0 2 4 3 5 7 6 N6|"
"N0 0 1 2 3 4 5 6 7 N7|"
"N2 2 5 0 3 6 4 7 1 N1|"
"N3 3 7 2 6 1 5 4 0 N0",


.ringTail1 = "N5 5 4 2 7 1 6 3 0 N0|"
"N2 2 5 0 3 7 4 6 1 N1|"
"N3 3 6 4 0 5 1 7 2 N2|"
"N4 4 7 0 6 5 2 1 3 N3|"
"N6 6 2 0 7 5 3 1 4 N4|"
"N7 7 3 2 6 0 4 1 5 N5|"
"N1 1 0 2 4 3 5 7 6 N6|"
"N0 0 1 2 3 4 5 6 7 N7|"

"N5 5 4 2 7 1 6 3 0 N0|"
"N2 2 5 0 3 7 4 6 1 N1|"
"N3 3 6 4 0 5 1 7 2 N2|"
"N4 4 7 0 6 5 2 1 3 N3|"
"N6 6 2 0 7 5 3 1 4 N4|"
"N7 7 3 2 6 0 4 1 5 N5|"
"N1 1 0 2 4 3 5 7 6 N6|"
"N0 0 1 2 3 4 5 6 7 N7|"

"N5 5 4 2 7 1 6 3 0 N0|"
"N2 2 5 0 3 7 4 6 1 N1|"
"N3 3 6 4 0 5 1 7 2 N2|"
"N4 4 7 0 6 5 2 1 3 N3|"
"N6 6 2 0 7 5 3 1 4 N4|"
"N7 7 3 2 6 0 4 1 5 N5|"
"N1 1 0 2 4 3 5 7 6 N6|"
"N0 0 1 2 3 4 5 6 7 N7",

// Matching options, as consumed by the loop in parseRome4P2H:
//   noCpuCheck          - skip the CPU arch/vendor/model check for this model
//   disableNumaMatching - skip the nCpus and pattern comparisons
//   isRoCE=1            - model is only considered when NCCL_IB_GID_INDEX == 3
// NOTE(review): the meaning of tuning=5 is not visible here -- confirm where it is read.
.options = "noCpuCheck=1,tuning=5,disableNumaMatching=1,isRoCE=1",
};


static struct rcclRomeModel romeTopoModels[] = {
rome_model_22, /* 0 */
Expand Down Expand Up @@ -995,7 +886,6 @@ static struct rcclRomeModel romeTopoModels[] = {
rome_model_81, /* 40 */
rome_model_84, /* 41 */
rome_model_85, /* 42 */
rome_model_86, /* 43 */
};

/* Parse user defined rings. Format is like :
Expand Down Expand Up @@ -1283,27 +1173,6 @@ static bool checkOption(const char *options, const char *name) {
return false;
}

/**
 * Look up the integer value of a "name=value" option inside an option string.
 *
 * The option string is split on any of '=', ',' and ' ' and the resulting
 * tokens are consumed pairwise as (key, value), e.g.
 * "noCpuCheck=1,tuning=5,isRoCE=1".
 *
 * The scan is done in place on the caller's string, so nothing is allocated
 * and nothing can leak. A trailing key that has no value is treated as absent
 * instead of being parsed from a null token. There is no upper bound on the
 * number of options that are inspected.
 *
 * @param options  Option string to search; may be empty or null.
 * @param name     Key to look for (exact, case-sensitive match).
 * @return The value of the first matching key converted as a base-10 integer
 *         (0 when the value has no leading digits), or -2 when the key is not
 *         present.
 */
static int checkOptionValue(const char *options, const char *name) {
  static const char kDelims[] = "=, ";
  const int kNotFound = -2;

  if (options == nullptr || name == nullptr) return kNotFound;

  const size_t nameLen = strlen(name);
  const char *cursor = options;

  while (true) {
    // Key token: skip any run of delimiters, then measure the token.
    cursor += strspn(cursor, kDelims);
    if (*cursor == '\0') break;
    const char *key = cursor;
    const size_t keyLen = strcspn(cursor, kDelims);
    cursor += keyLen;

    // Value token: a key at the very end of the string has no value to read.
    cursor += strspn(cursor, kDelims);
    if (*cursor == '\0') break;
    const char *value = cursor;
    cursor += strcspn(cursor, kDelims);

    if (keyLen == nameLen && strncmp(key, name, keyLen) == 0) {
      // strtol stops at the first non-digit, i.e. at the delimiter that ends
      // the value token, so the token does not need to be null-terminated.
      return static_cast<int>(strtol(value, nullptr, 10));
    }
  }
  return kNotFound;
}

ncclResult_t parseChordalRing(struct ncclTopoSystem* system, struct ncclTopoGraph* graph) {
static const char *ringBase = "0 1 2 3 5 4 7 6|0 2 4 1 7 3 6 5|0 3 1 5 7 2 6 4|0 6 7 4 5 3 2 1|0 5 6 3 7 1 4 2|0 4 6 2 7 5 1 3";
int id[8], dist[8];
Expand Down Expand Up @@ -1667,24 +1536,12 @@ ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, struct ncclTopoGraph*
}
if (i < romeTopo.nGpus) match_nbio = false;

// check if NCCL_IB_GID_INDEX=3 -- needed for RoCE systems
const char* ncclIbGidIndex = ncclGetEnv("NCCL_IB_GID_INDEX");
int gid_index = 0;
if (ncclIbGidIndex) gid_index = atoi(ncclIbGidIndex);
int isRoCE = gid_index == 3 ? 1 : 0;

for (i = 0; i < sizeof(romeTopoModels)/sizeof(romeTopoModels[0]); i++) {
bool ignore_cpu = checkOption(romeTopoModels[i].options, "noCpuCheck");
if (!ignore_cpu && (arch != NCCL_TOPO_CPU_ARCH_X86 || vendor != NCCL_TOPO_CPU_VENDOR_AMD || model != NCCL_TOPO_CPU_TYPE_ROME))
continue;

bool ignore_numa = checkOption(romeTopoModels[i].options, "disableNumaMatching");
if (!ignore_numa && romeTopo.nCpus != romeTopoModels[i].nCpus) continue;

// check if "isRoCE=1" is defined in model struct options
int optionsIsRoCE = checkOptionValue(romeTopoModels[i].options, "isRoCE");
if (optionsIsRoCE != -2 && optionsIsRoCE != isRoCE) continue;

if (romeTopo.nGpus != romeTopoModels[i].nGpus ||
romeTopo.nNics != romeTopoModels[i].nNics || romeTopo.nLinks != romeTopoModels[i].nLinks) continue;
if (!ignore_numa && strcmp(romeTopoModels[i].pattern, pattern)) continue;
Expand Down

0 comments on commit 45b618a

Please sign in to comment.