diff --git a/src/graph/search.cc b/src/graph/search.cc
index 2072bfd14..6ba948272 100644
--- a/src/graph/search.cc
+++ b/src/graph/search.cc
@@ -882,6 +882,8 @@ static ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, char **str) {
static const char *ringBase_10302120_2 = "6 4 7 5 0 1 3 2|6 5 7 4 2 3 1 0";
static const char *ringBase_11303011_1 = "2 1 0 3 6 7 5 4|7 6 4 5 1 2 3 0";
static const char *ringBase_11303011_2 = "0 6 2 3 1 7 5 4|7 1 4 5 6 0 3 2";
+ static const char *ringBase_0110201010200110_1 = "1 2 3 0 6 4 5 7|4 6 7 5 2 1 0 3";
+ static const char *ringBase_0110201010200110_2 = "3 0 6 2 1 4 5 7|4 1 0 3 2 6 7 5";
static const char *ringBase;
static char ringRemap[64];
int id[8], dist[8];
@@ -891,7 +893,7 @@ static ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, char **str) {
int ngpus = system->nodes[GPU].count;
int ncpus = system->nodes[CPU].count;
// 8 GPUs and 4 numa nodes only
- if (ngpus != 8 || ncpus != 4)
+ if (ngpus != 8 || (ncpus != 4 && ncpus != 8))
return ncclSuccess;
// only valid on Rome
int arch, vendor, model;
@@ -899,14 +901,14 @@ static ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, char **str) {
if (arch != NCCL_TOPO_CPU_ARCH_X86 || vendor != NCCL_TOPO_CPU_VENDOR_AMD || model != NCCL_TOPO_CPU_TYPE_ROME)
return ncclSuccess;
// number of GPUs and NICs on each numa node is used as first screening pattern
- char pattern[9];
- for (int i = 0; i < ncpus; i++) {
+ char pattern[256];
+ for (i = 0; i < ncpus; i++) {
int g, n;
if (!getGpuNetCount(system, i, &g, &n)) return ncclSuccess;
pattern[i*2] = '0' + g;
pattern[i*2+1] = '0' + n;
}
- pattern[8] = 0;
+ pattern[i*2] = 0;
int g[8], h1[4], h2[4];
for (int i = 0; i <8; i++) g[i] = -1;
if (strcmp(pattern, "10302120") == 0) {
@@ -961,6 +963,26 @@ static ncclResult_t parseRome4P2H(struct ncclTopoSystem* system, char **str) {
ringBase = ringBase_11303011_1;
}
}
+ else if (strcmp(pattern, "0110201010200110") == 0) {
+ if (findGpuByXGMI(system, 2, 5, &g[2], &g[6], 1, -1, -1)) {
+ if (!findGpuByXGMI(system, 4, 2, &g[4], &g[1], 1, g[6], g[2])) return ncclSuccess;
+ if (!findGpuByXGMI(system, 1, 3, &g[0], &g[3], 0, -1, -1)) return ncclSuccess;
+ if (!findGpuByXGMI(system, 7, 5, &g[7], &g[5], 1, -1, -1)) return ncclSuccess;
+ h1[0] = g[0]; h1[1] = g[3]; h1[2] = g[2]; h1[3] = g[6];
+ h2[0] = g[1]; h2[1] = g[4]; h2[2] = g[5]; h2[3] = g[7];
+ ringBase = ringBase_0110201010200110_2;
+ } else {
+ if (!findGpuByXGMI(system, 1, 2, &g[0], &g[1], 1, -1, -1)) return ncclSuccess;
+ if (!findGpuByXGMI(system, 1, 3, &g[0], &g[3], 0, -1, -1)) return ncclSuccess;
+ if (!findGpuByXGMI(system, 2, 2, &g[1], &g[2], -1, -1, -1)) return ncclSuccess;
+ if (!findGpuByXGMI(system, 7, 5, &g[7], &g[5], -1, -1, -1)) return ncclSuccess;
+ if (!findGpuByXGMI(system, 7, 5, &g[7], &g[6], -1, -1, g[5])) return ncclSuccess;
+ if (!findGpuByXGMI(system, 4, 5, &g[4], &g[5], -1, -1, -1)) return ncclSuccess;
+ h1[0] = g[0]; h1[1] = g[1]; h1[2] = g[2]; h1[3] = g[3];
+ h2[0] = g[4]; h2[1] = g[5]; h2[2] = g[7]; h2[3] = g[6];
+ ringBase = ringBase_0110201010200110_1;
+ }
+}
else
return ncclSuccess;
diff --git a/tools/scripts/topo_val.sh b/tools/scripts/topo_val.sh
index b103ebb0d..e0f2b3c15 100755
--- a/tools/scripts/topo_val.sh
+++ b/tools/scripts/topo_val.sh
@@ -21,7 +21,7 @@
DIR="$(cd -P "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-for i in {0..29}
+for i in {0..33}
do
$DIR/../topo_expl/topo_expl -m $i > "topo_m$i.log"
$DIR/../TopoVisual/topo_visual.sh -i "topo_m$i.log"
diff --git a/tools/topo_expl/models/topo_8p_ts1_n4.xml b/tools/topo_expl/models/topo_8p_ts1_n4.xml
new file mode 100644
index 000000000..b206e0cc5
--- /dev/null
+++ b/tools/topo_expl/models/topo_8p_ts1_n4.xml
@@ -0,0 +1,92 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tools/topo_expl/models/topo_8p_ts1_n4_1.xml b/tools/topo_expl/models/topo_8p_ts1_n4_1.xml
new file mode 100644
index 000000000..ed0632b27
--- /dev/null
+++ b/tools/topo_expl/models/topo_8p_ts1_n4_1.xml
@@ -0,0 +1,92 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tools/topo_expl/topo_expl.cpp b/tools/topo_expl/topo_expl.cpp
index aedfc6505..2ab829b64 100644
--- a/tools/topo_expl/topo_expl.cpp
+++ b/tools/topo_expl/topo_expl.cpp
@@ -99,6 +99,10 @@ NodeModelDesc model_descs[] = {
{4, "topo_8p_ts1_1.xml", "4 nodes 8 VEGA20 TS1 Alt. Model"},
{1, "topo_4p3l_2h.xml", "single node 8 gfx908 Rome"},
{4, "topo_4p3l_2h.xml", "4 nodes 8 gfx908 Rome"},
+ {1, "topo_8p_ts1_n4.xml", "single node 8 VEGA20 TS1 NPS=4"},
+ {4, "topo_8p_ts1_n4.xml", "4 nodes 8 VEGA20 TS1 NPS=4"},
+ {1, "topo_8p_ts1_n4_1.xml", "single node 8 VEGA20 TS1 NPS=4 Alt. Model"},
+ {4, "topo_8p_ts1_n4_1.xml", "4 nodes 8 VEGA20 TS1 NPS=4 Alt. Model"},
};
int main(int argc,char* argv[])