From 937aec99f0ecdc5e5d000d3cef667941cec328de Mon Sep 17 00:00:00 2001 From: Wenkai Du <43822138+wenkaidu@users.noreply.github.com> Date: Fri, 13 Nov 2020 17:58:04 -0800 Subject: [PATCH] Use device's link width and speed if port doesn't report (#304) (cherry picked from commit 554729079d98a1d95eaf69ded5f8b2f286532dd8) --- src/graph/xml.cc | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/graph/xml.cc b/src/graph/xml.cc index a1f936bab..3bf4fa783 100644 --- a/src/graph/xml.cc +++ b/src/graph/xml.cc @@ -492,7 +492,10 @@ ncclResult_t ncclTopoGetXmlFromSys(struct ncclXmlNode* pciNode, struct ncclXml* char portSpeedStr[MAX_STR_LEN]; float portSpeed; NCCLCHECK(ncclTopoGetStrFromSys(path, "../max_link_speed", portSpeedStr)); - sscanf(portSpeedStr, "%f GT/s", &portSpeed); + if (portSpeedStr[0]) + sscanf(portSpeedStr, "%f GT/s", &portSpeed); + else + portSpeed = deviceSpeed; NCCLCHECK(xmlSetAttr(pciNode, "link_speed", portSpeed < deviceSpeed ? portSpeedStr : deviceSpeedStr)); } NCCLCHECK(xmlGetAttrIndex(pciNode, "link_width", &index)); @@ -502,7 +505,11 @@ ncclResult_t ncclTopoGetXmlFromSys(struct ncclXmlNode* pciNode, struct ncclXml* NCCLCHECK(ncclTopoGetStrFromSys(path, "max_link_width", strValue)); int deviceWidth = strtol(strValue, NULL, 0); NCCLCHECK(ncclTopoGetStrFromSys(path, "../max_link_width", strValue)); - int portWidth = strtol(strValue, NULL, 0); + int portWidth; + if (strValue[0]) + portWidth = strtol(strValue, NULL, 0); + else + portWidth = deviceWidth; NCCLCHECK(xmlSetAttrInt(pciNode, "link_width", std::min(deviceWidth,portWidth))); } struct ncclXmlNode* parent = pciNode->parent;