From 6dca2cfc6d28b7b577507dcbc8ac0796d435e458 Mon Sep 17 00:00:00 2001 From: gilbertlee-amd <44450918+gilbertlee-amd@users.noreply.github.com> Date: Tue, 29 Aug 2023 11:31:51 -0600 Subject: [PATCH] More robust msccl shared directory location discovery (#869) --- src/misc/msccl/msccl_lifecycle.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/misc/msccl/msccl_lifecycle.cc b/src/misc/msccl/msccl_lifecycle.cc index e89a0b695..3853f324e 100644 --- a/src/misc/msccl/msccl_lifecycle.cc +++ b/src/misc/msccl/msccl_lifecycle.cc @@ -71,8 +71,8 @@ static const char* mscclAlgoDirEnv = "MSCCL_ALGO_DIR"; static const char* mscclAlgoDefaultDir = "msccl-algorithms"; extern "C" bool mscclUnitTestMode() __attribute__((__weak__)); static const char* mscclUnitTestAlgoDefaultDir = "msccl-unit-test-algorithms"; -static const char* mscclAlgoShareDirPath = "share/rccl/msccl-algorithms"; -static const char* mscclUnitTestAlgoShareDirPath = "share/rccl/msccl-unit-test-algorithms"; +static const char* mscclAlgoShareDirPath = "../share/rccl/msccl-algorithms"; +static const char* mscclUnitTestAlgoShareDirPath = "../share/rccl/msccl-unit-test-algorithms"; static ncclResult_t mscclInternalSchedulerInit() { mscclStatus& status = mscclGetStatus(); @@ -94,7 +94,7 @@ static ncclResult_t mscclInternalSchedulerInit() { mscclAlgoDirStr += (mscclUnitTestMode && mscclUnitTestMode()) ? mscclUnitTestAlgoDefaultDir : mscclAlgoDefaultDir; mscclAlgoDir = mscclAlgoDirStr.c_str(); // Get share Directory Paths - mscclAlgoShareDirStr = selfLibPath.substr(0, selfLibPath.rfind("lib")); + mscclAlgoShareDirStr = selfLibPath.substr(0, selfLibPath.find_last_of("/\\") + 1); mscclAlgoShareDirStr += (mscclUnitTestMode && mscclUnitTestMode()) ? mscclUnitTestAlgoShareDirPath : mscclAlgoShareDirPath; mscclAlgoShareDir = mscclAlgoShareDirStr.c_str(); } @@ -112,6 +112,7 @@ static ncclResult_t mscclInternalSchedulerInit() { } else { fullDirPath = mscclAlgoDir; } + INFO(NCCL_INIT, "Using MSCCL files from %s", fullDirPath); while ((entry = readdir(dp))) { if (entry->d_type != DT_LNK && entry->d_type != DT_REG) { continue;