NVMLによるGPUのモニタリングを行わないようにするオプションを追加。(--disable-nvml)

rigaya · Nov 28, 2023 · 4c1423b · 4c1423b
1 parent 5ce6292
commit 4c1423b
Show file tree

Hide file tree

Showing 12 changed files with 81 additions and 24 deletions.
diff --git a/NVEncC/NVEncC.vcxproj b/NVEncC/NVEncC.vcxproj
@@ -514,7 +514,7 @@ copy /y "$(SolutionDir)ffmpeg_lgpl\lib\$(PlatformName)\libvmaf.dll" "$(OutDir)"
       <EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet>
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
-      <OmitFramePointers>true</OmitFramePointers>
+      <OmitFramePointers>false</OmitFramePointers>
       <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
       <LanguageStandard>stdcpp17</LanguageStandard>
@@ -569,7 +569,7 @@ copy /y "$(SolutionDir)ffmpeg_lgpl\lib\$(PlatformName)\libass-*.dll" "$(OutDir)"
       <StringPooling>true</StringPooling>
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
-      <OmitFramePointers>true</OmitFramePointers>
+      <OmitFramePointers>false</OmitFramePointers>
       <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
       <MultiProcessorCompilation>true</MultiProcessorCompilation>
       <LanguageStandard>stdcpp17</LanguageStandard>

diff --git a/NVEncC_Options.en.md b/NVEncC_Options.en.md
@@ -213,6 +213,7 @@
   - [--vpp-nvvfx-model-dir \<string\>](#--vpp-nvvfx-model-dir-string)
 - [Other Options](#other-options)
   - [--cuda-schedule \<string\>](#--cuda-schedule-string)
+  - [--disable-nvml \<int\>](#--disable-nvml-int)
   - [--output-buf \<int\>](#--output-buf-int)
   - [--output-thread \<int\>](#--output-thread-int)
   - [--log \<string\>](#--log-string)
@@ -2529,7 +2530,7 @@ Set path to the model folder of Video Effect models.
 ### --cuda-schedule &lt;string&gt;
   Change the behavior of the CPU when waiting for GPU task completion. The default is auto.
 
-- paramters
+- **parameters**
   - auto (default)
     Leave the mode decision to the driver of CUDA.
   
@@ -2542,6 +2543,19 @@ Set path to the model folder of Video Effect models.
   - sync
     Sleep a thread until the end of the GPU task. Performance might decrease, but will reduce CPU utilization especially when decoding is done by HW.
 
+### --disable-nvml &lt;int&gt;
+Disable NVML GPU monitoring. The default is 0 (NVML enabled).
+
+- **Parameters**
+  - 0 (default)  
+    Enable NVML.
+
+  - 1  
+    Disable NVML when the system has only one CUDA device.
+
+  - 2  
+    Always disable NVML.
+
 ### --output-buf &lt;int&gt;
 Specify the output buffer size in MB. The default is 8 and the maximum value is 128.
 

diff --git a/NVEncC_Options.ja.md b/NVEncC_Options.ja.md
@@ -209,6 +209,7 @@
   - [--vpp-nvvfx-model-dir \<string\>](#--vpp-nvvfx-model-dir-string)
 - [制御系のオプション](#制御系のオプション)
   - [--cuda-schedule \<string\>](#--cuda-schedule-string)
+  - [--disable-nvml \<int\>](#--disable-nvml-int)
   - [--output-buf \<int\>](#--output-buf-int)
   - [--output-thread \<int\>](#--output-thread-int)
   - [--log \<string\>](#--log-string)
@@ -2603,6 +2604,19 @@ NVIDIA MAXINE VideoEffects のモデルを格納しているフォルダの場
   - sync  
  GPUタスクの終了まで、スレッドをスリープさせる。性能が落ちる可能性があるかわりに、特にHWデコード使用時に、CPU使用率を大きく削減する。
 
+### --disable-nvml &lt;int&gt;
+NVMLによるGPUモニタリングの無効化について指定する。デフォルトは0 (無効化しない)。
+
+- **パラメータ**
+  - 0 (デフォルト)  
+    NVMLを無効化しない。
+
+  - 1  
+    CUDAを実行できるGPUが1つの場合にNVMLを無効化する。
+
+  - 2  
+    常にNVMLを無効化する。
+
 ### --output-buf &lt;int&gt;
 出力バッファサイズをMB単位で指定する。デフォルトは8、最大値は128。0で使用しない。
 

diff --git a/NVEncCore/NVEncCmd.cpp b/NVEncCore/NVEncCmd.cpp
@@ -323,6 +323,8 @@ tstring encoder_help() {
         _T("       sync  : CPU will sleep when waiting GPU tasks, performance might\n")
         _T("                drop slightly, while CPU utilization will be lower,\n")
         _T("                especially on HW decode mode.\n"));
+    str += _T("")
+        _T("   --disable-nvml <int>        disable NVML GPU monitoring (default 0, 0-2)\n");
     str += gen_cmd_help_ctrl();
     return str;
 }
@@ -1619,6 +1621,20 @@ int parse_one_option(const TCHAR *option_name, const TCHAR* strInput[], int& i,
         pParams->sessionRetry = value;
         return 0;
     }
+    if (IS_OPTION("disable-nvml")) {
+        i++;
+        int value = 0;
+        if (1 != _stscanf_s(strInput[i], _T("%d"), &value)) {
+            print_cmd_error_invalid_value(option_name, strInput[i]);
+            return 1;
+        }
+        if (value < 0) {
+            print_cmd_error_invalid_value(option_name, strInput[i], _T("disable-nvml should be specified in positive value."));
+            return 1;
+        }
+        pParams->disableNVML = value;
+        return 0;
+    }
 
     auto ret = parse_one_input_option(option_name, strInput, i, nArgNum, &pParams->input, &pParams->inprm, argData);
     if (ret >= 0) return ret;
@@ -2054,6 +2070,7 @@ tstring gen_cmd(const InEncodeVideoParam *pParams, const NV_ENC_CODEC_CONFIG cod
 
     OPT_LST(_T("--cuda-schedule"), cudaSchedule, list_cuda_schedule);
     OPT_NUM(_T("--session-retry"), sessionRetry);
+    OPT_NUM(_T("--disable-nvml"), disableNVML);
 
     cmd << gen_cmd(&pParams->ctrl, &encPrmDefault.ctrl, save_disabled_prm);
 

diff --git a/NVEncCore/NVEncCore.cpp b/NVEncCore/NVEncCore.cpp
@@ -904,8 +904,9 @@ NVENCSTATUS NVEncCore::GPUAutoSelect(std::vector<std::unique_ptr<NVGPUInfo>> &gp
         NVMLMonitorInfo info;
 #if ENABLE_NVML
         NVMLMonitor monitor;
-        auto nvml_ret = monitor.Init(gpu->pciBusId());
-        if (nvml_ret == NVML_SUCCESS
+        auto nvml_ret = NVML_SUCCESS;
+        if (gpu->pciBusId().length() > 0
+            && (nvml_ret = monitor.Init(gpu->pciBusId())) == NVML_SUCCESS
             && monitor.getData(&info) == NVML_SUCCESS) {
 #else
         NVSMIInfo nvsmi;
@@ -3253,7 +3254,7 @@ NVENCSTATUS NVEncCore::InitEncode(InEncodeVideoParam *inputParam) {
 
     //デコーダが使用できるか確認する必要があるので、先にGPU関係の情報を取得しておく必要がある
     std::vector<std::unique_ptr<NVGPUInfo>> gpuList;
-    if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, m_cudaSchedule, inputParam->ctrl.skipHWDecodeCheck))) {
+    if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, m_cudaSchedule, inputParam->ctrl.skipHWDecodeCheck, inputParam->disableNVML))) {
         PrintMes(RGY_LOG_ERROR, FOR_AUO ? _T("Cudaの初期化に失敗しました。\n") : _T("Failed to initialize CUDA.\n"));
         return nvStatus;
     }

diff --git a/NVEncCore/NVEncCore.vcxproj b/NVEncCore/NVEncCore.vcxproj
@@ -478,7 +478,7 @@ if exist rgy_rev.h.%PID%.tmp del rgy_rev.h.%PID%.tmp &gt; nul 2&gt;&amp;1</Comma
       <DisableSpecificWarnings>4505;4512</DisableSpecificWarnings>
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
-      <OmitFramePointers>true</OmitFramePointers>
+      <OmitFramePointers>false</OmitFramePointers>
       <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
       <StringPooling>true</StringPooling>
       <BufferSecurityCheck>false</BufferSecurityCheck>
@@ -532,7 +532,7 @@ if exist rgy_rev.h.%PID%.tmp del rgy_rev.h.%PID%.tmp &gt; nul 2&gt;&amp;1</Comma
       <DisableSpecificWarnings>4505;4512</DisableSpecificWarnings>
       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
-      <OmitFramePointers>true</OmitFramePointers>
+      <OmitFramePointers>false</OmitFramePointers>
       <EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
       <StringPooling>true</StringPooling>
       <BufferSecurityCheck>false</BufferSecurityCheck>

diff --git a/NVEncCore/NVEncDevice.cpp b/NVEncCore/NVEncDevice.cpp
@@ -939,7 +939,7 @@ const NVEncCodecFeature *NVEncoder::getCodecFeature(const GUID &codec) {
     return nullptr;
 }
 
-RGY_ERR NVGPUInfo::initDevice(int deviceID, CUctx_flags ctxFlags, bool error_if_fail, bool skipHWDecodeCheck) {
+RGY_ERR NVGPUInfo::initDevice(int deviceID, CUctx_flags ctxFlags, bool error_if_fail, bool skipHWDecodeCheck, bool disableNVML) {
 #define GETATTRIB_CHECK(val, attrib, dev) { \
         cudaError_t cuErr = cudaDeviceGetAttribute(&(val), (attrib), (dev)); \
         if (cuErr == cudaErrorInvalidDevice || cuErr == cudaErrorInvalidValue) { \
@@ -978,7 +978,7 @@ RGY_ERR NVGPUInfo::initDevice(int deviceID, CUctx_flags ctxFlags, bool error_if_
     }
     writeLog(RGY_LOG_DEBUG, _T("  cudaDeviceGetAttribute: CUDA %d.%d\n"), cudaDevMajor, cudaDevMinor);
 
-    {
+    if (!disableNVML) {
         auto cuErr = cudaDeviceGetPCIBusId(pci_bus_name, sizeof(pci_bus_name), deviceID);
         if (cuErr == cudaErrorInvalidDevice || cuErr == cudaErrorInvalidValue) {
             writeLog((error_if_fail) ? RGY_LOG_WARN : RGY_LOG_DEBUG, _T("  Warn: cudaDeviceGetPCIBusId(): %s\n"), char_to_tstring(cudaGetErrorString(cuErr)).c_str());
@@ -1244,7 +1244,7 @@ NVENCSTATUS NVEncCtrl::InitCuda() {
 NVENCSTATUS NVEncCtrl::ShowDeviceList(const int cudaSchedule, const bool skipHWDecodeCheck) {
     NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
     std::vector<std::unique_ptr<NVGPUInfo>> gpuList;
-    if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, cudaSchedule, skipHWDecodeCheck))) {
+    if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, cudaSchedule, skipHWDecodeCheck, false))) {
         PrintMes(RGY_LOG_ERROR, FOR_AUO ? _T("Cudaの初期化に失敗しました。\n") : _T("Failed to initialize CUDA.\n"));
         return nvStatus;
     }
@@ -1263,7 +1263,7 @@ NVENCSTATUS NVEncCtrl::ShowDeviceList(const int cudaSchedule, const bool skipHWD
 NVENCSTATUS NVEncCtrl::ShowCodecSupport(const int cudaSchedule, const bool skipHWDecodeCheck) {
     NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
     std::vector<std::unique_ptr<NVGPUInfo>> gpuList;
-    if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, cudaSchedule, skipHWDecodeCheck))) {
+    if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, cudaSchedule, skipHWDecodeCheck, false))) {
         PrintMes(RGY_LOG_ERROR, FOR_AUO ? _T("Cudaの初期化に失敗しました。\n") : _T("Failed to initialize CUDA.\n"));
         return nvStatus;
     }
@@ -1293,7 +1293,7 @@ NVENCSTATUS NVEncCtrl::ShowCodecSupport(const int cudaSchedule, const bool skipH
 NVENCSTATUS NVEncCtrl::ShowNVEncFeatures(const int cudaSchedule, const bool skipHWDecodeCheck) {
     NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
     std::vector<std::unique_ptr<NVGPUInfo>> gpuList;
-    if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, cudaSchedule, skipHWDecodeCheck))) {
+    if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, cudaSchedule, skipHWDecodeCheck, false))) {
         PrintMes(RGY_LOG_ERROR, FOR_AUO ? _T("Cudaの初期化に失敗しました。\n") : _T("Failed to initialize CUDA.\n"));
         return nvStatus;
     }
@@ -1368,7 +1368,7 @@ NVENCSTATUS NVEncCtrl::ShowNVEncFeatures(const int cudaSchedule, const bool skip
     return nvStatus;
 }
 
-NVENCSTATUS NVEncCtrl::InitDeviceList(std::vector<std::unique_ptr<NVGPUInfo>>& gpuList, const int cudaSchedule, const bool skipHWDecodeCheck) {
+NVENCSTATUS NVEncCtrl::InitDeviceList(std::vector<std::unique_ptr<NVGPUInfo>>& gpuList, const int cudaSchedule, const bool skipHWDecodeCheck, const int disableNVML) {
     int deviceCount = 0;
     auto cuResult = cuDeviceGetCount(&deviceCount);
     if (cuResult != CUDA_SUCCESS) {
@@ -1386,12 +1386,14 @@ NVENCSTATUS NVEncCtrl::InitDeviceList(std::vector<std::unique_ptr<NVGPUInfo>>& g
         return NV_ENC_ERR_INVALID_ENCODERDEVICE;
     }
 
+    const bool disableNVMLCheck = (disableNVML > 1 || (disableNVML == 1 && deviceCount > 1));
+
     gpuList.clear();
     for (int currentDevice = 0; currentDevice < deviceCount; currentDevice++) {
         cudaGetLastError(); //これまでのエラーを初期化
         if ((m_nDeviceId < 0 || m_nDeviceId == currentDevice)) {
             auto gpu = std::make_unique<NVGPUInfo>(m_pNVLog);
-            if (gpu->initDevice(currentDevice, (CUctx_flags)cudaSchedule, m_nDeviceId == currentDevice, skipHWDecodeCheck) == RGY_ERR_NONE) {
+            if (gpu->initDevice(currentDevice, (CUctx_flags)cudaSchedule, m_nDeviceId == currentDevice, skipHWDecodeCheck, disableNVMLCheck) == RGY_ERR_NONE) {
                 gpuList.push_back(std::move(gpu));
             }
         }

diff --git a/NVEncCore/NVEncDevice.h b/NVEncCore/NVEncDevice.h
@@ -320,7 +320,7 @@ class NVGPUInfo {
 
     void close_device();
 
-    RGY_ERR initDevice(int deviceID, CUctx_flags ctxFlags, bool error_if_fail, bool skipHWDecodeCheck);
+    RGY_ERR initDevice(int deviceID, CUctx_flags ctxFlags, bool error_if_fail, bool skipHWDecodeCheck, bool disableNVML);
     RGY_ERR initEncoder();
     tstring infostr() const;
 protected:
@@ -375,7 +375,7 @@ class NVEncCtrl {
     NVENCSTATUS InitCuda();
 
     //deviceリストを作成
-    NVENCSTATUS InitDeviceList(std::vector<std::unique_ptr<NVGPUInfo>> &gpuList, const int cudaSchedule, const bool skipHWDecodeCheck);
+    NVENCSTATUS InitDeviceList(std::vector<std::unique_ptr<NVGPUInfo>> &gpuList, const int cudaSchedule, const bool skipHWDecodeCheck, const int disableNVML);
 
     shared_ptr<RGYLog>           m_pNVLog;                //ログ出力管理
     int                          m_nDeviceId;             //DeviceId

diff --git a/NVEncCore/NVEncParam.cpp b/NVEncCore/NVEncParam.cpp
@@ -371,6 +371,7 @@ InEncodeVideoParam::InEncodeVideoParam() :
     deviceID(-1),
     cudaSchedule(DEFAULT_CUDA_SCHEDULE),
     sessionRetry(0),
+    disableNVML(0),
     input(),
     preset(0),
     nHWDecType(0),

diff --git a/NVEncCore/NVEncParam.h b/NVEncCore/NVEncParam.h
@@ -807,6 +807,7 @@ struct InEncodeVideoParam {
     int deviceID;                 //使用するGPUのID
     int cudaSchedule;
     int sessionRetry;
+    int disableNVML;
 
     VideoInfo input;              //入力する動画の情報
     int preset;                   //出力プリセット

diff --git a/NVEncCore/rgy_perf_monitor.cpp b/NVEncCore/rgy_perf_monitor.cpp
@@ -533,6 +533,9 @@ void CPerfMonitor::clear() {
     }
     m_pManager.reset();
 #endif //#if ENABLE_METRIC_FRAMEWORK
+#if ENABLE_NVML
+    m_nvmlMonitor.reset();
+#endif //#if ENABLE_NVML
 
     m_nStep = 0;
     m_thMainThread.reset();
@@ -789,11 +792,15 @@ int CPerfMonitor::init(tstring filename, const TCHAR *pPythonPath,
     }
 #endif //#if ENABLE_METRIC_FRAMEWORK
 #if ENABLE_NVML
-    auto nvml_ret = m_nvmlMonitor.Init(prm->pciBusId);
-    if (nvml_ret != NVML_SUCCESS) {
-        AddMessage(RGY_LOG_INFO, _T("Failed to start NVML Monitoring for \"%s\": %s.\n"), char_to_tstring(prm->pciBusId).c_str(), nvmlErrStr(nvml_ret));
-    } else {
-        AddMessage(RGY_LOG_DEBUG, _T("Eanble NVML Monitoring\n"));
+    if (prm->pciBusId.length() > 0) {
+        m_nvmlMonitor = std::make_unique<NVMLMonitor>();
+        auto nvml_ret = m_nvmlMonitor->Init(prm->pciBusId);
+        if (nvml_ret != NVML_SUCCESS) {
+            AddMessage(RGY_LOG_INFO, _T("Failed to start NVML Monitoring for \"%s\": %s.\n"), char_to_tstring(prm->pciBusId).c_str(), nvmlErrStr(nvml_ret));
+            m_nvmlMonitor.reset();
+        } else {
+            AddMessage(RGY_LOG_DEBUG, _T("Eanble NVML Monitoring\n"));
+        }
     }
 #else
     UNREFERENCED_PARAMETER(prm);
@@ -970,7 +977,7 @@ void CPerfMonitor::check() {
     pInfoNew->pcie_throughput_rx_per_sec = 0;
 #if ENABLE_NVML
     NVMLMonitorInfo nvmlInfo;
-    if (m_nvmlMonitor.getData(&nvmlInfo) == NVML_SUCCESS) {
+    if (m_nvmlMonitor && m_nvmlMonitor->getData(&nvmlInfo) == NVML_SUCCESS) {
         m_nvmlInfo = nvmlInfo;
         pInfoNew->gpu_info_valid   = TRUE;
         pInfoNew->gpu_clock        = m_nvmlInfo.GPUFreq;

diff --git a/NVEncCore/rgy_perf_monitor.h b/NVEncCore/rgy_perf_monitor.h
@@ -513,7 +513,7 @@ class CPerfMonitor {
     CQSVConsumer m_Consumer;
 #endif //#if ENABLE_METRIC_FRAMEWORK
 #if ENABLE_NVML
-    NVMLMonitor m_nvmlMonitor;
+    std::unique_ptr<NVMLMonitor> m_nvmlMonitor;
     NVMLMonitorInfo m_nvmlInfo;
 #endif //#if ENABLE_NVML
 #if ENABLE_GPUZ_INFO