Skip to content

Commit

Permalink
NVMLによるGPUのモニタリングを行わないようにするオプションを追加。(--disable-nvml)
Browse files Browse the repository at this point in the history
  • Loading branch information
rigaya committed Nov 28, 2023
1 parent 5ce6292 commit 4c1423b
Show file tree
Hide file tree
Showing 12 changed files with 81 additions and 24 deletions.
4 changes: 2 additions & 2 deletions NVEncC/NVEncC.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ copy /y "$(SolutionDir)ffmpeg_lgpl\lib\$(PlatformName)\libvmaf.dll" "$(OutDir)"
<EnableEnhancedInstructionSet>StreamingSIMDExtensions</EnableEnhancedInstructionSet>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<OmitFramePointers>true</OmitFramePointers>
<OmitFramePointers>false</OmitFramePointers>
<EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<LanguageStandard>stdcpp17</LanguageStandard>
Expand Down Expand Up @@ -569,7 +569,7 @@ copy /y "$(SolutionDir)ffmpeg_lgpl\lib\$(PlatformName)\libass-*.dll" "$(OutDir)"
<StringPooling>true</StringPooling>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<OmitFramePointers>true</OmitFramePointers>
<OmitFramePointers>false</OmitFramePointers>
<EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
<MultiProcessorCompilation>true</MultiProcessorCompilation>
<LanguageStandard>stdcpp17</LanguageStandard>
Expand Down
16 changes: 15 additions & 1 deletion NVEncC_Options.en.md
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@
- [--vpp-nvvfx-model-dir \<string\>](#--vpp-nvvfx-model-dir-string)
- [Other Options](#other-options)
- [--cuda-schedule \<string\>](#--cuda-schedule-string)
- [--disable-nvml \<int\>](#--disable-nvml-int)
- [--output-buf \<int\>](#--output-buf-int)
- [--output-thread \<int\>](#--output-thread-int)
- [--log \<string\>](#--log-string)
Expand Down Expand Up @@ -2529,7 +2530,7 @@ Set path to the model folder of Video Effect models.
### --cuda-schedule &lt;string&gt;
Change the behavior of the CPU when waiting for GPU task completion. The default is auto.
- parameters
- **parameters**
- auto (default)
Leave the mode decision to the driver of CUDA.
Expand All @@ -2542,6 +2543,19 @@ Set path to the model folder of Video Effect models.
- sync
Sleep a thread until the end of the GPU task. Performance might decrease, but will reduce CPU utilization especially when decoding is done by HW.
### --disable-nvml &lt;int&gt;
Disable NVML GPU monitoring. The default is 0 (NVML stays enabled).
- **Parameters**
- 0 (default)
Enable NVML.
- 1
Disable NVML when the system has only one CUDA device.
- 2
Always disable NVML.
### --output-buf &lt;int&gt;
Specify the output buffer size in MB. The default is 8 and the maximum value is 128.
Expand Down
14 changes: 14 additions & 0 deletions NVEncC_Options.ja.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@
- [--vpp-nvvfx-model-dir \<string\>](#--vpp-nvvfx-model-dir-string)
- [制御系のオプション](#制御系のオプション)
- [--cuda-schedule \<string\>](#--cuda-schedule-string)
- [--disable-nvml \<int\>](#--disable-nvml-int)
- [--output-buf \<int\>](#--output-buf-int)
- [--output-thread \<int\>](#--output-thread-int)
- [--log \<string\>](#--log-string)
Expand Down Expand Up @@ -2603,6 +2604,19 @@ NVIDIA MAXINE VideoEffects のモデルを格納しているフォルダの場
- sync
GPUタスクの終了まで、スレッドをスリープさせる。性能が落ちる可能性があるかわりに、特にHWデコード使用時に、CPU使用率を大きく削減する。
### --disable-nvml &lt;int&gt;
NVMLによるGPUモニタリングの無効化について指定する。デフォルトは0 (無効化しない)。
- **パラメータ**
- 0 (デフォルト)
NVMLを無効化しない。
- 1
CUDAを実行できるGPUが1つの場合にNVMLを無効化する。
- 2
常にNVMLを無効化する。
### --output-buf &lt;int&gt;
出力バッファサイズをMB単位で指定する。デフォルトは8、最大値は128。0で使用しない。
Expand Down
17 changes: 17 additions & 0 deletions NVEncCore/NVEncCmd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,8 @@ tstring encoder_help() {
_T(" sync : CPU will sleep when waiting GPU tasks, performance might\n")
_T(" drop slightly, while CPU utilization will be lower,\n")
_T(" especially on HW decode mode.\n"));
str += _T("")
_T(" --disable-nvml <int> disable NVML GPU monitoring (default 0, 0-2)\n");
str += gen_cmd_help_ctrl();
return str;
}
Expand Down Expand Up @@ -1619,6 +1621,20 @@ int parse_one_option(const TCHAR *option_name, const TCHAR* strInput[], int& i,
pParams->sessionRetry = value;
return 0;
}
if (IS_OPTION("disable-nvml")) {
i++;
int value = 0;
if (1 != _stscanf_s(strInput[i], _T("%d"), &value)) {
print_cmd_error_invalid_value(option_name, strInput[i]);
return 1;
}
if (value < 0) {
print_cmd_error_invalid_value(option_name, strInput[i], _T("disable-nvml should be specified in positive value."));
return 1;
}
pParams->disableNVML = value;
return 0;
}

auto ret = parse_one_input_option(option_name, strInput, i, nArgNum, &pParams->input, &pParams->inprm, argData);
if (ret >= 0) return ret;
Expand Down Expand Up @@ -2054,6 +2070,7 @@ tstring gen_cmd(const InEncodeVideoParam *pParams, const NV_ENC_CODEC_CONFIG cod

OPT_LST(_T("--cuda-schedule"), cudaSchedule, list_cuda_schedule);
OPT_NUM(_T("--session-retry"), sessionRetry);
OPT_NUM(_T("--disable-nvml"), disableNVML);

cmd << gen_cmd(&pParams->ctrl, &encPrmDefault.ctrl, save_disabled_prm);

Expand Down
7 changes: 4 additions & 3 deletions NVEncCore/NVEncCore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -904,8 +904,9 @@ NVENCSTATUS NVEncCore::GPUAutoSelect(std::vector<std::unique_ptr<NVGPUInfo>> &gp
NVMLMonitorInfo info;
#if ENABLE_NVML
NVMLMonitor monitor;
auto nvml_ret = monitor.Init(gpu->pciBusId());
if (nvml_ret == NVML_SUCCESS
auto nvml_ret = NVML_SUCCESS;
if (gpu->pciBusId().length() > 0
&& (nvml_ret = monitor.Init(gpu->pciBusId())) == NVML_SUCCESS
&& monitor.getData(&info) == NVML_SUCCESS) {
#else
NVSMIInfo nvsmi;
Expand Down Expand Up @@ -3253,7 +3254,7 @@ NVENCSTATUS NVEncCore::InitEncode(InEncodeVideoParam *inputParam) {

//デコーダが使用できるか確認する必要があるので、先にGPU関係の情報を取得しておく必要がある
std::vector<std::unique_ptr<NVGPUInfo>> gpuList;
if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, m_cudaSchedule, inputParam->ctrl.skipHWDecodeCheck))) {
if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, m_cudaSchedule, inputParam->ctrl.skipHWDecodeCheck, inputParam->disableNVML))) {
PrintMes(RGY_LOG_ERROR, FOR_AUO ? _T("Cudaの初期化に失敗しました。\n") : _T("Failed to initialize CUDA.\n"));
return nvStatus;
}
Expand Down
4 changes: 2 additions & 2 deletions NVEncCore/NVEncCore.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,7 @@ if exist rgy_rev.h.%PID%.tmp del rgy_rev.h.%PID%.tmp &gt; nul 2&gt;&amp;1</Comma
<DisableSpecificWarnings>4505;4512</DisableSpecificWarnings>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<OmitFramePointers>true</OmitFramePointers>
<OmitFramePointers>false</OmitFramePointers>
<EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
<StringPooling>true</StringPooling>
<BufferSecurityCheck>false</BufferSecurityCheck>
Expand Down Expand Up @@ -532,7 +532,7 @@ if exist rgy_rev.h.%PID%.tmp del rgy_rev.h.%PID%.tmp &gt; nul 2&gt;&amp;1</Comma
<DisableSpecificWarnings>4505;4512</DisableSpecificWarnings>
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<OmitFramePointers>true</OmitFramePointers>
<OmitFramePointers>false</OmitFramePointers>
<EnableFiberSafeOptimizations>true</EnableFiberSafeOptimizations>
<StringPooling>true</StringPooling>
<BufferSecurityCheck>false</BufferSecurityCheck>
Expand Down
16 changes: 9 additions & 7 deletions NVEncCore/NVEncDevice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -939,7 +939,7 @@ const NVEncCodecFeature *NVEncoder::getCodecFeature(const GUID &codec) {
return nullptr;
}

RGY_ERR NVGPUInfo::initDevice(int deviceID, CUctx_flags ctxFlags, bool error_if_fail, bool skipHWDecodeCheck) {
RGY_ERR NVGPUInfo::initDevice(int deviceID, CUctx_flags ctxFlags, bool error_if_fail, bool skipHWDecodeCheck, bool disableNVML) {
#define GETATTRIB_CHECK(val, attrib, dev) { \
cudaError_t cuErr = cudaDeviceGetAttribute(&(val), (attrib), (dev)); \
if (cuErr == cudaErrorInvalidDevice || cuErr == cudaErrorInvalidValue) { \
Expand Down Expand Up @@ -978,7 +978,7 @@ RGY_ERR NVGPUInfo::initDevice(int deviceID, CUctx_flags ctxFlags, bool error_if_
}
writeLog(RGY_LOG_DEBUG, _T(" cudaDeviceGetAttribute: CUDA %d.%d\n"), cudaDevMajor, cudaDevMinor);

{
if (!disableNVML) {
auto cuErr = cudaDeviceGetPCIBusId(pci_bus_name, sizeof(pci_bus_name), deviceID);
if (cuErr == cudaErrorInvalidDevice || cuErr == cudaErrorInvalidValue) {
writeLog((error_if_fail) ? RGY_LOG_WARN : RGY_LOG_DEBUG, _T(" Warn: cudaDeviceGetPCIBusId(): %s\n"), char_to_tstring(cudaGetErrorString(cuErr)).c_str());
Expand Down Expand Up @@ -1244,7 +1244,7 @@ NVENCSTATUS NVEncCtrl::InitCuda() {
NVENCSTATUS NVEncCtrl::ShowDeviceList(const int cudaSchedule, const bool skipHWDecodeCheck) {
NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
std::vector<std::unique_ptr<NVGPUInfo>> gpuList;
if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, cudaSchedule, skipHWDecodeCheck))) {
if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, cudaSchedule, skipHWDecodeCheck, false))) {
PrintMes(RGY_LOG_ERROR, FOR_AUO ? _T("Cudaの初期化に失敗しました。\n") : _T("Failed to initialize CUDA.\n"));
return nvStatus;
}
Expand All @@ -1263,7 +1263,7 @@ NVENCSTATUS NVEncCtrl::ShowDeviceList(const int cudaSchedule, const bool skipHWD
NVENCSTATUS NVEncCtrl::ShowCodecSupport(const int cudaSchedule, const bool skipHWDecodeCheck) {
NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
std::vector<std::unique_ptr<NVGPUInfo>> gpuList;
if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, cudaSchedule, skipHWDecodeCheck))) {
if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, cudaSchedule, skipHWDecodeCheck, false))) {
PrintMes(RGY_LOG_ERROR, FOR_AUO ? _T("Cudaの初期化に失敗しました。\n") : _T("Failed to initialize CUDA.\n"));
return nvStatus;
}
Expand Down Expand Up @@ -1293,7 +1293,7 @@ NVENCSTATUS NVEncCtrl::ShowCodecSupport(const int cudaSchedule, const bool skipH
NVENCSTATUS NVEncCtrl::ShowNVEncFeatures(const int cudaSchedule, const bool skipHWDecodeCheck) {
NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
std::vector<std::unique_ptr<NVGPUInfo>> gpuList;
if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, cudaSchedule, skipHWDecodeCheck))) {
if (NV_ENC_SUCCESS != (nvStatus = InitDeviceList(gpuList, cudaSchedule, skipHWDecodeCheck, false))) {
PrintMes(RGY_LOG_ERROR, FOR_AUO ? _T("Cudaの初期化に失敗しました。\n") : _T("Failed to initialize CUDA.\n"));
return nvStatus;
}
Expand Down Expand Up @@ -1368,7 +1368,7 @@ NVENCSTATUS NVEncCtrl::ShowNVEncFeatures(const int cudaSchedule, const bool skip
return nvStatus;
}

NVENCSTATUS NVEncCtrl::InitDeviceList(std::vector<std::unique_ptr<NVGPUInfo>>& gpuList, const int cudaSchedule, const bool skipHWDecodeCheck) {
NVENCSTATUS NVEncCtrl::InitDeviceList(std::vector<std::unique_ptr<NVGPUInfo>>& gpuList, const int cudaSchedule, const bool skipHWDecodeCheck, const int disableNVML) {
int deviceCount = 0;
auto cuResult = cuDeviceGetCount(&deviceCount);
if (cuResult != CUDA_SUCCESS) {
Expand All @@ -1386,12 +1386,14 @@ NVENCSTATUS NVEncCtrl::InitDeviceList(std::vector<std::unique_ptr<NVGPUInfo>>& g
return NV_ENC_ERR_INVALID_ENCODERDEVICE;
}

const bool disableNVMLCheck = (disableNVML > 1 || (disableNVML == 1 && deviceCount > 1));

gpuList.clear();
for (int currentDevice = 0; currentDevice < deviceCount; currentDevice++) {
cudaGetLastError(); //これまでのエラーを初期化
if ((m_nDeviceId < 0 || m_nDeviceId == currentDevice)) {
auto gpu = std::make_unique<NVGPUInfo>(m_pNVLog);
if (gpu->initDevice(currentDevice, (CUctx_flags)cudaSchedule, m_nDeviceId == currentDevice, skipHWDecodeCheck) == RGY_ERR_NONE) {
if (gpu->initDevice(currentDevice, (CUctx_flags)cudaSchedule, m_nDeviceId == currentDevice, skipHWDecodeCheck, disableNVMLCheck) == RGY_ERR_NONE) {
gpuList.push_back(std::move(gpu));
}
}
Expand Down
4 changes: 2 additions & 2 deletions NVEncCore/NVEncDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,7 @@ class NVGPUInfo {

void close_device();

RGY_ERR initDevice(int deviceID, CUctx_flags ctxFlags, bool error_if_fail, bool skipHWDecodeCheck);
RGY_ERR initDevice(int deviceID, CUctx_flags ctxFlags, bool error_if_fail, bool skipHWDecodeCheck, bool disableNVML);
RGY_ERR initEncoder();
tstring infostr() const;
protected:
Expand Down Expand Up @@ -375,7 +375,7 @@ class NVEncCtrl {
NVENCSTATUS InitCuda();

//deviceリストを作成
NVENCSTATUS InitDeviceList(std::vector<std::unique_ptr<NVGPUInfo>> &gpuList, const int cudaSchedule, const bool skipHWDecodeCheck);
NVENCSTATUS InitDeviceList(std::vector<std::unique_ptr<NVGPUInfo>> &gpuList, const int cudaSchedule, const bool skipHWDecodeCheck, const int disableNVML);

shared_ptr<RGYLog> m_pNVLog; //ログ出力管理
int m_nDeviceId; //DeviceId
Expand Down
1 change: 1 addition & 0 deletions NVEncCore/NVEncParam.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ InEncodeVideoParam::InEncodeVideoParam() :
deviceID(-1),
cudaSchedule(DEFAULT_CUDA_SCHEDULE),
sessionRetry(0),
disableNVML(0),
input(),
preset(0),
nHWDecType(0),
Expand Down
1 change: 1 addition & 0 deletions NVEncCore/NVEncParam.h
Original file line number Diff line number Diff line change
Expand Up @@ -807,6 +807,7 @@ struct InEncodeVideoParam {
int deviceID; //使用するGPUのID
int cudaSchedule;
int sessionRetry;
int disableNVML;

VideoInfo input; //入力する動画の情報
int preset; //出力プリセット
Expand Down
19 changes: 13 additions & 6 deletions NVEncCore/rgy_perf_monitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,9 @@ void CPerfMonitor::clear() {
}
m_pManager.reset();
#endif //#if ENABLE_METRIC_FRAMEWORK
#if ENABLE_NVML
m_nvmlMonitor.reset();
#endif //#if ENABLE_NVML

m_nStep = 0;
m_thMainThread.reset();
Expand Down Expand Up @@ -789,11 +792,15 @@ int CPerfMonitor::init(tstring filename, const TCHAR *pPythonPath,
}
#endif //#if ENABLE_METRIC_FRAMEWORK
#if ENABLE_NVML
auto nvml_ret = m_nvmlMonitor.Init(prm->pciBusId);
if (nvml_ret != NVML_SUCCESS) {
AddMessage(RGY_LOG_INFO, _T("Failed to start NVML Monitoring for \"%s\": %s.\n"), char_to_tstring(prm->pciBusId).c_str(), nvmlErrStr(nvml_ret));
} else {
AddMessage(RGY_LOG_DEBUG, _T("Eanble NVML Monitoring\n"));
if (prm->pciBusId.length() > 0) {
m_nvmlMonitor = std::make_unique<NVMLMonitor>();
auto nvml_ret = m_nvmlMonitor->Init(prm->pciBusId);
if (nvml_ret != NVML_SUCCESS) {
AddMessage(RGY_LOG_INFO, _T("Failed to start NVML Monitoring for \"%s\": %s.\n"), char_to_tstring(prm->pciBusId).c_str(), nvmlErrStr(nvml_ret));
m_nvmlMonitor.reset();
} else {
AddMessage(RGY_LOG_DEBUG, _T("Eanble NVML Monitoring\n"));
}
}
#else
UNREFERENCED_PARAMETER(prm);
Expand Down Expand Up @@ -970,7 +977,7 @@ void CPerfMonitor::check() {
pInfoNew->pcie_throughput_rx_per_sec = 0;
#if ENABLE_NVML
NVMLMonitorInfo nvmlInfo;
if (m_nvmlMonitor.getData(&nvmlInfo) == NVML_SUCCESS) {
if (m_nvmlMonitor && m_nvmlMonitor->getData(&nvmlInfo) == NVML_SUCCESS) {
m_nvmlInfo = nvmlInfo;
pInfoNew->gpu_info_valid = TRUE;
pInfoNew->gpu_clock = m_nvmlInfo.GPUFreq;
Expand Down
2 changes: 1 addition & 1 deletion NVEncCore/rgy_perf_monitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ class CPerfMonitor {
CQSVConsumer m_Consumer;
#endif //#if ENABLE_METRIC_FRAMEWORK
#if ENABLE_NVML
NVMLMonitor m_nvmlMonitor;
std::unique_ptr<NVMLMonitor> m_nvmlMonitor;
NVMLMonitorInfo m_nvmlInfo;
#endif //#if ENABLE_NVML
#if ENABLE_GPUZ_INFO
Expand Down

0 comments on commit 4c1423b

Please sign in to comment.