Skip to content

Commit

Permalink
Signal/error handling
Browse files Browse the repository at this point in the history
  • Loading branch information
sasha0552 authored Jul 27, 2024
1 parent 9f1a115 commit 6ec2b2f
Showing 1 changed file with 51 additions and 14 deletions.
65 changes: 51 additions & 14 deletions src/main.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <nvapi.h>
#include <nvml.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
Expand All @@ -16,11 +17,17 @@

/***** ***** ***** ***** ***** VARIABLES ***** ***** ***** ***** *****/

// Flag indicating whether the program should continue running
static volatile sig_atomic_t should_run = true;

// Flag indicating whether an error has occurred
static bool error_occurred = false;

// Flags to check initialization status of NVML and NVAPI libraries
static bool nvapiInitialized = false;
static bool nvmlInitialized = false;

// Arrays to store device handles for all GPUs
// Variables to store device handles for all GPUs
static NvPhysicalGpuHandle nvapiDevices[NVAPI_MAX_PHYSICAL_GPUS];
static nvmlDevice_t nvmlDevices[NVAPI_MAX_PHYSICAL_GPUS];

Expand All @@ -31,10 +38,18 @@ static unsigned int deviceCount;
static nvmlUtilization_t utilization;

// Variable to store GPU states
static gpuState gpuStates[NVAPI_MAX_PHYSICAL_GPUS] = { 0 };
static gpuState gpuStates[NVAPI_MAX_PHYSICAL_GPUS];

/***** ***** ***** ***** ***** FUNCTIONS ***** ***** ***** ***** *****/

static void handle_exit(int signal) {
// Check if the received signal is SIGINT or SIGTERM
if (signal == SIGINT || signal == SIGTERM) {
// Set the global flag to false to indicate the program should stop running
should_run = false;
}
}

static bool enter_pstate(unsigned int i, unsigned int pstateId) {
// Get the current state of the GPU
gpuState * state = &gpuStates[i];
Expand All @@ -60,10 +75,17 @@ static bool enter_pstate(unsigned int i, unsigned int pstateId) {
}

int main() {
/***** SIGNALS *****/
{
// Set up signal handling
signal(SIGINT, handle_exit);
signal(SIGTERM, handle_exit);
}

/***** NVAPI INIT *****/
{
// Initialize NVAPI library
NVAPI_CALL(NvAPI_Initialize(), cleanup);
NVAPI_CALL(NvAPI_Initialize(), errored);

// Mark NVAPI as initialized
nvapiInitialized = true;
Expand All @@ -72,7 +94,7 @@ int main() {
/***** NVML INIT *****/
{
// Initialize NVML library
NVML_CALL(nvmlInit(), cleanup);
NVML_CALL(nvmlInit(), errored);

// Mark NVML as initialized
nvmlInitialized = true;
Expand All @@ -81,14 +103,14 @@ int main() {
/***** NVAPI HANDLES *****/
{
// Get NVAPI device handles for all GPUs
NVAPI_CALL(NvAPI_EnumPhysicalGPUs(nvapiDevices, &deviceCount), cleanup);
NVAPI_CALL(NvAPI_EnumPhysicalGPUs(nvapiDevices, &deviceCount), errored);
}

/***** NVML HANDLES *****/
{
// Get NVML device handles for all GPUs
for (unsigned int i = 0; i < deviceCount; i++) {
NVML_CALL(nvmlDeviceGetHandleByIndex(i, &nvmlDevices[i]), cleanup);
NVML_CALL(nvmlDeviceGetHandleByIndex(i, &nvmlDevices[i]), errored);
}
}

Expand All @@ -101,30 +123,30 @@ int main() {
for (unsigned int i = 0; i < deviceCount; i++) {
// Switch to low performance state
if (!enter_pstate(i, PERFORMANCE_STATE_LOW)) {
goto cleanup;
goto errored;
}
}
}

/***** MAIN LOOP *****/
{
// Infinite loop to continuously monitor GPU utilization
while (true) {
while (should_run) {
// Loop through all devices to get their utilization rates
for (unsigned int i = 0; i < deviceCount; i++) {
// Get the current state of the GPU
gpuState * state = &gpuStates[i];

// Retrieve the current utilization rates of the GPU
NVML_CALL(nvmlDeviceGetUtilizationRates(nvmlDevices[i], &utilization), cleanup);
NVML_CALL(nvmlDeviceGetUtilizationRates(nvmlDevices[i], &utilization), errored);

// Check if the GPU utilization is not zero
if (utilization.gpu != 0) {
// If the GPU is not already in high performance state
if (state->pstateId != PERFORMANCE_STATE_HIGH) {
// Switch to high performance state
if (!enter_pstate(i, PERFORMANCE_STATE_HIGH)) {
goto cleanup;
goto errored;
}
} else {
// Reset the iteration counter
Expand All @@ -137,7 +159,7 @@ int main() {
if (state->iterations > ITERATIONS_BEFORE_SWITCH) {
// Switch to low performance state
if (!enter_pstate(i, PERFORMANCE_STATE_LOW)) {
goto cleanup;
goto errored;
}
}

Expand All @@ -156,6 +178,21 @@ int main() {
}
}

/***** NORMAL EXIT *****/
{
// Notify about the exit
printf("Exiting...\n");

// Jump to cleanup section
goto cleanup;
}

errored:
/***** APPLICATION ERROR HAPPENED *****/
{
error_occurred = true;
}

cleanup:
/***** NVAPI DEINIT *****/
{
Expand All @@ -165,7 +202,7 @@ int main() {
nvapiInitialized = false;

// Unload NVAPI library
NVAPI_CALL(NvAPI_Unload(), cleanup);
NVAPI_CALL(NvAPI_Unload(), errored);
}
}

Expand All @@ -177,12 +214,12 @@ int main() {
nvmlInitialized = false;

// Shutdown NVML library
NVML_CALL(nvmlShutdown(), cleanup);
NVML_CALL(nvmlShutdown(), errored);
}
}

/***** RETURN *****/
{
return 0;
return error_occurred;
}
}

0 comments on commit 6ec2b2f

Please sign in to comment.