-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Added NVIDIA SMI metrics reporter * Turned off metric provider for Tests
- Loading branch information
Showing
7 changed files
with
75 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
13 changes: 13 additions & 0 deletions
13
metric_providers/gpu/energy/nvidia/smi/component/metric-provider-nvidia-smi-wrapper.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash

# Streams GPU power readings from nvidia-smi, one line per sample, each line
# prefixed with the wall-clock time at which the sample was received:
#
#     <timestamp from `date +%s%N`> <power.draw value>
#
# Options:
#     -i <ms>   sampling interval in milliseconds, forwarded to `nvidia-smi -lms`

i=''

while getopts "i:" o; do
    case "$o" in
        i)
            i=${OPTARG}
            ;;
    esac
done

# ${i:+"$i"} expands to the quoted interval when -i was given and to nothing
# otherwise, so the argument list is the same as with a bare $i but the value
# is protected from word splitting and globbing.
#
# In the awk program:
#   * close(cmd) is required. Without it awk runs `date` only once; every later
#     `cmd | getline` hits end-of-file on the already-consumed pipe and leaves
#     `timestamp` untouched, so all samples would carry the first timestamp.
#   * fflush() pushes each line out immediately; when stdout is a pipe awk
#     would otherwise hold samples back in its output buffer.
nvidia-smi --query-gpu=power.draw --format=csv,noheader,nounits -lms ${i:+"$i"} | awk '{
    cmd = "date +%s%N"
    cmd | getline timestamp
    close(cmd)
    print timestamp " " $0
    fflush()
}'
46 changes: 46 additions & 0 deletions
46
metric_providers/gpu/energy/nvidia/smi/component/provider.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import os | ||
|
||
from metric_providers.base import BaseMetricProvider | ||
|
||
class GpuEnergyNvidiaSmiComponentProvider(BaseMetricProvider):
    """Metric provider for GPU energy based on the nvidia-smi wrapper script.

    The provider registers itself as ``gpu_energy_nvidia_smi_component`` with
    unit ``mJ`` and uses ``metric-provider-nvidia-smi-wrapper.sh`` (located
    next to this module) as its executable. Readings are turned into
    per-sample energy values in :meth:`read_metrics`.
    """

    def __init__(self, resolution, skip_check=False):
        """Configure the base provider.

        Args:
            resolution: Sampling resolution, passed through to the base class.
            skip_check: Passed through to the base class unchanged.
        """
        super().__init__(
            metric_name='gpu_energy_nvidia_smi_component',
            metrics={'time': int, 'value': int},
            resolution=resolution,
            unit='mJ',
            current_dir=os.path.dirname(os.path.abspath(__file__)),
            metric_provider_executable='metric-provider-nvidia-smi-wrapper.sh',
            skip_check=skip_check,
        )

    def read_metrics(self, run_id, containers=None):
        """Read the recorded samples and convert each reading to energy.

        Every ``value`` is multiplied by the time elapsed since the previous
        sample and divided by 1_000. The first sample has no predecessor, so
        its interval is approximated by the mean of all other intervals.

        Args:
            run_id: Identifier of the run, forwarded to the base class.
            containers: Forwarded to the base class unchanged.

        Returns:
            The DataFrame produced by the base class with ``value`` replaced
            by the integer energy per sample. An empty frame is returned
            unchanged.
        """
        df = super().read_metrics(run_id, containers)

        # Nothing to convert. Without this guard the first-interval assignment
        # below would have no row to write to.
        if df.empty:
            return df

        # Conversion to Joules
        # If ever in need to convert the database from Joules back to a power format:
        #
        # WITH times as (
        #     SELECT id, value, detail_name, time, (time - LAG(time) OVER (ORDER BY detail_name ASC, time ASC)) AS diff, unit
        #     FROM measurements
        #     WHERE run_id = RUN_ID AND metric = 'gpu_energy_nvidia_smi_component'
        #     ORDER BY detail_name ASC, time ASC)
        # SELECT *, value / (diff / 1000) as power FROM times;
        #
        # One can see that the value only changes once per second

        # NOTE(review): the original comment states the intervals are in
        # microseconds - confirm this matches what the wrapper script emits.
        intervals = df['time'].diff()

        # Positional write: the first diff is always NaN. Using .iloc keeps
        # this correct even if the index does not start at label 0.
        intervals.iloc[0] = intervals.mean()  # approximate first interval

        energy = df['value'] * intervals / 1_000

        # A single-sample frame has no interval at all (mean of nothing is
        # NaN); such values are stored as 0.
        df['value'] = energy.fillna(0).astype(int)

        return df