-
Notifications
You must be signed in to change notification settings - Fork 0
/
local_stat_parser.py
290 lines (222 loc) · 10.5 KB
/
local_stat_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
#!/usr/bin/env python
# This script will run for a few seconds and parse important info about the system.
from optparse import OptionParser
import sys
import subprocess
import re
import time
import json
def parse_gpu_info_lines(gpu_lines):
    """Parses the gpu info from 2 lines cut out from the nvidia-smi result.

    Args:
        gpu_lines (list of strings): Two nvidia-smi lines representing the
            current gpu info. The first one starts with the gpu index, the
            second one holds the fan, temperature, power, memory and load
            readings.

    Returns:
        Tuple: Gpu id ('gpuX') and a dict with the keys 'fan', 'temp',
        'cur_pow', 'max_pow', 'cur_mem', 'max_mem' and 'load'. A reading that
        is not printed as a plain integer (e.g. 'N/A') is stored as -1.

    Raises:
        ValueError: If one of the two lines cannot be parsed.
    """
    def to_int(reading):
        # Placeholders such as 'N/A' (or a missing group) become -1.
        if reading is not None and reading.isdigit():
            return int(reading)
        return -1

    index = re.match(r'\| *(\d+)', gpu_lines[0])
    if index is None:
        raise ValueError('Cannot find a gpu index in: {!r}'.format(gpu_lines[0]))
    gpu_id = 'gpu{}'.format(int(index.group(1)))

    # The fan column is either '<number>%' or a placeholder without digits;
    # only the numeric form is captured, a placeholder leaves the group empty.
    stats = re.search(
        r'\| *(?:(\d+)%|\S+) *(\d+)C *\S+ *(\d+|\S+)W* / *(\d+|\S+)W* '
        r'\| *(\d+)MiB / *(\d+)MiB \| *(\d+|\S+)',
        gpu_lines[1])
    if stats is None:
        raise ValueError('Cannot parse gpu stats from: {!r}'.format(gpu_lines[1]))
    fan, temp, cur_w, max_w, cur_mem, max_mem, load = stats.groups()

    gpu = {
        'fan': to_int(fan),
        'temp': to_int(temp),
        'cur_pow': to_int(cur_w),
        'max_pow': to_int(max_w),
        'cur_mem': to_int(cur_mem),
        'max_mem': to_int(max_mem),
        'load': to_int(load),
    }
    return (gpu_id, gpu)
def parse_gpu_proc_lines(proc_lines):
    """Parses the information from the nvidia-smi process lines (if available).

    Lines that do not look like a process row (for example a
    'No running processes found' notice or a blank line) are ignored.

    Args:
        proc_lines (list of strings): All the nvidia-smi output lines with
            process info.

    Returns:
        Tuple: A dict mapping each seen gpu id ('gpuX') to a bool telling
        whether that gpu lists per-process details (i.e. a pid was printed),
        and a list of (gpu id, pid, memory) tuples, one per process row. The
        memory is 0 when the row carries no numeric memory value.
    """
    row_pattern = re.compile(
        r'\| *(?P<gpu_id>\d+) *(?P<pid>\d*) *[CG]* *([\S|\s]*?)\s+(?P<mem>\d*)[MiB|\s\|]')
    support = {}
    processes = []
    for line in proc_lines:
        row = row_pattern.search(line)
        if row is None:
            # Not a per-gpu row at all; nothing to record.
            continue
        gpu_id = 'gpu{}'.format(int(row.group('gpu_id')))
        pid = row.group('pid')
        # A row without a pid means the gpu does not expose process details.
        support[gpu_id] = pid != ''
        if support[gpu_id]:
            mem = row.group('mem')
            processes.append((gpu_id, int(pid), int(mem) if mem else 0))
    return (support, processes)
def _map_pids_to_users(ps_lines):
    """Builds a pid -> user name dict from the data rows of 'ps axo user:30,pid'."""
    pid_to_user = {}
    for row in ps_lines:
        fields = re.search(r'(\S+) *(\d+)', row)
        if fields is None:
            # Blank or unexpected row: skip it, but keep reading the rest.
            continue
        user, pid = fields.groups()
        pid_to_user[int(pid)] = user
    return pid_to_user


def _parse_smi_snapshot(smi_lines, pid_to_user):
    """Turns the lines of one nvidia-smi call into a dict of per-gpu info."""
    # The gpu rows start right after the first '|=====' header separator.
    cursor = 0
    while '|=====' not in smi_lines[cursor]:
        cursor += 1
    cursor += 1

    snapshot = {}
    # Every gpu takes two info lines followed by a '+----' separator line.
    while smi_lines[cursor + 2].startswith('+'):
        gpu_id, gpu = parse_gpu_info_lines(smi_lines[cursor:cursor + 2])
        # Will be set to False below if the process table says so.
        gpu['proc_info_support'] = True
        gpu['proc_info'] = {}
        snapshot[gpu_id] = gpu
        cursor += 3

    # Jump over the five lines between the gpu table and the process rows;
    # the last two lines (closing border, trailing empty string) are cut off.
    cursor += 5
    support, processes = parse_gpu_proc_lines(smi_lines[cursor:-2])
    for gpu_id, supported in support.items():
        snapshot[gpu_id]['proc_info_support'] = supported
    for gpu_id, pid, mem_usage in processes:
        # The process list is taken after nvidia-smi ran, so a pid may be
        # missing from it; account such memory to 'unknown' instead of failing.
        user = pid_to_user.get(pid, 'unknown')
        usage = snapshot[gpu_id]['proc_info']
        usage[user] = usage.get(user, 0) + mem_usage
    return snapshot


def _merge_gpu_snapshots(snapshots):
    """Folds several snapshots into the first one, keeping the maxima."""
    merged = snapshots[0]
    for snapshot in snapshots[1:]:
        for gpu_id, gpu in snapshot.items():
            if gpu_id not in merged:
                # Gpu was not seen in an earlier run: take it over as is.
                merged[gpu_id] = gpu
                continue
            summary = merged[gpu_id]
            # Take the max of some of the stats.
            for stat in ('load', 'fan', 'cur_mem', 'cur_pow', 'temp'):
                summary[stat] = max(summary[stat], gpu[stat])
            # Fuse the user info. Some users might not be on the gpu for all runs.
            if summary['proc_info_support']:
                for user, mem in gpu['proc_info'].items():
                    summary['proc_info'][user] = max(summary['proc_info'].get(user, mem), mem)
    return merged


def parse_gpu_info(runs=20, wait=0.25):
    """Performs several runs of parsing the GPU info from nvidia-smi.

    Each run calls 'nvidia-smi' and 'ps axo user:30,pid', parses the gpu table
    and sums the memory of the listed processes per user. Memory of a process
    whose pid is not in the ps listing is accounted to the user 'unknown'.

    Args:
        runs (int, optional): Number of runs
        wait (float, optional): Seconds to sleep after each run.

    Returns:
        Dict: Compiled GPU info across the several runs, keyed by gpu id
        ('gpuX'). 'load', 'fan', 'cur_mem', 'cur_pow', 'temp' and the per-user
        memory in 'proc_info' hold the maximum over all runs.
    """
    snapshots = []
    for _ in range(runs):
        smi_lines = subprocess.check_output('nvidia-smi', shell=True).decode('UTF-8').split('\n')
        ps_lines = subprocess.check_output('ps axo user:30,pid', shell=True).decode('UTF-8').split('\n')
        # Map pids to users (first ps line is the header) to see who uses the memory.
        pid_to_user = _map_pids_to_users(ps_lines[1:])
        snapshots.append(_parse_smi_snapshot(smi_lines, pid_to_user))
        time.sleep(wait)
    # Now summarize the infos of all runs.
    return _merge_gpu_snapshots(snapshots)
def parse_cpu_info(runs=5, wait=0.1):
    """Performs several runs of parsing the CPU info.

    Each run reads the memory counters from /proc/meminfo, the set of users
    owning processes from 'ps' and the user + system cpu load from 'top'.

    Args:
        runs (int, optional): Number of runs
        wait (float, optional): Seconds to sleep after each run.

    Returns:
        Dict: Compiled CPU info across the several runs with the keys
        'max_ram', 'used_ram', 'max_swap', 'used_swap' (meminfo values
        divided by 1024), 'load' and 'users'. 'load', 'used_ram' and
        'used_swap' are the maximum over all runs, 'users' is the sorted
        union of the users seen in any run.

    Raises:
        ValueError: If the cpu load cannot be found in the output of top.
    """
    samples = []
    # For a set of runs
    for _ in range(runs):
        sample = {}

        # Get the memory info
        mem_info = subprocess.check_output(
            r"cat /proc/meminfo | grep --color=no 'Swap\|Mem\|Cached\|Buffers'",
            shell=True).decode('UTF-8').split('\n')
        mem = {}
        for line in mem_info:
            entry = re.search(r'(\S+): *(\d+)', line)
            if entry is not None:  # skips the trailing empty line
                mem[entry.group(1)] = int(entry.group(2))
        sample['max_ram'] = mem['MemTotal'] // 1024
        sample['used_ram'] = sample['max_ram'] - (mem['Cached'] + mem['Buffers'] + mem['MemFree']) // 1024
        sample['max_swap'] = mem['SwapTotal'] // 1024
        sample['used_swap'] = sample['max_swap'] - mem['SwapFree'] // 1024

        # Get set of users with processes (first ps line is the header).
        ps_lines = subprocess.check_output('ps axo user:30', shell=True).decode('UTF-8').split('\n')
        sample['users'] = {name.strip() for name in ps_lines[1:] if name.strip()}

        # Get the cpu usage from the last 'Cpu(s)' summary line of top.
        top_line = subprocess.check_output(
            "/usr/bin/top -bn2 -d 2 | grep --color=no 'Cpu(s)' | tail -1",
            shell=True).decode('UTF-8')
        # Accept both '.' and ',' as decimal separator in the percentages.
        loads = re.search(r'%*Cpu\(s\): *(\d+[.,]\d+)[ |%]us, *(\d+[.,]\d+)', top_line)
        if loads is None:
            raise ValueError('Cannot parse cpu load from: {!r}'.format(top_line))
        usr_load, sys_load = (float(value.replace(',', '.')) for value in loads.groups())
        sample['load'] = usr_load + sys_load

        # Store the info and save it.
        samples.append(sample)
        time.sleep(wait)

    # Summarize
    cpu_info = samples[0]
    for sample in samples[1:]:
        for stat in ('load', 'used_swap', 'used_ram'):
            cpu_info[stat] = max(cpu_info[stat], sample[stat])
        cpu_info['users'] |= sample['users']
    # Because json doesn't do sets; sorted to make the output deterministic.
    cpu_info['users'] = sorted(cpu_info['users'])
    return cpu_info
def parse_disk_info(mounts=('/work', '/')):
    """Parses disk info on the machine.

    Args:
        mounts (sequence of strings, optional): Paths handed to 'df -T'.
            Defaults to '/work' and '/'.

    Returns:
        Dict representing the disk info, keyed by the device column of df.
        Every entry holds 'type', 'mount' and the 'used' and 'size' columns
        divided by 1024. Paths df could not report on are simply missing.
    """
    try:
        # Argument list instead of a shell string: paths need no quoting.
        output = subprocess.check_output(['df', '-T'] + list(mounts))
    except subprocess.CalledProcessError as err:
        # df signalled a failure (e.g. one path could not be examined); keep
        # whatever it still printed instead of dropping all disks.
        output = err.output or b''

    disk_dict = {}
    for row in output.decode('UTF-8').split('\n')[1:]:  # Skip the header.
        disk = re.search(
            r'(?P<dev>\S+) *(?P<type>\S+) *(?P<size>\d+) *(?P<used>\d+) *(\d+) *(\S+) *(?P<mount>\S+)',
            row)
        if disk is None:
            # Trailing empty line or a row in an unexpected format.
            continue
        disk_dict[disk.group('dev')] = {
            'type': disk.group('type'),
            'mount': disk.group('mount'),
            'used': int(disk.group('used')) // 1024,
            'size': int(disk.group('size')) // 1024,
        }
    return disk_dict
def parse_machine_info():
    """Parses general machine info about hardware and software.

    Returns:
        Dict representing the machine info with the keys 'memory' and 'swap'
        (MemTotal / SwapTotal from /proc/meminfo divided by 1024),
        'cpu_model' (model name plus the number of 'model name' entries),
        'nvidia_version', 'gpu_models' (dict 'gpuX' -> product name, X being
        the minor number), 'ubuntu_version' and 'kernel_version'.
    """
    machine = {}

    # Max memory
    mem_info = subprocess.check_output(
        'cat /proc/meminfo | grep --color=no MemTotal', shell=True).decode('UTF-8')
    machine['memory'] = int(re.search(r'\S+: *(\d+)', mem_info).group(1)) // 1024
    mem_info = subprocess.check_output(
        'cat /proc/meminfo | grep --color=no SwapTotal', shell=True).decode('UTF-8')
    machine['swap'] = int(re.search(r'\S+: *(\d+)', mem_info).group(1)) // 1024

    # CPU info: one 'model name' line per entry plus a trailing empty string.
    cpu_info = subprocess.check_output(
        "cat /proc/cpuinfo | grep --color=no 'model name'", shell=True).decode('UTF-8').split('\n')
    cpu_model = re.search(r'\S* *: *([\S *]+)', cpu_info[0]).group(1)
    machine['cpu_model'] = '{} (x{})'.format(cpu_model, len(cpu_info) - 1)

    # Nvidia driver + GPU Model
    nvidia_info = subprocess.check_output(
        r"nvidia-smi -q | grep --color=no 'Driver Version\|Product Name\|Minor Number'",
        shell=True).decode('UTF-8').split('\n')
    # Dotted version number, e.g. '384.111'.
    machine['nvidia_version'] = re.search(r': *(\d+(?:\.\d+)*)', nvidia_info[0]).group(1)
    machine['gpu_models'] = {}
    # Pair every 'Product Name' with the 'Minor Number' that follows it,
    # skipping lines that do not fit instead of relying on a fixed stride.
    pending_model = None
    for line in nvidia_info[1:]:
        if 'Product Name' in line:
            pending_model = re.search(r'\S* *: *([\S *]+)', line).group(1)
        elif 'Minor Number' in line and pending_model is not None:
            minor = re.search(r': *(\d+)', line)
            if minor is not None:
                machine['gpu_models']['gpu' + minor.group(1)] = pending_model
            pending_model = None

    # Ubuntu variant
    ubuntu_info = subprocess.check_output(
        'lsb_release -a 2>/dev/null | grep --color=no Description', shell=True).decode('UTF-8')
    machine['ubuntu_version'] = re.search(r'\S*:\W*([\S *]+)', ubuntu_info).group(1)

    # Kernel version (last character, the trailing newline, is cut off)
    machine['kernel_version'] = subprocess.check_output(
        'cat /proc/version', shell=True).decode('UTF-8')[:-1]
    return machine
def main(argv):
    """Gathers the requested info and prints it as a json string.

    Args:
        argv : List of arguments without the program name. '-g'/'--general'
               requests only the general machine info; without it only the
               detailed gpu, cpu and disk info is gathered. '-p'/'--pretty'
               prints indented json with sorted keys instead of the compact
               encoding.
    """
    cli = OptionParser()
    cli.add_option(
        '-g', '--general',
        action='store_true', dest='return_general_info', default=False,
        help='When set, general machine info will be returned instead of detailed cpu and gpu info.')
    cli.add_option(
        '-p', '--pretty',
        action='store_true', dest='print_pretty', default=False,
        help='When set, the json will be printed in a less compact but more readable format.')
    options, _ = cli.parse_args(argv)

    if options.return_general_info:
        report = {'configuration': parse_machine_info()}
    else:
        # Values are evaluated in the written order: gpu, then cpu, then disk.
        report = {
            'gpu': parse_gpu_info(),
            'cpu': parse_cpu_info(),
            'disk': parse_disk_info(),
        }

    # Readable layout on request, compact encoding otherwise.
    if options.print_pretty:
        dump_options = {'sort_keys': True, 'indent': 4, 'separators': (',', ': ')}
    else:
        dump_options = {'separators': (',', ':')}
    print(json.dumps(report, **dump_options))
# Script entry point: when executed directly (not imported), hand the command
# line arguments without the program name to main().
if __name__ == "__main__":
    main(sys.argv[1:])