-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_mces.py
61 lines (50 loc) · 2.19 KB
/
parse_mces.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
"""
Parse machine check exception (MCE) records from a kernel log and print summary statistics.
"""
import argparse
import datetime
import re
import sys
import pandas as pd
# Matches one complete four-line MCE record.  Any text before "mce:" on each
# line (for example a timestamp) is ignored.  The 14 capture groups line up
# one-to-one with _MCE_COLUMNS.
#
# SOCKET, APIC, TSC and microcode accept more than one character / hex digits:
# APIC is parsed as base 16 further down, and the kernel's record printer
# emits TSC and microcode in hexadecimal as well, so digit-only or
# single-character groups would reject otherwise valid records.
_MCE_PATTERN = re.compile(
    r'^.*mce: \[Hardware Error\]: Machine check events logged$\n'
    r'^.*mce: \[Hardware Error\]: CPU (\d+): Machine Check: (\d+) Bank (\d+): (\w+)$\n'
    r'^.*mce: \[Hardware Error\]: TSC (\w+) ADDR (\w+) MISC (\w+) SYND (\w+) IPID (\w+)$\n'
    r'^.*mce: \[Hardware Error\]: PROCESSOR (\d:\w+) TIME (\d+) SOCKET (\d+) APIC (\w+) microcode ([\da-fA-F]+)$',
    re.MULTILINE)

# Column names for the DataFrame, in capture-group order.
_MCE_COLUMNS = ['CPU', 'MC', 'Bank', 'BankCode', 'TSC',
                'ADDR', 'MISC', 'SYND', 'IPID', 'PROC',
                'TIME', 'SOCKET', 'APIC', 'microcode']


def _read_records(lines):
    """Group an iterable of log lines into blank-line-separated records.

    Trailing whitespace is removed from every line so that stray spaces
    before the newline cannot defeat the end-of-line anchors of the record
    pattern.  Whitespace-only lines act as separators, runs of separators do
    not yield empty records, and a final record is returned even when the
    input does not end with a blank line.
    """
    records = []
    current = []
    for line in lines:
        text = line.rstrip()
        if text:
            current.append(text + '\n')
        elif current:
            records.append(''.join(current))
            current = []
    if current:
        # The last record is not necessarily followed by a blank line.
        records.append(''.join(current))
    return records


def _parse_records(records):
    """Return the captured field tuples of every record matching the pattern.

    Records that do not match are reported on stderr and skipped.
    """
    rows = []
    for record in records:
        if match := _MCE_PATTERN.match(record):
            rows.append(match.groups())
        else:
            print(f'no match for "{record}"', file=sys.stderr)
    return rows


def main(argv) -> None:
    """Parse an MCE log file and print the records plus value distributions.

    Args:
        argv: Command-line arguments without the program name; exactly one
            positional argument, the path of the log file to read.  The file
            is expected to contain MCE records separated by blank lines.

    The parsed table is printed twice: once with the raw captured strings and
    once after CPU, MC, TIME and APIC have been converted.  In between, the
    relative frequency of each distinct microcode, BankCode, MISC, SYND, IPID
    and PROC value is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('mce_log')
    args = parser.parse_args(argv)

    with open(args.mce_log, 'r', encoding='utf8') as log_file:
        records = _read_records(log_file)

    frame = pd.DataFrame(_parse_records(records), columns=_MCE_COLUMNS)
    print(frame)

    frame['CPU'] = frame['CPU'].apply(int)
    frame['MC'] = frame['MC'].apply(int)
    # TIME is seconds since the epoch; the result is a naive local datetime.
    frame['TIME'] = frame['TIME'].apply(lambda x: datetime.datetime.fromtimestamp(int(x)))
    frame['APIC'] = frame['APIC'].apply(lambda x: int(x, 16))

    # The microcode revision is counted as text: it may contain hex letters,
    # so a base-10 int() conversion would fail on such revisions.
    print(frame['microcode'].value_counts(normalize=True))
    for column in ('BankCode', 'MISC', 'SYND', 'IPID', 'PROC'):
        print(f'{column}:', frame[column].value_counts(normalize=True))
    print(frame)
if __name__ == '__main__':
    # Hand main() only the user-supplied arguments, without the program name.
    cli_arguments = sys.argv[1:]
    main(cli_arguments)