-
Notifications
You must be signed in to change notification settings - Fork 0
/
logLoader.py
38 lines (29 loc) · 928 Bytes
/
logLoader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import re
import sys
import numpy as np
import pandas as pd
# Regex fragments, one per field of a log line, listed in the order the
# fields appear on the line.
# NOTE(review): the field layout looks like the Apache/NCSA "combined"
# access-log format -- confirm against the actual log source.
parts = [
    r'(?P<host>\S+)',        # 1st token, captured as "host"
    r'\S+',                  # 2nd token, matched but not captured
    r'(?P<user>\S+)',        # 3rd token, captured as "user"
    r'\[(?P<time>.+)\]',     # text inside [...], captured as "time"
    r'"(?P<request>.*)"',    # double-quoted text, captured as "request"
    r'(?P<status>[0-9]+)',   # run of digits, captured as "status"
    r'(?P<size>\S+)',        # any token (not digits-only), captured as "size"
    r'"(?P<referrer>.*)"',   # double-quoted text, captured as "referrer"
    r'"(?P<agent>.*)"',      # double-quoted text, captured as "agent"
]
def loadLogFileToDF(path):
    """Parse a log file into a pandas DataFrame.

    Every non-blank line of the file at *path* is matched against the regular
    expression built by joining the module-level ``parts`` fragments with
    whitespace separators. The named groups of each match become one row, and
    the ``host`` column is dropped before the frame is returned.

    Args:
        path: Location of the log file; passed unchanged to ``open``.

    Returns:
        A DataFrame with one row per parsed line and one column per named
        group of the pattern except ``host``. A file with no non-blank lines
        yields an empty frame that still carries those columns.

    Raises:
        Exception: With the message ``"Failed to load file"`` when the file
            cannot be read or a non-blank line does not match the pattern.
            The underlying error is chained as ``__cause__``.
    """
    pattern = re.compile(r'\s+'.join(parts) + r'\s*\Z')
    # Named groups in definition order. Passing them as explicit columns keeps
    # the frame well-formed (and 'host' droppable) even when no row was parsed.
    columns = sorted(pattern.groupindex, key=pattern.groupindex.get)
    log_data = []
    try:
        with open(path) as file:
            # Stream the file instead of materialising every line twice.
            for line_no, raw_line in enumerate(file, start=1):
                line = raw_line.rstrip()
                if not line:
                    # Blank lines (e.g. a trailing newline) carry no record.
                    continue
                match = pattern.match(line)
                if match is None:
                    # Surface the offending line instead of an opaque
                    # AttributeError on None.
                    raise ValueError(
                        "line {} does not match the log pattern: {!r}".format(
                            line_no, line
                        )
                    )
                log_data.append(match.groupdict())
        df = pd.DataFrame(log_data, columns=columns)
        return df.drop(columns=['host'])
    except Exception as err:
        # Same exception type and message as before, but the root cause is
        # preserved and KeyboardInterrupt/SystemExit are no longer trapped.
        raise Exception("Failed to load file") from err