-
Notifications
You must be signed in to change notification settings - Fork 1
/
histogram.py
38 lines (32 loc) · 1.58 KB
/
histogram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# This file should generate histogram of the log file based on date and time
from collections import Counter
def histogram(Logfile):
d=dict() # To store the keys ( date and hours) and respective counter
digit=['1','2','3','4','5','6','7','8','9','0'] # Want to check, if the second char in "line" is digit
match_char=['I','W','E','F'] # Want to check for lines starting with IWEF only. Our logs are multi-line and I do not have to handle here
with open (Logfile, 'r') as input_file:
for line in input_file:
if line[0] in match_char and line[1] in digit:
key=line[1:10] # Plan with end range, if you need to analyze on hourly basis
if key in d: # Populating dictionary with new keys, and increment counter for existing keys
d[key]=d[key]+1
else:
d[key]=1
for key in list(d.keys()):
print("The count of {} is {}".format(key,d[key]))
return None
def word_count(Logfile):
count=0
d=dict() # To store the keys ( date and hours) and respective counter
with open (Logfile, 'r') as input_file:
for line in input_file:
for word in line.split():
if word in d: # Populating dictionary with new keys, and increment counter for existing keys
d[word]=d[word]+1
else:
d[word]=1
print ("Generating top 20 word count\n")
top_20 = sorted(d.items(), key = lambda x:x[1], reverse = True)[:20]
for k, v in top_20:
print (k, v)
return None