-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathLogParser.py
114 lines (93 loc) · 4.3 KB
/
LogParser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from DatetimeParser import *
from collections import Counter
import mecab
class LogParser():
    """Parse an exported chat log and aggregate per-sender statistics.

    Keyword arguments accepted by the constructor:
        system: ``'android'`` selects ``AndroidDatetimeParser`` and a
            three-line skip at the top of the file; any other value selects
            ``IOSDateTimeParser`` and a two-line skip. Required.
        topKw: how many of the most frequent nouns to keep per sender in
            the keyword report. Defaults to 30.
    """

    def __init__(self, **kwargs):
        self.system = kwargs['system']
        self.datetimeParser = self.getParser()
        self.mecab = mecab.MeCab()
        self.topKw = kwargs.get('topKw', 30)

    @classmethod
    def read(cls, path):
        """Return the UTF-8 decoded contents of *path* split on CRLF."""
        with open(path, 'rb') as fp:
            return fp.read().decode('utf-8').split('\r\n')

    @classmethod
    def isChat(cls, l):
        """Return True when the line contains at least two ``:`` characters.

        NOTE(review): presumably one colon comes from the timestamp and one
        separates the sender from the message -- confirm against the export
        format.
        """
        return l.count(':') >= 2

    @classmethod
    def seperate(cls, l):
        """Split a chat line into ``(datetime, name, content)``.

        Everything before the first ``,`` is the datetime text. In the
        remainder, everything before the first ``:`` is the sender name
        (stripped) and everything after it is the message (left-stripped).

        Only the first comma and the first following colon act as
        delimiters, so commas and colons inside the message body are kept
        verbatim.
        """
        dt, _, remainder = l.partition(',')
        name, _, ctt = remainder.partition(':')
        return dt, name.strip(), ctt.lstrip()

    def process(self, path):
        """Analyze the log at *path*; thin alias for :meth:`analyze`."""
        return self.analyze(path)

    def getParser(self):
        """Return the datetime parser matching ``self.system``."""
        if self.system == 'android':
            return AndroidDatetimeParser()
        return IOSDateTimeParser()

    def analyze(self, path):
        """Aggregate statistics over every chat line of the log at *path*.

        Returns a dict with these keys:
            amount: ``[(name, chars), ...]`` sorted by total message
                characters, descending.
            hour: ``[(hour, chars), ...]`` in first-seen order, where *hour*
                is whatever ``self.datetimeParser.getHour`` returns.
            keywords: ``[[name, [(noun, count), ...]], ...]`` holding the
                ``self.topKw`` most common nouns per sender, plus one
                ``'total'`` entry accumulated over all senders.
            emoticons: ``[(name, count), ...]`` sorted descending.
            photos: ``[(name, count), ...]`` sorted descending.

        Sender names are reported with a ``'개인-'`` ("individual-") prefix,
        which also keeps them from colliding with the ``'total'`` key.
        """
        texts = self.read(path)

        # Counters preserve insertion order, so ties in the sorted reports
        # come out in first-seen order.
        amount = Counter()
        hours = Counter()
        emoticons = Counter()
        photos = Counter()
        keywords = {}

        # The first lines of the file are skipped (presumably export header
        # lines -- three on android, two otherwise).
        startPoint = 3 if self.system == 'android' else 2
        for l in texts[startPoint:]:
            if not self.isChat(l):
                continue

            dt, name, ctt = self.seperate(l)
            name = '개인-' + name
            hour = self.datetimeParser.getHour(dt)

            # Volume is measured in characters and is counted for every
            # chat line, including emoticon/photo placeholders.
            amount[name] += len(ctt)
            hours[hour] += len(ctt)

            if ctt == '이모티콘':  # "emoticon" placeholder text
                emoticons[name] += 1
            elif ctt == '사진':  # "photo" placeholder text
                photos[name] += 1
            elif not ctt.startswith('샵검색:'):
                # Lines starting with "샵검색:" ("sharp search:") are
                # excluded from keyword extraction.
                nouns = Counter(self.mecab.nouns(ctt))
                keywords.setdefault(name, Counter()).update(nouns)
                keywords.setdefault('total', Counter()).update(nouns)

        return {
            'amount': amount.most_common(),
            'hour': list(hours.items()),
            'keywords': [
                [person, counts.most_common(self.topKw)]
                for person, counts in keywords.items()
            ],
            'emoticons': emoticons.most_common(),
            'photos': photos.most_common(),
        }