-
Notifications
You must be signed in to change notification settings - Fork 0
/
reviewparser.py
36 lines (30 loc) · 1.32 KB
/
reviewparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from xml.dom.minidom import parse
import xml.dom.minidom
from unidecode import unidecode
import re
def parseit(file):
    """Parse the review XML in *file* with a new ``reviewparser``.

    *file* is forwarded unchanged to ``reviewparser.parse``; whatever that
    call returns is returned to the caller.
    """
    parser = reviewparser()
    parsed = parser.parse(file)
    return parsed
class reviewparser:
    """Container for the fields extracted from one review XML document.

    A new instance holds placeholder values.  ``parse`` replaces them with
    what it finds in the document: ``opinion`` comes from the text content
    of the ``<opinion>`` element, every other field from the ``value``
    attribute of its element.  After ``parse`` all fields are strings.
    """

    def __init__(self):
        # Placeholders only; every one of these is overwritten by parse().
        self.opinion = ""          # text of <opinion>
        self.thumbsup = 0          # <thumbsUp value="...">
        self.thumbsdown = 0        # <thumbsDown value="...">
        self.stars = 0             # <stars value="...">
        self.user = ""             # <user value="...">, reduced to [0-9a-zA-Z]
        self.category = ""         # <category value="...">, transliterated
        self.evaluation_date = ""  # <evaluation_date value="...">
        self.recommends = ""       # <recommends value="...">

    @staticmethod
    def _text_of(element):
        """Return the concatenated text and CDATA content of *element*.

        Only direct children are considered.  Comments and nested elements
        are skipped, and an element without any text yields ``""`` instead
        of raising.
        """
        text_kinds = (element.TEXT_NODE, element.CDATA_SECTION_NODE)
        return "".join(
            child.data
            for child in element.childNodes
            if child.nodeType in text_kinds
        )

    @staticmethod
    def _value_of(root, tag):
        """Return the ``value`` attribute of the first *tag* element under *root*.

        Raises ``IndexError`` when no such element exists.
        """
        return root.getElementsByTagName(tag)[0].getAttribute("value")

    def parse(self, file):
        """Populate this instance from the review XML in *file*.

        *file* is passed straight to ``xml.dom.minidom.parse``.  Returns
        ``self`` so the call can be chained.

        Raises ``IndexError`` if any of the expected elements is missing;
        errors raised by minidom while reading the document propagate
        unchanged.
        """
        review = xml.dom.minidom.parse(file).documentElement
        value_of = self._value_of

        # Join every text/CDATA child rather than trusting childNodes[0]:
        # the first child may be a comment, the text may be split around a
        # CDATA section, or the element may be empty.
        self.opinion = self._text_of(review.getElementsByTagName("opinion")[0])

        self.thumbsup = value_of(review, "thumbsUp")
        self.thumbsdown = value_of(review, "thumbsDown")
        self.stars = value_of(review, "stars")

        # Transliterate to ASCII first, then drop everything that is not an
        # ASCII letter or digit.
        ascii_user = unidecode(value_of(review, "user"))
        self.user = re.sub('[^0-9a-zA-Z]+', '', ascii_user)

        self.category = unidecode(value_of(review, "category"))
        self.evaluation_date = value_of(review, "evaluation_date")
        self.recommends = value_of(review, "recommends")
        return self