forked from ptwikis/wikiloves
-
Notifications
You must be signed in to change notification settings - Fork 6
/
configuration.py
81 lines (67 loc) · 2.12 KB
/
configuration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/python
# -*- coding: utf-8 -*-
import json
import re
from urllib.request import urlopen
from functions import EVENTS
def reData(txt, year):
    """Parse one line of the configuration.

    Two line shapes are recognised:

    * an event header such as ``wl["<event>"][<year>] = {``, where
      ``<event>`` is one of the names in ``EVENTS``;
    * a country entry such as
      ``["<country>"] = {["start"] = <timestamp>, ["end"] = <timestamp>}``.

    Args:
        txt: The line to inspect.
        year: Interpolated as-is into the regular expression. The start
            timestamp must begin with it, and the end timestamp must begin
            with its first three characters followed by any digit.

    Returns:
        A dict with the keys ``event``, ``year``, ``country``, ``start`` and
        ``end`` (groups that did not take part in the match are ``None``),
        or ``None`` when the line matches neither shape.
    """
    event_alternatives = "|".join(EVENTS)
    year_prefix = str(year)[:3]
    # Verbose mode: unescaped layout whitespace inside the template is
    # ignored, literal spaces are written as "\ ".
    template = r"""
        \s*wl\["(?P<event>%s)"\]\[(?P<year>20\d\d)]\ ?=\ ?\{|
        \s*\["(?P<country>[-a-z]+)"\]\ =\ \{\["start"\]\ =\ (?P<start>%s\d{10}),\ \["end"\]\ =\ (?P<end>%s\d\d{10})\}
        """
    pattern = template % (event_alternatives, year, year_prefix)
    found = re.search(pattern, txt, flags=re.VERBOSE)
    if found is None:
        return None
    return found.groupdict()
def re_prefix(txt):
    """Match one line of the prefix table.

    Args:
        txt: The line to inspect.

    Returns:
        A match object, or ``None`` when nothing matches. For an entry of
        the form ``["<prefix>"] = "<name>"`` the groups ``prefix`` and
        ``name`` are set; for a closing brace the group ``close`` is set.
    """
    pattern = r'\s*\["(?P<prefix>[\w-]+)"\] = "(?P<name>[\w\-\' ]+)"|(?P<close>\})'
    return re.search(pattern, txt, flags=re.UNICODE)
def get_config_from_commons(page):
    """Fetch the content of a Wikimedia Commons page through the MediaWiki API.

    Args:
        page: Title of the page to fetch. It is percent-encoded before being
            placed in the query string.

    Returns:
        The ``"*"`` field of the first revision of the first page in the API
        response, as a ``str``.

    Raises:
        KeyError, IndexError: If the response does not have the expected
            ``query -> pages -> <id> -> revisions[0] -> "*"`` structure.
        Errors raised by ``urlopen`` (network or HTTP failures) propagate
        unchanged.
    """
    from urllib.parse import quote

    # Encode the title so spaces, "&", "#" or non-ASCII characters cannot
    # corrupt the query string. "%" stays safe so that a title which is
    # already percent-encoded is passed through untouched.
    url = (
        "https://commons.wikimedia.org/w/api.php?action=query&format=json&prop=revisions&titles=%s&rvprop=content"
        % quote(str(page), safe=":/%")
    )
    # The context manager closes the HTTP response even if reading or
    # decoding fails.
    with urlopen(url) as api:
        payload = json.loads(api.read())
    first_page = list(payload["query"]["pages"].values())[0]
    text = first_page["revisions"][0]["*"]
    return str(text)
def parse_config(text):
    """Parse the configuration text into a per-event mapping.

    The text is read in two phases over a single shared line iterator:

    1. The prefix table is read with ``re_prefix`` until the first closing
       brace seen after at least one prefix entry was collected.
    2. The remaining lines are read with ``reData``; event headers open a
       new section and country entries are stored under the current one.

    Args:
        text: The configuration text, lines separated by ``"\\n"``.

    Returns:
        A dict mapping ``<event><year>`` to a dict that maps each country
        name (looked up in the prefix table) to
        ``{"start": int, "end": int}``. Events without any stored country
        are left out.
    """
    prefixes = {}
    data = {}
    event = None
    remaining = iter(text.split("\n"))

    # Phase 1: prefix table. The iterator is deliberately shared so that
    # phase 2 resumes right after the line that closed the table.
    for raw in remaining:
        hit = re_prefix(raw)
        if hit is None:
            continue
        if prefixes and hit.group("close"):
            break
        if hit.group("prefix"):
            prefixes[hit.group("prefix")] = hit.group("name")

    # Phase 2: event headers and country entries.
    for raw in remaining:
        # Inside an event, timestamps are tied to that event's year; before
        # the first event header any 20xx year is accepted.
        year_pattern = event[-4:] if event else r"20\d\d"
        fields = reData(raw, year_pattern)
        if not fields:
            continue
        if fields["event"]:
            event = fields["event"] + fields["year"]
            data[event] = {}
            continue
        country = fields["country"]
        if not (country and event):
            continue
        if country not in prefixes:
            # Countries missing from the prefix table are skipped silently.
            continue
        data[event][prefixes[country]] = {
            "start": int(fields["start"]),
            "end": int(fields["end"]),
        }

    return {name: config for name, config in data.items() if config}
def getConfig(page):
    """Read the configuration from the configuration page on Commons.

    Args:
        page: Title of the page, passed to ``get_config_from_commons``.

    Returns:
        The result of ``parse_config`` applied to the fetched page text.
    """
    return parse_config(get_config_from_commons(page))