-
Notifications
You must be signed in to change notification settings - Fork 0
/
huanglitools.py
72 lines (62 loc) · 2.59 KB
/
huanglitools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import hashlib
import json
import re
from random import Random
from urllib.parse import urlencode
class HuangLi:
    """Build a deterministic, pseudo-random sentence from templates and a word bank.

    An instance holds three pieces of data:

    * ``words`` -- mapping of topic name to a sequence of candidate words.
    * ``templates`` -- sequence of template strings containing placeholders
      such as ``<topic>`` or ``<topic_a,topic_b>``.
    * ``noref_words`` -- set of words that are emitted as plain text instead
      of as a Markdown search link.

    ``calculate`` seeds a private random generator from its argument, so the
    same argument always yields the same sentence for the same data.
    """

    # A placeholder is one or more comma-separated ``\w+`` topic names in
    # angle brackets. Compiled once because every ``calculate`` call uses it.
    _PLACEHOLDER = re.compile(r'<\w+(,\w+)*>')

    @staticmethod
    def _load_literal(file: str, encoding: str = 'utf-8'):
        """Evaluate the first line of *file* as a Python expression.

        Only the first line is read; anything after it is ignored.

        Raises:
            IndexError: if the file is empty (the exception type raised by
                the earlier ``readlines()[0]`` implementation is kept).
        """
        with open(file, encoding=encoding) as fle:
            first_line = fle.readline()
        if not first_line:
            raise IndexError(
                '{!r} is empty; expected a Python expression on its first line'.format(file))
        # NOTE(security): eval() runs arbitrary code. Only point this at data
        # files you control. If the files are known to contain plain literals,
        # ast.literal_eval would be the safe replacement.
        return eval(first_line)

    @staticmethod
    def get_merged_data(file: str, encoding: str = 'utf-8'):
        """Return the value written on the first line of *file*.

        ``__init__`` unpacks the result as ``(words, noref_words, templates)``.
        """
        return HuangLi._load_literal(file, encoding)

    @staticmethod
    def get_word_bank(file: str, encoding: str = 'utf-8') -> dict:
        """Return the word bank written on the first line of *file*."""
        return HuangLi._load_literal(file, encoding)

    @staticmethod
    def get_templates(file: str, encoding: str = 'utf-8') -> list:
        """Return the templates in *file*, one per line.

        Each line is stripped of surrounding whitespace; blank lines and
        lines starting with ``#`` are skipped.
        """
        with open(file, encoding=encoding) as fle:
            stripped = (line.strip() for line in fle)
            return [line for line in stripped if line and not line.startswith('#')]

    @staticmethod
    def get_noref_words(file: str, encoding: str = 'utf-8') -> set:
        """Return the no-reference words written on the first line of *file*."""
        return HuangLi._load_literal(file, encoding)

    def __init__(self, merged_json='merged.json', use_merged_data_py=False, merged_data_file='merged.txt',
                 words_file='wordbank.txt', templates_file='templates.txt', noref_file='noref.txt',
                 encoding='utf-8'):
        """Load words, templates and no-reference words from the first usable source.

        Sources are tried in this order; the first truthy selector wins:

        1. ``merged_json`` -- JSON file holding ``[words, noref_words, templates]``.
        2. ``use_merged_data_py`` -- import the ``merged_data`` module and read
           its ``words``, ``noref_words`` and ``templates`` attributes.
        3. ``merged_data_file`` -- file whose first line evaluates to
           ``(words, noref_words, templates)``.
        4. Otherwise ``words_file``, ``templates_file`` and ``noref_file``
           are read separately.

        Because ``merged_json`` defaults to a non-empty path, pass a falsy
        value for it (and for earlier selectors) to reach the later sources.

        Args:
            encoding: text encoding used for every file that is opened.
        """
        if merged_json:
            with open(merged_json, encoding=encoding) as fle:
                words, noref_words, templates = json.load(fle)
        elif use_merged_data_py:
            # Imported lazily so the module is only required when requested.
            import merged_data
            words, noref_words, templates = merged_data.words, merged_data.noref_words, merged_data.templates
        elif merged_data_file:
            words, noref_words, templates = HuangLi.get_merged_data(merged_data_file, encoding)
        else:
            words = HuangLi.get_word_bank(words_file, encoding)
            templates = HuangLi.get_templates(templates_file, encoding)
            noref_words = HuangLi.get_noref_words(noref_file, encoding)

        self.words = words
        self.templates = templates
        # Normalised to a set for every source so membership tests in
        # ``calculate`` behave the same regardless of how the data was loaded.
        self.noref_words = set(noref_words)

    def calculate(self, data) -> str:
        """Return the sentence generated for *data*.

        ``str(data)`` is hashed with MD5 and the digest seeds a private
        ``Random`` instance, so equal inputs give equal outputs. One template
        is chosen, then every placeholder in it is replaced by a word chosen
        from one of the topics it lists. Words not in ``noref_words`` are
        rendered as a Markdown link to a Google search restricted to
        ``cuhk.edu.cn``.

        Raises:
            KeyError: if a placeholder names a topic missing from ``words``.
            IndexError: if ``templates`` or a chosen topic's word list is empty.
        """
        rng = Random(hashlib.md5(str(data).encode('utf-8')).digest())

        def replacer(match):
            # Drop the surrounding '<' and '>' and split into topic names.
            topics = match.group(0)[1:-1].split(',')
            topic = rng.choice(topics)
            word = rng.choice(self.words[topic])
            if word in self.noref_words:
                return word
            query = urlencode({'q': '"{}" site:cuhk.edu.cn'.format(word)})
            return '[{}](https://www.google.com/search?{})'.format(word, query)

        # The template is drawn before any placeholder is expanded; keeping
        # this order keeps outputs stable for a given seed.
        template = rng.choice(self.templates)
        return self._PLACEHOLDER.sub(replacer, template)