-
Notifications
You must be signed in to change notification settings - Fork 1
/
library.py
131 lines (120 loc) · 3.06 KB
/
library.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def ls_get_all_locale():
    """Return all locale identifiers as a newly built list of strings.

    Each identifier is a hyphen-joined triple (language-script-region).
    The order below is fixed and is not strictly alphabetical.
    """
    return [
        'arb-Arab-ZZ',
        'cmn-Hans-CN',
        'cmn-Latn-CN',
        'dan-Latn-DK',
        'deu-Latn-DE',
        'eng-Latn-US',
        'fin-Latn-FI',
        'fra-Latn-FR',
        'heb-Hebr-IL',
        'hin-Deva-IN',
        'ita-Latn-IT',
        'jpn-Hrkt-JP',
        'jpn-Jpan-JP',
        'kor-Hang-KR',
        'kor-Kore-KR',
        'nld-Latn-NL',
        'nob-Latn-NO',
        'pes-Aran-IR',
        'por-Latn-PT',
        'rus-Cyrl-RU',
        'swe-Latn-SE',
        'spa-Latn-ES',
        'tur-Latn-TR',
        'zxx-Zmth-ZZ',
    ]
def s_get_principal_locale():
    """Return the identifier of the principal locale, ``'eng-Latn-US'``."""
    s_language, s_script, s_region = 'eng', 'Latn', 'US'
    return '-'.join((s_language, s_script, s_region))
def ls_get_separator_for_principal_locale():
    """Return the principal locale's two separators as a new list.

    The list is ``[', ', '; ']``, comma form first, semicolon form second.
    NOTE(review): presumably these are the synonym and non-synonym
    separators respectively - confirm against callers.
    """
    s_comma_form = ', '
    s_semicolon_form = '; '
    return [s_comma_form, s_semicolon_form]
def s_get_subject_header():
    """Return the tab-separated subject header line: ``locale<TAB>subject``."""
    t_column = ('locale', 'subject')
    return '\t'.join(t_column)
def s_get_concept_header():
    """Return the tab-separated concept header line.

    The columns are ``locale``, ``concept`` and ``prerequisite``, in that order.
    """
    t_column = ('locale', 'concept', 'prerequisite')
    return '\t'.join(t_column)
def s_get_content_directory():
    """Return the name of the content directory, ``'content'``."""
    s_directory = 'content'
    return s_directory
def s_get_synonym_separator(s_locale, s_text):
    """Return the synonym separator for the script named in ``s_locale``.

    Args:
        s_locale: Identifier with exactly three hyphen-separated parts
            (language-script-region); only the script part is used.
        s_text: Text that is inspected, for some scripts only, to decide
            between two candidate separators.

    Returns:
        The script-specific separator string, or ``', '`` when the script
        has no dedicated entry.

    Raises:
        ValueError: If ``s_locale`` does not split into exactly three parts.
    """
    _, s_script, _ = s_locale.split('-')

    # Two script groups choose their separator from the text itself.
    if s_script in ('Bopo', 'Hanb', 'Hani', 'Hans', 'Hant', 'Hrkt'):
        # NOTE(review): 'Jpan' is absent here although
        # s_get_nonsynonym_separator lists it - confirm this is intended.
        return ',' if ',' in s_text else '、'
    if s_script == 'Mong':
        return '᠈ ' if '᠈' in s_text else '᠂ '

    # Every remaining script maps to one fixed separator.
    d_separator_by_script = {
        'Arab': '، ',
        'Aran': '، ',
        'Bamu': '꛵ ',
        'Ethi': '፣ ',
        'Hmng': '𖬹 ',
        'Lisu': '꓾ ',
        'Medf': '𖺗 ',
        'Newa': '𑑍 ',
        'Nkoo': '߸ ',
        'Sgnw': '𝪇 ',
        'Tibt': '༔ ',
        'Vaii': '꘍ ',
    }
    return d_separator_by_script.get(s_script, ', ')
def s_get_nonsynonym_separator(s_locale, s_text):
    """Return the non-synonym separator for the script named in ``s_locale``.

    Args:
        s_locale: Identifier with exactly three hyphen-separated parts
            (language-script-region); only the script part is used.
        s_text: Text inspected only for the 'Arab'/'Aran' scripts, to
            decide between two candidate separators.

    Returns:
        The script-specific separator string, or ``'; '`` when the script
        has no dedicated entry.

    Raises:
        ValueError: If ``s_locale`` does not split into exactly three parts.
    """
    _, s_script, _ = s_locale.split('-')

    if s_script in ('Arab', 'Aran'):
        # Prefer the separator already present in the text, if any.
        return '⁏ ' if '⁏' in s_text else '؛ '
    if s_script in ('Bopo', 'Hanb', 'Hani', 'Hans', 'Hant', 'Hrkt', 'Jpan'):
        return ';'

    d_separator_by_script = {
        'Armn': '․ ',
        'Bamu': '꛶ ',
        'Grek': '· ',
        'Ethi': '፤ ',
        # NOTE(review): unlike most entries this one has no trailing
        # space - confirm whether that is intentional.
        'Sgnw': '𝪉',
    }
    return d_separator_by_script.get(s_script, '; ')
def s_get_coupled_phonetic_writing_system_for_logogram(s_locale):
    """Return the phonetic script code coupled with a locale's logograms.

    Args:
        s_locale: Identifier with exactly three hyphen-separated parts
            (language-script-region). Only the language and region parts
            are consulted; the script part is ignored.

    Returns:
        ``'Bopo'`` for ``cmn`` in region ``TW``; ``'Latn'`` for the other
        listed Chinese varieties and for ``vie``; ``'Hrkt'`` for the listed
        Japonic languages; ``'Hang'`` for ``jje`` and ``kor``; and ``''``
        for every other language.

    Raises:
        ValueError: If ``s_locale`` does not split into exactly three parts.
    """
    s_language, _, s_region = s_locale.split('-')

    t_chinese = (
        'cdo', 'cjy', 'cmn', 'cpx', 'cnp', 'csp', 'czh', 'czo',
        'gan', 'hak', 'hsn', 'mnp', 'nan', 'wuu', 'yue',
    )
    t_japonic = (
        'ams', 'jpn', 'kzg', 'mvi', 'okn', 'tkn',
        # 'ryn' (Northern Amami-Oshima) was missing from the list.
        # NOTE(review): 'rvn' looks like a typo for 'ryn'; it is kept so
        # that existing behaviour for that code is unchanged - confirm.
        'ryn', 'rvn',
        'ryu', 'rys', 'xug', 'yoi', 'yox',
    )
    t_koreanic = ('jje', 'kor')

    if s_language in t_chinese:
        # Only Mandarin in Taiwan is paired with Bopomofo.
        if s_region == 'TW' and s_language == 'cmn':
            return 'Bopo'
        return 'Latn'
    if s_language in t_japonic:
        return 'Hrkt'
    if s_language in t_koreanic:
        return 'Hang'
    if s_language == 'vie':
        return 'Latn'
    return ''
def ls_get_separated_element(s_separator, s_entry):
    """Split ``s_entry`` on ``s_separator``, keeping ``{...}`` spans intact.

    A separator that falls between an opening ``{`` and the next closing
    ``}`` does not split the entry: the pieces around it are re-joined with
    the separator.

    Args:
        s_separator: Non-empty separator string to split on.
        s_entry: Text to split.

    Returns:
        List of elements in their original order. An empty ``s_entry``
        yields ``['']``.

    Raises:
        ValueError: If ``s_separator`` is empty (raised by ``str.split``).
    """
    # NOTE(review): the previous if/else on "'{' in s_entry" assigned the
    # same brace pair in both branches, so it has been collapsed. If a
    # second bracket style was intended, reintroduce it here.
    s_open, s_close = '{', '}'

    ls_entry = []
    b_escape = False
    for s_text in s_entry.split(s_separator):
        if b_escape:
            # Still inside braces: undo the split made by the separator.
            ls_entry[-1] += s_separator + s_text
        else:
            ls_entry.append(s_text)

        # The brace that occurs last in this piece decides the state, so a
        # balanced "{...}" inside one piece no longer leaves it stuck open.
        i_open = s_text.rfind(s_open)
        i_close = s_text.rfind(s_close)
        if i_open > i_close:
            b_escape = True
        elif i_close != -1:
            b_escape = False
    return ls_entry