-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwiki_trac_rst_convert.py
302 lines (224 loc) · 7.18 KB
/
wiki_trac_rst_convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
# Convert local files formatted in Trac Wiki RST to GitHub RST.
import re
import sys
import os
try:
import config
except ModuleNotFoundError:
# In the tests, we monkeypatch this module.
config = None
def main():
    """
    Command-line entry point.

    Expects exactly one argument, the wiki base directory, and hands every
    file found beneath it to `convert_file`. Prints a usage hint and exits
    with status 1 when the argument count is wrong.
    """
    arguments = sys.argv
    if len(arguments) != 2:
        print("Need to pass the path to wiki base directory.")
        sys.exit(1)

    wiki_root = arguments[1]
    for directory, _subdirs, filenames in os.walk(wiki_root):
        for filename in filenames:
            convert_file(os.path.join(directory, filename))

    print('Conversion complete.')
def convert_file(path: str):
    """
    Convert one file in place; no backup is kept.

    Paths that do not look like ReStructuredText (see `_is_rst_file`)
    are left untouched.

    The converted text is computed in full before the file is reopened
    for writing, so a failure during conversion leaves the original
    content on disk instead of an empty, truncated file.
    """
    if not _is_rst_file(path):
        return

    print('Converting ', path)
    with open(path) as f:
        text = f.read()

    # Opening with 'w' truncates the file immediately, so convert first.
    converted = convert_content(text)

    with open(path, 'w') as f:
        f.write(converted)
def _is_rst_file(path: str):
"""
Returns `True` if path looks like a ReStructuredText file.
"""
path = path.lower()
return path.endswith('rst') or path.endswith('rest')
def convert_content(text: str):
    """
    Translate Trac wiki RST content into GitHub RST content.

    * Strip the Trac RST wrapping markers.
    * Remove page outline / `contents` directives, then put a single
      `contents` directive at the top.
    * Turn Trac wiki and ticket references into RST links.
    * Turn TracWiki headings, subheadings, and lists into RST.
    """
    for wrapper in ('{{{', '#!rst', '}}}'):
        text = text.replace(wrapper, '')

    text = _remove_rst_contents(_remove_pageoutline(text))
    text = _ensure_rst_content_directive(text.strip() + '\n')

    # The remaining markup passes, applied one after another in this order.
    markup_passes = (
        _trac_to_github_wiki_links,
        _tracwiki_to_rst_links,
        _tracwiki_wiki_link_with_text_to_github_links,
        _trac_ticket_links,
        _tracwiki_heading_to_rst_heading,
        _tracwiki_subheading_to_rst_subheading,
        _tracwiki_list_dedent,
        _tracwiki_list_separate_from_paragraph,
    )
    for convert in markup_passes:
        text = convert(text)

    return text
def _remove_pageoutline(text: str):
"""
Remove any TracWiki PageOutline directives
"""
return text.replace('[[PageOutline]]', '')
def _remove_rst_contents(text: str):
"""
Remove any RST `contents` directives
"""
directive = r'\.\.(\ +)contents::\n'
return re.sub(directive, '', text)
def _ensure_rst_content_directive(text: str):
"""
Ensures a `contents` directive at the top of every document.
"""
return (
'.. contents::\n'
'\n' +
text
)
def _trac_to_github_wiki_links(text: str):
"""
Takes content with Trac wiki link directives and coverts
the directives to inline GitHub wiki links.
"""
link_matchers = [
# RST markup:
':trac:`wiki:(.+?)`',
'`wiki:(.+?)`:trac:',
# TracWiki markup:
r'`\[wiki:"?([^ ]+?)"?]`:trac:',
r'\[wiki:"?([^ ]+?)"?]',
]
for link_re in link_matchers:
for title in matches(link_re, text):
text = sub(link_re, f'`<{_wiki_url(title)}>`_', text)
return text
def _tracwiki_to_rst_links(text: str):
"""
Takes TracWiki markup and converts its links to RST links.
"""
url = '[a-z]+://[^ ]+'
link_text = '[^]]+'
link_re = rf'\[({url}) ({link_text})]'
for url, link_text in matches(link_re, text):
text = sub(link_re, f'`{link_text} <{url}>`_', text)
return text
def _tracwiki_wiki_link_with_text_to_github_links(text: str):
"""
Takes TracWiki markup and converts its Wiki links which have
explicit link text into RST links.
If the link text is the same as the article name, generate a more
compact syntax.
"""
title = '[^ ]+'
link_text = '[^]]+'
link_matchers = [
rf'`\[wiki:({title}) ({link_text})]`:trac:',
rf'\[wiki:({title}) ({link_text})]',
]
for link_re in link_matchers:
for title, link_text in matches(link_re, text):
if title == link_text:
text = sub(link_re, f'`<{_wiki_url(title)}>`_', text)
else:
replacement = f'`{link_text} <{_wiki_url(title)}>`_'
text = sub(link_re, replacement, text)
return text
def _trac_ticket_links(text: str):
"""
Replace Trac reference to ticket with an RST link to the ticket.
"""
ticket_re = ':trac:`#([0-9]+)`'
for ticket in matches(ticket_re, text):
text = sub(
ticket_re,
f'`Trac #{ticket} <{config.TRAC_TICKET_PREFIX}{ticket}>`_',
text
)
return text
def _tracwiki_heading_to_rst_heading(text: str):
"""
Convert TracWiki 1st level headings to RST heading.
TracWiki:
= Some Top Heading =
Content here
RST conversion:
Some Top Heading
================
Content here
"""
heading_re = '^= (.*) =$'
for match in matches(heading_re, text):
text = sub(heading_re, _underline(match, '='), text)
return text
def _tracwiki_subheading_to_rst_subheading(text: str):
"""
Convert TracWiki 2nd level headings to RST heading.
TracWiki:
== Some 2nd Heading ==
Content here
RST conversion:
Some 2nd Heading
----------------
Content here
"""
heading_re = '^== (.*) ==$'
for match in matches(heading_re, text):
text = sub(heading_re, _underline(match, '-'), text)
return text
def _tracwiki_list_dedent(text: str):
"""
Remove a space before a list item, if exactly one space is
before the asterisk.
"""
indented_list_item_re = r'^ \* '
for _ in matches(indented_list_item_re, text):
text = sub(indented_list_item_re, '* ', text)
return text
def _tracwiki_list_separate_from_paragraph(text: str):
"""
During conversion from TracWiki to RST, ensure an empty line
between each non-list-item and the list item following it, if any.
"""
lines = text.split('\n')
newlines = []
was_list_item_or_blank = True
for l in lines:
is_list_item = re.match(r'^ *\* .*', l)
if is_list_item:
if not was_list_item_or_blank:
newlines.append('')
was_list_item_or_blank = True
else:
is_empty = l.strip() == ''
was_list_item_or_blank = is_empty
newlines.append(l)
return '\n'.join(newlines)
def _underline(text: str, line_symbol: str):
"""
Add a line made of `line_symbol` after given `text`,
and return new text.
"""
return text + "\n" + line_symbol * len(text)
def matches(pattern: str, text: str):
    """
    Find every occurrence of `pattern` in `text`, matching in
    multi-line mode, and return them as a list.
    """
    compiled = re.compile(pattern, flags=re.MULTILINE)
    return compiled.findall(text)
def sub(regex: str, replacement: str, text: str):
    """
    Replace the first occurrence of `regex` in `text` with
    `replacement`, matching in multi-line mode, and return the new text.
    """
    compiled = re.compile(regex, flags=re.MULTILINE)
    return compiled.sub(replacement, text, count=1)
def _wiki_url(title: str):
"""
GitHub Wiki collapses directory structure.
After `wiki_migrate.py` replaced the path separator `/` with space,
GitHub converts spaces to dashes in the URLs.
Therefore, the original Trac links must get dashes as well.
"""
return title.replace('/', '-')
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()