From f1c19a7b048cb5d3c723cfc003deb70d2d1edbb3 Mon Sep 17 00:00:00 2001 From: Dan Haiduc Date: Thu, 28 Jan 2021 13:12:00 +0200 Subject: [PATCH] Process TracWiki links and formatting. (#4) * WIP ticket #4: TracWiki-format wiki and ticket links * Process TracWiki format: links, headings, and lists * Update readme with todo or manual edits. * Add example for nested sidebar. * Fix list formatting. * Add info about GitHub meta pages. Co-authored-by: Adi Roiban --- README.rst | 18 ++- config.py.sample | 2 + test/test_wiki_trac_rst_convert.py | 200 ++++++++++++++++++++++++++-- wiki_trac_rst_convert.py | 203 ++++++++++++++++++++++++++--- 4 files changed, 392 insertions(+), 31 deletions(-) diff --git a/README.rst b/README.rst index 23fcb7e..d6dd924 100644 --- a/README.rst +++ b/README.rst @@ -14,17 +14,31 @@ Create a virtualenv:: mv config.py.sample config.py -For wiki migration:: +For wiki migration. +All pages are generated into a flat file structure. +Spaces are used instead of path separators:: python wiki_migrate.py PATH/TO/Trac.DB PATH/TO/GIT-REPO -You might want to add a `_Sidebar.rst` file in the root wit:: +You might want to add a `_Sidebar.rst` file in the root with:: * ``_ * ``_ * ``_ + + * `Services `_ + * `Machines `_ + * ``_ For wiki content conversion:: python wiki_trac_rst_convert.py PATH/TO/GIT-REPO + + +Things that are not yet auto-converted: + +* TracWiki 3rd level heading `=== Some sub-section ===` +* Sub-pages listing macro `[[TitleIndex(Development/)]]` +* Local table of contents `[[PageOutline]]` +* Manually create _Sidebar.rst and _Footer.rst GitHub wiki meta-pages. 
diff --git a/config.py.sample b/config.py.sample index b18b1e4..87ee7f7 100644 --- a/config.py.sample +++ b/config.py.sample @@ -2,3 +2,5 @@ USER_MAPPING = { 'adi': ('adiroiban', 'Adi Roiban '), } + +TRAC_TICKET_PREFIX = 'https://trac.chevah.com/ticket/' diff --git a/test/test_wiki_trac_rst_convert.py b/test/test_wiki_trac_rst_convert.py index 0f4ef86..654dd84 100644 --- a/test/test_wiki_trac_rst_convert.py +++ b/test/test_wiki_trac_rst_convert.py @@ -3,18 +3,21 @@ from wiki_trac_rst_convert import convert_content -class TracRstToVanillaRst(unittest.TestCase): +class TracToGitHubRST(unittest.TestCase): """ - Test conversion of content from Trac-flavored reStructuredText to - vanilla reStructuredText, that is supported by GitHub + Test conversion of content from Trac-flavored reStructuredText and + TracWiki markup, into reStructuredText that is supported by GitHub """ def assertConvertedContent(self, expected: str, source: str): """ Run the Trac RST `source` through the converter, and assert that the output equals `expected`. + + Also expects the content to end with a newline. The newline is + added here for convenience and readability of test cases. """ - self.assertEqual(expected, convert_content(source)) + self.assertEqual(expected + '\n', convert_content(source)) def test_empty(self): """ @@ -25,13 +28,13 @@ def test_empty(self): saying "No newline at end of file". https://stackoverflow.com/a/729795/235463 """ - self.assertConvertedContent('\n', '') + self.assertConvertedContent('', '') def test_newline(self): """ A newline will not get appended another newline. """ - self.assertConvertedContent('\n', '\n') + self.assertConvertedContent('', '\n') def test_removes_rst_wrapping(self): """ @@ -39,9 +42,9 @@ def test_removes_rst_wrapping(self): {{{ #!rst }}} markers. It removes the TracWiki RST armor markup from the output. 
""" - self.assertConvertedContent('\n', '{{{#!rst}}}') + self.assertConvertedContent('', '{{{#!rst}}}') self.assertConvertedContent( - '\n', + '', '\n' '{{{\n' '#!rst' @@ -53,7 +56,7 @@ def test_does_not_strip_content(self): Both RST and non-RST content is preserved, after stripping the markers. """ self.assertConvertedContent( - 'some RST content and some non-RST content\n', + 'some RST content and some non-RST content', '{{{#!rst some RST content}}} and some non-RST content' ) @@ -63,7 +66,7 @@ def test_trac_rst_wiki_link(self): """ self.assertConvertedContent( - '``_\n', + '``_', ':trac:`wiki:Requirements`' ) @@ -74,7 +77,7 @@ def test_trac_rst_wiki_link_to_page_in_subdir(self): """ self.assertConvertedContent( - '``_\n', + '``_', ':trac:`wiki:General/FreeSoftwareUsage`' ) @@ -85,7 +88,7 @@ def test_trac_rst_wiki_reverse_link(self): """ self.assertConvertedContent( - '``_\n', + '``_', '`wiki:Infrastructure/Services/LAN#services`:trac:' ) @@ -99,7 +102,7 @@ def test_several_trac_wiki_rst_links_with_content(self): '* ``_\n' '* Some content\n' '* ``_' - ' List of free software used by Chevah Project.\n', + ' List of free software used by Chevah Project.', '* :trac:`wiki:Requirements`\n' '* Some content\n' @@ -107,6 +110,177 @@ def test_several_trac_wiki_rst_links_with_content(self): ' List of free software used by Chevah Project.' ) + def test_tracwiki_general_link(self): + """ + Process general links from TracWiki format to plain RST links + """ + self.assertConvertedContent( + '`Buildbot `_', + '[https://chevah.com/buildbot/ Buildbot]' + ) + + def test_tracwiki_wiki_link(self): + """ + Process wiki links from TracWiki format to GitHub-compatible + RST wiki links. 
+ + There are various combinations of: + * link text different from article name + * link text the same as article name, and + * no link text, only article name + """ + self.assertConvertedContent( + '`Project management and administration `_', + '[wiki:Administrative Project management and administration]' + ) + self.assertConvertedContent( + '``_', + '[wiki:Administrative Administrative]' + ) + self.assertConvertedContent( + '``_', + '[wiki:"Administrative"]' + ) + self.assertConvertedContent( + '``_', + '[wiki:"Administrative/AllHandMeeting/Past"]' + ) + self.assertConvertedContent( + '``_', + '`[wiki:Infrastructure/Services/FileServer]`:trac:' + ) + self.assertConvertedContent( + '`Overton `_', + '`[wiki:Infrastructure/Machines/Overton Overton]`:trac:' + ) + + def test_trac_ticket(self): + """ + Trac ticket references are converted to a hyperlink. + This use case requires `config.py` with the following setting: + + TRAC_TICKET_PREFIX = 'https://trac.chevah.com/ticket/' + """ + self.assertConvertedContent( + '`Trac #738 `_', + ':trac:`#738`' + ) + + def test_heading(self): + """ + Converts headings to RST, which have an equal-sign-underline. + Also handles the multiline case. + + Headings in TracWiki have single equal signs around them. + """ + self.assertConvertedContent( + 'Heading\n' + '=======', + + '= Heading =' + ) + self.assertConvertedContent( + 'Policy and Process\n' + '==================\n' + '\n' + 'Some text', + + '= Policy and Process =\n' + '\n' + 'Some text' + ) + + def test_subheading(self): + """ + Converts subheadings to RST, which have a dash-underline. + Also handle the multiline case. + + Subheadings in TracWiki have double equal signs around them. 
+ """ + self.assertConvertedContent( + 'Subheading\n' + '----------', + + '== Subheading ==' + ) + self.assertConvertedContent( + 'Subheading\n' + '----------\n' + '\n' + 'Some text', + + '== Subheading ==\n' + '\n' + 'Some text' + ) + + def test_list_indented(self): + """ + Un-indents list items that are indented by one space exactly. + + This is to avoid RST interpreting lists indented by one space + as quotations; we want them as unquoted lists instead. + """ + self.assertConvertedContent( + "* item 1\n" + "* item 2\n" + "* item 3", + + " * item 1\n" + " * item 2\n" + " * item 3" + ) + + def test_list_after_paragraph(self): + """ + Separates lists from paragraphs by one empty line. + + In RST, when lists follow a paragraph without an empty line + inbetween, they fail to parse as lists. + """ + self.assertConvertedContent( + "Paragraph\n\n" + "* item 1\n" + "* item 2\n" + "* item 3", + + "Paragraph\n" + "* item 1\n" + "* item 2\n" + "* item 3" + ) + + def test_list_after_paragraph_idempotent(self): + """ + Does not add another line when there is already a line between + a list and the paragraph before it. + """ + + self.assertConvertedContent( + "Paragraph\n\n" + "* item 1\n" + "* item 2\n" + "* item 3", + + "Paragraph\n\n" + "* item 1\n" + "* item 2\n" + "* item 3" + ) + + def test_bold_is_not_list(self): + """ + Italic text markup is preserved as in the TracWiki format. + """ + + self.assertConvertedContent( + "Paragraph\n" + "*italic text*", + + "Paragraph\n" + "*italic text*", + ) + if __name__ == '__main__': unittest.main() diff --git a/wiki_trac_rst_convert.py b/wiki_trac_rst_convert.py index 2d6742b..d83b7fd 100644 --- a/wiki_trac_rst_convert.py +++ b/wiki_trac_rst_convert.py @@ -3,6 +3,8 @@ import sys import os +from config import TRAC_TICKET_PREFIX + def main(): """ @@ -38,42 +40,211 @@ def convert_content(text: str): * Remove RST wrapping * Convert Trac wiki directives to GitHub wiki links. + * Convert TracWiki headings, subheadings, and lists to RST. 
""" to_remove = ['{{{', '#!rst', '}}}'] for seq in to_remove: text = text.replace(seq, '') text = text.strip() + '\n' - text = _trac_rst_wiki_to_github_links(text) + text = _trac_to_github_wiki_links(text) + text = _tracwiki_to_rst_links(text) + text = _tracwiki_wiki_link_with_text_to_github_links(text) + text = _trac_ticket_links(text) + text = _tracwiki_heading_to_rst_heading(text) + text = _tracwiki_subheading_to_rst_subheading(text) + text = _tracwiki_list_dedent(text) + text = _tracwiki_list_separate_from_paragraph(text) return text -def _trac_rst_wiki_to_github_links(text: str): +def _trac_to_github_wiki_links(text: str): """ - Takes RST content with Trac wiki link directives - and coverts the directives to inline GitHub wiki links. + Takes content with Trac wiki link directives and coverts + the directives to inline GitHub wiki links. """ - link_matchers =[re.compile(r) for r in [ + link_matchers = [ + # RST markup: ':trac:`wiki:(.+?)`', - '`wiki:(.+?)`:trac:' - ]] + '`wiki:(.+?)`:trac:', + + # TracWiki markup: + '`\[wiki:"?([^ ]+?)"?]`:trac:', + '\[wiki:"?([^ ]+?)"?]', + ] for link_re in link_matchers: - wiki_titles = re.findall(link_re, text) - for title in wiki_titles: - text = re.sub( - link_re, - rf'`<{_wiki_url(title)}>`_', - text, - 1 - ) + for title in _matches(link_re, text): + text = _sub(link_re, f'`<{_wiki_url(title)}>`_', text) + + return text + + +def _tracwiki_to_rst_links(text: str): + """ + Takes TracWiki markup and converts its links to RST links. + """ + + url = '[a-z]+://[^ ]+' + link_text = '[^]]+' + link_re = f'\[({url}) ({link_text})]' + + for url, link_text in _matches(link_re, text): + text = _sub(link_re, f'`{link_text} <{url}>`_', text) + + return text + + +def _tracwiki_wiki_link_with_text_to_github_links(text: str): + """ + Takes TracWiki markup and converts its Wiki links which have + explicit link text into RST links. + If the link text is the same as the article name, generate a more + compact syntax. 
+ """ + + title = '[^ ]+' + link_text = '[^]]+' + + link_matchers = [ + f'`\[wiki:({title}) ({link_text})]`:trac:', + f'\[wiki:({title}) ({link_text})]', + ] + + for link_re in link_matchers: + for title, link_text in _matches(link_re, text): + if title == link_text: + text = _sub(link_re, f'`<{_wiki_url(title)}>`_', text) + else: + replacement = f'`{link_text} <{_wiki_url(title)}>`_' + text = _sub(link_re, replacement, text) + + return text + + +def _trac_ticket_links(text: str): + """ + Replace Trac reference to ticket with an RST link to the ticket. + """ + + ticket_re = ':trac:`#([0-9]+)`' + for ticket in _matches(ticket_re, text): + text = _sub( + ticket_re, + f'`Trac #{ticket} <{TRAC_TICKET_PREFIX}{ticket}>`_', + text + ) + return text + + +def _tracwiki_heading_to_rst_heading(text: str): + """ + Convert TracWiki 1st level headings to RST heading. + + TracWiki: + + = Some Top Heading = + Content here + + RST conversion: + + Some Top Heading + ================ + + Content here + """ + heading_re = '^= (.*) =$' + for match in _matches(heading_re, text): + text = _sub(heading_re, _underline(match, '='), text) return text -def _wiki_url(title): +def _tracwiki_subheading_to_rst_subheading(text: str): + """ + Convert TracWiki 2nd level headings to RST heading. + + TracWiki: + + == Some 2nd Heading == + Content here + + RST conversion: + + Some 2nd Heading + ---------------- + Content here + """ + heading_re = '^== (.*) ==$' + for match in _matches(heading_re, text): + text = _sub(heading_re, _underline(match, '-'), text) + + return text + + +def _tracwiki_list_dedent(text: str): + """ + Remove a space before a list item, if exactly one space is + before the asterisk. 
+ """ + + indented_list_item_re = '^ \* ' + for _ in _matches(indented_list_item_re, text): + text = _sub(indented_list_item_re, '* ', text) + + return text + + +def _tracwiki_list_separate_from_paragraph(text: str): + """ + During conversion from TracWiki to RST, ensure an empty line + between each non-list-item and the list item following it, if any. + """ + + lines = text.split('\n') + newlines = [] + was_list_item_or_blank = True + + for l in lines: + is_list_item = re.match('^ *\* .*', l) + if is_list_item: + if not was_list_item_or_blank: + newlines.append('') + was_list_item_or_blank = True + else: + is_empty = l.strip() == '' + was_list_item_or_blank = is_empty + newlines.append(l) + + return '\n'.join(newlines) + + +def _underline(text: str, line_symbol: str): + """ + Add a line made of `line_symbol` after given `text`, + and return new text. + """ + return text + "\n" + line_symbol * len(text) + + +def _matches(pattern: str, text: str): + """ + Return all matches of a particular `pattern` occurring in `text`. + """ + return re.findall(pattern, text, flags=re.MULTILINE) + + +def _sub(regex: str, replacement: str, text: str): + """ + Substitute one occurrence of `regex` in `text` with `replacement`. + Return the resulting new text. + """ + return re.sub(regex, replacement, text, count=1, flags=re.MULTILINE) + + +def _wiki_url(title: str): """ GitHub Wiki collapses directory structure.