diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 847c9c0..3a3183d 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -23,14 +23,10 @@ jobs: - name: Install dependencies run: | python -m pip install pip==20.3.1 - pip install flake8==3.8.4 pytest==6.1.2 + pip install flake8==3.8.4 pytest==6.1.2 requests==2.26.0 - name: Lint with flake8 run: | - # stop the build if there are Python syntax errors or undefined names flake8 - - name: Prepare config - run: | - cp config.py.sample config.py - name: Test with pytest run: | pytest diff --git a/.gitignore b/.gitignore index 47686e6..18ee7d4 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,11 @@ config-chevah.py *.psql *.db3 +# Trac migration output +tickets_expected.tsv +tickets_created.tsv +comments_created.tsv + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/README.rst b/README.rst index 5f3f42c..57b1eff 100644 --- a/README.rst +++ b/README.rst @@ -5,14 +5,18 @@ Hacks used to migrate from Trac sqlite or Postgresql dump to GitHub. Works with Python 3.8. +# Wiki migration + For wiki migration, you will need git available in your dev environment. -This is a 2 stage process: +This is a 3 stage process: + +1. Copy `config.py.sample` over `config.py`, and edit all the settings. -1. Create the GitHub Wiki pages using content formated as TracWiki. +2. Create the GitHub Wiki pages using content formatted as TracWiki. This is done to have better diffs between historic versions. -2. Convert the last version of the each page to ReStructuredText, +3. Convert the last version of each page to ReStructuredText, or to any other format. @@ -58,3 +62,14 @@ Things that are not yet auto-converted: * Sub-pages listing macro `[[TitleIndex(Development/)]]` * Local table of content `[[PageOutline]]` * Manually create _Sidebar.rst and _Footer.rst GitHub wiki meta-pages. + +# Ticket migration + +1. 
Copy `config.py.sample` over `config.py`, and edit all the settings. +2. Get the latest `projects_created.tsv` to avoid duplicating projects. +3. Modify `select_tickets` to your liking. +4. If you are sure you want to create tickets, change `DRY_RUN` to `False` + in `ticket_migrate.py`. +5. Run `./ticket_migrate.py ../trac.db`, where `../trac.db` is the path + to the Trac SQLite DB dump. + diff --git a/config.py.sample b/config.py.sample index d8bbf09..12a1f9b 100644 --- a/config.py.sample +++ b/config.py.sample @@ -1,6 +1,7 @@ -# Trac User to GitHub user mapping +# Trac User to GitHub user mapping. USER_MAPPING = { 'adi': ('adiroiban', 'Adi Roiban '), + 'danuker': ('danuker', 'Dan Haiduc '), } TRAC_TICKET_PREFIX = 'https://trac.chevah.com/ticket/' @@ -10,3 +11,22 @@ TRAC_TICKET_PREFIX = 'https://trac.chevah.com/ticket/' DEFAULT_GITHUB_USER = None FILE_EXTENSION = '.mediawiki' + +# Trac ticket Component to GitHub repository mapping. +REPOSITORY_MAPPING = { + 'client': 'client', + 'commons': 'commons', + 'trac-migration-staging': 'trac-migration-staging', + } + +# GitHub repository for Trac tickets with Component not in the mapping. +FALLBACK_REPOSITORY = 'server' + +# Owner of GitHub repositories where to create issues. +OWNER = 'chevah' + +# The user to create the issues through the API. +# Create a token with `repo` permissions here: +# https://github.com/settings/tokens +OAUTH_USER = 'danuker' +OAUTH_TOKEN = 'ghp_qwertyuiop' diff --git a/config_test.py b/config_test.py new file mode 100644 index 0000000..7335ac8 --- /dev/null +++ b/config_test.py @@ -0,0 +1,28 @@ +# Trac User to GitHub user mapping. +# This is the configuration used by the test suite. + +USER_MAPPING = { + 'adi': ('adiroiban', 'Adi Roiban '), + 'danuker': ('danuker', 'Dan Haiduc '), + } + +TRAC_TICKET_PREFIX = 'https://trac.chevah.com/ticket/' + +# Trac ticket Component to GitHub repository mapping. 
+REPOSITORY_MAPPING = { + 'client': 'client', + 'commons': 'commons', + 'trac-migration-staging': 'trac-migration-staging', + } + +# GitHub repository for Trac tickets with Component not in the mapping. +FALLBACK_REPOSITORY = 'server' + +# Owner of GitHub repositories where to create issues. +OWNER = 'chevah' + +# The user to create the issues through the API. +# Create a token with `repo` permissions here: +# https://github.com/settings/tokens +OAUTH_USER = 'danuker' +OAUTH_TOKEN = 'ghp_qwertyuiop' diff --git a/link_issues.py b/link_issues.py new file mode 100755 index 0000000..6241ed3 --- /dev/null +++ b/link_issues.py @@ -0,0 +1,330 @@ +#!/usr/bin/env python3 + +# Comment on the issues with the PR code link, +# so that the GH PR page backlinks to the issue. + +# This is done with the official API, not the bulk API, +# because the bulk API triggers no backlinks. +import datetime +import pprint +import re + +import requests +import sqlite3 +import sys +import time + +try: + import config +except ModuleNotFoundError: + # In the tests, we monkeypatch this module. + config = None +# Set to False to perform actual GitHub issue creation. +DRY_RUN = True + + +def main(): + """ + Read the Trac DB and tickets on GitHub, + and link from them to their PRs. + """ + tickets = list(select_tickets(read_trac_tickets())) + ticket_mapping = get_tickets() + + # Parse tickets into GitHub CommentRequest objects. + comments = list(CommentRequest.fromTracDataMultiple( + tickets, ticket_mapping=ticket_mapping + )) + + print("Issues parsed. Starting to submit comments.") + + for comment in comments: + print(f"Linking GH {comment.getGitHubLink()}") + comment.submit_link_to_pr() + + print("Issue creation complete. You may now manually open issues and PRs.") + + +def select_tickets(tickets): + """ + Easy-to-edit method to choose tickets to submit. + Checks that the `t_id` is not in `tickets_created.tsv`. + Useful for creating tickets in multiple rounds. 
+ """ + # Skip tickets that have NOT already been created. + # Only comment on already created tickets. + submitted_ids = get_tickets().keys() + tickets = [t for t in tickets if t['t_id'] in submitted_ids] + tickets = [t for t in tickets if t['t_id'] == 2936] + + # Only comment on tickets which have a branch (PR linked from Trac). + tickets = [t for t in tickets if t['branch']] + + # Skip tickets where we have already linked the PR. + tickets = [ + t for t in tickets + if t['t_id'] not in get_tickets('links_created.tsv')] + + # Skip tickets created with the classic API which linked the PRs. + return [t for t in tickets if t['component'] != 'pr'] + + return tickets + + +def get_tickets(filename='tickets_created.tsv'): + """ + Reads the tickets_create.tsv, and returns a dictionary of + Trac ID -> GitHub URL of tickets that were sent to GitHub already. + """ + created_tickets = {} + with open(filename) as f: + for line in f: + if line.startswith(config.TRAC_TICKET_PREFIX): + trac_link, github_url = line.strip().split('\t') + trac_id = trac_link.split(config.TRAC_TICKET_PREFIX)[1] + trac_id = int(trac_id) + created_tickets[trac_id] = github_url + return created_tickets + + +def read_trac_tickets(): + """ + Read the Trac ticket data from the database, and generate dicts. + """ + db = get_db() + + # Only take the last branch change. + # For example, https://trac.chevah.com/ticket/85 has multiple changes, + # and the last one is to GH PR 238. + # We use GROUP BY because SQLite has no DISTINCT ON. 
+ # https://www.sqlite.org/quirks.html#aggregate_queries_can_contain_non_aggregate_result_columns_that_are_not_in_the_group_by_clause + for row in db.execute( + """\ + SELECT * + FROM + (SELECT * + FROM ticket + LEFT JOIN ticket_change ON ticket.id = ticket_change.ticket + AND ticket_change.field = 'branch' + AND ticket_change.newvalue LIKE '%github%' + ORDER BY ticket.id, + ticket_change.time DESC) + GROUP BY id; + """): + ( + t_id, + t_type, + time, + changetime, + component, + severity, + priority, + owner, + reporter, + cc, + version, + milestone, + status, + resolution, + summary, + description, + keywords, + _ticket, + _time, + _author, + _field, + _oldvalue, + _newvalue, + ) = row + + yield { + 't_id': t_id, + 't_type': t_type, + 'time': time, + 'changetime': changetime, + 'component': component, + 'severity': severity, + 'priority': priority, + 'owner': owner, + 'reporter': reporter, + 'cc': cc, + 'version': version, + 'milestone': milestone, + 'status': status, + 'resolution': resolution, + 'summary': summary, + 'description': description, + 'keywords': keywords, + 'branch': _newvalue, + } + + +def get_db(): + """ + Return a database connection. + """ + if len(sys.argv) != 2: + print("Need to pass the path to Trac DB as argument.") + sys.exit(1) + db = sqlite3.connect(sys.argv[1]) + return db + + +class CommentRequest: + """ + Store what is needed for a comment that links an issue to a PR. + `pr_link` maps to the `branch` of a ticket read from the Trac DB. + """ + def __init__( + self, trac_id, repo, github_number, pr_link): + self.repo = repo + self.t_id = trac_id + self.github_number = github_number + self.github_pr_link = pr_link + + def submit_link_to_pr(self): + """ + Send a POST request to GitHub creating the comment. 
+ + API Docs: + https://docs.github.com/en/rest/reference/issues#create-an-issue-comment + """ + response = protected_request( + url=self.commentsURL(), data={'body': self.commentText()} + ) + + if response: + # Remember the GitHub URL assigned to each ticket. + with open('links_created.tsv', 'a') as f: + comment_url = response.json()['html_url'] + f.write(f'{self.getTracURL(self.t_id)}\t{comment_url}\n') + + def commentText(self): + """ + Convert Trac comment data to GitHub comment body as JSON. + """ + return f'PR for trac-{self.t_id} is at {self.github_pr_link}.' + + def getGitHubLink(self): + """ + Return the GitHub URL, + given the repository name and expected GitHub issue number. + """ + return f'https://github.com/' \ + f'{config.OWNER}/{self.repo}/issues/{self.github_number}' + + @staticmethod + def getTracURL(trac_id): + """ + Return this issue's Trac URL. + """ + return config.TRAC_TICKET_PREFIX + str(trac_id) + + @classmethod + def fromTracDataMultiple(cls, trac_data, ticket_mapping): + """ + Generate `CommentRequest`s from an iterable of dicts of Trac tickets. + """ + for ticket in trac_data: + github_link = ticket_mapping[ticket['t_id']] + yield cls.fromTracData( + t_id=ticket['t_id'], + repo=get_repo(github_link), + github_number=get_github_number(github_link), + branch=ticket['branch'], + ) + + @classmethod + def fromTracData( + cls, + t_id, + repo, + github_number, + branch, + ): + """ + Create a GitHubRequest from Trac ticket data fields. + """ + issue = cls( + trac_id=t_id, + repo=repo, + github_number=github_number, + pr_link=branch, + ) + return issue + + def commentsURL(self): + return f'https://api.github.com/repos/' \ + f'{config.OWNER}/{self.repo}/issues/{self.github_number}/comments' + + +def get_repo(github_link): + """ + Given the GitHub link, return its repository. 
+ """ + match = re.match( + f'https://github.com/{config.OWNER}/(.+)/issues/[0-9]+', + github_link, + ) + return match.groups()[0] + + +def get_github_number(github_link) -> str: + """ + Given the GitHub link, return its issue number. + """ + return github_link.rsplit('/', 1)[1] + + +def protected_request( + url, data, method=requests.post, expected_status_code=201): + """ + Send a request if DRY_RUN is not truthy. + + In case of error, start the debugger. + In case of nearing rate limit, sleep until it resets. + """ + + if DRY_RUN: + print(f"Would call {method} on {url} with data:") + pprint.pprint(data) + return + + # Obey secondary rate limit: + # https://docs.github.com/en/rest/guides/best-practices-for-integrators#dealing-with-secondary-rate-limits + time.sleep(10) + + response = method( + url=url, + headers={'accept': 'application/vnd.github.v3+json'}, + json=data, + auth=(config.OAUTH_USER, config.OAUTH_TOKEN) + ) + + if response.status_code != expected_status_code: + print('Error: POST request failed!') + print(response) + pprint.pprint(response.json()) + import pdb + pdb.set_trace() + + wait_for_rate_reset(response) + + return response + + +def wait_for_rate_reset(response): + """ + Wait for a rate limit reset in case it is near exhaustion. + """ + remaining = int(response.headers['X-RateLimit-Remaining']) + reset_time = int(response.headers['X-RateLimit-Reset']) + if remaining < 10: + to_sleep = max(0, int(1 + reset_time - time.time())) + print( + f"Waiting {to_sleep}s (until {reset_time}) for rate limit reset.") + time.sleep(to_sleep) + + +if __name__ == '__main__': + main() diff --git a/test/test_link_issues.py b/test/test_link_issues.py new file mode 100644 index 0000000..fac0094 --- /dev/null +++ b/test/test_link_issues.py @@ -0,0 +1,66 @@ +import unittest + +import config_test +import link_issues as li + +# Monkeypatch the SUT to use the test config. 
+li.config = config_test + + +class TestCommentRequest(unittest.TestCase): + """ + `CommentRequest` objects are created from Trac data. + """ + def test_commentJSON(self): + """ + Creates a comment JSON fit for GitHub, mentioning the GH PR in a link. + """ + sut = li.CommentRequest( + trac_id='123', + repo='server', + github_number='1', + pr_link='https://github.com/chevah/server/pull/234', + ) + + self.assertEqual( + 'PR for trac-123 is at https://github.com/chevah/server/pull/234.', + sut.commentText() + ) + + def test_fromTracDataMultiple(self): + """ + The CommentRequest object is created + with data from the submission history + and only the PR link (`branch`) is retrieved from the Trac DB. + """ + + requests = li.CommentRequest.fromTracDataMultiple( + trac_data=[ + { + 't_id': 123, + 'branch': 'https://github.com/chevah/server/pull/234', + 'component': 'misleading', + } + ], + ticket_mapping={ + 123: 'https://github.com/chevah/server/issues/5454', + } + ) + requests = list(requests) + self.assertEqual(1, len(requests)) + + sut = requests[0] + + self.assertEqual( + 'PR for trac-123 is at https://github.com/chevah/server/pull/234.', + sut.commentText() + ) + + self.assertEqual( + 'https://api.github.com/repos/chevah/server/issues/5454/comments', + sut.commentsURL() + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_ticket_migrate.py b/test/test_ticket_migrate.py new file mode 100644 index 0000000..6364325 --- /dev/null +++ b/test/test_ticket_migrate.py @@ -0,0 +1,718 @@ +import unittest + +import config_test +import ticket_migrate as tm + +# Monkeypatch the SUT to use the test config. +tm.config = config_test + + +class TestRepositoryMapping(unittest.TestCase): + """ + Trac components map to the configured GitHub repositories. + + These tests depend on `config.py` having the contents + from `config.py.sample`. 
+ """ + + def test_get_repo(self): + """ + Check that the issue is opened in the correct GitHub repository + based on the Trac component. + """ + self.assertEqual(tm.get_repo('client'), 'client') + self.assertEqual(tm.get_repo('commons'), 'commons') + self.assertEqual(tm.get_repo('fallback'), 'server') + + +class TestLabelMapping(unittest.TestCase): + """ + Trac labels are parsed based on component, priority, and keywords_string. + + These tests depend on `config.py` having the contents + from `config.py.sample`. + """ + + def test_labels_from_keywords(self): + """ + Parse and clean the Trac keywords. + """ + # Split by space. + self.assertEqual( + {'easy', 'tech-debt'}, + tm.labels_from_keywords('easy tech-debt')) + + # Remove commas. + self.assertEqual({'tech-debt'}, tm.labels_from_keywords('tech-debt,')) + self.assertEqual( + {'tech-debt', 'feature'}, + tm.labels_from_keywords('tech-debt, feature')) + + # Fix typos. + self.assertEqual( + {'tech-debt', 'easy'}, + tm.labels_from_keywords('tech-dept easy')) + self.assertEqual( + {'tech-debt'}, + tm.labels_from_keywords('tech-deb')) + + # Discard unknown words to prevent tag explosion. + self.assertEqual( + set(), + tm.labels_from_keywords('unknown-tag')) + + # Deduplicate. + self.assertEqual( + {'tech-debt'}, + tm.labels_from_keywords('tech-deb, tech-debt tech-dept')) + + # Handles None correctly. + self.assertEqual( + set(), + tm.labels_from_keywords(None)) + + def test_get_labels_none(self): + """ + The issues that do not map to the fallback repository + get no label based on the component. + """ + self.assertEqual( + ['priority-low', 'tech-debt'], + tm.get_labels( + component='client', + priority='Low', + keywords='tech-dept', + status='', + resolution='', + )) + + self.assertEqual( + ['priority-high'], + tm.get_labels( + component='client', + priority='High', + keywords='', + status='', + resolution='', + )) + + # Handles "None" correctly. 
+ self.assertEqual( + ['priority-low'], + tm.get_labels( + component='client', + priority=None, + keywords='', + status=None, + resolution=None, + )) + + def test_get_labels_component_name(self): + self.assertEqual( + ['fallback', 'priority-low'], + tm.get_labels('fallback', 'Low', '', '', '')) + + +class TestAssigneeMapping(unittest.TestCase): + """ + Trac Owners are mapped to GitHub assignees. + """ + def test_user_mapping(self): + self.assertEqual(['adiroiban'], tm.get_assignees('adi')) + + def test_unknown_user_mapping(self): + self.assertEqual([], tm.get_assignees('john-doe')) + + +class TestBody(unittest.TestCase): + """ + The GitHub issue body is made from the Trac description and other fields. + """ + def test_get_body_details(self): + """ + Writes Trac ticket details at the beginning of the description. + """ + self.assertEqual( + "trac-12345 bug was created by @adiroiban on " + "1970-01-01 00:00:00Z.\n" + "\n" + "The ticket description.", + + tm.get_body( + "The ticket description.", + { + 't_id': 12345, + 't_type': 'bug', + 'reporter': 'adi', + 'time': 1234, + 'changetime': 1234, + 'branch': None, + }, + ticket_mapping={}, + ) + ) + + def test_get_body_monospace(self): + """ + Parses monospace squiggly brackets. + """ + self.assertEqual( + "trac-5432 task was created by @someone_else on " + "1970-01-01 00:00:00Z.\n" + "\n" + "The ticket ```description```.", + + tm.get_body( + "The ticket {{{description}}}.", + { + 't_id': 5432, + 't_type': 'task', + 'reporter': 'someone_else', + 'time': 1234, + 'changetime': 1234, + 'branch': '' + }, + ticket_mapping={}, + ) + ) + + +class TestParseBody(unittest.TestCase): + def test_backticks(self): + """ + Monospace backticks are preserved. 
+ """ + self.assertEqual( + 'text `monospace` text', + tm.parse_body('text `monospace` text', ticket_mapping={}) + ) + + def test_curly(self): + self.assertEqual( + 'text ```monospace``` text', + tm.parse_body('text {{{monospace}}} text', ticket_mapping={}) + ) + + def test_monospace_escaping(self): + """ + Escapes one monospace syntax with the other. + """ + self.assertEqual( + "```curly '''not bold'''```", + tm.parse_body("{{{curly '''not bold'''}}}", ticket_mapping={}) + ) + + self.assertEqual( + "`backtick '''not bold'''`", + tm.parse_body("`backtick '''not bold'''`", ticket_mapping={}) + ) + + self.assertEqual( + "quoting squigglies `{{{` or backticks ```````", + tm.parse_body( + "quoting squigglies `{{{` or backticks {{{`}}}", + ticket_mapping={}, + ) + ) + + def test_convert_content(self): + """ + Headings are converted. + """ + self.assertEqual( + "# Some Top Heading\n" + "\n" + "Content ```here```", + + tm.parse_body( + "= Some Top Heading =\n" + "\n" + "Content {{{here}}}", + ticket_mapping={}, + ) + ) + + def test_convert_content_in_monospace(self): + """ + Monospaced sections are not converted from TracWiki syntax. + """ + self.assertEqual( + "Some other content\n" + "\n" + "```\n" + "= Some Top Heading =\n" + "\n" + "Content here```\n" + "Some other content", + + tm.parse_body( + "Some other content\n" + "\n" + "{{{\n" + "= Some Top Heading =\n" + "\n" + "Content here}}}\n" + "Some other content", + ticket_mapping={}, + ) + ) + + def test_convert_RST(self): + """ + Leaves RST syntax as-is, does not treat it as monospace. + """ + self.assertEqual( + "\n" + "\n" + "Problem\n" + "-------\n" + "\n" + "Solution.\n" + "\n", + + tm.parse_body( + "{{{\n" + "#!rst\n" + "\n" + "Problem\n" + "-------\n" + "\n" + "Solution.\n" + "}}}\n", + ticket_mapping={}, + ) + ) + + def test_links(self): + """ + TracWiki syntax links get converted to Markdown links. 
+ """ + self.assertEqual( + "In order to " + "[avoid third-party cookies](https://github.com/chevah/sftpplus.com/pull/254), " + "we need to handle the contact form ourselves.", + tm.parse_body( + "In order to " + "[https://github.com/chevah/sftpplus.com/pull/254 avoid third-party cookies], " + "we need to handle the contact form ourselves.", + ticket_mapping={}, + ) + ) + + def test_ticket_replacement(self): + """ + Converts Trac ticket IDs to GitHub numbers. + """ + self.assertEqual( + "Some issue is solved in [#234](some_url/234).", + tm.parse_body( + description="Some issue is solved in #123.", + ticket_mapping={123: 'some_url/234'}, + ) + ) + + def test_ticket_replacement_URL(self): + """ + Converts Trac ticket URLs to GitHub URLs. + """ + self.assertEqual( + "Issue [#234](some_url/234).", + tm.parse_body( + description="Issue https://trac.chevah.com/ticket/123.", + ticket_mapping={123: 'some_url/234'}, + ) + ) + + def test_ticket_replacement_multiple(self): + """ + Converts Trac ticket IDs to GitHub numbers. + """ + self.assertEqual( + "Some issue is solved in [#234](some_url/234).\n" + "Another issue in the same ticket [#234](some_url/234).\n" + "Yet another in a different ticket [#555](some_url/555).\n", + tm.parse_body( + description=( + "Some issue is solved in #123.\n" + "Another issue in the same ticket #123.\n" + "Yet another in a different ticket #444.\n" + ), + ticket_mapping={ + 123: 'some_url/234', + 444: 'some_url/555' + }, + ) + ) + + def test_ticket_replacement_URL_multiple(self): + """ + Converts Trac ticket URLs to GitHub URLs. + """ + self.assertEqual( + "(but see [#234](some_url/234)).\n" + "CMD [#234](some_url/234)", + tm.parse_body( + description="(but see https://trac.chevah.com/ticket/123).\n" + "CMD https://trac.chevah.com/ticket/123", + ticket_mapping={123: 'some_url/234'}, + ) + ) + + def test_missing_ticket_replacement(self): + """ + Leaves missing Trac ticket IDs alone. 
+ """ + self.assertEqual( + "Some issue is solved in #345.", + tm.parse_body( + description="Some issue is solved in #345.", + ticket_mapping={123: 'some_url/234'}, + ) + ) + + def test_no_ticket_replacement_in_preformatted(self): + """ + Does not convert Trac ticket IDs to GitHub numbers + in preformatted text. + """ + self.assertEqual( + "```Some issue is solved in #123.```", + tm.parse_body( + description="{{{Some issue is solved in #123.}}}", + ticket_mapping={123: 'some_url/234'}, + ) + ) + + def test_no_ticket_replacement_subset_match(self): + """ + Does not convert Trac ticket IDs when only a string subset matches. + """ + self.assertEqual( + "Some issue is solved in #1234.", + tm.parse_body( + description="Some issue is solved in #1234.", + ticket_mapping={123: 'some_url/234'}, + ) + ) + + +class TestCommentGeneration(unittest.TestCase): + def test_basic(self): + """ + Check that the body of a comment includes its author and latest text. + """ + trac_data = { + 'ticket': 3928, + 'c_time': 1489439926524055, + 'author': 'adi', + 'newvalue': 'Thanks.', + } + desired_body = ( + "Comment by adiroiban at 2017-03-13 21:18:46Z.\n" + "\n" + "Thanks." + ) + + self.assertEqual( + desired_body, + tm.GitHubRequest.commentFromTracData( + trac_data, + ticket_mapping={} + )['body'] + ) + + def test_no_user(self): + """ + A user not defined in config.py is preserved. + """ + trac_data = { + 'ticket': 3928, + 'c_time': 1488909819877801, + 'author': 'andradaE', + 'newvalue': 'Thanks.', + } + desired_body = ( + "Comment by andradaE at 2017-03-07 18:03:39Z.\n" + "\n" + "Thanks." + ) + + self.assertEqual( + desired_body, + tm.GitHubRequest.commentFromTracData( + trac_data, + ticket_mapping={} + )['body'] + ) + + def test_formatting(self): + """ + Check that at least some formatting works. 
+ """ + trac_data = { + 'ticket': 3928, + 'c_time': 1488909819877801, + 'author': 'andradaE', + 'newvalue': ( + '[http://styleguide.chevah.com/tickets.html Style Guide]' + ) + } + desired_body = ( + "Comment by andradaE at 2017-03-07 18:03:39Z.\n" + "\n" + "[Style Guide](http://styleguide.chevah.com/tickets.html)" + ) + + self.assertEqual( + desired_body, + tm.GitHubRequest.commentFromTracData( + trac_data, + ticket_mapping={}, + )['body'] + ) + + +class TestGitHubRequest(unittest.TestCase): + """ + `GitHubRequest` objects are created from Trac data. + """ + def test_fromTracDataMultiple(self): + """ + A list of one dictionary with Trac ticket data results in + one GitHubRequest object with the proper fields. + """ + + request_gen = tm.GitHubRequest.fromTracDataMultiple( + trac_data=[{ + 'component': 'trac-migration-staging', + 'owner': 'danuker', + 'status': 'closed', + 'resolution': 'wontfix', + 'milestone': 'some-milestone', + 'summary': 'summary', + 'description': 'description', + 'priority': 'high', + 'keywords': 'feature, easy', + 'reporter': 'adi', + 't_id': 6, + 't_type': 'task', + 'time': 1288883091000000, + 'changetime': 1360238496689890, + 'branch': 'https://github.com/chevah/agent-1.5/pull/10' + }], + ticket_mapping={}, + ) + + requests = list(request_gen) + self.assertEqual(1, len(requests)) + request = requests[0] + + self.assertEqual('trac-migration-staging', request.repo) + self.assertEqual('chevah', request.owner) + self.assertEqual('danuker', request.data['assignee']) + self.assertEqual( + ['easy', 'feature', 'priority-high', 'wontfix'], + request.data['labels']) + self.assertEqual('danuker', request.data['assignee']) + self.assertEqual('some-milestone', request.milestone) + self.assertEqual('summary', request.data['title']) + self.assertEqual( + 'trac-6 task was created by @adiroiban on 2010-11-04 15:04:51Z.\n' + 'Last changed on 2013-02-07 12:01:36Z.\n' + 'PR at https://github.com/chevah/agent-1.5/pull/10.\n' + '\n' + 'description', + 
request.data['body']) + + +class TestNumberPredictor(unittest.TestCase): + """ + NumberPredictor orders GitHub issues so they are created to match Trac ID, + as much as possible. + """ + def setUp(self): + """ + Initialize the NumberPredictor, and the `next_numbers` cache. + """ + + self.sut = tm.NumberPredictor() + + # Break the cache of next_numbers to prevent accidental get requests. + self.sut.next_numbers = { + 'server': 0, + 'client': 0, + 'commons': 0, + 'trac-migration-staging': 0, + } + + def test_requestNextNumber_cached(self): + """ + When the `next_numbers` cache has an entry for the repository, + it returns the value of the entry. + """ + self.sut.next_numbers['trac-migration-staging'] = 1234 + + self.assertEqual( + 1234, self.sut.requestNextNumber('trac-migration-staging', [])) + + def test_getMaxCreatedTicketNumber_match(self): + """ + Returns the largest ticket number in the matching repository. + """ + tickets = [ + 'https://github.com/chevah/matching/issues/1', + 'https://github.com/chevah/matching/issues/2', + 'https://github.com/matching/nonmatching/issues/3', + 'https://github.com/matching/nonmatching/issues/4' + ] + + self.assertEqual( + 2, + self.sut.getMaxCreatedTicketNumber( + repo='matching', ticket_urls=tickets) + ) + + def test_getMaxCreatedTicketNumber_nomatch(self): + """ + The max created ticket is 0 when there are no matches. + """ + tickets = [ + 'https://github.com/chevah/nonmatching1/issues/1', + 'https://github.com/chevah/nonmatching2/issues/2' + ] + + self.assertEqual( + 0, + self.sut.getMaxCreatedTicketNumber( + repo='matching', ticket_urls=tickets) + ) + + @staticmethod + def generateTickets(numbers): + """ + Create a list of tickets with given IDs. + """ + return [ + {'t_id': number, 'component': 'trac-migration-staging'} + for number in numbers + ] + + def test_orderTickets_simple(self): + """ + Return the tickets to submit in the same order, + if the next GitHub number is 1. 
+ """ + tickets = self.generateTickets([1, 2, 3]) + self.sut.next_numbers['trac-migration-staging'] = 1 + + self.assertEqual( + (tickets, [1, 2, 3]), + self.sut.orderTickets(tickets, []) + ) + + def test_orderTickets_skip(self): + """ + Skip the tickets with a Trac ID lower than the next GitHub number, + and start with the first ticket matching Trac ID and GitHub number. + After the matching tickets are enumerated, continue with the others. + """ + self.sut.next_numbers['trac-migration-staging'] = 4 + tickets = self.generateTickets([1, 2, 3, 4, 5, 6]) + + self.assertEqual( + ( + self.generateTickets([4, 5, 6, 1, 2, 3]), + [4, 5, 6, 7, 8, 9], + ), + self.sut.orderTickets(tickets, []) + ) + + def test_orderTickets_unfillable_gap(self): + """ + When the minimum Trac ID is greater than the next GitHub number, + and the gap can't be filled with newer Trac IDs, + still return all the tickets. + """ + self.sut.next_numbers['trac-migration-staging'] = 4 + tickets = self.generateTickets([15, 16, 17]) + + self.assertEqual( + ( + self.generateTickets([17, 16, 15]), + [4, 5, 6], + ), + self.sut.orderTickets(tickets, []) + ) + + def test_orderTickets_gap_fillable_with_new(self): + """ + When the minimum Trac ID is greater than the next GitHub number, + the highest Trac IDs are sacrificed until they fill the gap, + and after the gap is filled, the Trac IDs match. + """ + self.sut.next_numbers['trac-migration-staging'] = 4 + tickets = self.generateTickets([5, 6, 7]) + + self.assertEqual( + (self.generateTickets([7, 5, 6]), [4, 5, 6]), + self.sut.orderTickets(tickets, []) + ) + + def test_orderTickets_gap_fillable_with_old(self): + """ + When the minimum Trac ID is lower than the next GitHub number, + fill any gaps with Trac IDs already taken by other GH numbers, + In the example, Trac IDs 8 and 10 will match the GitHub IDs. 
+ """ + self.sut.next_numbers['trac-migration-staging'] = 7 + tickets = self.generateTickets([2, 4, 6, 8, 10]) + + self.assertEqual( + ( + self.generateTickets([2, 8, 4, 10, 6]), + [7, 8, 9, 10, 11], + ), + self.sut.orderTickets(tickets, []) + ) + + def test_orderTickets_multiple_repos(self): + """ + Splits tickets correctly across repositories. + """ + self.sut.next_numbers['commons'] = 7 + self.sut.next_numbers['trac-migration-staging'] = 17 + tickets = [ + {'t_id': 1, 'component': 'commons'}, + {'t_id': 7, 'component': 'commons'}, + {'t_id': 11, 'component': 'trac-migration-staging'}, + {'t_id': 17, 'component': 'trac-migration-staging'}, + ] + + output, expected_github = self.sut.orderTickets(tickets, []) + + commons_output = [t for t in output if t['component'] == 'commons'] + migration_output = [ + t for t in output if t['component'] == 'trac-migration-staging'] + + self.assertEqual( + [ + {'t_id': 7, 'component': 'commons'}, + {'t_id': 1, 'component': 'commons'}, + ], + commons_output + ) + self.assertEqual( + [ + {'t_id': 17, 'component': 'trac-migration-staging'}, + {'t_id': 11, 'component': 'trac-migration-staging'}, + ], + migration_output + ) + self.assertEqual( + expected_github, + [7, 8, 17, 18] + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/test_wiki_trac_rst_convert.py b/test/test_wiki_trac_rst_convert.py index ce70d2a..a5ece9c 100644 --- a/test/test_wiki_trac_rst_convert.py +++ b/test/test_wiki_trac_rst_convert.py @@ -1,6 +1,11 @@ import unittest -from wiki_trac_rst_convert import convert_content +import config_test +import wiki_trac_rst_convert + +# Monkeypatch the SUT to use the test config. 
+wiki_trac_rst_convert.config = config_test +convert_content = wiki_trac_rst_convert.convert_content class TracToGitHubRST(unittest.TestCase): diff --git a/ticket_migrate.py b/ticket_migrate.py new file mode 100755 index 0000000..f767475 --- /dev/null +++ b/ticket_migrate.py @@ -0,0 +1,947 @@ +#!/usr/bin/env python3 +# Migrate Trac tickets to GitHub, with the official (slow) API. +import datetime +import pprint +import requests +import sqlite3 +import sys +import time +from collections import deque +from typing import Union + +from wiki_trac_rst_convert import matches, sub + +try: + import config +except ModuleNotFoundError: + # In the tests, we monkeypatch this module. + config = None + +from trac2down import convert + +# Set to False to perform actual GitHub issue creation. +DRY_RUN = True + + +def main(): + """ + Read the Trac DB and post the open tickets to GitHub. + """ + tickets = list(select_tickets(read_trac_tickets())) + comments = list(read_trac_comments()) + np = NumberPredictor() + tickets, expected_numbers = np.orderTickets(tickets) + + ticket_mapping = get_ticket_mapping(tickets, expected_numbers) + + # Parse tickets into GitHub issue objects. + issues = list(GitHubRequest.fromTracDataMultiple( + tickets, ticket_mapping=ticket_mapping + )) + + output_stats(issues, expected_numbers) + + print("Issues parsed. Starting to submit them.\n" + "Please don't manually open issues or PRs until this is done.") + + for issue, expected_number in zip(issues, expected_numbers): + print(f"Processing GH {expected_number}") + issue.submit(expected_number) + issue.closeIfNeeded() + issue.submitToProject() + issue.submitMyComments(comments, ticket_mapping=ticket_mapping) + + print("Issue creation complete. You may now manually open issues and PRs.") + + +def select_tickets(tickets): + """ + Easy-to-edit method to choose tickets to submit. + Checks that the `t_id` is not in `tickets_created.tsv`. + Useful for creating tickets in multiple rounds. 
def get_ticket_mapping(tickets, expected_numbers):
    """
    Return a dictionary of all known Trac ID -> GitHub URL correspondences.

    The mapping starts from the tickets already recorded in
    `tickets_created.tsv` and is then overlaid with the entries of
    `tickets_expected_gold.tsv`; when both files know a Trac ID,
    the entry from `tickets_expected_gold.tsv` wins.

    `tickets` and `expected_numbers` are kept for call compatibility.
    They are not consulted: both sources are read from disk.
    """
    mapping = get_tickets()
    # dict.update() keeps the same "expected overrides created" precedence
    # as a key-by-key copy, without a loop variable that shadows the
    # `expected_numbers` parameter.
    mapping.update(get_tickets('tickets_expected_gold.tsv'))
    return mapping
def read_trac_comments():
    """
    Read the Trac comment data from the database.

    Yield one dict per `ticket_change` row whose field is `comment` and
    whose text is not empty.

    The last version is in the `newvalue` of the `comment` field.

    To find changed comments, check the `ticket_change` table in the DB
    for the `field` column having the value `_comment0`.
    """
    db = get_db()
    try:
        rows = db.execute(
            "SELECT * FROM ticket_change where field = 'comment';")
        for t_id, c_time, author, field, oldvalue, newvalue in rows:
            # Only return comments with actual truthy text.
            if not newvalue:
                continue

            yield {
                't_id': t_id,
                'c_time': c_time,
                'author': author,
                'field': field,
                'oldvalue': oldvalue,
                'newvalue': newvalue,
                }
    finally:
        # Runs once the generator is exhausted or closed, so the connection
        # opened by get_db() does not stay open for the rest of the run.
        db.close()
class NumberPredictor:
    """
    A best-effort algorithm to preserve issue IDs (named "numbers" in the API).
    """
    def __init__(self):
        """
        Store a cache of repository -> next issue numbers.
        """
        self.next_numbers = {}

    def requestNextNumber(self, repo):
        """
        Return the number expected for the next issue created in `repo`.

        Send GET requests for the latest issues and PRs,
        look at the largest number assigned so far, and increment by one.

        Remember the result in `self.next_numbers`, so that asking again
        for the same repository does not send any request.
        """
        if repo in self.next_numbers:
            return self.next_numbers[repo]

        last_issue_number = self._requestMaxNumber(repo, 'issues')
        last_pull_number = self._requestMaxNumber(repo, 'pulls')

        next_number = max(last_issue_number, last_pull_number) + 1
        print(f"Next issue for {repo} will be {next_number}.")

        # Actually remember the answer, as the docstring promises.
        self.next_numbers[repo] = next_number
        return next_number

    @staticmethod
    def _requestMaxNumber(repo, kind):
        """
        Get the largest GitHub number, for either tickets or pulls.
        `kind` is either "issues" or "pulls".
        Fortunately GitHub orders them newest first.

        Return 0 when the listing is empty.
        Raise KeyError when the payload has no numbered first entry,
        for example when it is not a list of issues.
        """
        tickets_or_pulls = requests.get(
            url=f'https://api.github.com/repos/{config.OWNER}/{repo}/{kind}',
            headers={'accept': 'application/vnd.github.v3+json'},
            auth=(config.OAUTH_USER, config.OAUTH_TOKEN),
            params={'state': 'all'},
            )
        try:
            last_number = tickets_or_pulls.json()[0]['number']
        except IndexError:
            # An empty list: nothing was created in this repository yet.
            last_number = 0
        except KeyError as error:
            # Keep the original lookup failure attached as the cause.
            raise KeyError(
                f"Couldn't get tickets from {config.OWNER}/{repo}."
                ) from error

        wait_for_rate_reset(tickets_or_pulls)

        return last_number

    def orderTickets(self, tickets):
        """
        Choose an order to create tickets on GitHub so that we maximize
        matches of GitHub IDs with Trac IDs.

        Return the ticket objects in order, and their expected GitHub numbers.
        Both lists cover all repositories, one repository after the other,
        and have the same length.
        """
        repositories = (
            list(config.REPOSITORY_MAPPING.values()) +
            [config.FALLBACK_REPOSITORY]
            )
        all_repo_ordered_tickets = []
        expected_github_numbers = []

        for repo in unique(repositories):
            print('processing repo', repo)
            start = self.requestNextNumber(repo)
            self.next_numbers[repo] = start

            tickets_by_id = {
                t['t_id']: t
                for t in select_tickets_for_repo(tickets, repo)}
            ordered_tickets = []

            # Tickets with an ID below the next GitHub number can no longer
            # match. Remember them, so we can use them to fill gaps.
            not_matching = deque(
                ticket
                for t_id, ticket in tickets_by_id.items()
                if t_id < start
                )

            # The range is fixed before the loop starts popping tickets.
            for github_number in range(start, start + len(tickets_by_id)):
                # Check if we have a ticket on this position.
                ticket = tickets_by_id.pop(github_number, None)
                if ticket:
                    ordered_tickets.append(ticket)
                elif not_matching:
                    # Use non-matching tickets to fill the gap,
                    # hoping that we eventually reach a matching one.
                    ordered_tickets.append(not_matching.popleft())
                else:
                    # Can't fill the gap. Sacrifice new tickets from the end.
                    t_id = max(tickets_by_id)
                    ordered_tickets.append(tickets_by_id.pop(t_id))

            # Add what's left of the non-matching.
            ordered_tickets.extend(not_matching)

            # And add to the all-repo list.
            all_repo_ordered_tickets.extend(ordered_tickets)

            # Compute GitHub numbers.
            github_end = start + len(ordered_tickets)
            expected_github_numbers.extend(range(start, github_end))

        assert len(all_repo_ordered_tickets) == len(expected_github_numbers)
        return all_repo_ordered_tickets, expected_github_numbers
+ """ + seen = set() + uniques = [] + for e in elements: + if e not in seen: + seen.add(e) + uniques.append(e) + + return uniques + + +def select_tickets_for_repo(tickets, repo: str): + """ + From a list of Trac tickets, + select the ones that will be posted to a given GitHub repository. + """ + return [t for t in tickets if get_repo(t['component']) == repo] + + +def get_repo(component): + """ + Given the Trac component, + choose the GitHub repository to create the issue in. + """ + return config.REPOSITORY_MAPPING.get(component, config.FALLBACK_REPOSITORY) + + +def output_stats(tickets, expected_numbers): + """ + Show how many tickets will preserve their Trac ID. + + Generate a file with the expected GitHub numbers. + """ + zipped = list(zip(tickets, expected_numbers)) + with open('tickets_expected.tsv', 'w') as f: + f.write('Trac link\tExpected GitHub link\n') + for t, e in zipped: + _github_link = github_link(t.repo, e) + f.write(f"{t.trac_url()}\t{_github_link}\n") + + match_count = sum(1 for t, e in zipped if t.t_id == e) + print('Expected GitHub numbers to match Trac ID: ' + f'{match_count} out of {len(tickets)}') + print('Check tickets_expected.tsv, and if correct, continue the debugger.') + import pdb + pdb.set_trace() + + +def github_link(repo, expected_number): + """ + Return the expected GitHub URL, + given the repository name and expected GitHub issue number. + """ + return f'https://github.com/{config.OWNER}/{repo}/issues/{expected_number}' + + +class GitHubRequest: + """ + Transform Trac tickets, comments, and their metadata to GitHub format, + and allow submitting that format. 
def submit(self, expected_number):
    """
    Execute the POST request to create a GitHub issue.

    On success, append the Trac URL -> GitHub URL pair to
    `tickets_created.tsv`, then remember the GitHub number and ID on
    this object.

    Nothing is recorded when `protected_request` gives back a falsy
    response, which is the case for the `None` of a dry run.

    Raise ValueError when GitHub assigned a number other than
    `expected_number`. The created issue is already written to
    `tickets_created.tsv` at that point.

    API Docs:
    https://docs.github.com/en/rest/reference/issues#create-an-issue
    """
    url = f'https://api.github.com/repos/{self.owner}/{self.repo}/issues'

    response = protected_request(url=url, data=self.data)

    if not response:
        # No issue was created, so there is no payload to read from.
        return

    # Decode the payload once, instead of once per field.
    created = response.json()
    github_url = created['html_url']

    # Remember the GitHub URL assigned to each ticket.
    with open('tickets_created.tsv', 'a') as f:
        f.write(f'{self.trac_url()}\t{github_url}\n')

    if created['number'] != expected_number:
        raise ValueError(
            f"Ticket number mismatch: "
            f"expected {expected_number}, created {github_url}.\n"
            f"Please manually add the comments and project of the issue, "
            f"close the issue if needed, "
            f"and then restart the script."
            )

    self.github_number = expected_number
    self.github_id = created['id']
def getOrCreateProject(self):
    """
    If a project for the given milestone exists, return its column IDs,
    otherwise create it and return its column IDs.
    Remembers projects in `projects_created.tsv`.

    Return a `(todo_id, done_id, rejected_id)` tuple.
    Return None when the ticket has no milestone, or when the creation
    request gave no response (as in a dry run).

    API docs:
    https://docs.github.com/en/rest/reference/projects#create-an-organization-project
    https://docs.github.com/en/rest/reference/projects#create-a-project-column
    """
    name = self.milestone
    if not name:
        # Some tickets don't have a milestone.
        return

    # Check whether we have already created the project.
    with open('projects_created.tsv') as f:
        for line in f:
            # Drop the line ending first: otherwise it stays glued to the
            # last column, and ends up inside the column URL.
            line_name, _, todo_id, done_id, rejected_id = (
                line.rstrip('\r\n').split('\t'))
            if line_name == name:
                return todo_id, done_id, rejected_id

    # We have not created the project. Create it.
    response = protected_request(
        url=f'https://api.github.com/orgs/{config.PROJECT_ORG}/projects',
        data={'name': name}
        )
    if response is None:
        # Nothing was created, so there is no project ID or columns URL
        # to continue with.
        return

    project = response.json()
    project_id = project['id']
    columns_url = project['columns_url']

    # Create 3 columns: To Do, Done, and Rejected.
    todo_resp = protected_request(columns_url, data={'name': 'To Do'})
    done_resp = protected_request(columns_url, data={'name': 'Done'})
    rejected_resp = protected_request(columns_url, data={
        'name': 'Rejected', 'body': 'duplicate, invalid, or wontfix'})

    # Close the project.
    protected_request(
        url=f'https://api.github.com/projects/{project_id}',
        data={'state': 'closed'},
        method=requests.patch,
        expected_status_code=200,
        )

    todo_id = todo_resp.json()['id']
    done_id = done_resp.json()['id']
    rejected_id = rejected_resp.json()['id']

    with open('projects_created.tsv', 'a') as f:
        f.write('\t'.join([
            name,
            str(project_id),
            str(todo_id),
            str(done_id),
            str(rejected_id),
            ]) + '\n')

    return todo_id, done_id, rejected_id
+ """ + return cls( + owner=config.OWNER, + repo=get_repo(component), + trac_id=kwargs['t_id'], + title=summary, + body=get_body( + description, data=kwargs, ticket_mapping=ticket_mapping), + closed=kwargs['status'] == 'closed', + resolution=kwargs['resolution'], + milestone=kwargs['milestone'], + labels=get_labels( + component, + priority, + keywords, + kwargs['status'], + kwargs['resolution'] + ), + assignees=get_assignees(owner), + ) + + @classmethod + def fromTracDataMultiple(cls, trac_data, ticket_mapping): + """ + Generate GitHubRequests from an iterable of dicts of Trac tickets. + """ + for ticket in trac_data: + yield cls.fromTracData( + **{**ticket, 'ticket_mapping': ticket_mapping} + ) + + @staticmethod + def commentFromTracData(trac_data, ticket_mapping): + """ + Convert Trac comment data to GitHub comment body as JSON. + """ + author, _ = config.USER_MAPPING.get( + trac_data['author'], + (trac_data['author'], 'ignored-email-field') + ) + + body = ( + f"Comment by {author} at {showtime(trac_data['c_time'])}.\n" + f"\n" + f"{parse_body(trac_data['newvalue'], ticket_mapping)}" + ) + + return {'body': body} + + def _submitComment(self, comment_data): + """ + Send a POST request to GitHub creating the comment from `comment_data`. + """ + url = f'https://api.github.com/repos/' \ + f'{self.owner}/{self.repo}/issues/{self.github_number}/comments' + + response = protected_request(url=url, data=comment_data) + + if response: + # Remember the GitHub URL assigned to each ticket. + with open('comments_created.tsv', 'a') as f: + github_url = response.json()['html_url'] + f.write(f'{self.trac_url()}\t{github_url}\n') + + def closeIfNeeded(self): + """ + If the ticket status is closed, + send a PATCH request to GitHub to close it. + Unfortunately we can't directly create it as closed. 
def wait_for_rate_reset(response):
    """
    Wait for a rate limit reset in case it is near exhaustion.

    Read `X-RateLimit-Remaining` and `X-RateLimit-Reset` from the headers
    of `response`. When fewer than 10 requests remain, sleep until one
    second after the reset time.

    Raise KeyError if `response` lacks one of the two headers.
    """
    remaining = int(response.headers['X-RateLimit-Remaining'])
    reset_time = int(response.headers['X-RateLimit-Reset'])
    if remaining < 10:
        # The reset time may already be behind us, and time.sleep()
        # refuses negative durations, so never go below zero.
        to_sleep = max(0, int(1 + reset_time - time.time()))
        print(
            f"Waiting {to_sleep}s (until {reset_time}) for rate limit reset.")
        time.sleep(to_sleep)
def get_assignees(owner):
    """
    Map the owner to the GitHub account.

    Return a list holding the GitHub user name found for `owner` in
    `config.USER_MAPPING`, or an empty list when `owner` is not mapped.
    """
    mapped = config.USER_MAPPING.get(owner)
    if mapped is None:
        # Test for the missing entry directly, rather than recognising it
        # by the wording of the TypeError that unpacking None would raise.
        return []

    github_user, _ = mapped
    return [github_user]
def labels_from_keywords(keywords: Union[str, None]):
    """
    Given the Trac `keywords` string, clean it up and parse it into a set.

    Keywords may be separated by commas, whitespace, or both.
    Known typos are fixed, and only the allowed keywords are returned.
    A warning is printed for the keywords that are discarded.
    """
    if keywords is None:
        return set()

    allowed_keyword_labels = {
        'design',
        'easy',
        'feature',
        'happy-hacking',
        'onboarding',
        'password-management',
        'perf-test',
        'remote-management',
        'salt',
        'scp',
        'security',
        'tech-debt',
        'twisted',
        'ux',
        }
    typo_fixes = {'tech-dept': 'tech-debt', 'tech-deb': 'tech-debt'}

    # A comma is a separator: turn it into a space instead of deleting it,
    # so that "easy,security" does not collapse into "easysecurity".
    # split() without arguments also drops the empty pieces.
    keywords = [
        typo_fixes.get(kw, kw)
        for kw in keywords.replace(',', ' ').split()
        ]
    discarded = [kw for kw in keywords if kw not in allowed_keyword_labels]

    if discarded:
        print("Warning: discarded keywords:", discarded)

    return {kw for kw in keywords if kw in allowed_keyword_labels}
def convert_issue_content(text, ticket_mapping):
    """
    Convert TracWiki text to GitHub Markdown.
    Change the ticket IDs to GitHub URLs according to the mapping.
    Ignore included images.

    Ticket references that are not in `ticket_mapping` are left alone,
    and a warning is printed for each of them.

    NOTE(review): assumes `sub` treats its first argument as a regular
    expression, as its name and the sibling `matches` call suggest.
    TODO confirm against wiki_trac_rst_convert.
    """
    text = text.replace(config.TRAC_TICKET_PREFIX, '#')
    ticket_re = '#([0-9]+)'
    for match in matches(ticket_re, text):
        try:
            github_url = ticket_mapping[int(match)]
        except KeyError:
            # We don't know this ticket. Leave it alone.
            print("Warning: unknown ticket: #" + str(match))
            continue

        new_ticket_id = github_url.rsplit('/', 1)[1]
        text = sub(
            # (?![0-9]) keeps `#1` from eating the start of `#12`.
            # (?<!\[) skips the `#N` inside a `[#N](url)` link produced
            # by an earlier iteration, so a link is never rewritten twice.
            rf'(?<!\[)#{match}(?![0-9])',
            f'[#{new_ticket_id}]({github_url})',
            text
            )

    return convert(text, base_path='')
+ """ + if not description.startswith('{{{'): + raise ValueError('Desc starts with ', description[:10]) + ending = description.find('}}}') + 3 + content = description[3:ending-3] + + if content.strip().startswith('#!rst'): + return ( + content.split('#!rst', 1)[1] + + parse_body(description[ending:], ticket_mapping) + ) + + return ( + '```' + content + '```' + + parse_body(description[ending:], ticket_mapping) + ) + + +def parse_backtick(description, ticket_mapping): + """ + Leave text as is until the closing backtick. + After that, let parse_body continue. + """ + if not description.startswith('`'): + raise ValueError('Desc starts with ', description[:10]) + description = description[1:] + ending = description.find('`') + 1 + return ( + '`' + description[:ending] + + parse_body(description[ending:], ticket_mapping) + ) + + +if __name__ == '__main__': + main() diff --git a/ticket_migrate_golden_comet_preview.py b/ticket_migrate_golden_comet_preview.py new file mode 100755 index 0000000..971fa02 --- /dev/null +++ b/ticket_migrate_golden_comet_preview.py @@ -0,0 +1,1035 @@ +#!/usr/bin/env python3 +# Migrate Trac tickets to GitHub. +# Uses the Golden Comet preview (fast) API: +# https://gist.github.com/jonmagic/5282384165e0f86ef105 + +import datetime +import pprint +import requests +import sqlite3 +import sys +import time +from collections import deque +from typing import Union + +from wiki_trac_rst_convert import matches, sub + +try: + import config +except ModuleNotFoundError: + # In the tests, we monkeypatch this module. + config = None + +from trac2down import convert + +# Set to False to perform actual GitHub issue creation. +DRY_RUN = True + + +def main(): + """ + Read the Trac DB and post the tickets to GitHub. 
+ """ + to_submit = list(select_tickets(read_trac_tickets())) + comments = list(read_trac_comments()) + submitted_already = get_tickets('tickets_created.tsv').values() + np = NumberPredictor() + to_submit, expected_numbers = np.orderTickets( + to_submit, already_created=submitted_already + ) + + ticket_mapping = get_ticket_mapping(to_submit, expected_numbers) + + # Parse tickets into GitHub issue objects. + issues = list(GitHubRequest.fromTracDataMultiple( + to_submit, ticket_mapping=ticket_mapping + )) + + output_stats(issues, expected_numbers) + + print("Issues parsed. Starting to submit them.\n" + "Please don't manually open issues or PRs until this is done.") + + for issue, expected_number in zip(issues, expected_numbers): + print(f"Processing GH {expected_number}") + issue.submit( + expected_number, + all_comments=comments, + ticket_mapping=ticket_mapping + ) + issue.submitToProject() + + print("Issue creation complete. You may now manually open issues and PRs.") + + +def select_tickets(tickets): + """ + Easy-to-edit method to choose tickets to submit. + Checks that the `t_id` is not in `tickets_created.tsv`. + Useful for creating tickets in multiple rounds. + """ + # Skip tickets that have already been created. + submitted_ids = get_tickets().keys() + tickets = [t for t in tickets if t['t_id'] not in submitted_ids] + + # return [t for t in tickets if t['t_id'] == 15] + # return [t for t in tickets if t['component'] == 'pr'] # DONE + # return [t for t in tickets if t['component'] == 'webadmin'] # DONE + # return [t for t in tickets if t['component'] == 'libs'] # DONE + # return [t for t in tickets if t['component'] == 'infrastructure'] # DONE + return tickets + + +def get_ticket_mapping(tickets, expected_numbers): + """ + Returns a dictionary of all known Trac ID -> GitHub URL correspondences. + The GitHub URL may either be expected, or read from tickets_created.tsv. 
def get_tickets(filename='tickets_created.tsv'):
    """
    Read `filename` (tickets_created.tsv by default), and return a
    dictionary of Trac ID -> GitHub URL for the tickets listed in it.

    Only the lines starting with `config.TRAC_TICKET_PREFIX` are used,
    so a header line is skipped.

    A missing file yields an empty dictionary: nothing was recorded yet.
    """
    created_tickets = {}
    try:
        f = open(filename)
    except FileNotFoundError:
        # The file does not exist yet, so there is nothing to report.
        return created_tickets

    with f:
        for line in f:
            if not line.startswith(config.TRAC_TICKET_PREFIX):
                continue
            trac_link, github_url = line.strip().split('\t')
            trac_id = trac_link.split(config.TRAC_TICKET_PREFIX)[1]
            created_tickets[int(trac_id)] = github_url
    return created_tickets
+ # https://www.sqlite.org/quirks.html#aggregate_queries_can_contain_non_aggregate_result_columns_that_are_not_in_the_group_by_clause + for row in db.execute( + """\ + SELECT * + FROM + (SELECT * + FROM ticket + LEFT JOIN ticket_change ON ticket.id = ticket_change.ticket + AND ticket_change.field = 'branch' + AND ticket_change.newvalue LIKE '%github%' + ORDER BY ticket.id, + ticket_change.time DESC) + GROUP BY id; + """): + ( + t_id, + t_type, + time, + changetime, + component, + severity, + priority, + owner, + reporter, + cc, + version, + milestone, + status, + resolution, + summary, + description, + keywords, + _ticket, + _time, + _author, + _field, + _oldvalue, + _newvalue, + ) = row + + yield { + 't_id': t_id, + 't_type': t_type, + 'time': time, + 'changetime': changetime, + 'component': component, + 'severity': severity, + 'priority': priority, + 'owner': owner, + 'reporter': reporter, + 'cc': cc, + 'version': version, + 'milestone': milestone, + 'status': status, + 'resolution': resolution, + 'summary': summary, + 'description': description, + 'keywords': keywords, + 'branch': _newvalue, + } + + +def read_trac_comments(): + """ + Read the Trac comment data from the database. + + The last version is in the `newvalue` of the `comment` field. + + To find changed comments, check the `ticket_change` table in the DB + for the `field` column having the value `_comment0`. + """ + db = get_db() + for row in db.execute( + "SELECT * FROM ticket_change where field = 'comment';"): + t_id, c_time, author, field, oldvalue, newvalue = row + + # Only return comments with actual truthy text. + if newvalue: + yield { + 't_id': t_id, + 'c_time': c_time, + 'author': author, + 'field': field, + 'oldvalue': oldvalue, + 'newvalue': newvalue, + } + + +def get_db(): + """ + Return a database connection. 
+ """ + if len(sys.argv) != 2: + print("Need to pass the path to Trac DB as argument.") + sys.exit(1) + db = sqlite3.connect(sys.argv[1]) + return db + + +class NumberPredictor: + """ + A best-effort algorithm to preserve issue IDs (named "numbers" in the API). + """ + def __init__(self): + """ + Store a cache of repository -> next issue numbers. + """ + self.next_numbers = {} + + def requestNextNumber(self, repo, tickets_from_file): + """ + Send GET requests for the latest issues and PRs, + look at the largest number assigned so far, and increment by one. + + Remember the result in `self.next_numbers`. + """ + if repo in self.next_numbers: + return self.next_numbers[repo] + + last_issue_number = self._requestMaxNumber(repo, 'issues') + last_pull_number = self._requestMaxNumber(repo, 'pulls') + last_ticket_submitted = self.getMaxCreatedTicketNumber( + repo, tickets_from_file) + + next_number = max( + last_issue_number, last_pull_number, last_ticket_submitted + ) + 1 + print(f"Next issue for {repo} will be {next_number}.") + + return next_number + + @staticmethod + def getMaxCreatedTicketNumber(repo, ticket_urls): + """ + Given a repository and a list of created GitHub issue URLs, + return the largest ticket number in that repository. + If no tickets match, return 0. + """ + repo_urls = [ + url for url in ticket_urls + if url.startswith( + f'https://github.com/{config.OWNER}/{repo}/issues/' + ) + ] + repo_nums = [int(url.rsplit('/', 1)[1]) for url in repo_urls] + + if not repo_nums: + return 0 + return max(repo_nums) + + @staticmethod + def _requestMaxNumber(repo, kind): + """ + Get the largest GitHub number, for either tickets or pulls. + `kind` is either "issues" or "pulls". + + By default GitHub orders them newest first. 
+ + Issue API docs: + https://docs.github.com/en/rest/reference/issues#list-repository-issues + PR API docs: + https://docs.github.com/en/rest/reference/pulls#list-pull-requests + """ + tickets_or_pulls = requests.get( + url=f'https://api.github.com/repos/{config.OWNER}/{repo}/{kind}', + headers={'accept': 'application/vnd.github.v3+json'}, + auth=(config.OAUTH_USER, config.OAUTH_TOKEN), + params={'state': 'all'}, + ) + try: + last_number = tickets_or_pulls.json()[0]['number'] + except IndexError: + last_number = 0 + except KeyError: + raise KeyError(f"Couldn't get tickets from {config.OWNER}/{repo}.") + + wait_for_rate_reset(tickets_or_pulls) + + return last_number + + def orderTickets(self, tickets, already_created): + """ + Choose an order to create tickets on GitHub so that we maximize + matches of GitHub IDs with Trac IDs. + + Return the ticket objects in order, and their expected GitHub numbers. + """ + repositories = ( + [config.REPOSITORY_MAPPING[k] for k in config.REPOSITORY_MAPPING] + + [config.FALLBACK_REPOSITORY] + ) + all_repo_ordered_tickets = [] + expected_github_numbers = [] + + for repo in unique(repositories): + print('processing repo', repo) + self.next_numbers[repo] = self.requestNextNumber( + repo, already_created) + + tickets_by_id = { + t['t_id']: t + for t in select_tickets_for_repo(tickets, repo)} + ordered_tickets = [] + not_matching = deque() + + # Remember tickets not matching, which we can use to fill gaps. + for t_id in list(tickets_by_id.keys()): + if t_id < self.next_numbers[repo]: + not_matching.append(tickets_by_id[t_id]) + + start = self.next_numbers[repo] + end = start + len(tickets_by_id) + for github_number in range(start, end): + # Check if we have a ticket on this position. + ticket = tickets_by_id.pop(github_number, None) + if ticket: + ordered_tickets.append(ticket) + continue + + try: + # Use non-matching tickets to fill the gap, + # hoping that we eventually reach a matching one. 
+ ticket = not_matching.popleft() + ordered_tickets.append(ticket) + except IndexError: + # Can't fill the gap. Sacrifice new tickets from the end. + t_id = max(tickets_by_id.keys()) + ordered_tickets.append(tickets_by_id.pop(t_id)) + + # Add what's left of the non-matching. + ordered_tickets.extend(not_matching) + + # And add to the all-repo list. + all_repo_ordered_tickets.extend(ordered_tickets) + + # Compute GitHub numbers. + github_end = start + len(ordered_tickets) + expected_github_numbers.extend(range(start, github_end)) + + assert len(all_repo_ordered_tickets) == len(expected_github_numbers) + return all_repo_ordered_tickets, expected_github_numbers + + +def unique(elements): + """ + Discard duplicate items, while preserving order. + """ + seen = set() + uniques = [] + for e in elements: + if e not in seen: + seen.add(e) + uniques.append(e) + + return uniques + + +def select_tickets_for_repo(tickets, repo: str): + """ + From a list of Trac tickets, + select the ones that will be posted to a given GitHub repository. + """ + return [t for t in tickets if get_repo(t['component']) == repo] + + +def get_repo(component): + """ + Given the Trac component, + choose the GitHub repository to create the issue in. + """ + return config.REPOSITORY_MAPPING.get(component, config.FALLBACK_REPOSITORY) + + +def output_stats(tickets, expected_numbers): + """ + Show how many tickets will preserve their Trac ID. + + Generate a file with the expected GitHub numbers. 
+ """ + zipped = list(zip(tickets, expected_numbers)) + with open('tickets_expected.tsv', 'w') as f: + f.write('Trac link\tExpected GitHub link\n') + for t, e in zipped: + _github_link = github_link(t.repo, e) + f.write(f"{t.trac_url()}\t{_github_link}\n") + + match_count = sum(1 for t, e in zipped if t.t_id == e) + print('Expected GitHub numbers to match Trac ID: ' + f'{match_count} out of {len(tickets)}') + print('Check tickets_expected.tsv, and if correct, continue the debugger.') + import pdb + pdb.set_trace() + + +def github_link(repo, expected_number): + """ + Return the expected GitHub URL, + given the repository name and expected GitHub issue number. + """ + return f'https://github.com/{config.OWNER}/{repo}/issues/{expected_number}' + + +class GitHubRequest: + """ + Transform Trac tickets, comments, and their metadata to GitHub format, + and allow submitting that format. + """ + def __init__( + self, owner, repo, trac_id, + title, body, closed, resolution, milestone, labels, assignees, + created_at, updated_at + ): + self.owner = owner + self.repo = repo + self.t_id = trac_id + self.closed = closed + self.resolution = resolution + self.milestone = milestone + self.data = { + 'title': title, + 'body': body, + 'labels': labels, + 'closed': closed, + } + if assignees: + self.data['assignee'] = assignees[0] + + # We used updated_at, because some tickets were repurposed, + # and it was more meaningful as the GitHub created_at. + self.data['created_at'] = updated_at + self.data['updated_at'] = updated_at + if closed: + # We are assuming closure is the last modification. + self.data['closed_at'] = updated_at + + # We get the issue number and ID after submitting. + self.github_number = None + self.github_id = None + + def submit(self, expected_number, all_comments, ticket_mapping): + """ + Execute the POST request to create a GitHub issue. + + In case of an unexpected state, go into debug mode. 
+ + API Docs: + https://gist.github.com/jonmagic/5282384165e0f86ef105#supported-issue-and-comment-fields + Get issue ID after created: + https://docs.github.com/en/rest/reference/issues#get-an-issue + """ + url = f'https://api.github.com/repos/{self.owner}/{self.repo}/import/issues' + data = { + 'issue': self.data, + 'comments': [] + } + + for comment in (c for c in all_comments if c['t_id'] == self.t_id): + data['comments'].append( + self.commentFromTracData( + comment, ticket_mapping=ticket_mapping + ) + ) + + response = protected_request( + url=url, data=data, expected_status_code=202) + + if response: + # Remember the GitHub URL assigned to each ticket. + self.github_import_id = response.json()['id'] + + while response.json()['status'] == 'pending': + # Wait until our issue is created. + print('Waiting for import to finish...') + check_url = f'{url}/{self.github_import_id}' + response = protected_request( + url=check_url, + data=None, + method=requests.get, + expected_status_code=200 + ) + + if response.json()['status'] != 'imported': + response = debug_response(response) + + number = int(response.json()['issue_url'].rsplit('/', 1)[1]) + self.github_number = number + print(f"Import {response.json()['id']} succeeded for #{number}.") + + with open('tickets_created.tsv', 'a') as f: + github_url = ( + f'https://github.com/{self.owner}/{self.repo}/issues/' + f'{self.github_number}' + ) + f.write(f'{self.trac_url()}\t{github_url}\n') + + if number != expected_number: + raise ValueError( + f"Ticket number mismatch: " + f"expected {expected_number}, created {github_url}.\n" + f"Please manually add the comments and project of the issue, " + f"close the issue if needed, " + f"and then restart the script." 
+ ) + response = protected_request( + url=response.json()['issue_url'], + data=None, + method=requests.get, + expected_status_code=200 + ) + self.github_id = response.json()['id'] + print(f"Issue #{self.github_number} has GHID {self.github_id}.") + + def trac_url(self): + """ + Return this issue's Trac URL. + """ + return config.TRAC_TICKET_PREFIX + str(self.t_id) + + def getOrCreateProject(self): + """ + If a project for the given milestone exists, return its column IDs, + otherwise create it and return its column IDs. + Remembers projects in `projects_created.tsv`. + + API docs: + https://docs.github.com/en/rest/reference/projects#create-an-organization-project + https://docs.github.com/en/rest/reference/projects#create-a-project-column + """ + name = self.milestone + if not name: + # Some tickets don't have a milestone. + return + + # Check whether we have already created the project. + with open('projects_created.tsv') as f: + projects_data = [line.split('\t') for line in f] + for line_name, _, todo_id, done_id, rejected_id in projects_data: + if line_name == name: + return todo_id, done_id, rejected_id + + # We have not created the project. Create it. + response = protected_request( + url=f'https://api.github.com/orgs/{config.PROJECT_ORG}/projects', + data={'name': name} + ) + project_id = response.json()['id'] + columns_url = response.json()['columns_url'] + + # Create 3 columns: To Do, Done, and Rejected. + todo_resp = protected_request(columns_url, data={'name': 'To Do'}) + done_resp = protected_request(columns_url, data={'name': 'Done'}) + rejected_resp = protected_request(columns_url, data={ + 'name': 'Rejected', 'body': 'duplicate, invalid, or wontfix'}) + + # Close the project. 
+ protected_request( + url=f'https://api.github.com/projects/{project_id}', + data={'state': 'closed'}, + method=requests.patch, + expected_status_code=200, + ) + + todo_id = todo_resp.json()['id'] + done_id = done_resp.json()['id'] + rejected_id = rejected_resp.json()['id'] + + with open('projects_created.tsv', 'a') as f: + f.write('\t'.join([ + name, + str(project_id), + str(todo_id), + str(done_id), + str(rejected_id), + ]) + '\n') + + return todo_id, done_id, rejected_id + + def submitToProject(self): + """ + Add an issue identified by the GitHub global `id` + to the proper column of the proper project. + + API docs (very bad ones): + https://docs.github.com/en/rest/reference/projects#create-a-project-card + """ + column_ids = self.getOrCreateProject() + if not column_ids: + return + + todo_id, done_id, rejected_id = column_ids + + # Set the column ID according to issue status and resolution. + column_id = todo_id + if self.closed: + column_id = rejected_id + if self.resolution == 'fixed': + column_id = done_id + + url = f'https://api.github.com/projects/columns/{column_id}/cards' + data = { + 'content_id': self.github_id, + 'content_type': 'Issue' + } + protected_request(url, data) + + @classmethod + def fromTracData( + cls, + component, + owner, + summary, + description, + priority, + keywords, + ticket_mapping, + **kwargs): + """ + Create a GitHubRequest from Trac ticket data fields. 
+ """ + return cls( + owner=config.OWNER, + repo=get_repo(component), + trac_id=kwargs['t_id'], + title=summary, + body=get_body( + description, data=kwargs, ticket_mapping=ticket_mapping), + closed=kwargs['status'] == 'closed', + resolution=kwargs['resolution'], + milestone=kwargs['milestone'], + labels=get_labels( + component, + priority, + keywords, + kwargs['status'], + kwargs['resolution'] + ), + assignees=get_assignees(owner), + created_at=isotime(kwargs['time']), + updated_at=isotime(kwargs['changetime']) + ) + + @classmethod + def fromTracDataMultiple(cls, trac_data, ticket_mapping): + """ + Generate GitHubRequests from an iterable of dicts of Trac tickets. + """ + for ticket in trac_data: + yield cls.fromTracData( + **{**ticket, 'ticket_mapping': ticket_mapping} + ) + + @staticmethod + def commentFromTracData(trac_data, ticket_mapping): + """ + Convert Trac comment data to GitHub comment body as JSON. + """ + author, _ = config.USER_MAPPING.get( + trac_data['author'], + (trac_data['author'], 'ignored-email-field') + ) + + body = ( + f"Comment by {author} at {showtime(trac_data['c_time'])}.\n" + f"\n" + f"{parse_body(trac_data['newvalue'], ticket_mapping)}" + ) + + return {'created_at': isotime(trac_data['c_time']), 'body': body} + + +def protected_request( + url, data, method=requests.post, expected_status_code=201): + """ + Send a request if DRY_RUN is not truthy. + + In case of error, start the debugger. + In case of nearing rate limit, sleep until it resets. + """ + + if DRY_RUN: + print(f"Would call {method} on {url} with data:") + pprint.pprint(data) + return + + # Import takes more than 0.2 seconds. Avoid checking excessively. + # There is a risk of GitHub reporting that the import job is done, + # but accessing the issue immediately after returns a 404. 
+ # Also, there may be a risk of secondary rate limit: + # https://docs.github.com/en/rest/guides/best-practices-for-integrators#dealing-with-secondary-rate-limits + time.sleep(0.2) + + response = method( + url=url, + headers={'accept': 'application/vnd.github.golden-comet-preview+json'}, + json=data, + auth=(config.OAUTH_USER, config.OAUTH_TOKEN) + ) + + if response.status_code != expected_status_code: + print(f'Error: {method} request failed!') + debug_response(response) + + wait_for_rate_reset(response) + + return response + + +def debug_response(response): + """ + Debug a response from a server. + """ + print(response) + pprint.pprint(dict(response.headers)) + pprint.pprint(response.json()) + import pdb + pdb.set_trace() + + +def wait_for_rate_reset(response): + """ + Wait for a rate limit reset in case it is near exhaustion. + """ + remaining = int(response.headers['X-RateLimit-Remaining']) + reset_time = int(response.headers['X-RateLimit-Reset']) + if remaining < 50: + to_sleep = int(1 + reset_time - time.time()) + print( + f"Waiting {to_sleep / 60} minutes " + f"(until {reset_time}) for rate limit reset.") + time.sleep(to_sleep) + + +def get_body(description, data, ticket_mapping): + """ + Generate the ticket description body for GitHub. 
+ """ + reporters = get_assignees(data['reporter']) + if reporters: + reporter = reporters[0] + else: + reporter = data['reporter'] + + changed_message = '' + if data['changetime'] != data['time']: + changed_message = f"Last changed on {showtime(data['changetime'])}.\n" + + pr_message = '' + if data['branch']: + pr_message = f"PR at {data['branch']}.\n" + + body = ( + f"trac-{data['t_id']} {data['t_type']} was created by @{reporter}" + f" on {showtime(data['time'])}.\n" + f"{changed_message}" + f"{pr_message}" + "\n" + f"{parse_body(description, ticket_mapping)}" + ) + + return body + + +def get_labels(component, priority, keywords, status, resolution): + """ + Given the Trac component, priority, keywords, and resolution, + return the labels to apply on the GitHub issue. + """ + priority_label = labels_from_priority(priority) + keyword_labels = labels_from_keywords(keywords) + component_labels = labels_from_component(component) + status_labels = labels_from_status_and_resolution(status, resolution) + labels = ( + {priority_label}.union( + keyword_labels).union( + component_labels).union( + status_labels) + ) + return sorted(labels) + + +def get_assignees(owner): + """ + Map the owner to the GitHub account. + """ + try: + owner, _ = config.USER_MAPPING.get(owner) + return [owner] + except TypeError as error: + if 'cannot unpack non-iterable NoneType object' in str(error): + return [] + raise + + +def showtime(unix_usec): + """ + Convert a Trac timestamp to a human-readable date and time. + + Trac stores timestamps as microseconds since Epoch. + """ + timestamp = unix_usec // 1_000_000 + dt = datetime.datetime.utcfromtimestamp(timestamp) + return f"{dt.isoformat(sep=' ')}Z" + + +def isotime(unix_usec): + """ + Convert a Trac timestamp to a ISO 8601 date and time + fit for GitHub timestamps. + + Trac stores timestamps as microseconds since Epoch. 
+ """ + timestamp = unix_usec // 1_000_000 + dt = datetime.datetime.utcfromtimestamp(timestamp) + return f"{dt.isoformat(sep='T')}Z" + + +def labels_from_component(component: str): + """ + Given the Trac component, + choose the labels to apply on the GitHub issue, if any. + """ + if component in config.REPOSITORY_MAPPING: + return [] + + return [component] + + +def labels_from_keywords(keywords: Union[str, None]): + """ + Given the Trac `keywords` string, clean it up and parse it into a list. + """ + if keywords is None: + return set() + + keywords = keywords.replace(',', '') + keywords = keywords.split(' ') + + allowed_keyword_labels = { + 'design', + 'easy', + 'feature', + 'happy-hacking', + 'onboarding', + 'password-management', + 'perf-test', + 'remote-management', + 'salt', + 'scp', + 'security', + 'tech-debt', + 'twisted', + 'ux', + 'Adwords', 'Bing', 'PPC', + 'brink', + 'cdp', + 'cisco', + 'docker', + 'Documentation', + 'email', + 'events', + 'lets-encrypt', + 'macos', 'bind', + 'remote-manager', + 'syslog', + 'backup', + 'vpn', + 'file-server', + 'website', + 'windows', 'testing', + } + typo_fixes = {'tech-dept': 'tech-debt', 'tech-deb': 'tech-debt'} + + keywords = [typo_fixes.get(kw, kw) for kw in keywords if kw] + discarded = [kw for kw in keywords if kw not in allowed_keyword_labels] + + if discarded: + print("Warning: discarded keywords:", discarded) + + return {kw for kw in keywords if kw in allowed_keyword_labels} + + +def labels_from_priority(priority): + """ + Interpret None (missing) priority as Low. + """ + if priority is None: + return 'priority-low' + return 'priority-{}'.format(priority.lower()) + + +def labels_from_status_and_resolution(status, resolution): + """ + The resolution of a closed ticket is used, if there is one. + The resolution can be "fixed", "duplicate", "invalid", or "wontfix". + + If the ticket is not closed, the status is used, + if the status is not "assigned", "new", or "closed". 
+ """ + if status == 'closed' and resolution: + return {resolution} + + if status in ['in_work', 'needs_changes', 'needs_merge', 'needs_review']: + return {status.replace('_', '-')} + + return set() + + +def parse_body(description, ticket_mapping): + """ + Parses text with curly-bracketed or backtick-surrounded monospace. + Converts the curly brackets to backtick brackets. + """ + if not description: + return '' + + def found(index): + """Return true if an index represents a found position in a string.""" + return index != -1 + + def is_first_index(a, b): + """ + Returns true if index a occurs before b, or if b does not exist. + """ + return found(a) and (not found(b) or b > a) + + min_backtick = description.find('`') + min_curly = description.find('{{{') + + if is_first_index(min_curly, min_backtick): + return ( + convert_issue_content(description[:min_curly], ticket_mapping) + + parse_curly(description[min_curly:], ticket_mapping) + ) + + if is_first_index(min_backtick, min_curly): + return ( + convert_issue_content(description[:min_backtick], ticket_mapping) + + parse_backtick(description[min_backtick:], ticket_mapping) + ) + + return convert_issue_content(description, ticket_mapping) + + +def convert_issue_content(text, ticket_mapping): + """ + Convert TracWiki text to GitHub Markdown. + Change the ticket IDs to GitHub URLs according to the mapping. + Ignore included images. + """ + text = text.replace(config.TRAC_TICKET_PREFIX, '#') + ticket_re = '#([0-9]+)' + for match in matches(ticket_re, text): + try: + github_url = ticket_mapping[int(match)] + new_ticket_id = github_url.rsplit('/', 1)[1] + text = sub( + f'#{match}', + f'[#{new_ticket_id}]({github_url})', + text + ) + except KeyError: + # We don't know this ticket. Leave it alone. 
+ print("Warning: unknown ticket: #" + str(match)) + pass + + return convert(text, base_path='') + + +def parse_curly(description, ticket_mapping): + """ + Interpret curly brackets: + + - If a #!rst marker is the first token, + remove the brackets and return the text inside. + + - Otherwise, convert the brackets to triple backticks. + Leave text as is until the closing curly brackets, + which are again converted to triple backticks. + After that, let parse_body continue. + """ + if not description.startswith('{{{'): + raise ValueError('Desc starts with ', description[:10]) + ending = description.find('}}}') + 3 + content = description[3:ending-3] + + if content.strip().startswith('#!rst'): + return ( + content.split('#!rst', 1)[1] + + parse_body(description[ending:], ticket_mapping) + ) + + return ( + '```' + content + '```' + + parse_body(description[ending:], ticket_mapping) + ) + + +def parse_backtick(description, ticket_mapping): + """ + Leave text as is until the closing backtick. + After that, let parse_body continue. 
+ """ + if not description.startswith('`'): + raise ValueError('Desc starts with ', description[:10]) + description = description[1:] + ending = description.find('`') + 1 + return ( + '`' + description[:ending] + + parse_body(description[ending:], ticket_mapping) + ) + + +if __name__ == '__main__': + main() diff --git a/trac2down.py b/trac2down.py new file mode 100644 index 0000000..8472aae --- /dev/null +++ b/trac2down.py @@ -0,0 +1,282 @@ +# -*- coding: utf-8 -*- +# Modified from: +# https://github.com/tracboat/tracboat/blob/master/src/tracboat/trac2down.py + +''' +Copyright © 2013 + Eric van der Vlist + Shigeru KANEMOTO +Copyright © 2017-2018 + Elan Ruusamäe + +See license information at the bottom of this file +''' + +from __future__ import division +import datetime +import re +import os +import codecs +from pprint import pprint +from sys import exit + + +def convert(text, base_path, multilines=False, note_map={}, attachments_path=None, svn2git_revisions={}): + text = re.sub('\r\n', '\n', text) + text = re.sub(r'{{{(.*?)}}}', r'`\1`', text) + text = re.sub(r'(?sm){{{(\n?#![^\n]+)?\n(.*?)\n}}}', r'```\n\2\n```', text) + + text = text.replace('[[TOC]]', '') + text = text.replace('[[BR]]', '\n') + text = text.replace('[[br]]', '\n') + + if multilines: + text = re.sub(r'^\S[^\n]+([^=-_|])\n([^\s`*0-9#=->-_|])', r'\1 \2', text) + + text = re.sub(r'(?m)^======\s+(.*?)(\s+======)*$', r'###### \1', text) + text = re.sub(r'(?m)^=====\s+(.*?)(\s+=====)*$', r'##### \1', text) + text = re.sub(r'(?m)^====\s+(.*?)(\s+====)*$', r'#### \1', text) + text = re.sub(r'(?m)^===\s+(.*?)(\s+===)*$', r'### \1', text) + text = re.sub(r'(?m)^==\s+(.*?)(\s+==)*$', r'## \1', text) + text = re.sub(r'(?m)^=\s+(.*?)(\s+=)*$', r'# \1', text) + + # what these are supposed to do? space + unlimited space? forgotten \* escape? 
+# text = re.sub(r'^ * ', r'****', text) +# text = re.sub(r'^ * ', r'***', text) +# text = re.sub(r'^ * ', r'**', text) +# text = re.sub(r'^ * ', r'*', text) + text = re.sub(r'^ \d+. ', r'1.', text) + + # https://stackoverflow.com/a/16891418/2314626 + def remove_prefix(text, prefix): + if text.startswith(prefix): + return text[len(prefix):] + return text + + attachment_re = re.compile(r""" + \[\[attachment: + (?P.+? # match filename + (:(?P.+?) # match optional type + (:(?P.+?))? # match optional id (optional with type) + )? + ) + \]\] | + # alternative without brackets + attachment:(?P\S+) + """, re.X) + def attachment_replace(m): + """ + @link https://trac.edgewall.org/wiki/TracLinks#attachment:links + + The link syntax for attachments is as follows: + + attachment:the_file.txt creates a link to the attachment the_file.txt of the current object + attachment:the_file.txt:wiki:MyPage creates a link to the attachment the_file.txt of the MyPage wiki page + attachment:the_file.txt:ticket:753 creates a link to the attachment the_file.txt of the ticket 753 + + see attachments: [[attachment:x.req]] and [[attachment:req.log]]) + -> [req.log](/uploads/issue_142/req.log) + + """ + d = m.groupdict() + d['attachments_path'] = attachments_path + if d['filename2']: + d['filename'] = d['filename2'] + return "[%(filename)s](%(attachments_path)s/%(filename)s)" % d + + source_re = re.compile(r""" + # default one with brackets + \[(?:source|browser): + (?P[^]]+) + \] + + # alternative without brackets + | (?:source|browser):(?P\S+) + """, re.X) + def source_replace(m): + """ + @link https://trac.edgewall.org/wiki/TracLinks#source:links + + - `source:` and `browser:` + * simple paths (/dir/file) + * paths at a given revision (/dir/file@234) + * paths with line number marks (/dir/file@234:10,20-30) + * paths with line number anchor (/dir/file@234#L100) + Marks and anchor can be combined. + The revision must be present when specifying line numbers. 
+ In the few cases where it would be redundant (e.g. for tags), the + revision number itself can be omitted: /tags/v10/file@100-110#L99 + + """ + d = m.groupdict() + if d['path2']: + d['path'] = d['path2'] + path = str(d.get('path')) + path = remove_prefix(path, '/') # remove leading slash, it would not point to source otherwise + path = remove_prefix(path, 'trunk/') # remove branch name, assume default branch + d.update({ + 'git_path' : path, + }) + return "[%(git_path)s](%(git_path)s)" % d + + reply_re = re.compile(r'Replying to \[(?Pcomment|ticket):(?P\d+)\s+(?P[^]]+)\]:') + def reply_replace(m): + """ + Replying to [comment:4 glen]: + Replying to [ticket:41 katlyn]: + """ + + d = m.groupdict() + link_id = int(d['id']) + if d['type'] == 'comment': + # fallback to original id, can be fixed manually after import + note_id = note_map.get(link_id, link_id) + d['link'] = '#note_%d' % note_id + return "Replying to [%(username)s](%(link)s):" % d + elif d['type'] == 'ticket': + d['link'] = '#%d' % link_id + else: + raise Exception("Unsupported type: %s" % d['type']) + + return "Replying to [%(username)s](%(link)s):" % d + + commit_re = re.compile(r""" + \[(?P\d+)(?P/[^/]+)?\] # revision in brackets + | r(?P\d+) # revision with r-prefix + | \[(?P\d+)-(?P\d+)\] # revision range + | changeset:"?(?P\d+)"? 
+ """, re.X) + def commit_replace(m): + """ + (In [35214]) + [36859], [36860] + Changesets [36872-36874] + changeset:38934 + [changeset:"65152"] + """ + d = m.groupdict() + d[0] = str(m.group(0)) + if d['rev1'] and d['rev2']: + d['rev1'] = svn2git_revisions.get(d['rev1']) + d['rev2'] = svn2git_revisions.get(d['rev2']) + + return "[%(rev1)s..%(rev2)s](../compare/%(rev1)s...%(rev2)s)" % d + else: + if d['changeset']: + revision = str(d['changeset']) + else: + if d['revision2']: + d['revision'] = d['revision2'] + revision = str(d['revision']) + d['git_hash'] = svn2git_revisions.get(revision, d[0]) + + return "%(git_hash)s" % d + + image_re = re.compile(r'\[\[Image\((?:(?P(?:source|wiki)):)?(?P[^)]+)\)\]\]') + def image_replace(m): + """ + https://trac.edgewall.org/wiki/WikiFormatting#Images + + [[Image(picture.gif)]] Current page (Ticket, Wiki, Comment) + + [[Image(wiki:WikiFormatting:picture.gif)]] (referring to attachment on another page) + [[Image(ticket:1:picture.gif)]] (file attached to a ticket) + [[Image(htdocs:picture.gif)]] (referring to a file inside the environment htdocs directory) + [[Image(source:/trunk/trac/htdocs/trac_logo_mini.png)]] (a file in repository) + """ + + module = m.group('module') + path = m.group('path') + + d = m.groupdict() + d.update({ + 'base_path': os.path.relpath('/tree/master/', base_path), + 'upload_path' : '/uploads/migrated/%s' % path, + }) + + if module == 'source': + return '![](%(base_path)s/%(path)s)' % d + elif module == 'wiki': + id, file = path.split(':', 2) + d['upload_path'] = '/uploads/migrated/%s' % file + d['file'] = file + return '![%(file)s](%(upload_path)s)' % d + else: + if path.startswith('http'): + # [[Image(http://example.org/s.jpg)]] + return '![%(path)s](%(path)s)' % d + else: + return '![%(path)s](%(upload_path)s)' % d + + a = [] + is_table = False + for line in text.split('\n'): + # not blockquote? 
+ if not line.startswith(' '): + line = re.sub(r'\[(https?://[^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](\1)', line) + line = re.sub(r'\[wiki:([^\s\[\]]+)\s([^\[\]]+)\]', r'[\2](%s/\1)' % os.path.relpath('/wikis/', base_path), line) + line = re.sub(r'\[wiki:([^\s\[\]]+)\]', r'[\1](\1)', line) + line = re.sub(r'\!(([A-Z][a-z0-9]+){2,})', r'\1', line) + + line = source_re.sub(source_replace, line) + line = image_re.sub(image_replace, line) + line = reply_re.sub(reply_replace, line) + line = attachment_re.sub(attachment_replace, line) + line = commit_re.sub(commit_replace, line) + + # bold + line = re.sub(r"'''(.*?)'''", r'**\1**', line) + # italic + line = re.sub(r"''(.*?)''", r'_\1_', line) + # tables? + if line.startswith('||'): + if not is_table: + sep = re.sub(r'[^|]', r'-', line) + line = line + '\n' + sep + is_table = True + line = re.sub(r'\|\|', r'|', line) + else: + is_table = False + else: + is_table = False + a.append(line) + text = '\n'.join(a) + return text + +def save_file(text, name, version, date, author, path): + # We need to create a directory structure matching the hierarchical + # page title, e.g.: + # name == 'Chapter1/Main' + # the output file will be: + # Chapter1/Main.md + components = name.split("/") + name = components[-1] + levels = components[:-1] + if levels: + path = os.path.join(path, *levels) + if not os.path.exists(path): + os.makedirs(path) + filename = os.path.join(path, name + '.md') + with codecs.open(filename, 'w', encoding='utf-8') as fp: + # print >>fp, '' % name + # print >>fp, '' % version + # print >>fp, '' % date + # print >>fp, '' % author + fp.write(text) + +''' +This file is part of . + +This sotfware is free software: you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +This sotfware is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this library. If not, see . +''' diff --git a/wiki_trac_rst_convert.py b/wiki_trac_rst_convert.py index ef7dba9..b63d966 100644 --- a/wiki_trac_rst_convert.py +++ b/wiki_trac_rst_convert.py @@ -3,7 +3,11 @@ import sys import os -from config import TRAC_TICKET_PREFIX +try: + import config +except ModuleNotFoundError: + # In the tests, we monkeypatch this module. + config = None def main(): @@ -93,7 +97,7 @@ def _ensure_rst_content_directive(text: str): '.. contents::\n' '\n' + text - ) + ) def _trac_to_github_wiki_links(text: str): @@ -110,11 +114,11 @@ def _trac_to_github_wiki_links(text: str): # TracWiki markup: r'`\[wiki:"?([^ ]+?)"?]`:trac:', r'\[wiki:"?([^ ]+?)"?]', - ] + ] for link_re in link_matchers: - for title in _matches(link_re, text): - text = _sub(link_re, f'`<{_wiki_url(title)}>`_', text) + for title in matches(link_re, text): + text = sub(link_re, f'`<{_wiki_url(title)}>`_', text) return text @@ -128,8 +132,8 @@ def _tracwiki_to_rst_links(text: str): link_text = '[^]]+' link_re = rf'\[({url}) ({link_text})]' - for url, link_text in _matches(link_re, text): - text = _sub(link_re, f'`{link_text} <{url}>`_', text) + for url, link_text in matches(link_re, text): + text = sub(link_re, f'`{link_text} <{url}>`_', text) return text @@ -148,15 +152,15 @@ def _tracwiki_wiki_link_with_text_to_github_links(text: str): link_matchers = [ rf'`\[wiki:({title}) ({link_text})]`:trac:', rf'\[wiki:({title}) ({link_text})]', - ] + ] for link_re in link_matchers: - for title, link_text in _matches(link_re, text): + for title, link_text in matches(link_re, text): if title == link_text: - text = _sub(link_re, f'`<{_wiki_url(title)}>`_', text) + 
text = sub(link_re, f'`<{_wiki_url(title)}>`_', text) else: replacement = f'`{link_text} <{_wiki_url(title)}>`_' - text = _sub(link_re, replacement, text) + text = sub(link_re, replacement, text) return text @@ -167,12 +171,12 @@ def _trac_ticket_links(text: str): """ ticket_re = ':trac:`#([0-9]+)`' - for ticket in _matches(ticket_re, text): - text = _sub( + for ticket in matches(ticket_re, text): + text = sub( ticket_re, - f'`Trac #{ticket} <{TRAC_TICKET_PREFIX}{ticket}>`_', + f'`Trac #{ticket} <{config.TRAC_TICKET_PREFIX}{ticket}>`_', text - ) + ) return text @@ -193,8 +197,8 @@ def _tracwiki_heading_to_rst_heading(text: str): Content here """ heading_re = '^= (.*) =$' - for match in _matches(heading_re, text): - text = _sub(heading_re, _underline(match, '='), text) + for match in matches(heading_re, text): + text = sub(heading_re, _underline(match, '='), text) return text @@ -215,8 +219,8 @@ def _tracwiki_subheading_to_rst_subheading(text: str): Content here """ heading_re = '^== (.*) ==$' - for match in _matches(heading_re, text): - text = _sub(heading_re, _underline(match, '-'), text) + for match in matches(heading_re, text): + text = sub(heading_re, _underline(match, '-'), text) return text @@ -228,8 +232,8 @@ def _tracwiki_list_dedent(text: str): """ indented_list_item_re = r'^ \* ' - for _ in _matches(indented_list_item_re, text): - text = _sub(indented_list_item_re, '* ', text) + for _ in matches(indented_list_item_re, text): + text = sub(indented_list_item_re, '* ', text) return text @@ -266,14 +270,14 @@ def _underline(text: str, line_symbol: str): return text + "\n" + line_symbol * len(text) -def _matches(pattern: str, text: str): +def matches(pattern: str, text: str): """ Return all matches of a particular `pattern` occurring in `text`. 
""" return re.findall(pattern, text, flags=re.MULTILINE) -def _sub(regex: str, replacement: str, text: str): +def sub(regex: str, replacement: str, text: str): """ Substitute one occurrence of `regex` in `text` with `replacement`. Return the resulting new text.