From 840e8c4d6b5a2ec9a1f718a6eee979d3562ba09c Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Sun, 13 Oct 2024 22:41:33 +0300 Subject: [PATCH 1/6] support more types of github ticket url / references --- pr_agent/tools/ticket_pr_compliance_check.py | 34 ++++++++++++-------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py index 5dafecc1a..6881bb46f 100644 --- a/pr_agent/tools/ticket_pr_compliance_check.py +++ b/pr_agent/tools/ticket_pr_compliance_check.py @@ -34,20 +34,26 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path): """ github_tickets = [] try: - # example link to search for: https://github.com/Codium-ai/pr-agent-pro/issues/525 - pattern = r'https://github[^/]+/[^/]+/[^/]+/issues/\d+' # should support also github server (for example 'https://github.company.ai/Codium-ai/pr-agent-pro/issues/525') - - # Find all matches in the text - github_tickets = re.findall(pattern, pr_description) - - # Find all issues referenced like #123 and add them as https://github.com/{repo_path}/issues/{issue_number} - issue_number_pattern = r'#\d+' - issue_numbers = re.findall(issue_number_pattern, pr_description) - for issue_number in issue_numbers: - issue_number = issue_number[1:] # remove # - # check if issue_number is a valid number and len(issue_number) < 5 - if issue_number.isdigit() and len(issue_number) < 5: - github_tickets.append(f'https://github.com/{repo_path}/issues/{issue_number}') + # Pattern to match full GitHub issue URLs and shorthand notations like owner/repo#issue_number or https://github.com/owner/repo/issues/issue_number + pattern = r'(https://github[^/]+/[^/]+/[^/]+/issues/\d+)|(\b(\w+)/(\w+)#(\d+)\b)' + + matches = re.findall(pattern, pr_description) + for match in matches: + if match[0]: # Full URL match + github_tickets.append(match[0]) + else: # Shorthand notation match + owner, repo, issue_number = match[2], match[3], match[4] + github_tickets.append(f'https://github.com/{owner}/{repo}/issues/{issue_number}') + if not github_tickets: + # Search for #123 format within the same repo + issue_number_pattern = r'#\d+' + issue_numbers = re.findall(issue_number_pattern, pr_description) + for issue_number in issue_numbers: + issue_number = issue_number[1:] # remove # + if issue_number.isdigit() and len(issue_number) < 5: + issue_url = f'https://github.com/{repo_path}/issues/{issue_number}' + if issue_url not in github_tickets: + github_tickets.append(issue_url) except Exception as e: get_logger().error(f"Error extracting tickets error= {e}", artifact={"traceback": traceback.format_exc()}) From 22bf7af9bac0a7a9fdf093432f2022fdfd4ec999 Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Mon, 14 Oct 2024 08:44:01 +0300 Subject: [PATCH 2/6] refactor regex --- pr_agent/tools/ticket_pr_compliance_check.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py index 6881bb46f..25f0f2b0f 100644 --- a/pr_agent/tools/ticket_pr_compliance_check.py +++ b/pr_agent/tools/ticket_pr_compliance_check.py @@ -5,6 +5,8 @@ from pr_agent.git_providers import GithubProvider from pr_agent.log import get_logger +# Compile the regex pattern once, outside the function +GITHUB_TICKET_PATTERN = re.compile(r'(https://github[^/]+/[^/]+/[^/]+/issues/\d+)|(\b(\w+)/(\w+)#(\d+)\b)') def find_jira_tickets(text): # Regular expression patterns for JIRA tickets @@ -35,9 +37,8 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path): github_tickets = [] try: # Pattern to match full GitHub issue URLs and shorthand notations like owner/repo#issue_number or https://github.com/owner/repo/issues/issue_number - pattern = r'(https://github[^/]+/[^/]+/[^/]+/issues/\d+)|(\b(\w+)/(\w+)#(\d+)\b)' - - matches = re.findall(pattern, pr_description) + matches = GITHUB_TICKET_PATTERN.findall(pr_description) + for match in matches: if match[0]: # Full URL match github_tickets.append(match[0]) From eb804d0b342cfc5cf4149945950f8def1613ae86 Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Mon, 14 Oct 2024 10:08:36 +0300 Subject: [PATCH 3/6] clean --- pr_agent/tools/ticket_pr_compliance_check.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py index 25f0f2b0f..491cd6706 100644 --- a/pr_agent/tools/ticket_pr_compliance_check.py +++ b/pr_agent/tools/ticket_pr_compliance_check.py @@ -8,6 +8,7 @@ # Compile the regex pattern once, outside the function GITHUB_TICKET_PATTERN = re.compile(r'(https://github[^/]+/[^/]+/[^/]+/issues/\d+)|(\b(\w+)/(\w+)#(\d+)\b)') + def find_jira_tickets(text): # Regular expression patterns for JIRA tickets patterns = [ From 3a521226773c9af1bcd139ca15b162537cd6f313 Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Mon, 14 Oct 2024 10:30:07 +0300 Subject: [PATCH 4/6] improve and combine regex --- pr_agent/tools/ticket_pr_compliance_check.py | 30 +++++++++----------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py index 491cd6706..e9d89e7d9 100644 --- a/pr_agent/tools/ticket_pr_compliance_check.py +++ b/pr_agent/tools/ticket_pr_compliance_check.py @@ -6,7 +6,9 @@ from pr_agent.log import get_logger # Compile the regex pattern once, outside the function -GITHUB_TICKET_PATTERN = re.compile(r'(https://github[^/]+/[^/]+/[^/]+/issues/\d+)|(\b(\w+)/(\w+)#(\d+)\b)') +GITHUB_TICKET_PATTERN = re.compile( + r'(https://github[^/]+/[^/]+/[^/]+/issues/\d+)|(\b(\w+)/(\w+)#(\d+)\b)|(#\d+)' +) def find_jira_tickets(text): @@ -35,32 +37,26 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path): """ Extract all ticket links from PR description """ - github_tickets = [] + github_tickets = set() try: - # Pattern to match full GitHub issue URLs and shorthand notations like owner/repo#issue_number or https://github.com/owner/repo/issues/issue_number + # Use the updated pattern to find matches matches = GITHUB_TICKET_PATTERN.findall(pr_description) - + for match in matches: if match[0]: # Full URL match - github_tickets.append(match[0]) - else: # Shorthand notation match + github_tickets.add(match[0]) + elif match[1]: # Shorthand notation match: owner/repo#issue_number owner, repo, issue_number = match[2], match[3], match[4] - github_tickets.append(f'https://github.com/{owner}/{repo}/issues/{issue_number}') - if not github_tickets: - # Search for #123 format within the same repo - issue_number_pattern = r'#\d+' - issue_numbers = re.findall(issue_number_pattern, pr_description) - for issue_number in issue_numbers: - issue_number = issue_number[1:] # remove # + github_tickets.add(f'https://github.com/{owner}/{repo}/issues/{issue_number}') + else: # #123 format + issue_number = match[5][1:] # remove # if issue_number.isdigit() and len(issue_number) < 5: - issue_url = f'https://github.com/{repo_path}/issues/{issue_number}' - if issue_url not in github_tickets: - github_tickets.append(issue_url) + github_tickets.add(f'https://github.com/{repo_path}/issues/{issue_number}') except Exception as e: get_logger().error(f"Error extracting tickets error= {e}", artifact={"traceback": traceback.format_exc()}) - return github_tickets + return list(github_tickets) async def extract_tickets(git_provider): From d45a892fd222335948d8791dd1a138fb2f558033 Mon Sep 17 00:00:00 2001 From: Hussam Lawen Date: Mon, 14 Oct 2024 10:59:58 +0300 Subject: [PATCH 5/6] Update pr_agent/tools/ticket_pr_compliance_check.py Co-authored-by: codiumai-pr-agent-pro[bot] <151058649+codiumai-pr-agent-pro[bot]@users.noreply.github.com> --- pr_agent/tools/ticket_pr_compliance_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py index e9d89e7d9..58d8bd1ed 100644 --- a/pr_agent/tools/ticket_pr_compliance_check.py +++ b/pr_agent/tools/ticket_pr_compliance_check.py @@ -50,7 +50,7 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path): github_tickets.add(f'https://github.com/{owner}/{repo}/issues/{issue_number}') else: # #123 format issue_number = match[5][1:] # remove # - if issue_number.isdigit() and len(issue_number) < 5: + if issue_number.isdigit() and len(issue_number) < 5 and repo_path: github_tickets.add(f'https://github.com/{repo_path}/issues/{issue_number}') except Exception as e: get_logger().error(f"Error extracting tickets error= {e}", From 0946da381030e2e2b94485b867b799928acc3226 Mon Sep 17 00:00:00 2001 From: "Hussam.lawen" Date: Mon, 14 Oct 2024 14:31:34 +0300 Subject: [PATCH 6/6] support github enterprise --- pr_agent/tools/ticket_pr_compliance_check.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pr_agent/tools/ticket_pr_compliance_check.py b/pr_agent/tools/ticket_pr_compliance_check.py index 58d8bd1ed..dc760ed13 100644 --- a/pr_agent/tools/ticket_pr_compliance_check.py +++ b/pr_agent/tools/ticket_pr_compliance_check.py @@ -10,7 +10,6 @@ r'(https://github[^/]+/[^/]+/[^/]+/issues/\d+)|(\b(\w+)/(\w+)#(\d+)\b)|(#\d+)' ) - def find_jira_tickets(text): # Regular expression patterns for JIRA tickets patterns = [ @@ -33,7 +32,7 @@ def find_jira_tickets(text): return list(tickets) -def extract_ticket_links_from_pr_description(pr_description, repo_path): +def extract_ticket_links_from_pr_description(pr_description, repo_path, base_url_html='https://github.com'): """ Extract all ticket links from PR description """ @@ -47,11 +46,11 @@ def extract_ticket_links_from_pr_description(pr_description, repo_path): github_tickets.add(match[0]) elif match[1]: # Shorthand notation match: owner/repo#issue_number owner, repo, issue_number = match[2], match[3], match[4] - github_tickets.add(f'https://github.com/{owner}/{repo}/issues/{issue_number}') + github_tickets.add(f'{base_url_html.strip("/")}/{owner}/{repo}/issues/{issue_number}') else: # #123 format issue_number = match[5][1:] # remove # if issue_number.isdigit() and len(issue_number) < 5 and repo_path: - github_tickets.add(f'https://github.com/{repo_path}/issues/{issue_number}') + github_tickets.add(f'{base_url_html.strip("/")}/{repo_path}/issues/{issue_number}') except Exception as e: get_logger().error(f"Error extracting tickets error= {e}", artifact={"traceback": traceback.format_exc()}) @@ -64,7 +63,7 @@ async def extract_tickets(git_provider): try: if isinstance(git_provider, GithubProvider): user_description = git_provider.get_user_description() - tickets = extract_ticket_links_from_pr_description(user_description, git_provider.repo) + tickets = extract_ticket_links_from_pr_description(user_description, git_provider.repo, git_provider.base_url_html) tickets_content = [] if tickets: for ticket in tickets: