From 0f59f65af397c5facfd1214829714220837ec153 Mon Sep 17 00:00:00 2001 From: Cathy Teng Date: Tue, 16 Jul 2024 10:39:24 -0700 Subject: [PATCH 1/3] pull request file issues API --- .../organization_pull_request_file_issues.py | 95 +++ src/sentry/api/urls.py | 8 + .../integrations/github/open_pr_comment.py | 640 ++++++++++++++++++ ...t_organization_pull_request_file_issues.py | 100 +++ 4 files changed, 843 insertions(+) create mode 100644 src/sentry/api/endpoints/organization_pull_request_file_issues.py create mode 100644 src/sentry/tasks/integrations/github/open_pr_comment.py create mode 100644 tests/sentry/api/endpoints/test_organization_pull_request_file_issues.py diff --git a/src/sentry/api/endpoints/organization_pull_request_file_issues.py b/src/sentry/api/endpoints/organization_pull_request_file_issues.py new file mode 100644 index 00000000000000..27225c64f646b8 --- /dev/null +++ b/src/sentry/api/endpoints/organization_pull_request_file_issues.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +from rest_framework import serializers, status +from rest_framework.request import Request +from rest_framework.response import Response + +from sentry.api.api_owners import ApiOwner +from sentry.api.api_publish_status import ApiPublishStatus +from sentry.api.base import region_silo_endpoint +from sentry.api.bases.organization import OrganizationEndpoint +from sentry.models.group import Group +from sentry.models.organization import Organization +from sentry.snuba.referrer import Referrer +from sentry.tasks.integrations.github.language_parsers import PATCH_PARSERS +from sentry.tasks.integrations.github.open_pr_comment import ( + get_projects_and_filenames_from_source_file, + get_top_5_issues_by_count_for_file, +) + + +class PullRequestFileSerializer(serializers.Serializer): + filename = serializers.CharField(required=True) + repo = serializers.CharField(required=True) + patch = serializers.CharField(required=True) + + def validate_filename(self, value): + if not value: + raise serializers.ValidationError("Filename is required") + + file_extension = value.split(".")[-1] + language_parser = PATCH_PARSERS.get(file_extension, None) + if not language_parser: + raise serializers.ValidationError("Unsupported file type") + + return value + + +@region_silo_endpoint +class OrganizationPullRequestFilesIssuesEndpoint(OrganizationEndpoint): + owner = ApiOwner.ECOSYSTEM + publish_status = { + "GET": ApiPublishStatus.EXPERIMENTAL, + } + + def post(self, request: Request, organization: Organization) -> Response: + serializer = PullRequestFileSerializer(data=request.data) + if not serializer.is_valid(): + return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST) + + filename = serializer.validated_data["filename"] + repo_name = serializer.validated_data["repo"] + patch = serializer.validated_data["patch"] + + projects, sentry_filenames = get_projects_and_filenames_from_source_file( + org_id=organization.id, repo_name=repo_name, pr_filename=filename + ) + + if not len(projects) or not len(sentry_filenames): + return Response([]) + + file_extension = filename.split(".")[-1] + language_parser = PATCH_PARSERS[file_extension] + + function_names = language_parser.extract_functions_from_patch(patch) + + if not len(function_names): + return Response([]) + + top_issues = get_top_5_issues_by_count_for_file( + list(projects), list(sentry_filenames), list(function_names) + ) + + group_id_to_info = {} + for issue in top_issues: + group_id = issue["group_id"] + group_id_to_info[group_id] = 
dict(filter(lambda k: k[0] != "group_id", issue.items()))
+
+        issues = Group.objects.filter(id__in=list(group_id_to_info.keys())).all()
+
+        pr_file_issues = [
+            {
+                "title": issue.title,
+                "culprit": issue.culprit,
+                "url": issue.get_absolute_url(),
+                "affected_users": issue.count_users_seen(
+                    referrer=Referrer.TAGSTORE_GET_GROUPS_USER_COUNTS_OPEN_PR_COMMENT.value
+                ),
+                "event_count": group_id_to_info[issue.id]["event_count"],
+                "function_name": group_id_to_info[issue.id]["function_name"],
+            }
+            for issue in issues
+        ]
+        pr_file_issues.sort(key=lambda k: k.get("event_count", 0), reverse=True)
+
+        return Response(pr_file_issues)
diff --git a/src/sentry/api/urls.py b/src/sentry/api/urls.py
index 8f5ed98b9e0be5..1bebab231a320f 100644
--- a/src/sentry/api/urls.py
+++ b/src/sentry/api/urls.py
@@ -21,6 +21,9 @@
 from sentry.api.endpoints.organization_projects_experiment import (
     OrganizationProjectsExperimentEndpoint,
 )
+from sentry.api.endpoints.organization_pull_request_file_issues import (
+    OrganizationPullRequestFilesIssuesEndpoint,
+)
 from sentry.api.endpoints.organization_spans_aggregation import OrganizationSpansAggregationEndpoint
 from sentry.api.endpoints.organization_stats_summary import OrganizationStatsSummaryEndpoint
 from sentry.api.endpoints.organization_unsubscribe import (
@@ -1861,6 +1864,11 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]:
             OrganizationPluginsConfigsEndpoint.as_view(),
             name="sentry-api-0-organization-plugins-configs",
         ),
+        re_path(
+            r"^(?P<organization_id_or_slug>[^\/]+)/pr-file-issues/$",
+            OrganizationPullRequestFilesIssuesEndpoint.as_view(),
+            name="sentry-api-0-organization-pr-file-issues",
+        ),
         re_path(
             r"^(?P<organization_id_or_slug>[^\/]+)/releases/$",
             OrganizationReleasesEndpoint.as_view(),
diff --git a/src/sentry/tasks/integrations/github/open_pr_comment.py b/src/sentry/tasks/integrations/github/open_pr_comment.py
new file mode 100644
index 00000000000000..043a436215c346
--- /dev/null
+++ b/src/sentry/tasks/integrations/github/open_pr_comment.py
@@ -0,0 +1,640 @@
+from __future__ import annotations
+
+import itertools
+import logging
+from datetime import UTC, datetime, timedelta
+from typing import Any
+
+from django.db.models import Value
+from django.db.models.functions import StrIndex
+from snuba_sdk import (
+    BooleanCondition,
+    BooleanOp,
+    Column,
+    Condition,
+    Direction,
+    Entity,
+    Function,
+    Op,
+    OrderBy,
+    Query,
+)
+from snuba_sdk import Request as SnubaRequest
+
+from sentry.constants import EXTENSION_LANGUAGE_MAP
+from sentry.integrations.github.client import GitHubApiClient
+from sentry.integrations.models.repository_project_path_config import RepositoryProjectPathConfig
+from sentry.integrations.services.integration import integration_service
+from sentry.models.group import Group, GroupStatus
+from sentry.models.organization import Organization
+from sentry.models.project import Project
+from sentry.models.pullrequest import CommentType, PullRequest
+from sentry.models.repository import Repository
+from sentry.shared_integrations.exceptions import ApiError
+from sentry.silo.base import SiloMode
+from sentry.snuba.dataset import Dataset
+from sentry.snuba.referrer import Referrer
+from sentry.tasks.base import instrumented_task
+from sentry.tasks.integrations.github.constants import (
+    ISSUE_LOCKED_ERROR_MESSAGE,
+    RATE_LIMITED_MESSAGE,
+    STACKFRAME_COUNT,
+)
+from sentry.tasks.integrations.github.language_parsers import PATCH_PARSERS
+from sentry.tasks.integrations.github.pr_comment import format_comment_url
+from sentry.tasks.integrations.github.utils import (
+    GithubAPIErrorType,
+    PullRequestFile,
+    PullRequestIssue,
+    create_or_update_comment,
+)
+from sentry.templatetags.sentry_helpers import small_count
+from sentry.types.referrer_ids import GITHUB_OPEN_PR_BOT_REFERRER
+from sentry.utils import metrics
+from sentry.utils.snuba import raw_snql_query
+
+logger = logging.getLogger(__name__)
+
+OPEN_PR_METRICS_BASE = "github_open_pr_comment.{key}"
+
+# Caps the number of files that can be modified in a PR to leave a comment
+OPEN_PR_MAX_FILES_CHANGED = 7
+# Caps the number of lines that can be modified in a PR to leave a comment
+OPEN_PR_MAX_LINES_CHANGED = 500
+
+OPEN_PR_COMMENT_BODY_TEMPLATE = """\
+## 🔍 Existing Issues For Review
+Your pull request is modifying functions with the following pre-existing issues:
+
+{issue_tables}
+---
+
+Did you find this useful? React with a 👍 or 👎"""
+
+OPEN_PR_ISSUE_TABLE_TEMPLATE = """\
+📄 File: **{filename}**
+
+| Function | Unhandled Issue |
+| :------- | :----- |
+{issue_rows}"""
+
+OPEN_PR_ISSUE_TABLE_TOGGLE_TEMPLATE = """\
+<details>
+<summary>📄 File: <b>{filename}</b> (Click to Expand)</summary>
+
+| Function | Unhandled Issue |
+| :------- | :----- |
+{issue_rows}
+</details>
""" + +OPEN_PR_ISSUE_DESCRIPTION_LENGTH = 52 + +MAX_RECENT_ISSUES = 5000 + + +def format_open_pr_comment(issue_tables: list[str]) -> str: + return OPEN_PR_COMMENT_BODY_TEMPLATE.format(issue_tables="\n".join(issue_tables)) + + +def format_open_pr_comment_subtitle(title_length, subtitle): + # the title length + " " + subtitle should be <= 52 + subtitle_length = OPEN_PR_ISSUE_DESCRIPTION_LENGTH - title_length - 1 + return subtitle[: subtitle_length - 3] + "..." if len(subtitle) > subtitle_length else subtitle + + +# for a single file, create a table +def format_issue_table( + diff_filename: str, issues: list[PullRequestIssue], patch_parsers: dict[str, Any], toggle: bool +) -> str: + language_parser = patch_parsers.get(diff_filename.split(".")[-1], None) + + if not language_parser: + return "" + + issue_row_template = language_parser.issue_row_template + + issue_rows = "\n".join( + [ + issue_row_template.format( + title=issue.title, + subtitle=format_open_pr_comment_subtitle(len(issue.title), issue.subtitle), + url=format_comment_url(issue.url, GITHUB_OPEN_PR_BOT_REFERRER), + event_count=small_count(issue.event_count), + function_name=issue.function_name, + affected_users=small_count(issue.affected_users), + ) + for issue in issues + ] + ) + + if toggle: + return OPEN_PR_ISSUE_TABLE_TOGGLE_TEMPLATE.format( + filename=diff_filename, issue_rows=issue_rows + ) + + return OPEN_PR_ISSUE_TABLE_TEMPLATE.format(filename=diff_filename, issue_rows=issue_rows) + + +# for a single file, get the contents +def get_issue_table_contents(issue_list: list[dict[str, Any]]) -> list[PullRequestIssue]: + group_id_to_info = {} + for issue in issue_list: + group_id = issue["group_id"] + group_id_to_info[group_id] = dict(filter(lambda k: k[0] != "group_id", issue.items())) + + issues = Group.objects.filter(id__in=list(group_id_to_info.keys())).all() + + pull_request_issues = [ + PullRequestIssue( + title=issue.title, + subtitle=issue.culprit, + url=issue.get_absolute_url(), + affected_users=issue.count_users_seen( + referrer=Referrer.TAGSTORE_GET_GROUPS_USER_COUNTS_OPEN_PR_COMMENT.value + ), + event_count=group_id_to_info[issue.id]["event_count"], + function_name=group_id_to_info[issue.id]["function_name"], + ) + for issue in issues + ] + pull_request_issues.sort(key=lambda k: k.event_count or 0, reverse=True) + + return pull_request_issues + + +# TODO(cathy): Change the client typing to allow for multiple SCM Integrations +def safe_for_comment( + gh_client: GitHubApiClient, repository: Repository, pull_request: PullRequest +) -> list[dict[str, str]]: + logger.info("github.open_pr_comment.check_safe_for_comment") + try: + pr_files = gh_client.get_pullrequest_files( + repo=repository.name, pull_number=pull_request.key + ) + except ApiError as e: + logger.info("github.open_pr_comment.api_error") + if e.json and RATE_LIMITED_MESSAGE in e.json.get("message", ""): + metrics.incr( + OPEN_PR_METRICS_BASE.format(key="api_error"), + tags={"type": GithubAPIErrorType.RATE_LIMITED.value, "code": e.code}, + ) + elif e.code == 404: + metrics.incr( + OPEN_PR_METRICS_BASE.format(key="api_error"), + tags={"type": GithubAPIErrorType.MISSING_PULL_REQUEST.value, "code": e.code}, + ) + else: + metrics.incr( + OPEN_PR_METRICS_BASE.format(key="api_error"), + tags={"type": GithubAPIErrorType.UNKNOWN.value, "code": e.code}, + ) + logger.exception("github.open_pr_comment.unknown_api_error", extra={"error": str(e)}) + return [] + + changed_file_count = 0 + changed_lines_count = 0 + filtered_pr_files = [] + + patch_parsers = PATCH_PARSERS + # 
NOTE: if we are testing beta patch parsers, add check here
+
+    for file in pr_files:
+        filename = file["filename"]
+        # we only count the file if it's modified and if the file extension is in the list of supported file extensions
+        # we cannot look at deleted or newly added files because we cannot extract functions from the diffs
+        if file["status"] != "modified" or filename.split(".")[-1] not in patch_parsers:
+            continue
+
+        changed_file_count += 1
+        changed_lines_count += file["changes"]
+        filtered_pr_files.append(file)
+
+    if changed_file_count > OPEN_PR_MAX_FILES_CHANGED:
+        metrics.incr(
+            OPEN_PR_METRICS_BASE.format(key="rejected_comment"),
+            tags={"reason": "too_many_files"},
+        )
+        return []
+    if changed_lines_count > OPEN_PR_MAX_LINES_CHANGED:
+        metrics.incr(
+            OPEN_PR_METRICS_BASE.format(key="rejected_comment"),
+            tags={"reason": "too_many_lines"},
+        )
+        return []
+
+    return filtered_pr_files
+
+
+def get_pr_files(pr_files: list[dict[str, str]]) -> list[PullRequestFile]:
+    # new files will not have sentry issues associated with them
+    # only keep files that have a patch we can parse
+    pullrequest_files = [
+        PullRequestFile(filename=file["filename"], patch=file["patch"])
+        for file in pr_files
+        if "patch" in file
+    ]
+
+    logger.info("github.open_pr_comment.pr_filenames", extra={"count": len(pullrequest_files)})
+
+    return pullrequest_files
+
+
+def get_projects_and_filenames_from_source_file(
+    org_id: int,
+    pr_filename: str,
+    repo_id: int | None = None,
+    repo_name: str | None = None,
+) -> tuple[set[Project], set[str]]:
+    # fetch the code mappings in which the source_root is a substring at the start of pr_filename
+    if not repo_id and not repo_name:
+        raise ValueError("repository ID or repository name is required")
+
+    if repo_id:
+        code_mappings = (
+            RepositoryProjectPathConfig.objects.filter(
+                organization_id=org_id,
+                repository_id=repo_id,
+            )
+            .annotate(substring_match=StrIndex(Value(pr_filename), "source_root"))
+            .filter(substring_match=1)
+        )
+    else:
+        code_mappings = (
+            RepositoryProjectPathConfig.objects.filter(
+                organization_id=org_id,
+                repository__name=repo_name,
+            )
+            .annotate(substring_match=StrIndex(Value(pr_filename), "source_root"))
+            .filter(substring_match=1)
+        )
+
+    project_list: set[Project] = set()
+    sentry_filenames = set()
+
+    if len(code_mappings):
+        for code_mapping in code_mappings:
+            project_list.add(code_mapping.project)
+            sentry_filenames.add(
+                pr_filename.replace(code_mapping.source_root, code_mapping.stack_root, 1)
+            )
+    return project_list, sentry_filenames
+
+
+def get_top_5_issues_by_count_for_file(
+    projects: list[Project], sentry_filenames: list[str], function_names: list[str]
+) -> list[dict[str, Any]]:
+    """
+    Given a list of projects, Github filenames reverse-codemapped into filenames in Sentry,
+    and function names representing the list of functions changed in a PR file, return a
+    sublist of the top 5 recent unhandled issues ordered by event count.
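+
+    Implementation note: an inner query counts events per (title, culprit,
+    group_id), keeping only unhandled events from the last 14 days whose
+    trailing stack frames hit one of the given filenames and one of the
+    changed function names; an outer query then squashes group_ids that
+    share a title and culprit and keeps the top 5 rows by event count.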
+ """ + if not len(projects): + return [] + + patch_parsers = PATCH_PARSERS + # NOTE: if we are testing beta patch parsers, add check here + + # fetches the appropriate parser for formatting the snuba query given the file extension + # the extension is never replaced in reverse codemapping + language_parser = patch_parsers.get(sentry_filenames[0].split(".")[-1], None) + + if not language_parser: + return [] + + group_ids = list( + Group.objects.filter( + first_seen__gte=datetime.now(UTC) - timedelta(days=90), + last_seen__gte=datetime.now(UTC) - timedelta(days=14), + status=GroupStatus.UNRESOLVED, + project__in=projects, + ) + .order_by("-times_seen") + .values_list("id", flat=True) + )[:MAX_RECENT_ISSUES] + project_ids = [p.id for p in projects] + + multi_if = language_parser.generate_multi_if(function_names) + + # fetch the count of events for each group_id + subquery = ( + Query(Entity("events")) + .set_select( + [ + Column("title"), + Column("culprit"), + Column("group_id"), + Function("count", [], "event_count"), + Function( + "multiIf", + multi_if, + "function_name", + ), + ] + ) + .set_groupby( + [ + Column("title"), + Column("culprit"), + Column("group_id"), + Column("exception_frames.function"), + ] + ) + .set_where( + [ + Condition(Column("project_id"), Op.IN, project_ids), + Condition(Column("group_id"), Op.IN, group_ids), + Condition(Column("timestamp"), Op.GTE, datetime.now() - timedelta(days=14)), + Condition(Column("timestamp"), Op.LT, datetime.now()), + # NOTE: ideally this would follow suspect commit logic + BooleanCondition( + BooleanOp.OR, + [ + BooleanCondition( + BooleanOp.AND, + [ + Condition( + Function( + "arrayElement", + (Column("exception_frames.filename"), i), + ), + Op.IN, + sentry_filenames, + ), + language_parser.generate_function_name_conditions( + function_names, i + ), + ], + ) + for i in range(-STACKFRAME_COUNT, 0) # first n frames + ], + ), + Condition(Function("notHandled", []), Op.EQ, 1), + ] + ) + .set_orderby([OrderBy(Column("event_count"), Direction.DESC)]) + ) + + # filter on the subquery to squash group_ids with the same title and culprit + # return the group_id with the greatest count of events + query = ( + Query(subquery) + .set_select( + [ + Column("function_name"), + Function( + "arrayElement", + (Function("groupArray", [Column("group_id")]), 1), + "group_id", + ), + Function( + "arrayElement", + (Function("groupArray", [Column("event_count")]), 1), + "event_count", + ), + ] + ) + .set_groupby( + [ + Column("title"), + Column("culprit"), + Column("function_name"), + ] + ) + .set_orderby([OrderBy(Column("event_count"), Direction.DESC)]) + .set_limit(5) + ) + + request = SnubaRequest( + dataset=Dataset.Events.value, + app_id="default", + tenant_ids={"organization_id": projects[0].organization_id}, + query=query, + ) + + try: + return raw_snql_query(request, referrer=Referrer.GITHUB_PR_COMMENT_BOT.value)["data"] + except Exception: + logger.exception( + "github.open_pr_comment.snuba_query_error", extra={"query": request.to_dict()["query"]} + ) + return [] + + +@instrumented_task( + name="sentry.tasks.integrations.open_pr_comment_workflow", silo_mode=SiloMode.REGION +) +def open_pr_comment_workflow(pr_id: int) -> None: + logger.info("github.open_pr_comment.start_workflow") + + # CHECKS + # check PR exists to get PR key + try: + pull_request = PullRequest.objects.get(id=pr_id) + except PullRequest.DoesNotExist: + logger.info("github.open_pr_comment.pr_missing") + metrics.incr(OPEN_PR_METRICS_BASE.format(key="error"), tags={"type": "missing_pr"}) + 
return
+
+    # check org option
+    org_id = pull_request.organization_id
+    try:
+        Organization.objects.get_from_cache(id=org_id)
+    except Organization.DoesNotExist:
+        logger.exception("github.open_pr_comment.org_missing")
+        metrics.incr(OPEN_PR_METRICS_BASE.format(key="error"), tags={"type": "missing_org"})
+        return
+
+    # check PR repo exists to get repo name
+    try:
+        repo = Repository.objects.get(id=pull_request.repository_id)
+    except Repository.DoesNotExist:
+        logger.info("github.open_pr_comment.repo_missing", extra={"organization_id": org_id})
+        metrics.incr(OPEN_PR_METRICS_BASE.format(key="error"), tags={"type": "missing_repo"})
+        return
+
+    # check integration exists to hit Github API with client
+    integration = integration_service.get_integration(integration_id=repo.integration_id)
+    if not integration:
+        logger.info("github.open_pr_comment.integration_missing", extra={"organization_id": org_id})
+        metrics.incr(OPEN_PR_METRICS_BASE.format(key="error"), tags={"type": "missing_integration"})
+        return
+
+    installation = integration.get_installation(organization_id=org_id)
+
+    client = installation.get_client()
+
+    # CREATING THE COMMENT
+
+    # fetch the files in the PR and determine if it is safe to comment
+    pr_files = safe_for_comment(gh_client=client, repository=repo, pull_request=pull_request)
+
+    if len(pr_files) == 0:
+        logger.info(
+            "github.open_pr_comment.not_safe_for_comment", extra={"file_count": len(pr_files)}
+        )
+        metrics.incr(
+            OPEN_PR_METRICS_BASE.format(key="error"),
+            tags={"type": "unsafe_for_comment"},
+        )
+        return
+
+    pullrequest_files = get_pr_files(pr_files)
+
+    issue_table_contents = {}
+    top_issues_per_file = []
+
+    patch_parsers = PATCH_PARSERS
+    # NOTE: if we are testing beta patch parsers, add check here
+
+    file_extensions = set()
+    # fetch issues related to the files
+    for file in pullrequest_files:
+        projects, sentry_filenames = get_projects_and_filenames_from_source_file(
+            org_id=org_id, repo_id=repo.id, pr_filename=file.filename
+        )
+        if not len(projects) or not len(sentry_filenames):
+            continue
+
+        file_extension = file.filename.split(".")[-1]
+        logger.info(
+            "github.open_pr_comment.file_extension",
+            extra={
+                "organization_id": org_id,
+                "repository_id": repo.id,
+                "extension": file_extension,
+            },
+        )
+
+        language_parser = patch_parsers.get(file.filename.split(".")[-1], None)
+        if not language_parser:
+            logger.info(
+                "github.open_pr_comment.missing_parser", extra={"extension": file_extension}
+            )
+            metrics.incr(
+                OPEN_PR_METRICS_BASE.format(key="missing_parser"),
+                tags={"extension": file_extension},
+            )
+            continue
+
+        function_names = language_parser.extract_functions_from_patch(file.patch)
+
+        if file_extension in ["js", "jsx"]:
+            logger.info(
+                "github.open_pr_comment.javascript",
+                extra={
+                    "organization_id": org_id,
+                    "repository_id": repo.id,
+                    "extension": file_extension,
+                    "has_function_names": bool(function_names),
+                },
+            )
+
+        if file_extension == "php":
+            logger.info(
+                "github.open_pr_comment.php",
+                extra={
+                    "organization_id": org_id,
+                    "repository_id": repo.id,
+                    "extension": file_extension,
+                    "has_function_names": bool(function_names),
+                },
+            )
+
+        if file_extension == "rb":
+            logger.info(
+                "github.open_pr_comment.ruby",
+                extra={
+                    "organization_id": org_id,
+                    "repository_id": repo.id,
+                    "extension": file_extension,
+                    "has_function_names": bool(function_names),
+                },
+            )
+
+        if not len(function_names):
+            continue
+
+        top_issues = get_top_5_issues_by_count_for_file(
+            list(projects), list(sentry_filenames), list(function_names)
+ ) + if not len(top_issues): + continue + + top_issues_per_file.append(top_issues) + file_extensions.add(file_extension) + + issue_table_contents[file.filename] = get_issue_table_contents(top_issues) + + if not len(issue_table_contents): + logger.info("github.open_pr_comment.no_issues") + # don't leave a comment if no issues for files in PR + metrics.incr(OPEN_PR_METRICS_BASE.format(key="no_issues")) + return + + # format issues per file into comment + issue_tables = [] + first_table = True + for file in pullrequest_files: + pr_filename = file.filename + issue_table_content = issue_table_contents.get(pr_filename, None) + + if issue_table_content is None: + continue + + if first_table: + issue_table = format_issue_table( + pr_filename, issue_table_content, patch_parsers, toggle=False + ) + first_table = False + else: + # toggle all tables but the first one + issue_table = format_issue_table( + pr_filename, issue_table_content, patch_parsers, toggle=True + ) + + issue_tables.append(issue_table) + + comment_body = format_open_pr_comment(issue_tables) + + # list all issues in the comment + issue_list: list[dict[str, Any]] = list(itertools.chain.from_iterable(top_issues_per_file)) + issue_id_list: list[int] = [issue["group_id"] for issue in issue_list] + + # pick one language from the list of languages in the PR for analytics + languages = [ + EXTENSION_LANGUAGE_MAP[extension] + for extension in file_extensions + if extension in EXTENSION_LANGUAGE_MAP + ] + language = languages[0] if len(languages) else "not found" + + try: + create_or_update_comment( + client=client, + repo=repo, + pr_key=pull_request.key, + comment_body=comment_body, + pullrequest_id=pull_request.id, + issue_list=issue_id_list, + comment_type=CommentType.OPEN_PR, + metrics_base=OPEN_PR_METRICS_BASE, + language=language, + ) + except ApiError as e: + if e.json: + if ISSUE_LOCKED_ERROR_MESSAGE in e.json.get("message", ""): + metrics.incr( + OPEN_PR_METRICS_BASE.format(key="error"), + tags={"type": "issue_locked_error"}, + ) + return + + elif RATE_LIMITED_MESSAGE in e.json.get("message", ""): + metrics.incr( + OPEN_PR_METRICS_BASE.format(key="error"), + tags={"type": "rate_limited_error"}, + ) + return + + metrics.incr(OPEN_PR_METRICS_BASE.format(key="error"), tags={"type": "api_error"}) + raise diff --git a/tests/sentry/api/endpoints/test_organization_pull_request_file_issues.py b/tests/sentry/api/endpoints/test_organization_pull_request_file_issues.py new file mode 100644 index 00000000000000..58874aae1c4ae6 --- /dev/null +++ b/tests/sentry/api/endpoints/test_organization_pull_request_file_issues.py @@ -0,0 +1,100 @@ +from unittest.mock import patch + +from sentry.models.group import Group +from sentry.testutils.cases import APITestCase +from tests.sentry.tasks.integrations.github.test_open_pr_comment import CreateEventTestCase + + +@patch( + "sentry.api.endpoints.organization_pull_request_file_issues.get_projects_and_filenames_from_source_file" +) +class OrganizationPullRequestFileIssuesTest(APITestCase, CreateEventTestCase): + endpoint = "sentry-api-0-organization-pr-file-issues" + method = "post" + + def setUp(self): + super().setUp() + self.user_id = "user_1" + self.app_id = "app_1" + + self.group_id_1 = [ + self._create_event( + culprit="issue1", + user_id=str(i), + filenames=["bar.py", "foo.py"], + function_names=["planet", "blue"], + ) + for i in range(5) + ][0].group.id + self.group_id_2 = [ + self._create_event( + culprit="issue2", + filenames=["foo.py", "bar.py"], + function_names=["blue", "planet"], + 
user_id=str(i), + ) + for i in range(6) + ][0].group.id + + self.gh_repo = self.create_repo( + name="getsentry/sentry", + provider="integrations:github", + integration_id=self.integration.id, + project=self.project, + url="https://github.com/getsentry/sentry", + ) + self.groups = [ + { + "group_id": g.id, + "event_count": 1000 * (i + 1), + "function_name": "function_" + str(i), + } + for i, g in enumerate(Group.objects.all()) + ] + self.groups.reverse() + + self.login_as(self.user) + + def test_simple(self, mock_reverse_codemappings): + mock_reverse_codemappings.return_value = ([self.project], ["foo.py"]) + + patch = """@@ -36,6 +36,7 @@\n def blue(self):""" + response = self.get_success_response( + self.organization.slug, + **{"filename": "foo.py", "repo": self.gh_repo.name, "patch": patch}, + ) + group_urls = [g["url"] for g in response.data] + assert group_urls == [ + f"http://testserver/organizations/{self.organization.slug}/issues/{self.group_id_2}/", + f"http://testserver/organizations/{self.organization.slug}/issues/{self.group_id_1}/", + ] + + def test_no_codemappings(self, mock_reverse_codemappings): + mock_reverse_codemappings.return_value = ([], []) + + patch = """@@ -36,6 +36,7 @@\n def blue(self):""" + response = self.get_success_response( + self.organization.slug, + **{"filename": "foo.py", "repo": self.gh_repo.name, "patch": patch}, + ) + assert response.data == [] + + def test_no_functions(self, mock_reverse_codemappings): + mock_reverse_codemappings.return_value = ([self.project], ["foo.py"]) + + patch = """@@ -36,6 +36,7 @@\n import pytest""" + response = self.get_success_response( + self.organization.slug, + **{"filename": "foo.py", "repo": self.gh_repo.name, "patch": patch}, + ) + assert response.data == [] + + def test_no_issues(self, mock_reverse_codemappings): + mock_reverse_codemappings.return_value = ([self.project], ["foo.py"]) + + patch = """@@ -36,6 +36,7 @@\n def purple(self):""" + response = self.get_success_response( + self.organization.slug, + **{"filename": "bar.py", "repo": self.gh_repo.name, "patch": patch}, + ) + assert response.data == [] From 152cc8e27f2d1edb7ba9eee9f409a26db9fd926b Mon Sep 17 00:00:00 2001 From: Cathy Teng Date: Tue, 16 Jul 2024 10:53:34 -0700 Subject: [PATCH 2/3] option to set limit for number of issues returned --- .../organization_pull_request_file_issues.py | 17 +++++++-- .../integrations/github/open_pr_comment.py | 11 ++++-- ...t_organization_pull_request_file_issues.py | 38 +++++++++++++++++++ .../github/tasks/test_open_pr_comment.py | 32 ++++++++-------- 4 files changed, 75 insertions(+), 23 deletions(-) diff --git a/src/sentry/api/endpoints/organization_pull_request_file_issues.py b/src/sentry/api/endpoints/organization_pull_request_file_issues.py index 27225c64f646b8..f2b385e9114dc1 100644 --- a/src/sentry/api/endpoints/organization_pull_request_file_issues.py +++ b/src/sentry/api/endpoints/organization_pull_request_file_issues.py @@ -14,7 +14,7 @@ from sentry.tasks.integrations.github.language_parsers import PATCH_PARSERS from sentry.tasks.integrations.github.open_pr_comment import ( get_projects_and_filenames_from_source_file, - get_top_5_issues_by_count_for_file, + get_top_issues_by_count_for_file, ) @@ -22,6 +22,7 @@ class PullRequestFileSerializer(serializers.Serializer): filename = serializers.CharField(required=True) repo = serializers.CharField(required=True) patch = serializers.CharField(required=True) + limit = serializers.IntegerField(required=False, default=5) def validate_filename(self, value): if not value: @@ 
-34,6 +35,12 @@ def validate_filename(self, value):
 
         return value
 
+    def validate_limit(self, value):
+        if value < 1 or value > 100:
+            raise serializers.ValidationError("Issue count must be between 1 and 100")
+
+        return value
+
 
 @region_silo_endpoint
 class OrganizationPullRequestFilesIssuesEndpoint(OrganizationEndpoint):
@@ -50,6 +57,7 @@ def post(self, request: Request, organization: Organization) -> Response:
         filename = serializer.validated_data["filename"]
         repo_name = serializer.validated_data["repo"]
         patch = serializer.validated_data["patch"]
+        limit = serializer.validated_data["limit"]
 
         projects, sentry_filenames = get_projects_and_filenames_from_source_file(
             org_id=organization.id, repo_name=repo_name, pr_filename=filename
@@ -66,8 +74,11 @@ def post(self, request: Request, organization: Organization) -> Response:
         if not len(function_names):
             return Response([])
 
-        top_issues = get_top_5_issues_by_count_for_file(
-            list(projects), list(sentry_filenames), list(function_names)
+        top_issues = get_top_issues_by_count_for_file(
+            projects=list(projects),
+            sentry_filenames=list(sentry_filenames),
+            function_names=list(function_names),
+            limit=limit,
         )
 
         group_id_to_info = {}
diff --git a/src/sentry/tasks/integrations/github/open_pr_comment.py b/src/sentry/tasks/integrations/github/open_pr_comment.py
index 043a436215c346..560cad0b84a85a 100644
--- a/src/sentry/tasks/integrations/github/open_pr_comment.py
+++ b/src/sentry/tasks/integrations/github/open_pr_comment.py
@@ -280,8 +280,11 @@ def get_projects_and_filenames_from_source_file(
     return project_list, sentry_filenames
 
 
-def get_top_5_issues_by_count_for_file(
-    projects: list[Project], sentry_filenames: list[str], function_names: list[str]
+def get_top_issues_by_count_for_file(
+    projects: list[Project],
+    sentry_filenames: list[str],
+    function_names: list[str],
+    limit: int = 5,
 ) -> list[dict[str, Any]]:
     """
     Given a list of projects, Github filenames reverse-codemapped into filenames in Sentry,
@@ -401,7 +404,7 @@ def get_top_5_issues_by_count_for_file(
             ]
         )
         .set_orderby([OrderBy(Column("event_count"), Direction.DESC)])
-        .set_limit(5)
+        .set_limit(limit)
     )
 
     request = SnubaRequest(
@@ -554,7 +557,7 @@ def open_pr_comment_workflow(pr_id: int) -> None:
         if not len(function_names):
             continue
 
-        top_issues = get_top_5_issues_by_count_for_file(
+        top_issues = get_top_issues_by_count_for_file(
             list(projects), list(sentry_filenames), list(function_names)
         )
         if not len(top_issues):
diff --git a/tests/sentry/api/endpoints/test_organization_pull_request_file_issues.py b/tests/sentry/api/endpoints/test_organization_pull_request_file_issues.py
index 58874aae1c4ae6..a1866603e9b531 100644
--- a/tests/sentry/api/endpoints/test_organization_pull_request_file_issues.py
+++ b/tests/sentry/api/endpoints/test_organization_pull_request_file_issues.py
@@ -1,5 +1,7 @@
 from unittest.mock import patch
 
+from rest_framework import status
+
 from sentry.models.group import Group
 from sentry.testutils.cases import APITestCase
 from tests.sentry.tasks.integrations.github.test_open_pr_comment import CreateEventTestCase
@@ -69,6 +71,42 @@ def test_simple(self, mock_reverse_codemappings):
         f"http://testserver/organizations/{self.organization.slug}/issues/{self.group_id_1}/",
     ]
 
+    def test_limit_validation(self, mock_reverse_codemappings):
+        mock_reverse_codemappings.return_value = ([self.project], ["foo.py"])
+
+        patch = """@@ -36,6 +36,7 @@\n def blue(self):"""
+        self.get_error_response(
+            self.organization.slug,
+            **{
+                "filename": "foo.py",
+                "repo": 
self.gh_repo.name, + "patch": patch, + "limit": -1, + }, + status_code=status.HTTP_400_BAD_REQUEST, + ) + + self.get_error_response( + self.organization.slug, + **{ + "filename": "foo.py", + "repo": self.gh_repo.name, + "patch": patch, + "limit": 101, + }, + status_code=status.HTTP_400_BAD_REQUEST, + ) + + self.get_success_response( + self.organization.slug, + **{ + "filename": "foo.py", + "repo": self.gh_repo.name, + "patch": patch, + "limit": 100, + }, + ) + def test_no_codemappings(self, mock_reverse_codemappings): mock_reverse_codemappings.return_value = ([], []) diff --git a/tests/sentry/integrations/github/tasks/test_open_pr_comment.py b/tests/sentry/integrations/github/tasks/test_open_pr_comment.py index 710722fee99fba..2a9295ddf5b238 100644 --- a/tests/sentry/integrations/github/tasks/test_open_pr_comment.py +++ b/tests/sentry/integrations/github/tasks/test_open_pr_comment.py @@ -14,7 +14,7 @@ get_issue_table_contents, get_pr_files, get_projects_and_filenames_from_source_file, - get_top_5_issues_by_count_for_file, + get_top_issues_by_count_for_file, open_pr_comment_workflow, safe_for_comment, ) @@ -372,7 +372,7 @@ def test_simple(self): group_id = [ self._create_event(function_names=["blue", "planet"], user_id=str(i)) for i in range(7) ][0].group.id - top_5_issues = get_top_5_issues_by_count_for_file( + top_5_issues = get_top_issues_by_count_for_file( [self.project], ["baz.py"], ["world", "planet"] ) @@ -399,7 +399,7 @@ def test_javascript_simple(self): ) for i in range(6) ][0].group.id - top_5_issues = get_top_5_issues_by_count_for_file( + top_5_issues = get_top_issues_by_count_for_file( [self.project], ["baz.js"], ["world", "planet"] ) top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] @@ -425,7 +425,7 @@ def test_php_simple(self): ) for i in range(6) ][0].group.id - top_5_issues = get_top_5_issues_by_count_for_file( + top_5_issues = get_top_issues_by_count_for_file( [self.project], ["baz.php"], ["world", "planet"] ) top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] @@ -451,7 +451,7 @@ def test_ruby_simple(self): ) for i in range(6) ][0].group.id - top_5_issues = get_top_5_issues_by_count_for_file( + top_5_issues = get_top_issues_by_count_for_file( [self.project], ["baz.rb"], ["world", "planet"] ) top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] @@ -466,13 +466,13 @@ def test_filters_resolved_issue(self): group.substatus = None group.save() - top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) + top_5_issues = get_top_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) assert len(top_5_issues) == 0 def test_filters_handled_issue(self): group_id = self._create_event(filenames=["bar.py", "baz.py"], handled=True).group.id - top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) + top_5_issues = get_top_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] assert group_id != self.group_id assert top_5_issue_ids == [self.group_id] @@ -481,7 +481,7 @@ def test_project_group_id_mismatch(self): # we fetch all group_ids that belong to the projects passed into the function self._create_event(project_id=self.another_org_project.id) - top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) + top_5_issues = get_top_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] assert top_5_issue_ids 
== [self.group_id] @@ -490,7 +490,7 @@ def test_filename_mismatch(self): filenames=["foo.py", "bar.py"], ).group.id - top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) + top_5_issues = get_top_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] assert group_id != self.group_id assert top_5_issue_ids == [self.group_id] @@ -500,7 +500,7 @@ def test_function_name_mismatch(self): function_names=["world", "hello"], ).group.id - top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) + top_5_issues = get_top_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] assert group_id != self.group_id assert top_5_issue_ids == [self.group_id] @@ -510,7 +510,7 @@ def test_not_first_frame(self): function_names=["world", "hello"], filenames=["baz.py", "bar.py"], culprit="hi" ).group.id - top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) + top_5_issues = get_top_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] function_names = [issue["function_name"] for issue in top_5_issues] assert group_id != self.group_id @@ -522,7 +522,7 @@ def test_not_within_frame_limit(self): filenames = ["baz.py"] + ["foo.py" for _ in range(STACKFRAME_COUNT)] group_id = self._create_event(function_names=function_names, filenames=filenames).group.id - top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) + top_5_issues = get_top_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] assert group_id != self.group_id assert top_5_issue_ids == [self.group_id] @@ -532,7 +532,7 @@ def test_event_too_old(self): timestamp=iso_format(before_now(days=15)), filenames=["bar.py", "baz.py"] ).group.id - top_5_issues = get_top_5_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) + top_5_issues = get_top_issues_by_count_for_file([self.project], ["baz.py"], ["world"]) top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] assert group_id != self.group_id assert top_5_issue_ids == [self.group_id] @@ -559,7 +559,7 @@ def test_squashes_same_title_culprit_issues(self): for i in range(5) ][0].group_id - top_5_issues = get_top_5_issues_by_count_for_file( + top_5_issues = get_top_issues_by_count_for_file( [self.project], ["baz.py"], ["world", "planet"] ) top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] @@ -613,7 +613,7 @@ def test_fetches_top_five_issues(self): # unrelated issue with same stack trace in different project self._create_event(project_id=self.another_org_project.id) - top_5_issues = get_top_5_issues_by_count_for_file( + top_5_issues = get_top_issues_by_count_for_file( [self.project], ["baz.py"], ["world", "planet"] ) top_5_issue_ids = [issue["group_id"] for issue in top_5_issues] @@ -665,7 +665,7 @@ def test_get_issue_table_contents(self): for i in range(2) ][0].group.id - top_5_issues = get_top_5_issues_by_count_for_file( + top_5_issues = get_top_issues_by_count_for_file( [self.project], ["baz.py"], ["world", "planet"] ) affected_users = [6, 5, 4, 3, 2] From ca48219157e2dd3b56677d023fb4ee12bbc3b3fa Mon Sep 17 00:00:00 2001 From: Cathy Teng Date: Mon, 5 Aug 2024 14:55:57 -0700 Subject: [PATCH 3/3] fix tests --- 
.../api/endpoints/organization_pull_request_file_issues.py | 2 +- .../sentry/integrations/github/tasks/test_open_pr_comment.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sentry/api/endpoints/organization_pull_request_file_issues.py b/src/sentry/api/endpoints/organization_pull_request_file_issues.py index f2b385e9114dc1..329ba765882686 100644 --- a/src/sentry/api/endpoints/organization_pull_request_file_issues.py +++ b/src/sentry/api/endpoints/organization_pull_request_file_issues.py @@ -46,7 +46,7 @@ def validate_limit(self, value): class OrganizationPullRequestFilesIssuesEndpoint(OrganizationEndpoint): owner = ApiOwner.ECOSYSTEM publish_status = { - "GET": ApiPublishStatus.EXPERIMENTAL, + "POST": ApiPublishStatus.EXPERIMENTAL, } def post(self, request: Request, organization: Organization) -> Response: diff --git a/tests/sentry/integrations/github/tasks/test_open_pr_comment.py b/tests/sentry/integrations/github/tasks/test_open_pr_comment.py index 2a9295ddf5b238..69cefeac3ab632 100644 --- a/tests/sentry/integrations/github/tasks/test_open_pr_comment.py +++ b/tests/sentry/integrations/github/tasks/test_open_pr_comment.py @@ -309,7 +309,7 @@ def test_get_projects_and_filenames_from_source_file(self): ] project_list, sentry_filenames = get_projects_and_filenames_from_source_file( - self.organization.id, self.gh_repo.id, filename + org_id=self.organization.id, repo_id=self.gh_repo.id, pr_filename=filename ) assert project_list == set(projects) assert sentry_filenames == set(correct_filenames) @@ -356,7 +356,7 @@ def test_get_projects_and_filenames_from_source_file_filters_repo(self): ] project_list, sentry_filenames = get_projects_and_filenames_from_source_file( - self.organization.id, self.gh_repo.id, filename + org_id=self.organization.id, repo_id=self.gh_repo.id, pr_filename=filename ) assert project_list == set(projects) assert sentry_filenames == set(correct_filenames)
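
Usage sketch: one way to exercise the new endpoint once the three patches above
are applied. The host, organization slug, and token below are placeholder
values; the payload fields mirror PullRequestFileSerializer from patch 1, and
`limit` (1-100, default 5) is the option added in patch 2.

    import requests

    SENTRY_HOST = "https://sentry.example.com"  # placeholder host
    ORG_SLUG = "my-org"  # placeholder organization slug
    API_TOKEN = "my-token"  # placeholder API token

    response = requests.post(
        f"{SENTRY_HOST}/api/0/organizations/{ORG_SLUG}/pr-file-issues/",
        headers={"Authorization": f"Bearer {API_TOKEN}"},
        json={
            # GitHub-side path of a modified file; its extension must have a
            # parser registered in PATCH_PARSERS or validation returns a 400
            "filename": "src/foo.py",
            # repository name as stored in Sentry, used to resolve code mappings
            "repo": "getsentry/sentry",
            # unified-diff patch for the file, as returned by the GitHub PR files API
            "patch": "@@ -36,6 +36,7 @@\n def blue(self):",
            # optional; must be between 1 and 100, defaults to 5
            "limit": 5,
        },
        timeout=30,
    )
    # each row carries title, culprit, url, affected_users, event_count, and
    # function_name, sorted by event_count in descending order
    for issue in response.json():
        print(issue["function_name"], issue["event_count"], issue["url"])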