Skip to content

Commit

Permalink
fix(commit-context): do not create suspect commits older than 1 year (#…
Browse files Browse the repository at this point in the history
…55013)

## Objective:

Update on the reverted PR:
#54866

Fixes SENTRY-14ZG

The problem was the wrong key. copy-paste error.

But I noticed from the captured exceptions that some of the results from
the github API does not return a date for the commit. So I added a
filter for to exclude those commits from the `max` function that
determines the most recent commit.

---------

Co-authored-by: Snigdha Sharma <snigdha.sharma@sentry.io>
Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
  • Loading branch information
3 people committed Aug 28, 2023
1 parent bb43c00 commit 6544999
Show file tree
Hide file tree
Showing 7 changed files with 177 additions and 26 deletions.
14 changes: 9 additions & 5 deletions src/sentry/integrations/github/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@

import logging
import re
from datetime import datetime
from datetime import timezone
from typing import Any, Collection, Dict, Mapping, Sequence

from django.http import HttpResponse
from django.utils.text import slugify
from django.utils.translation import gettext_lazy as _
from isodate import parse_datetime
from rest_framework.request import Request

from sentry import features, options
Expand Down Expand Up @@ -248,10 +249,9 @@ def get_commit_context(
blame
for blame in blame_range
if blame.get("startingLine", 0) <= lineno <= blame.get("endingLine", 0)
and blame.get("commit", {}).get("committedDate")
),
key=lambda blame: datetime.strptime(
blame.get("commit", {}).get("committedDate"), "%Y-%m-%dT%H:%M:%SZ"
),
key=lambda blame: parse_datetime(blame.get("commit", {}).get("committedDate")),
default={},
)
if not commit:
Expand All @@ -263,9 +263,13 @@ def get_commit_context(
if not commitInfo:
return None
else:
committed_date = parse_datetime(commitInfo.get("committedDate")).astimezone(
timezone.utc
)

return {
"commitId": commitInfo.get("oid"),
"committedDate": commitInfo.get("committedDate"),
"committedDate": committed_date,
"commitMessage": commitInfo.get("message"),
"commitAuthorName": commitInfo.get("author", {}).get("name"),
"commitAuthorEmail": commitInfo.get("author", {}).get("email"),
Expand Down
18 changes: 8 additions & 10 deletions src/sentry/integrations/gitlab/integration.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from __future__ import annotations

from datetime import datetime, timezone
from datetime import timezone
from typing import Any, Mapping, Sequence
from urllib.parse import urlparse

from django import forms
from django.http import HttpResponse
from django.utils.translation import gettext_lazy as _
from isodate import parse_datetime
from rest_framework.request import Request

from sentry.identity.gitlab import get_oauth_data, get_user_info
Expand Down Expand Up @@ -166,13 +167,10 @@ def get_commit_context(
except ApiError as e:
raise e

date_format_expected = "%Y-%m-%dT%H:%M:%S.%f%z"
try:
commit = max(
blame_range,
key=lambda blame: datetime.strptime(
blame.get("commit", {}).get("committed_date"), date_format_expected
),
(blame for blame in blame_range if blame.get("commit", {}).get("committed_date")),
key=lambda blame: parse_datetime(blame.get("commit", {}).get("committed_date")),
)
except (ValueError, IndexError):
return None
Expand All @@ -181,11 +179,11 @@ def get_commit_context(
if not commitInfo:
return None
else:
committed_date = "{}Z".format(
datetime.strptime(commitInfo.get("committed_date"), date_format_expected)
.astimezone(timezone.utc)
.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3]
# TODO(nisanthan): Use dateutil.parser.isoparse once on python 3.11
committed_date = parse_datetime(commitInfo.get("committed_date")).astimezone(
timezone.utc
)

return {
"commitId": commitInfo.get("id"),
"committedDate": committed_date,
Expand Down
8 changes: 7 additions & 1 deletion src/sentry/integrations/utils/commit_context.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import logging
from datetime import datetime, timedelta, timezone
from typing import Any, List, Mapping, Sequence, Tuple

import sentry_sdk
Expand Down Expand Up @@ -108,7 +109,12 @@ def find_commit_context_for_event(
},
)

if commit_context:
# Only return suspect commits that are less than a year old
if commit_context and is_date_less_than_year(commit_context["committedDate"]):
result.append((commit_context, code_mapping))

return result, installation


def is_date_less_than_year(date: datetime):
return date > datetime.now(tz=timezone.utc) - timedelta(days=365)
105 changes: 105 additions & 0 deletions tests/sentry/integrations/github/test_integration.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from datetime import datetime, timedelta, timezone
from typing import Any
from unittest import mock
from unittest.mock import patch
Expand All @@ -8,6 +9,7 @@
import pytest
import responses
from django.urls import reverse
from isodate import parse_datetime

import sentry
from sentry.api.utils import generate_organization_url
Expand Down Expand Up @@ -880,3 +882,106 @@ def test_get_trees_for_org_falls_back_to_cache_once_MAX_CONNECTION_ERRORS_is_hit
("baz", "master", []),
]
)

@responses.activate
def test_get_commit_context(self):
self.assert_setup_flow()
integration = Integration.objects.get(provider=self.provider.key)
with assume_test_silo_mode(SiloMode.REGION):
repo = Repository.objects.create(
organization_id=self.organization.id,
name="Test-Organization/foo",
url="https://github.com/Test-Organization/foo",
provider="github",
external_id=123,
config={"name": "Test-Organization/foo"},
integration_id=integration.id,
)

installation = integration.get_installation(self.organization.id)

filepath = "sentry/tasks.py"
event_frame = {
"function": "handle_set_commits",
"abs_path": "/usr/src/sentry/src/sentry/tasks.py",
"module": "sentry.tasks",
"in_app": True,
"lineno": 30,
"filename": "sentry/tasks.py",
}
ref = "master"
query = f"""query {{
repository(name: "foo", owner: "Test-Organization") {{
ref(qualifiedName: "{ref}") {{
target {{
... on Commit {{
blame(path: "{filepath}") {{
ranges {{
commit {{
oid
author {{
name
email
}}
message
committedDate
}}
startingLine
endingLine
age
}}
}}
}}
}}
}}
}}
}}"""
commit_date = (datetime.now(tz=timezone.utc) - timedelta(days=4)).strftime(
"%Y-%m-%dT%H:%M:%SZ"
)
responses.add(
method=responses.POST,
url="https://api.github.com/graphql",
json={
"query": query,
"data": {
"repository": {
"ref": {
"target": {
"blame": {
"ranges": [
{
"commit": {
"oid": "d42409d56517157c48bf3bd97d3f75974dde19fb",
"author": {
"date": commit_date,
"email": "nisanthan.nanthakumar@sentry.io",
"name": "Nisanthan Nanthakumar",
},
"message": "Add installation instructions",
"committedDate": commit_date,
},
"startingLine": 30,
"endingLine": 30,
"age": 3,
}
]
}
}
}
}
},
},
content_type="application/json",
)
commit_context = installation.get_commit_context(repo, filepath, ref, event_frame)

commit_context_expected = {
"commitId": "d42409d56517157c48bf3bd97d3f75974dde19fb",
"committedDate": parse_datetime(commit_date),
"commitMessage": "Add installation instructions",
"commitAuthorName": "Nisanthan Nanthakumar",
"commitAuthorEmail": "nisanthan.nanthakumar@sentry.io",
}

assert commit_context == commit_context_expected
3 changes: 2 additions & 1 deletion tests/sentry/integrations/gitlab/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import responses
from django.core.cache import cache
from django.test import override_settings
from isodate import parse_datetime

from fixtures.gitlab import GET_COMMIT_RESPONSE, GitLabTestCase
from sentry.integrations.gitlab import GitlabIntegrationProvider
Expand Down Expand Up @@ -425,7 +426,7 @@ def test_get_commit_context(self):

commit_context_expected = {
"commitId": "d42409d56517157c48bf3bd97d3f75974dde19fb",
"committedDate": "2015-12-18T08:12:22.000Z",
"committedDate": parse_datetime("2015-12-18T08:12:22.000Z"),
"commitMessage": "Add installation instructions",
"commitAuthorName": "Nisanthan Nanthakumar",
"commitAuthorEmail": "nisanthan.nanthakumar@sentry.io",
Expand Down
53 changes: 45 additions & 8 deletions tests/sentry/tasks/test_commit_context.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import timedelta
from datetime import datetime, timedelta
from datetime import timezone as datetime_timezone
from unittest.mock import Mock, patch

import pytest
Expand Down Expand Up @@ -83,7 +84,7 @@ class TestCommitContext(TestCommitContextMixin):
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "asdfwreqr",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "admin@localhost",
Expand Down Expand Up @@ -141,11 +142,47 @@ def test_failed_to_fetch_commit_context_record(self, mock_get_commit_context, mo
error_message="integration_failed",
)

@patch("sentry.tasks.commit_context.logger")
@patch(
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "asdfasdf",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=370)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "admin@localhost",
},
)
def test_found_commit_is_too_old(self, mock_get_commit_context, mock_logger):
with self.tasks():
assert not GroupOwner.objects.filter(group=self.event.group).exists()
event_frames = get_frame_paths(self.event)
process_commit_context(
event_id=self.event.event_id,
event_platform=self.event.platform,
event_frames=event_frames,
group_id=self.event.group_id,
project_id=self.event.project_id,
)

assert mock_logger.info.call_count == 1
mock_logger.info.assert_called_with(
"process_commit_context.find_commit_context",
extra={
"event": self.event.event_id,
"group": self.event.group_id,
"organization": self.event.group.project.organization_id,
"reason": "could_not_fetch_commit_context",
"code_mappings_count": 1,
"fallback": True,
},
)

@patch(
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "asdfasdf",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "admin@localhost",
Expand All @@ -170,7 +207,7 @@ def test_no_matching_commit_in_db(self, mock_get_commit_context):
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "asdfwreqr",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "admin@localhost",
Expand Down Expand Up @@ -255,7 +292,7 @@ def test_no_inapp_frame_in_stacktrace(self, mock_process_suspect_commits):
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "somekey",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "randomuser@sentry.io",
Expand Down Expand Up @@ -296,7 +333,7 @@ def test_commit_author_not_in_sentry(self, mock_get_commit_context):
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "somekey",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "randomuser@sentry.io",
Expand Down Expand Up @@ -337,7 +374,7 @@ def test_commit_author_no_user(self, mock_get_commit_context, mock_get_users_for
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "somekey",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "randomuser@sentry.io",
Expand Down Expand Up @@ -423,7 +460,7 @@ def after_return(self, status, retval, task_id, args, kwargs, einfo):
Mock(
return_value={
"commitId": "asdfwreqr",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "admin@localhost",
Expand Down
2 changes: 1 addition & 1 deletion tests/sentry/tasks/test_post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -1303,7 +1303,7 @@ def test_issue_owners_should_ratelimit(self, logger):
class ProcessCommitsTestMixin(BasePostProgressGroupMixin):
github_blame_return_value = {
"commitId": "asdfwreqr",
"committedDate": "",
"committedDate": (datetime.now(timezone.utc) - timedelta(days=2)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "admin@localhost",
Expand Down

0 comments on commit 6544999

Please sign in to comment.