Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(commit-context): do not create suspect commits older than 1 year #55013

Merged
merged 13 commits into from
Aug 28, 2023
Merged
14 changes: 9 additions & 5 deletions src/sentry/integrations/github/integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,13 @@

import logging
import re
from datetime import datetime
from datetime import timezone
from typing import Any, Collection, Dict, Mapping, Sequence

from django.http import HttpResponse
from django.utils.text import slugify
from django.utils.translation import gettext_lazy as _
from isodate import parse_datetime
from rest_framework.request import Request

from sentry import features, options
Expand Down Expand Up @@ -248,10 +249,9 @@ def get_commit_context(
blame
for blame in blame_range
if blame.get("startingLine", 0) <= lineno <= blame.get("endingLine", 0)
and blame.get("commit", {}).get("committedDate")
),
key=lambda blame: datetime.strptime(
blame.get("commit", {}).get("committedDate"), "%Y-%m-%dT%H:%M:%SZ"
),
key=lambda blame: parse_datetime(blame.get("commit", {}).get("committedDate")),
default={},
)
if not commit:
Expand All @@ -263,9 +263,13 @@ def get_commit_context(
if not commitInfo:
return None
else:
committed_date = parse_datetime(commitInfo.get("committedDate")).astimezone(
timezone.utc
)

return {
"commitId": commitInfo.get("oid"),
"committedDate": commitInfo.get("committedDate"),
"committedDate": committed_date,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to codecov, the block above is not being tested. Please look into it before we merge this.
image

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added a test to increase coverage.

"commitMessage": commitInfo.get("message"),
"commitAuthorName": commitInfo.get("author", {}).get("name"),
"commitAuthorEmail": commitInfo.get("author", {}).get("email"),
Expand Down
18 changes: 8 additions & 10 deletions src/sentry/integrations/gitlab/integration.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from __future__ import annotations

from datetime import datetime, timezone
from datetime import timezone
from typing import Any, Mapping, Sequence
from urllib.parse import urlparse

from django import forms
from django.http import HttpResponse
from django.utils.translation import gettext_lazy as _
from isodate import parse_datetime
from rest_framework.request import Request

from sentry.identity.gitlab import get_oauth_data, get_user_info
Expand Down Expand Up @@ -166,13 +167,10 @@ def get_commit_context(
except ApiError as e:
raise e

date_format_expected = "%Y-%m-%dT%H:%M:%S.%f%z"
try:
commit = max(
blame_range,
key=lambda blame: datetime.strptime(
blame.get("commit", {}).get("committed_date"), date_format_expected
),
(blame for blame in blame_range if blame.get("commit", {}).get("committed_date")),
key=lambda blame: parse_datetime(blame.get("commit", {}).get("committed_date")),
)
except (ValueError, IndexError):
return None
Expand All @@ -181,11 +179,11 @@ def get_commit_context(
if not commitInfo:
return None
else:
committed_date = "{}Z".format(
datetime.strptime(commitInfo.get("committed_date"), date_format_expected)
.astimezone(timezone.utc)
.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3]
# TODO(nisanthan): Use dateutil.parser.isoparse once on python 3.11
committed_date = parse_datetime(commitInfo.get("committed_date")).astimezone(
timezone.utc
)

return {
"commitId": commitInfo.get("id"),
"committedDate": committed_date,
Expand Down
8 changes: 7 additions & 1 deletion src/sentry/integrations/utils/commit_context.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import logging
from datetime import datetime, timedelta, timezone
from typing import Any, List, Mapping, Sequence, Tuple

import sentry_sdk
Expand Down Expand Up @@ -108,7 +109,12 @@ def find_commit_context_for_event(
},
)

if commit_context:
# Only return suspect commits that are less than a year old
if commit_context and is_date_less_than_year(commit_context["committedDate"]):
result.append((commit_context, code_mapping))

return result, installation


def is_date_less_than_year(date: datetime):
return date > datetime.now(tz=timezone.utc) - timedelta(days=365)
105 changes: 105 additions & 0 deletions tests/sentry/integrations/github/test_integration.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from datetime import datetime, timedelta, timezone
from unittest.mock import MagicMock, patch
from urllib.parse import urlencode, urlparse

import responses
from django.urls import reverse
from isodate import parse_datetime

import sentry
from sentry.api.utils import generate_organization_url
Expand Down Expand Up @@ -867,3 +869,106 @@ def test_get_trees_for_org_falls_back_to_cache_once_MAX_CONNECTION_ERRORS_is_hit
("baz", "master", []),
]
)

@responses.activate
def test_get_commit_context(self):
self.assert_setup_flow()
integration = Integration.objects.get(provider=self.provider.key)
with assume_test_silo_mode(SiloMode.REGION):
repo = Repository.objects.create(
organization_id=self.organization.id,
name="Test-Organization/foo",
url="https://github.com/Test-Organization/foo",
provider="github",
external_id=123,
config={"name": "Test-Organization/foo"},
integration_id=integration.id,
)

installation = integration.get_installation(self.organization.id)

filepath = "sentry/tasks.py"
event_frame = {
"function": "handle_set_commits",
"abs_path": "/usr/src/sentry/src/sentry/tasks.py",
"module": "sentry.tasks",
"in_app": True,
"lineno": 30,
"filename": "sentry/tasks.py",
}
ref = "master"
query = f"""query {{
repository(name: "foo", owner: "Test-Organization") {{
ref(qualifiedName: "{ref}") {{
target {{
... on Commit {{
blame(path: "{filepath}") {{
ranges {{
commit {{
oid
author {{
name
email
}}
message
committedDate
}}
startingLine
endingLine
age
}}
}}
}}
}}
}}
}}
}}"""
commit_date = (datetime.now(tz=timezone.utc) - timedelta(days=4)).strftime(
"%Y-%m-%dT%H:%M:%SZ"
)
responses.add(
method=responses.POST,
url="https://api.github.com/graphql",
json={
"query": query,
"data": {
"repository": {
"ref": {
"target": {
"blame": {
"ranges": [
{
"commit": {
"oid": "d42409d56517157c48bf3bd97d3f75974dde19fb",
"author": {
"date": commit_date,
"email": "nisanthan.nanthakumar@sentry.io",
"name": "Nisanthan Nanthakumar",
},
"message": "Add installation instructions",
"committedDate": commit_date,
},
"startingLine": 30,
"endingLine": 30,
"age": 3,
}
]
}
}
}
}
},
},
content_type="application/json",
)
commit_context = installation.get_commit_context(repo, filepath, ref, event_frame)

commit_context_expected = {
"commitId": "d42409d56517157c48bf3bd97d3f75974dde19fb",
"committedDate": parse_datetime(commit_date),
"commitMessage": "Add installation instructions",
"commitAuthorName": "Nisanthan Nanthakumar",
"commitAuthorEmail": "nisanthan.nanthakumar@sentry.io",
}

assert commit_context == commit_context_expected
3 changes: 2 additions & 1 deletion tests/sentry/integrations/gitlab/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import responses
from django.core.cache import cache
from django.test import override_settings
from isodate import parse_datetime

from fixtures.gitlab import GET_COMMIT_RESPONSE, GitLabTestCase
from sentry.integrations.gitlab import GitlabIntegrationProvider
Expand Down Expand Up @@ -427,7 +428,7 @@ def test_get_commit_context(self):

commit_context_expected = {
"commitId": "d42409d56517157c48bf3bd97d3f75974dde19fb",
"committedDate": "2015-12-18T08:12:22.000Z",
"committedDate": parse_datetime("2015-12-18T08:12:22.000Z"),
"commitMessage": "Add installation instructions",
"commitAuthorName": "Nisanthan Nanthakumar",
"commitAuthorEmail": "nisanthan.nanthakumar@sentry.io",
Expand Down
53 changes: 45 additions & 8 deletions tests/sentry/tasks/test_commit_context.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from datetime import timedelta
from datetime import datetime, timedelta
from datetime import timezone as datetime_timezone
from unittest.mock import Mock, patch

import pytest
Expand Down Expand Up @@ -83,7 +84,7 @@ class TestCommitContext(TestCommitContextMixin):
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "asdfwreqr",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "admin@localhost",
Expand Down Expand Up @@ -141,11 +142,47 @@ def test_failed_to_fetch_commit_context_record(self, mock_get_commit_context, mo
error_message="integration_failed",
)

@patch("sentry.tasks.commit_context.logger")
@patch(
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "asdfasdf",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=370)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "admin@localhost",
},
)
def test_found_commit_is_too_old(self, mock_get_commit_context, mock_logger):
with self.tasks():
assert not GroupOwner.objects.filter(group=self.event.group).exists()
event_frames = get_frame_paths(self.event)
process_commit_context(
event_id=self.event.event_id,
event_platform=self.event.platform,
event_frames=event_frames,
group_id=self.event.group_id,
project_id=self.event.project_id,
)

assert mock_logger.info.call_count == 1
mock_logger.info.assert_called_with(
"process_commit_context.find_commit_context",
extra={
"event": self.event.event_id,
"group": self.event.group_id,
"organization": self.event.group.project.organization_id,
"reason": "could_not_fetch_commit_context",
"code_mappings_count": 1,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense for the reason to be more specific? e.g. "commit_is_older_than_a_year"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ehh it could be that only one of the code mappings/installations had the old commit. find_commit_context_for_event is a function that will return a list of commit contexts. I think this is valid when there is only 1 code mapping and that resulted in the old commit.

"fallback": True,
},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the fallback mean that we would use the previous version of suspect commits?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes.

)

@patch(
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "asdfasdf",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "admin@localhost",
Expand All @@ -170,7 +207,7 @@ def test_no_matching_commit_in_db(self, mock_get_commit_context):
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "asdfwreqr",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "admin@localhost",
Expand Down Expand Up @@ -255,7 +292,7 @@ def test_no_inapp_frame_in_stacktrace(self, mock_process_suspect_commits):
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "somekey",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "randomuser@sentry.io",
Expand Down Expand Up @@ -296,7 +333,7 @@ def test_commit_author_not_in_sentry(self, mock_get_commit_context):
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "somekey",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "randomuser@sentry.io",
Expand Down Expand Up @@ -337,7 +374,7 @@ def test_commit_author_no_user(self, mock_get_commit_context, mock_get_users_for
"sentry.integrations.github.GitHubIntegration.get_commit_context",
return_value={
"commitId": "somekey",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "randomuser@sentry.io",
Expand Down Expand Up @@ -423,7 +460,7 @@ def after_return(self, status, retval, task_id, args, kwargs, einfo):
Mock(
return_value={
"commitId": "asdfwreqr",
"committedDate": "2023-02-14T11:11Z",
"committedDate": (datetime.now(tz=datetime_timezone.utc) - timedelta(days=7)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "admin@localhost",
Expand Down
2 changes: 1 addition & 1 deletion tests/sentry/tasks/test_post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -1296,7 +1296,7 @@ def test_issue_owners_should_ratelimit(self, logger):
class ProcessCommitsTestMixin(BasePostProgressGroupMixin):
github_blame_return_value = {
"commitId": "asdfwreqr",
"committedDate": "",
"committedDate": (datetime.now(timezone.utc) - timedelta(days=2)),
"commitMessage": "placeholder commit message",
"commitAuthorName": "",
"commitAuthorEmail": "admin@localhost",
Expand Down
Loading