Skip to content

Commit

Permalink
ref(ownership): refactor post_process telemetry (#78979)
Browse files Browse the repository at this point in the history
I was finding this code very hard to read with all of the telemetry
context creations and indentation. No behavior changes intended.

- [x] Taking advantage of `metrics.wraps` and `sentry_sdk.trace` we can
get rid of most of these.
- [x] Additionally, i've removed the spans around the caching operations
as all of our cache get / sets get instrumented with spans anways.
- [x] Utilize return early pattern to prevent more nesting
- [x] we don't need a wrapper to try / except the whole function as post
process already does this
https://github.com/getsentry/sentry/blob/020c9f4a668fdf5adf64791efc856384db4cde87/src/sentry/tasks/post_process.py#L692
  • Loading branch information
JoshFerge authored Oct 11, 2024
1 parent 47dbada commit cdbc6c3
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 89 deletions.
2 changes: 2 additions & 0 deletions src/sentry/models/projectownership.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from collections.abc import Mapping, Sequence
from typing import TYPE_CHECKING, Any

import sentry_sdk
from django.db import models
from django.db.models.signals import post_delete, post_save
from django.utils import timezone
Expand Down Expand Up @@ -168,6 +169,7 @@ def _hydrate_rules(cls, project_id, rules, type: str = OwnerRuleType.OWNERSHIP_R

@classmethod
@metrics.wraps("projectownership.get_issue_owners")
@sentry_sdk.trace
def get_issue_owners(
cls, project_id, data, limit=2
) -> Sequence[tuple[Rule, Sequence[Team | RpcUser], str]]:
Expand Down
158 changes: 69 additions & 89 deletions src/sentry/tasks/post_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def _capture_group_stats(job: PostProcessJob) -> None:
metrics.incr("events.unique", tags={"platform": platform}, skip_internal=False)


@sentry_sdk.trace
def should_issue_owners_ratelimit(project_id: int, group_id: int, organization_id: int | None):
"""
Make sure that we do not accept more groups than the enforced_limit at the project level.
Expand Down Expand Up @@ -204,109 +205,87 @@ def should_issue_owners_ratelimit(project_id: int, group_id: int, organization_i
return len(groups) > enforced_limit


@metrics.wraps("post_process.handle_owner_assignment")
@sentry_sdk.trace
def handle_owner_assignment(job):
if job["is_reprocessed"]:
return

with sentry_sdk.start_span(op="tasks.post_process_group.handle_owner_assignment"):
try:
from sentry.models.groupowner import (
ASSIGNEE_DOES_NOT_EXIST_DURATION,
ASSIGNEE_EXISTS_DURATION,
ASSIGNEE_EXISTS_KEY,
ISSUE_OWNERS_DEBOUNCE_DURATION,
ISSUE_OWNERS_DEBOUNCE_KEY,
)
from sentry.models.projectownership import ProjectOwnership

event = job["event"]
project, group = event.project, event.group
# We want to debounce owner assignment when:
# - GroupOwner of type Ownership Rule || CodeOwner exist with TTL 1 day
# - we tried to calculate and could not find issue owners with TTL 1 day
# - an Assignee has been set with TTL of infinite
with metrics.timer("post_process.handle_owner_assignment"):
with sentry_sdk.start_span(op="post_process.handle_owner_assignment.ratelimited"):
if should_issue_owners_ratelimit(
project_id=project.id,
group_id=group.id,
organization_id=event.project.organization_id,
):
metrics.incr("sentry.task.post_process.handle_owner_assignment.ratelimited")
return
from sentry.models.groupowner import (
ASSIGNEE_DOES_NOT_EXIST_DURATION,
ASSIGNEE_EXISTS_DURATION,
ASSIGNEE_EXISTS_KEY,
ISSUE_OWNERS_DEBOUNCE_DURATION,
ISSUE_OWNERS_DEBOUNCE_KEY,
)
from sentry.models.projectownership import ProjectOwnership

with sentry_sdk.start_span(
op="post_process.handle_owner_assignment.cache_set_assignee"
):
# Is the issue already assigned to a team or user?
assignee_key = ASSIGNEE_EXISTS_KEY(group.id)
assignees_exists = cache.get(assignee_key)
if assignees_exists is None:
assignees_exists = group.assignee_set.exists()
# Cache for 1 day if it's assigned. We don't need to move that fast.
cache.set(
assignee_key,
assignees_exists,
(
ASSIGNEE_EXISTS_DURATION
if assignees_exists
else ASSIGNEE_DOES_NOT_EXIST_DURATION
),
)
event = job["event"]
project, group = event.project, event.group
# We want to debounce owner assignment when:
# - GroupOwner of type Ownership Rule || CodeOwner exist with TTL 1 day
# - we tried to calculate and could not find issue owners with TTL 1 day
# - an Assignee has been set with TTL of infinite

if should_issue_owners_ratelimit(
project_id=project.id,
group_id=group.id,
organization_id=event.project.organization_id,
):
metrics.incr("sentry.task.post_process.handle_owner_assignment.ratelimited")
return

if assignees_exists:
metrics.incr(
"sentry.task.post_process.handle_owner_assignment.assignee_exists"
)
return
# Is the issue already assigned to a team or user?
assignee_key = ASSIGNEE_EXISTS_KEY(group.id)
assignees_exists = cache.get(assignee_key)
if assignees_exists is None:
assignees_exists = group.assignee_set.exists()
# Cache for 1 day if it's assigned. We don't need to move that fast.
cache.set(
assignee_key,
assignees_exists,
(ASSIGNEE_EXISTS_DURATION if assignees_exists else ASSIGNEE_DOES_NOT_EXIST_DURATION),
)

with sentry_sdk.start_span(
op="post_process.handle_owner_assignment.debounce_issue_owners"
):
issue_owners_key = ISSUE_OWNERS_DEBOUNCE_KEY(group.id)
debounce_issue_owners = cache.get(issue_owners_key)
if assignees_exists:
metrics.incr("sentry.task.post_process.handle_owner_assignment.assignee_exists")
return

if debounce_issue_owners:
metrics.incr("sentry.tasks.post_process.handle_owner_assignment.debounce")
return
issue_owners_key = ISSUE_OWNERS_DEBOUNCE_KEY(group.id)
debounce_issue_owners = cache.get(issue_owners_key)

with metrics.timer("post_process.process_owner_assignments.duration"):
with sentry_sdk.start_span(
op="post_process.handle_owner_assignment.get_issue_owners"
):
if killswitch_matches_context(
"post_process.get-autoassign-owners",
{
"project_id": project.id,
},
):
# see ProjectOwnership.get_issue_owners
issue_owners: Sequence[tuple[Rule, Sequence[Team | RpcUser], str]] = []
else:
issue_owners = ProjectOwnership.get_issue_owners(project.id, event.data)

# Cache for 1 day after we calculated. We don't need to move that fast.
cache.set(
issue_owners_key,
True,
ISSUE_OWNERS_DEBOUNCE_DURATION,
)
if debounce_issue_owners:
metrics.incr("sentry.tasks.post_process.handle_owner_assignment.debounce")
return

with sentry_sdk.start_span(
op="post_process.handle_owner_assignment.handle_group_owners"
):
if issue_owners:
try:
handle_group_owners(project, group, issue_owners)
except Exception:
logger.exception("Failed to store group owners")
else:
handle_invalid_group_owners(group)
if killswitch_matches_context(
"post_process.get-autoassign-owners",
{
"project_id": project.id,
},
):
# see ProjectOwnership.get_issue_owners
issue_owners: Sequence[tuple[Rule, Sequence[Team | RpcUser], str]] = []
handle_invalid_group_owners(group)
else:
issue_owners = ProjectOwnership.get_issue_owners(project.id, event.data)
# Cache for 1 day after we calculated. We don't need to move that fast.
cache.set(
issue_owners_key,
True,
ISSUE_OWNERS_DEBOUNCE_DURATION,
)

if issue_owners:
try:
handle_group_owners(project, group, issue_owners)
except Exception:
logger.exception("Failed to handle owner assignments")
logger.exception("Failed to store group owners")
else:
handle_invalid_group_owners(group)


@sentry_sdk.trace
def handle_invalid_group_owners(group):
from sentry.models.groupowner import GroupOwner, GroupOwnerType

Expand All @@ -322,6 +301,7 @@ def handle_invalid_group_owners(group):
)


@sentry_sdk.trace
def handle_group_owners(
project: Project,
group: Group,
Expand Down

0 comments on commit cdbc6c3

Please sign in to comment.