Skip to content

Commit

Permalink
chore(issues): Backfill UNRESOLVED groups with missing substatuses (#…
Browse files Browse the repository at this point in the history
…76082)

We need to run a backfill to fix the missing substatuses for UNRESOLVED
groups. This [redash query](https://redash.getsentry.net/queries/6888)
shows we have ~500 unresolved groups that have no substatus. We can
backfill the groups by
- setting the substatus to `NEW` if the group was first seen in the last
7 days
- setting the substatus to `REGRESSED` if there is a matching
GroupHistory row from the past 7 days
- setting the substatus to `ONGOING` if no other info is present. In
this case, we're assuming any status changes are older than 7 days, so
the auto_transition tasks would already have marked this group as `ONGOING`.

Marking this as a post-deploy migration for safety, even though we'll only
be updating roughly 500 groups.

#76076
  • Loading branch information
snigdhas committed Aug 26, 2024
1 parent 082a90b commit f36c906
Show file tree
Hide file tree
Showing 3 changed files with 157 additions and 1 deletion.
2 changes: 1 addition & 1 deletion migrations_lockfile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@ hybridcloud: 0016_add_control_cacheversion
nodestore: 0002_nodestore_no_dictfield
remote_subscriptions: 0003_drop_remote_subscription
replays: 0004_index_together
sentry: 0751_grouphashmetadata_use_one_to_one_field_for_grouphash
sentry: 0752_fix_substatus_for_unresolved_groups
social_auth: 0002_default_auto_field
uptime: 0007_update_detected_subscription_interval
85 changes: 85 additions & 0 deletions src/sentry/migrations/0752_fix_substatus_for_unresolved_groups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Generated by Django 5.0.7 on 2024-08-13 17:06


from datetime import timedelta

from django.apps.registry import Apps
from django.db import migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.utils import timezone

from sentry.new_migrations.migrations import CheckedMigration


# Copying constants defined in the models
class GroupHistoryStatus:
    """Local copy of the ``GroupHistoryStatus`` value this migration filters on."""

    # Status value of the GroupHistory rows the backfill looks for.
    REGRESSED = 7


class GroupSubStatus:
    """Local copy of the ``GroupSubStatus`` values this migration can assign."""

    ONGOING = 3
    REGRESSED = 6
    NEW = 7


class GroupStatus:
    """Local copy of the ``GroupStatus`` value used to select groups."""

    # Only groups with this status are considered by the backfill.
    UNRESOLVED = 0


# End copy


def backfill_substatus_for_unresolved_groups(
    apps: Apps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    """Assign a substatus to every UNRESOLVED group that currently has none.

    Each matching group gets, in order of precedence:

    * ``NEW`` when its ``first_seen`` is within the last 7 days;
    * ``REGRESSED`` when it has a ``GroupHistory`` row with the REGRESSED
      status that was added within the last 7 days;
    * ``ONGOING`` otherwise.

    Args:
        apps: App registry used to resolve the ``Group`` and
            ``GroupHistory`` models.
        schema_editor: Unused; part of the ``RunPython`` callable signature.
    """
    Group = apps.get_model("sentry", "Group")
    GroupHistory = apps.get_model("sentry", "GroupHistory")

    seven_days_ago = timezone.now() - timedelta(days=7)
    groups = Group.objects.filter(status=GroupStatus.UNRESOLVED, substatus=None)
    recent_regressions = GroupHistory.objects.filter(
        date_added__gt=seven_days_ago, status=GroupHistoryStatus.REGRESSED
    )

    for group in groups:
        if group.first_seen > seven_days_ago:
            group.substatus = GroupSubStatus.NEW
        elif recent_regressions.filter(group=group).exists():
            group.substatus = GroupSubStatus.REGRESSED
        else:
            group.substatus = GroupSubStatus.ONGOING

        # Write only the column being backfilled so that a full-row save
        # cannot overwrite other columns with the values loaded above.
        group.save(update_fields=["substatus"])


class Migration(CheckedMigration):
    """Data migration that backfills missing substatuses on UNRESOLVED groups."""

    # This flag is used to mark that a migration shouldn't be automatically run in production.
    # This should only be used for operations where it's safe to run the migration after your
    # code has deployed. So this should not be used for most operations that alter the schema
    # of a table.
    # Here are some things that make sense to mark as post deployment:
    # - Large data migrations. Typically we want these to be run manually so that they can be
    #   monitored and not block the deploy for a long period of time while they run.
    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
    #   run this outside deployments so that we don't block them. Note that while adding an index
    #   is a schema change, it's completely safe to run the operation after the code has deployed.
    # Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment

    is_post_deployment = True

    dependencies = [
        ("sentry", "0751_grouphashmetadata_use_one_to_one_field_for_grouphash"),
    ]

    operations = [
        migrations.RunPython(
            backfill_substatus_for_unresolved_groups,
            # The backfill is not undone on reverse; reversing is a no-op.
            migrations.RunPython.noop,
            hints={"tables": ["sentry_groupedmessage", "sentry_grouphistory"]},
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from datetime import timedelta

from django.utils import timezone

from sentry.models.group import Group, GroupStatus
from sentry.models.grouphistory import GroupHistory, GroupHistoryStatus
from sentry.models.organization import Organization
from sentry.testutils.cases import TestMigrations
from sentry.types.group import GroupSubStatus


class BackfillMissingUnresolvedSubstatusTest(TestMigrations):
    """Checks that migration 0752 fills in the substatus of UNRESOLVED groups.

    Four groups are created before the migration runs: one that already has
    a substatus and three with ``substatus=None`` that should end up as
    ONGOING, REGRESSED and NEW respectively.
    """

    migrate_from = "0751_grouphashmetadata_use_one_to_one_field_for_grouphash"
    migrate_to = "0752_fix_substatus_for_unresolved_groups"

    def setup_before_migration(self, app):
        """Create the fixture groups in their pre-migration state."""
        self.organization = Organization.objects.create(name="test", slug="test")
        self.project = self.create_project(organization=self.organization)

        # Already has a substatus, so the migration must leave it alone.
        self.do_not_update = Group.objects.create(
            project=self.project,
            status=GroupStatus.UNRESOLVED,
            substatus=GroupSubStatus.NEW,
        )

        # First seen more than 7 days ago with no history rows -> ONGOING.
        self.ongoing_group = Group.objects.create(
            project=self.project,
            status=GroupStatus.UNRESOLVED,
        )
        # .update() skips calling the pre_save checks which add a substatus
        self.ongoing_group.update(
            substatus=None,
            first_seen=timezone.now() - timedelta(days=8),
        )
        self.ongoing_group.refresh_from_db()
        assert self.ongoing_group.substatus is None

        # First seen more than 7 days ago with a REGRESSED history row from
        # yesterday -> REGRESSED.
        self.regressed_group = Group.objects.create(
            project=self.project,
            status=GroupStatus.UNRESOLVED,
            first_seen=timezone.now() - timedelta(days=8),
        )
        self.regressed_group.update(substatus=None)
        # Reload so the assertion checks the stored row, not just the instance.
        self.regressed_group.refresh_from_db()
        assert self.regressed_group.substatus is None
        GroupHistory.objects.create(
            group=self.regressed_group,
            date_added=timezone.now() - timedelta(days=1),
            organization_id=self.organization.id,
            project_id=self.project.id,
            status=GroupHistoryStatus.REGRESSED,
        )

        # First seen just now -> NEW.
        self.new_group = Group.objects.create(
            project=self.project,
            status=GroupStatus.UNRESOLVED,
            first_seen=timezone.now(),
        )
        self.new_group.update(substatus=None)
        # Reload so the assertion checks the stored row, not just the instance.
        self.new_group.refresh_from_db()
        assert self.new_group.substatus is None

    def test(self):
        """Each fixture group carries the expected substatus after migrating."""
        self.do_not_update.refresh_from_db()
        assert self.do_not_update.substatus == GroupSubStatus.NEW

        self.ongoing_group.refresh_from_db()
        assert self.ongoing_group.substatus == GroupSubStatus.ONGOING

        self.regressed_group.refresh_from_db()
        assert self.regressed_group.substatus == GroupSubStatus.REGRESSED

        self.new_group.refresh_from_db()
        assert self.new_group.substatus == GroupSubStatus.NEW

0 comments on commit f36c906

Please sign in to comment.