ref(crons): Clarify ts argument in mark_{ok,failed} (#80560)

Since I'll be adding a `clock_tick` argument to `mark_failed` it was going to start becoming confusing what the `ts` argument in `mark_failed` means. This updates mark_{ok,failed} to use more appropriate names for what the timestamp represents Refactoring as part of GH-79328
getsentry · Nov 12, 2024 · a35efff · a35efff
1 parent f9a9a86
commit a35efff
Show file tree

Hide file tree

Showing 7 changed files with 39 additions and 31 deletions.
diff --git a/src/sentry/monitors/clock_tasks/check_missed.py b/src/sentry/monitors/clock_tasks/check_missed.py
@@ -167,4 +167,4 @@ def mark_environment_missing(monitor_environment_id: int, ts: datetime):
         monitor.schedule,
     )
 
-    mark_failed(checkin, ts=most_recent_expected_ts)
+    mark_failed(checkin, failed_at=most_recent_expected_ts)
diff --git a/src/sentry/monitors/clock_tasks/check_timeout.py b/src/sentry/monitors/clock_tasks/check_timeout.py
@@ -121,4 +121,4 @@ def mark_checkin_timeout(checkin_id: int, ts: datetime) -> None:
             monitor.schedule,
         )
 
-        mark_failed(checkin, ts=most_recent_expected_ts)
+        mark_failed(checkin, failed_at=most_recent_expected_ts)
diff --git a/src/sentry/monitors/consumers/monitor_consumer.py b/src/sentry/monitors/consumers/monitor_consumer.py
@@ -877,9 +877,9 @@ def _process_checkin(item: CheckinItem, txn: Transaction | Span):
                 # Note: We use `start_time` for received here since it's the time that this
                 # checkin was received by relay. Potentially, `ts` should be the client
                 # timestamp. If we change that, leave `received` the same.
-                mark_failed(check_in, ts=start_time, received=start_time)
+                mark_failed(check_in, failed_at=start_time, received=start_time)
             else:
-                mark_ok(check_in, ts=start_time)
+                mark_ok(check_in, succeeded_at=start_time)
 
             # track how much time it took for the message to make it through
             # relay into kafka. This should help us understand when missed

diff --git a/src/sentry/monitors/logic/mark_failed.py b/src/sentry/monitors/logic/mark_failed.py
@@ -13,17 +13,17 @@
 
 def mark_failed(
     failed_checkin: MonitorCheckIn,
-    ts: datetime,
+    failed_at: datetime,
     received: datetime | None = None,
 ) -> bool:
     """
     Given a failing check-in, mark the monitor environment as failed and trigger
     side effects for creating monitor incidents and issues.
 
-    The provided `ts` is the reference time for when the next check-in time is
-    calculated from. This typically would be the failed check-in's `date_added`
-    or completion time. Though for the missed and timedout tasks this may be
-    computed based on the tasks reference time.
+    The provided `failed_at` is the reference time for when the next check-in
+    time is calculated from. This typically would be the failed check-in's
+    `date_added` or completion time. Though for the missed and time-out tasks
+    this may be computed based on the tasks reference time.
     """
     monitor_env = failed_checkin.monitor_environment
 
@@ -35,8 +35,8 @@ def mark_failed(
         failure_issue_threshold = 1
 
     # Compute the next check-in time from our reference time
-    next_checkin = monitor_env.monitor.get_next_expected_checkin(ts)
-    next_checkin_latest = monitor_env.monitor.get_next_expected_checkin_latest(ts)
+    next_checkin = monitor_env.monitor.get_next_expected_checkin(failed_at)
+    next_checkin_latest = monitor_env.monitor.get_next_expected_checkin_latest(failed_at)
 
     # When the failed check-in is a synthetic missed check-in we do not move
     # the `last_checkin` timestamp forward.

diff --git a/src/sentry/monitors/logic/mark_ok.py b/src/sentry/monitors/logic/mark_ok.py
@@ -20,14 +20,22 @@ class _Params(TypedDict):
     status: NotRequired[int]
 
 
-def mark_ok(checkin: MonitorCheckIn, ts: datetime) -> None:
+def mark_ok(checkin: MonitorCheckIn, succeeded_at: datetime) -> None:
+    """
+    Given a successful check-in, attempt to resolve the active incident and
+    mark the monitor as OK.
+
+    The provided `succeeded_at` is the reference time for when the next check-in
+    time is calculated from. This typically would be when the successful
+    check-in was received.
+    """
     monitor_env = checkin.monitor_environment
 
     if monitor_env is None:
         return None
 
-    next_checkin = monitor_env.monitor.get_next_expected_checkin(ts)
-    next_checkin_latest = monitor_env.monitor.get_next_expected_checkin_latest(ts)
+    next_checkin = monitor_env.monitor.get_next_expected_checkin(succeeded_at)
+    next_checkin_latest = monitor_env.monitor.get_next_expected_checkin_latest(succeeded_at)
 
     params: _Params = {
         "last_checkin": checkin.date_added,
@@ -91,9 +99,9 @@ def mark_ok(checkin: MonitorCheckIn, ts: datetime) -> None:
                             monitor_env_id=monitor_env.id,
                         )
 
-    MonitorEnvironment.objects.filter(id=monitor_env.id).exclude(last_checkin__gt=ts).update(
-        **params
-    )
+    MonitorEnvironment.objects.filter(id=monitor_env.id).exclude(
+        last_checkin__gt=succeeded_at
+    ).update(**params)
 
 
 def resolve_incident_group(

diff --git a/tests/sentry/monitors/logic/test_mark_failed.py b/tests/sentry/monitors/logic/test_mark_failed.py
@@ -61,7 +61,7 @@ def test_mark_failed_default_params(self, mock_produce_occurrence_to_kafka):
             trace_id=trace_id,
             date_added=last_checkin,
         )
-        assert mark_failed(checkin, ts=checkin.date_added)
+        assert mark_failed(checkin, failed_at=checkin.date_added)
 
         monitor_environment.refresh_from_db()
         assert monitor_environment.status == MonitorStatus.ERROR
@@ -171,7 +171,7 @@ def test_mark_failed_muted(self, mock_produce_occurrence_to_kafka):
             project_id=self.project.id,
             status=CheckInStatus.UNKNOWN,
         )
-        assert mark_failed(checkin, ts=checkin.date_added)
+        assert mark_failed(checkin, failed_at=checkin.date_added)
 
         monitor.refresh_from_db()
         monitor_environment.refresh_from_db()
@@ -210,7 +210,7 @@ def test_mark_failed_env_muted(self, mock_produce_occurrence_to_kafka):
             project_id=self.project.id,
             status=CheckInStatus.UNKNOWN,
         )
-        assert mark_failed(checkin, ts=checkin.date_added)
+        assert mark_failed(checkin, failed_at=checkin.date_added)
 
         monitor.refresh_from_db()
         monitor_environment.refresh_from_db()
@@ -259,7 +259,7 @@ def test_mark_failed_issue_threshold(self, mock_produce_occurrence_to_kafka):
                 project_id=self.project.id,
                 status=status,
             )
-            mark_failed(checkin, ts=checkin.date_added)
+            mark_failed(checkin, failed_at=checkin.date_added)
 
         # failure has not hit threshold, monitor should be in an OK status
         monitor_environment = MonitorEnvironment.objects.get(id=monitor_environment.id)
@@ -283,7 +283,7 @@ def test_mark_failed_issue_threshold(self, mock_produce_occurrence_to_kafka):
             )
             if _ == 0:
                 first_checkin = checkin
-            mark_failed(checkin, ts=checkin.date_added)
+            mark_failed(checkin, failed_at=checkin.date_added)
 
         # failure has hit threshold, monitor should be in a failed state
         monitor_environment = MonitorEnvironment.objects.get(id=monitor_environment.id)
@@ -318,7 +318,7 @@ def test_mark_failed_issue_threshold(self, mock_produce_occurrence_to_kafka):
             project_id=self.project.id,
             status=status,
         )
-        mark_failed(checkin, ts=checkin.date_added)
+        mark_failed(checkin, failed_at=checkin.date_added)
         monitor_environment = MonitorEnvironment.objects.get(id=monitor_environment.id)
         assert monitor_environment.status == MonitorStatus.ERROR
 
@@ -357,7 +357,7 @@ def test_mark_failed_issue_threshold(self, mock_produce_occurrence_to_kafka):
                 project_id=self.project.id,
                 status=status,
             )
-            mark_failed(checkin, ts=checkin.date_added)
+            mark_failed(checkin, failed_at=checkin.date_added)
 
         monitor_incidents = MonitorIncident.objects.filter(monitor_environment=monitor_environment)
         assert len(monitor_incidents) == 2
@@ -410,15 +410,15 @@ def test_mark_failed_issue_threshold_timeout(self, mock_produce_occurrence_to_ka
         for _ in range(0, failure_issue_threshold - 1):
             checkin = checkins.pop(0)
             checkin.update(status=CheckInStatus.TIMEOUT)
-            mark_failed(checkin, ts=checkin.date_added)
+            mark_failed(checkin, failed_at=checkin.date_added)
 
         # failure has not hit threshold, monitor should be in an OK status
         monitor_environment = MonitorEnvironment.objects.get(id=monitor_environment.id)
         assert monitor_environment.status == MonitorStatus.OK
 
         checkin = checkins.pop(0)
         checkin.update(status=CheckInStatus.TIMEOUT)
-        mark_failed(checkin, ts=checkin.date_added)
+        mark_failed(checkin, failed_at=checkin.date_added)
 
         # failure has hit threshold, monitor should be in a failed state
         monitor_environment = MonitorEnvironment.objects.get(id=monitor_environment.id)
@@ -473,7 +473,7 @@ def test_mark_failed_issue_threshold_disabled(self, mock_produce_occurrence_to_k
                 project_id=self.project.id,
                 status=CheckInStatus.UNKNOWN,
             )
-            mark_failed(checkin, ts=checkin.date_added)
+            mark_failed(checkin, failed_at=checkin.date_added)
 
         monitor.refresh_from_db()
         monitor_environment.refresh_from_db()
@@ -516,7 +516,7 @@ def test_mark_failed_issue_assignment(self):
             project_id=self.project.id,
             status=CheckInStatus.IN_PROGRESS,
         )
-        mark_failed(checkin, ts=checkin.date_added)
+        mark_failed(checkin, failed_at=checkin.date_added)
 
         # failure has hit threshold, monitor should be in a failed state
         monitor_environment = MonitorEnvironment.objects.get(id=monitor_environment.id)

diff --git a/tests/sentry/monitors/logic/test_mark_ok.py b/tests/sentry/monitors/logic/test_mark_ok.py
@@ -56,7 +56,7 @@ def test_mark_ok_simple(self):
             status=CheckInStatus.OK,
             date_added=now,
         )
-        mark_ok(success_checkin, ts=now)
+        mark_ok(success_checkin, now)
 
         # Monitor has recovered to OK with updated upcoming timestamps
         monitor_environment.refresh_from_db()
@@ -101,7 +101,7 @@ def test_muted_ok(self):
             status=CheckInStatus.OK,
             date_added=now,
         )
-        mark_ok(success_checkin, ts=now)
+        mark_ok(success_checkin, now)
 
         # Monitor has recovered to OK with updated upcoming timestamps
         monitor_environment.refresh_from_db()
@@ -294,7 +294,7 @@ def test_mark_ok_broken_recovery(self, mock_record):
             status=CheckInStatus.OK,
             date_added=now,
         )
-        mark_ok(success_checkin, ts=now)
+        mark_ok(success_checkin, now)
 
         # Monitor has recovered to OK with updated upcoming timestamps
         monitor_environment.refresh_from_db()