Skip to content

Commit

Permalink
Revert "ref(seer grouping): Use new CircuitBreaker class for circui…
Browse files Browse the repository at this point in the history
…t breaking (#74563)"

This reverts commit e117cd0.

Co-authored-by: asottile-sentry <103459774+asottile-sentry@users.noreply.github.com>
  • Loading branch information
2 people authored and Christinarlong committed Jul 26, 2024
1 parent 71c47ae commit 8cfe2c5
Show file tree
Hide file tree
Showing 8 changed files with 75 additions and 141 deletions.
1 change: 0 additions & 1 deletion src/sentry/conf/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3423,7 +3423,6 @@ def custom_parameter_sort(parameter: dict) -> tuple[str, int]:
SEER_HASH_GROUPING_RECORDS_DELETE_URL = (
f"/{SEER_SIMILARITY_MODEL_VERSION}/issues/similar-issues/grouping-record/delete-by-hash"
)
SEER_SIMILARITY_CIRCUIT_BREAKER_KEY = "seer.similarity"

SIMILARITY_BACKFILL_COHORT_MAP: dict[str, list[int]] = {}

Expand Down
40 changes: 31 additions & 9 deletions src/sentry/event_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@
from sentry.utils.circuit_breaker import (
ERROR_COUNT_CACHE_KEY,
CircuitBreakerPassthrough,
CircuitBreakerTripped,
circuit_breaker_activated,
with_circuit_breaker,
)
from sentry.utils.dates import to_datetime
from sentry.utils.event import has_event_minified_stack_trace, has_stacktrace, is_handled
Expand Down Expand Up @@ -1528,18 +1530,13 @@ def _save_aggregate(
seer_matched_group = None

if should_call_seer_for_grouping(event, primary_hashes):
metrics.incr(
"grouping.similarity.did_call_seer",
# TODO: Consider lowering this (in all the spots this metric is
# collected) once we roll Seer grouping out more widely
sample_rate=1.0,
tags={"call_made": True, "blocker": "none"},
)
try:
# If the `projects:similarity-embeddings-grouping` feature is disabled,
# we'll still get back result metadata, but `seer_matched_group` will be None
seer_response_data, seer_matched_group = get_seer_similar_issues(
event, primary_hashes
seer_response_data, seer_matched_group = with_circuit_breaker(
"event_manager.get_seer_similar_issues",
lambda: get_seer_similar_issues(event, primary_hashes),
options.get("seer.similarity.circuit-breaker-config"),
)
event.data["seer_similarity"] = seer_response_data

Expand All @@ -1550,8 +1547,33 @@ def _save_aggregate(
"seer_similarity"
] = seer_response_data

metrics.incr(
"grouping.similarity.did_call_seer",
# TODO: Consider lowering this (in all the spots this metric is
# collected) once we roll Seer grouping out more widely
sample_rate=1.0,
tags={"call_made": True, "blocker": "none"},
)

except CircuitBreakerTripped:
# TODO: Do we want to include all of the conditions which cause us to log a
# `grouping.similarity.seer_call_blocked` metric (here and in
# `should_call_seer_for_grouping`) under a single outcome tag on the span
# and timer metric below and in `record_calculation_metric_with_result`
# (also below)? Right now they just fall into the `new_group` bucket.
metrics.incr(
"grouping.similarity.did_call_seer",
sample_rate=1.0,
tags={"call_made": False, "blocker": "circuit-breaker"},
)

# Insurance - in theory we shouldn't ever land here
except Exception as e:
metrics.incr(
"grouping.similarity.did_call_seer",
sample_rate=1.0,
tags={"call_made": True, "blocker": "none"},
)
sentry_sdk.capture_exception(
e, tags={"event": event.event_id, "project": project.id}
)
Expand Down
40 changes: 7 additions & 33 deletions src/sentry/grouping/ingest/seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@
from sentry.grouping.result import CalculatedHashes
from sentry.models.group import Group
from sentry.models.project import Project
from sentry.seer.similarity.similar_issues import (
get_similarity_data_from_seer,
seer_similarity_circuit_breaker,
)
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
from sentry.seer.similarity.types import SeerSimilarIssuesMetadata, SimilarIssuesEmbeddingsRequest
from sentry.seer.similarity.utils import (
event_content_is_seer_eligible,
Expand Down Expand Up @@ -48,11 +45,12 @@ def should_call_seer_for_grouping(event: Event, primary_hashes: CalculatedHashes
# (Checking the rate limit for calling Seer also increments the counter of how many times we've
# tried to call it, and if we fail any of the other checks, it shouldn't count as an attempt.
# Thus we only want to run the rate limit check if every other check has already succeeded.)
if (
killswitch_enabled(project.id, event)
or _circuit_breaker_broken(event, project)
or _ratelimiting_enabled(event, project)
):
#
# Note: The circuit breaker check which might naturally be here alongside its killswitch
# and rate limiting friends instead happens in the `with_circuit_breaker` helper used where
# `get_seer_similar_issues` is actually called. (It has to be there in order for it to track
# errors arising from that call.)
if killswitch_enabled(project.id, event) or _ratelimiting_enabled(event, project):
return False

return True
Expand Down Expand Up @@ -159,30 +157,6 @@ def _ratelimiting_enabled(event: Event, project: Project) -> bool:
return False


def _circuit_breaker_broken(event: Event, project: Project) -> bool:
circuit_broken = not seer_similarity_circuit_breaker.should_allow_request()

if circuit_broken:
logger.warning(
"should_call_seer_for_grouping.circuit_breaker_tripped",
extra={
"event_id": event.event_id,
"project_id": project.id,
**options.get("seer.similarity.circuit-breaker-config"),
},
)
metrics.incr(
"grouping.similarity.circuit_breaker_tripped",
)
metrics.incr(
"grouping.similarity.did_call_seer",
sample_rate=1.0,
tags={"call_made": False, "blocker": "circuit-breaker"},
)

return circuit_broken


def get_seer_similar_issues(
event: Event,
primary_hashes: CalculatedHashes,
Expand Down
15 changes: 3 additions & 12 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -892,21 +892,12 @@
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)

# TODO: The default error limit here was estimated based on EA traffic. (In an average 10 min
# period, there are roughly 35K events without matching hashes. About 2% of orgs are EA, so for
# simplicity, assume 2% of those events are from EA orgs. If we're willing to tolerate up to a 95%
# failure rate, then we need 35K * 0.02 * 0.95 events to fail to trip the breaker.)
#
# When we GA, we should multiply both the limits by 50 (to remove the 2% part of the current
# calculation), and remove this TODO.
register(
"seer.similarity.circuit-breaker-config",
type=Dict,
default={
"error_limit": 666,
"error_limit_window": 600, # 10 min
"broken_state_duration": 300, # 5 min
},
# TODO: For now we're using the defaults for everything but `allow_passthrough`. We may want to
# revisit that choice in the future.
default={"allow_passthrough": True},
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)

Expand Down
17 changes: 1 addition & 16 deletions src/sentry/seer/similarity/similar_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,7 @@
from django.conf import settings
from urllib3.exceptions import MaxRetryError, TimeoutError

from sentry import options
from sentry.conf.server import (
SEER_MAX_GROUPING_DISTANCE,
SEER_SIMILAR_ISSUES_URL,
SEER_SIMILARITY_CIRCUIT_BREAKER_KEY,
)
from sentry.conf.server import SEER_MAX_GROUPING_DISTANCE, SEER_SIMILAR_ISSUES_URL
from sentry.models.grouphash import GroupHash
from sentry.net.http import connection_from_url
from sentry.seer.signed_seer_api import make_signed_seer_api_request
Expand All @@ -20,7 +15,6 @@
)
from sentry.tasks.delete_seer_grouping_records import delete_seer_grouping_records_by_hash
from sentry.utils import json, metrics
from sentry.utils.circuit_breaker2 import CircuitBreaker
from sentry.utils.json import JSONDecodeError, apply_key_filter

logger = logging.getLogger(__name__)
Expand All @@ -36,11 +30,6 @@
timeout=settings.SEER_GROUPING_TIMEOUT,
)

seer_similarity_circuit_breaker = CircuitBreaker(
SEER_SIMILARITY_CIRCUIT_BREAKER_KEY,
options.get("seer.similarity.circuit-breaker-config"),
)


def get_similarity_data_from_seer(
similar_issues_request: SimilarIssuesEmbeddingsRequest,
Expand Down Expand Up @@ -122,7 +111,6 @@ def get_similarity_data_from_seer(
sample_rate=SIMILARITY_REQUEST_METRIC_SAMPLE_RATE,
tags={**metric_tags, "outcome": "error", "error": type(e).__name__},
)
seer_similarity_circuit_breaker.record_error()
return []

metric_tags["response_status"] = response.status
Expand All @@ -149,9 +137,6 @@ def get_similarity_data_from_seer(
},
)

if response.status >= 500:
seer_similarity_circuit_breaker.record_error()

return []

try:
Expand Down
18 changes: 18 additions & 0 deletions tests/sentry/event_manager/grouping/test_seer_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from sentry.testutils.helpers.eventprocessing import save_new_event
from sentry.testutils.helpers.features import with_feature
from sentry.testutils.pytest.mocking import capture_results
from sentry.utils.circuit_breaker import with_circuit_breaker
from sentry.utils.types import NonNone


Expand Down Expand Up @@ -151,6 +152,23 @@ def test_obeys_seer_similarity_flags(self):
assert get_seer_similar_issues_return_values[0][1] == existing_event.group
assert new_event.group_id == existing_event.group_id

@patch("sentry.event_manager.should_call_seer_for_grouping", return_value=True)
@patch("sentry.event_manager.with_circuit_breaker", wraps=with_circuit_breaker)
@patch("sentry.event_manager.get_seer_similar_issues", return_value=({}, None))
def test_obeys_circult_breaker(
self, mock_get_seer_similar_issues: MagicMock, mock_with_circuit_breaker: MagicMock, _
):
with patch("sentry.utils.circuit_breaker._should_call_callback", return_value=True):
save_new_event({"message": "Dogs are great!"}, self.project)
assert mock_with_circuit_breaker.call_count == 1
assert mock_get_seer_similar_issues.call_count == 1

with patch("sentry.utils.circuit_breaker._should_call_callback", return_value=False):
save_new_event({"message": "Adopt don't shop"}, self.project)

assert mock_with_circuit_breaker.call_count == 2 # increased
assert mock_get_seer_similar_issues.call_count == 1 # didn't increase

@patch("sentry.event_manager.should_call_seer_for_grouping", return_value=True)
@patch("sentry.event_manager.get_seer_similar_issues", return_value=({}, None))
def test_calls_seer_if_no_group_found(self, mock_get_seer_similar_issues: MagicMock, _):
Expand Down
12 changes: 0 additions & 12 deletions tests/sentry/grouping/ingest/test_seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,18 +137,6 @@ def test_obeys_project_ratelimit(self):
is expected_result
)

@with_feature("projects:similarity-embeddings-grouping")
def test_obeys_circuit_breaker(self):
for request_allowed, expected_result in [(True, True), (False, False)]:
with patch(
"sentry.grouping.ingest.seer.seer_similarity_circuit_breaker.should_allow_request",
return_value=request_allowed,
):
assert (
should_call_seer_for_grouping(self.event, self.primary_hashes)
is expected_result
)

@with_feature("projects:similarity-embeddings-grouping")
def test_obeys_customized_fingerprint_check(self):
default_fingerprint_event = Event(
Expand Down
73 changes: 15 additions & 58 deletions tests/sentry/seer/similarity/test_similar_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,17 +96,9 @@ def test_no_groups_found(self, mock_seer_request: MagicMock, mock_metrics_incr:
tags={"response_status": 200, "outcome": "no_similar_groups"},
)

@mock.patch(
"sentry.seer.similarity.similar_issues.seer_similarity_circuit_breaker.record_error"
)
@mock.patch("sentry.seer.similarity.similar_issues.metrics.incr")
@mock.patch("sentry.seer.similarity.similar_issues.seer_grouping_connection_pool.urlopen")
def test_bad_response_data(
self,
mock_seer_request: MagicMock,
mock_metrics_incr: MagicMock,
mock_record_circuit_breaker_error: MagicMock,
):
def test_bad_response_data(self, mock_seer_request: MagicMock, mock_metrics_incr: MagicMock):
cases: list[tuple[Any, str]] = [
(None, "AttributeError"),
([], "AttributeError"),
Expand Down Expand Up @@ -147,22 +139,14 @@ def test_bad_response_data(
sample_rate=SIMILARITY_REQUEST_METRIC_SAMPLE_RATE,
tags={"response_status": 200, "outcome": "error", "error": expected_error},
)
assert mock_record_circuit_breaker_error.call_count == 0

mock_metrics_incr.reset_mock()

@mock.patch(
"sentry.seer.similarity.similar_issues.seer_similarity_circuit_breaker.record_error"
)
@mock.patch("sentry.seer.similarity.similar_issues.metrics.incr")
@mock.patch("sentry.seer.similarity.similar_issues.logger")
@mock.patch("sentry.seer.similarity.similar_issues.seer_grouping_connection_pool.urlopen")
def test_redirect(
self,
mock_seer_request: MagicMock,
mock_logger: MagicMock,
mock_metrics_incr: MagicMock,
mock_record_circuit_breaker_error: MagicMock,
self, mock_seer_request: MagicMock, mock_logger: MagicMock, mock_metrics_incr: MagicMock
):
mock_seer_request.return_value = HTTPResponse(
status=308, headers={"location": "/new/and/improved/endpoint/"}
Expand All @@ -177,20 +161,12 @@ def test_redirect(
sample_rate=SIMILARITY_REQUEST_METRIC_SAMPLE_RATE,
tags={"response_status": 308, "outcome": "error", "error": "Redirect"},
)
assert mock_record_circuit_breaker_error.call_count == 0

@mock.patch(
"sentry.seer.similarity.similar_issues.seer_similarity_circuit_breaker.record_error"
)
@mock.patch("sentry.seer.similarity.similar_issues.metrics.incr")
@mock.patch("sentry.seer.similarity.similar_issues.logger")
@mock.patch("sentry.seer.similarity.similar_issues.seer_grouping_connection_pool.urlopen")
def test_request_error(
self,
mock_seer_request: MagicMock,
mock_logger: MagicMock,
mock_metrics_incr: MagicMock,
mock_record_circuit_breaker_error: MagicMock,
self, mock_seer_request: MagicMock, mock_logger: MagicMock, mock_metrics_incr: MagicMock
):
for request_error, expected_error_tag in [
(TimeoutError, "TimeoutError"),
Expand All @@ -216,44 +192,25 @@ def test_request_error(
sample_rate=SIMILARITY_REQUEST_METRIC_SAMPLE_RATE,
tags={"outcome": "error", "error": expected_error_tag},
)
assert mock_record_circuit_breaker_error.call_count == 1

mock_logger.warning.reset_mock()
mock_metrics_incr.reset_mock()
mock_record_circuit_breaker_error.reset_mock()

@mock.patch(
"sentry.seer.similarity.similar_issues.seer_similarity_circuit_breaker.record_error"
)
@mock.patch("sentry.seer.similarity.similar_issues.metrics.incr")
@mock.patch("sentry.seer.similarity.similar_issues.logger")
@mock.patch("sentry.seer.similarity.similar_issues.seer_grouping_connection_pool.urlopen")
def test_error_status(
self,
mock_seer_request: MagicMock,
mock_logger: MagicMock,
mock_metrics_incr: MagicMock,
mock_record_circuit_breaker_error: MagicMock,
self, mock_seer_request: MagicMock, mock_logger: MagicMock, mock_metrics_incr: MagicMock
):
for response, status, counts_for_circuit_breaker in [
("No soup for you", 403, False),
("No soup, period", 500, True),
]:
mock_seer_request.return_value = HTTPResponse(response, status=status)
mock_seer_request.return_value = HTTPResponse("No soup for you", status=403)

assert get_similarity_data_from_seer(self.request_params) == []
mock_logger.error.assert_called_with(
f"Received {status} when calling Seer endpoint {SEER_SIMILAR_ISSUES_URL}.",
extra={"response_data": response},
)
mock_metrics_incr.assert_any_call(
"seer.similar_issues_request",
sample_rate=SIMILARITY_REQUEST_METRIC_SAMPLE_RATE,
tags={"response_status": status, "outcome": "error", "error": "RequestError"},
)
assert mock_record_circuit_breaker_error.call_count == (
1 if counts_for_circuit_breaker else 0
)
assert get_similarity_data_from_seer(self.request_params) == []
mock_logger.error.assert_called_with(
f"Received 403 when calling Seer endpoint {SEER_SIMILAR_ISSUES_URL}.",
extra={"response_data": "No soup for you"},
)
mock_metrics_incr.assert_any_call(
"seer.similar_issues_request",
sample_rate=SIMILARITY_REQUEST_METRIC_SAMPLE_RATE,
tags={"response_status": 403, "outcome": "error", "error": "RequestError"},
)

@mock.patch("sentry.seer.similarity.similar_issues.seer_grouping_connection_pool.urlopen")
def test_returns_sorted_results(self, mock_seer_request: MagicMock):
Expand Down

0 comments on commit 8cfe2c5

Please sign in to comment.