Skip to content

Commit

Permalink
perf(replay): reduce postgres queries in recording consumer using get…
Browse files Browse the repository at this point in the history
…_from_cache (#77365)

@cmanallen pointed out the queries we make in _handle_breadcrumb, used
to check feature flags and project options, could overload Postgres as
we scale. To fix this we can do the query at the top-level and use
`get_from_cache`

---------

Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
  • Loading branch information
aliu39 and getsantry[bot] committed Sep 20, 2024
1 parent 972b13a commit f68f0d0
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 57 deletions.
4 changes: 3 additions & 1 deletion src/sentry/replays/consumers/recording_buffered.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from sentry_kafka_schemas.schema_types.ingest_replay_recordings_v1 import ReplayRecording

from sentry.conf.types.kafka_definition import Topic, get_topic_codec
from sentry.models.project import Project
from sentry.replays.lib.storage import (
RecordingSegmentStorageMeta,
make_recording_filename,
Expand Down Expand Up @@ -318,8 +319,9 @@ def process_message(buffer: RecordingBuffer, message: bytes) -> None:
else None
)

project = Project.objects.get_from_cache(id=decoded_message["project_id"])
replay_actions = parse_replay_actions(
decoded_message["project_id"],
project,
decoded_message["replay_id"],
decoded_message["retention_days"],
parsed_recording_data,
Expand Down
3 changes: 2 additions & 1 deletion src/sentry/replays/usecases/ingest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,10 @@ def recording_post_processor(
op="replays.usecases.ingest.parse_and_emit_replay_actions",
description="parse_and_emit_replay_actions",
):
project = Project.objects.get_from_cache(id=message.project_id)
parse_and_emit_replay_actions(
retention_days=message.retention_days,
project_id=message.project_id,
project=project,
replay_id=message.replay_id,
segment_data=parsed_segment_data,
replay_event=parsed_replay_event,
Expand Down
64 changes: 40 additions & 24 deletions src/sentry/replays/usecases/ingest/dom_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,15 @@ class ReplayActionsEvent(TypedDict):


def parse_and_emit_replay_actions(
project_id: int,
project: Project,
replay_id: str,
retention_days: int,
segment_data: list[dict[str, Any]],
replay_event: dict[str, Any] | None,
) -> None:
with metrics.timer("replays.usecases.ingest.dom_index.parse_and_emit_replay_actions"):
message = parse_replay_actions(
project_id, replay_id, retention_days, segment_data, replay_event
project, replay_id, retention_days, segment_data, replay_event
)
if message is not None:
emit_replay_actions(message)
Expand All @@ -82,19 +82,19 @@ def emit_replay_actions(action: ReplayActionsEvent) -> None:


def parse_replay_actions(
project_id: int,
project: Project,
replay_id: str,
retention_days: int,
segment_data: list[dict[str, Any]],
replay_event: dict[str, Any] | None,
) -> ReplayActionsEvent | None:
"""Parse RRWeb payload to ReplayActionsEvent."""
actions = get_user_actions(project_id, replay_id, segment_data, replay_event)
actions = get_user_actions(project, replay_id, segment_data, replay_event)
if len(actions) == 0:
return None

payload = create_replay_actions_payload(replay_id, actions)
return create_replay_actions_event(replay_id, project_id, retention_days, payload)
return create_replay_actions_event(replay_id, project.id, retention_days, payload)


def create_replay_actions_event(
Expand Down Expand Up @@ -153,7 +153,7 @@ def log_canvas_size(


def get_user_actions(
project_id: int,
project: Project,
replay_id: str,
events: list[dict[str, Any]],
replay_event: dict[str, Any] | None,
Expand All @@ -177,6 +177,10 @@ def get_user_actions(
"textContent": "Helloworld!"
}
"""
# Feature flag and project option queries
should_report_rage = _should_report_rage_click_issue(project)
should_report_hydration = _should_report_hydration_error_issue(project)

result: list[ReplayActionsEventPayloadClick] = []
for event in _iter_custom_events(events):
if len(result) == 20:
Expand All @@ -185,15 +189,22 @@ def get_user_actions(
tag = event.get("data", {}).get("tag")

if tag == "breadcrumb":
click = _handle_breadcrumb(event, project_id, replay_id, replay_event)
click = _handle_breadcrumb(
event,
project,
replay_id,
replay_event,
should_report_rage_click_issue=should_report_rage,
should_report_hydration_error_issue=should_report_hydration,
)
if click is not None:
result.append(click)
# look for request / response breadcrumbs and report metrics on them
if tag == "performanceSpan":
_handle_resource_metric_event(event)
# log the SDK options sent from the SDK 1/500 times
if tag == "options" and random.randint(0, 499) < 1:
_handle_options_logging_event(project_id, replay_id, event)
_handle_options_logging_event(project.id, replay_id, event)
# log large dom mutation breadcrumb events 1/100 times

payload = event.get("data", {}).get("payload", {})
Expand All @@ -203,7 +214,7 @@ def get_user_actions(
and payload.get("category") == "replay.mutations"
and random.randint(0, 500) < 1
):
_handle_mutations_event(project_id, replay_id, event)
_handle_mutations_event(project.id, replay_id, event)

return result

Expand Down Expand Up @@ -287,21 +298,21 @@ def _parse_classes(classes: str) -> list[str]:
return list(filter(lambda n: n != "", classes.split(" ")))[:10]


def _should_report_hydration_error_issue(project_id: int) -> bool:
project = Project.objects.get(id=project_id)
def _should_report_hydration_error_issue(project: Project) -> bool:
"""
The feature is controlled by Sentry admins for release of the feature,
while the project option is controlled by the project owner, and is a
permanent setting
Checks the feature that's controlled by Sentry admins for release of the feature,
and the permanent project option, controlled by the project owner.
"""
return features.has(
"organizations:session-replay-hydration-error-issue-creation",
project.organization,
) and project.get_option("sentry:replay_hydration_error_issues")


def _should_report_rage_click_issue(project_id: int) -> bool:
project = Project.objects.get(id=project_id)
def _should_report_rage_click_issue(project: Project) -> bool:
"""
Checks the project option, controlled by a project owner.
"""
return project.get_option("sentry:replay_rage_click_issues")


Expand Down Expand Up @@ -374,7 +385,12 @@ def _handle_mutations_event(project_id: int, replay_id: str, event: dict[str, An


def _handle_breadcrumb(
event: dict[str, Any], project_id: int, replay_id: str, replay_event: dict[str, Any] | None
event: dict[str, Any],
project: Project,
replay_id: str,
replay_event: dict[str, Any] | None,
should_report_rage_click_issue=False,
should_report_hydration_error_issue=False,
) -> ReplayActionsEventPayloadClick | None:

click = None
Expand All @@ -399,15 +415,15 @@ def _handle_breadcrumb(
payload["data"].get("clickCount", 0) or payload["data"].get("clickcount", 0)
) >= 5
click = create_click_event(
payload, replay_id, is_dead=True, is_rage=is_rage, project_id=project_id
payload, replay_id, is_dead=True, is_rage=is_rage, project_id=project.id
)
if click is not None:
if is_rage:
metrics.incr("replay.rage_click_detected")
if _should_report_rage_click_issue(project_id):
if should_report_rage_click_issue:
if replay_event is not None:
report_rage_click_issue_with_replay_event(
project_id,
project.id,
replay_id,
payload["timestamp"],
payload["message"],
Expand All @@ -418,24 +434,24 @@ def _handle_breadcrumb(
)
# Log the event for tracking.
log = event["data"].get("payload", {}).copy()
log["project_id"] = project_id
log["project_id"] = project.id
log["replay_id"] = replay_id
log["dom_tree"] = log.pop("message")

return click

elif category == "ui.click":
click = create_click_event(
payload, replay_id, is_dead=False, is_rage=False, project_id=project_id
payload, replay_id, is_dead=False, is_rage=False, project_id=project.id
)
if click is not None:
return click

elif category == "replay.hydrate-error":
metrics.incr("replay.hydration_error_breadcrumb")
if replay_event is not None and _should_report_hydration_error_issue(project_id):
if replay_event is not None and should_report_hydration_error_issue:
report_hydration_error_issue_with_replay_event(
project_id,
project.id,
replay_id,
payload["timestamp"],
payload.get("data", {}).get("url"),
Expand Down
Loading

0 comments on commit f68f0d0

Please sign in to comment.