Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(replays): Accept timestamp parameter when calculating accessibility issues #60983

Merged
merged 8 commits into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/sentry/replays/blueprints/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,10 @@ This resource does not accept any URI parameters and is not paginated. Responses

### Fetch Replay Accessibility Issues [GET]

- Parameters

- timestamp (optional, number) - The timestamp which to render to for accessibility analysis.
cmanallen marked this conversation as resolved.
Show resolved Hide resolved

Retrieve a collection of accessibility issues.

**Attributes**
Expand Down
1 change: 1 addition & 0 deletions src/sentry/replays/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
REPLAY_DURATION_HOURS = 1
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@
from sentry.api.bases.project import ProjectEndpoint
from sentry.models.project import Project
from sentry.replays.lib.storage import make_filename
from sentry.replays.usecases.reader import fetch_direct_storage_segments_meta
from sentry.replays.usecases.reader import (
fetch_direct_storage_segments_meta,
segment_row_to_storage_meta,
)
from sentry.replays.usecases.segment import query_segment_storage_meta_by_timestamp
from sentry.types.ratelimit import RateLimit, RateLimitCategory
from sentry.utils.cursors import Cursor, CursorResult

Expand Down Expand Up @@ -67,13 +71,40 @@ def get(self, request: Request, project: Project, replay_id: str) -> Response:
except ValueError:
return Response(status=404)

try:
timestamp = int(request.GET.get("timestamp", ""))
except TypeError:
timestamp = None
except ValueError:
raise ParseError("Invalid timestamp value specified.")

def data_fn(offset, limit):
# Increment a counter for every call to the accessibility service.
metrics.incr("session-replay-accessibility-issues-count")

# We only support direct-storage. Filestore is deprecated and should be removed from
# the driver.
segments = fetch_direct_storage_segments_meta(project.id, replay_id, offset, limit)
# We only support direct-storage. Filestore is deprecated and should be removed
# from the driver.
if timestamp is None:
# If no timestamp is provided we render 5 segments by convention.
segments = fetch_direct_storage_segments_meta(
project.id,
replay_id,
offset,
limit=5,
)
else:
# If a timestamp was provided we fetch every segment that started prior to the
# timestamp value.
results = query_segment_storage_meta_by_timestamp(
project.organization.id,
project.id,
replay_id,
timestamp,
)
segments = [
segment_row_to_storage_meta(project.id, replay_id, row)
for row in results["data"]
]

# Make a POST request to the replay-analyzer service. The files will be downloaded
# and evaluated on the remote system. The accessibility output is then redirected to
Expand Down
27 changes: 16 additions & 11 deletions src/sentry/replays/usecases/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,17 +226,22 @@ def _fetch_segments_from_snuba(
)
response = raw_snql_query(snuba_request, "replays.query.download_replay_segments")

return [
RecordingSegmentStorageMeta(
project_id=project_id,
replay_id=replay_id,
segment_id=item["segment_id"],
retention_days=item["retention_days"],
date_added=datetime.fromisoformat(item["timestamp"]),
file_id=None,
)
for item in response["data"]
]
return [segment_row_to_storage_meta(project_id, replay_id, item) for item in response["data"]]


def segment_row_to_storage_meta(
project_id: int,
replay_id: str,
row,
) -> RecordingSegmentStorageMeta:
return RecordingSegmentStorageMeta(
project_id=project_id,
replay_id=replay_id,
segment_id=row["segment_id"],
retention_days=row["retention_days"],
date_added=datetime.fromisoformat(row["timestamp"]),
file_id=None,
)


# BLOB DOWNLOAD BEHAVIOR.
Expand Down
67 changes: 67 additions & 0 deletions src/sentry/replays/usecases/segment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from __future__ import annotations

from datetime import datetime, timedelta
from typing import Any, Mapping

from snuba_sdk import Column, Condition, Entity, Granularity, Op, Query, Request
from snuba_sdk.orderby import Direction, OrderBy

from sentry.replays.constants import REPLAY_DURATION_HOURS
from sentry.utils.snuba import raw_snql_query


def query_segment_storage_meta_by_timestamp(
organization_id: int,
project_id: int,
replay_id: str,
timestamp: int,
) -> Mapping[str, Any]:
# These timestamps do not specify a timezone. They are presumed UTC but we can not verify.
# Since these times originate from the same place it is enough to treat them relatively. We
# ignore the issue of timezone and utilize the replay's internal consistency.
#
# This means calls to the server's internal clock are prohibited.
end = datetime.fromtimestamp(timestamp + 1)

# For safety look back one additional hour.
start = end - timedelta(hours=REPLAY_DURATION_HOURS + 1)

query = query_storage_meta()
query = query.set_where(
[
Condition(Column("project_id"), Op.EQ, project_id),
Condition(Column("replay_id"), Op.EQ, replay_id),
Condition(Column("segment_id"), Op.IS_NOT_NULL),
Condition(Column("timestamp"), Op.GTE, start),
Condition(Column("timestamp"), Op.LT, end),
]
)

results = raw_snql_query(
Request(
dataset="replays",
app_id="replay-backend-web",
query=query,
tenant_ids={"organization_id": organization_id},
),
referrer="replays.query.query_segment_from_timestamp",
# There's a very low probability two users will query the same timestamp on the same
# replay. Unless there's some accessibility url sharing behavior in the product. Until
# then this is disabled to save us from a request to redis.
use_cache=False,
)

items = results["data"]
if len(items) == 0:
raise ValueError("No segment could be found for timestamp.")

return results


def query_storage_meta() -> Query:
return Query(
match=Entity("replays"),
select=[Column("segment_id"), Column("retention_days"), Column("timestamp")],
orderby=[OrderBy(Column("segment_id"), Direction.ASC)],
granularity=Granularity(3600),
)
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@
with self.feature(REPLAYS_FEATURES):
response = self.client.get(self.url)

assert request_accessibility_issues.called

Check failure on line 73 in tests/sentry/replays/test_project_replay_accessibility_issues.py

View workflow job for this annotation

GitHub Actions / backend test (8, 14)

OrganizationReplayDetailsTest.test_get_replay_accessibility_issues AssertionError: assert False + where False = <MagicMock name='request_accessibility_issues' id='139669892762544'>.called
assert response.status_code == 200
assert response.has_header("X-Hits")

Expand All @@ -86,3 +86,83 @@
assert "alternatives" in response_data["data"][0]["elements"][0]
assert "element" in response_data["data"][0]["elements"][0]
assert "target" in response_data["data"][0]["elements"][0]

@patch(
"sentry.replays.endpoints.project_replay_accessibility_issues.request_accessibility_issues"
)
def test_get_replay_accessibility_issues_by_timestamp(self, request_accessibility_issues):
request_accessibility_issues.return_value = {
"meta": {"total": 1},
"data": [
{
"elements": [
{
"alternatives": [{"id": "button-has-visible-text", "message": "m"}],
"element": '<button class="svelte-19ke1iv">',
"target": ["button:nth-child(1)"],
}
],
"help_url": "url",
"help": "Buttons must have discernible text",
"id": "button-name",
"impact": "critical",
"timestamp": 1695967678108,
}
],
}

replay_id = self.replay_id
seq1_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=10)
seq2_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=5)

self.store_replays(mock_replay(seq1_timestamp, self.project.id, replay_id, segment_id=0))
self.store_replays(mock_replay(seq2_timestamp, self.project.id, replay_id, segment_id=1))

with self.feature(REPLAYS_FEATURES):
# Query at a time interval which returns both segments.
response = self.client.get(self.url + f"?timestamp={int(seq2_timestamp.timestamp())}")
assert request_accessibility_issues.called
assert len(request_accessibility_issues.call_args[0][0]) == 2
assert response.status_code == 200

@patch(
"sentry.replays.endpoints.project_replay_accessibility_issues.request_accessibility_issues"
)
def test_get_replay_accessibility_issues_by_timestamp_reduced_set(
self, request_accessibility_issues
):
request_accessibility_issues.return_value = {
"meta": {"total": 1},
"data": [
{
"elements": [
{
"alternatives": [{"id": "button-has-visible-text", "message": "m"}],
"element": '<button class="svelte-19ke1iv">',
"target": ["button:nth-child(1)"],
}
],
"help_url": "url",
"help": "Buttons must have discernible text",
"id": "button-name",
"impact": "critical",
"timestamp": 1695967678108,
}
],
}

replay_id = self.replay_id
seq1_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=10)
seq2_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=5)

self.store_replays(mock_replay(seq1_timestamp, self.project.id, replay_id, segment_id=0))
self.store_replays(mock_replay(seq2_timestamp, self.project.id, replay_id, segment_id=1))

with self.feature(REPLAYS_FEATURES):
# Query at a time interval which returns only the first segment.
response = self.client.get(
self.url + f"?timestamp={int(seq2_timestamp.timestamp()) - 1}"
)
assert request_accessibility_issues.called
assert len(request_accessibility_issues.call_args[0][0]) == 1
assert response.status_code == 200
Loading