Skip to content

Commit

Permalink
feat(replays): Accept timestamp parameter when calculating accessibil…
Browse files Browse the repository at this point in the history
…ity issues (#60983)

Allows point in time accessibility analysis.

Epic: #55501
  • Loading branch information
cmanallen committed Dec 4, 2023
1 parent c82fb30 commit 8b63a74
Show file tree
Hide file tree
Showing 6 changed files with 208 additions and 16 deletions.
4 changes: 4 additions & 0 deletions src/sentry/replays/blueprints/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,10 @@ This resource does not accept any URI parameters and is not paginated. Responses

### Fetch Replay Accessibility Issues [GET]

- Parameters

- timestamp (optional, number) - A UNIX timestamp (seconds since epoch) marking the last moment to render a replay for accessibility analysis.

Retrieve a collection of accessibility issues.

**Attributes**
Expand Down
1 change: 1 addition & 0 deletions src/sentry/replays/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
REPLAY_DURATION_HOURS = 1
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@
from sentry.api.bases.project import ProjectEndpoint
from sentry.models.project import Project
from sentry.replays.lib.storage import make_filename
from sentry.replays.usecases.reader import fetch_direct_storage_segments_meta
from sentry.replays.usecases.reader import (
fetch_direct_storage_segments_meta,
segment_row_to_storage_meta,
)
from sentry.replays.usecases.segment import query_segment_storage_meta_by_timestamp
from sentry.types.ratelimit import RateLimit, RateLimitCategory
from sentry.utils.cursors import Cursor, CursorResult

Expand Down Expand Up @@ -67,13 +71,44 @@ def get(self, request: Request, project: Project, replay_id: str) -> Response:
except ValueError:
return Response(status=404)

timestamp_param = request.GET.get("timestamp", None)
if timestamp_param is None:
timestamp = None
else:
try:
timestamp = int(timestamp_param)
except TypeError:
timestamp = None
except ValueError:
raise ParseError("Invalid timestamp value specified.")

def data_fn(offset, limit):
# Increment a counter for every call to the accessibility service.
metrics.incr("session-replay-accessibility-issues-count")

# We only support direct-storage. Filestore is deprecated and should be removed from
# the driver.
segments = fetch_direct_storage_segments_meta(project.id, replay_id, offset, limit)
# We only support direct-storage. Filestore is deprecated and should be removed
# from the driver.
if timestamp is None:
# If no timestamp is provided we render 5 segments by convention.
segments = fetch_direct_storage_segments_meta(
project.id,
replay_id,
offset,
limit=5,
)
else:
# If a timestamp was provided we fetch every segment that started prior to the
# timestamp value.
results = query_segment_storage_meta_by_timestamp(
project.organization.id,
project.id,
replay_id,
timestamp,
)
segments = [
segment_row_to_storage_meta(project.id, replay_id, row)
for row in results["data"]
]

# Make a POST request to the replay-analyzer service. The files will be downloaded
# and evaluated on the remote system. The accessibility output is then redirected to
Expand Down
27 changes: 16 additions & 11 deletions src/sentry/replays/usecases/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,17 +226,22 @@ def _fetch_segments_from_snuba(
)
response = raw_snql_query(snuba_request, "replays.query.download_replay_segments")

return [
RecordingSegmentStorageMeta(
project_id=project_id,
replay_id=replay_id,
segment_id=item["segment_id"],
retention_days=item["retention_days"],
date_added=datetime.fromisoformat(item["timestamp"]),
file_id=None,
)
for item in response["data"]
]
return [segment_row_to_storage_meta(project_id, replay_id, item) for item in response["data"]]


def segment_row_to_storage_meta(
project_id: int,
replay_id: str,
row,
) -> RecordingSegmentStorageMeta:
return RecordingSegmentStorageMeta(
project_id=project_id,
replay_id=replay_id,
segment_id=row["segment_id"],
retention_days=row["retention_days"],
date_added=datetime.fromisoformat(row["timestamp"]),
file_id=None,
)


# BLOB DOWNLOAD BEHAVIOR.
Expand Down
67 changes: 67 additions & 0 deletions src/sentry/replays/usecases/segment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from __future__ import annotations

from datetime import datetime, timedelta
from typing import Any, Mapping

from snuba_sdk import Column, Condition, Entity, Granularity, Op, Query, Request
from snuba_sdk.orderby import Direction, OrderBy

from sentry.replays.constants import REPLAY_DURATION_HOURS
from sentry.utils.snuba import raw_snql_query


def query_segment_storage_meta_by_timestamp(
organization_id: int,
project_id: int,
replay_id: str,
timestamp: int,
) -> Mapping[str, Any]:
# These timestamps do not specify a timezone. They are presumed UTC but we can not verify.
# Since these times originate from the same place it is enough to treat them relatively. We
# ignore the issue of timezone and utilize the replay's internal consistency.
#
# This means calls to the server's internal clock are prohibited.
end = datetime.fromtimestamp(timestamp + 1)

# For safety look back one additional hour.
start = end - timedelta(hours=REPLAY_DURATION_HOURS + 1)

query = query_storage_meta()
query = query.set_where(
[
Condition(Column("project_id"), Op.EQ, project_id),
Condition(Column("replay_id"), Op.EQ, replay_id),
Condition(Column("segment_id"), Op.IS_NOT_NULL),
Condition(Column("timestamp"), Op.GTE, start),
Condition(Column("timestamp"), Op.LT, end),
]
)

results = raw_snql_query(
Request(
dataset="replays",
app_id="replay-backend-web",
query=query,
tenant_ids={"organization_id": organization_id},
),
referrer="replays.query.query_segment_from_timestamp",
# There's a very low probability two users will query the same timestamp on the same
# replay. Unless there's some accessibility url sharing behavior in the product. Until
# then this is disabled to save us from a request to redis.
use_cache=False,
)

items = results["data"]
if len(items) == 0:
raise ValueError("No segment could be found for timestamp.")

return results


def query_storage_meta() -> Query:
return Query(
match=Entity("replays"),
select=[Column("segment_id"), Column("retention_days"), Column("timestamp")],
orderby=[OrderBy(Column("segment_id"), Direction.ASC)],
granularity=Granularity(3600),
)
82 changes: 81 additions & 1 deletion tests/sentry/replays/test_project_replay_accessibility_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ def test_get_replay_accessibility_issues(self, request_accessibility_issues):
with self.feature(REPLAYS_FEATURES):
response = self.client.get(self.url)

assert request_accessibility_issues.called
assert response.status_code == 200
assert response.has_header("X-Hits")
assert request_accessibility_issues.called

response_data = response.json()
assert len(response_data["data"]) == 1
Expand All @@ -86,3 +86,83 @@ def test_get_replay_accessibility_issues(self, request_accessibility_issues):
assert "alternatives" in response_data["data"][0]["elements"][0]
assert "element" in response_data["data"][0]["elements"][0]
assert "target" in response_data["data"][0]["elements"][0]

@patch(
"sentry.replays.endpoints.project_replay_accessibility_issues.request_accessibility_issues"
)
def test_get_replay_accessibility_issues_by_timestamp(self, request_accessibility_issues):
request_accessibility_issues.return_value = {
"meta": {"total": 1},
"data": [
{
"elements": [
{
"alternatives": [{"id": "button-has-visible-text", "message": "m"}],
"element": '<button class="svelte-19ke1iv">',
"target": ["button:nth-child(1)"],
}
],
"help_url": "url",
"help": "Buttons must have discernible text",
"id": "button-name",
"impact": "critical",
"timestamp": 1695967678108,
}
],
}

replay_id = self.replay_id
seq1_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=10)
seq2_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=5)

self.store_replays(mock_replay(seq1_timestamp, self.project.id, replay_id, segment_id=0))
self.store_replays(mock_replay(seq2_timestamp, self.project.id, replay_id, segment_id=1))

with self.feature(REPLAYS_FEATURES):
# Query at a time interval which returns both segments.
response = self.client.get(self.url + f"?timestamp={int(seq2_timestamp.timestamp())}")
assert request_accessibility_issues.called
assert len(request_accessibility_issues.call_args[0][0]) == 2
assert response.status_code == 200

@patch(
"sentry.replays.endpoints.project_replay_accessibility_issues.request_accessibility_issues"
)
def test_get_replay_accessibility_issues_by_timestamp_reduced_set(
self, request_accessibility_issues
):
request_accessibility_issues.return_value = {
"meta": {"total": 1},
"data": [
{
"elements": [
{
"alternatives": [{"id": "button-has-visible-text", "message": "m"}],
"element": '<button class="svelte-19ke1iv">',
"target": ["button:nth-child(1)"],
}
],
"help_url": "url",
"help": "Buttons must have discernible text",
"id": "button-name",
"impact": "critical",
"timestamp": 1695967678108,
}
],
}

replay_id = self.replay_id
seq1_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=10)
seq2_timestamp = datetime.datetime.now() - datetime.timedelta(seconds=5)

self.store_replays(mock_replay(seq1_timestamp, self.project.id, replay_id, segment_id=0))
self.store_replays(mock_replay(seq2_timestamp, self.project.id, replay_id, segment_id=1))

with self.feature(REPLAYS_FEATURES):
# Query at a time interval which returns only the first segment.
response = self.client.get(
self.url + f"?timestamp={int(seq2_timestamp.timestamp()) - 1}"
)
assert request_accessibility_issues.called
assert len(request_accessibility_issues.call_args[0][0]) == 1
assert response.status_code == 200

0 comments on commit 8b63a74

Please sign in to comment.