feat(replays): switch to replay_id column from tag (#68950)
We want queries to use the replay_id column instead of the tag we previously set,
since we no longer populate the tag for non-JavaScript platforms. Depends on
getsentry/snuba#5777.
JoshFerge authored Apr 22, 2024
1 parent f69663c commit 6db38a2
Showing 8 changed files with 62 additions and 83 deletions.
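In short, search filters move from the tag-era `replayId` syntax to the first-class `replay.id` column. A minimal sketch of the before/after filter strings, based on the `FILTER_HAS_A_REPLAY` constants changed in replay_counts.py below (the helper function is hypothetical):

    # Old: tag set only by the JavaScript SDK; empty string means "no replay".
    OLD_FILTER_HAS_A_REPLAY = " AND !replayId:''"

    # New: first-class column, populated for every platform.
    NEW_FILTER_HAS_A_REPLAY = ' AND !replay.id:""'

    def with_replay_filter(query: str) -> str:
        # Hypothetical helper: append the "has a replay" condition to a search query.
        return query + NEW_FILTER_HAS_A_REPLAY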
2 changes: 1 addition & 1 deletion src/sentry/models/group.py
@@ -213,7 +213,7 @@ class EventOrdering(Enum):
LATEST = ["-timestamp", "-event_id"]
OLDEST = ["timestamp", "event_id"]
MOST_HELPFUL = [
"-replayId",
"-replay.id",
"-profile.id",
"num_processing_errors",
"-trace.sampled",
9 changes: 0 additions & 9 deletions src/sentry/replays/endpoints/organization_replay_count.py
@@ -24,15 +24,6 @@
from sentry.snuba.dataset import Dataset
from sentry.types.ratelimit import RateLimit, RateLimitCategory

MAX_REPLAY_COUNT = 51
MAX_VALS_PROVIDED = {
"issue.id": 25,
"transaction": 25,
"replay_id": 100,
}

FILTER_HAS_A_REPLAY = "AND !replayId:''"


class ReplayDataSourceValidator(serializers.Serializer):
data_source = serializers.ChoiceField(
31 changes: 16 additions & 15 deletions src/sentry/replays/usecases/replay_counts.py
@@ -9,8 +9,8 @@
from sentry.api.event_search import ParenExpression, SearchFilter, parse_search_query
from sentry.models.group import Group
from sentry.replays.query import query_replays_count
from sentry.search.events.builder import QueryBuilder
from sentry.search.events.types import QueryBuilderConfig, SnubaParams
from sentry.search.events.types import SnubaParams
from sentry.snuba import discover, issue_platform
from sentry.snuba.dataset import Dataset

MAX_REPLAY_COUNT = 51
@@ -20,7 +20,7 @@
"replay_id": 100,
}

FILTER_HAS_A_REPLAY = " AND !replayId:''"
FILTER_HAS_A_REPLAY = ' AND !replay.id:""'


def get_replay_counts(snuba_params: SnubaParams, query, return_ids, data_source) -> dict[str, Any]:
@@ -30,6 +30,7 @@ def get_replay_counts(snuba_params: SnubaParams, query, return_ids, data_source)
- If the identifier is 'replay_id', the returned count is always 1. Use this to check the existence of replay_ids
- Set the flag 'return_ids' to get the replay_ids (32 char hex strings) for each identifier
"""

if snuba_params.start is None or snuba_params.end is None or snuba_params.organization is None:
raise ValueError("Must provide start and end")

@@ -62,6 +63,12 @@ def _get_replay_id_mappings(
If select_column is replay_id, return an identity map of replay_id -> [replay_id].
The keys of the returned dict are UUIDs, represented as 32 char hex strings (all '-'s stripped)
"""

if data_source == Dataset.Discover:
search_query_func = discover.query
elif data_source == Dataset.IssuePlatform:
search_query_func = issue_platform.query # type: ignore[assignment]

select_column, column_value = _get_select_column(query)
query = query + FILTER_HAS_A_REPLAY if data_source == Dataset.Discover else query

@@ -95,27 +102,21 @@ def _get_replay_id_mappings(
projects=[group.project for group in groups],
)

builder = QueryBuilder(
dataset=data_source,
results = search_query_func(
params={},
snuba_params=snuba_params,
selected_columns=["group_uniq_array(100,replayId)", select_column],
selected_columns=["group_uniq_array(100,replay.id)", select_column],
query=query,
limit=25,
offset=0,
config=QueryBuilderConfig(
functions_acl=["group_uniq_array"],
),
)

discover_results = builder.run_query(
referrer="api.organization-issue-replay-count", use_cache=True
functions_acl=["group_uniq_array"],
referrer="api.organization-issue-replay-count",
)

replay_id_to_issue_map = defaultdict(list)

for row in discover_results["data"]:
for replay_id in row["group_uniq_array_100_replayId"]:
for row in results["data"]:
for replay_id in row["group_uniq_array_100_replay_id"]:
# When no replay exists these strings are provided in their empty
# state rather than null. This can cause downstream problems so
# we filter them out.
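The replay_counts.py refactor replaces the hand-rolled QueryBuilder with whichever dataset query function applies. A condensed sketch of the resulting flow, assuming the names shown in the diff (the ValueError fallback is an assumption; the diff leaves other datasets unhandled):

    from sentry.snuba import discover, issue_platform
    from sentry.snuba.dataset import Dataset

    def count_rows_with_replays(data_source, snuba_params, query, select_column):
        # Dispatch to the dataset-specific query function, as in the diff above.
        if data_source == Dataset.Discover:
            search_query_func = discover.query
        elif data_source == Dataset.IssuePlatform:
            search_query_func = issue_platform.query
        else:
            raise ValueError(f"unsupported data source: {data_source}")  # assumption

        results = search_query_func(
            params={},
            snuba_params=snuba_params,
            selected_columns=["group_uniq_array(100,replay.id)", select_column],
            query=query,
            limit=25,
            offset=0,
            functions_acl=["group_uniq_array"],
            referrer="api.organization-issue-replay-count",
        )

        mapping = {}
        for row in results["data"]:
            # Empty strings mean "no replay exists"; filter them out,
            # per the comment in the diff.
            mapping[row[select_column]] = [
                r for r in row["group_uniq_array_100_replay_id"] if r
            ]
        return mapping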
7 changes: 3 additions & 4 deletions src/sentry/search/events/builder/discover.py
@@ -307,13 +307,11 @@ def resolve_column_name(self, col: str) -> str:
# TODO when utils/snuba.py becomes typed don't need this extra annotation
column_resolver: Callable[[str], str] = resolve_column(self.dataset)
column_name = column_resolver(col)

# If the original column was passed in as tag[X], then there won't be a conflict
# and there's no need to prefix the tag
if not col.startswith("tags[") and column_name.startswith("tags["):
self.prefixed_to_tag_map[f"tags_{col}"] = col
self.tag_to_prefixed_map[col] = f"tags_{col}"

return column_name

def resolve_query(
@@ -1091,7 +1089,6 @@ def aliased_column(self, name: str) -> SelectType:
:param name: The unresolved sentry name.
:param alias: The expected alias in the result.
"""

# TODO: This method should use an aliased column from the SDK once
# that is available to skip these hacks that we currently have to
# do aliasing.
@@ -1578,14 +1575,16 @@ def default_filter_converter(
raise InvalidSearchQuery(INVALID_SPAN_ID.format(name))

# Validate event ids, trace ids, and profile ids are uuids
if name in {"id", "trace", "profile.id"}:
if name in {"id", "trace", "profile.id", "replay.id"}:
if search_filter.value.is_wildcard():
raise InvalidSearchQuery(WILDCARD_NOT_ALLOWED.format(name))
elif not search_filter.value.is_event_id():
if name == "trace":
label = "Filter Trace ID"
elif name == "profile.id":
label = "Filter Profile ID"
elif name == "replay.id":
label = "Filter Replay ID"
else:
label = "Filter ID"
raise InvalidSearchQuery(INVALID_ID_DETAILS.format(label))
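With `replay.id` added to the UUID-validated set, filters on it now behave like `trace` and `profile.id` filters. A hypothetical illustration of which search strings pass or fail (error texts paraphrased; the exact templates live in WILDCARD_NOT_ALLOWED and INVALID_ID_DETAILS, which aren't shown in the diff):

    # Accepted: value parses as a UUID (32-char hex; dashes allowed).
    ok = "replay.id:b58a67446c914f44a4e329763420047b"

    # Rejected: wildcards are not allowed on UUID columns.
    bad_wildcard = "replay.id:b58a*"    # raises InvalidSearchQuery

    # Rejected: not a UUID; the error is labeled "Filter Replay ID".
    bad_value = "replay.id:not-a-uuid"  # raises InvalidSearchQuery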
16 changes: 14 additions & 2 deletions src/sentry/snuba/events.py
@@ -742,8 +742,20 @@ class Columns(Enum):
REPLAY_ID = Column(
group_name=None,
event_name="replay_id",
transaction_name=None,
discover_name=None,
transaction_name="replay_id",
discover_name="replay_id",
issue_platform_name="replay_id",
alias="replay.id",
)
# We used to set the replay_id as a tag on error events as
# replayId. We allow this query for backwards compatibility,
# but in the future shouldn't be displayed in the UI anywhere
# as a suggested column.
REPLAY_ID_DEPRECATED = Column(
group_name=None,
event_name="replay_id",
transaction_name="replay_id",
discover_name="replay_id",
issue_platform_name="replay_id",
alias="replayId",
)
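Both enum entries resolve to the same underlying `replay_id` column; only the public alias differs, which is what keeps old `replayId` queries working. A small sketch of the relationship (attribute access assumes `Columns` is an Enum of `Column` namedtuples, as in sentry/snuba/events.py):

    from sentry.snuba.events import Columns

    # Canonical spelling, the one to surface in the UI going forward.
    assert Columns.REPLAY_ID.value.alias == "replay.id"

    # Deprecated tag-era spelling, kept only for backwards compatibility.
    assert Columns.REPLAY_ID_DEPRECATED.value.alias == "replayId"

    # Both point at the same dataset column.
    assert (
        Columns.REPLAY_ID.value.discover_name
        == Columns.REPLAY_ID_DEPRECATED.value.discover_name
        == "replay_id"
    )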
5 changes: 3 additions & 2 deletions src/sentry/utils/snuba.py
@@ -144,12 +144,13 @@ def log_snuba_info(content):
"category": "sentry_tags[category]",
"span.category": "sentry_tags[category]",
"span.status_code": "sentry_tags[status_code]",
"replay_id": "sentry_tags[replay_id]",
"replay.id": "sentry_tags[replay_id]",
"resource.render_blocking_status": "sentry_tags[resource.render_blocking_status]",
"http.response_content_length": "sentry_tags[http.response_content_length]",
"http.decoded_response_content_length": "sentry_tags[http.decoded_response_content_length]",
"http.response_transfer_size": "sentry_tags[http.response_transfer_size]",
"app_start_type": "sentry_tags[app_start_type]",
"replay.id": "sentry_tags[replay_id]",
"browser.name": "sentry_tags[browser.name]",
"origin.transaction": "sentry_tags[transaction]",
"is_transaction": "is_segment",
@@ -1225,6 +1226,7 @@ def _resolve_column(col):

# Some dataset specific logic:
if dataset == Dataset.Discover:

if isinstance(col, (list, tuple)) or col in ("project_id", "group_id"):
return col
elif (
@@ -1249,7 +1251,6 @@ def _resolve_column(col):
span_op_breakdown_name = get_span_op_breakdown_name(col)
if "span_op_breakdowns_key" in DATASETS[dataset] and span_op_breakdown_name:
return f"span_op_breakdowns[{span_op_breakdown_name}]"

return f"tags[{col}]"

return _resolve_column
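In the spans column map, `replay.id` now resolves explicitly to `sentry_tags[replay_id]`, while unknown names still fall through to the generic tag lookup at the end of `_resolve_column`. A simplified, hypothetical model of that rule:

    # Toy stand-in for the DATASETS mapping edited above.
    SPAN_COLUMNS = {
        "replay.id": "sentry_tags[replay_id]",
        "browser.name": "sentry_tags[browser.name]",
    }

    def resolve(col: str) -> str:
        # Known columns get their dataset-specific name; everything else falls
        # back to a generic tag lookup, mirroring `return f"tags[{col}]"`.
        return SPAN_COLUMNS.get(col, f"tags[{col}]")

    assert resolve("replay.id") == "sentry_tags[replay_id]"
    assert resolve("replayId") == "tags[replayId]"  # old spelling is just a tag here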
66 changes: 19 additions & 47 deletions tests/sentry/replays/test_organization_replay_count.py
@@ -30,15 +30,15 @@ class OrganizationReplayCountEndpointTest(
def setUp(self):
super().setUp()
self.project.update(flags=F("flags").bitor(Project.flags.has_replays))
self.min_ago = before_now(minutes=1)
self.min_ago = before_now(minutes=2)
self.login_as(user=self.user)
self.url = reverse(
"sentry-api-0-organization-replay-count",
kwargs={"organization_slug": self.project.organization.slug},
)
self.features = {"organizations:session-replay": True}

def test_simple(self):
def test_simple_b(self):
event_id_a = "a" * 32
event_id_b = "b" * 32
replay1_id = uuid.uuid4().hex
@@ -70,7 +70,7 @@ def test_simple(self):
data={
"event_id": event_id_a,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
@@ -79,7 +79,7 @@ def test_simple(self):
data={
"event_id": uuid.uuid4().hex,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay2_id},
"contexts": {"replay": {"replay_id": replay2_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
@@ -88,7 +88,9 @@ def test_simple(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": "z" * 32}, # a replay id that doesn't exist
"contexts": {
"replay": {"replay_id": uuid.uuid4().hex}
}, # a replay id that doesn't exist
"fingerprint": ["group-1"],
},
project_id=self.project.id,
@@ -97,14 +99,15 @@ def test_simple(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay3_id},
"contexts": {"replay": {"replay_id": replay3_id}},
"fingerprint": ["group-2"],
},
project_id=self.project.id,
)

query = {"query": f"(issue.id:[{event_a.group.id}, {event_c.group.id}] or abc)"}
with self.feature(self.features):

response = self.client.get(self.url, query, format="json")

expected = {
@@ -146,7 +149,7 @@ def test_simple_return_ids(self):
data={
"event_id": event_id_a,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
@@ -155,7 +158,7 @@ def test_simple_return_ids(self):
data={
"event_id": uuid.uuid4().hex,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay2_id},
"contexts": {"replay": {"replay_id": replay2_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
@@ -164,7 +167,7 @@ def test_simple_return_ids(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": "z" * 32}, # a replay id that doesn't exist
"contexts": {"replay": {"replay_id": uuid.uuid4().hex}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
@@ -173,7 +176,7 @@ def test_simple_return_ids(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay3_id},
"contexts": {"replay": {"replay_id": replay3_id}},
"fingerprint": ["group-2"],
},
project_id=self.project.id,
@@ -315,7 +318,7 @@ def test_one_replay_multiple_issues(self):
data={
"event_id": event_id_a,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
@@ -324,7 +327,7 @@ def test_one_replay_multiple_issues(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-2"],
},
project_id=self.project.id,
@@ -357,7 +360,7 @@ def test_one_replay_same_issue_twice(self):
data={
"event_id": event_id_a,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
@@ -366,7 +369,7 @@ def test_one_replay_same_issue_twice(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
@@ -397,7 +400,7 @@ def test_simple_transaction(self):
data={
"event_id": event_id_a,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"transaction": "t-1",
},
project_id=self.project.id,
@@ -413,36 +416,6 @@ def test_simple_transaction(self):
assert response.status_code == 200, response.content
assert response.data == expected

def test_max_51(self):
replay_ids = [uuid.uuid4().hex for _ in range(100)]
for replay_id in replay_ids:
self.store_replays(
mock_replay(
datetime.datetime.now() - datetime.timedelta(seconds=22),
self.project.id,
replay_id,
)
)
event_a = self.store_event(
data={
"event_id": uuid.uuid4().hex,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay_id},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
)

query = {"query": f"issue.id:[{event_a.group.id}]"}
with self.feature(self.features):
response = self.client.get(self.url, query, format="json")

expected = {
event_a.group.id: 51,
}
assert response.status_code == 200, response.content
assert response.data == expected

def test_invalid_params_need_one_issue_id(self):
query = {"query": ""}
with self.feature(self.features):
@@ -580,7 +553,7 @@ def test_cross_organization_lookups(self):
data={
"event_id": event_id_a,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay1_id},
"contexts": {"replay": {"replay_id": replay1_id}},
"fingerprint": ["group-1"],
},
project_id=self.project.id,
@@ -607,7 +580,6 @@ def test_cross_organization_lookups(self):
data={
"event_id": event_id_b,
"timestamp": iso_format(self.min_ago),
"tags": {"replayId": replay2_id},
"fingerprint": ["group-2"],
},
project_id=project.id,
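The fixture changes above summarize the payload migration: the replay id moves from a top-level tag into the event's replay context, which is what feeds the new column. A minimal sketch of the two event shapes (all unrelated fields omitted):

    import uuid

    replay_id = uuid.uuid4().hex

    # Old: linkage via a tag, set only by the JavaScript SDK.
    old_event = {"tags": {"replayId": replay_id}}

    # New: linkage via the replay context, populated on any platform.
    new_event = {"contexts": {"replay": {"replay_id": replay_id}}}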