Skip to content

Commit

Permalink
feat(spans): Fix requests table span indexed perf (#76604)
Browse files Browse the repository at this point in the history
### Summary
This changes the condition so that 'span.module:http' on the requests
table (and any other request that filters on a known category) uses a
condition that clickhouse can recognize in order to do indexed span
lookups, instead of the existing transform which misses the index.
Should be multiple times faster after this change.

#### SnQL
This simplifies the filtering when using known (non "Other") categories
with `span.module:`

**Query before**
```
'query': 'MATCH (spans) SELECT sentry_tags[status_code] AS '
          '`span.status_code`, op AS `span.op`, span_id AS `id`, project_id AS '
          "`project.name` WHERE if(in(op, array('db.redis', 'db.sql.room')), "
          "transform(op, array('db.redis', 'db.sql.room'), array('cache', "
          "'other'), 'other'), transform(sentry_tags[category], array('cache', "
          "'db', 'http', 'queue', 'resource'), array('cache', 'db', 'http', "
          "'queue', 'resource'), 'other')) AS `span.module` = 'other' AND "
          "ifNull(sentry_tags[status_code], '') = '200' AND timestamp >= "
          "toDateTime('2024-05-29T15:03:43.573039') AND timestamp < "
          "toDateTime('2024-08-27T15:03:43.573039') AND project_id IN "
          'array(4554597948391425) LIMIT 101 OFFSET 0
```

**Query after**
```
 'query': 'MATCH (spans) SELECT sentry_tags[status_code] AS '
          '`span.status_code`, op AS `span.op`, span_id AS `id`, project_id AS '
          "`project.name` WHERE sentry_tags[category] = 'http' AND "
          "ifNull(sentry_tags[status_code], '') = '200' AND timestamp >= "
          "toDateTime('2024-05-29T15:06:35.906113') AND timestamp < "
          "toDateTime('2024-08-27T15:06:35.906113') AND project_id IN "
          'array(4554597959663617) LIMIT 101 OFFSET 0',
```
  • Loading branch information
k-fish authored Aug 27, 2024
1 parent 9bc349c commit d1d3a32
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 14 deletions.
2 changes: 2 additions & 0 deletions src/sentry/search/events/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,8 @@ class ThresholdDict(TypedDict):

METRICS_FUNCTION_ALIASES: dict[str, str] = {}

SPAN_MODULE_CATEGORY_VALUES = ["cache", "db", "http", "queue", "resource"]

SPAN_FUNCTION_ALIASES = {
"sps": "eps",
"spm": "epm",
Expand Down
16 changes: 2 additions & 14 deletions src/sentry/search/events/datasets/field_aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,20 +107,8 @@ def resolve_span_module(builder: BaseQueryBuilder, alias: str) -> SelectType:
"transform",
[
builder.column("span.category"),
[
"cache",
"db",
"http",
"queue",
"resource",
],
[
"cache",
"db",
"http",
"queue",
"resource",
],
constants.SPAN_MODULE_CATEGORY_VALUES,
constants.SPAN_MODULE_CATEGORY_VALUES,
"other",
],
),
Expand Down
14 changes: 14 additions & 0 deletions src/sentry/search/events/datasets/filter_aliases.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,20 @@ def lowercase_search(builder: BaseQueryBuilder, search_filter: SearchFilter) ->
)


def span_module_filter_converter(
builder: BaseQueryBuilder, search_filter: SearchFilter
) -> WhereType | None:
module_value = search_filter.value.raw_value.lower()

if module_value != "cache" and module_value in constants.SPAN_MODULE_CATEGORY_VALUES:
# Creating the condition this way hits the tags index for span_module if using an actual value
# "Other" doesn't work for filtering in this way so we fall back to the transform of the span module field.
# "Cache" may be mapped on a per span basis, so it can not skip the transform and hit the index.
return Condition(builder.resolve_field("sentry_tags[category]"), Op.EQ, module_value)

return builder.default_filter_converter(search_filter)


def span_status_filter_converter(
builder: BaseQueryBuilder, search_filter: SearchFilter
) -> WhereType | None:
Expand Down
3 changes: 3 additions & 0 deletions src/sentry/search/events/datasets/spans_indexed.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ def search_filter_converter(
constants.SPAN_OP: lambda search_filter: filter_aliases.lowercase_search(
self.builder, search_filter
),
constants.SPAN_MODULE_ALIAS: lambda search_filter: filter_aliases.span_module_filter_converter(
self.builder, search_filter
),
constants.SPAN_STATUS: lambda search_filter: filter_aliases.span_status_filter_converter(
self.builder, search_filter
),
Expand Down
13 changes: 13 additions & 0 deletions tests/sentry/search/events/builder/test_spans_indexed.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,3 +496,16 @@ def test_id_column_permit_in_operator(params, column, query, operator):
or nullable_condition in builder.where
or non_nullable_condition in builder.where
)


@django_db_all
def test_span_module_optimization_where_clause(params):
builder = SpansIndexedQueryBuilder(
Dataset.SpansIndexed,
params,
query="span.module:http",
selected_columns=["count"],
)

condition = Condition(builder.resolve_field("sentry_tags[category]"), Op.EQ, "http")
assert condition in builder.where
40 changes: 40 additions & 0 deletions tests/snuba/api/endpoints/test_organization_events_span_indexed.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,46 @@ def test_network_span(self):
assert data[0]["span.status_code"] == "200"
assert meta["dataset"] == self.dataset

def test_other_category_span(self):
self.store_spans(
[
self.create_span(
{
"sentry_tags": {
"action": "GET",
"category": "alternative",
"description": "GET https://*.resource.com",
"domain": "*.resource.com",
"op": "alternative",
"status_code": "200",
"transaction": "/api/0/data/",
"transaction.method": "GET",
"transaction.op": "http.server",
}
},
start_ts=self.ten_mins_ago,
),
],
is_eap=self.is_eap,
)

response = self.do_request(
{
"field": ["span.op", "span.status_code"],
"query": "span.module:other span.status_code:200",
"project": self.project.id,
"dataset": self.dataset,
}
)

assert response.status_code == 200, response.content
data = response.data["data"]
meta = response.data["meta"]
assert len(data) == 1
assert data[0]["span.op"] == "alternative"
assert data[0]["span.status_code"] == "200"
assert meta["dataset"] == self.dataset

def test_inp_span(self):
replay_id = uuid.uuid4().hex
self.store_spans(
Expand Down

0 comments on commit d1d3a32

Please sign in to comment.