From 7dc1ecab8964998aa704183582d35fb2ab9b7e18 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 19 Sep 2024 16:43:51 -0400 Subject: [PATCH] feat(eap): prefix all sentry_tags items with sentry. --- snuba/web/rpc/common.py | 52 ++++++++++++++++------------- tests/web/rpc/test_common.py | 25 +++++++++----- tests/web/rpc/test_span_samples.py | 53 ++++++++++++++++++------------ 3 files changed, 79 insertions(+), 51 deletions(-) diff --git a/snuba/web/rpc/common.py b/snuba/web/rpc/common.py index 04ea50857a..3a32426e61 100644 --- a/snuba/web/rpc/common.py +++ b/snuba/web/rpc/common.py @@ -66,25 +66,29 @@ def transform(exp: Expression) -> Expression: # These are the columns which aren't stored in attr_str_ nor attr_num_ in clickhouse NORMALIZED_COLUMNS: Final[Mapping[str, AttributeKey.Type.ValueType]] = { - "organization_id": AttributeKey.Type.TYPE_INT, - "project_id": AttributeKey.Type.TYPE_INT, - "service": AttributeKey.Type.TYPE_STRING, - "span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage - "parent_span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage - "segment_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage - "segment_name": AttributeKey.Type.TYPE_STRING, - "is_segment": AttributeKey.Type.TYPE_BOOLEAN, - "duration_ms": AttributeKey.Type.TYPE_INT, - "exclusive_time_ms": AttributeKey.Type.TYPE_INT, - "retention_days": AttributeKey.Type.TYPE_INT, - "name": AttributeKey.Type.TYPE_STRING, - "sample_weight": AttributeKey.Type.TYPE_FLOAT, - "timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, - "start_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, - "end_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, + "sentry.organization_id": AttributeKey.Type.TYPE_INT, + "sentry.project_id": AttributeKey.Type.TYPE_INT, + "sentry.service": AttributeKey.Type.TYPE_STRING, + "sentry.span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage + "sentry.parent_span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage + "sentry.segment_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage + "sentry.segment_name": AttributeKey.Type.TYPE_STRING, + "sentry.is_segment": AttributeKey.Type.TYPE_BOOLEAN, + "sentry.duration_ms": AttributeKey.Type.TYPE_INT, + "sentry.exclusive_time_ms": AttributeKey.Type.TYPE_INT, + "sentry.retention_days": AttributeKey.Type.TYPE_INT, + "sentry.name": AttributeKey.Type.TYPE_STRING, + "sentry.sample_weight": AttributeKey.Type.TYPE_FLOAT, + "sentry.timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, + "sentry.start_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, + "sentry.end_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, } -TIMESTAMP_COLUMNS: Final[Set[str]] = {"timestamp", "start_timestamp", "end_timestamp"} +TIMESTAMP_COLUMNS: Final[Set[str]] = { + "sentry.timestamp", + "sentry.start_timestamp", + "sentry.end_timestamp", +} def attribute_key_to_expression(attr_key: AttributeKey) -> Expression: @@ -94,7 +98,7 @@ def attribute_key_to_expression(attr_key: AttributeKey) -> Expression: ) alias = attr_key.name - if attr_key.name == "trace_id": + if attr_key.name == "sentry.trace_id": if attr_key.type == AttributeKey.Type.TYPE_STRING: return f.CAST(column("trace_id"), "String", alias=alias) raise BadSnubaRPCRequestException( @@ -103,18 +107,22 @@ def attribute_key_to_expression(attr_key: AttributeKey) -> Expression: if attr_key.name in TIMESTAMP_COLUMNS: if attr_key.type == AttributeKey.Type.TYPE_STRING: - return f.CAST(column(attr_key.name), "String", alias=alias) + return f.CAST( + column(attr_key.name[len("sentry.") :]), "String", alias=alias + ) if attr_key.type == AttributeKey.Type.TYPE_INT: - return f.CAST(column(attr_key.name), "Int64", alias=alias) + return f.CAST(column(attr_key.name[len("sentry.") :]), "Int64", alias=alias) if attr_key.type == AttributeKey.Type.TYPE_FLOAT: - return f.CAST(column(attr_key.name), "Float64", alias=alias) + return f.CAST( + column(attr_key.name[len("sentry.") :]), "Float64", alias=alias + ) raise BadSnubaRPCRequestException( f"Attribute {attr_key.name} must be requested as a string, float, or integer, got {attr_key.type}" ) if attr_key.name in NORMALIZED_COLUMNS: if NORMALIZED_COLUMNS[attr_key.name] == attr_key.type: - return column(attr_key.name, alias=attr_key.name) + return column(attr_key.name[len("sentry.") :], alias=attr_key.name) raise BadSnubaRPCRequestException( f"Attribute {attr_key.name} must be requested as {NORMALIZED_COLUMNS[attr_key.name]}, got {attr_key.type}" ) diff --git a/tests/web/rpc/test_common.py b/tests/web/rpc/test_common.py index 2a01779109..c1ff109689 100644 --- a/tests/web/rpc/test_common.py +++ b/tests/web/rpc/test_common.py @@ -11,39 +11,48 @@ def test_expression_trace_id(self) -> None: assert attribute_key_to_expression( AttributeKey( type=AttributeKey.TYPE_STRING, - name="trace_id", + name="sentry.trace_id", ), - ) == f.CAST(column("trace_id"), "String", alias="trace_id") + ) == f.CAST(column("trace_id"), "String", alias="sentry.trace_id") def test_timestamp_columns(self) -> None: - for col in ["timestamp", "start_timestamp", "end_timestamp"]: + for col in [ + "sentry.timestamp", + "sentry.start_timestamp", + "sentry.end_timestamp", + ]: assert attribute_key_to_expression( AttributeKey( type=AttributeKey.TYPE_STRING, name=col, ), - ) == f.CAST(column(col), "String", alias=col) + ) == f.CAST(column(col[len("sentry.") :]), "String", alias=col) assert attribute_key_to_expression( AttributeKey( type=AttributeKey.TYPE_INT, name=col, ), - ) == f.CAST(column(col), "Int64", alias=col) + ) == f.CAST(column(col[len("sentry.") :]), "Int64", alias=col) assert attribute_key_to_expression( AttributeKey( type=AttributeKey.TYPE_FLOAT, name=col, ), - ) == f.CAST(column(col), "Float64", alias=col) + ) == f.CAST(column(col[len("sentry.") :]), "Float64", alias=col) def test_normalized_col(self) -> None: - for col in ["span_id", "parent_span_id", "segment_id", "service"]: + for col in [ + "sentry.span_id", + "sentry.parent_span_id", + "sentry.segment_id", + "sentry.service", + ]: assert attribute_key_to_expression( AttributeKey( type=AttributeKey.TYPE_STRING, name=col, ), - ) == column(col, alias=col) + ) == column(col[len("sentry.") :], alias=col) def test_attributes(self) -> None: assert attribute_key_to_expression( diff --git a/tests/web/rpc/test_span_samples.py b/tests/web/rpc/test_span_samples.py index 1e70c69ceb..bfa52a9f28 100644 --- a/tests/web/rpc/test_span_samples.py +++ b/tests/web/rpc/test_span_samples.py @@ -129,13 +129,13 @@ def test_basic(self) -> None: ), filter=TraceItemFilter( exists_filter=ExistsFilter( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="category") + key=AttributeKey(type=AttributeKey.TYPE_STRING, name="color") ) ), - keys=[AttributeKey(type=AttributeKey.TYPE_STRING, name="platform")], + keys=[AttributeKey(type=AttributeKey.TYPE_STRING, name="location")], order_by=[ SpanSamplesRequest.OrderBy( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="status") + key=AttributeKey(type=AttributeKey.TYPE_STRING, name="location") ) ], limit=10, @@ -159,13 +159,13 @@ def test_with_data(self, setup_teardown: Any) -> None: ), filter=TraceItemFilter( exists_filter=ExistsFilter( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="category") + key=AttributeKey(type=AttributeKey.TYPE_STRING, name="color") ) ), - keys=[AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.sdk.name")], + keys=[AttributeKey(type=AttributeKey.TYPE_STRING, name="server_name")], order_by=[ SpanSamplesRequest.OrderBy( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="status") + key=AttributeKey(type=AttributeKey.TYPE_STRING, name="server_name") ) ], limit=61, @@ -174,7 +174,7 @@ def test_with_data(self, setup_teardown: Any) -> None: assert [ dict((k, x.results[k].val_str) for k in x.results) for x in response.span_samples - ] == [{"sentry.sdk.name": "sentry.python.django"} for _ in range(60)] + ] == [{"server_name": "D23CXQ4GK2.local"} for _ in range(60)] def test_booleans_and_number_compares(self, setup_teardown: Any) -> None: ts = Timestamp(seconds=int(BASE_TIME.timestamp())) @@ -215,12 +215,14 @@ def test_booleans_and_number_compares(self, setup_teardown: Any) -> None: ) ), keys=[ - AttributeKey(type=AttributeKey.TYPE_BOOLEAN, name="is_segment"), - AttributeKey(type=AttributeKey.TYPE_STRING, name="span_id"), + AttributeKey(type=AttributeKey.TYPE_BOOLEAN, name="sentry.is_segment"), + AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.span_id"), ], order_by=[ SpanSamplesRequest.OrderBy( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="status") + key=AttributeKey( + type=AttributeKey.TYPE_STRING, name="sentry.status" + ) ) ], limit=61, @@ -231,7 +233,10 @@ def test_booleans_and_number_compares(self, setup_teardown: Any) -> None: (k, (x.results[k].val_bool or x.results[k].val_str)) for k in x.results ) for x in response.span_samples - ] == [{"is_segment": True, "span_id": "123456781234567d"} for _ in range(60)] + ] == [ + {"sentry.is_segment": True, "sentry.span_id": "123456781234567d"} + for _ in range(60) + ] def test_with_virtual_columns(self, setup_teardown: Any) -> None: ts = Timestamp(seconds=int(BASE_TIME.timestamp())) @@ -247,12 +252,16 @@ def test_with_virtual_columns(self, setup_teardown: Any) -> None: ), filter=TraceItemFilter( exists_filter=ExistsFilter( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="category") + key=AttributeKey( + type=AttributeKey.TYPE_STRING, name="sentry.category" + ) ) ), keys=[ - AttributeKey(type=AttributeKey.TYPE_STRING, name="project_name"), - AttributeKey(type=AttributeKey.TYPE_STRING, name="release_version"), + AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.project_name"), + AttributeKey( + type=AttributeKey.TYPE_STRING, name="sentry.release_version" + ), AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.sdk.name"), ], order_by=[ @@ -265,13 +274,13 @@ def test_with_virtual_columns(self, setup_teardown: Any) -> None: limit=61, virtual_column_contexts=[ VirtualColumnContext( - from_column_name="project_id", - to_column_name="project_name", + from_column_name="sentry.project_id", + to_column_name="sentry.project_name", value_map={"1": "sentry", "2": "snuba"}, ), VirtualColumnContext( - from_column_name="release", - to_column_name="release_version", + from_column_name="sentry.release", + to_column_name="sentry.release_version", value_map={_RELEASE_TAG: "4.2.0.69"}, ), ], @@ -282,9 +291,9 @@ def test_with_virtual_columns(self, setup_teardown: Any) -> None: for x in response.span_samples ] == [ { - "project_name": "sentry", + "sentry.project_name": "sentry", "sentry.sdk.name": "sentry.python.django", - "release_version": "4.2.0.69", + "sentry.release_version": "4.2.0.69", } for _ in range(60) ] @@ -303,7 +312,9 @@ def test_order_by_virtual_columns(self, setup_teardown: Any) -> None: ), filter=TraceItemFilter( exists_filter=ExistsFilter( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="category") + key=AttributeKey( + type=AttributeKey.TYPE_STRING, name="sentry.category" + ) ) ), keys=[