From f6425ebb7cffec84cfb134a03bcf889f573c6ba7 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 19 Sep 2024 16:42:01 -0400 Subject: [PATCH 1/4] fix(eap): make truncate even more permissive --- snuba/web/rpc/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snuba/web/rpc/common.py b/snuba/web/rpc/common.py index 716cd2162d..04ea50857a 100644 --- a/snuba/web/rpc/common.py +++ b/snuba/web/rpc/common.py @@ -26,7 +26,7 @@ def truncate_request_meta_to_day(meta: RequestMeta) -> None: start_timestamp = datetime.utcfromtimestamp(meta.start_timestamp.seconds) end_timestamp = datetime.utcfromtimestamp(meta.end_timestamp.seconds) start_timestamp = start_timestamp.replace( - day=start_timestamp.day, hour=0, minute=0, second=0, microsecond=0 + day=start_timestamp.day - 1, hour=0, minute=0, second=0, microsecond=0 ) end_timestamp = end_timestamp.replace( day=end_timestamp.day + 1, hour=0, minute=0, second=0, microsecond=0 From 3ddd63b86ba9d711cad6bb461fce49c345219eea Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 19 Sep 2024 16:14:11 -0400 Subject: [PATCH 2/4] fix(eap): namespace sentry_tags with sentry. --- rust_snuba/src/processors/eap_spans.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust_snuba/src/processors/eap_spans.rs b/rust_snuba/src/processors/eap_spans.rs index a78181e4a6..2242b4bb8d 100644 --- a/rust_snuba/src/processors/eap_spans.rs +++ b/rust_snuba/src/processors/eap_spans.rs @@ -139,7 +139,7 @@ impl From for EAPSpan { if k == "transaction" { res.segment_name = v.clone(); } else { - insert_string(k.clone(), v.clone()); + insert_string(format!("sentry.{}", k), v.clone()); } }) } From 7dc1ecab8964998aa704183582d35fb2ab9b7e18 Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Thu, 19 Sep 2024 16:43:51 -0400 Subject: [PATCH 3/4] feat(eap): prefix all sentry_tags items with sentry. --- snuba/web/rpc/common.py | 52 ++++++++++++++++------------- tests/web/rpc/test_common.py | 25 +++++++++----- tests/web/rpc/test_span_samples.py | 53 ++++++++++++++++++------------ 3 files changed, 79 insertions(+), 51 deletions(-) diff --git a/snuba/web/rpc/common.py b/snuba/web/rpc/common.py index 04ea50857a..3a32426e61 100644 --- a/snuba/web/rpc/common.py +++ b/snuba/web/rpc/common.py @@ -66,25 +66,29 @@ def transform(exp: Expression) -> Expression: # These are the columns which aren't stored in attr_str_ nor attr_num_ in clickhouse NORMALIZED_COLUMNS: Final[Mapping[str, AttributeKey.Type.ValueType]] = { - "organization_id": AttributeKey.Type.TYPE_INT, - "project_id": AttributeKey.Type.TYPE_INT, - "service": AttributeKey.Type.TYPE_STRING, - "span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage - "parent_span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage - "segment_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage - "segment_name": AttributeKey.Type.TYPE_STRING, - "is_segment": AttributeKey.Type.TYPE_BOOLEAN, - "duration_ms": AttributeKey.Type.TYPE_INT, - "exclusive_time_ms": AttributeKey.Type.TYPE_INT, - "retention_days": AttributeKey.Type.TYPE_INT, - "name": AttributeKey.Type.TYPE_STRING, - "sample_weight": AttributeKey.Type.TYPE_FLOAT, - "timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, - "start_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, - "end_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, + "sentry.organization_id": AttributeKey.Type.TYPE_INT, + "sentry.project_id": AttributeKey.Type.TYPE_INT, + "sentry.service": AttributeKey.Type.TYPE_STRING, + "sentry.span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage + "sentry.parent_span_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage + "sentry.segment_id": AttributeKey.Type.TYPE_STRING, # this is converted by a processor on the storage + "sentry.segment_name": AttributeKey.Type.TYPE_STRING, + "sentry.is_segment": AttributeKey.Type.TYPE_BOOLEAN, + "sentry.duration_ms": AttributeKey.Type.TYPE_INT, + "sentry.exclusive_time_ms": AttributeKey.Type.TYPE_INT, + "sentry.retention_days": AttributeKey.Type.TYPE_INT, + "sentry.name": AttributeKey.Type.TYPE_STRING, + "sentry.sample_weight": AttributeKey.Type.TYPE_FLOAT, + "sentry.timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, + "sentry.start_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, + "sentry.end_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED, } -TIMESTAMP_COLUMNS: Final[Set[str]] = {"timestamp", "start_timestamp", "end_timestamp"} +TIMESTAMP_COLUMNS: Final[Set[str]] = { + "sentry.timestamp", + "sentry.start_timestamp", + "sentry.end_timestamp", +} def attribute_key_to_expression(attr_key: AttributeKey) -> Expression: @@ -94,7 +98,7 @@ def attribute_key_to_expression(attr_key: AttributeKey) -> Expression: ) alias = attr_key.name - if attr_key.name == "trace_id": + if attr_key.name == "sentry.trace_id": if attr_key.type == AttributeKey.Type.TYPE_STRING: return f.CAST(column("trace_id"), "String", alias=alias) raise BadSnubaRPCRequestException( @@ -103,18 +107,22 @@ def attribute_key_to_expression(attr_key: AttributeKey) -> Expression: if attr_key.name in TIMESTAMP_COLUMNS: if attr_key.type == AttributeKey.Type.TYPE_STRING: - return f.CAST(column(attr_key.name), "String", alias=alias) + return f.CAST( + column(attr_key.name[len("sentry.") :]), "String", alias=alias + ) if attr_key.type == AttributeKey.Type.TYPE_INT: - return f.CAST(column(attr_key.name), "Int64", alias=alias) + return f.CAST(column(attr_key.name[len("sentry.") :]), "Int64", alias=alias) if attr_key.type == AttributeKey.Type.TYPE_FLOAT: - return f.CAST(column(attr_key.name), "Float64", alias=alias) + return f.CAST( + column(attr_key.name[len("sentry.") :]), "Float64", alias=alias + ) raise BadSnubaRPCRequestException( f"Attribute {attr_key.name} must be requested as a string, float, or integer, got {attr_key.type}" ) if attr_key.name in NORMALIZED_COLUMNS: if NORMALIZED_COLUMNS[attr_key.name] == attr_key.type: - return column(attr_key.name, alias=attr_key.name) + return column(attr_key.name[len("sentry.") :], alias=attr_key.name) raise BadSnubaRPCRequestException( f"Attribute {attr_key.name} must be requested as {NORMALIZED_COLUMNS[attr_key.name]}, got {attr_key.type}" ) diff --git a/tests/web/rpc/test_common.py b/tests/web/rpc/test_common.py index 2a01779109..c1ff109689 100644 --- a/tests/web/rpc/test_common.py +++ b/tests/web/rpc/test_common.py @@ -11,39 +11,48 @@ def test_expression_trace_id(self) -> None: assert attribute_key_to_expression( AttributeKey( type=AttributeKey.TYPE_STRING, - name="trace_id", + name="sentry.trace_id", ), - ) == f.CAST(column("trace_id"), "String", alias="trace_id") + ) == f.CAST(column("trace_id"), "String", alias="sentry.trace_id") def test_timestamp_columns(self) -> None: - for col in ["timestamp", "start_timestamp", "end_timestamp"]: + for col in [ + "sentry.timestamp", + "sentry.start_timestamp", + "sentry.end_timestamp", + ]: assert attribute_key_to_expression( AttributeKey( type=AttributeKey.TYPE_STRING, name=col, ), - ) == f.CAST(column(col), "String", alias=col) + ) == f.CAST(column(col[len("sentry.") :]), "String", alias=col) assert attribute_key_to_expression( AttributeKey( type=AttributeKey.TYPE_INT, name=col, ), - ) == f.CAST(column(col), "Int64", alias=col) + ) == f.CAST(column(col[len("sentry.") :]), "Int64", alias=col) assert attribute_key_to_expression( AttributeKey( type=AttributeKey.TYPE_FLOAT, name=col, ), - ) == f.CAST(column(col), "Float64", alias=col) + ) == f.CAST(column(col[len("sentry.") :]), "Float64", alias=col) def test_normalized_col(self) -> None: - for col in ["span_id", "parent_span_id", "segment_id", "service"]: + for col in [ + "sentry.span_id", + "sentry.parent_span_id", + "sentry.segment_id", + "sentry.service", + ]: assert attribute_key_to_expression( AttributeKey( type=AttributeKey.TYPE_STRING, name=col, ), - ) == column(col, alias=col) + ) == column(col[len("sentry.") :], alias=col) def test_attributes(self) -> None: assert attribute_key_to_expression( diff --git a/tests/web/rpc/test_span_samples.py b/tests/web/rpc/test_span_samples.py index 1e70c69ceb..bfa52a9f28 100644 --- a/tests/web/rpc/test_span_samples.py +++ b/tests/web/rpc/test_span_samples.py @@ -129,13 +129,13 @@ def test_basic(self) -> None: ), filter=TraceItemFilter( exists_filter=ExistsFilter( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="category") + key=AttributeKey(type=AttributeKey.TYPE_STRING, name="color") ) ), - keys=[AttributeKey(type=AttributeKey.TYPE_STRING, name="platform")], + keys=[AttributeKey(type=AttributeKey.TYPE_STRING, name="location")], order_by=[ SpanSamplesRequest.OrderBy( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="status") + key=AttributeKey(type=AttributeKey.TYPE_STRING, name="location") ) ], limit=10, @@ -159,13 +159,13 @@ def test_with_data(self, setup_teardown: Any) -> None: ), filter=TraceItemFilter( exists_filter=ExistsFilter( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="category") + key=AttributeKey(type=AttributeKey.TYPE_STRING, name="color") ) ), - keys=[AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.sdk.name")], + keys=[AttributeKey(type=AttributeKey.TYPE_STRING, name="server_name")], order_by=[ SpanSamplesRequest.OrderBy( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="status") + key=AttributeKey(type=AttributeKey.TYPE_STRING, name="server_name") ) ], limit=61, @@ -174,7 +174,7 @@ def test_with_data(self, setup_teardown: Any) -> None: assert [ dict((k, x.results[k].val_str) for k in x.results) for x in response.span_samples - ] == [{"sentry.sdk.name": "sentry.python.django"} for _ in range(60)] + ] == [{"server_name": "D23CXQ4GK2.local"} for _ in range(60)] def test_booleans_and_number_compares(self, setup_teardown: Any) -> None: ts = Timestamp(seconds=int(BASE_TIME.timestamp())) @@ -215,12 +215,14 @@ def test_booleans_and_number_compares(self, setup_teardown: Any) -> None: ) ), keys=[ - AttributeKey(type=AttributeKey.TYPE_BOOLEAN, name="is_segment"), - AttributeKey(type=AttributeKey.TYPE_STRING, name="span_id"), + AttributeKey(type=AttributeKey.TYPE_BOOLEAN, name="sentry.is_segment"), + AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.span_id"), ], order_by=[ SpanSamplesRequest.OrderBy( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="status") + key=AttributeKey( + type=AttributeKey.TYPE_STRING, name="sentry.status" + ) ) ], limit=61, @@ -231,7 +233,10 @@ def test_booleans_and_number_compares(self, setup_teardown: Any) -> None: (k, (x.results[k].val_bool or x.results[k].val_str)) for k in x.results ) for x in response.span_samples - ] == [{"is_segment": True, "span_id": "123456781234567d"} for _ in range(60)] + ] == [ + {"sentry.is_segment": True, "sentry.span_id": "123456781234567d"} + for _ in range(60) + ] def test_with_virtual_columns(self, setup_teardown: Any) -> None: ts = Timestamp(seconds=int(BASE_TIME.timestamp())) @@ -247,12 +252,16 @@ def test_with_virtual_columns(self, setup_teardown: Any) -> None: ), filter=TraceItemFilter( exists_filter=ExistsFilter( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="category") + key=AttributeKey( + type=AttributeKey.TYPE_STRING, name="sentry.category" + ) ) ), keys=[ - AttributeKey(type=AttributeKey.TYPE_STRING, name="project_name"), - AttributeKey(type=AttributeKey.TYPE_STRING, name="release_version"), + AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.project_name"), + AttributeKey( + type=AttributeKey.TYPE_STRING, name="sentry.release_version" + ), AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.sdk.name"), ], order_by=[ @@ -265,13 +274,13 @@ def test_with_virtual_columns(self, setup_teardown: Any) -> None: limit=61, virtual_column_contexts=[ VirtualColumnContext( - from_column_name="project_id", - to_column_name="project_name", + from_column_name="sentry.project_id", + to_column_name="sentry.project_name", value_map={"1": "sentry", "2": "snuba"}, ), VirtualColumnContext( - from_column_name="release", - to_column_name="release_version", + from_column_name="sentry.release", + to_column_name="sentry.release_version", value_map={_RELEASE_TAG: "4.2.0.69"}, ), ], @@ -282,9 +291,9 @@ def test_with_virtual_columns(self, setup_teardown: Any) -> None: for x in response.span_samples ] == [ { - "project_name": "sentry", + "sentry.project_name": "sentry", "sentry.sdk.name": "sentry.python.django", - "release_version": "4.2.0.69", + "sentry.release_version": "4.2.0.69", } for _ in range(60) ] @@ -303,7 +312,9 @@ def test_order_by_virtual_columns(self, setup_teardown: Any) -> None: ), filter=TraceItemFilter( exists_filter=ExistsFilter( - key=AttributeKey(type=AttributeKey.TYPE_STRING, name="category") + key=AttributeKey( + type=AttributeKey.TYPE_STRING, name="sentry.category" + ) ) ), keys=[ From 6f82257fb0c6505dcd4dde47413ee9fb1d08a7ab Mon Sep 17 00:00:00 2001 From: Colin Chartier Date: Fri, 20 Sep 2024 11:28:52 -0400 Subject: [PATCH 4/4] insta --- ...sors__eap_spans__tests__serialization.snap | 42 ++++++++++--------- ...essor-snuba-spans__1__basic_span.json.snap | 42 +++++++++---------- 2 files changed, 43 insertions(+), 41 deletions(-) diff --git a/rust_snuba/src/processors/snapshots/rust_snuba__processors__eap_spans__tests__serialization.snap b/rust_snuba/src/processors/snapshots/rust_snuba__processors__eap_spans__tests__serialization.snap index 05eac7c04f..cd97bb3923 100644 --- a/rust_snuba/src/processors/snapshots/rust_snuba__processors__eap_spans__tests__serialization.snap +++ b/rust_snuba/src/processors/snapshots/rust_snuba__processors__eap_spans__tests__serialization.snap @@ -24,17 +24,17 @@ expression: span "sampling_weight_2": 100, "sign": 1, "attr_str_0": { - "relay_protocol_version": "3", - "transaction.op": "http.server" + "relay_protocol_version": "3" + }, + "attr_str_1": { + "sentry.thread.name": "uWSGIWorker1Core0" }, "attr_num_1": { "my.neg.float.field": -101.2, "my.true.bool.field": 1.0 }, - "attr_str_2": { - "trace.status": "ok", - "transaction.method": "POST", - "user": "ip:127.0.0.1" + "attr_str_3": { + "sentry.thread.id": "8522009600" }, "attr_str_4": { "thread.id": "8522009600" @@ -42,30 +42,36 @@ expression: span "attr_str_5": { "http.status_code": "200", "sentry.release": "backend@24.7.0.dev0+c45b49caed1e5fcbf70097ab3f434b487c359b6b", - "sentry.sdk.name": "sentry.python.django" + "sentry.sdk.name": "sentry.python.django", + "sentry.transaction.op": "http.server" }, "attr_num_5": { "my.neg.field": -100.0 }, "attr_str_6": { - "op": "http.server", "relay_id": "88888888-4444-4444-8444-cccccccccccc", "thread.name": "uWSGIWorker1Core0" }, "attr_num_6": { "my.false.bool.field": 0.0 }, + "attr_str_7": { + "sentry.trace.status": "ok" + }, "attr_str_8": { - "sdk.name": "sentry.python.django" + "sentry.category": "http" }, "attr_str_9": { - "sentry.environment": "development", - "status_code": "200" + "sentry.environment": "development" }, "attr_str_10": { - "release": "backend@24.7.0.dev0+c45b49caed1e5fcbf70097ab3f434b487c359b6b", "sentry.sdk.version": "2.7.0" }, + "attr_str_11": { + "sentry.platform": "python", + "sentry.transaction.method": "POST", + "sentry.user": "ip:127.0.0.1" + }, "attr_num_11": { "num_of_spans": 50.0 }, @@ -73,12 +79,8 @@ expression: span "relay_use_post_or_schedule_rejected": "version", "server_name": "D23CXQ4GK2.local" }, - "attr_str_13": { - "category": "http" - }, "attr_str_14": { - "environment": "development", - "platform": "python" + "sentry.status": "ok" }, "attr_num_14": { "my.int.field": 2000.0 @@ -87,16 +89,16 @@ expression: span "relay_endpoint_version": "3", "relay_no_cache": "False", "relay_use_post_or_schedule": "True", - "sdk.version": "2.7.0", "spans_over_limit": "False" }, "attr_num_17": { "my.float.field": 101.2 }, "attr_str_18": { - "sentry.segment.name": "/api/0/relays/projectconfigs/" + "sentry.segment.name": "/api/0/relays/projectconfigs/", + "sentry.status_code": "200" }, "attr_str_19": { - "status": "ok" + "sentry.op": "http.server" } } diff --git a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@snuba-spans-EAPSpansMessageProcessor-snuba-spans__1__basic_span.json.snap b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@snuba-spans-EAPSpansMessageProcessor-snuba-spans__1__basic_span.json.snap index 64a32b2761..147200a467 100644 --- a/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@snuba-spans-EAPSpansMessageProcessor-snuba-spans__1__basic_span.json.snap +++ b/rust_snuba/src/processors/snapshots/rust_snuba__processors__tests__schemas@snuba-spans-EAPSpansMessageProcessor-snuba-spans__1__basic_span.json.snap @@ -10,36 +10,36 @@ expression: snapshot_payload "http.response_content_length": 100.0 }, "attr_str_0": { - "group": "deadbeefdeadbeef", - "tag3": "True", - "transaction.op": "navigation" + "tag3": "True" + }, + "attr_str_11": { + "sentry.http.method": "GET", + "sentry.transaction.method": "GET" + }, + "attr_str_12": { + "sentry.domain": "targetdomain.tld:targetport" + }, + "attr_str_13": { + "sentry.system": "python" + }, + "attr_str_14": { + "sentry.status": "ok" }, "attr_str_18": { + "sentry.action": "GET", + "sentry.status_code": "200", "tag1": "value1" }, "attr_str_19": { - "status": "ok", + "sentry.group": "deadbeefdeadbeef", + "sentry.op": "http.client", "tag2": "123" }, - "attr_str_2": { - "transaction.method": "GET" - }, - "attr_str_4": { - "http.method": "GET", - "system": "python" - }, "attr_str_5": { - "domain": "targetdomain.tld:targetport", - "module": "http" - }, - "attr_str_6": { - "op": "http.client" - }, - "attr_str_7": { - "action": "GET" + "sentry.transaction.op": "navigation" }, - "attr_str_9": { - "status_code": "200" + "attr_str_8": { + "sentry.module": "http" }, "duration_ms": 1000, "end_timestamp": 1715868486370551,