From 7dc1ecab8964998aa704183582d35fb2ab9b7e18 Mon Sep 17 00:00:00 2001
From: Colin Chartier <colin.chartier@sentry.io>
Date: Thu, 19 Sep 2024 16:43:51 -0400
Subject: [PATCH] feat(eap): prefix all sentry_tags items with "sentry."

---
 snuba/web/rpc/common.py            | 52 ++++++++++++++++-------------
 tests/web/rpc/test_common.py       | 25 +++++++++-----
 tests/web/rpc/test_span_samples.py | 53 ++++++++++++++++++------------
 3 files changed, 79 insertions(+), 51 deletions(-)

diff --git a/snuba/web/rpc/common.py b/snuba/web/rpc/common.py
index 04ea50857a..3a32426e61 100644
--- a/snuba/web/rpc/common.py
+++ b/snuba/web/rpc/common.py
@@ -66,25 +66,29 @@ def transform(exp: Expression) -> Expression:
 
 # These are the columns which aren't stored in attr_str_ nor attr_num_ in clickhouse
 NORMALIZED_COLUMNS: Final[Mapping[str, AttributeKey.Type.ValueType]] = {
-    "organization_id": AttributeKey.Type.TYPE_INT,
-    "project_id": AttributeKey.Type.TYPE_INT,
-    "service": AttributeKey.Type.TYPE_STRING,
-    "span_id": AttributeKey.Type.TYPE_STRING,  # this is converted by a processor on the storage
-    "parent_span_id": AttributeKey.Type.TYPE_STRING,  # this is converted by a processor on the storage
-    "segment_id": AttributeKey.Type.TYPE_STRING,  # this is converted by a processor on the storage
-    "segment_name": AttributeKey.Type.TYPE_STRING,
-    "is_segment": AttributeKey.Type.TYPE_BOOLEAN,
-    "duration_ms": AttributeKey.Type.TYPE_INT,
-    "exclusive_time_ms": AttributeKey.Type.TYPE_INT,
-    "retention_days": AttributeKey.Type.TYPE_INT,
-    "name": AttributeKey.Type.TYPE_STRING,
-    "sample_weight": AttributeKey.Type.TYPE_FLOAT,
-    "timestamp": AttributeKey.Type.TYPE_UNSPECIFIED,
-    "start_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED,
-    "end_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED,
+    "sentry.organization_id": AttributeKey.Type.TYPE_INT,
+    "sentry.project_id": AttributeKey.Type.TYPE_INT,
+    "sentry.service": AttributeKey.Type.TYPE_STRING,
+    "sentry.span_id": AttributeKey.Type.TYPE_STRING,  # this is converted by a processor on the storage
+    "sentry.parent_span_id": AttributeKey.Type.TYPE_STRING,  # this is converted by a processor on the storage
+    "sentry.segment_id": AttributeKey.Type.TYPE_STRING,  # this is converted by a processor on the storage
+    "sentry.segment_name": AttributeKey.Type.TYPE_STRING,
+    "sentry.is_segment": AttributeKey.Type.TYPE_BOOLEAN,
+    "sentry.duration_ms": AttributeKey.Type.TYPE_INT,
+    "sentry.exclusive_time_ms": AttributeKey.Type.TYPE_INT,
+    "sentry.retention_days": AttributeKey.Type.TYPE_INT,
+    "sentry.name": AttributeKey.Type.TYPE_STRING,
+    "sentry.sample_weight": AttributeKey.Type.TYPE_FLOAT,
+    "sentry.timestamp": AttributeKey.Type.TYPE_UNSPECIFIED,
+    "sentry.start_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED,
+    "sentry.end_timestamp": AttributeKey.Type.TYPE_UNSPECIFIED,
 }
 
-TIMESTAMP_COLUMNS: Final[Set[str]] = {"timestamp", "start_timestamp", "end_timestamp"}
+TIMESTAMP_COLUMNS: Final[Set[str]] = {
+    "sentry.timestamp",
+    "sentry.start_timestamp",
+    "sentry.end_timestamp",
+}
 
 
 def attribute_key_to_expression(attr_key: AttributeKey) -> Expression:
@@ -94,7 +98,7 @@ def attribute_key_to_expression(attr_key: AttributeKey) -> Expression:
         )
     alias = attr_key.name
 
-    if attr_key.name == "trace_id":
+    if attr_key.name == "sentry.trace_id":
         if attr_key.type == AttributeKey.Type.TYPE_STRING:
             return f.CAST(column("trace_id"), "String", alias=alias)
         raise BadSnubaRPCRequestException(
@@ -103,18 +107,22 @@ def attribute_key_to_expression(attr_key: AttributeKey) -> Expression:
 
     if attr_key.name in TIMESTAMP_COLUMNS:
         if attr_key.type == AttributeKey.Type.TYPE_STRING:
-            return f.CAST(column(attr_key.name), "String", alias=alias)
+            return f.CAST(
+                column(attr_key.name[len("sentry.") :]), "String", alias=alias
+            )
         if attr_key.type == AttributeKey.Type.TYPE_INT:
-            return f.CAST(column(attr_key.name), "Int64", alias=alias)
+            return f.CAST(column(attr_key.name[len("sentry.") :]), "Int64", alias=alias)
         if attr_key.type == AttributeKey.Type.TYPE_FLOAT:
-            return f.CAST(column(attr_key.name), "Float64", alias=alias)
+            return f.CAST(
+                column(attr_key.name[len("sentry.") :]), "Float64", alias=alias
+            )
         raise BadSnubaRPCRequestException(
             f"Attribute {attr_key.name} must be requested as a string, float, or integer, got {attr_key.type}"
         )
 
     if attr_key.name in NORMALIZED_COLUMNS:
         if NORMALIZED_COLUMNS[attr_key.name] == attr_key.type:
-            return column(attr_key.name, alias=attr_key.name)
+            return column(attr_key.name[len("sentry.") :], alias=attr_key.name)
         raise BadSnubaRPCRequestException(
             f"Attribute {attr_key.name} must be requested as {NORMALIZED_COLUMNS[attr_key.name]}, got {attr_key.type}"
         )
diff --git a/tests/web/rpc/test_common.py b/tests/web/rpc/test_common.py
index 2a01779109..c1ff109689 100644
--- a/tests/web/rpc/test_common.py
+++ b/tests/web/rpc/test_common.py
@@ -11,39 +11,48 @@ def test_expression_trace_id(self) -> None:
         assert attribute_key_to_expression(
             AttributeKey(
                 type=AttributeKey.TYPE_STRING,
-                name="trace_id",
+                name="sentry.trace_id",
             ),
-        ) == f.CAST(column("trace_id"), "String", alias="trace_id")
+        ) == f.CAST(column("trace_id"), "String", alias="sentry.trace_id")
 
     def test_timestamp_columns(self) -> None:
-        for col in ["timestamp", "start_timestamp", "end_timestamp"]:
+        for col in [
+            "sentry.timestamp",
+            "sentry.start_timestamp",
+            "sentry.end_timestamp",
+        ]:
             assert attribute_key_to_expression(
                 AttributeKey(
                     type=AttributeKey.TYPE_STRING,
                     name=col,
                 ),
-            ) == f.CAST(column(col), "String", alias=col)
+            ) == f.CAST(column(col[len("sentry.") :]), "String", alias=col)
             assert attribute_key_to_expression(
                 AttributeKey(
                     type=AttributeKey.TYPE_INT,
                     name=col,
                 ),
-            ) == f.CAST(column(col), "Int64", alias=col)
+            ) == f.CAST(column(col[len("sentry.") :]), "Int64", alias=col)
             assert attribute_key_to_expression(
                 AttributeKey(
                     type=AttributeKey.TYPE_FLOAT,
                     name=col,
                 ),
-            ) == f.CAST(column(col), "Float64", alias=col)
+            ) == f.CAST(column(col[len("sentry.") :]), "Float64", alias=col)
 
     def test_normalized_col(self) -> None:
-        for col in ["span_id", "parent_span_id", "segment_id", "service"]:
+        for col in [
+            "sentry.span_id",
+            "sentry.parent_span_id",
+            "sentry.segment_id",
+            "sentry.service",
+        ]:
             assert attribute_key_to_expression(
                 AttributeKey(
                     type=AttributeKey.TYPE_STRING,
                     name=col,
                 ),
-            ) == column(col, alias=col)
+            ) == column(col[len("sentry.") :], alias=col)
 
     def test_attributes(self) -> None:
         assert attribute_key_to_expression(
diff --git a/tests/web/rpc/test_span_samples.py b/tests/web/rpc/test_span_samples.py
index 1e70c69ceb..bfa52a9f28 100644
--- a/tests/web/rpc/test_span_samples.py
+++ b/tests/web/rpc/test_span_samples.py
@@ -129,13 +129,13 @@ def test_basic(self) -> None:
             ),
             filter=TraceItemFilter(
                 exists_filter=ExistsFilter(
-                    key=AttributeKey(type=AttributeKey.TYPE_STRING, name="category")
+                    key=AttributeKey(type=AttributeKey.TYPE_STRING, name="color")
                 )
             ),
-            keys=[AttributeKey(type=AttributeKey.TYPE_STRING, name="platform")],
+            keys=[AttributeKey(type=AttributeKey.TYPE_STRING, name="location")],
             order_by=[
                 SpanSamplesRequest.OrderBy(
-                    key=AttributeKey(type=AttributeKey.TYPE_STRING, name="status")
+                    key=AttributeKey(type=AttributeKey.TYPE_STRING, name="location")
                 )
             ],
             limit=10,
@@ -159,13 +159,13 @@ def test_with_data(self, setup_teardown: Any) -> None:
             ),
             filter=TraceItemFilter(
                 exists_filter=ExistsFilter(
-                    key=AttributeKey(type=AttributeKey.TYPE_STRING, name="category")
+                    key=AttributeKey(type=AttributeKey.TYPE_STRING, name="color")
                 )
             ),
-            keys=[AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.sdk.name")],
+            keys=[AttributeKey(type=AttributeKey.TYPE_STRING, name="server_name")],
             order_by=[
                 SpanSamplesRequest.OrderBy(
-                    key=AttributeKey(type=AttributeKey.TYPE_STRING, name="status")
+                    key=AttributeKey(type=AttributeKey.TYPE_STRING, name="server_name")
                 )
             ],
             limit=61,
@@ -174,7 +174,7 @@ def test_with_data(self, setup_teardown: Any) -> None:
         assert [
             dict((k, x.results[k].val_str) for k in x.results)
             for x in response.span_samples
-        ] == [{"sentry.sdk.name": "sentry.python.django"} for _ in range(60)]
+        ] == [{"server_name": "D23CXQ4GK2.local"} for _ in range(60)]
 
     def test_booleans_and_number_compares(self, setup_teardown: Any) -> None:
         ts = Timestamp(seconds=int(BASE_TIME.timestamp()))
@@ -215,12 +215,14 @@ def test_booleans_and_number_compares(self, setup_teardown: Any) -> None:
                 )
             ),
             keys=[
-                AttributeKey(type=AttributeKey.TYPE_BOOLEAN, name="is_segment"),
-                AttributeKey(type=AttributeKey.TYPE_STRING, name="span_id"),
+                AttributeKey(type=AttributeKey.TYPE_BOOLEAN, name="sentry.is_segment"),
+                AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.span_id"),
             ],
             order_by=[
                 SpanSamplesRequest.OrderBy(
-                    key=AttributeKey(type=AttributeKey.TYPE_STRING, name="status")
+                    key=AttributeKey(
+                        type=AttributeKey.TYPE_STRING, name="sentry.status"
+                    )
                 )
             ],
             limit=61,
@@ -231,7 +233,10 @@ def test_booleans_and_number_compares(self, setup_teardown: Any) -> None:
                 (k, (x.results[k].val_bool or x.results[k].val_str)) for k in x.results
             )
             for x in response.span_samples
-        ] == [{"is_segment": True, "span_id": "123456781234567d"} for _ in range(60)]
+        ] == [
+            {"sentry.is_segment": True, "sentry.span_id": "123456781234567d"}
+            for _ in range(60)
+        ]
 
     def test_with_virtual_columns(self, setup_teardown: Any) -> None:
         ts = Timestamp(seconds=int(BASE_TIME.timestamp()))
@@ -247,12 +252,16 @@ def test_with_virtual_columns(self, setup_teardown: Any) -> None:
             ),
             filter=TraceItemFilter(
                 exists_filter=ExistsFilter(
-                    key=AttributeKey(type=AttributeKey.TYPE_STRING, name="category")
+                    key=AttributeKey(
+                        type=AttributeKey.TYPE_STRING, name="sentry.category"
+                    )
                 )
             ),
             keys=[
-                AttributeKey(type=AttributeKey.TYPE_STRING, name="project_name"),
-                AttributeKey(type=AttributeKey.TYPE_STRING, name="release_version"),
+                AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.project_name"),
+                AttributeKey(
+                    type=AttributeKey.TYPE_STRING, name="sentry.release_version"
+                ),
                 AttributeKey(type=AttributeKey.TYPE_STRING, name="sentry.sdk.name"),
             ],
             order_by=[
@@ -265,13 +274,13 @@ def test_with_virtual_columns(self, setup_teardown: Any) -> None:
             limit=61,
             virtual_column_contexts=[
                 VirtualColumnContext(
-                    from_column_name="project_id",
-                    to_column_name="project_name",
+                    from_column_name="sentry.project_id",
+                    to_column_name="sentry.project_name",
                     value_map={"1": "sentry", "2": "snuba"},
                 ),
                 VirtualColumnContext(
-                    from_column_name="release",
-                    to_column_name="release_version",
+                    from_column_name="sentry.release",
+                    to_column_name="sentry.release_version",
                     value_map={_RELEASE_TAG: "4.2.0.69"},
                 ),
             ],
@@ -282,9 +291,9 @@ def test_with_virtual_columns(self, setup_teardown: Any) -> None:
             for x in response.span_samples
         ] == [
             {
-                "project_name": "sentry",
+                "sentry.project_name": "sentry",
                 "sentry.sdk.name": "sentry.python.django",
-                "release_version": "4.2.0.69",
+                "sentry.release_version": "4.2.0.69",
             }
             for _ in range(60)
         ]
@@ -303,7 +312,9 @@ def test_order_by_virtual_columns(self, setup_teardown: Any) -> None:
             ),
             filter=TraceItemFilter(
                 exists_filter=ExistsFilter(
-                    key=AttributeKey(type=AttributeKey.TYPE_STRING, name="category")
+                    key=AttributeKey(
+                        type=AttributeKey.TYPE_STRING, name="sentry.category"
+                    )
                 )
             ),
             keys=[