From ba31b763b9a8451504b6838855c450ccd3368567 Mon Sep 17 00:00:00 2001 From: Pierre Massat Date: Sun, 12 May 2024 10:55:10 -0400 Subject: [PATCH] feat(profiles): Add a dataset to store profile chunks --- snuba/clusters/storage_sets.py | 1 + snuba/migrations/group_loader.py | 10 +++ snuba/migrations/groups.py | 8 ++ snuba/settings/__init__.py | 1 + snuba/settings/settings_distributed.py | 1 + .../settings_test_distributed_migrations.py | 1 + .../0001_create_profile_chunks_table.py | 73 +++++++++++++++++++ 7 files changed, 95 insertions(+) create mode 100644 snuba/snuba_migrations/profile_chunks/0001_create_profile_chunks_table.py diff --git a/snuba/clusters/storage_sets.py b/snuba/clusters/storage_sets.py index 4e04f9e4c91..9aa5522f161 100644 --- a/snuba/clusters/storage_sets.py +++ b/snuba/clusters/storage_sets.py @@ -19,6 +19,7 @@ "SPANS": "spans", "GROUP_ATTRIBUTES": "group_attributes", "METRICS_SUMMARIES": "metrics_summaries", + "PROFILE_CHUNKS": "profile_chunks", } diff --git a/snuba/migrations/group_loader.py b/snuba/migrations/group_loader.py index 2a99fa22060..41ac0fba8d2 100644 --- a/snuba/migrations/group_loader.py +++ b/snuba/migrations/group_loader.py @@ -403,3 +403,13 @@ def get_migrations(self) -> Sequence[str]: "0002_metrics_summaries_add_tags_hashmap", "0003_metrics_summaries_add_segment_id_duration_group_columns", ] + + +class ProfileChunksLoader(DirectoryLoader): + def __init__(self) -> None: + super().__init__("snuba.snuba_migrations.profile_chunks") + + def get_migrations(self) -> Sequence[str]: + return [ + "0001_create_profile_chunks_table", + ] diff --git a/snuba/migrations/groups.py b/snuba/migrations/groups.py index 90b21c7864d..f6cad117021 100644 --- a/snuba/migrations/groups.py +++ b/snuba/migrations/groups.py @@ -13,6 +13,7 @@ MetricsLoader, MetricsSummariesLoader, OutcomesLoader, + ProfileChunksLoader, ProfilesLoader, QuerylogLoader, ReplaysLoader, @@ -43,6 +44,7 @@ class MigrationGroup(Enum): SPANS = "spans" GROUP_ATTRIBUTES = 
"group_attributes" METRICS_SUMMARIES = "metrics_summaries" + PROFILE_CHUNKS = "profile_chunks" # Migration groups are mandatory by default. Specific groups can @@ -59,6 +61,7 @@ class MigrationGroup(Enum): MigrationGroup.SEARCH_ISSUES, MigrationGroup.GROUP_ATTRIBUTES, MigrationGroup.METRICS_SUMMARIES, + MigrationGroup.PROFILE_CHUNKS, } @@ -169,6 +172,11 @@ def __init__( storage_sets_keys={StorageSetKey.METRICS_SUMMARIES}, readiness_state=ReadinessState.PARTIAL, ), + MigrationGroup.PROFILE_CHUNKS: _MigrationGroup( + loader=ProfileChunksLoader(), + storage_sets_keys={StorageSetKey.PROFILE_CHUNKS}, + readiness_state=ReadinessState.PARTIAL, + ), } diff --git a/snuba/settings/__init__.py b/snuba/settings/__init__.py index 7b9dda7ee64..b98441c35bb 100644 --- a/snuba/settings/__init__.py +++ b/snuba/settings/__init__.py @@ -114,6 +114,7 @@ "group_attributes", "generic_metrics_gauges", "metrics_summaries", + "profile_chunks", }, "single_node": True, }, diff --git a/snuba/settings/settings_distributed.py b/snuba/settings/settings_distributed.py index 52cad87dc05..62ce8730724 100644 --- a/snuba/settings/settings_distributed.py +++ b/snuba/settings/settings_distributed.py @@ -34,6 +34,7 @@ "group_attributes", "generic_metrics_gauges", "metrics_summaries", + "profile_chunks", }, "single_node": False, "cluster_name": "cluster_one_sh", diff --git a/snuba/settings/settings_test_distributed_migrations.py b/snuba/settings/settings_test_distributed_migrations.py index 293f1eae446..e502ae8a4b9 100644 --- a/snuba/settings/settings_test_distributed_migrations.py +++ b/snuba/settings/settings_test_distributed_migrations.py @@ -58,6 +58,7 @@ "group_attributes", "generic_metrics_gauges", "metrics_summaries", + "profile_chunks", }, "single_node": False, "cluster_name": "storage_cluster", diff --git a/snuba/snuba_migrations/profile_chunks/0001_create_profile_chunks_table.py b/snuba/snuba_migrations/profile_chunks/0001_create_profile_chunks_table.py new file mode 100644 index 
00000000000..1a1bb57b8e1 --- /dev/null +++ b/snuba/snuba_migrations/profile_chunks/0001_create_profile_chunks_table.py @@ -0,0 +1,73 @@ +from typing import List, Sequence + +from snuba.clickhouse.columns import UUID, Column, DateTime64, UInt +from snuba.clusters.storage_sets import StorageSetKey +from snuba.migrations import migration, operations, table_engines +from snuba.migrations.columns import MigrationModifiers as Modifiers +from snuba.migrations.operations import OperationTarget, SqlOperation + +storage_set = StorageSetKey.PROFILE_CHUNKS +table_prefix = "profile_chunks" +local_table_name = f"{table_prefix}_local" +dist_table_name = f"{table_prefix}_dist" + +UNKNOWN_SPAN_STATUS = 2 + +columns: List[Column[Modifiers]] = [ + Column("project_id", UInt(64)), + Column("profiler_id", UUID()), + Column("chunk_id", UUID()), + Column("start_timestamp", DateTime64(Modifiers(codecs=["DoubleDelta"]))), + Column("end_timestamp", DateTime64(Modifiers(codecs=["DoubleDelta"]))), + Column("retention_days", UInt(16)), + Column("partition", UInt(16)), + Column("offset", UInt(64)), + Column("deleted", UInt(8)), +] + + +class Migration(migration.ClickhouseNodeMigration): + blocking = False + + def forwards_ops(self) -> Sequence[SqlOperation]: + return [ + operations.CreateTable( + storage_set=storage_set, + table_name=local_table_name, + columns=columns, + engine=table_engines.ReplacingMergeTree( + order_by="(project_id, profiler_id, start_timestamp, end_timestamp, cityHash64(chunk_id))", + partition_by="(retention_days, toStartOfDay(start_timestamp))", + sample_by="cityHash64(profiler_id)", + settings={"index_granularity": "8192"}, + storage_set=storage_set, + ttl="end_timestamp + toIntervalDay(retention_days)", + version_column="deleted", + ), + target=OperationTarget.LOCAL, + ), + operations.CreateTable( + storage_set=storage_set, + table_name=dist_table_name, + columns=columns, + engine=table_engines.Distributed( + local_table_name=local_table_name, +
sharding_key="cityHash64(profiler_id)", + ), + target=OperationTarget.DISTRIBUTED, + ), + ] + + def backwards_ops(self) -> Sequence[SqlOperation]: + return [ + operations.DropTable( + storage_set=storage_set, + table_name=dist_table_name, + target=OperationTarget.DISTRIBUTED, + ), + operations.DropTable( + storage_set=storage_set, + table_name=local_table_name, + target=OperationTarget.LOCAL, + ), + ]