Skip to content

Commit

Permalink
add a functions summaries dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
viglia committed Sep 12, 2024
1 parent 81a5359 commit a578ab3
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 0 deletions.
1 change: 1 addition & 0 deletions snuba/clusters/storage_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
"GROUP_ATTRIBUTES": "group_attributes",
"METRICS_SUMMARIES": "metrics_summaries",
"PROFILE_CHUNKS": "profile_chunks",
"FUNCTIONS_SUMMARIES": "functions_summaries",
}


Expand Down
5 changes: 5 additions & 0 deletions snuba/migrations/group_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,8 @@ def __init__(self) -> None:
class ProfileChunksLoader(DirectoryLoader):
    """Group loader bound to the ``snuba.snuba_migrations.profile_chunks`` module path."""

    def __init__(self) -> None:
        # The dotted module path is handed unchanged to DirectoryLoader.
        migrations_module = "snuba.snuba_migrations.profile_chunks"
        super().__init__(migrations_module)


class FunctionsSummariesLoader(DirectoryLoader):
    """Group loader bound to the ``snuba.snuba_migrations.functions_summaries`` module path."""

    def __init__(self) -> None:
        # The dotted module path is handed unchanged to DirectoryLoader.
        migrations_module = "snuba.snuba_migrations.functions_summaries"
        super().__init__(migrations_module)
8 changes: 8 additions & 0 deletions snuba/migrations/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
EventsAnalyticsPlatformLoader,
EventsLoader,
FunctionsLoader,
FunctionsSummariesLoader,
GenericMetricsLoader,
GroupAttributesLoader,
GroupLoader,
Expand Down Expand Up @@ -47,6 +48,7 @@ class MigrationGroup(Enum):
GROUP_ATTRIBUTES = "group_attributes"
METRICS_SUMMARIES = "metrics_summaries"
PROFILE_CHUNKS = "profile_chunks"
FUNCTIONS_SUMMARIES = "functions_summaries"


# Migration groups are mandatory by default. Specific groups can
Expand All @@ -64,6 +66,7 @@ class MigrationGroup(Enum):
MigrationGroup.GROUP_ATTRIBUTES,
MigrationGroup.METRICS_SUMMARIES,
MigrationGroup.PROFILE_CHUNKS,
MigrationGroup.FUNCTIONS_SUMMARIES,
}


Expand Down Expand Up @@ -186,6 +189,11 @@ def __init__(
storage_sets_keys={StorageSetKey.PROFILE_CHUNKS},
readiness_state=ReadinessState.PARTIAL,
),
MigrationGroup.FUNCTIONS_SUMMARIES: _MigrationGroup(
loader=FunctionsSummariesLoader(),
storage_sets_keys={StorageSetKey.FUNCTIONS_SUMMARIES},
readiness_state=ReadinessState.EXPERIMENTAL,
),
}


Expand Down
1 change: 1 addition & 0 deletions snuba/settings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@
"generic_metrics_gauges",
"metrics_summaries",
"profile_chunks",
"functions_summaries",
},
"single_node": True,
},
Expand Down
1 change: 1 addition & 0 deletions snuba/settings/settings_distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
"generic_metrics_gauges",
"metrics_summaries",
"profile_chunks",
"functions_summaries",
},
"single_node": False,
"cluster_name": "cluster_one_sh",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
from typing import List, Sequence

from snuba.clickhouse.columns import UUID, Column, DateTime, String, UInt
from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations, table_engines
from snuba.migrations.columns import MigrationModifiers as Modifiers
from snuba.migrations.operations import OperationTarget, SqlOperation
from snuba.utils.schemas import Float

# Storage set and table names shared by every operation in this migration.
storage_set_name = StorageSetKey.FUNCTIONS_SUMMARIES
local_table_name = "functions_summaries_local"
dist_table_name = "functions_summaries_dist"

# Schema used for both the local and the distributed table.
columns: List[Column[Modifiers]] = [
    Column("project_id", UInt(64)),
    # Only transaction-based profiling sets profile_id, hence nullable.
    Column(
        "profile_id",
        UUID(Modifiers(nullable=True, low_cardinality=False)),
    ),
    # Only continuous profiling sets profiler_id, hence nullable.
    Column(
        "profiler_id",
        UUID(Modifiers(nullable=True, low_cardinality=False)),
    ),
    # Only transaction-based profiling sets transaction_name, hence nullable.
    Column(
        "transaction_name",
        String(Modifiers(nullable=True, low_cardinality=True)),
    ),
    Column("thread_id", String()),
    # Summary values (presumably per-function aggregates; confirm with the producer).
    Column("min", Float(64)),
    Column("max", Float(64)),
    Column("sum", Float(64)),
    Column("count", UInt(64)),
    Column("end_timestamp", DateTime()),
    Column("platform", String(Modifiers(low_cardinality=True))),
    Column(
        "environment",
        String(Modifiers(nullable=True, low_cardinality=True)),
    ),
    Column(
        "release",
        String(Modifiers(nullable=True, low_cardinality=True)),
    ),
    # Snuba-internal column.
    Column("retention_days", UInt(16)),
]


class Migration(migration.ClickhouseNodeMigration):
    """Create (and, on rollback, drop) the functions summaries tables.

    Forwards creates the local MergeTree table first and then the
    distributed table that forwards to it. Backwards removes them in the
    opposite order, so the distributed table never outlives the local
    table it points at.
    """

    blocking = False

    def forwards_ops(self) -> Sequence[SqlOperation]:
        """Return the operations that create the local and distributed tables.

        The local table is created before the distributed one because the
        distributed engine refers to the local table by name.
        """
        create_local = operations.CreateTable(
            storage_set=storage_set_name,
            table_name=local_table_name,
            columns=columns,
            engine=table_engines.MergeTree(
                order_by="(project_id, end_timestamp)",
                partition_by="(retention_days, toMonday(end_timestamp))",
                settings={"index_granularity": "8192"},
                storage_set=storage_set_name,
                # Rows expire retention_days after their end_timestamp.
                ttl="end_timestamp + toIntervalDay(retention_days)",
            ),
            target=OperationTarget.LOCAL,
        )
        create_dist = operations.CreateTable(
            storage_set=storage_set_name,
            table_name=dist_table_name,
            columns=columns,
            engine=table_engines.Distributed(
                local_table_name=local_table_name,
                sharding_key=None,
            ),
            target=OperationTarget.DISTRIBUTED,
        )
        return [create_local, create_dist]

    def backwards_ops(self) -> Sequence[SqlOperation]:
        """Return the operations that drop both tables.

        The distributed table is dropped first: dropping the local table
        while the distributed table still exists would leave the
        distributed table pointing at a table that is gone.
        """
        drop_dist = operations.DropTable(
            storage_set=storage_set_name,
            table_name=dist_table_name,
            target=OperationTarget.DISTRIBUTED,
        )
        drop_local = operations.DropTable(
            storage_set=storage_set_name,
            table_name=local_table_name,
            target=OperationTarget.LOCAL,
        )
        return [drop_dist, drop_local]

0 comments on commit a578ab3

Please sign in to comment.