Skip to content

Commit

Permalink
fix(eap-spans): Drop ineffective indices to speed up insertion (#6206)
Browse files Browse the repository at this point in the history
  • Loading branch information
phacops committed Aug 14, 2024
1 parent 88a31de commit e13eb69
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 0 deletions.
27 changes: 27 additions & 0 deletions snuba/migrations/operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,33 @@ def format_sql(self) -> str:
)


class DropIndices(SqlOperation):
    """
    Drops many indices from a single table with one ALTER TABLE statement.

    Only works with the MergeTree family of tables.

    In ClickHouse versions prior to 20.1.2.4, this requires setting
    allow_experimental_data_skipping_indices = 1

    :param storage_set: Storage set forwarded to ``SqlOperation``.
    :param table_name: Name of the table the indices are dropped from.
    :param indices: Names of the indices to drop. Must not be empty.
    :param target: Operation target forwarded to ``SqlOperation``.
    :raises ValueError: If ``indices`` is empty.
    """

    def __init__(
        self,
        storage_set: StorageSetKey,
        table_name: str,
        indices: Sequence[str],
        target: OperationTarget = OperationTarget.UNSET,
    ):
        # An empty sequence would render "ALTER TABLE <table> ;", which is not
        # a valid statement. Fail at construction instead of at execution.
        if not indices:
            raise ValueError("DropIndices requires at least one index name")

        super().__init__(storage_set, target=target)
        self.__table_name = table_name
        # Snapshot the names so that a caller mutating its own list afterwards
        # cannot change the SQL this operation produces.
        self.__indices = tuple(indices)

    def format_sql(self) -> str:
        """
        Return the ALTER TABLE statement, with one ``DROP INDEX IF EXISTS``
        clause per index name, separated by commas.
        """
        statements = [f"DROP INDEX IF EXISTS {idx}" for idx in self.__indices]

        return f"ALTER TABLE {self.__table_name} {', '.join(statements)};"


class InsertIntoSelect(SqlOperation):
"""
Inserts the results of a select query. Source and destination tables must be
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from typing import Sequence

from snuba.clusters.storage_sets import StorageSetKey
from snuba.migrations import migration, operations
from snuba.migrations.operations import AddIndicesData, OperationTarget, SqlOperation

storage_set_name = StorageSetKey.EVENTS_ANALYTICS_PLATFORM
local_table_name = "eap_spans_local"
num_attr_buckets = 20

# One bloom filter index over the map keys of every attribute bucket column:
# first all the string buckets (attr_str_0 .. attr_str_N-1), then all the
# numeric buckets (attr_num_0 .. attr_num_N-1).
indices: Sequence[AddIndicesData] = [
    AddIndicesData(
        name=f"bf_attr_{kind}_{bucket}",
        expression=f"mapKeys(attr_{kind}_{bucket})",
        type="bloom_filter",
        granularity=1,
    )
    for kind in ("str", "num")
    for bucket in range(num_attr_buckets)
]


class Migration(migration.ClickhouseNodeMigration):
    """
    Drops the bloom filter indices on the attribute map keys and on
    project_id from the local EAP spans table.

    The backwards direction re-creates the same indices with their original
    definitions.
    """

    blocking = False

    def forwards_ops(self) -> Sequence[SqlOperation]:
        """
        Return the operations that drop every attribute bucket index and the
        ``bf_project_id`` index from the local table.
        """
        return [
            operations.DropIndices(
                storage_set=storage_set_name,
                table_name=local_table_name,
                indices=[idx.name for idx in indices],
                target=OperationTarget.LOCAL,
            ),
            # Same storage set, table and target as above: use the shared
            # module constants so the two operations cannot drift apart.
            operations.DropIndex(
                storage_set=storage_set_name,
                table_name=local_table_name,
                index_name="bf_project_id",
                target=OperationTarget.LOCAL,
            ),
        ]

    def backwards_ops(self) -> Sequence[SqlOperation]:
        """
        Return the operations that add the attribute bucket indices and the
        ``bf_project_id`` bloom filter index back to the local table.
        """
        return [
            operations.AddIndices(
                storage_set=storage_set_name,
                table_name=local_table_name,
                indices=indices,
                target=OperationTarget.LOCAL,
            ),
            operations.AddIndex(
                storage_set=storage_set_name,
                table_name=local_table_name,
                index_name="bf_project_id",
                index_expression="project_id",
                index_type="bloom_filter",
                granularity=1,
                target=OperationTarget.LOCAL,
            ),
        ]

0 comments on commit e13eb69

Please sign in to comment.