Revert "Revert "feat: Remove query splitters from the API (#5571)""
This reverts commit ab239e8.
evanh committed Feb 22, 2024
1 parent ab239e8 commit 3c3aaac
Showing 18 changed files with 54 additions and 1,077 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -42,7 +42,7 @@ api-tests:
 	SNUBA_SETTINGS=test pytest -vv tests/*_api.py
 
 backend-typing:
-	mypy snuba tests scripts --strict --config-file mypy.ini --exclude 'tests/datasets|tests/query|tests/test_split.py'
+	mypy snuba tests scripts --strict --config-file mypy.ini --exclude 'tests/datasets|tests/query'
 
 install-python-dependencies:
 	pip uninstall -qqy uwsgi # pip doesn't do well with swapping drop-ins
18 changes: 0 additions & 18 deletions docs/source/architecture/queryprocessing.rst
@@ -122,24 +122,6 @@
 finds equality conditions on tags and replaces them with the equivalent condition
 on a tags hashmap (where we have a bloom filter index), making the filtering
 operation faster.
 
-Query Splitter
---------------
-
-Some queries can be executed in an optimized way by splitting them into multiple
-individual Clickhouse queries and by assembling the results of each one of them.
-
-Two examples are time splitting and column splitting. Both are `in this file <https://github.com/getsentry/snuba/blob/master/snuba/web/split.py>`_.
-
-Time splitting splits a query (that does not contain aggregations and is properly
-sorted) into multiple ones over a variable time range that increases in size
-progressively and executes them in sequence, stopping as soon as we have enough
-results.
-
-Column splitting splits filtering and column fetching. It executes the filtering
-part of the query on a minimal number of columns so Clickhouse loads fewer columns,
-then, through a second query, fetches the missing columns only for the rows
-filtered by the first query.
-
 Query Formatter
 ---------------
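The time-splitting behavior described in the removed documentation above is easy to sketch. The following is a hedged illustration of the technique, not the deleted snuba/web/split.py code; run_query, the initial one-hour window, and the doubling policy are all assumptions made for the example.

    from datetime import datetime, timedelta
    from typing import Callable

    # Hedged sketch of time splitting as described in the removed docs.
    # run_query(window_start, window_end, limit) is an assumed helper.
    def time_split(
        run_query: Callable[[datetime, datetime, int], list[dict]],
        start: datetime,
        end: datetime,
        limit: int,
    ) -> list[dict]:
        results: list[dict] = []
        window = timedelta(hours=1)
        cursor = end
        # Walk backwards from `end` over progressively larger windows,
        # stopping as soon as enough rows have been collected.
        while cursor > start and len(results) < limit:
            window_start = max(start, cursor - window)
            results.extend(run_query(window_start, cursor, limit - len(results)))
            cursor = window_start
            window *= 2
        return results[:limit]

This only pays off for sorted, non-aggregated queries, which is why the removed docs restricted it to that case: recent windows can satisfy the limit without ever scanning the full range.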
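Column splitting, also removed above, can be sketched the same way. Again this is a hedged illustration under assumed helpers (run_query taking a column list and a WHERE fragment), not the deleted implementation; the two-pass structure follows the removed prose.

    from typing import Callable

    # Hedged sketch of column splitting as described in the removed docs.
    # run_query(columns, where) is an assumed helper, not Snuba's API.
    def column_split(
        run_query: Callable[[list[str], str], list[dict]],
        where: str,
        all_columns: list[str],
        id_column: str = "event_id",
    ) -> list[dict]:
        # First pass: evaluate the filters while reading only the id column,
        # so ClickHouse loads as little column data as possible.
        ids = [row[id_column] for row in run_query([id_column], where)]
        if not ids:
            return []
        # Second pass: fetch the full column set only for the matching rows.
        id_list = ", ".join(f"'{value}'" for value in ids)
        return run_query(all_columns, f"{id_column} IN ({id_list})")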
9 changes: 0 additions & 9 deletions snuba/datasets/configuration/discover/storages/discover.yaml
@@ -163,12 +163,3 @@ allocation_policies:
       default_config_overrides:
         is_enforced: 0
         is_active: 0
-query_splitters:
-  - splitter: ColumnSplitQueryStrategy
-    args:
-      id_column: event_id
-      project_column: project_id
-      timestamp_column: timestamp
-  - splitter: TimeSplitQueryStrategy
-    args:
-      timestamp_col: timestamp
9 changes: 0 additions & 9 deletions snuba/datasets/configuration/events/storages/errors.yaml
@@ -323,15 +323,6 @@ query_processors:
         - environment
         - project_id
   - processor: TableRateLimit
-query_splitters:
-  - splitter: ColumnSplitQueryStrategy
-    args:
-      id_column: event_id
-      project_column: project_id
-      timestamp_column: timestamp
-  - splitter: TimeSplitQueryStrategy
-    args:
-      timestamp_col: timestamp
 mandatory_condition_checkers:
   - condition: ProjectIdEnforcer
 replacer_processor:
9 changes: 0 additions & 9 deletions snuba/datasets/configuration/events/storages/errors_ro.yaml
@@ -322,12 +322,3 @@ query_processors:
         - environment
         - project_id
   - processor: TableRateLimit
-query_splitters:
-  - splitter: ColumnSplitQueryStrategy
-    args:
-      id_column: event_id
-      project_column: project_id
-      timestamp_column: timestamp
-  - splitter: TimeSplitQueryStrategy
-    args:
-      timestamp_col: timestamp
9 changes: 1 addition & 8 deletions snuba/datasets/configuration/json_schema.py
@@ -87,6 +87,7 @@ def string_with_description(description: str) -> dict[str, str]:
     "description": "The stream loader for a writing to ClickHouse. This provides what is needed to start a Kafka consumer and fill in the ClickHouse table.",
 }
 
+
 ######
 # Column specific json schemas
 def make_column_schema(
@@ -332,11 +333,6 @@ def registered_class_array_schema(
     "QueryProcessor",
     "Name of ClickhouseQueryProcessor class config key. Responsible for the transformation applied to a query.",
 )
-STORAGE_QUERY_SPLITTERS_SCHEMA = registered_class_array_schema(
-    "splitter",
-    "QuerySplitStrategy",
-    "Name of QuerySplitStrategy class config key. Responsible for splitting a query into two at runtime and combining the results.",
-)
 STORAGE_MANDATORY_CONDITION_CHECKERS_SCHEMA = registered_class_array_schema(
     "condition",
     "ConditionChecker",
@@ -542,7 +538,6 @@ def registered_class_array_schema(
         "readiness_state": READINESS_STATE_SCHEMA,
         "schema": SCHEMA_SCHEMA,
         "query_processors": STORAGE_QUERY_PROCESSORS_SCHEMA,
-        "query_splitters": STORAGE_QUERY_SPLITTERS_SCHEMA,
         "mandatory_condition_checkers": STORAGE_MANDATORY_CONDITION_CHECKERS_SCHEMA,
         "allocation_policies": STORAGE_ALLOCATION_POLICIES_SCHEMA,
     },
@@ -569,7 +564,6 @@ def registered_class_array_schema(
         "schema": SCHEMA_SCHEMA,
         "stream_loader": STREAM_LOADER_SCHEMA,
         "query_processors": STORAGE_QUERY_PROCESSORS_SCHEMA,
-        "query_splitters": STORAGE_QUERY_SPLITTERS_SCHEMA,
         "mandatory_condition_checkers": STORAGE_MANDATORY_CONDITION_CHECKERS_SCHEMA,
         "allocation_policies": STORAGE_ALLOCATION_POLICIES_SCHEMA,
         "replacer_processor": STORAGE_REPLACER_PROCESSOR_SCHEMA,
@@ -607,7 +601,6 @@ def registered_class_array_schema(
         "postgres_table": TYPE_STRING,
         "row_processor": CDC_STORAGE_ROW_PROCESSOR_SCHEMA,
         "query_processors": STORAGE_QUERY_PROCESSORS_SCHEMA,
-        "query_splitters": STORAGE_QUERY_SPLITTERS_SCHEMA,
         "mandatory_condition_checkers": STORAGE_MANDATORY_CONDITION_CHECKERS_SCHEMA,
         "allocation_policies": STORAGE_ALLOCATION_POLICIES_SCHEMA,
         "replacer_processor": STORAGE_REPLACER_PROCESSOR_SCHEMA,
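For context, the removed STORAGE_QUERY_SPLITTERS_SCHEMA validated the query_splitters arrays shown in the YAML hunks above. Below is the kind of entry it accepted, rewritten as the equivalent Python structure; the shape is taken directly from the discover.yaml hunk and is shown purely for illustration, not as generated schema output.

    # Shape of a `query_splitters` config entry the removed schema validated,
    # copied from the discover.yaml hunk above (illustrative only).
    query_splitters_config = [
        {
            "splitter": "ColumnSplitQueryStrategy",
            "args": {
                "id_column": "event_id",
                "project_column": "project_id",
                "timestamp_column": "timestamp",
            },
        },
        {
            "splitter": "TimeSplitQueryStrategy",
            "args": {"timestamp_col": "timestamp"},
        },
    ]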
5 changes: 0 additions & 5 deletions (file name not shown)
@@ -97,11 +97,6 @@ query_processors:
   - processor: TableRateLimit
   - processor: TupleUnaliaser
 
-query_splitters:
-  - splitter: TimeSplitQueryStrategy
-    args:
-      timestamp_col: end_timestamp
-
 mandatory_condition_checkers:
   - condition: ProjectIdEnforcer
 
5 changes: 0 additions & 5 deletions snuba/datasets/configuration/spans/storages/spans.yaml
@@ -169,11 +169,6 @@ query_processors:
   - processor: TableRateLimit
   - processor: TupleUnaliaser
 
-query_splitters:
-  - splitter: TimeSplitQueryStrategy
-    args:
-      timestamp_col: end_timestamp
-
 mandatory_condition_checkers:
   - condition: ProjectIdEnforcer
 
5 changes: 0 additions & 5 deletions snuba/datasets/configuration/storage_builder.py
@@ -11,7 +11,6 @@
 from snuba.datasets.configuration.utils import (
     get_mandatory_condition_checkers,
     get_query_processors,
-    get_query_splitters,
     parse_columns,
 )
 from snuba.datasets.message_filters import StreamMessageFilter
@@ -43,7 +42,6 @@
 STREAM_LOADER = "stream_loader"
 PRE_FILTER = "pre_filter"
 QUERY_PROCESSORS = "query_processors"
-QUERY_SPLITTERS = "query_splitters"
 MANDATORY_CONDITION_CHECKERS = "mandatory_condition_checkers"
 WRITER_OPTIONS = "writer_options"
 SUBCRIPTION_SCHEDULER_MODE = "subscription_scheduler_mode"
@@ -79,9 +77,6 @@ def __build_readable_storage_kwargs(config: dict[str, Any]) -> dict[str, Any]:
         QUERY_PROCESSORS: get_query_processors(
             config[QUERY_PROCESSORS] if QUERY_PROCESSORS in config else []
         ),
-        QUERY_SPLITTERS: get_query_splitters(
-            config[QUERY_SPLITTERS] if QUERY_SPLITTERS in config else []
-        ),
         MANDATORY_CONDITION_CHECKERS: get_mandatory_condition_checkers(
             config[MANDATORY_CONDITION_CHECKERS]
             if MANDATORY_CONDITION_CHECKERS in config
5 changes: 0 additions & 5 deletions (file name not shown)
@@ -248,11 +248,6 @@ query_processors:
   - processor: TableRateLimit
   - processor: TupleUnaliaser
 
-query_splitters:
-  - splitter: TimeSplitQueryStrategy
-    args:
-      timestamp_col: finish_ts
-
 mandatory_condition_checkers:
   - condition: ProjectIdEnforcer
 
17 changes: 0 additions & 17 deletions snuba/datasets/configuration/utils.py
@@ -13,7 +13,6 @@
     String,
     UInt,
 )
-from snuba.datasets.plans.splitters import QuerySplitStrategy
 from snuba.query.processors.condition_checkers import ConditionChecker
 from snuba.query.processors.physical import ClickhouseQueryProcessor
 from snuba.utils.schemas import (
@@ -31,11 +30,6 @@ class QueryProcessorDefinition(TypedDict):
     args: dict[str, Any]
 
 
-class QuerySplitterDefinition(TypedDict):
-    splitter: str
-    args: dict[str, Any]
-
-
 class MandatoryConditionCheckerDefinition(TypedDict):
     condition: str
     args: dict[str, Any]
@@ -52,17 +46,6 @@ def get_query_processors(
     ]
 
 
-def get_query_splitters(
-    query_splitter_objects: list[QuerySplitterDefinition],
-) -> list[QuerySplitStrategy]:
-    return [
-        QuerySplitStrategy.get_from_name(qs["splitter"]).from_kwargs(
-            **qs.get("args", {})
-        )
-        for qs in query_splitter_objects
-    ]
-
-
 def get_mandatory_condition_checkers(
     mandatory_condition_checkers_objects: list[MandatoryConditionCheckerDefinition],
 ) -> list[ConditionChecker]:
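The removed get_query_splitters mirrored get_query_processors above: look up a registered class by its config name, then instantiate it from the YAML args. As a sketch, one of the deleted YAML entries would have flowed through it like this; the call is taken from the removed code, and the assumption is only that TimeSplitQueryStrategy was registered under that name.

    # Illustrative use of the removed factory; assumes TimeSplitQueryStrategy
    # was registered under that config name.
    splitters = get_query_splitters(
        [{"splitter": "TimeSplitQueryStrategy", "args": {"timestamp_col": "timestamp"}}]
    )
    # Result: a list with one TimeSplitQueryStrategy configured for `timestamp`.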
64 changes: 0 additions & 64 deletions snuba/datasets/plans/splitters/__init__.py

This file was deleted.
