diff --git a/snuba/admin/production_queries/prod_queries.py b/snuba/admin/production_queries/prod_queries.py
index f21f50f88a..48158d8afc 100644
--- a/snuba/admin/production_queries/prod_queries.py
+++ b/snuba/admin/production_queries/prod_queries.py
@@ -44,7 +44,7 @@ def _validate_projects_in_query(body: Dict[str, Any], dataset: Dataset) -> None:
         return

     request_parts = RequestSchema.build(HTTPQuerySettings).validate(body)
-    query = parse_snql_query(request_parts.query["query"], dataset)[0]
+    query = parse_snql_query(request_parts.query["query"], dataset)
     project_ids = get_object_ids_in_query_ast(query, "project_id")
     if project_ids == set():
         raise InvalidQueryException("Missing project ID")
diff --git a/snuba/query/mql/parser.py b/snuba/query/mql/parser.py
index 6a002becc1..b5314a07c7 100644
--- a/snuba/query/mql/parser.py
+++ b/snuba/query/mql/parser.py
@@ -40,7 +40,6 @@
     FilterInSelectOptimizer,
 )
 from snuba.query.query_settings import HTTPQuerySettings, QuerySettings
-from snuba.query.snql.anonymize import format_snql_anonymized
 from snuba.query.snql.parser import (
     MAX_LIMIT,
     POST_PROCESSORS,
@@ -1076,7 +1075,7 @@ def parse_mql_query(
     dataset: Dataset,
     custom_processing: Optional[CustomProcessors] = None,
     settings: QuerySettings | None = None,
-) -> Tuple[Union[CompositeQuery[QueryEntity], LogicalQuery], str]:
+) -> Union[CompositeQuery[QueryEntity], LogicalQuery]:
     with sentry_sdk.start_span(op="parser", description="parse_mql_query_initial"):
         query = parse_mql_query_body(body, dataset)
     with sentry_sdk.start_span(
@@ -1095,10 +1094,6 @@ def parse_mql_query(
     with sentry_sdk.start_span(op="processor", description="treeify_conditions"):
         _post_process(query, [_treeify_or_and_conditions], settings)

-    # TODO: Figure out what to put for the anonymized string
-    with sentry_sdk.start_span(op="parser", description="anonymize_snql_query"):
-        snql_anonymized = format_snql_anonymized(query).get_sql()
-
     with sentry_sdk.start_span(op="processor", description="post_processors"):
         _post_process(
             query,
@@ -1126,4 +1121,4 @@ def parse_mql_query(

     with sentry_sdk.start_span(op="validate", description="expression_validators"):
         _post_process(query, VALIDATORS)
-    return query, snql_anonymized
+    return query
diff --git a/snuba/query/parser/exceptions.py b/snuba/query/parser/exceptions.py
index d7cb004627..6b55df87e7 100644
--- a/snuba/query/parser/exceptions.py
+++ b/snuba/query/parser/exceptions.py
@@ -25,9 +25,7 @@ class PostProcessingError(Exception):
     def __init__(
         self,
         query: Query | CompositeQuery[Entity],
-        snql_anonymized: str,
         message: str | None = None,
     ):
         super().__init__(message)
         self.query = query
-        self.snql_anonymized = snql_anonymized
diff --git a/snuba/query/snql/anonymize.py b/snuba/query/snql/anonymize.py
deleted file mode 100644
index c9d9a91355..0000000000
--- a/snuba/query/snql/anonymize.py
+++ /dev/null
@@ -1,124 +0,0 @@
-from typing import Optional, Sequence, Type, Union
-
-from snuba.clickhouse.formatter.expression import ExpressionFormatterAnonymized
-from snuba.clickhouse.formatter.nodes import (
-    FormattedNode,
-    FormattedQuery,
-    FormattedSubQuery,
-    PaddingNode,
-    StringNode,
-)
-from snuba.clickhouse.formatter.query import (
-    _build_optional_string_node,
-    _format_arrayjoin,
-    _format_groupby,
-    _format_limit,
-    _format_limitby,
-    _format_orderby,
-    _format_select,
-)
-from snuba.query import ProcessableQuery
-from snuba.query import Query as AbstractQuery
-from snuba.query.composite import CompositeQuery
-from snuba.query.data_source.join import IndividualNode, JoinClause, JoinVisitor
-from snuba.query.data_source.simple import Entity
-from snuba.query.data_source.visitor import DataSourceVisitor
-from snuba.query.expressions import ExpressionVisitor
-from snuba.query.logical import Query
-from snuba.query.logical import Query as LogicalQuery
-
-
-def format_snql_anonymized(
-    query: Union[LogicalQuery, CompositeQuery[Entity]]
-) -> FormattedQuery:
-
-    return FormattedQuery(_format_query_content(query, ExpressionFormatterAnonymized))
-
-
-class StringQueryFormatter(
-    DataSourceVisitor[FormattedNode, Entity], JoinVisitor[FormattedNode, Entity]
-):
-    def __init__(self, expression_formatter_type: Type[ExpressionVisitor[str]]):
-        self.__expression_formatter_type = expression_formatter_type
-
-    def _visit_simple_source(self, data_source: Entity) -> StringNode:
-        sample_val = data_source.sample
-        sample_str = f" SAMPLE {sample_val}" if sample_val is not None else ""
-        return StringNode(f"{data_source.human_readable_id}{sample_str}")
-
-    def _visit_join(self, data_source: JoinClause[Entity]) -> StringNode:
-        return self.visit_join_clause(data_source)
-
-    def _visit_simple_query(
-        self, data_source: ProcessableQuery[Entity]
-    ) -> FormattedSubQuery:
-        assert isinstance(data_source, LogicalQuery)
-        return self._visit_query(data_source)
-
-    def _visit_composite_query(
-        self, data_source: CompositeQuery[Entity]
-    ) -> FormattedSubQuery:
-        return self._visit_query(data_source)
-
-    def _visit_query(
-        self, data_source: Union[Query, CompositeQuery[Entity]]
-    ) -> FormattedSubQuery:
-        return FormattedSubQuery(
-            _format_query_content(data_source, self.__expression_formatter_type)
-        )
-
-    def visit_individual_node(self, node: IndividualNode[Entity]) -> StringNode:
-        return StringNode(f"{node.alias}, {self.visit(node.data_source)}")
-
-    def visit_join_clause(self, node: JoinClause[Entity]) -> StringNode:
-        left = f"LEFT {node.left_node.accept(self)}"
-        type = f"TYPE {node.join_type}"
-        right = f"RIGHT {node.right_node.accept(self)}\n"
-        on = "".join(
-            [
-                f"{c.left.table_alias}.{c.left.column} {c.right.table_alias}.{c.right.column}"
-                for c in node.keys
-            ]
-        )
-
-        return StringNode(f"{left} {type} {right} ON {on}")
-
-
-def _format_query_content(
-    query: Union[LogicalQuery, CompositeQuery[Entity]],
-    expression_formatter_type: Type[ExpressionVisitor[str]],
-) -> Sequence[FormattedNode]:
-    formatter = expression_formatter_type()
-
-    return [
-        v
-        for v in [
-            PaddingNode(
-                "MATCH",
-                StringQueryFormatter(expression_formatter_type).visit(
-                    query.get_from_clause()
-                ),
-            ),
-            _format_select(query, formatter),
-            _format_groupby(query, formatter),
-            _format_arrayjoin(query, formatter),
-            _build_optional_string_node("WHERE", query.get_condition(), formatter),
-            _build_optional_string_node("HAVING", query.get_having(), formatter),
-            _format_orderby(query, formatter),
-            _format_limitby(query, formatter),
-            _format_limit(query, formatter),
-            _format_granularity(query, formatter),
-        ]
-        if v is not None
-    ]
-
-
-def _format_granularity(
-    query: AbstractQuery, formatter: ExpressionVisitor[str]
-) -> Optional[StringNode]:
-    ast_granularity = query.get_granularity()
-    return (
-        StringNode(f"GRANULARITY {ast_granularity}")
-        if ast_granularity is not None
-        else None
-    )
diff --git a/snuba/query/snql/parser.py b/snuba/query/snql/parser.py
index 0ae88cf5a3..956ea6707e 100644
--- a/snuba/query/snql/parser.py
+++ b/snuba/query/snql/parser.py
@@ -74,7 +74,6 @@
 from snuba.query.parser.exceptions import ParsingException, PostProcessingError
 from snuba.query.query_settings import QuerySettings
 from snuba.query.schema import POSITIVE_OPERATORS
-from snuba.query.snql.anonymize import format_snql_anonymized
 from snuba.query.snql.discover_entity_selection import select_discover_entity
 from snuba.query.snql.expression_visitor import (
     HighPriArithmetic,
@@ -1503,10 +1502,9 @@ def parse_snql_query(
     dataset: Dataset,
     custom_processing: Optional[CustomProcessors] = None,
     settings: QuerySettings | None = None,
-) -> Tuple[Union[CompositeQuery[QueryEntity], LogicalQuery], str]:
+) -> Union[CompositeQuery[QueryEntity], LogicalQuery]:
     with sentry_sdk.start_span(op="parser", description="parse_snql_query_initial"):
         query = parse_snql_query_initial(body)
-    snql_anonymized = ""

     if settings and settings.get_dry_run():
         explain_meta.set_original_ast(str(query))
@@ -1518,9 +1516,6 @@ def parse_snql_query(
         with sentry_sdk.start_span(op="processor", description="treeify_conditions"):
             _post_process(query, [_treeify_or_and_conditions], settings)

-        with sentry_sdk.start_span(op="parser", description="anonymize_snql_query"):
-            snql_anonymized = format_snql_anonymized(query).get_sql()
-
         with sentry_sdk.start_span(op="processor", description="post_processors"):
             _post_process(
                 query,
@@ -1544,8 +1539,8 @@ def parse_snql_query(
         # Validating
         with sentry_sdk.start_span(op="validate", description="expression_validators"):
             _post_process(query, VALIDATORS)
-        return query, snql_anonymized
+        return query
     except InvalidQueryException:
         raise
     except Exception:
-        raise PostProcessingError(query, snql_anonymized)
+        raise PostProcessingError(query)
diff --git a/snuba/querylog/query_metadata.py b/snuba/querylog/query_metadata.py
index 597f9021a3..72ff1fcd4e 100644
--- a/snuba/querylog/query_metadata.py
+++ b/snuba/querylog/query_metadata.py
@@ -278,7 +278,7 @@ def __init__(
             self.entity = entity_name
             self.query_list: MutableSequence[ClickhouseQueryMetadata] = []
             self.projects = ProjectsFinder().visit(request.query)
-            self.snql_anonymized = request.snql_anonymized
+            self.snql_anonymized = ""
         else:
             self.start_timestamp = start_timestamp
             self.end_timestamp = end_timestamp
diff --git a/snuba/request/__init__.py b/snuba/request/__init__.py
index 1407313822..df18e0c52f 100644
--- a/snuba/request/__init__.py
+++ b/snuba/request/__init__.py
@@ -18,9 +18,6 @@ class Request:
     query_settings: QuerySettings
     attribution_info: AttributionInfo

-    # TODO: This should maybe not live on the request
-    snql_anonymized: str
-
     @property
     def referrer(self) -> str:
         return self.attribution_info.referrer
diff --git a/snuba/request/validation.py b/snuba/request/validation.py
index f651d87dfb..76bdd87424 100644
--- a/snuba/request/validation.py
+++ b/snuba/request/validation.py
@@ -3,7 +3,7 @@
 import random
 import textwrap
 import uuid
-from typing import Any, Dict, MutableMapping, Optional, Protocol, Tuple, Type, Union
+from typing import Any, Dict, MutableMapping, Optional, Protocol, Type, Union

 import sentry_sdk

@@ -44,7 +44,7 @@ def __call__(
         settings: QuerySettings,
         dataset: Dataset,
         custom_processing: Optional[CustomProcessors] = ...,
-    ) -> Tuple[Union[Query, CompositeQuery[Entity]], str]:
+    ) -> Union[Query, CompositeQuery[Entity]]:
         ...


@@ -53,7 +53,7 @@ def parse_snql_query(
     settings: QuerySettings,
     dataset: Dataset,
     custom_processing: Optional[CustomProcessors] = None,
-) -> Tuple[Union[Query, CompositeQuery[Entity]], str]:
+) -> Union[Query, CompositeQuery[Entity]]:
     return _parse_snql_query(
         request_parts.query["query"], dataset, custom_processing, settings
     )
@@ -64,7 +64,7 @@ def parse_mql_query(
     settings: QuerySettings,
     dataset: Dataset,
     custom_processing: Optional[CustomProcessors] = None,
-) -> Tuple[Union[Query, CompositeQuery[Entity]], str]:
+) -> Union[Query, CompositeQuery[Entity]]:
     return _parse_mql_query(
         request_parts.query["query"],
         request_parts.query["mql_context"],
@@ -121,24 +121,19 @@ def build_request(
             referrer = _get_referrer(request_parts, referrer)
             settings_obj = _get_settings_object(settings_class, request_parts, referrer)
             try:
-                query, snql_anonymized = parser(
-                    request_parts, settings_obj, dataset, custom_processing
-                )
+                query = parser(request_parts, settings_obj, dataset, custom_processing)
             except PostProcessingError as exception:
                 query = exception.query
-                snql_anonymized = exception.snql_anonymized
                 request = _build_request(
-                    body, request_parts, referrer, settings_obj, query, snql_anonymized
+                    body, request_parts, referrer, settings_obj, query
                 )
                 query_metadata = SnubaQueryMetadata(
                     request, get_dataset_name(dataset), timer
                 )
                 state.record_query(query_metadata.to_dict())
                 raise

-            request = _build_request(
-                body, request_parts, referrer, settings_obj, query, snql_anonymized
-            )
+            request = _build_request(body, request_parts, referrer, settings_obj, query)
     except (InvalidJsonRequestException, InvalidQueryException) as exception:
         request_status = get_request_status(exception)
         record_invalid_request(
@@ -239,7 +234,6 @@ def _build_request(
     referrer: str,
     settings: QuerySettings,
     query: Query | CompositeQuery[Entity],
-    snql_anonymized: str,
 ) -> Request:
     org_ids = get_object_ids_in_query_ast(query, "org_id")
     if org_ids is not None and len(org_ids) == 1:
@@ -257,5 +251,4 @@ def _build_request(
         query=query,
         attribution_info=attribution_info,
         query_settings=settings,
-        snql_anonymized=snql_anonymized,
     )
diff --git a/tests/clickhouse/query_dsl/test_project_id.py b/tests/clickhouse/query_dsl/test_project_id.py
index cf285baf10..b206ba2f97 100644
--- a/tests/clickhouse/query_dsl/test_project_id.py
+++ b/tests/clickhouse/query_dsl/test_project_id.py
@@ -181,14 +181,14 @@ def test_find_projects(
         with pytest.raises(ValidationException):
             request = json_to_snql(query_body, "events")
             request.validate()
-            query, _ = parse_snql_query(str(request.query), events)
+            query = parse_snql_query(str(request.query), events)
             assert isinstance(query, Query)
             run_entity_validators(query, HTTPQuerySettings())
             identity_translate(query)
     else:
         request = json_to_snql(query_body, "events")
         request.validate()
-        query, _ = parse_snql_query(str(request.query), events)
+        query = parse_snql_query(str(request.query), events)
         assert isinstance(query, Query)
         run_entity_validators(query, HTTPQuerySettings())
         translated_query = identity_translate(query)
diff --git a/tests/clickhouse/query_dsl/test_time_range.py b/tests/clickhouse/query_dsl/test_time_range.py
index d369ef9256..7d1ce26705 100644
--- a/tests/clickhouse/query_dsl/test_time_range.py
+++ b/tests/clickhouse/query_dsl/test_time_range.py
@@ -30,7 +30,7 @@ def test_get_time_range() -> None:

     events = get_dataset("events")
     entity = get_entity(EntityKey.EVENTS)
-    query, _ = parse_snql_query(body, events)
+    query = parse_snql_query(body, events)
     assert isinstance(query, Query)
     processors = entity.get_query_processors()
     for processor in processors:
diff --git a/tests/datasets/entities/storage_selectors/test_errors.py b/tests/datasets/entities/storage_selectors/test_errors.py
index 701b018a1e..1b86e25d73 100644
--- a/tests/datasets/entities/storage_selectors/test_errors.py
+++ b/tests/datasets/entities/storage_selectors/test_errors.py
@@ -115,7 +115,7 @@ def test_query_storage_selector(
     use_readable: bool,
     expected_storage: Storage,
 ) -> None:
-    query, _ = parse_snql_query(str(snql_query), dataset)
+    query = parse_snql_query(str(snql_query), dataset)
     assert isinstance(query, Query)

     if use_readable:
@@ -127,7 +127,7 @@ def test_query_storage_selector(


 def test_assert_raises() -> None:
-    query, _ = parse_snql_query(
+    query = parse_snql_query(
         """
         MATCH (events)
         SELECT event_id
diff --git a/tests/datasets/entities/storage_selectors/test_selector.py b/tests/datasets/entities/storage_selectors/test_selector.py
index 068402abfa..287004dddb 100644
--- a/tests/datasets/entities/storage_selectors/test_selector.py
+++ b/tests/datasets/entities/storage_selectors/test_selector.py
@@ -66,7 +66,7 @@ def test_default_query_storage_selector(
     selector: QueryStorageSelector,
     expected_storage: Storage,
 ) -> None:
-    query, _ = parse_snql_query(str(snql_query), dataset)
+    query = parse_snql_query(str(snql_query), dataset)
     assert isinstance(query, Query)

     selected_storage = selector.select_storage(
@@ -76,7 +76,7 @@ def test_default_query_storage_selector(


 def test_assert_raises() -> None:
-    query, _ = parse_snql_query(
+    query = parse_snql_query(
         """
         MATCH (generic_metrics_sets)
         SELECT uniq(value) AS unique_values BY project_id, org_id WHERE org_id = 1
diff --git a/tests/datasets/entities/storage_selectors/test_sessions.py b/tests/datasets/entities/storage_selectors/test_sessions.py
index 48ff840ab8..0b721b44b5 100644
--- a/tests/datasets/entities/storage_selectors/test_sessions.py
+++ b/tests/datasets/entities/storage_selectors/test_sessions.py
@@ -70,7 +70,7 @@ def test_query_storage_selector(
     selector: QueryStorageSelector,
     expected_storage: Storage,
 ) -> None:
-    query, _ = parse_snql_query(str(snql_query), dataset)
+    query = parse_snql_query(str(snql_query), dataset)
     assert isinstance(query, Query)
     selected_storage = selector.select_storage(
         query, SubscriptionQuerySettings(), storage_connections
@@ -79,7 +79,7 @@ def test_query_storage_selector(


 def test_assert_raises() -> None:
-    query, _ = parse_snql_query(
+    query = parse_snql_query(
         """
         MATCH (events)
         SELECT event_id
diff --git a/tests/datasets/entities/test_pluggable_entity.py b/tests/datasets/entities/test_pluggable_entity.py
index 1d7f578fa7..76835b3af8 100644
--- a/tests/datasets/entities/test_pluggable_entity.py
+++ b/tests/datasets/entities/test_pluggable_entity.py
@@ -102,14 +102,11 @@ def pluggable_sets_entity() -> PluggableEntity:

 def build_request(query_body: Mapping[str, str]) -> Request:
     generic_metrics_dataset = get_dataset("generic_metrics")
-    query, snql_anonymized = parse_snql_query(
-        query_body["query"], generic_metrics_dataset
-    )
+    query = parse_snql_query(query_body["query"], generic_metrics_dataset)
     request = Request(
         id="",
         original_body=query_body,
         query=query,
-        snql_anonymized=snql_anonymized,
         query_settings=HTTPQuerySettings(referrer=""),
         attribution_info=AttributionInfo(
             get_app_id("blah"), {"tenant_type": "tenant_id"}, "blah", None, None, None
diff --git a/tests/datasets/test_discover.py b/tests/datasets/test_discover.py
index bda40f8685..b36eef291d 100644
--- a/tests/datasets/test_discover.py
+++ b/tests/datasets/test_discover.py
@@ -172,7 +172,7 @@ def test_data_source(

     request = json_to_snql(query_body, "discover")
     request.validate()
-    query, _ = parse_snql_query(str(request.query), dataset)
+    query = parse_snql_query(str(request.query), dataset)
     entity = query.get_from_clause()
     assert isinstance(entity, EntitySource)
     assert entity.key == expected_entity
diff --git a/tests/datasets/test_events_processing.py b/tests/datasets/test_events_processing.py
index 7775f0487b..6881aecac9 100644
--- a/tests/datasets/test_events_processing.py
+++ b/tests/datasets/test_events_processing.py
@@ -31,12 +31,11 @@ def test_events_processing() -> None:

     events_dataset = get_dataset("events")

-    query, snql_anonymized = parse_snql_query(query_body["query"], events_dataset)
+    query = parse_snql_query(query_body["query"], events_dataset)
     request = Request(
         id="",
         original_body=query_body,
         query=query,
-        snql_anonymized=snql_anonymized,
         query_settings=HTTPQuerySettings(referrer=""),
         attribution_info=AttributionInfo(
             get_app_id("blah"), {"tenant_type": "tenant_id"}, "blah", None, None, None
diff --git a/tests/datasets/test_group_attributes_join.py b/tests/datasets/test_group_attributes_join.py
index 57015cf735..a887f5f6bd 100644
--- a/tests/datasets/test_group_attributes_join.py
+++ b/tests/datasets/test_group_attributes_join.py
@@ -117,7 +117,7 @@ def query_events_joined_group_attributes(self):
             "ntime": self.next_time,
         }

-        query, _ = parse_snql_query(str(query_body), get_dataset("events"))
+        query = parse_snql_query(str(query_body), get_dataset("events"))
         attribution_info = AttributionInfo(
             app_id=AppID(key=""),
             tenant_ids={},
@@ -133,7 +133,6 @@ def query_events_joined_group_attributes(self):
             query=query,
             query_settings=settings,
             attribution_info=attribution_info,
-            snql_anonymized="",
         )

         pipeline_result = EntityProcessingStage().execute(
@@ -351,7 +350,7 @@ def assert_joined_final(
             {"stats": {}, "sql": "", "experiments": {}},
         )

-        query, _ = parse_snql_query(str(query_body), get_dataset("search_issues"))
+        query = parse_snql_query(str(query_body), get_dataset("search_issues"))
         attribution_info = AttributionInfo(
             app_id=AppID(key=""),
             tenant_ids={},
@@ -367,7 +366,6 @@ def assert_joined_final(
             query=query,
             query_settings=settings,
             attribution_info=attribution_info,
-            snql_anonymized="",
         )

         pipeline_result = EntityProcessingStage().execute(
diff --git a/tests/datasets/test_metrics_processing.py b/tests/datasets/test_metrics_processing.py
index 4ff24b2904..19b281657f 100644
--- a/tests/datasets/test_metrics_processing.py
+++ b/tests/datasets/test_metrics_processing.py
@@ -236,13 +236,12 @@ def test_metrics_processing(
     }

     metrics_dataset = get_dataset("metrics")
-    query, snql_anonymized = parse_snql_query(query_body["query"], metrics_dataset)
+    query = parse_snql_query(query_body["query"], metrics_dataset)

     request = Request(
         id="",
         original_body=query_body,
         query=query,
-        snql_anonymized="",
         query_settings=HTTPQuerySettings(referrer=""),
         attribution_info=AttributionInfo(
             get_app_id("blah"), {"tenant_type": "tenant_id"}, "blah", None, None, None
diff --git a/tests/datasets/test_querylog_processor.py b/tests/datasets/test_querylog_processor.py
index 6837c164b5..0b073c55ac 100644
--- a/tests/datasets/test_querylog_processor.py
+++ b/tests/datasets/test_querylog_processor.py
@@ -44,7 +44,6 @@ def test_simple() -> None:
         id=uuid.UUID("a" * 32).hex,
         original_body=request_body,
         query=query,
-        snql_anonymized="",
         query_settings=HTTPQuerySettings(referrer="search"),
         attribution_info=AttributionInfo(
             get_app_id("default"),
@@ -97,7 +96,7 @@ def test_simple() -> None:
             )
         ],
         projects={2},
-        snql_anonymized=request.snql_anonymized,
+        snql_anonymized="",
         entity=EntityKey.EVENTS.value,
     ).to_dict()

diff --git a/tests/datasets/test_search_issues_processor.py b/tests/datasets/test_search_issues_processor.py
index e2a36ce4db..63cf061449 100644
--- a/tests/datasets/test_search_issues_processor.py
+++ b/tests/datasets/test_search_issues_processor.py
@@ -480,6 +480,6 @@ def test_data_source(
     #
     request = json_to_snql(query_body, "search_issues")
     request.validate()
-    query, _ = parse_snql_query(str(request.query), dataset)
+    query = parse_snql_query(str(request.query), dataset)

     assert query.get_from_clause().key == EntityKey.SEARCH_ISSUES
diff --git a/tests/datasets/test_sessions_processing.py b/tests/datasets/test_sessions_processing.py
deleted file mode 100644
index 80085ad015..0000000000
--- a/tests/datasets/test_sessions_processing.py
+++ /dev/null
@@ -1,238 +0,0 @@
-from typing import Any, MutableMapping
-
-import pytest
-from snuba_sdk.legacy import json_to_snql
-
-from snuba.attribution import get_app_id
-from snuba.attribution.attribution_info import AttributionInfo
-from snuba.datasets.factory import get_dataset
-from snuba.datasets.schemas.tables import TableSchema
-from snuba.datasets.storages.factory import get_storage
-from snuba.datasets.storages.storage_key import StorageKey
-from snuba.pipeline.query_pipeline import QueryPipelineResult
-from snuba.pipeline.stages.query_processing import (
-    EntityProcessingStage,
-    StorageProcessingStage,
-)
-from snuba.query import SelectedExpression
-from snuba.query.expressions import Column, CurriedFunctionCall, FunctionCall, Literal
-from snuba.query.query_settings import HTTPQuerySettings, SubscriptionQuerySettings
-from snuba.query.snql.parser import parse_snql_query
-from snuba.request import Request
-from snuba.utils.metrics.timer import Timer
-
-sessions_read_schema = get_storage(StorageKey.SESSIONS_HOURLY).get_schema()
-sessions_raw_schema = get_storage(StorageKey.SESSIONS_RAW).get_schema()
-assert isinstance(sessions_read_schema, TableSchema)
-assert isinstance(sessions_raw_schema, TableSchema)
-
-
-@pytest.mark.clickhouse_db
-def test_sessions_processing() -> None:
-    query_body = {
-        "query": """
-        MATCH (sessions)
-        SELECT duration_quantiles, sessions, users
-        WHERE org_id = 1
-        AND project_id = 1
-        AND started >= toDateTime('2020-01-01T12:00:00')
-        AND started < toDateTime('2020-01-02T12:00:00')
-        """,
-        "dataset": "sessions",
-    }
-
-    sessions = get_dataset("sessions")
-
-    query, snql_anonymized = parse_snql_query(query_body["query"], sessions)
-    request = Request(
-        id="a",
-        original_body=query_body,
-        query=query,
-        snql_anonymized=snql_anonymized,
-        query_settings=HTTPQuerySettings(referrer=""),
-        attribution_info=AttributionInfo(
-            get_app_id("default"), {"tenant_type": "tenant_id"}, "", None, None, None
-        ),
-    )
-    pipeline_result = EntityProcessingStage().execute(
-        QueryPipelineResult(
-            data=request,
-            query_settings=request.query_settings,
-            timer=Timer(name="bloop"),
-            error=None,
-        )
-    )
-    clickhouse_query = StorageProcessingStage().execute(pipeline_result).data
-
-    quantiles = tuple(Literal(None, quant) for quant in [0.5, 0.75, 0.9, 0.95, 0.99, 1])
-    assert clickhouse_query.get_selected_columns() == [
-        SelectedExpression(
-            "duration_quantiles",
-            CurriedFunctionCall(
-                "_snuba_duration_quantiles",
-                FunctionCall(
-                    None,
-                    "quantilesIfMerge",
-                    quantiles,
-                ),
-                (Column(None, None, "duration_quantiles"),),
-            ),
-        ),
-        SelectedExpression(
-            "sessions",
-            FunctionCall(
-                "_snuba_sessions",
-                "plus",
-                (
-                    FunctionCall(
-                        None, "countIfMerge", (Column(None, None, "sessions"),)
-                    ),
-                    FunctionCall(
-                        None,
-                        "sumIfMerge",
-                        (Column(None, None, "sessions_preaggr"),),
-                    ),
-                ),
-            ),
-        ),
-        SelectedExpression(
-            "users",
-            FunctionCall("_snuba_users", "uniqIfMerge", (Column(None, None, "users"),)),
-        ),
-    ]
-
-
-selector_tests = [
-    pytest.param(
-        {
-            "selected_columns": ["sessions", "bucketed_started"],
-            "groupby": ["bucketed_started"],
-            "conditions": [
-                ["org_id", "=", 1],
-                ["project_id", "=", 1],
-                ["started", ">=", "2020-01-01T12:00:00"],
-                ["started", "<", "2020-01-02T12:00:00"],
-            ],
-        },
-        False,
-        sessions_read_schema.get_table_name(),
-        id="Select hourly by default",
-    ),
-    pytest.param(
-        {
-            "selected_columns": ["sessions"],
-            "granularity": 60,
-            "conditions": [
-                ["org_id", "=", 1],
-                ["project_id", "=", 1],
-                ["started", ">=", "2020-01-01T12:00:00"],
-                ["started", "<", "2020-01-02T12:00:00"],
-            ],
-        },
-        False,
-        sessions_read_schema.get_table_name(),
-        id="Select hourly if not grouped by started time",
-    ),
-    pytest.param(
-        {
-            "selected_columns": ["sessions", "bucketed_started"],
-            "groupby": ["bucketed_started"],
-            "granularity": 60,
-            "conditions": [
-                ("org_id", "=", 1),
-                ("project_id", "=", 1),
-                ("started", ">=", "2019-09-19T10:00:00"),
-                ("started", "<", "2019-09-19T12:00:00"),
-            ],
-        },
-        False,
-        sessions_raw_schema.get_table_name(),
-        id="Select raw depending on granularity",
-    ),
-    pytest.param(
-        {
-            "selected_columns": [],
-            "aggregations": [
-                [
-                    "if(greater(sessions, 0), divide(sessions_crashed, sessions), null)",
-                    None,
-                    "crash_rate_alert_aggregate",
-                ]
-            ],
-            "conditions": [
-                ("org_id", "=", 1),
-                ("project_id", "=", 1),
-                ("started", ">=", "2019-09-19T10:00:00"),
-                ("started", "<", "2019-09-19T11:00:00"),
-            ],
-        },
-        True,
-        sessions_raw_schema.get_table_name(),
-        id="Select raw if its a dataset subscription and time_window is <=1h",
-    ),
-    pytest.param(
-        {
-            "selected_columns": [],
-            "aggregations": [
-                [
-                    "if(greater(sessions, 0), divide(sessions_crashed, sessions), null)",
-                    None,
-                    "crash_rate_alert_aggregate",
-                ]
-            ],
-            "conditions": [
-                ("org_id", "=", 1),
-                ("project_id", "=", 1),
-                ("started", ">=", "2019-09-19T10:00:00"),
-                ("started", "<", "2019-09-19T12:00:00"),
-            ],
-        },
-        True,
-        sessions_read_schema.get_table_name(),
-        id="Select materialized if its a dataset subscription and time_window > 1h",
-    ),
-]
-
-
-@pytest.mark.parametrize(
-    "query_body, is_subscription, expected_table",
-    selector_tests,
-)
-@pytest.mark.clickhouse_db
-def test_select_storage(
-    query_body: MutableMapping[str, Any], is_subscription: bool, expected_table: str
-) -> None:
-    sessions = get_dataset("sessions")
-    request = json_to_snql(query_body, "sessions")
-    request.validate()
-    query, snql_anonymized = parse_snql_query(str(request.query), sessions)
-    subscription_settings = (
-        SubscriptionQuerySettings if is_subscription else HTTPQuerySettings
-    )
-
-    request = Request(
-        id="a",
-        original_body=request.to_dict(),
-        query=query,
-        snql_anonymized=snql_anonymized,
-        query_settings=subscription_settings(referrer=""),
-        attribution_info=AttributionInfo(
-            get_app_id("default"),
-            {"tenant_type": "tenant_id"},
-            "blah",
-            None,
-            None,
-            None,
-        ),
-    )
-    pipeline_result = EntityProcessingStage().execute(
-        QueryPipelineResult(
-            data=request,
-            query_settings=request.query_settings,
-            timer=Timer(name="bloop"),
-            error=None,
-        )
-    )
-    clickhouse_query = StorageProcessingStage().execute(pipeline_result).data
-
-    assert clickhouse_query.get_from_clause().table_name == expected_table
diff --git a/tests/pipeline/test_entity_processing_stage.py b/tests/pipeline/test_entity_processing_stage.py
index 8cb91b2eb7..ef629d87f8 100644
--- a/tests/pipeline/test_entity_processing_stage.py
+++ b/tests/pipeline/test_entity_processing_stage.py
@@ -122,7 +122,6 @@ def test_basic(
         id="",
         original_body=query_body,
         query=logical_query,
-        snql_anonymized="",
         query_settings=query_settings,
         attribution_info=AttributionInfo(
             get_app_id("blah"), {"tenant_type": "tenant_id"}, "blah", None, None, None
diff --git a/tests/pipeline/test_entity_processing_stage_composite.py b/tests/pipeline/test_entity_processing_stage_composite.py
index 5c5e367986..1435ba3a5b 100644
--- a/tests/pipeline/test_entity_processing_stage_composite.py
+++ b/tests/pipeline/test_entity_processing_stage_composite.py
@@ -437,7 +437,6 @@ def test_composite(
         id="",
         original_body={"query": "placeholder"},
         query=logical_query,
-        snql_anonymized="",
         query_settings=HTTPQuerySettings(),
         attribution_info=AttributionInfo(
             get_app_id("blah"), {"tenant_type": "tenant_id"}, "blah", None, None, None
diff --git a/tests/pipeline/test_execution_stage.py b/tests/pipeline/test_execution_stage.py
index c7c0c1c2ba..f5d2957a67 100644
--- a/tests/pipeline/test_execution_stage.py
+++ b/tests/pipeline/test_execution_stage.py
@@ -72,7 +72,6 @@ def get_fake_metadata() -> SnubaQueryMetadata:
                 None,
                 None,
             ),
-            "",
         ),
         "blah",
         Timer("woof"),
diff --git a/tests/query/parser/test_formula_mql_query.py b/tests/query/parser/test_formula_mql_query.py
index d36be56007..c4e31c9e65 100644
--- a/tests/query/parser/test_formula_mql_query.py
+++ b/tests/query/parser/test_formula_mql_query.py
@@ -301,7 +301,7 @@ def test_simple_formula() -> None:
     generic_metrics = get_dataset(
         "generic_metrics",
     )
-    query, _ = parse_mql_query(str(query_body), mql_context, generic_metrics)
+    query = parse_mql_query(str(query_body), mql_context, generic_metrics)
     eq, reason = query.equals(expected)
     assert eq, reason

@@ -367,7 +367,7 @@ def test_simple_formula_with_leading_literals() -> None:
     generic_metrics = get_dataset(
         "generic_metrics",
     )
-    query, _ = parse_mql_query(str(query_body), mql_context, generic_metrics)
+    query = parse_mql_query(str(query_body), mql_context, generic_metrics)
     eq, reason = query.equals(expected)
     assert eq, reason

@@ -433,7 +433,7 @@ def test_groupby() -> None:
     generic_metrics = get_dataset(
         "generic_metrics",
     )
-    query, _ = parse_mql_query(str(query_body), mql_context, generic_metrics)
+    query = parse_mql_query(str(query_body), mql_context, generic_metrics)
     eq, reason = query.equals(expected)
     assert eq, reason

@@ -528,7 +528,7 @@ def test_curried_aggregate() -> None:
     generic_metrics = get_dataset(
         "generic_metrics",
     )
-    query, _ = parse_mql_query(str(query_body), mql_context, generic_metrics)
+    query = parse_mql_query(str(query_body), mql_context, generic_metrics)
     eq, reason = query.equals(expected)
     assert eq, reason

@@ -599,7 +599,7 @@ def test_bracketing_rules() -> None:
     generic_metrics = get_dataset(
         "generic_metrics",
     )
-    query, _ = parse_mql_query(str(query_body), mql_context, generic_metrics)
+    query = parse_mql_query(str(query_body), mql_context, generic_metrics)
     eq, reason = query.equals(expected)
     assert eq, reason

@@ -689,7 +689,7 @@ def test_formula_filters() -> None:
     generic_metrics = get_dataset(
         "generic_metrics",
     )
-    query, _ = parse_mql_query(str(query_body), mql_context, generic_metrics)
+    query = parse_mql_query(str(query_body), mql_context, generic_metrics)
     eq, reason = query.equals(expected)
     assert eq, reason

@@ -770,7 +770,7 @@ def test_formula_groupby() -> None:
     generic_metrics = get_dataset(
         "generic_metrics",
     )
-    query, _ = parse_mql_query(str(query_body), mql_context, generic_metrics)
+    query = parse_mql_query(str(query_body), mql_context, generic_metrics)
     eq, reason = query.equals(expected)
     assert eq, reason

@@ -826,7 +826,7 @@ def test_formula_scalar_value() -> None:
     generic_metrics = get_dataset(
         "generic_metrics",
     )
-    query, _ = parse_mql_query(str(query_body), mql_context, generic_metrics)
+    query = parse_mql_query(str(query_body), mql_context, generic_metrics)
     eq, reason = query.equals(expected)
     assert eq, reason

@@ -875,7 +875,7 @@ def test_arbitrary_functions() -> None:
     generic_metrics = get_dataset(
         "generic_metrics",
     )
-    query, _ = parse_mql_query(str(query_body), mql_context, generic_metrics)
+    query = parse_mql_query(str(query_body), mql_context, generic_metrics)
     eq, reason = query.equals(expected)
     assert eq, reason

@@ -924,7 +924,7 @@ def test_arbitrary_functions_with_formula() -> None:
     generic_metrics = get_dataset(
         "generic_metrics",
     )
-    query, _ = parse_mql_query(str(query_body), mql_context, generic_metrics)
+    query = parse_mql_query(str(query_body), mql_context, generic_metrics)
     eq, reason = query.equals(expected)
     assert eq, reason

@@ -982,6 +982,6 @@ def test_arbitrary_functions_with_formula_and_filters() -> None:
     generic_metrics = get_dataset(
         "generic_metrics",
     )
-    query, _ = parse_mql_query(str(query_body), mql_context, generic_metrics)
+    query = parse_mql_query(str(query_body), mql_context, generic_metrics)
     eq, reason = query.equals(expected)
     assert eq, reason
diff --git a/tests/query/parser/test_mql_query.py b/tests/query/parser/test_mql_query.py
index c4c67a5328..75378912e9 100644
--- a/tests/query/parser/test_mql_query.py
+++ b/tests/query/parser/test_mql_query.py
@@ -2576,7 +2576,7 @@ def test_format_expressions_from_mql(
     query_body: str, mql_context: Dict[str, Any], expected_query: Query, dataset: str
 ) -> None:
     generic_metrics = get_dataset(dataset)
-    query, _ = parse_mql_query(str(query_body), mql_context, generic_metrics)
+    query = parse_mql_query(str(query_body), mql_context, generic_metrics)
     eq, reason = query.equals(expected_query)
     assert eq, reason

@@ -2748,7 +2748,7 @@ def test_invalid_format_expressions_from_mql(
 ) -> None:
     generic_metrics = get_dataset("generic_metrics")
     with pytest.raises(type(error), match=re.escape(str(error))):
-        query, _ = parse_mql_query(query_body, mql_context, generic_metrics)
+        parse_mql_query(query_body, mql_context, generic_metrics)


 def test_pushdown_error_query():
diff --git a/tests/query/parser/test_query.py b/tests/query/parser/test_query.py
index 8373a39d59..b2b9bce829 100644
--- a/tests/query/parser/test_query.py
+++ b/tests/query/parser/test_query.py
@@ -1185,7 +1185,7 @@ def snql_conditions_with_default(*conditions: str) -> str:
 @pytest.mark.parametrize("query_body, expected_query", snql_test_cases)
 def test_format_expressions_from_snql(query_body: str, expected_query: Query) -> None:
     events = get_dataset("events")
-    query, _ = parse_snql_query(str(query_body), events)
+    query = parse_snql_query(str(query_body), events)

     eq, reason = query.equals(expected_query)
     assert eq, reason
@@ -1237,7 +1237,7 @@ def test_treeify() -> None:
     WHERE project_id IN array(4552673527463954) AND timestamp < toDateTime('2023-09-22T18:18:10.891157') AND timestamp >= toDateTime('2023-06-24T18:18:10.891157') HAVING or(1, 1, 1, 1) != 0
     LIMIT 10
     """
-    query_ast, _ = parse_snql_query(query, get_dataset("replays"))
+    query_ast = parse_snql_query(query, get_dataset("replays"))
     having = query_ast.get_having()
     expected = binary_condition(
         ConditionFunctions.NEQ,
diff --git a/tests/query/processors/test_arrayjoin_optimizer.py b/tests/query/processors/test_arrayjoin_optimizer.py
index 091d8f7615..fa857a8de1 100644
--- a/tests/query/processors/test_arrayjoin_optimizer.py
+++ b/tests/query/processors/test_arrayjoin_optimizer.py
@@ -417,13 +417,11 @@ def with_required(condition: Expression) -> Expression:

 def parse_and_process(snql_query: str) -> ClickhouseQuery:
     dataset = get_dataset("transactions")
-    query, snql_anonymized = parse_snql_query(str(snql_query), dataset)
-    # print(query)
+    query = parse_snql_query(str(snql_query), dataset)
     request = Request(
         id="a",
         original_body={"query": snql_query, "dataset": "transactions"},
         query=query,
-        snql_anonymized=snql_anonymized,
         query_settings=HTTPQuerySettings(referrer="r"),
         attribution_info=AttributionInfo(
             get_app_id("blah"), {"tenant_type": "tenant_id"}, "blah", None, None, None
diff --git a/tests/query/processors/test_prewhere.py b/tests/query/processors/test_prewhere.py
index 45e0e7d541..280a1d04bd 100644
--- a/tests/query/processors/test_prewhere.py
+++ b/tests/query/processors/test_prewhere.py
@@ -235,7 +235,7 @@ def test_prewhere(
     ]

     request = json_to_snql(query_body, "events")
     request.validate()
-    snql_query, _ = parse_snql_query(str(request.query), events)
+    snql_query = parse_snql_query(str(request.query), events)
     assert isinstance(snql_query, Query)
     query = identity_translate(snql_query)
diff --git a/tests/query/snql/test_query.py b/tests/query/snql/test_query.py
index ee1fc5ba55..42c620c091 100644
--- a/tests/query/snql/test_query.py
+++ b/tests/query/snql/test_query.py
@@ -2066,7 +2066,7 @@ def events_mock(relationship: str) -> JoinRelationship:
     events_entity = get_entity(EntityKey.EVENTS)

     with mock.patch.object(events_entity, "get_join_relationship", events_mock):
-        query, _ = parse_snql_query(query_body, events)
+        query = parse_snql_query(query_body, events)

     eq, reason = query.equals(expected_query)
     assert eq, reason
diff --git a/tests/query/snql/test_query_column_validation.py b/tests/query/snql/test_query_column_validation.py
index 7914bdbcb2..a26e6bada6 100644
--- a/tests/query/snql/test_query_column_validation.py
+++ b/tests/query/snql/test_query_column_validation.py
@@ -459,6 +459,6 @@ def events_mock(relationship: str) -> JoinRelationship:

     events_entity = get_entity(EntityKey.EVENTS)
     monkeypatch.setattr(events_entity, "get_join_relationship", events_mock)
-    query, _ = parse_snql_query(query_body, events)
+    query = parse_snql_query(query_body, events)
     eq, reason = query.equals(expected_query)
     assert eq, reason
diff --git a/tests/query/snql/test_snql_anonymizer.py b/tests/query/snql/test_snql_anonymizer.py
deleted file mode 100644
index 63f0fd8945..0000000000
--- a/tests/query/snql/test_snql_anonymizer.py
+++ /dev/null
@@ -1,177 +0,0 @@
-from unittest import mock
-
-import pytest
-
-from snuba.datasets.entities.entity_key import EntityKey
-from snuba.datasets.entities.factory import get_entity
-from snuba.datasets.factory import get_dataset
-from snuba.query.data_source.join import JoinRelationship, JoinType
-from snuba.query.snql.parser import parse_snql_query
-
-
-def build_cond(tn: str) -> str:
-    time_column = "finish_ts" if tn == "t" else "timestamp"
-    tn = tn + "." if tn else ""
-    return f"{tn}project_id=1 AND {tn}{time_column}>=toDateTime('2021-01-01') AND {tn}{time_column}<toDateTime('2021-01-02')"
-
-
-added_condition = build_cond("")
-
-test_cases = [
-    pytest.param(
-        f"MATCH (events) SELECT tags[key], contexts[lcp.elementSize], count() AS count BY tags[key], contexts[lcp.elementSize] WHERE contexts[lcp.elementSize] > 1 AND {added_condition}",
-        (
-            "MATCH Entity(events) "
-            "SELECT `tags[key]`, `contexts[lcp.elementSize]`, (count() AS count) "
-            "GROUP BY `tags[key]`, `contexts[lcp.elementSize]` "
-            "WHERE greater(`contexts[lcp.elementSize]`, -1337) "
-            "AND equals(project_id, -1337) AND "
-            "greaterOrEquals(timestamp, toDateTime('$S')) AND "
-            "less(timestamp, toDateTime('$S')) "
-            "LIMIT 1000 OFFSET 0"
-        ),
-        id="Basic query with subscriptables",
-    ),
-    pytest.param(
-        f"MATCH (events) SELECT event_id WHERE (event_id!='bob' OR group_id<2 AND (location='here' OR partition>0)) AND {added_condition}",
-        (
-            "MATCH Entity(events) "
-            "SELECT event_id "
-            "WHERE (notEquals(event_id, '$S') "
-            "OR less(group_id, -1337) "
-            "AND (equals(location, '$S') "
-            "OR greater(partition, -1337))) "
-            "AND equals(project_id, -1337) "
-            "AND greaterOrEquals(timestamp, toDateTime('$S')) "
-            "AND less(timestamp, toDateTime('$S')) "
-            "LIMIT 1000 OFFSET 0"
-        ),
-        id="Query with multiple / complex conditions joined by parenthesized / regular AND / OR",
-    ),
-    pytest.param(
-        """MATCH (events)
-        SELECT event_id, tags[c]
-        WHERE project_id IN tuple( 2 , 3)
-        AND timestamp>=toDateTime('2021-01-01')
-        AND timestamp<toDateTime('2021-01-02')
-        """,
-        (
-            "MATCH Entity(events) "
-            "SELECT event_id, `tags[c]` "
-            "WHERE in(project_id, tuple(-1337, -1337)) "
-            "AND greaterOrEquals(timestamp, toDateTime('$S')) "
-            "AND less(timestamp, toDateTime('$S')) "
-            "LIMIT 1000 OFFSET 0"
-        ),
-        id="Query with IN condition",
-    ),
-    pytest.param(
-        f"""MATCH (e: events) -[contains]-> (t: transactions),
-        (e: events) -[assigned]-> (ga: groupassignee)
-        SELECT 4-5, ga.offset
-        WHERE {build_cond('e')} AND {build_cond('t')}""",
-        (
-            "MATCH "
-            "LEFT "
-            "LEFT e, Entity(events) "
-            "TYPE JoinType.INNER RIGHT ga, Entity(groupassignee)\n ON e.event_id ga.group_id "
-            "TYPE JoinType.INNER RIGHT t, Entity(transactions)\n ON e.event_id t.event_id "
-            "SELECT (minus(-1337, -1337) AS `4-5`), ga.offset "
-            "WHERE equals(e.project_id, -1337) "
-            "AND greaterOrEquals(e.timestamp, toDateTime('$S')) "
-            "AND less(e.timestamp, toDateTime('$S')) "
-            "AND equals(t.project_id, -1337) "
-            "AND greaterOrEquals(t.finish_ts, toDateTime('$S')) "
-            "AND less(t.finish_ts, toDateTime('$S')) "
-            "LIMIT 1000 OFFSET 0"
-        ),
-        id="Multi join match",
-    ),
-    pytest.param(
-        "MATCH { MATCH (events) SELECT count() AS count BY title WHERE %s } SELECT max(count) AS max_count"
-        % added_condition,
-        (
-            "MATCH "
-            "(MATCH Entity(events) "
-            "SELECT title, (count() AS count) "
-            "GROUP BY title "
-            "WHERE equals(project_id, -1337) "
-            "AND greaterOrEquals(timestamp, toDateTime('$S')) "
-            "AND less(timestamp, toDateTime('$S'))) "
-            "SELECT (max(count) AS max_count) "
-            "LIMIT 1000 OFFSET 0"
-        ),
-        id="sub query match",
-    ),
-    pytest.param(
-        f"""MATCH (discover_events)
-        SELECT count() AS count BY transaction_name AS tn
-        WHERE {added_condition}
-        """,
-        (
-            "MATCH Entity(discover_events) "
-            "SELECT transaction_name, (count() AS count) "
-            "GROUP BY transaction_name "
-            "WHERE equals(project_id, -1337) "
-            "AND greaterOrEquals(timestamp, toDateTime('$S')) "
-            "AND less(timestamp, toDateTime('$S')) "
-            "LIMIT 1000 OFFSET 0"
-        ),
-        id="aliased columns in select and group by",
-    ),
-]
-
-
-@pytest.mark.parametrize("query_body, expected_snql_anonymized", test_cases)
-def test_format_expressions(query_body: str, expected_snql_anonymized: str) -> None:
-    events = get_dataset("events")
-    # TODO: Potentially remove this once entities have actual join relationships
-    mapping = {
-        "contains": (EntityKey.TRANSACTIONS, "event_id"),
-        "assigned": (EntityKey.GROUPASSIGNEE, "group_id"),
-        "bookmark": (EntityKey.GROUPEDMESSAGE, "first_release_id"),
-    }
-
-    def events_mock(relationship: str) -> JoinRelationship:
-        entity_key, rhs_column = mapping[relationship]
-        return JoinRelationship(
-            rhs_entity=entity_key,
-            join_type=JoinType.INNER,
-            columns=[("event_id", rhs_column)],
-            equivalences=[],
-        )
-
-    events_entity = get_entity(EntityKey.EVENTS)
-    with mock.patch.object(events_entity, "get_join_relationship", events_mock):
-        _, snql_anonymized = parse_snql_query(query_body, events)
-
-    assert snql_anonymized == expected_snql_anonymized
diff --git a/tests/query/test_query_ast.py b/tests/query/test_query_ast.py
index 3275893e1e..87aec98714 100644
--- a/tests/query/test_query_ast.py
+++ b/tests/query/test_query_ast.py
@@ -173,7 +173,7 @@ def test_get_all_columns_legacy() -> None:
     events = get_dataset("events")
     request = json_to_snql(query_body, "events")
     request.validate()
-    query, _ = parse_snql_query(str(request.query), events)
+    query = parse_snql_query(str(request.query), events)

     assert query.get_all_ast_referenced_columns() == {
         Column("_snuba_title", None, "title"),
@@ -213,7 +213,7 @@ def test_get_all_columns() -> None:
     HAVING trace_sampled > 1
     """
     events = get_dataset("events")
-    query, _ = parse_snql_query(query_body, events)
+    query = parse_snql_query(query_body, events)

     assert query.get_all_ast_referenced_columns() == {
         Column("_snuba_partition", None, "partition"),
@@ -342,7 +342,7 @@ def test_alias_validation(
     request = json_to_snql(query_body, "events")
     request.validate()
     settings = HTTPQuerySettings()
-    query, _ = parse_snql_query(str(request.query), events)
+    query = parse_snql_query(str(request.query), events)
     attribution_info = AttributionInfo(
         app_id=AppID(key=""),
         tenant_ids={},
@@ -357,7 +357,6 @@ def test_alias_validation(
         query=query,
         query_settings=settings,
         attribution_info=attribution_info,
-        snql_anonymized="",
     )

     pipeline_result = EntityProcessingStage().execute(
diff --git a/tests/web/test_query_cache.py b/tests/web/test_query_cache.py
index bcff178af4..552862a5a2 100644
--- a/tests/web/test_query_cache.py
+++ b/tests/web/test_query_cache.py
@@ -44,7 +44,6 @@ def run_query() -> None:
         id="asd",
         original_body={},
         query=query,
-        snql_anonymized="",
         query_settings=query_settings,
         attribution_info=AttributionInfo(
             get_app_id("blah"),
diff --git a/tests/web/test_transform_names.py b/tests/web/test_transform_names.py
index a7688889a3..089a61f11d 100644
--- a/tests/web/test_transform_names.py
+++ b/tests/web/test_transform_names.py
@@ -89,7 +89,6 @@ def test_transform_column_names() -> None:
         id="asd",
         original_body={},
         query=query,
-        snql_anonymized="",
         query_settings=query_settings,
         attribution_info=AttributionInfo(
             get_app_id("blah"),