diff --git a/snuba/query/processors/logical/granularity_processor.py b/snuba/query/processors/logical/granularity_processor.py index abc7ee22e3..0c6a110d09 100644 --- a/snuba/query/processors/logical/granularity_processor.py +++ b/snuba/query/processors/logical/granularity_processor.py @@ -1,9 +1,29 @@ -from typing import Mapping, NamedTuple +from abc import abstractmethod +from typing import List, Mapping, NamedTuple, Optional -from snuba.query.conditions import ConditionFunctions, binary_condition +from snuba.query.conditions import ( + BooleanFunctions, + ConditionFunctions, + binary_condition, +) from snuba.query.exceptions import InvalidGranularityException -from snuba.query.expressions import Column, Literal +from snuba.query.expressions import Column as ColumnExp +from snuba.query.expressions import Expression +from snuba.query.expressions import FunctionCall as FunctionCallExp +from snuba.query.expressions import Literal as LiteralExp from snuba.query.logical import Query +from snuba.query.matchers import ( + Any, + AnyExpression, + Column, + FunctionCall, + Integer, + Literal, + MatchResult, + Or, + Param, + String, +) from snuba.query.processors.logical import LogicalQueryProcessor from snuba.query.query_settings import QuerySettings @@ -12,36 +32,170 @@ DEFAULT_GRANULARITY_RAW = 60 -class GranularityProcessor(LogicalQueryProcessor): - """Use the granularity set on the query to filter on the granularity column""" +class BaseGranularityProcessor(LogicalQueryProcessor): + @abstractmethod + def get_and_process_granularities(self, query: Query) -> None: + raise NotImplementedError - @staticmethod - def __get_granularity(query: Query) -> int: - """Find the best fitting granularity for this query""" - requested_granularity = query.get_granularity() + @abstractmethod + def process_query(self, query: Query, query_settings: QuerySettings) -> None: + raise NotImplementedError - if requested_granularity is None: - return DEFAULT_GRANULARITY_RAW - elif 
requested_granularity > 0: - for granularity in reversed(GRANULARITIES_AVAILABLE): - if (requested_granularity % granularity) == 0: + @abstractmethod + def get_highest_common_available_granularity_multiple( + self, + selected_granularity: int, + ) -> int: + raise NotImplementedError - return granularity + def find_granularities_in_expression( + self, expression: Optional[Expression] + ) -> List[MatchResult]: + """ + Finds all granularity conditions in an expression. Returns a List[MatchResult] with one match + per granularity equality condition, found by recursing through AND/OR; each match binds the integer to the "granularity" param. + """ + matches: List[MatchResult] = [] + match = FunctionCall( + String(ConditionFunctions.EQ), + ( + Column(column_name=String("granularity")), + Literal(value=Param("granularity", Any(int))), + ), + ).match(expression) + if match is not None: + matches.append(match) - raise InvalidGranularityException( - f"Granularity must be multiple of one of {GRANULARITIES_AVAILABLE}" - ) + match = FunctionCall( + Param( + "operator", + Or([String(BooleanFunctions.AND), String(BooleanFunctions.OR)]), + ), + (Param("lhs", AnyExpression()), Param("rhs", AnyExpression())), + ).match(expression) - def process_query(self, query: Query, query_settings: QuerySettings) -> None: - granularity = self.__get_granularity(query) + if match is not None: + lhs_granularity = self.find_granularities_in_expression( + match.expression("lhs") + ) + rhs_granularity = self.find_granularities_in_expression( + match.expression("rhs") + ) + matches.extend(rhs_granularity) + matches.extend(lhs_granularity) + return matches + + def add_granularity_condition( + self, query: Query, selected_granularity: int + ) -> None: query.add_condition_to_ast( binary_condition( ConditionFunctions.EQ, - Column(None, None, "granularity"), - Literal(None, granularity), + ColumnExp(None, None, "granularity"), + LiteralExp(None, selected_granularity), ) ) + def replace_granularity_condition( + self, query: Query, match:
MatchResult, selected_granularity: int + ) -> None: + # TODO: fix this + def process_condition(exp: Expression) -> Expression: + result = FunctionCall( + String(ConditionFunctions.EQ), + ( + Column(column_name=String("granularity")), + Literal( + value=Param( + "granularity", Integer(match.integer("granularity")) + ) + ), + ), + ).match(exp) + if result is not None: + assert isinstance(exp, FunctionCallExp) + return FunctionCallExp( + exp.alias, + exp.function_name, + (exp.parameters[0], LiteralExp(None, selected_granularity)), + ) + + return exp + + condition = query.get_condition() + if condition: + query.set_ast_condition(condition.transform(process_condition)) + + +class GranularityProcessor(BaseGranularityProcessor): + """ + A granularity processor which finds the granularity in the query, + validates/transforms its value according to GRANULARITIES_AVAILABLE, and + transforms the conditions appropriately to reflect this change.""" + + def get_highest_common_available_granularity_multiple( + self, + requested_granularity: int, + ) -> int: + for granularity in reversed(GRANULARITIES_AVAILABLE): + if requested_granularity > 0 and (requested_granularity % granularity) == 0: + return granularity + raise InvalidGranularityException( + f"Granularity must be multiple of one of {GRANULARITIES_AVAILABLE}" + ) + + def get_and_process_granularities(self, query: Query) -> None: + requested_granularity = query.get_granularity() + expression = query.get_condition() + granularities_in_condition = self.find_granularities_in_expression(expression) + + # If no granularity was provided in either the GRANULARITY clause or the WHERE condition, then add a single default + if requested_granularity is None and not granularities_in_condition: + self.add_granularity_condition(query, DEFAULT_GRANULARITY_RAW) + return + + # If granularities were provided within both the GRANULARITY clause and WHERE clause, then raise an error + if granularities_in_condition and requested_granularity: + raise
InvalidGranularityException( + "Granularities cannot be specified in both the GRANULARITY clause and WHERE clause." + ) + + # 1. Identifies where the granularity was provided (GRANULARITY clause vs WHERE clause) + # 2. Gets the highest common multiple of GRANULARITIES_AVAILABLE + # 3. Process the query + # a. If granularity was found in GRANULARITY clause, simply just add a new condition. + # b. If found in WHERE clause, replace the old condition with the new one. + if ( + requested_granularity + and requested_granularity > 0 + and not granularities_in_condition + ): + selected_granularity = ( + self.get_highest_common_available_granularity_multiple( + requested_granularity + ) + ) + self.add_granularity_condition(query, selected_granularity) + elif requested_granularity is None and len(granularities_in_condition) > 0: + for match in granularities_in_condition: + selected_granularity = ( + self.get_highest_common_available_granularity_multiple( + match.integer("granularity") + ) + ) + self.replace_granularity_condition( + query, + match, + selected_granularity, + ) + else: + raise InvalidGranularityException( + "Could not select granularity from either clause or condition." + ) + + def process_query(self, query: Query, query_settings: QuerySettings) -> None: + self.get_and_process_granularities(query) + class GranularityMapping(NamedTuple): raw: int @@ -56,11 +210,13 @@ class GranularityMapping(NamedTuple): DEFAULT_MAPPED_GRANULARITY_ENUM = 1 -class MappedGranularityProcessor(LogicalQueryProcessor): +class MappedGranularityProcessor(BaseGranularityProcessor): """ - Use the granularity set on the query to filter on the granularity column, - supporting generic-metrics style enum mapping (e.g. input granularity of 60s - is mapped to the enum granularity of 1) + A mapped granularity processor which finds the granularity in the query, + validates/transforms its value according to the generic-metrics style + enum mapping (e.g. 
input granularity of 60s is mapped to the enum + granularity of 1), and transforms the conditions appropriately to reflect + this change. """ def __init__( @@ -90,27 +246,73 @@ def __init__( ] self._default_granularity_enum = default_granularity - def __get_granularity(self, query: Query) -> int: - """Find the best fitting granularity for this query""" - requested_granularity = query.get_granularity() - - if requested_granularity is None: - return self._default_granularity_enum - elif requested_granularity > 0: - for mapping in self._accepted_granularities: - if requested_granularity % mapping.raw == 0: - return mapping.enum_value + def get_highest_common_available_granularity_multiple( + self, + requested_granularity: int, + ) -> int: + # If the requested granularity is already mapped to the enum, then just return the value. + min_enum_granularity = min( + [mapping.enum_value for mapping in self._accepted_granularities] + ) + max_enum_granularity = max( + [mapping.enum_value for mapping in self._accepted_granularities] + ) + if min_enum_granularity <= requested_granularity <= max_enum_granularity: + return requested_granularity + # If the requested granularity is not mapped, find its correct mapping + for mapping in self._accepted_granularities: + if requested_granularity > 0 and requested_granularity % mapping.raw == 0: + return mapping.enum_value raise InvalidGranularityException( f"Granularity must be multiple of one of {self._available_granularities_values}" ) - def process_query(self, query: Query, query_settings: QuerySettings) -> None: - granularity = self.__get_granularity(query) - query.add_condition_to_ast( - binary_condition( - ConditionFunctions.EQ, - Column(None, None, "granularity"), - Literal(None, granularity), + def get_and_process_granularities(self, query: Query) -> None: + """Find the best fitting granularity for this query""" + requested_granularity = query.get_granularity() + expression = query.get_condition() + granularities_in_condition =
self.find_granularities_in_expression(expression) + + # If no granularity was provided in either the GRANULARITY clause or the WHERE condition, then add the default + if requested_granularity is None and not granularities_in_condition: + self.add_granularity_condition(query, self._default_granularity_enum) + return + + # If granularities were provided within both the GRANULARITY clause and WHERE clause, then raise an error + if len(granularities_in_condition) > 0 and requested_granularity: + raise InvalidGranularityException( + "Granularities cannot be specified in both the GRANULARITY clause and WHERE clause." ) - ) + + # 1. Identifies where the granularity was provided (GRANULARITY clause vs WHERE clause) + # 2. Maps it to an enum value via get_highest_common_available_granularity_multiple + # 3. Process the query + # a. If granularity was found in GRANULARITY clause, simply add a new condition. + # b. If found in WHERE clause, replace the old condition with the new one. + if ( + requested_granularity + and requested_granularity > 0 + and not granularities_in_condition + ): + selected_granularity = ( + self.get_highest_common_available_granularity_multiple( + requested_granularity + ) + ) + self.add_granularity_condition(query, selected_granularity) + elif requested_granularity is None and len(granularities_in_condition) > 0: + for match in granularities_in_condition: + selected_granularity = ( + self.get_highest_common_available_granularity_multiple( + match.integer("granularity") + ) + ) + self.replace_granularity_condition(query, match, selected_granularity) + else: + raise InvalidGranularityException( + "Could not select granularity from either clause or condition."
+ ) + + def process_query(self, query: Query, query_settings: QuerySettings) -> None: + self.get_and_process_granularities(query) diff --git a/tests/query/processors/test_granularity_processor.py b/tests/query/processors/test_granularity_processor.py index c6db6f721d..e98e3fa310 100644 --- a/tests/query/processors/test_granularity_processor.py +++ b/tests/query/processors/test_granularity_processor.py @@ -1,4 +1,5 @@ -from typing import Optional +from datetime import datetime +from typing import List, Optional import pytest @@ -90,6 +91,198 @@ def test_granularity_added( ) +@pytest.mark.parametrize( + "entity_key,column", + [ + (EntityKey.METRICS_COUNTERS, "value"), + (EntityKey.METRICS_DISTRIBUTIONS, "percentiles"), + (EntityKey.METRICS_SETS, "value"), + ], +) +@pytest.mark.parametrize( + "requested_granularity, query_granularity", + [ + (10, 10), + (60, 60), + (90, 10), + (120, 60), + (60 * 60, 3600), + (90 * 60, 60), + (120 * 60, 3600), + (24 * 60 * 60, 86400), + (32 * 60 * 60, 3600), + (48 * 60 * 60, 86400), + (13, None), + (0, None), + ], +) +def test_granularity_added_in_condition( + entity_key: EntityKey, + column: str, + requested_granularity: Optional[int], + query_granularity: int, +) -> None: + query = Query( + QueryEntity(entity_key, ColumnSet([])), + selected_columns=[SelectedExpression(column, Column(None, None, column))], + condition=binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, requested_granularity), + ), + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "metric_id"), + Literal(None, 123), + ), + ), + ) + try: + GranularityProcessor().process_query(query, HTTPQuerySettings()) + except InvalidGranularityException: + assert query_granularity is None + else: + assert query == Query( + QueryEntity(entity_key, ColumnSet([])), + selected_columns=[SelectedExpression(column, Column(None, None, column))], + condition=binary_condition( + 
BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, query_granularity), + ), + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "metric_id"), + Literal(None, 123), + ), + ), + ) + + +@pytest.mark.parametrize( + "entity_key,column", + [ + (EntityKey.METRICS_COUNTERS, "value"), + (EntityKey.METRICS_DISTRIBUTIONS, "percentiles"), + (EntityKey.METRICS_SETS, "value"), + ], +) +@pytest.mark.parametrize( + "requested_granularity, query_granularity", + [ + ([10, 10], [10, 10]), + ([60, 60], [60, 60]), + ([90, 60], [10, 60]), + ([120, 10], [60, 10]), + ([10, 60 * 60], [10, 3600]), + ([90 * 60, 120 * 60], [60, 3600]), + ([24 * 60 * 60, 32 * 60 * 60], [86400, 3600]), + ([13, 10], [None, 10]), + ([10, 0], [10, None]), + ], +) +def test_multiple_granularities_added_in_condition( + entity_key: EntityKey, + column: str, + requested_granularity: List[int], + query_granularity: List[int], +) -> None: + query_with_multiple_conditions = Query( + QueryEntity(entity_key, ColumnSet([])), + selected_columns=[SelectedExpression(column, Column(None, None, column))], + condition=binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "metric_id"), + Literal(None, 123), + ), + binary_condition( + BooleanFunctions.OR, + binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, requested_granularity[0]), + ), + binary_condition( + ConditionFunctions.GT, + Column(None, None, "timestamp"), + Literal(None, datetime(2020, 8, 1)), + ), + ), + binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, requested_granularity[1]), + ), + binary_condition( + ConditionFunctions.LT, + Column(None, None, "timestamp"), + Literal(None, datetime(2020, 8, 1)), + ), + ), + ), + ), + ) + + try: + 
GranularityProcessor().process_query( + query_with_multiple_conditions, HTTPQuerySettings() + ) + except InvalidGranularityException: + assert query_granularity[0] is None or query_granularity[1] is None + else: + assert query_with_multiple_conditions == Query( + QueryEntity(entity_key, ColumnSet([])), + selected_columns=[SelectedExpression(column, Column(None, None, column))], + condition=binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "metric_id"), + Literal(None, 123), + ), + binary_condition( + BooleanFunctions.OR, + binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, query_granularity[0]), + ), + binary_condition( + ConditionFunctions.GT, + Column(None, None, "timestamp"), + Literal(None, datetime(2020, 8, 1)), + ), + ), + binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, query_granularity[1]), + ), + binary_condition( + ConditionFunctions.LT, + Column(None, None, "timestamp"), + Literal(None, datetime(2020, 8, 1)), + ), + ), + ), + ), + ) + + @pytest.mark.parametrize( "entity_key,column", [ @@ -156,3 +349,197 @@ def test_granularity_enum_mapping( ), granularity=(requested_granularity), ) + + +@pytest.mark.parametrize( + "entity_key,column", + [ + (EntityKey.GENERIC_METRICS_DISTRIBUTIONS, "percentiles"), + (EntityKey.GENERIC_METRICS_SETS, "value"), + ], +) +@pytest.mark.parametrize( + "requested_granularity, query_granularity", + [ + (10, None), + (60, 1), + (90, None), + (120, 1), + (60 * 60, 2), + (90 * 60, 1), + (120 * 60, 2), + (24 * 60 * 60, 3), + (32 * 60 * 60, 2), + (48 * 60 * 60, 3), + (13, None), + (0, None), + ], +) +def test_granularity_enum_mapping_in_condition( + entity_key: EntityKey, + column: str, + requested_granularity: Optional[int], + query_granularity: int, +) -> None: + query = Query( + 
QueryEntity(entity_key, ColumnSet([])), + selected_columns=[SelectedExpression(column, Column(None, None, column))], + condition=binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, requested_granularity), + ), + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "metric_id"), + Literal(None, 123), + ), + ), + ) + + try: + MappedGranularityProcessor( + accepted_granularities=PERFORMANCE_GRANULARITIES, + default_granularity=DEFAULT_MAPPED_GRANULARITY_ENUM, + ).process_query(query, HTTPQuerySettings()) + except InvalidGranularityException: + assert query_granularity is None + else: + assert query == Query( + QueryEntity(entity_key, ColumnSet([])), + selected_columns=[SelectedExpression(column, Column(None, None, column))], + condition=binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, query_granularity), + ), + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "metric_id"), + Literal(None, 123), + ), + ), + ) + + +@pytest.mark.parametrize( + "entity_key,column", + [ + (EntityKey.GENERIC_METRICS_DISTRIBUTIONS, "percentiles"), + (EntityKey.GENERIC_METRICS_SETS, "value"), + ], +) +@pytest.mark.parametrize( + "requested_granularity, query_granularity", + [ + ([1, 1], [1, 1]), + ([2, 2], [2, 2]), + ([60 * 60, 60], [2, 1]), + ([60 * 60, 60 * 60], [2, 2]), + ([24 * 60 * 60, 120 * 60], [3, 2]), + ([48 * 60 * 60, 1], [3, 1]), + ([13, 1], [None, 1]), + ([10, 0], [10, None]), + ], +) +def test_multiple_granularities_enum_mapping_in_condition( + entity_key: EntityKey, + column: str, + requested_granularity: List[int], + query_granularity: List[int], +) -> None: + query = Query( + QueryEntity(entity_key, ColumnSet([])), + selected_columns=[SelectedExpression(column, Column(None, None, column))], + condition=binary_condition( + BooleanFunctions.AND, + binary_condition( + 
ConditionFunctions.EQ, + Column(None, None, "metric_id"), + Literal(None, 123), + ), + binary_condition( + BooleanFunctions.OR, + binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, requested_granularity[0]), + ), + binary_condition( + ConditionFunctions.GT, + Column(None, None, "timestamp"), + Literal(None, datetime(2020, 8, 1)), + ), + ), + binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, requested_granularity[1]), + ), + binary_condition( + ConditionFunctions.LT, + Column(None, None, "timestamp"), + Literal(None, datetime(2020, 8, 1)), + ), + ), + ), + ), + ) + + try: + MappedGranularityProcessor( + accepted_granularities=PERFORMANCE_GRANULARITIES, + default_granularity=DEFAULT_MAPPED_GRANULARITY_ENUM, + ).process_query(query, HTTPQuerySettings()) + except InvalidGranularityException: + assert query_granularity[0] is None or query_granularity[1] is None + else: + assert query == Query( + QueryEntity(entity_key, ColumnSet([])), + selected_columns=[SelectedExpression(column, Column(None, None, column))], + condition=binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "metric_id"), + Literal(None, 123), + ), + binary_condition( + BooleanFunctions.OR, + binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, query_granularity[0]), + ), + binary_condition( + ConditionFunctions.GT, + Column(None, None, "timestamp"), + Literal(None, datetime(2020, 8, 1)), + ), + ), + binary_condition( + BooleanFunctions.AND, + binary_condition( + ConditionFunctions.EQ, + Column(None, None, "granularity"), + Literal(None, query_granularity[1]), + ), + binary_condition( + ConditionFunctions.LT, + Column(None, None, "timestamp"), + Literal(None, datetime(2020, 8, 1)), + ), + 
), + ), + ), + )