From 0490995c7d559fe40081aa8b657fc23e13967d80 Mon Sep 17 00:00:00 2001 From: Ogi <86684834+obostjancic@users.noreply.github.com> Date: Tue, 8 Aug 2023 15:25:03 +0200 Subject: [PATCH] fix(metric-extraction): improve is on-demand metric check (#54360) --- src/sentry/relay/config/metric_extraction.py | 20 ++++-- src/sentry/snuba/metrics/extraction.py | 69 ++++++++++++-------- tests/sentry/snuba/test_extraction.py | 61 ++++++++++++++++- 3 files changed, 113 insertions(+), 37 deletions(-) diff --git a/src/sentry/relay/config/metric_extraction.py b/src/sentry/relay/config/metric_extraction.py index bad2c8286b28d..2bf46c7e06451 100644 --- a/src/sentry/relay/config/metric_extraction.py +++ b/src/sentry/relay/config/metric_extraction.py @@ -33,8 +33,8 @@ # Maximum number of custom metrics that can be extracted for alerts and widgets with # advanced filter expressions. -# TODO(Ogi): remove this, or enforce limits for alerts and widgets separately. -_MAX_ON_DEMAND_METRICS = 100 +_MAX_ON_DEMAND_ALERTS = 100 +_MAX_ON_DEMAND_WIDGETS = 500 HashMetricSpec = Tuple[str, MetricSpec] @@ -66,10 +66,6 @@ def get_metric_extraction_config(project: Project) -> Optional[MetricExtractionC if not metrics: return None - if len(metrics) > _MAX_ON_DEMAND_METRICS: - logger.error("Too many on demand metrics for project") - metrics = metrics[:_MAX_ON_DEMAND_METRICS] - return { "version": _METRIC_EXTRACTION_VERSION, "metrics": metrics, @@ -87,6 +83,12 @@ def _get_alert_metric_specs(project: Project) -> List[HashMetricSpec]: if result := _convert_snuba_query_to_metric(alert.snuba_query): specs.append(result) + if len(specs) > _MAX_ON_DEMAND_ALERTS: + logger.error( + "Too many (%s) on demand metric alerts for project %s", len(specs), project.slug + ) + specs = specs[:_MAX_ON_DEMAND_ALERTS] + return specs @@ -107,6 +109,12 @@ def _get_widget_metric_specs(project: Project) -> List[HashMetricSpec]: for result in _convert_widget_query_to_metric(widget): specs.append(result) + if len(specs) > _MAX_ON_DEMAND_WIDGETS: + logger.error( + "Too many (%s) on demand metric widgets for project %s", len(specs), project.slug + ) + specs = specs[:_MAX_ON_DEMAND_WIDGETS] + return specs diff --git a/src/sentry/snuba/metrics/extraction.py b/src/sentry/snuba/metrics/extraction.py index ff8d4f486524c..cc22ca5b276c7 100644 --- a/src/sentry/snuba/metrics/extraction.py +++ b/src/sentry/snuba/metrics/extraction.py @@ -18,7 +18,7 @@ from typing_extensions import NotRequired from sentry.api import event_search -from sentry.api.event_search import ParenExpression, SearchFilter +from sentry.api.event_search import AggregateFilter, ParenExpression, SearchFilter from sentry.exceptions import InvalidSearchQuery from sentry.search.events import fields from sentry.snuba.dataset import Dataset @@ -225,54 +225,59 @@ def is_on_demand_metric_query( if is_standard_metrics_compatible(dataset, aggregate, query): return False - for field in _get_aggregate_fields(aggregate): - if not _is_on_demand_supported_field(field): - return False try: + if not _is_on_demand_supported_aggregate(aggregate): + return False + return _is_on_demand_supported_query(event_search.parse_search_query(query)) except InvalidSearchQuery: logger.error(f"Failed to parse search query: {query}", exc_info=True) return False +def _is_on_demand_supported_aggregate(aggregate: str) -> bool: + # equations are currently not supported + if aggregate.startswith("equation|"): + return False + + function, arguments, _ = fields.parse_function(aggregate) + + if function not in _SEARCH_TO_METRIC_AGGREGATES.keys(): + return False + # count() has no arguments + if function == "count": + return True + + return _is_on_demand_supported_field(arguments[0]) + + +def _is_standard_metric_compatible_aggregate(aggregate: str) -> bool: + """Returns ``True`` if the aggregate is compatible with standard metrics.""" + + function, arguments, _ = fields.parse_function(aggregate) + # count() has no arguments + if function == "count": + return True + + return _is_standard_metrics_field(arguments[0]) + + def is_standard_metrics_compatible( dataset: Optional[Union[str, Dataset]], aggregate: str, query: Optional[str] ) -> bool: - """Returns ``True`` if the query can be supported by standard metrics.""" - if not dataset or Dataset(dataset) not in [Dataset.Metrics, Dataset.PerformanceMetrics]: return False - for field in _get_aggregate_fields(aggregate): - if not _is_standard_metrics_field(field): - return False try: + if not _is_standard_metric_compatible_aggregate(aggregate): + return False + return _is_standard_metrics_query(event_search.parse_search_query(query)) except InvalidSearchQuery: logger.error(f"Failed to parse search query: {query}", exc_info=True) return False -def _get_aggregate_fields(aggregate: str) -> Sequence[str]: - """ - Returns any fields referenced by the arguments of supported aggregate - functions, otherwise ``None``. - """ - _SUPPORTED_AGG_FNS = ("count_if", "count_unique") - - if not aggregate.startswith(_SUPPORTED_AGG_FNS): - return [] - - try: - function, arguments, _ = fields.parse_function(aggregate) - if function in _SUPPORTED_AGG_FNS and arguments: - return [arguments[0]] - except InvalidSearchQuery: - logger.error(f"Failed to parse aggregate: {aggregate}", exc_info=True) - - return [] - - def _is_standard_metrics_query(tokens: Sequence[QueryToken]) -> bool: """ Recursively checks if any of the supplied token contain search filters that can't be handled by standard metrics. @@ -432,7 +437,13 @@ def _is_on_demand_supported_query(tokens: Sequence[QueryToken]) -> bool: def _is_on_demand_supported_search_filter(token: QueryToken) -> bool: + if isinstance(token, AggregateFilter): + return False + if isinstance(token, SearchFilter): + if not _SEARCH_TO_RELAY_OPERATORS.get(token.operator): + return False + return _is_on_demand_supported_field(token.key.name) if isinstance(token, ParenExpression): diff --git a/tests/sentry/snuba/test_extraction.py b/tests/sentry/snuba/test_extraction.py index 42fca3c1ccbd7..f704c599ea9cd 100644 --- a/tests/sentry/snuba/test_extraction.py +++ b/tests/sentry/snuba/test_extraction.py @@ -66,7 +66,7 @@ def test_on_demand_queries(self): is True ) - def test_standard_comaptible_queries(self): + def test_standard_compatible_queries(self): assert is_on_demand_metric_query(self.perf_metrics, "count()", "") is False assert is_on_demand_metric_query(self.perf_metrics, "count()", "environment:dev") is False assert ( @@ -86,7 +86,7 @@ def test_standard_comaptible_queries(self): assert is_on_demand_metric_query(self.perf_metrics, "foo.bar", "") is False assert is_on_demand_metric_query(self.perf_metrics, "count()", "foo.bar") is False - def test_countif(self): + def test_count_if(self): assert ( is_on_demand_metric_query( self.perf_metrics, "count_if(transaction.duration,equals,300)", "" @@ -98,6 +98,63 @@ def test_countif(self): is False ) + def test_unsupported_aggregate_functions(self): + assert ( + is_on_demand_metric_query( + self.perf_metrics, "count_unique(transaction.duration)", "transaction.duration:>=1" + ) + is False + ) + assert ( + is_on_demand_metric_query( + self.perf_metrics, "min(transaction.duration)", "transaction.duration:>=1" + ) + is False + ) + assert ( + is_on_demand_metric_query( + self.perf_metrics, "any(transaction.duration)", "transaction.duration:>=1" + ) + is False + ) + + def test_unsupported_aggregate_fields(self): + assert ( + is_on_demand_metric_query(self.perf_metrics, "message", "transaction.duration:>=1") + is False + ) + assert ( + is_on_demand_metric_query(self.perf_metrics, "title", "transaction.duration:>=1") + is False + ) + + def test_unsupported_operators(self): + assert ( + is_on_demand_metric_query(self.perf_metrics, "count()", "transaction.duration:[1,2,3]") + is False + ) + assert ( + is_on_demand_metric_query(self.perf_metrics, "count()", "!transaction.duration:[1,2,3]") + is False + ) + + def test_unsupported_equations(self): + assert ( + is_on_demand_metric_query( + self.perf_metrics, "equation|count() / count()", "transaction.duration:>0" + ) + is False + ) + assert ( + is_on_demand_metric_query(self.perf_metrics, "equation|count() / count()", "") is False + ) + + def test_unsupported_aggregate_filter(self): + assert ( + is_on_demand_metric_query(self.perf_metrics, "count()", "p75(measurements.fcp):>100") + is False + ) + class TestIsStandardMetricsCompatible: perf_metrics = Dataset.PerformanceMetrics