From 0490995c7d559fe40081aa8b657fc23e13967d80 Mon Sep 17 00:00:00 2001
From: Ogi <86684834+obostjancic@users.noreply.github.com>
Date: Tue, 8 Aug 2023 15:25:03 +0200
Subject: [PATCH] fix(metric-extraction): improve is on-demand metric check
 (#54360)

---
 src/sentry/relay/config/metric_extraction.py | 20 ++++--
 src/sentry/snuba/metrics/extraction.py       | 69 ++++++++++++--------
 tests/sentry/snuba/test_extraction.py        | 61 ++++++++++++++++-
 3 files changed, 113 insertions(+), 37 deletions(-)

diff --git a/src/sentry/relay/config/metric_extraction.py b/src/sentry/relay/config/metric_extraction.py
index bad2c8286b28d..2bf46c7e06451 100644
--- a/src/sentry/relay/config/metric_extraction.py
+++ b/src/sentry/relay/config/metric_extraction.py
@@ -33,8 +33,8 @@
 
 # Maximum number of custom metrics that can be extracted for alerts and widgets with
 # advanced filter expressions.
-# TODO(Ogi): remove this, or enforce limits for alerts and widgets separately.
-_MAX_ON_DEMAND_METRICS = 100
+_MAX_ON_DEMAND_ALERTS = 100
+_MAX_ON_DEMAND_WIDGETS = 500
 
 HashMetricSpec = Tuple[str, MetricSpec]
 
@@ -66,10 +66,6 @@ def get_metric_extraction_config(project: Project) -> Optional[MetricExtractionC
     if not metrics:
         return None
 
-    if len(metrics) > _MAX_ON_DEMAND_METRICS:
-        logger.error("Too many on demand metrics for project")
-        metrics = metrics[:_MAX_ON_DEMAND_METRICS]
-
     return {
         "version": _METRIC_EXTRACTION_VERSION,
         "metrics": metrics,
@@ -87,6 +83,12 @@ def _get_alert_metric_specs(project: Project) -> List[HashMetricSpec]:
         if result := _convert_snuba_query_to_metric(alert.snuba_query):
             specs.append(result)
 
+    if len(specs) > _MAX_ON_DEMAND_ALERTS:
+        logger.error(
+            "Too many (%s) on demand metric alerts for project %s", len(specs), project.slug
+        )
+        specs = specs[:_MAX_ON_DEMAND_ALERTS]
+
     return specs
 
 
@@ -107,6 +109,12 @@ def _get_widget_metric_specs(project: Project) -> List[HashMetricSpec]:
         for result in _convert_widget_query_to_metric(widget):
             specs.append(result)
 
+    if len(specs) > _MAX_ON_DEMAND_WIDGETS:
+        logger.error(
+            "Too many (%s) on demand metric widgets for project %s", len(specs), project.slug
+        )
+        specs = specs[:_MAX_ON_DEMAND_WIDGETS]
+
     return specs
 
 
diff --git a/src/sentry/snuba/metrics/extraction.py b/src/sentry/snuba/metrics/extraction.py
index ff8d4f486524c..cc22ca5b276c7 100644
--- a/src/sentry/snuba/metrics/extraction.py
+++ b/src/sentry/snuba/metrics/extraction.py
@@ -18,7 +18,7 @@
 from typing_extensions import NotRequired
 
 from sentry.api import event_search
-from sentry.api.event_search import ParenExpression, SearchFilter
+from sentry.api.event_search import AggregateFilter, ParenExpression, SearchFilter
 from sentry.exceptions import InvalidSearchQuery
 from sentry.search.events import fields
 from sentry.snuba.dataset import Dataset
@@ -225,54 +225,59 @@ def is_on_demand_metric_query(
     if is_standard_metrics_compatible(dataset, aggregate, query):
         return False
 
-    for field in _get_aggregate_fields(aggregate):
-        if not _is_on_demand_supported_field(field):
-            return False
     try:
+        if not _is_on_demand_supported_aggregate(aggregate):
+            return False
+
         return _is_on_demand_supported_query(event_search.parse_search_query(query))
     except InvalidSearchQuery:
         logger.error(f"Failed to parse search query: {query}", exc_info=True)
         return False
 
 
+def _is_on_demand_supported_aggregate(aggregate: str) -> bool:
+    # equations are currently not supported
+    if aggregate.startswith("equation|"):
+        return False
+
+    function, arguments, _ = fields.parse_function(aggregate)
+
+    if function not in _SEARCH_TO_METRIC_AGGREGATES.keys():
+        return False
+    # count() has no arguments
+    if function == "count":
+        return True
+
+    return _is_on_demand_supported_field(arguments[0])
+
+
+def _is_standard_metric_compatible_aggregate(aggregate: str) -> bool:
+    """Returns ``True`` if the aggregate is compatible with standard metrics."""
+
+    function, arguments, _ = fields.parse_function(aggregate)
+    # count() has no arguments
+    if function == "count":
+        return True
+
+    return _is_standard_metrics_field(arguments[0])
+
+
 def is_standard_metrics_compatible(
     dataset: Optional[Union[str, Dataset]], aggregate: str, query: Optional[str]
 ) -> bool:
-    """Returns ``True`` if the query can be supported by standard metrics."""
-
     if not dataset or Dataset(dataset) not in [Dataset.Metrics, Dataset.PerformanceMetrics]:
         return False
 
-    for field in _get_aggregate_fields(aggregate):
-        if not _is_standard_metrics_field(field):
-            return False
     try:
+        if not _is_standard_metric_compatible_aggregate(aggregate):
+            return False
+
         return _is_standard_metrics_query(event_search.parse_search_query(query))
     except InvalidSearchQuery:
         logger.error(f"Failed to parse search query: {query}", exc_info=True)
         return False
 
 
-def _get_aggregate_fields(aggregate: str) -> Sequence[str]:
-    """
-    Returns any fields referenced by the arguments of supported aggregate
-    functions, otherwise ``None``.
-    """
-    _SUPPORTED_AGG_FNS = ("count_if", "count_unique")
-
-    if not aggregate.startswith(_SUPPORTED_AGG_FNS):
-        return []
-
-    try:
-        function, arguments, _ = fields.parse_function(aggregate)
-        if function in _SUPPORTED_AGG_FNS and arguments:
-            return [arguments[0]]
-    except InvalidSearchQuery:
-        logger.error(f"Failed to parse aggregate: {aggregate}", exc_info=True)
-
-    return []
-
-
 def _is_standard_metrics_query(tokens: Sequence[QueryToken]) -> bool:
     """
     Recursively checks if any of the supplied token contain search filters that can't be handled by standard metrics.
@@ -432,7 +437,13 @@ def _is_on_demand_supported_query(tokens: Sequence[QueryToken]) -> bool:
 
 
 def _is_on_demand_supported_search_filter(token: QueryToken) -> bool:
+    if isinstance(token, AggregateFilter):
+        return False
+
     if isinstance(token, SearchFilter):
+        if not _SEARCH_TO_RELAY_OPERATORS.get(token.operator):
+            return False
+
         return _is_on_demand_supported_field(token.key.name)
 
     if isinstance(token, ParenExpression):
diff --git a/tests/sentry/snuba/test_extraction.py b/tests/sentry/snuba/test_extraction.py
index 42fca3c1ccbd7..f704c599ea9cd 100644
--- a/tests/sentry/snuba/test_extraction.py
+++ b/tests/sentry/snuba/test_extraction.py
@@ -66,7 +66,7 @@ def test_on_demand_queries(self):
             is True
         )
 
-    def test_standard_comaptible_queries(self):
+    def test_standard_compatible_queries(self):
         assert is_on_demand_metric_query(self.perf_metrics, "count()", "") is False
         assert is_on_demand_metric_query(self.perf_metrics, "count()", "environment:dev") is False
         assert (
@@ -86,7 +86,7 @@ def test_standard_comaptible_queries(self):
         assert is_on_demand_metric_query(self.perf_metrics, "foo.bar", "") is False
         assert is_on_demand_metric_query(self.perf_metrics, "count()", "foo.bar") is False
 
-    def test_countif(self):
+    def test_count_if(self):
         assert (
             is_on_demand_metric_query(
                 self.perf_metrics, "count_if(transaction.duration,equals,300)", ""
@@ -98,6 +98,63 @@ def test_countif(self):
             is False
         )
 
+    def test_unsupported_aggregate_functions(self):
+        assert (
+            is_on_demand_metric_query(
+                self.perf_metrics, "count_unique(transaction.duration)", "transaction.duration:>=1"
+            )
+            is False
+        )
+        assert (
+            is_on_demand_metric_query(
+                self.perf_metrics, "min(transaction.duration)", "transaction.duration:>=1"
+            )
+            is False
+        )
+        assert (
+            is_on_demand_metric_query(
+                self.perf_metrics, "any(transaction.duration)", "transaction.duration:>=1"
+            )
+            is False
+        )
+
+    def test_unsupported_aggregate_fields(self):
+        assert (
+            is_on_demand_metric_query(self.perf_metrics, "message", "transaction.duration:>=1")
+            is False
+        )
+        assert (
+            is_on_demand_metric_query(self.perf_metrics, "title", "transaction.duration:>=1")
+            is False
+        )
+
+    def test_unsupported_operators(self):
+        assert (
+            is_on_demand_metric_query(self.perf_metrics, "count()", "transaction.duration:[1,2,3]")
+            is False
+        )
+        assert (
+            is_on_demand_metric_query(self.perf_metrics, "count()", "!transaction.duration:[1,2,3]")
+            is False
+        )
+
+    def test_unsupported_equations(self):
+        assert (
+            is_on_demand_metric_query(
+                self.perf_metrics, "equation|count() / count()", "transaction.duration:>0"
+            )
+            is False
+        )
+        assert (
+            is_on_demand_metric_query(self.perf_metrics, "equation|count() / count()", "") is False
+        )
+
+    def test_unsupported_aggregate_filter(self):
+        assert (
+            is_on_demand_metric_query(self.perf_metrics, "count()", "p75(measurements.fcp):>100")
+            is False
+        )
+
 
 class TestIsStandardMetricsCompatible:
     perf_metrics = Dataset.PerformanceMetrics