diff --git a/src/sentry/api/endpoints/organization_measurements_meta.py b/src/sentry/api/endpoints/organization_measurements_meta.py index 2611ed5a22c83..ae6c448472941 100644 --- a/src/sentry/api/endpoints/organization_measurements_meta.py +++ b/src/sentry/api/endpoints/organization_measurements_meta.py @@ -6,7 +6,7 @@ from sentry.api.bases import NoProjects, OrganizationEventsEndpointBase from sentry.models import Organization from sentry.search.events.constants import METRIC_FUNCTION_LIST_BY_TYPE -from sentry.sentry_metrics.configuration import UseCaseKey +from sentry.sentry_metrics.use_case_id_registry import UseCaseID from sentry.snuba.metrics.datasource import get_custom_measurements @@ -24,7 +24,7 @@ def get(self, request: Request, organization: Organization) -> Response: organization_id=organization.id, start=params["start"], end=params["end"], - use_case_id=UseCaseKey.PERFORMANCE, + use_case_id=UseCaseID.TRANSACTIONS, ) with start_span(op="transform", description="metric meta"): diff --git a/src/sentry/release_health/release_monitor/metrics.py b/src/sentry/release_health/release_monitor/metrics.py index b09426aa7e44f..6dd488e6f60b5 100644 --- a/src/sentry/release_health/release_monitor/metrics.py +++ b/src/sentry/release_health/release_monitor/metrics.py @@ -2,7 +2,7 @@ import time from collections import defaultdict from datetime import datetime, timedelta -from typing import Mapping, Sequence +from typing import Mapping, Sequence, Set from snuba_sdk import ( Column, @@ -21,6 +21,7 @@ from sentry.sentry_metrics import indexer from sentry.sentry_metrics.configuration import UseCaseKey from sentry.sentry_metrics.indexer.strings import SESSION_METRIC_NAMES +from sentry.sentry_metrics.use_case_id_registry import UseCaseID from sentry.sentry_metrics.utils import resolve_tag_key from sentry.snuba.dataset import Dataset, EntityKey from sentry.snuba.metrics.naming_layer.mri import SessionMRI @@ -164,13 +165,18 @@ def fetch_project_release_health_totals( if more_results: data = data[:-1] + # convert indexes back to strings + indexes: Set[int] = set() for row in data: - env_name = indexer.reverse_resolve( - UseCaseKey.RELEASE_HEALTH, org_id, row[env_key] - ) - release_name = indexer.reverse_resolve( - UseCaseKey.RELEASE_HEALTH, org_id, row[release_key] - ) + indexes.add(row[env_key]) + indexes.add(row[release_key]) + resolved_strings = indexer.bulk_reverse_resolve( + UseCaseID.SESSIONS, org_id, indexes + ) + + for row in data: + env_name = resolved_strings.get(row[env_key]) + release_name = resolved_strings.get(row[release_key]) row_totals = totals[row["project_id"]].setdefault( env_name, {"total_sessions": 0, "releases": defaultdict(int)} # type: ignore ) diff --git a/src/sentry/sentry_metrics/utils.py b/src/sentry/sentry_metrics/utils.py index 2345a49950aba..2871acc03c062 100644 --- a/src/sentry/sentry_metrics/utils.py +++ b/src/sentry/sentry_metrics/utils.py @@ -1,8 +1,9 @@ -from typing import Optional, Sequence, Union +from typing import Collection, Dict, Mapping, Optional, Sequence, Set, Union, cast from sentry.api.utils import InvalidParams from sentry.sentry_metrics import indexer from sentry.sentry_metrics.configuration import UseCaseKey +from sentry.sentry_metrics.use_case_id_registry import UseCaseID #: Special integer used to represent a string missing from the indexer STRING_NOT_FOUND = -1 @@ -27,6 +28,48 @@ def reverse_resolve_tag_value( return reverse_resolve(use_case_id, org_id, index) +def bulk_reverse_resolve_tag_value( + use_case_id: UseCaseID, org_id: int, values: Collection[Union[int, str, None]] +) -> Mapping[Union[int, str], str]: + """ + Reverse resolves a mixture of indexes and strings in bulk + + if the element is already a string it maps it to itself + if the element is None it ignores it + if the element is a positive integer it tries to resolve it + if the element is 0 or a negative number it ignores it + + The result is a dictionary that is a mixture of strings and ints mapped to the resolved string, + which is either itself (in case of string keys) or the reverse_resolved string (in case of positive integers) + + Example: + bulk_reverse_resolve_tag_value( UseCaseKey:PERFORMANCE, 1, [ -1, 0, 1, "some-string", "abc", 7, 33333]) + would return something like this ( presuming that no string was found for 33333 ) + { + 1: "tag-a", + "some-string": "some-string", + "abc": "abc", + 7: "tag-b", + } + + """ + ret_val: Dict[Union[int, str], str] = {} + + indexes_to_resolve: Set[int] = set() + for value in values: + if isinstance(value, str): + ret_val[value] = value # we already have a string no need to reverse resolve it + elif isinstance(value, int) and value > 0: # resolve valid int, do nothing for None + indexes_to_resolve.add(value) + + resolved_indexes = cast( + Mapping[Union[int, str], str], + indexer.bulk_reverse_resolve(use_case_id, org_id, indexes_to_resolve), + ) + + return {**ret_val, **resolved_indexes} + + def reverse_resolve(use_case_id: UseCaseKey, org_id: int, index: int) -> str: assert index > 0 resolved = indexer.reverse_resolve(use_case_id, org_id, index) @@ -37,6 +80,19 @@ def reverse_resolve(use_case_id: UseCaseKey, org_id: int, index: int) -> str: return resolved +def bulk_reverse_resolve( + use_case_id: UseCaseID, org_id: int, indexes: Collection[int] +) -> Mapping[int, str]: + + # de duplicate indexes + indexes_to_resolve = set() + for idx in indexes: + if idx > 0: + indexes_to_resolve.add(idx) + + return indexer.bulk_reverse_resolve(use_case_id, org_id, indexes_to_resolve) + + def reverse_resolve_weak(use_case_id: UseCaseKey, org_id: int, index: int) -> Optional[str]: """ Resolve an index value back to a string, special-casing 0 to return None. diff --git a/src/sentry/snuba/metrics/datasource.py b/src/sentry/snuba/metrics/datasource.py index da45bd4daec6a..38300181854d0 100644 --- a/src/sentry/snuba/metrics/datasource.py +++ b/src/sentry/snuba/metrics/datasource.py @@ -1,5 +1,7 @@ from __future__ import annotations +from sentry.sentry_metrics.use_case_id_registry import UseCaseID + """ Module that gets both metadata and time series from Snuba. For metadata, it fetch metrics metadata (metric names, tag names, tag values, ...) from snuba. @@ -27,13 +29,17 @@ from sentry.sentry_metrics.configuration import UseCaseKey from sentry.sentry_metrics.utils import ( MetricIndexNotFound, + bulk_reverse_resolve, + bulk_reverse_resolve_tag_value, resolve_tag_key, - reverse_resolve, - reverse_resolve_tag_value, ) from sentry.snuba.dataset import Dataset, EntityKey from sentry.snuba.metrics.fields import run_metrics_query -from sentry.snuba.metrics.fields.base import get_derived_metrics, org_id_from_projects +from sentry.snuba.metrics.fields.base import ( + SnubaDataType, + get_derived_metrics, + org_id_from_projects, +) from sentry.snuba.metrics.naming_layer.mapping import get_all_mris, get_mri from sentry.snuba.metrics.naming_layer.mri import MRI_SCHEMA_REGEX, is_custom_measurement, parse_mri from sentry.snuba.metrics.query import Groupable, MetricField, MetricsQuery @@ -70,7 +76,7 @@ def _get_metrics_for_entity( org_id: int, start: Optional[datetime] = None, end: Optional[datetime] = None, -) -> Mapping[str, Any]: +) -> List[SnubaDataType]: return run_metrics_query( entity_key=entity_key, select=[Column("metric_id")], @@ -179,21 +185,26 @@ def get_custom_measurements( organization_id: int, start: Optional[datetime] = None, end: Optional[datetime] = None, - use_case_id: UseCaseKey = UseCaseKey.PERFORMANCE, + use_case_id: UseCaseID = UseCaseID.TRANSACTIONS, ) -> Sequence[MetricMeta]: assert project_ids metrics_meta = [] for metric_type in CUSTOM_MEASUREMENT_DATASETS: - for row in _get_metrics_for_entity( + rows = _get_metrics_for_entity( entity_key=METRIC_TYPE_TO_ENTITY[metric_type], project_ids=project_ids, org_id=organization_id, start=start, end=end, - ): - mri = reverse_resolve(use_case_id, organization_id, row["metric_id"]) - parsed_mri = parse_mri(mri) + ) + + mri_indexes = {row["metric_id"] for row in rows} + mris = bulk_reverse_resolve(use_case_id, organization_id, mri_indexes) + + for row in rows: + mri_index = row.get("metric_id") + parsed_mri = parse_mri(mris.get(mri_index)) if parsed_mri is not None and is_custom_measurement(parsed_mri): metrics_meta.append( MetricMeta( @@ -282,7 +293,6 @@ def _fetch_tags_or_values_for_metrics( column: str, use_case_id: UseCaseKey, ) -> Tuple[Union[Sequence[Tag], Sequence[TagValue]], Optional[str]]: - assert len({p.organization_id for p in projects}) == 1 metric_mris = [get_mri(metric_name) for metric_name in metric_names] if metric_names else [] @@ -371,6 +381,7 @@ def _fetch_tags_or_values_for_mri( raise InvalidParams(error_str) tag_or_value_id_lists = tag_or_value_ids_per_metric_id.values() + tag_or_value_ids: Set[Union[int, str, None]] if metric_mris: # If there are metric_ids that map to the metric_names provided as an arg that were not # found in the dataset, then we raise an instance of InvalidParams exception @@ -399,23 +410,25 @@ def _fetch_tags_or_values_for_mri( if column.startswith(("tags[", "tags_raw[")): tag_id = column.split("[")[1].split("]")[0] + resolved_ids = bulk_reverse_resolve_tag_value( + use_case_id, org_id, [int(tag_id), *tag_or_value_ids] + ) + resolved_key = resolved_ids.get(int(tag_id)) tags_or_values = [ { - "key": reverse_resolve(use_case_id, org_id, int(tag_id)), - "value": reverse_resolve_tag_value(use_case_id, org_id, value_id), + "key": resolved_key, + "value": resolved_ids.get(value_id), } for value_id in tag_or_value_ids ] tags_or_values.sort(key=lambda tag: (tag["key"], tag["value"])) else: tags_or_values = [] + resolved_ids = bulk_reverse_resolve(use_case_id, org_id, tag_or_value_ids) for tag_id in tag_or_value_ids: - try: - resolved = reverse_resolve(use_case_id, org_id, tag_id) - if resolved not in UNALLOWED_TAGS: - tags_or_values.append({"key": resolved}) - except MetricIndexNotFound: - continue + resolved = resolved_ids.get(tag_id) + if resolved is not None and resolved not in UNALLOWED_TAGS: + tags_or_values.append({"key": resolved}) tags_or_values.sort(key=itemgetter("key")) diff --git a/src/sentry/utils/pytest/metrics.py b/src/sentry/utils/pytest/metrics.py index 3fcc3347a1602..f3c77ddf304f8 100644 --- a/src/sentry/utils/pytest/metrics.py +++ b/src/sentry/utils/pytest/metrics.py @@ -34,6 +34,9 @@ def control_metrics_access(monkeypatch, request, set_sentry_option): monkeypatch.setattr( "sentry.sentry_metrics.indexer.reverse_resolve", mock_indexer.reverse_resolve ) + monkeypatch.setattr( + "sentry.sentry_metrics.indexer.bulk_reverse_resolve", mock_indexer.bulk_reverse_resolve + ) old_resolve = indexer.resolve diff --git a/tests/sentry/api/endpoints/test_organization_metric_tags.py b/tests/sentry/api/endpoints/test_organization_metric_tags.py index 3b164b87e621a..f83014523bb5e 100644 --- a/tests/sentry/api/endpoints/test_organization_metric_tags.py +++ b/tests/sentry/api/endpoints/test_organization_metric_tags.py @@ -1,11 +1,11 @@ import time +from typing import Collection from unittest.mock import patch import pytest from sentry.sentry_metrics import indexer from sentry.sentry_metrics.use_case_id_registry import UseCaseID -from sentry.sentry_metrics.utils import MetricIndexNotFound from sentry.snuba.metrics.naming_layer import get_mri from sentry.snuba.metrics.naming_layer.mri import SessionMRI from sentry.snuba.metrics.naming_layer.public import SessionMetricKey @@ -19,8 +19,8 @@ pytestmark = pytest.mark.sentry_metrics -def mocked_reverse_resolve(use_case_id, org_id: int, index: int): - raise MetricIndexNotFound() +def mocked_bulk_reverse_resolve(use_case_id, org_id: int, ids: Collection[int]): + return {} @region_silo_test(stable=True) @@ -98,8 +98,8 @@ def test_mixed_metric_identifiers(self): assert response.data == [] @patch( - "sentry.snuba.metrics.datasource.reverse_resolve", - mocked_reverse_resolve, + "sentry.snuba.metrics.datasource.bulk_reverse_resolve", + mocked_bulk_reverse_resolve, ) def test_unknown_tag(self): response = self.get_success_response( diff --git a/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py b/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py index c32a16fc0d524..31b877a6e804a 100644 --- a/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py +++ b/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py @@ -21,6 +21,7 @@ ) from sentry.sentry_metrics import indexer from sentry.sentry_metrics.configuration import UseCaseKey +from sentry.sentry_metrics.use_case_id_registry import UseCaseID from sentry.snuba.metrics import ( MAX_POINTS, MetricConditionField, @@ -2063,7 +2064,7 @@ def test_simple(self): project_ids=[self.project.id], organization_id=self.organization.id, start=self.day_ago, - use_case_id=UseCaseKey.PERFORMANCE, + use_case_id=UseCaseID.TRANSACTIONS, ) assert result == [ { @@ -2116,7 +2117,7 @@ def test_metric_outside_query_daterange(self): project_ids=[self.project.id], organization_id=self.organization.id, start=self.day_ago, - use_case_id=UseCaseKey.PERFORMANCE, + use_case_id=UseCaseID.TRANSACTIONS, ) assert result == [