From b14d14a009e1f40585cb8e0028dd3e427cd2fbad Mon Sep 17 00:00:00 2001
From: getsentry-bot
Date: Tue, 4 Jul 2023 13:29:44 +0000
Subject: [PATCH] Revert "feat(metrics): Use bulk_reverse_reslove in metrics (#52115)"

This reverts commit 34b3834d48060499cea4e3bf8fc144aaa7d43357.

Co-authored-by: RaduW <5281987+RaduW@users.noreply.github.com>
---
 .../organization_measurements_meta.py         |  4 +-
 .../release_health/release_monitor/metrics.py | 20 +++----
 src/sentry/sentry_metrics/utils.py            | 58 +------------------
 src/sentry/snuba/metrics/datasource.py        | 49 ++++++----------
 src/sentry/utils/pytest/metrics.py            |  3 -
 .../test_organization_metric_tags.py          | 10 ++--
 .../test_metrics_enhanced_performance.py      |  5 +-
 7 files changed, 35 insertions(+), 114 deletions(-)

diff --git a/src/sentry/api/endpoints/organization_measurements_meta.py b/src/sentry/api/endpoints/organization_measurements_meta.py
index ae6c4484729411..2611ed5a22c835 100644
--- a/src/sentry/api/endpoints/organization_measurements_meta.py
+++ b/src/sentry/api/endpoints/organization_measurements_meta.py
@@ -6,7 +6,7 @@
 from sentry.api.bases import NoProjects, OrganizationEventsEndpointBase
 from sentry.models import Organization
 from sentry.search.events.constants import METRIC_FUNCTION_LIST_BY_TYPE
-from sentry.sentry_metrics.use_case_id_registry import UseCaseID
+from sentry.sentry_metrics.configuration import UseCaseKey
 from sentry.snuba.metrics.datasource import get_custom_measurements
 
 
@@ -24,7 +24,7 @@ def get(self, request: Request, organization: Organization) -> Response:
                 organization_id=organization.id,
                 start=params["start"],
                 end=params["end"],
-                use_case_id=UseCaseID.TRANSACTIONS,
+                use_case_id=UseCaseKey.PERFORMANCE,
             )
 
         with start_span(op="transform", description="metric meta"):
diff --git a/src/sentry/release_health/release_monitor/metrics.py b/src/sentry/release_health/release_monitor/metrics.py
index 6dd488e6f60b5f..b09426aa7e44fb 100644
--- a/src/sentry/release_health/release_monitor/metrics.py
+++ b/src/sentry/release_health/release_monitor/metrics.py
@@ -2,7 +2,7 @@
 import time
 from collections import defaultdict
 from datetime import datetime, timedelta
-from typing import Mapping, Sequence, Set
+from typing import Mapping, Sequence
 
 from snuba_sdk import (
     Column,
@@ -21,7 +21,6 @@
 from sentry.sentry_metrics import indexer
 from sentry.sentry_metrics.configuration import UseCaseKey
 from sentry.sentry_metrics.indexer.strings import SESSION_METRIC_NAMES
-from sentry.sentry_metrics.use_case_id_registry import UseCaseID
 from sentry.sentry_metrics.utils import resolve_tag_key
 from sentry.snuba.dataset import Dataset, EntityKey
 from sentry.snuba.metrics.naming_layer.mri import SessionMRI
@@ -165,18 +164,13 @@ def fetch_project_release_health_totals(
             if more_results:
                 data = data[:-1]
 
-            # convert indexes back to strings
-            indexes: Set[int] = set()
             for row in data:
-                indexes.add(row[env_key])
-                indexes.add(row[release_key])
-            resolved_strings = indexer.bulk_reverse_resolve(
-                UseCaseID.SESSIONS, org_id, indexes
-            )
-
-            for row in data:
-                env_name = resolved_strings.get(row[env_key])
-                release_name = resolved_strings.get(row[release_key])
+                env_name = indexer.reverse_resolve(
+                    UseCaseKey.RELEASE_HEALTH, org_id, row[env_key]
+                )
+                release_name = indexer.reverse_resolve(
+                    UseCaseKey.RELEASE_HEALTH, org_id, row[release_key]
+                )
                 row_totals = totals[row["project_id"]].setdefault(
                     env_name, {"total_sessions": 0, "releases": defaultdict(int)}  # type: ignore
                 )
diff --git a/src/sentry/sentry_metrics/utils.py b/src/sentry/sentry_metrics/utils.py
index 2871acc03c0621..2345a49950aba7 100644
--- a/src/sentry/sentry_metrics/utils.py
+++ b/src/sentry/sentry_metrics/utils.py
@@ -1,9 +1,8 @@
-from typing import Collection, Dict, Mapping, Optional, Sequence, Set, Union, cast
+from typing import Optional, Sequence, Union
 
 from sentry.api.utils import InvalidParams
 from sentry.sentry_metrics import indexer
 from sentry.sentry_metrics.configuration import UseCaseKey
-from sentry.sentry_metrics.use_case_id_registry import UseCaseID
 
 #: Special integer used to represent a string missing from the indexer
 STRING_NOT_FOUND = -1
@@ -28,48 +27,6 @@ def reverse_resolve_tag_value(
     return reverse_resolve(use_case_id, org_id, index)
 
 
-def bulk_reverse_resolve_tag_value(
-    use_case_id: UseCaseID, org_id: int, values: Collection[Union[int, str, None]]
-) -> Mapping[Union[int, str], str]:
-    """
-    Reverse resolves a mixture of indexes and strings in bulk
-
-    if the element is already a string it maps it to itself
-    if the element is None it ignores it
-    if the element is a positive integer it tries to resolve it
-    if the element is 0 or a negative number it ignores it
-
-    The result is a dictionary that is a mixture of strings and ints mapped to the resolved string,
-    which is either itself (in case of string keys) or the reverse_resolved string (in case of positive integers)
-
-    Example:
-    bulk_reverse_resolve_tag_value( UseCaseKey:PERFORMANCE, 1, [ -1, 0, 1, "some-string", "abc", 7, 33333])
-    would return something like this ( presuming that no string was found for 33333 )
-    {
-        1: "tag-a",
-        "some-string": "some-string",
-        "abc": "abc",
-        7: "tag-b",
-    }
-
-    """
-    ret_val: Dict[Union[int, str], str] = {}
-
-    indexes_to_resolve: Set[int] = set()
-    for value in values:
-        if isinstance(value, str):
-            ret_val[value] = value  # we already have a string no need to reverse resolve it
-        elif isinstance(value, int) and value > 0:  # resolve valid int, do nothing for None
-            indexes_to_resolve.add(value)
-
-    resolved_indexes = cast(
-        Mapping[Union[int, str], str],
-        indexer.bulk_reverse_resolve(use_case_id, org_id, indexes_to_resolve),
-    )
-
-    return {**ret_val, **resolved_indexes}
-
-
 def reverse_resolve(use_case_id: UseCaseKey, org_id: int, index: int) -> str:
     assert index > 0
     resolved = indexer.reverse_resolve(use_case_id, org_id, index)
@@ -80,19 +37,6 @@ def reverse_resolve(use_case_id: UseCaseKey, org_id: int, index: int) -> str:
     return resolved
 
 
-def bulk_reverse_resolve(
-    use_case_id: UseCaseID, org_id: int, indexes: Collection[int]
-) -> Mapping[int, str]:
-
-    # de duplicate indexes
-    indexes_to_resolve = set()
-    for idx in indexes:
-        if idx > 0:
-            indexes_to_resolve.add(idx)
-
-    return indexer.bulk_reverse_resolve(use_case_id, org_id, indexes_to_resolve)
-
-
 def reverse_resolve_weak(use_case_id: UseCaseKey, org_id: int, index: int) -> Optional[str]:
     """
     Resolve an index value back to a string, special-casing 0 to return None.
diff --git a/src/sentry/snuba/metrics/datasource.py b/src/sentry/snuba/metrics/datasource.py
index 38300181854d06..da45bd4daec6a5 100644
--- a/src/sentry/snuba/metrics/datasource.py
+++ b/src/sentry/snuba/metrics/datasource.py
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-from sentry.sentry_metrics.use_case_id_registry import UseCaseID
-
 """
 Module that gets both metadata and time series from Snuba.
 For metadata, it fetch metrics metadata (metric names, tag names, tag values, ...) from snuba.
@@ -29,17 +27,13 @@
 from sentry.sentry_metrics.configuration import UseCaseKey
 from sentry.sentry_metrics.utils import (
     MetricIndexNotFound,
-    bulk_reverse_resolve,
-    bulk_reverse_resolve_tag_value,
     resolve_tag_key,
+    reverse_resolve,
+    reverse_resolve_tag_value,
 )
 from sentry.snuba.dataset import Dataset, EntityKey
 from sentry.snuba.metrics.fields import run_metrics_query
-from sentry.snuba.metrics.fields.base import (
-    SnubaDataType,
-    get_derived_metrics,
-    org_id_from_projects,
-)
+from sentry.snuba.metrics.fields.base import get_derived_metrics, org_id_from_projects
 from sentry.snuba.metrics.naming_layer.mapping import get_all_mris, get_mri
 from sentry.snuba.metrics.naming_layer.mri import MRI_SCHEMA_REGEX, is_custom_measurement, parse_mri
 from sentry.snuba.metrics.query import Groupable, MetricField, MetricsQuery
@@ -76,7 +70,7 @@ def _get_metrics_for_entity(
     org_id: int,
     start: Optional[datetime] = None,
     end: Optional[datetime] = None,
-) -> List[SnubaDataType]:
+) -> Mapping[str, Any]:
     return run_metrics_query(
         entity_key=entity_key,
         select=[Column("metric_id")],
@@ -185,26 +179,21 @@ def get_custom_measurements(
     organization_id: int,
     start: Optional[datetime] = None,
     end: Optional[datetime] = None,
-    use_case_id: UseCaseID = UseCaseID.TRANSACTIONS,
+    use_case_id: UseCaseKey = UseCaseKey.PERFORMANCE,
 ) -> Sequence[MetricMeta]:
     assert project_ids
 
     metrics_meta = []
     for metric_type in CUSTOM_MEASUREMENT_DATASETS:
-        rows = _get_metrics_for_entity(
+        for row in _get_metrics_for_entity(
             entity_key=METRIC_TYPE_TO_ENTITY[metric_type],
             project_ids=project_ids,
             org_id=organization_id,
             start=start,
             end=end,
-        )
-
-        mri_indexes = {row["metric_id"] for row in rows}
-        mris = bulk_reverse_resolve(use_case_id, organization_id, mri_indexes)
-
-        for row in rows:
-            mri_index = row.get("metric_id")
-            parsed_mri = parse_mri(mris.get(mri_index))
+        ):
+            mri = reverse_resolve(use_case_id, organization_id, row["metric_id"])
+            parsed_mri = parse_mri(mri)
             if parsed_mri is not None and is_custom_measurement(parsed_mri):
                 metrics_meta.append(
                     MetricMeta(
@@ -293,6 +282,7 @@ def _fetch_tags_or_values_for_metrics(
     column: str,
     use_case_id: UseCaseKey,
 ) -> Tuple[Union[Sequence[Tag], Sequence[TagValue]], Optional[str]]:
+    assert len({p.organization_id for p in projects}) == 1
 
     metric_mris = [get_mri(metric_name) for metric_name in metric_names] if metric_names else []
 
@@ -381,7 +371,6 @@ def _fetch_tags_or_values_for_mri(
             raise InvalidParams(error_str)
 
     tag_or_value_id_lists = tag_or_value_ids_per_metric_id.values()
-    tag_or_value_ids: Set[Union[int, str, None]]
     if metric_mris:
         # If there are metric_ids that map to the metric_names provided as an arg that were not
         # found in the dataset, then we raise an instance of InvalidParams exception
@@ -410,25 +399,23 @@
 
     if column.startswith(("tags[", "tags_raw[")):
         tag_id = column.split("[")[1].split("]")[0]
-        resolved_ids = bulk_reverse_resolve_tag_value(
-            use_case_id, org_id, [int(tag_id), *tag_or_value_ids]
-        )
-        resolved_key = resolved_ids.get(int(tag_id))
         tags_or_values = [
             {
-                "key": resolved_key,
-                "value": resolved_ids.get(value_id),
+                "key": reverse_resolve(use_case_id, org_id, int(tag_id)),
+                "value": reverse_resolve_tag_value(use_case_id, org_id, value_id),
             }
             for value_id in tag_or_value_ids
         ]
         tags_or_values.sort(key=lambda tag: (tag["key"], tag["value"]))
     else:
         tags_or_values = []
-        resolved_ids = bulk_reverse_resolve(use_case_id, org_id, tag_or_value_ids)
         for tag_id in tag_or_value_ids:
-            resolved = resolved_ids.get(tag_id)
-            if resolved is not None and resolved not in UNALLOWED_TAGS:
-                tags_or_values.append({"key": resolved})
+            try:
+                resolved = reverse_resolve(use_case_id, org_id, tag_id)
+                if resolved not in UNALLOWED_TAGS:
+                    tags_or_values.append({"key": resolved})
+            except MetricIndexNotFound:
+                continue
 
         tags_or_values.sort(key=itemgetter("key"))
 
diff --git a/src/sentry/utils/pytest/metrics.py b/src/sentry/utils/pytest/metrics.py
index f3c77ddf304f83..3fcc3347a16026 100644
--- a/src/sentry/utils/pytest/metrics.py
+++ b/src/sentry/utils/pytest/metrics.py
@@ -34,9 +34,6 @@ def control_metrics_access(monkeypatch, request, set_sentry_option):
         monkeypatch.setattr(
             "sentry.sentry_metrics.indexer.reverse_resolve", mock_indexer.reverse_resolve
         )
-        monkeypatch.setattr(
-            "sentry.sentry_metrics.indexer.bulk_reverse_resolve", mock_indexer.bulk_reverse_resolve
-        )
 
         old_resolve = indexer.resolve
 
diff --git a/tests/sentry/api/endpoints/test_organization_metric_tags.py b/tests/sentry/api/endpoints/test_organization_metric_tags.py
index f83014523bb5ed..3b164b87e621a2 100644
--- a/tests/sentry/api/endpoints/test_organization_metric_tags.py
+++ b/tests/sentry/api/endpoints/test_organization_metric_tags.py
@@ -1,11 +1,11 @@
 import time
-from typing import Collection
 from unittest.mock import patch
 
 import pytest
 
 from sentry.sentry_metrics import indexer
 from sentry.sentry_metrics.use_case_id_registry import UseCaseID
+from sentry.sentry_metrics.utils import MetricIndexNotFound
 from sentry.snuba.metrics.naming_layer import get_mri
 from sentry.snuba.metrics.naming_layer.mri import SessionMRI
 from sentry.snuba.metrics.naming_layer.public import SessionMetricKey
@@ -19,8 +19,8 @@
 pytestmark = pytest.mark.sentry_metrics
 
 
-def mocked_bulk_reverse_resolve(use_case_id, org_id: int, ids: Collection[int]):
-    return {}
+def mocked_reverse_resolve(use_case_id, org_id: int, index: int):
+    raise MetricIndexNotFound()
 
 
 @region_silo_test(stable=True)
@@ -98,8 +98,8 @@ def test_mixed_metric_identifiers(self):
         assert response.data == []
 
     @patch(
-        "sentry.snuba.metrics.datasource.bulk_reverse_resolve",
-        mocked_bulk_reverse_resolve,
+        "sentry.snuba.metrics.datasource.reverse_resolve",
+        mocked_reverse_resolve,
     )
     def test_unknown_tag(self):
         response = self.get_success_response(
diff --git a/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py b/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py
index 31b877a6e804a6..c32a16fc0d5246 100644
--- a/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py
+++ b/tests/sentry/snuba/metrics/test_metrics_layer/test_metrics_enhanced_performance.py
@@ -21,7 +21,6 @@
 )
 from sentry.sentry_metrics import indexer
 from sentry.sentry_metrics.configuration import UseCaseKey
-from sentry.sentry_metrics.use_case_id_registry import UseCaseID
 from sentry.snuba.metrics import (
     MAX_POINTS,
     MetricConditionField,
@@ -2064,7 +2063,7 @@ def test_simple(self):
             project_ids=[self.project.id],
             organization_id=self.organization.id,
             start=self.day_ago,
-            use_case_id=UseCaseID.TRANSACTIONS,
+            use_case_id=UseCaseKey.PERFORMANCE,
         )
         assert result == [
             {
@@ -2117,7 +2116,7 @@ def test_metric_outside_query_daterange(self):
             project_ids=[self.project.id],
             organization_id=self.organization.id,
             start=self.day_ago,
-            use_case_id=UseCaseID.TRANSACTIONS,
+            use_case_id=UseCaseKey.PERFORMANCE,
         )
         assert result == [