feat(metrics): Use bulk_reverse_resolve in metrics #52115

Merged — 6 commits merged on Jul 4, 2023
Changes from 5 commits
4 changes: 2 additions & 2 deletions src/sentry/api/endpoints/organization_measurements_meta.py
@@ -6,7 +6,7 @@
from sentry.api.bases import NoProjects, OrganizationEventsEndpointBase
from sentry.models import Organization
from sentry.search.events.constants import METRIC_FUNCTION_LIST_BY_TYPE
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.snuba.metrics.datasource import get_custom_measurements


@@ -24,7 +24,7 @@ def get(self, request: Request, organization: Organization) -> Response:
organization_id=organization.id,
start=params["start"],
end=params["end"],
use_case_id=UseCaseKey.PERFORMANCE,
use_case_id=UseCaseID.TRANSACTIONS,
)

with start_span(op="transform", description="metric meta"):
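The only functional change in this file is the switch from the legacy UseCaseKey enum to the new UseCaseID registry. A minimal sketch of the mapping implied by the call sites in this PR; the member values are assumptions for illustration, the real definitions live in sentry.sentry_metrics.configuration and sentry.sentry_metrics.use_case_id_registry.

from enum import Enum


class UseCaseKey(Enum):
    # Legacy enum -- member values assumed for illustration.
    RELEASE_HEALTH = "release-health"
    PERFORMANCE = "performance"


class UseCaseID(Enum):
    # New registry enum -- member values assumed for illustration.
    SESSIONS = "sessions"
    TRANSACTIONS = "transactions"


# Call sites touched by this PR move from the left-hand member to the right-hand one.
LEGACY_TO_NEW = {
    UseCaseKey.PERFORMANCE: UseCaseID.TRANSACTIONS,
    UseCaseKey.RELEASE_HEALTH: UseCaseID.SESSIONS,
}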
20 changes: 13 additions & 7 deletions src/sentry/release_health/release_monitor/metrics.py
@@ -2,7 +2,7 @@
import time
from collections import defaultdict
from datetime import datetime, timedelta
from typing import Mapping, Sequence
from typing import Mapping, Sequence, Set

from snuba_sdk import (
Column,
@@ -21,6 +21,7 @@
from sentry.sentry_metrics import indexer
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.indexer.strings import SESSION_METRIC_NAMES
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.sentry_metrics.utils import resolve_tag_key
from sentry.snuba.dataset import Dataset, EntityKey
from sentry.snuba.metrics.naming_layer.mri import SessionMRI
@@ -164,13 +165,18 @@ def fetch_project_release_health_totals(
if more_results:
data = data[:-1]

# convert indexes back to strings
indexes: Set[int] = set()
for row in data:
env_name = indexer.reverse_resolve(
UseCaseKey.RELEASE_HEALTH, org_id, row[env_key]
)
release_name = indexer.reverse_resolve(
UseCaseKey.RELEASE_HEALTH, org_id, row[release_key]
)
indexes.add(row[env_key])
indexes.add(row[release_key])
resolved_strings = indexer.bulk_reverse_resolve(
UseCaseID.SESSIONS, org_id, indexes
)

for row in data:
env_name = resolved_strings.get(row[env_key])
release_name = resolved_strings.get(row[release_key])
row_totals = totals[row["project_id"]].setdefault(
env_name, {"total_sessions": 0, "releases": defaultdict(int)} # type: ignore
)
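The hunk above gathers every environment and release index into a set and resolves them with one bulk_reverse_resolve call instead of two reverse_resolve calls per row. A self-contained sketch of that pattern, with a stub standing in for the real indexer; the stub and its stored strings are illustrative assumptions, not Sentry code.

from typing import Collection, Mapping


class StubIndexer:
    """Illustrative stand-in for sentry.sentry_metrics.indexer."""

    _strings = {1: "production", 2: "staging", 3: "myapp@1.0.0"}

    def bulk_reverse_resolve(
        self, use_case_id: str, org_id: int, indexes: Collection[int]
    ) -> Mapping[int, str]:
        # One round trip resolves every requested index at once.
        wanted = set(indexes)
        return {i: s for i, s in self._strings.items() if i in wanted}


indexer = StubIndexer()

data = [
    {"project_id": 10, "env_id": 1, "release_id": 3},
    {"project_id": 10, "env_id": 2, "release_id": 3},
]

# First pass: collect the indexes to resolve (the set drops duplicates).
indexes = {row["env_id"] for row in data} | {row["release_id"] for row in data}
resolved = indexer.bulk_reverse_resolve("sessions", 1, indexes)

# Second pass: plain dictionary lookups replace the per-row indexer calls.
for row in data:
    env_name = resolved.get(row["env_id"])
    release_name = resolved.get(row["release_id"])
    print(row["project_id"], env_name, release_name)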
58 changes: 57 additions & 1 deletion src/sentry/sentry_metrics/utils.py
@@ -1,8 +1,9 @@
from typing import Optional, Sequence, Union
from typing import Collection, Dict, Mapping, Optional, Sequence, Set, Union, cast

from sentry.api.utils import InvalidParams
from sentry.sentry_metrics import indexer
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.use_case_id_registry import UseCaseID

#: Special integer used to represent a string missing from the indexer
STRING_NOT_FOUND = -1
@@ -27,6 +28,48 @@ def reverse_resolve_tag_value(
return reverse_resolve(use_case_id, org_id, index)


def bulk_reverse_resolve_tag_value(
use_case_id: UseCaseID, org_id: int, indexes: Collection[Union[int, str, None]]
) -> Mapping[Union[int, str], str]:
"""
Reverse resolves a mixture of indexes and strings in bulk.

If an element is already a string, it is mapped to itself.
If an element is None, it is ignored.
If an element is a positive integer, it is reverse resolved.
If an element is 0 or a negative number, it is ignored.

The result is a dictionary mapping each input string and each resolvable positive integer to a string:
the element itself for string keys, or the reverse-resolved string for positive integers.

Example:
bulk_reverse_resolve_tag_value(UseCaseID.TRANSACTIONS, 1, [-1, 0, 1, "some-string", "abc", 7, 33333])
would return something like this (presuming that no string was found for 33333):
{
1: "tag-a",
"some-string": "some-string",
"abc": "abc",
7: "tag-b",
}

"""
ret_val: Dict[Union[int, str], str] = {}

indexes_to_resolve: Set[int] = set()
for index in indexes:
if isinstance(index, str):
ret_val[index] = index  # already a string; no need to reverse resolve it
elif isinstance(index, int) and index > 0:  # resolve valid positive ints; None and non-positive values are ignored
indexes_to_resolve.add(index)

resolved_indexes = cast(
Mapping[Union[int, str], str],
indexer.bulk_reverse_resolve(use_case_id, org_id, indexes_to_resolve),
)

return {**ret_val, **resolved_indexes}


def reverse_resolve(use_case_id: UseCaseKey, org_id: int, index: int) -> str:
assert index > 0
resolved = indexer.reverse_resolve(use_case_id, org_id, index)
@@ -37,6 +80,19 @@ def reverse_resolve(use_case_id: UseCaseKey, org_id: int, index: int) -> str:
return resolved


def bulk_reverse_resolve(
use_case_id: UseCaseID, org_id: int, indexes: Collection[int]
) -> Mapping[int, str]:

# de-duplicate indexes and drop non-positive (unresolvable) values
indexes_to_resolve = set()
for idx in indexes:
if idx > 0:
indexes_to_resolve.add(idx)

return indexer.bulk_reverse_resolve(use_case_id, org_id, indexes_to_resolve)


def reverse_resolve_weak(use_case_id: UseCaseKey, org_id: int, index: int) -> Optional[str]:
"""
Resolve an index value back to a string, special-casing 0 to return None.
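Taken together, the two helpers above differ only in what they accept: bulk_reverse_resolve takes plain integer indexes, while bulk_reverse_resolve_tag_value also passes strings through unchanged and silently drops None and non-positive values. A hedged usage sketch based on the docstring; the resolved strings "tag-a" and "tag-b" are assumed indexer contents, not real data.

from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.sentry_metrics.utils import bulk_reverse_resolve, bulk_reverse_resolve_tag_value

# Assuming the indexer for org 1 maps 1 -> "tag-a" and 7 -> "tag-b".
resolved = bulk_reverse_resolve_tag_value(
    UseCaseID.TRANSACTIONS, 1, [-1, 0, 1, "some-string", None, 7]
)
# {1: "tag-a", "some-string": "some-string", 7: "tag-b"}

names = bulk_reverse_resolve(UseCaseID.TRANSACTIONS, 1, [1, 1, 7, 0])
# {1: "tag-a", 7: "tag-b"}  -- duplicates collapse and 0 is dropped before the indexer call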
49 changes: 31 additions & 18 deletions src/sentry/snuba/metrics/datasource.py
@@ -1,5 +1,7 @@
from __future__ import annotations

from sentry.sentry_metrics.use_case_id_registry import UseCaseID

"""
Module that gets both metadata and time series from Snuba.
For metadata, it fetches metrics metadata (metric names, tag names, tag values, ...) from snuba.
@@ -27,13 +29,17 @@
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.utils import (
MetricIndexNotFound,
bulk_reverse_resolve,
bulk_reverse_resolve_tag_value,
resolve_tag_key,
reverse_resolve,
reverse_resolve_tag_value,
)
from sentry.snuba.dataset import Dataset, EntityKey
from sentry.snuba.metrics.fields import run_metrics_query
from sentry.snuba.metrics.fields.base import get_derived_metrics, org_id_from_projects
from sentry.snuba.metrics.fields.base import (
SnubaDataType,
get_derived_metrics,
org_id_from_projects,
)
from sentry.snuba.metrics.naming_layer.mapping import get_all_mris, get_mri
from sentry.snuba.metrics.naming_layer.mri import MRI_SCHEMA_REGEX, is_custom_measurement, parse_mri
from sentry.snuba.metrics.query import Groupable, MetricField, MetricsQuery
@@ -70,7 +76,7 @@ def _get_metrics_for_entity(
org_id: int,
start: Optional[datetime] = None,
end: Optional[datetime] = None,
) -> Mapping[str, Any]:
) -> List[SnubaDataType]:
return run_metrics_query(
entity_key=entity_key,
select=[Column("metric_id")],
@@ -179,21 +185,26 @@ def get_custom_measurements(
organization_id: int,
start: Optional[datetime] = None,
end: Optional[datetime] = None,
use_case_id: UseCaseKey = UseCaseKey.PERFORMANCE,
use_case_id: UseCaseID = UseCaseID.TRANSACTIONS,
) -> Sequence[MetricMeta]:
assert project_ids

metrics_meta = []
for metric_type in CUSTOM_MEASUREMENT_DATASETS:
for row in _get_metrics_for_entity(
rows = _get_metrics_for_entity(
entity_key=METRIC_TYPE_TO_ENTITY[metric_type],
project_ids=project_ids,
org_id=organization_id,
start=start,
end=end,
):
mri = reverse_resolve(use_case_id, organization_id, row["metric_id"])
parsed_mri = parse_mri(mri)
)

mri_indexes = {row["metric_id"] for row in rows}
mris = bulk_reverse_resolve(use_case_id, organization_id, mri_indexes)

for row in rows:
mri_index = row.get("metric_id")
parsed_mri = parse_mri(mris.get(mri_index))
if parsed_mri is not None and is_custom_measurement(parsed_mri):
metrics_meta.append(
MetricMeta(
@@ -282,7 +293,6 @@ def _fetch_tags_or_values_for_metrics(
column: str,
use_case_id: UseCaseKey,
) -> Tuple[Union[Sequence[Tag], Sequence[TagValue]], Optional[str]]:

assert len({p.organization_id for p in projects}) == 1

metric_mris = [get_mri(metric_name) for metric_name in metric_names] if metric_names else []
@@ -371,6 +381,7 @@ def _fetch_tags_or_values_for_mri(
raise InvalidParams(error_str)

tag_or_value_id_lists = tag_or_value_ids_per_metric_id.values()
tag_or_value_ids: Set[Union[int, str, None]]
if metric_mris:
# If there are metric_ids that map to the metric_names provided as an arg that were not
# found in the dataset, then we raise an instance of InvalidParams exception
@@ -399,23 +410,25 @@

if column.startswith(("tags[", "tags_raw[")):
tag_id = column.split("[")[1].split("]")[0]
resolved_ids = bulk_reverse_resolve_tag_value(
use_case_id, org_id, [int(tag_id), *tag_or_value_ids]
)
resolved_key = resolved_ids.get(int(tag_id))
tags_or_values = [
{
"key": reverse_resolve(use_case_id, org_id, int(tag_id)),
"value": reverse_resolve_tag_value(use_case_id, org_id, value_id),
"key": resolved_key,
"value": resolved_ids.get(value_id),
}
for value_id in tag_or_value_ids
]
tags_or_values.sort(key=lambda tag: (tag["key"], tag["value"]))
else:
tags_or_values = []
resolved_ids = bulk_reverse_resolve(use_case_id, org_id, tag_or_value_ids)
for tag_id in tag_or_value_ids:
try:
resolved = reverse_resolve(use_case_id, org_id, tag_id)
if resolved not in UNALLOWED_TAGS:
tags_or_values.append({"key": resolved})
except MetricIndexNotFound:
continue
resolved = resolved_ids.get(tag_id)
if resolved is not None and resolved not in UNALLOWED_TAGS:
tags_or_values.append({"key": resolved})

tags_or_values.sort(key=itemgetter("key"))

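In the tags[...] branch above, the tag key's own index is parsed out of the column name and then resolved together with all of the value ids in a single call. A tiny illustration of that parsing; the column name "tags_raw[117]" is hypothetical.

column = "tags_raw[117]"  # hypothetical resolved column name
tag_id = column.split("[")[1].split("]")[0]
assert tag_id == "117"
# int(tag_id) is prepended to the value ids so bulk_reverse_resolve_tag_value
# resolves the key and every value in one indexer round trip.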
3 changes: 3 additions & 0 deletions src/sentry/utils/pytest/metrics.py
@@ -34,6 +34,9 @@ def control_metrics_access(monkeypatch, request, set_sentry_option):
monkeypatch.setattr(
"sentry.sentry_metrics.indexer.reverse_resolve", mock_indexer.reverse_resolve
)
monkeypatch.setattr(
"sentry.sentry_metrics.indexer.bulk_reverse_resolve", mock_indexer.bulk_reverse_resolve
)

old_resolve = indexer.resolve

10 changes: 5 additions & 5 deletions tests/sentry/api/endpoints/test_organization_metric_tags.py
@@ -1,11 +1,11 @@
import time
from typing import Collection
from unittest.mock import patch

import pytest

from sentry.sentry_metrics import indexer
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.sentry_metrics.utils import MetricIndexNotFound
from sentry.snuba.metrics.naming_layer import get_mri
from sentry.snuba.metrics.naming_layer.mri import SessionMRI
from sentry.snuba.metrics.naming_layer.public import SessionMetricKey
@@ -19,8 +19,8 @@
pytestmark = pytest.mark.sentry_metrics


def mocked_reverse_resolve(use_case_id, org_id: int, index: int):
raise MetricIndexNotFound()
def mocked_bulk_reverse_resolve(use_case_id, org_id: int, ids: Collection[int]):
return {}


@region_silo_test(stable=True)
@@ -98,8 +98,8 @@ def test_mixed_metric_identifiers(self):
assert response.data == []

@patch(
"sentry.snuba.metrics.datasource.reverse_resolve",
mocked_reverse_resolve,
"sentry.snuba.metrics.datasource.bulk_reverse_resolve",
mocked_bulk_reverse_resolve,
)
def test_unknown_tag(self):
response = self.get_success_response(
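Because the bulk API returns a mapping instead of raising MetricIndexNotFound for unknown indexes, the test above can simulate "nothing resolves" by mocking the bulk call with an empty dict. A minimal sketch of how the caller in datasource.py behaves under that mock; the ids 11 and 12 are made up for illustration.

resolved_ids: dict = {}            # what mocked_bulk_reverse_resolve returns
tag_or_value_ids = [11, 12]        # hypothetical unresolved tag ids

tags_or_values = []
for tag_id in tag_or_value_ids:
    resolved = resolved_ids.get(tag_id)   # None for every id under the mock
    if resolved is not None:
        tags_or_values.append({"key": resolved})

assert tags_or_values == []        # the endpoint then reports no tags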
@@ -21,6 +21,7 @@
)
from sentry.sentry_metrics import indexer
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.snuba.metrics import (
MAX_POINTS,
MetricConditionField,
@@ -2063,7 +2064,7 @@ def test_simple(self):
project_ids=[self.project.id],
organization_id=self.organization.id,
start=self.day_ago,
use_case_id=UseCaseKey.PERFORMANCE,
use_case_id=UseCaseID.TRANSACTIONS,
)
assert result == [
{
@@ -2116,7 +2117,7 @@ def test_metric_outside_query_daterange(self):
project_ids=[self.project.id],
organization_id=self.organization.id,
start=self.day_ago,
use_case_id=UseCaseKey.PERFORMANCE,
use_case_id=UseCaseID.TRANSACTIONS,
)

assert result == [