Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ref(generic-metrics): Migrate UseCaseKey to UseCaseID for organization metrics #51776

Merged
merged 17 commits into from
Jul 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions src/sentry/api/endpoints/organization_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from sentry.api.exceptions import ResourceDoesNotExist
from sentry.api.paginator import GenericOffsetPaginator
from sentry.api.utils import InvalidParams
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.sentry_metrics.utils import string_to_use_case_id
from sentry.snuba.metrics import (
QueryDefinition,
get_metrics,
Expand All @@ -21,17 +22,18 @@
from sentry.utils.cursors import Cursor, CursorResult


def get_use_case_id(request: Request) -> UseCaseKey:
def get_use_case_id(request: Request) -> UseCaseID:
obostjancic marked this conversation as resolved.
Show resolved Hide resolved
"""
Get useCase from query params and validate it against UseCaseKey enum type
Get useCase from query params and validate it against UseCaseID enum type
Raise a ParseError if the use_case parameter is invalid.
"""

try:
return UseCaseKey(request.GET.get("useCase", "release-health"))
use_case_param = request.GET.get("useCase", "sessions")
return string_to_use_case_id(use_case_param)
except ValueError:
raise ParseError(
detail=f"Invalid useCase parameter. Please use one of: {[uc.value for uc in UseCaseKey]}"
detail=f"Invalid useCase parameter. Please use one of: {[uc.value for uc in UseCaseID]}"
)


Expand Down
4 changes: 2 additions & 2 deletions src/sentry/ingest/billing_metrics_consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
from django.conf import settings

from sentry.constants import DataCategory
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.indexer.strings import SHARED_TAG_STRINGS, TRANSACTION_METRICS_NAMES
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.sentry_metrics.utils import reverse_resolve_tag_value
from sentry.utils import json
from sentry.utils.kafka_config import get_kafka_consumer_cluster_options, get_topic_definition
Expand Down Expand Up @@ -143,7 +143,7 @@ def _has_profile(self, bucket: MetricsBucket) -> bool:
return bool(
(tag_value := bucket["tags"].get(self.profile_tag_key))
and "true"
== reverse_resolve_tag_value(UseCaseKey.PERFORMANCE, bucket["org_id"], tag_value)
== reverse_resolve_tag_value(UseCaseID.TRANSACTIONS, bucket["org_id"], tag_value)
)

def _produce_billing_outcomes(self, payload: MetricsBucket) -> None:
Expand Down
4 changes: 2 additions & 2 deletions src/sentry/release_health/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
)
from sentry.release_health.metrics_sessions_v2 import run_sessions_query
from sentry.sentry_metrics import indexer
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.snuba.metrics import (
MetricField,
MetricGroupByField,
Expand Down Expand Up @@ -75,7 +75,7 @@
HOUR = MINUTE * 60
DAY = HOUR * 24
LEGACY_SESSIONS_DEFAULT_ROLLUP = HOUR
USE_CASE_ID = UseCaseKey.RELEASE_HEALTH
USE_CASE_ID = UseCaseID.SESSIONS

logger = logging.getLogger(__name__)

Expand Down
4 changes: 2 additions & 2 deletions src/sentry/release_health/metrics_sessions_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
SessionsQueryResult,
SessionsQueryValue,
)
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.snuba.metrics import get_public_name_from_mri
from sentry.snuba.metrics.datasource import get_series
from sentry.snuba.metrics.naming_layer import SessionMRI
Expand Down Expand Up @@ -570,7 +570,7 @@ def run_sessions_query(
metrics_results = get_series(
projects,
metrics_query,
use_case_id=UseCaseKey.RELEASE_HEALTH,
use_case_id=UseCaseID.SESSIONS,
tenant_ids={"organization_id": org_id},
)
except OrderByNotSupportedOverCompositeEntityException:
Expand Down
7 changes: 3 additions & 4 deletions src/sentry/release_health/release_monitor/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

from sentry.release_health.release_monitor.base import BaseReleaseMonitorBackend, Totals
from sentry.sentry_metrics import indexer
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.indexer.strings import SESSION_METRIC_NAMES
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.sentry_metrics.utils import resolve_tag_key
Expand Down Expand Up @@ -103,9 +102,9 @@ def fetch_project_release_health_totals(
totals: Totals = defaultdict(dict)
with metrics.timer("release_monitor.fetch_project_release_health_totals.loop"):
while (time.time() - start_time) < self.MAX_SECONDS:
release_key = resolve_tag_key(UseCaseKey.RELEASE_HEALTH, org_id, "release")
release_key = resolve_tag_key(UseCaseID.SESSIONS, org_id, "release")
release_col = Column(release_key)
env_key = resolve_tag_key(UseCaseKey.RELEASE_HEALTH, org_id, "environment")
env_key = resolve_tag_key(UseCaseID.SESSIONS, org_id, "environment")
env_col = Column(env_key)
query = (
Query(
Expand Down Expand Up @@ -134,7 +133,7 @@ def fetch_project_release_health_totals(
Column("metric_id"),
Op.EQ,
indexer.resolve(
UseCaseKey.RELEASE_HEALTH, org_id, SessionMRI.SESSION.value
UseCaseID.SESSIONS, org_id, SessionMRI.SESSION.value
),
),
],
Expand Down
13 changes: 5 additions & 8 deletions src/sentry/search/events/builder/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
WhereType,
)
from sentry.sentry_metrics import indexer
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.snuba.dataset import Dataset
from sentry.snuba.metrics.extraction import QUERY_HASH_KEY, OndemandMetricSpec
Expand Down Expand Up @@ -703,9 +702,9 @@ def run_query(self, referrer: str, use_cache: bool = False) -> Any:
metrics_data = get_series(
projects=self.params.projects,
metrics_query=metric_query,
use_case_id=UseCaseKey.PERFORMANCE
use_case_id=UseCaseID.TRANSACTIONS
if self.is_performance
else UseCaseKey.RELEASE_HEALTH,
else UseCaseID.SESSIONS,
include_meta=True,
tenant_ids=self.tenant_ids,
)
Expand Down Expand Up @@ -898,9 +897,7 @@ def get_snql_query(self) -> Request:
snuba_queries, _ = SnubaQueryBuilder(
projects=self.params.projects,
metrics_query=metrics_query,
use_case_id=UseCaseKey.PERFORMANCE
if self.is_performance
else UseCaseKey.RELEASE_HEALTH,
use_case_id=UseCaseID.TRANSACTIONS if self.is_performance else UseCaseID.SESSIONS,
).get_snuba_queries()

if len(snuba_queries) != 1:
Expand Down Expand Up @@ -1112,9 +1109,9 @@ def run_query(self, referrer: str, use_cache: bool = False) -> Any:
metrics_data = get_series(
projects=self.params.projects,
metrics_query=metric_query,
use_case_id=UseCaseKey.PERFORMANCE
use_case_id=UseCaseID.TRANSACTIONS
if self.is_performance
else UseCaseKey.RELEASE_HEALTH,
else UseCaseID.SESSIONS,
include_meta=True,
tenant_ids=self.tenant_ids,
)
Expand Down
6 changes: 3 additions & 3 deletions src/sentry/sentry_metrics/use_case_id_registry.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from __future__ import annotations

from enum import Enum
from typing import Mapping
from typing import Mapping, Optional

from sentry.sentry_metrics.configuration import UseCaseKey

Expand Down Expand Up @@ -40,5 +40,5 @@ class UseCaseID(Enum):
}


def get_metric_path_from_usecase(use_case: UseCaseID) -> UseCaseKey:
return METRIC_PATH_MAPPING[use_case]
def get_use_case_key(use_case_id: UseCaseID) -> Optional[UseCaseKey]:
return METRIC_PATH_MAPPING.get(use_case_id)
john-z-yang marked this conversation as resolved.
Show resolved Hide resolved
54 changes: 40 additions & 14 deletions src/sentry/sentry_metrics/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from sentry.api.utils import InvalidParams
from sentry.sentry_metrics import indexer
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.sentry_metrics.indexer.base import to_use_case_id
from sentry.sentry_metrics.use_case_id_registry import METRIC_PATH_MAPPING, UseCaseID

#: Special integer used to represent a string missing from the indexer
STRING_NOT_FOUND = -1
Expand All @@ -16,9 +17,22 @@ class MetricIndexNotFound(InvalidParams):
pass


def string_to_use_case_id(value: str) -> UseCaseID:
try:
return UseCaseID(value)
except ValueError:
# param doesn't appear to be a UseCaseID try with the obsolete UseCaseKey
# will raise ValueError if it fails
return to_use_case_id(UseCaseKey(value))


def reverse_resolve_tag_value(
use_case_id: UseCaseKey, org_id: int, index: Union[int, str, None], weak: bool = False
use_case_id: Union[UseCaseID, UseCaseKey],
org_id: int,
Comment on lines 29 to +31
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would not do it here ( but at the call site... as high as possible).

I would leave reverse_resolve_tag_value, reverse_resolve, reverse_resolve_weak, resolve... to take only UseCaseID.

If we allow them to take either UseCaseKey or UseCaseID we just moved the problem from the indexer here ( an improvment but not by much).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with @RaduW on this. I would prefer to have all the downstream methods accept UseCaseId and perform coercions at top level if we don't want to also refactor there.

index: Union[int, str, None],
weak: bool = False,
) -> Optional[str]:
use_case_id = to_use_case_id(use_case_id)
if isinstance(index, str) or index is None:
return index
else:
Expand Down Expand Up @@ -70,8 +84,9 @@ def bulk_reverse_resolve_tag_value(
return {**ret_val, **resolved_indexes}


def reverse_resolve(use_case_id: UseCaseKey, org_id: int, index: int) -> str:
def reverse_resolve(use_case_id: Union[UseCaseID, UseCaseKey], org_id: int, index: int) -> str:
assert index > 0
use_case_id = to_use_case_id(use_case_id)
resolved = indexer.reverse_resolve(use_case_id, org_id, index)
# The indexer should never return None for integers > 0:
if resolved is None:
Expand All @@ -93,52 +108,61 @@ def bulk_reverse_resolve(
return indexer.bulk_reverse_resolve(use_case_id, org_id, indexes_to_resolve)


def reverse_resolve_weak(use_case_id: UseCaseKey, org_id: int, index: int) -> Optional[str]:
def reverse_resolve_weak(
use_case_id: Union[UseCaseID, UseCaseKey], org_id: int, index: int
) -> Optional[str]:
"""
Resolve an index value back to a string, special-casing 0 to return None.

This is useful in situations where a `GROUP BY tags[123]` clause produces a
tuple for metric buckets that are missing that tag, i.e. `tags[123] == 0`.
"""

use_case_id = to_use_case_id(use_case_id)
if index == TAG_NOT_SET:
return None

return reverse_resolve(use_case_id, org_id, index)


def resolve(
use_case_id: UseCaseKey,
use_case_id: Union[UseCaseID, UseCaseKey],
org_id: int,
string: str,
) -> int:
use_case_id = to_use_case_id(use_case_id)
resolved = indexer.resolve(use_case_id, org_id, string)
if resolved is None:
raise MetricIndexNotFound(f"Unknown string: {string!r}")

return resolved


def resolve_tag_key(use_case_id: UseCaseKey, org_id: int, string: str) -> str:
def resolve_tag_key(use_case_id: Union[UseCaseID, UseCaseKey], org_id: int, string: str) -> str:
use_case_id = to_use_case_id(use_case_id)
resolved = resolve(use_case_id, org_id, string)
assert use_case_id in (UseCaseKey.PERFORMANCE, UseCaseKey.RELEASE_HEALTH)
if use_case_id == UseCaseKey.PERFORMANCE:
assert isinstance(use_case_id, UseCaseID)
if METRIC_PATH_MAPPING[use_case_id] is UseCaseKey.PERFORMANCE:
return f"tags_raw[{resolved}]"
else:
return f"tags[{resolved}]"


def resolve_tag_value(use_case_id: UseCaseKey, org_id: int, string: str) -> Union[str, int]:
def resolve_tag_value(
use_case_id: Union[UseCaseID, UseCaseKey], org_id: int, string: str
) -> Union[str, int]:
use_case_id = to_use_case_id(use_case_id)
assert isinstance(string, str)
assert use_case_id in (UseCaseKey.PERFORMANCE, UseCaseKey.RELEASE_HEALTH)
if use_case_id == UseCaseKey.PERFORMANCE:
assert isinstance(use_case_id, UseCaseID)
if METRIC_PATH_MAPPING[use_case_id] is UseCaseKey.PERFORMANCE:
return string
return resolve_weak(use_case_id, org_id, string)


def resolve_tag_values(
use_case_id: UseCaseKey, org_id: int, strings: Sequence[str]
use_case_id: Union[UseCaseID, UseCaseKey], org_id: int, strings: Sequence[str]
) -> Sequence[Union[str, int]]:
use_case_id = to_use_case_id(use_case_id)
rv = []
for string in strings:
resolved = resolve_tag_value(use_case_id, org_id, string)
Expand All @@ -148,14 +172,15 @@ def resolve_tag_values(
return rv


def resolve_weak(use_case_id: UseCaseKey, org_id: int, string: str) -> int:
def resolve_weak(use_case_id: Union[UseCaseID, UseCaseKey], org_id: int, string: str) -> int:
"""
A version of `resolve` that returns -1 for missing values.

When using `resolve_weak` to produce a WHERE-clause, it is quite
useful to make the WHERE-clause "impossible" with `WHERE x = -1` instead of
explicitly handling that exception.
"""
use_case_id = to_use_case_id(use_case_id)
resolved = indexer.resolve(use_case_id, org_id, string)
if resolved is None:
return STRING_NOT_FOUND
Expand All @@ -164,12 +189,13 @@ def resolve_weak(use_case_id: UseCaseKey, org_id: int, string: str) -> int:


def resolve_many_weak(
use_case_id: UseCaseKey, org_id: int, strings: Sequence[str]
use_case_id: Union[UseCaseID, UseCaseKey], org_id: int, strings: Sequence[str]
) -> Sequence[int]:
"""
Resolve multiple values at once, omitting missing ones. This is useful in
the same way as `resolve_weak` is, e.g. `WHERE x in values`.
"""
use_case_id = to_use_case_id(use_case_id)
rv = []
for string in strings:
resolved = resolve_weak(use_case_id, org_id, string)
Expand Down
20 changes: 10 additions & 10 deletions src/sentry/snuba/entity_subscription.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from sentry.constants import CRASH_RATE_ALERT_AGGREGATE_ALIAS, CRASH_RATE_ALERT_SESSION_COUNT_ALIAS
from sentry.exceptions import InvalidQuerySubscription, UnsupportedQuerySubscription
from sentry.models import Environment, Organization
from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.use_case_id_registry import UseCaseID
from sentry.sentry_metrics.utils import (
MetricIndexNotFound,
resolve,
Expand Down Expand Up @@ -324,29 +324,29 @@ def get_entity_extra_params(self) -> Mapping[str, Any]:
"granularity": self.get_granularity(),
}

def _get_use_case_key(self) -> UseCaseKey:
def _get_use_case_id(self) -> UseCaseID:
if self.dataset == Dataset.PerformanceMetrics:
return UseCaseKey.PERFORMANCE
return UseCaseID.TRANSACTIONS
else:
return UseCaseKey.RELEASE_HEALTH
return UseCaseID.SESSIONS

def resolve_tag_key_if_needed(self, string: str) -> str:
if self.use_metrics_layer:
return string

return resolve_tag_key(self._get_use_case_key(), self.org_id, string)
return resolve_tag_key(self._get_use_case_id(), self.org_id, string)

def resolve_tag_value_if_needed(self, string: str) -> Union[str, int]:
if self.use_metrics_layer:
return string

return resolve_tag_value(self._get_use_case_key(), self.org_id, string)
return resolve_tag_value(self._get_use_case_id(), self.org_id, string)

def resolve_tag_values_if_needed(self, strings: Sequence[str]) -> Sequence[Union[str, int]]:
if self.use_metrics_layer:
return strings

return resolve_tag_values(self._get_use_case_key(), self.org_id, strings)
return resolve_tag_values(self._get_use_case_id(), self.org_id, strings)

def _get_environment_condition(self, environment_name: str) -> Condition:
return Condition(
Expand Down Expand Up @@ -445,10 +445,10 @@ def translate_sessions_tag_keys_and_values(
value_col_name = alias if alias else "value"
try:
translated_data: Dict[str, Any] = {}
session_status = resolve_tag_key(UseCaseKey.RELEASE_HEALTH, org_id, "session.status")
session_status = resolve_tag_key(UseCaseID.SESSIONS, org_id, "session.status")
for row in data:
tag_value = reverse_resolve_tag_value(
UseCaseKey.RELEASE_HEALTH, org_id, row[session_status]
UseCaseID.SESSIONS, org_id, row[session_status]
)
if tag_value is None:
raise MetricIndexNotFound()
Expand Down Expand Up @@ -540,7 +540,7 @@ def get_snql_extra_conditions(self) -> List[Condition]:
Condition(
Column("metric_id"),
Op.EQ,
resolve(UseCaseKey.RELEASE_HEALTH, self.org_id, self.metric_key.value),
resolve(UseCaseID.SESSIONS, self.org_id, self.metric_key.value),
)
]

Expand Down
Loading
Loading