Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ref(ddm): Dual write to Sentry metrics as well as Datadog #5474

Merged
merged 6 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions snuba/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ def setup_sentry() -> None:
release=os.getenv("SNUBA_RELEASE"),
traces_sample_rate=settings.SENTRY_TRACE_SAMPLE_RATE,
profiles_sample_rate=settings.SNUBA_PROFILES_SAMPLE_RATE,
_experiments={
# Turns on the metrics module
"enable_metrics": True,
# Enables sending of code locations for metrics
"metric_code_locations": True,
},
)

from snuba.utils.profiler import run_ondemand_profiler
Expand Down
35 changes: 32 additions & 3 deletions snuba/utils/metrics/backends/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ class MetricsBackend(ABC):

@abstractmethod
def increment(
self, name: str, value: Union[int, float] = 1, tags: Optional[Tags] = None
self,
name: str,
value: Union[int, float] = 1,
tags: Optional[Tags] = None,
unit: Optional[str] = None,
) -> None:
"""
Increment a counter metric. These increments can also be
Expand All @@ -27,7 +31,11 @@ def increment(

@abstractmethod
def gauge(
self, name: str, value: Union[int, float], tags: Optional[Tags] = None
self,
name: str,
value: Union[int, float],
tags: Optional[Tags] = None,
unit: Optional[str] = None,
) -> None:
"""
Emit a metric that is the authoritative value for a quantity at a point in time
Expand All @@ -40,7 +48,11 @@ def gauge(

@abstractmethod
def timing(
self, name: str, value: Union[int, float], tags: Optional[Tags] = None
self,
name: str,
value: Union[int, float],
tags: Optional[Tags] = None,
unit: Optional[str] = None,
) -> None:
"""
Emit a metric for the timing performance of an operation.
Expand All @@ -51,6 +63,23 @@ def timing(
"""
raise NotImplementedError

@abstractmethod
def distribution(
    self,
    name: str,
    value: Union[int, float],
    tags: Optional[Tags] = None,
    unit: Optional[str] = None,
) -> None:
    """
    Emit a single sample of a numeric quantity whose distribution should
    be tracked. Unlike ``timing``, the value does not have to be a
    duration, so this is the method to use for sizes, counts per request
    and similar non-time measurements.

    ``unit`` optionally names the unit the value is expressed in. When it
    is omitted, each backend decides which unit (if any) to report.

    Example:

    metrics.distribution("request.size", request_size_in_bytes, unit="byte")
    """
    raise NotImplementedError

@abstractmethod
def events(
self,
Expand Down
32 changes: 29 additions & 3 deletions snuba/utils/metrics/backends/datadog.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@ def __normalize_tags(self, tags: Optional[Tags]) -> Optional[Sequence[str]]:
return [f"{key}:{value.replace('|', '_')}" for key, value in tags.items()]

def increment(
self, name: str, value: Union[int, float] = 1, tags: Optional[Tags] = None
self,
name: str,
value: Union[int, float] = 1,
tags: Optional[Tags] = None,
unit: Optional[str] = None,
) -> None:
self.__client.increment(
name,
Expand All @@ -57,7 +61,11 @@ def increment(
)

def gauge(
self, name: str, value: Union[int, float], tags: Optional[Tags] = None
self,
name: str,
value: Union[int, float],
tags: Optional[Tags] = None,
unit: Optional[str] = None,
) -> None:
self.__client.gauge(
name,
Expand All @@ -67,7 +75,11 @@ def gauge(
)

def timing(
self, name: str, value: Union[int, float], tags: Optional[Tags] = None
self,
name: str,
value: Union[int, float],
tags: Optional[Tags] = None,
unit: Optional[str] = None,
) -> None:
self.__client.timing(
name,
Expand All @@ -76,6 +88,20 @@ def timing(
sample_rate=self.__sample_rates.get(name, 1.0),
)

def distribution(
    self,
    name: str,
    value: Union[int, float],
    tags: Optional[Tags] = None,
    unit: Optional[str] = None,
) -> None:
    """
    Forward a distribution sample to the Datadog client.

    Tags are normalized before sending, and the per-metric sample rate is
    looked up by ``name`` (falling back to 1.0 when none is configured).
    ``unit`` is accepted for interface compatibility but is not passed on
    to the client.
    """
    normalized_tags = self.__normalize_tags(tags)
    rate = self.__sample_rates.get(name, 1.0)
    self.__client.distribution(
        name,
        value,
        tags=normalized_tags,
        sample_rate=rate,
    )

def events(
self,
title: str,
Expand Down
80 changes: 80 additions & 0 deletions snuba/utils/metrics/backends/dualwrite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from __future__ import annotations

import random

from snuba import settings, state
from snuba.utils.metrics.backends.abstract import MetricsBackend
from snuba.utils.metrics.backends.datadog import DatadogMetricsBackend
from snuba.utils.metrics.backends.sentry import SentryMetricsBackend
from snuba.utils.metrics.types import Tags


class SentryDatadogMetricsBackend(MetricsBackend):
"""
A metrics backend that records metrics to Sentry and Datadog.
"""

def __init__(
    self, datadog: DatadogMetricsBackend, sentry: SentryMetricsBackend
) -> None:
    """
    Keep references to the two backends that every metric is fanned out to.
    """
    self.datadog, self.sentry = datadog, sentry

def _use_sentry(self) -> bool:
    """
    Decide whether the current metric should also be written to Sentry.

    Returns False unless the ``use_sentry_metrics`` runtime config is set
    to 1. When it is set, the call is sampled: True is returned with
    probability ``settings.DDM_METRICS_SAMPLE_RATE``.
    """
    # NOTE(review): runtime config values may come back coerced to a number
    # (1) rather than the string "1" -- confirm against state.get_config.
    # Normalizing through str() makes the check hold for either form.
    flag = str(state.get_config("use_sentry_metrics", "0"))
    if flag != "1":
        return False
    return bool(random.random() < settings.DDM_METRICS_SAMPLE_RATE)

def increment(
    self,
    name: str,
    value: int | float = 1,
    tags: Tags | None = None,
    unit: str | None = None,
) -> None:
    """
    Record a counter increment in Datadog and, when sampled in, in Sentry.

    Datadog is always written first; the Sentry write only happens when
    ``_use_sentry()`` returns true for this call.
    """
    payload = (name, value, tags, unit)
    self.datadog.increment(*payload)
    if not self._use_sentry():
        return
    self.sentry.increment(*payload)

def gauge(
    self,
    name: str,
    value: int | float,
    tags: Tags | None = None,
    unit: str | None = None,
) -> None:
    """
    Record a gauge value in Datadog and, when sampled in, in Sentry.

    Datadog is always written first; the Sentry write only happens when
    ``_use_sentry()`` returns true for this call.
    """
    payload = (name, value, tags, unit)
    self.datadog.gauge(*payload)
    if not self._use_sentry():
        return
    self.sentry.gauge(*payload)

def timing(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

timings are distributions -- i don't think we need both. the implementation in the SDK is identical

Copy link
Member Author

@evanh evanh Jan 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They're not identical, because in the SDK they accept different units. The timing ones specifically only accept timing units, and distributions are more flexible.

So we should be using distribution when we are logging things that aren't timing based. See https://github.com/getsentry/snuba/blob/master/snuba/clickhouse/http.py#L253 for an example that should be changed to distribution.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we have a default impl for timing that uses distribution internally or the other way around? The impl really is identical in the SDK (the only thing that differs is type hints and return value) -- i get that you want to have different defaults for unit in each case

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not both? Both exist in the Sentry SDK and in Datadog. Why not use the SDKs as intended?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because in both SDKs they're literally just aliases and now you're forcing every backend to implement those aliases.

the purpose of sentry_sdk.metrics.timing is to provide a decorator and context manager to measure a codeblock -- we're not using that

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> forcing every backend to implement those aliases.

This is true, but we are missing the distribution functionality from all our backends, so I would have to add that no matter what (timing is a subset of distribution). Why not leave timing in place, add distributions, and then our backends line up with both of the production SDKs (DD and Sentry).

> literally just aliases

If the aliases are already available, why not leverage them? Why in turn make timing another alias on distribution? The DD and Sentry SDKs already provide that alias for us.

Copy link
Member

@untitaker untitaker Jan 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> I would have to add that no matter what

you really don't, you could continue to use timing or rename it to distribution if you want to. timing is not a subset of distribution in master branch, it's just distribution with maybe an unfortunate (not-technically-correct) name. the semantic difference was created by adding unit to the interface. i don't see the point in adding more methods that do the same thing but only differ in their default for unit, but I don't feel too strongly about all of this so i'm approving

self,
name: str,
value: int | float,
tags: Tags | None = None,
unit: str | None = None,
) -> None:
self.datadog.timing(name, value, tags, unit)
if self._use_sentry():
self.sentry.timing(name, value, tags, unit)

def distribution(
    self,
    name: str,
    value: int | float,
    tags: Tags | None = None,
    unit: str | None = None,
) -> None:
    """
    Record a distribution sample in Datadog and, when sampled in, in Sentry.

    Datadog is always written first; the Sentry write only happens when
    ``_use_sentry()`` returns true for this call.
    """
    payload = (name, value, tags, unit)
    self.datadog.distribution(*payload)
    if not self._use_sentry():
        return
    self.sentry.distribution(*payload)

def events(
    self,
    title: str,
    text: str,
    alert_type: str,
    priority: str,
    tags: Tags | None = None,
) -> None:
    """
    Send an event to Datadog.

    Only the Datadog backend is called here; nothing is written to the
    Sentry backend for events.
    """
    send_event = self.datadog.events
    send_event(title, text, alert_type, priority, tags)
31 changes: 28 additions & 3 deletions snuba/utils/metrics/backends/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,11 @@ def __validate_tags(self, tags: Tags) -> None:
assert isinstance(v, str)

def increment(
self, name: str, value: Union[int, float] = 1, tags: Optional[Tags] = None
self,
name: str,
value: Union[int, float] = 1,
tags: Optional[Tags] = None,
unit: Optional[str] = None,
) -> None:
if self.__strict:
assert isinstance(name, str)
Expand All @@ -35,7 +39,11 @@ def increment(
self.__validate_tags(tags)

def gauge(
self, name: str, value: Union[int, float], tags: Optional[Tags] = None
self,
name: str,
value: Union[int, float],
tags: Optional[Tags] = None,
unit: Optional[str] = None,
) -> None:
if self.__strict:
assert isinstance(name, str)
Expand All @@ -44,7 +52,24 @@ def gauge(
self.__validate_tags(tags)

def timing(
# NOTE(review): the line directly below is the previous one-line signature as
# shown by the diff view; the expanded parameter list that follows it (which
# adds ``unit``) is its replacement. Only one of the two belongs in the file.
self, name: str, value: Union[int, float], tags: Optional[Tags] = None
self,
name: str,
value: Union[int, float],
tags: Optional[Tags] = None,
unit: Optional[str] = None,
) -> None:
# Nothing is emitted here: the body only type-checks its arguments with
# assertions. ``unit`` is accepted but never inspected.
if self.__strict:
assert isinstance(name, str)
assert isinstance(value, (int, float))
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# tag validation below is nested under the strict check -- confirm in the file.
if tags is not None:
self.__validate_tags(tags)

def distribution(
self,
name: str,
value: Union[int, float],
tags: Optional[Tags] = None,
unit: Optional[str] = None,
) -> None:
if self.__strict:
assert isinstance(name, str)
Expand Down
62 changes: 62 additions & 0 deletions snuba/utils/metrics/backends/sentry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from __future__ import annotations

from sentry_sdk import metrics

from snuba.utils.metrics.backends.abstract import MetricsBackend
from snuba.utils.metrics.types import Tags


class SentryMetricsBackend(MetricsBackend):
    """
    Metrics backend that forwards every metric to the Sentry SDK's
    ``metrics`` module.
    """

    def __init__(self) -> None:
        # Nothing to configure: this backend keeps no state of its own.
        pass

    def increment(
        self,
        name: str,
        value: int | float = 1,
        tags: Tags | None = None,
        unit: str | None = None,
    ) -> None:
        """
        Increment a counter, reporting the unit as "none" when not given.
        """
        counter_unit = unit if unit else "none"
        metrics.incr(name, value, counter_unit, tags)

    def gauge(
        self,
        name: str,
        value: int | float,
        tags: Tags | None = None,
        unit: str | None = None,
    ) -> None:
        """
        Emit a gauge value, reporting the unit as "none" when not given.
        """
        gauge_unit = unit if unit else "none"
        metrics.gauge(name, value, gauge_unit, tags)

    def timing(
        self,
        name: str,
        value: int | float,
        tags: Tags | None = None,
        unit: str | None = None,
    ) -> None:
        """
        Emit a timing sample, reporting the unit as "millisecond" when not
        given.
        """
        timing_unit = unit if unit else "millisecond"
        # The SDK annotates the unit with a restricted set of duration units,
        # while this interface accepts any string, hence the type: ignore.
        metrics.timing(name, value, timing_unit, tags)  # type: ignore

    def distribution(
        self,
        name: str,
        value: int | float,
        tags: Tags | None = None,
        unit: str | None = None,
    ) -> None:
        """
        Emit a distribution sample, reporting the unit as "none" when not
        given.
        """
        distribution_unit = unit if unit else "none"
        metrics.distribution(name, value, distribution_unit, tags)

    def events(
        self,
        title: str,
        text: str,
        alert_type: str,
        priority: str,
        tags: Tags | None = None,
    ) -> None:
        """
        Do nothing: events are not forwarded by this backend.
        """
        return None
Loading
Loading