Skip to content

Commit

Permalink
feat(metric-meta): Normalize invalid metric names (#2769)
Browse files Browse the repository at this point in the history
Sentry currently generates metrics with dashes in their names. Generally
we want to normalize invalid metric names instead of dropping them. The
Python SDK already has this logic, we're now also implementing it in
relay.
Consecutive invalid characters will be replaced with a single
underscore. Metric names still need to start with a valid character.
  • Loading branch information
Dav1dde authored Nov 27, 2023
1 parent 9c571cf commit b5a2c23
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 24 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

- `normalize_performance_score` now handles `PerformanceScoreProfile` configs with zero weight components and component weight sums of any number greater than 0. ([#2756](https://github.com/getsentry/relay/pull/2756))

**Features**:

- Normalize invalid metric names. ([#2769](https://github.com/getsentry/relay/pull/2769))

**Internal**:

- Add support for metric metadata. ([#2751](https://github.com/getsentry/relay/pull/2751))
Expand Down
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions relay-base-schema/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ license-file = "../LICENSE.md"
publish = false

[dependencies]
regex = { workspace = true }
relay-common = { path = "../relay-common" }
relay-protocol = { path = "../relay-protocol" }
schemars = { workspace = true, optional = true }
Expand Down
30 changes: 20 additions & 10 deletions relay-base-schema/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,31 @@
//! Type definitions for Sentry metrics.

use std::fmt;
use regex::Regex;
use std::{borrow::Cow, fmt, sync::OnceLock};

use relay_protocol::{Annotated, Empty, ErrorKind, FromValue, IntoValue, SkipSerialization, Value};

/// Validates a metric name. This is the statsd name, i.e. without type or unit.
/// Validates a metric name and normalizes it. This is the statsd name, i.e. without type or unit.
///
/// Metric names cannot be empty, must begin with a letter and can consist of ASCII alphanumerics,
/// underscores, slashes and periods.
pub fn is_valid_metric_name(name: &str) -> bool {
let mut iter = name.as_bytes().iter();
if let Some(first_byte) = iter.next() {
if first_byte.is_ascii_alphabetic() {
return iter.all(|b| b.is_ascii_alphanumeric() || matches!(b, b'.' | b'_' | b'/'));
}
/// underscores, dashes, slashes and periods.
///
/// The function validates that the first character of the metric must be ASCII alphanumeric,
/// later consecutive invalid characters in the name will be replaced with underscores.
pub fn try_normalize_metric_name(name: &str) -> Option<Cow<'_, str>> {
static NORMALIZE_RE: OnceLock<Regex> = OnceLock::new();

if !can_be_valid_metric_name(name) {
return None;
}
false

let normalize_re = NORMALIZE_RE.get_or_init(|| Regex::new("[^a-zA-Z0-9_/.]+").unwrap());
Some(normalize_re.replace_all(name, "_"))
}

/// Returns whether [`try_normalize_metric_name`] can normalize the passed name.
pub fn can_be_valid_metric_name(name: &str) -> bool {
name.starts_with(|c: char| c.is_ascii_alphabetic())
}

/// The unit of measurement of a metric value.
Expand Down
6 changes: 4 additions & 2 deletions relay-event-normalization/src/normalize/processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ use std::mem;
use std::ops::Range;

use chrono::{DateTime, Duration, Utc};
use relay_base_schema::metrics::{is_valid_metric_name, DurationUnit, FractionUnit, MetricUnit};
use relay_base_schema::metrics::{
can_be_valid_metric_name, DurationUnit, FractionUnit, MetricUnit,
};
use relay_common::time::UnixTimestamp;
use relay_event_schema::processor::{
self, MaxChars, ProcessingAction, ProcessingResult, ProcessingState, Processor,
Expand Down Expand Up @@ -959,7 +961,7 @@ fn remove_invalid_measurements(
None => return false,
};

if !is_valid_metric_name(name) {
if !can_be_valid_metric_name(name) {
meta.add_error(Error::invalid(format!(
"Metric name contains invalid characters: \"{name}\""
)));
Expand Down
4 changes: 2 additions & 2 deletions relay-metrics/src/bucket.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1051,8 +1051,8 @@ mod tests {
fn test_parse_invalid_name() {
let s = "foo#bar:42|c";
let timestamp = UnixTimestamp::from_secs(4711);
let metric = Bucket::parse(s.as_bytes(), timestamp);
assert!(metric.is_err());
let metric = Bucket::parse(s.as_bytes(), timestamp).unwrap();
assert_eq!(metric.name, "c:custom/foo_bar@none");
}

#[test]
Expand Down
68 changes: 58 additions & 10 deletions relay-metrics/src/protocol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,14 +269,18 @@ impl<'a> MetricResourceIdentifier<'a> {
) -> Result<Self, ParseMetricError> {
let (name_and_namespace, unit) = parse_name_unit(string).ok_or(ParseMetricError(()))?;

let (raw_namespace, name) = name_and_namespace
.split_once('/')
.unwrap_or(("custom", name_and_namespace));
let (namespace, name) = match name_and_namespace.split_once('/') {
Some((raw_namespace, name)) => (raw_namespace.parse()?, name),
None => (MetricNamespace::Custom, name_and_namespace),
};

let name = relay_base_schema::metrics::try_normalize_metric_name(name)
.ok_or(ParseMetricError(()))?;

Ok(MetricResourceIdentifier {
ty,
name: Cow::Borrowed(name),
namespace: raw_namespace.parse()?,
name,
namespace,
unit,
})
}
Expand Down Expand Up @@ -350,14 +354,11 @@ pub(crate) fn validate_tag_value(tag_value: &mut String) {

/// Parses the `name[@unit]` part of a metric string.
///
/// Returns [`MetricUnit::None`] if no unit is specified. Returns `None` if the name or value are
/// invalid.
/// Returns [`MetricUnit::None`] if no unit is specified. Returns `None` if value is invalid.
/// The name is not normalized.
fn parse_name_unit(string: &str) -> Option<(&str, MetricUnit)> {
let mut components = string.split('@');
let name = components.next()?;
if !relay_base_schema::metrics::is_valid_metric_name(name) {
return None;
}

let unit = match components.next() {
Some(s) => s.parse().ok()?,
Expand Down Expand Up @@ -448,6 +449,53 @@ mod tests {
assert!(MetricResourceIdentifier::parse("foo").is_err());
}

#[test]
fn test_invalid_names_should_normalize() {
assert_eq!(
MetricResourceIdentifier::parse("c:f?o").unwrap().name,
"f_o"
);
assert_eq!(
MetricResourceIdentifier::parse("c:f??o").unwrap().name,
"f_o"
);
assert_eq!(
MetricResourceIdentifier::parse("c:föo").unwrap().name,
"f_o"
);
assert_eq!(
MetricResourceIdentifier::parse("c:custom/f?o")
.unwrap()
.name,
"f_o"
);
assert_eq!(
MetricResourceIdentifier::parse("c:custom/f??o")
.unwrap()
.name,
"f_o"
);
assert_eq!(
MetricResourceIdentifier::parse("c:custom/föo")
.unwrap()
.name,
"f_o"
);
}

#[test]
fn test_normalize_dash_to_underscore() {
assert_eq!(
MetricResourceIdentifier::parse("d:foo.bar.blob-size@second").unwrap(),
MetricResourceIdentifier {
ty: MetricType::Distribution,
namespace: MetricNamespace::Custom,
name: "foo.bar.blob_size".into(),
unit: MetricUnit::Duration(DurationUnit::Second),
},
);
}

#[test]
fn test_deserialize_mri() {
assert_eq!(
Expand Down

0 comments on commit b5a2c23

Please sign in to comment.