diff --git a/relay-metrics/src/aggregator.rs b/relay-metrics/src/aggregator.rs index 3e20c9fd45..1065c059d0 100644 --- a/relay-metrics/src/aggregator.rs +++ b/relay-metrics/src/aggregator.rs @@ -881,7 +881,7 @@ mod tests { Bucket { timestamp, width: 0, - name: "c:transactions/foo".into(), + name: "c:transactions/foo@none".into(), value: BucketValue::counter(42.into()), tags: BTreeMap::new(), metadata: BucketMetadata::new(timestamp), @@ -1387,52 +1387,7 @@ mod tests { } #[test] - fn test_validate_bucket_key_chars() { - let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); - - let bucket_key = BucketKey { - project_key, - timestamp: UnixTimestamp::now(), - metric_name: "c:transactions/hergus.bergus".into(), - tags: { - let mut tags = BTreeMap::new(); - // There are some SDKs which mess up content encodings, and interpret the raw bytes - // of an UTF-16 string as UTF-8. Leading to ASCII - // strings getting null-bytes interleaved. - // - // Somehow those values end up as release tag in sessions, while in error events we - // haven't observed this malformed encoding. We believe it's slightly better to - // strip out NUL-bytes instead of dropping the tag such that those values line up - // again across sessions and events. Should that cause too high cardinality we'll - // have to drop tags. - // - // Note that releases are validated separately against much stricter character set, - // but the above idea should still apply to other tags. - tags.insert( - "is_it_garbage".to_owned(), - "a\0b\0s\0o\0l\0u\0t\0e\0l\0y".to_owned(), - ); - tags.insert("another\0garbage".to_owned(), "bye".to_owned()); - tags - }, - extracted_from_indexed: false, - }; - - let mut bucket_key = validate_bucket_key(bucket_key, &test_config()).unwrap(); - - assert_eq!(bucket_key.tags.len(), 1); - assert_eq!( - bucket_key.tags.get("is_it_garbage"), - Some(&"absolutely".to_owned()) - ); - assert_eq!(bucket_key.tags.get("another\0garbage"), None); - - bucket_key.metric_name = "hergus\0bergus".into(); - validate_bucket_key(bucket_key, &test_config()).unwrap_err(); - } - - #[test] - fn test_validate_bucket_key_str_lens() { + fn test_validate_bucket_key_str_length() { relay_test::setup(); let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap(); diff --git a/relay-metrics/src/protocol.rs b/relay-metrics/src/protocol.rs index 87db66c7dc..8ef5962ad6 100644 --- a/relay-metrics/src/protocol.rs +++ b/relay-metrics/src/protocol.rs @@ -56,7 +56,7 @@ pub fn normalize_bucket(bucket: &mut Bucket) -> Result<(), NormalizationError> { /// Normalization includes expanding valid metric names without a namespace to the default /// namespace. /// -/// Invalid metric names are rejected with [`Error`]. +/// Invalid metric names are rejected with [`NormalizationError`]. fn normalize_metric_name(name: &mut MetricName) -> Result<(), NormalizationError> { *name = match MetricResourceIdentifier::parse(name) { Ok(mri) => { @@ -86,7 +86,7 @@ fn normalize_metric_tags(tags: &mut MetricTags) { return false; } - validate_tag_value(tag_value); + normalize_tag_value(tag_value); true }); @@ -143,15 +143,15 @@ pub(crate) fn escape_tag_value(raw: &str) -> String { /// [`validate_tag_value`]. pub(crate) fn unescape_tag_value(escaped: &str) -> Result { let mut unescaped = unescaper::unescape(escaped)?; - validate_tag_value(&mut unescaped); + normalize_tag_value(&mut unescaped); Ok(unescaped) } -/// Validates a tag value. +/// Normalizes a tag value. /// /// Tag values are never entirely rejected, but invalid characters (control characters) are stripped /// out. -pub(crate) fn validate_tag_value(tag_value: &mut String) { +pub(crate) fn normalize_tag_value(tag_value: &mut String) { tag_value.retain(|c| !c.is_control()); } @@ -167,6 +167,10 @@ pub(crate) fn hash_set_value(string: &str) -> u32 { #[cfg(test)] mod tests { + use insta::assert_json_snapshot; + + use crate::BucketValue; + use super::*; #[test] @@ -226,4 +230,101 @@ mod tests { assert_eq!(escape_tag_value("plain\u{07}text"), "plaintext"); assert_eq!(escape_tag_value("plain\u{9c}text"), "plaintext"); } + + #[test] + fn test_normalize_invalid_name() { + let mut bucket = Bucket { + timestamp: UnixTimestamp::from_secs(5000), + width: 0, + name: "c:transactions/\0hergus.bergus@none".into(), + value: BucketValue::Counter(0.into()), + tags: Default::default(), + metadata: Default::default(), + }; + + assert!(matches!( + normalize_bucket(&mut bucket), + Err(NormalizationError::InvalidMetricName(_)) + )); + } + + #[test] + fn test_normalize_invalid_namespace() { + let mut bucket = Bucket { + timestamp: UnixTimestamp::from_secs(5000), + width: 0, + name: "c:lol/hergus.bergus@none".into(), + value: BucketValue::Counter(0.into()), + tags: Default::default(), + metadata: Default::default(), + }; + + assert!(matches!( + normalize_bucket(&mut bucket), + Err(NormalizationError::UnsupportedNamespace) + )); + } + + #[test] + fn test_normalize_name() { + let mut bucket = Bucket { + timestamp: UnixTimestamp::from_secs(5000), + width: 0, + name: "c:hergus\0\0bergus".into(), + value: BucketValue::Counter(0.into()), + tags: Default::default(), + metadata: Default::default(), + }; + + normalize_bucket(&mut bucket).unwrap(); + + assert_eq!(&bucket.name, "c:custom/hergus_bergus@none"); + } + + #[test] + fn test_normalize_tag_key_chars() { + let mut bucket = Bucket { + timestamp: UnixTimestamp::from_secs(5000), + width: 0, + name: "c:transactions/hergus.bergus".into(), + value: BucketValue::Counter(0.into()), + tags: { + let mut tags = MetricTags::new(); + // There are some SDKs which mess up content encodings, and interpret the raw bytes + // of an UTF-16 string as UTF-8. Leading to ASCII + // strings getting null-bytes interleaved. + // + // Somehow those values end up as release tag in sessions, while in error events we + // haven't observed this malformed encoding. We believe it's slightly better to + // strip out NUL-bytes instead of dropping the tag such that those values line up + // again across sessions and events. Should that cause too high cardinality we'll + // have to drop tags. + // + // Note that releases are validated separately against much stricter character set, + // but the above idea should still apply to other tags. + tags.insert( + "is_it_garbage".to_owned(), + "a\0b\0s\0o\0l\0u\0t\0e\0l\0y".to_owned(), + ); + tags.insert("another\0garbage".to_owned(), "bye".to_owned()); + tags + }, + metadata: Default::default(), + }; + + normalize_bucket(&mut bucket).unwrap(); + + assert_json_snapshot!(bucket, @r###" + { + "timestamp": 5000, + "width": 0, + "name": "c:transactions/hergus.bergus@none", + "type": "c", + "value": 0.0, + "tags": { + "is_it_garbage": "absolutely" + } + } + "###); + } }