Commit

add normalization tests
Dav1dde committed Aug 7, 2024
1 parent 37e53c5 commit a6b1002
Showing 2 changed files with 108 additions and 52 deletions.
49 changes: 2 additions & 47 deletions relay-metrics/src/aggregator.rs
@@ -881,7 +881,7 @@ mod tests {
Bucket {
timestamp,
width: 0,
name: "c:transactions/foo".into(),
name: "c:transactions/foo@none".into(),
value: BucketValue::counter(42.into()),
tags: BTreeMap::new(),
metadata: BucketMetadata::new(timestamp),
@@ -1387,52 +1387,7 @@ mod tests {
}

#[test]
fn test_validate_bucket_key_chars() {
let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap();

let bucket_key = BucketKey {
project_key,
timestamp: UnixTimestamp::now(),
metric_name: "c:transactions/hergus.bergus".into(),
tags: {
let mut tags = BTreeMap::new();
// There are some SDKs which mess up content encodings and interpret the raw bytes
// of a UTF-16 string as UTF-8, leading to ASCII
// strings getting null bytes interleaved.
//
// Somehow those values end up as the release tag in sessions, while in error events we
// haven't observed this malformed encoding. We believe it's slightly better to
// strip out NUL bytes instead of dropping the tag, so that those values line up
// again across sessions and events. Should that cause too high cardinality, we'll
// have to drop tags.
//
// Note that releases are validated separately against a much stricter character set,
// but the above idea should still apply to other tags.
tags.insert(
"is_it_garbage".to_owned(),
"a\0b\0s\0o\0l\0u\0t\0e\0l\0y".to_owned(),
);
tags.insert("another\0garbage".to_owned(), "bye".to_owned());
tags
},
extracted_from_indexed: false,
};

let mut bucket_key = validate_bucket_key(bucket_key, &test_config()).unwrap();

assert_eq!(bucket_key.tags.len(), 1);
assert_eq!(
bucket_key.tags.get("is_it_garbage"),
Some(&"absolutely".to_owned())
);
assert_eq!(bucket_key.tags.get("another\0garbage"), None);

bucket_key.metric_name = "hergus\0bergus".into();
validate_bucket_key(bucket_key, &test_config()).unwrap_err();
}

#[test]
fn test_validate_bucket_key_str_lens() {
fn test_validate_bucket_key_str_length() {
relay_test::setup();
let project_key = ProjectKey::parse("a94ae32be2584e0bbd7a4cbb95971fee").unwrap();

111 changes: 106 additions & 5 deletions relay-metrics/src/protocol.rs
@@ -56,7 +56,7 @@ pub fn normalize_bucket(bucket: &mut Bucket) -> Result<(), NormalizationError> {
/// Normalization includes expanding valid metric names without a namespace to the default
/// namespace.
///
/// Invalid metric names are rejected with [`Error`].
/// Invalid metric names are rejected with [`NormalizationError`].
fn normalize_metric_name(name: &mut MetricName) -> Result<(), NormalizationError> {
*name = match MetricResourceIdentifier::parse(name) {
Ok(mri) => {
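
An editorial aside on the doc comment above: the namespace and unit expansion can be sketched as a small standalone function. This is an illustration only, not Relay's implementation, and it skips name sanitization entirely. The defaults of `custom` for the namespace and `none` for the unit are taken from the new tests further down; the name `endpoint.hits` is hypothetical.

/// Illustration only, not Relay's implementation: expand a metric name with a
/// type prefix into a fully qualified MRI of the form
/// `<type>:<namespace>/<name>@<unit>`, filling in default namespace and unit.
fn expand_to_default_mri(name_with_type: &str) -> Option<String> {
    let (ty, rest) = name_with_type.split_once(':')?;
    // Split off an explicit unit, defaulting to `none`.
    let (path, unit) = match rest.split_once('@') {
        Some((path, unit)) => (path, unit),
        None => (rest, "none"),
    };
    // Default to the `custom` namespace when none is given.
    let (namespace, name) = match path.split_once('/') {
        Some((ns, name)) => (ns, name),
        None => ("custom", path),
    };
    Some(format!("{ty}:{namespace}/{name}@{unit}"))
}

#[test]
fn expands_defaults() {
    assert_eq!(
        expand_to_default_mri("c:endpoint.hits").as_deref(),
        Some("c:custom/endpoint.hits@none")
    );
    assert_eq!(
        expand_to_default_mri("c:transactions/foo").as_deref(),
        Some("c:transactions/foo@none")
    );
}
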
@@ -86,7 +86,7 @@ fn normalize_metric_tags(tags: &mut MetricTags) {
return false;
}

validate_tag_value(tag_value);
normalize_tag_value(tag_value);

true
});
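
The retain closure above is only partially visible in this hunk. A minimal standalone sketch of the pattern, not Relay's implementation, assuming (as `test_normalize_tag_key_chars` below suggests) that a key containing control characters drops the whole entry while values merely have control characters stripped:

use std::collections::BTreeMap;

/// Sketch only, not Relay's implementation: normalize a tag map in place by
/// dropping entries with invalid keys and stripping control characters from values.
fn normalize_tags_sketch(tags: &mut BTreeMap<String, String>) {
    tags.retain(|key, value| {
        if key.chars().any(|c| c.is_control()) {
            // Drop the whole entry when the key is invalid.
            return false;
        }
        // Keep the entry, but strip control characters from the value.
        value.retain(|c| !c.is_control());
        true
    });
}

With the inputs from that test, the "another\0garbage" key is dropped entirely and the value of "is_it_garbage" normalizes to "absolutely".
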
@@ -143,15 +143,15 @@ pub(crate) fn escape_tag_value(raw: &str) -> String {
/// [`validate_tag_value`].
pub(crate) fn unescape_tag_value(escaped: &str) -> Result<String, UnescapeError> {
let mut unescaped = unescaper::unescape(escaped)?;
validate_tag_value(&mut unescaped);
normalize_tag_value(&mut unescaped);
Ok(unescaped)
}

/// Validates a tag value.
/// Normalizes a tag value.
///
/// Tag values are never entirely rejected, but invalid characters (control characters) are stripped
/// out.
pub(crate) fn validate_tag_value(tag_value: &mut String) {
pub(crate) fn normalize_tag_value(tag_value: &mut String) {
tag_value.retain(|c| !c.is_control());
}
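
A quick illustration of the retain-based stripping above, using the same tag value as the new test below; this is a standalone sketch rather than code from this commit:

#[test]
fn strips_control_characters() {
    // Same stripping as `normalize_tag_value`: NUL bytes and other control
    // characters are removed, all other characters are kept.
    let mut tag_value = String::from("a\0b\0s\0o\0l\0u\0t\0e\0l\0y");
    tag_value.retain(|c| !c.is_control());
    assert_eq!(tag_value, "absolutely");
}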

@@ -167,6 +167,10 @@ pub(crate) fn hash_set_value(string: &str) -> u32 {

#[cfg(test)]
mod tests {
use insta::assert_json_snapshot;

use crate::BucketValue;

use super::*;

#[test]
@@ -226,4 +230,101 @@ mod tests {
assert_eq!(escape_tag_value("plain\u{07}text"), "plaintext");
assert_eq!(escape_tag_value("plain\u{9c}text"), "plaintext");
}

#[test]
fn test_normalize_invalid_name() {
let mut bucket = Bucket {
timestamp: UnixTimestamp::from_secs(5000),
width: 0,
name: "c:transactions/\0hergus.bergus@none".into(),
value: BucketValue::Counter(0.into()),
tags: Default::default(),
metadata: Default::default(),
};

assert!(matches!(
normalize_bucket(&mut bucket),
Err(NormalizationError::InvalidMetricName(_))
));
}

#[test]
fn test_normalize_invalid_namespace() {
let mut bucket = Bucket {
timestamp: UnixTimestamp::from_secs(5000),
width: 0,
name: "c:lol/hergus.bergus@none".into(),
value: BucketValue::Counter(0.into()),
tags: Default::default(),
metadata: Default::default(),
};

assert!(matches!(
normalize_bucket(&mut bucket),
Err(NormalizationError::UnsupportedNamespace)
));
}

#[test]
fn test_normalize_name() {
let mut bucket = Bucket {
timestamp: UnixTimestamp::from_secs(5000),
width: 0,
name: "c:hergus\0\0bergus".into(),
value: BucketValue::Counter(0.into()),
tags: Default::default(),
metadata: Default::default(),
};

normalize_bucket(&mut bucket).unwrap();

assert_eq!(&bucket.name, "c:custom/hergus_bergus@none");
}

#[test]
fn test_normalize_tag_key_chars() {
let mut bucket = Bucket {
timestamp: UnixTimestamp::from_secs(5000),
width: 0,
name: "c:transactions/hergus.bergus".into(),
value: BucketValue::Counter(0.into()),
tags: {
let mut tags = MetricTags::new();
// There are some SDKs which mess up content encodings and interpret the raw bytes
// of a UTF-16 string as UTF-8, leading to ASCII
// strings getting null bytes interleaved.
//
// Somehow those values end up as the release tag in sessions, while in error events we
// haven't observed this malformed encoding. We believe it's slightly better to
// strip out NUL bytes instead of dropping the tag, so that those values line up
// again across sessions and events. Should that cause too high cardinality, we'll
// have to drop tags.
//
// Note that releases are validated separately against a much stricter character set,
// but the above idea should still apply to other tags.
tags.insert(
"is_it_garbage".to_owned(),
"a\0b\0s\0o\0l\0u\0t\0e\0l\0y".to_owned(),
);
tags.insert("another\0garbage".to_owned(), "bye".to_owned());
tags
},
metadata: Default::default(),
};

normalize_bucket(&mut bucket).unwrap();

assert_json_snapshot!(bucket, @r###"
{
"timestamp": 5000,
"width": 0,
"name": "c:transactions/hergus.bergus@none",
"type": "c",
"value": 0.0,
"tags": {
"is_it_garbage": "absolutely"
}
}
"###);
}
}
