From bc70b9e3401bccb69e362acf16b9e6bf630aa3a2 Mon Sep 17 00:00:00 2001 From: Ash <0Calories@users.noreply.github.com> Date: Wed, 30 Oct 2024 11:17:26 -0400 Subject: [PATCH] ref(mongodb): Change grouping strategy for arrays and `buffer` (#4186) Couple of changes here: - Arrays in MongoDB queries were problematic for grouping, because any variant of the same query that has a different number of elements would result in an entirely **new group** being created. This resulted in high cardinality for a few orgs. - Removes the 'buffer' key-value pair from queries, since this is an under-the-hood mechanism that has very little relevance to the query itself. This causes unnecessary noise and makes queries hard to read, so we can scrub them out entirely. ### Examples ![image](https://github.com/user-attachments/assets/d360e760-413e-424e-b5f6-85b70684844b) This would become: ``` "_id": {} ``` ![image](https://github.com/user-attachments/assets/93c73cfc-a6b5-4a56-bc85-e858c31dbe21) This would become: ``` "filter": { "_id": { "$in": ["..."] } } ``` --- CHANGELOG.md | 1 + .../src/normalize/span/description/mod.rs | 30 +++++++++++++++---- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fec1353e90..bb3310320e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - Add a metric that counts span volume in the root project for dynamic sampling (`c:spans/count_per_root_project@none`). ([#4134](https://github.com/getsentry/relay/pull/4134)) - Add a tag `target_project_id` to both root project metrics for dynamic sampling (`c:transactions/count_per_root_project@none` and `c:spans/count_per_root_project@none`) which shows the flow trace traffic from root to target projects. ([#4170](https://github.com/getsentry/relay/pull/4170)) +- Remove `buffer` entries and scrub array contents from MongoDB queries. 
([#4186](https://github.com/getsentry/relay/pull/4186)) - Use `DateTime` instead of `Instant` for tracking the received time of the `Envelope`. ([#4184](https://github.com/getsentry/relay/pull/4184)) ## 24.10.0 diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index a912d47ca1..a8ade50f29 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -573,6 +573,9 @@ fn scrub_mongodb_query(query: &str, command: &str, collection: &str) -> Option { - for value in arr.iter_mut() { - scrub_mongodb_visit_node(value, recursion_limit - 1); - } + arr.clear(); + arr.push(Value::String("...".to_owned())); } Value::String(str) => { str.clear(); @@ -1572,10 +1574,10 @@ mod tests { mongodb_scrubbing_test!( mongodb_max_depth, - r#"{"insert": "coll", "documents": [{"foo": {"bar": {"baz": "quux"}}}]}"#, - "insert", + r#"{"update": "coll", "updates": {"q": {"_id": "1"}, "u": {"$set": {"foo": {"bar": {"baz": "quux"}}}}}}"#, + "update", "coll", - r#"{"documents":[{"foo":{"bar":"?"}}],"insert":"coll"}"# + r#"{"update":"coll","updates":{"q":{"_id":"?"},"u":{"$set":{"foo":"?"}}}}"# ); mongodb_scrubbing_test!( @@ -1585,4 +1587,20 @@ mod tests { "documents001", r#"{"find":"documents{%s}","showRecordId":"?"}"# ); + + mongodb_scrubbing_test!( + mongodb_query_with_array, + r#"{"insert": "documents", "documents": [{"foo": "bar"}, {"baz": "quux"}, {"qux": "quuz"}]}"#, + "insert", + "documents", + r#"{"documents":["..."],"insert":"documents"}"# + ); + + mongodb_scrubbing_test!( + mongodb_query_with_buffer, + r#"{"insert": "documents", "buffer": {"0": "a", "1": "b", "2": "c"}, "documents": [{"foo": "bar"}]}"#, + "insert", + "documents", + r#"{"documents":["..."],"insert":"documents"}"# + ); }