Skip to content

Commit

Permalink
ref(mongodb): Change grouping strategy for arrays and buffer (#4186)
Browse files Browse the repository at this point in the history
A couple of changes here:

- Arrays in MongoDB queries were problematic for grouping, because any
variant of the same query with a different number of array elements would
result in an entirely **new group** being created. This resulted in high
cardinality for a few orgs. Array contents are now replaced with a single
`"..."` placeholder, so these variants fall into the same group.
- Removes the `buffer` key-value pair from queries, since it is an
under-the-hood mechanism with very little relevance to the query
itself. It adds unnecessary noise and makes queries hard to read, so
it is scrubbed out entirely.

### Examples


![image](https://github.com/user-attachments/assets/d360e760-413e-424e-b5f6-85b70684844b)

This would become:
```
"_id": {}
```


![image](https://github.com/user-attachments/assets/93c73cfc-a6b5-4a56-bc85-e858c31dbe21)
This would become:
```
"filter": {
	"_id": {
		"$in": ["..."]
	}
}
```
  • Loading branch information
0Calories authored Oct 30, 2024
1 parent 733363d commit bc70b9e
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 6 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

- Add a metric that counts span volume in the root project for dynamic sampling (`c:spans/count_per_root_project@none`). ([#4134](https://github.com/getsentry/relay/pull/4134))
- Add a tag `target_project_id` to both root project metrics for dynamic sampling (`c:transactions/count_per_root_project@none` and `c:spans/count_per_root_project@none`) which shows the flow trace traffic from root to target projects. ([#4170](https://github.com/getsentry/relay/pull/4170))
- Remove `buffer` entries and scrub array contents from MongoDB queries. ([#4186](https://github.com/getsentry/relay/pull/4186))
- Use `DateTime<Utc>` instead of `Instant` for tracking the received time of the `Envelope`. ([#4184](https://github.com/getsentry/relay/pull/4184))

## 24.10.0
Expand Down
30 changes: 24 additions & 6 deletions relay-event-normalization/src/normalize/span/description/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -573,6 +573,9 @@ fn scrub_mongodb_query(query: &str, command: &str, collection: &str) -> Option<S

let root = query.as_object_mut()?;

// Buffers are unnecessary noise so the entire key-value pair should be removed
root.remove("buffer");

for value in root.values_mut() {
scrub_mongodb_visit_node(value, 3);
}
Expand Down Expand Up @@ -607,9 +610,8 @@ fn scrub_mongodb_visit_node(value: &mut Value, recursion_limit: usize) {
}
}
Value::Array(arr) => {
for value in arr.iter_mut() {
scrub_mongodb_visit_node(value, recursion_limit - 1);
}
arr.clear();
arr.push(Value::String("...".to_owned()));
}
Value::String(str) => {
str.clear();
Expand Down Expand Up @@ -1572,10 +1574,10 @@ mod tests {

mongodb_scrubbing_test!(
mongodb_max_depth,
r#"{"insert": "coll", "documents": [{"foo": {"bar": {"baz": "quux"}}}]}"#,
"insert",
r#"{"update": "coll", "updates": {"q": {"_id": "1"}, "u": {"$set": {"foo": {"bar": {"baz": "quux"}}}}}}"#,
"update",
"coll",
r#"{"documents":[{"foo":{"bar":"?"}}],"insert":"coll"}"#
r#"{"update":"coll","updates":{"q":{"_id":"?"},"u":{"$set":{"foo":"?"}}}}"#
);

mongodb_scrubbing_test!(
Expand All @@ -1585,4 +1587,20 @@ mod tests {
"documents001",
r#"{"find":"documents{%s}","showRecordId":"?"}"#
);

// Arrays are collapsed to a single "..." placeholder no matter how many
// elements they contain, so queries that differ only in array length scrub
// to the same description.
mongodb_scrubbing_test!(
mongodb_query_with_array,
r#"{"insert": "documents", "documents": [{"foo": "bar"}, {"baz": "quux"}, {"qux": "quuz"}]}"#,
"insert",
"documents",
r#"{"documents":["..."],"insert":"documents"}"#
);

// A top-level `buffer` entry is dropped from the scrubbed output entirely
// (key and value), while the `documents` array is still collapsed to "...".
mongodb_scrubbing_test!(
mongodb_query_with_buffer,
r#"{"insert": "documents", "buffer": {"0": "a", "1": "b", "2": "c"}, "documents": [{"foo": "bar"}]}"#,
"insert",
"documents",
r#"{"documents":["..."],"insert":"documents"}"#
);
}

0 comments on commit bc70b9e

Please sign in to comment.