Skip to content

Commit

Permalink
feat(spans): Scrub repeated VALUES from SQL queries (#2302)
Browse files Browse the repository at this point in the history
Collapse the placeholder tuples of a SQL `VALUES` clause into a single
`(%s)` so that spans differing only in the number of inserted rows or
columns are grouped together, e.g.

```SQL
INSERT INTO "sentry_commitfilechange" ("organization_id", "commit_id", "filename", "type")
VALUES (%s, %s, %s, %s), (%s, %s, %s, %s), (%s, %s, %s, %s)
```

becomes

```SQL
INSERT INTO "sentry_commitfilechange" ("organization_id", "commit_id", "filename", "type")
VALUES (%s)
```

See [internal
issue](https://www.notion.so/sentry/Parameterize-INSERT-VALUES-statements-02f612c61ca244d38cff13d7d6a7d251?pvs=4).
  • Loading branch information
jjbayer authored Jul 12, 2023
1 parent 44f98d4 commit 0002cff
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 0 deletions.
2 changes: 2 additions & 0 deletions relay-general/src/store/regexes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ pub static SQL_NORMALIZER_REGEX: Lazy<Regex> = Lazy::new(|| {
r#"(?xi)
# Capture parameters in `IN` statements.
((?-x)IN \((?P<in>(%s|\$?\d+|\?)(\s*,\s*(%s|\$?\d+|\?))*)\)) |
# Capture parameters in `VALUES`.
((?-x)VALUES \((?P<values>(\)\s*,\s*\(|%s|\$?\d+|\?|,|\s)*)\)) |
# Capture `SAVEPOINT` savepoints.
((?-x)SAVEPOINT (?P<savepoint>(?:(?:"[^"]+")|(?:'[^']+')|(?:`[^`]+`)|(?:[a-z]\w+)))) |
# Capture single-quoted strings, including the remaining substring if `\'` is found.
Expand Down
14 changes: 14 additions & 0 deletions relay-general/src/store/transactions/processor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2918,6 +2918,20 @@ mod tests {
""
);

// Single-tuple case: a `VALUES` list with four `%s` placeholders is expected
// to be collapsed to `VALUES (%s)`.
// NOTE(review): the macro is defined outside this hunk; the arguments appear
// to be (test name, input description, span op, expected scrubbed
// description) — confirm against the macro definition.
span_description_test!(
span_description_scrub_values,
"INSERT INTO a (b, c, d, e) VALUES (%s, %s, %s, %s)",
"db.sql.query",
"INSERT INTO a (b, c, d, e) VALUES (%s)"
);

// Multi-tuple case: three comma-separated `(%s, %s, %s, %s)` tuples are
// expected to be collapsed to a single `VALUES (%s)`, while the trailing
// `ON CONFLICT DO NOTHING` clause after the tuples is left untouched.
// NOTE(review): the macro is defined outside this hunk; the arguments appear
// to be (test name, input description, span op, expected scrubbed
// description) — confirm against the macro definition.
span_description_test!(
span_description_scrub_values_multi,
"INSERT INTO a (b, c, d, e) VALUES (%s, %s, %s, %s), (%s, %s, %s, %s), (%s, %s, %s, %s) ON CONFLICT DO NOTHING",
"db.sql.query",
"INSERT INTO a (b, c, d, e) VALUES (%s) ON CONFLICT DO NOTHING"
);

span_description_test!(
span_description_scrub_clickhouse,
"SELECT (toStartOfHour(finish_ts, 'Universal') AS _snuba_time), (uniqIf((nullIf(user, '') AS _snuba_user), greater(multiIf(equals(tupleElement(('duration', 300), 1), 'lcp'), (if(has(measurements.key, 'lcp'), arrayElement(measurements.value, indexOf(measurements.key, 'lcp')), NULL) AS `_snuba_measurements[lcp]`), (duration AS _snuba_duration)), multiply(tupleElement(('duration', 300), 2), 4))) AS _snuba_count_miserable_user), (ifNull(divide(plus(_snuba_count_miserable_user, 4.56), plus(nullIf(uniqIf(_snuba_user, greater(multiIf(equals(tupleElement(('duration', 300), 1), 'lcp'), `_snuba_measurements[lcp]`, _snuba_duration), 0)), 0), 113.45)), 0) AS _snuba_user_misery), _snuba_count_miserable_user, (divide(countIf(notEquals(transaction_status, 0) AND notEquals(transaction_status, 1) AND notEquals(transaction_status, 2)), count()) AS _snuba_failure_rate), (divide(count(), divide(3600.0, 60)) AS _snuba_tpm_3600) FROM transactions_dist WHERE equals(('transaction' AS _snuba_type), 'transaction') AND greaterOrEquals((finish_ts AS _snuba_finish_ts), toDateTime('2023-06-13T09:08:51', 'Universal')) AND less(_snuba_finish_ts, toDateTime('2023-07-11T09:08:51', 'Universal')) AND in((project_id AS _snuba_project_id), [123, 456, 789]) AND equals((environment AS _snuba_environment), 'production') GROUP BY _snuba_time ORDER BY _snuba_time ASC LIMIT 10000 OFFSET 0",
Expand Down

0 comments on commit 0002cff

Please sign in to comment.