Skip to content

Commit

Permalink
fix(spans): Truncate span descriptions for span metrics (#2246)
Browse files Browse the repository at this point in the history
  • Loading branch information
iker-barriocanal authored Jun 26, 2023
1 parent 1f96e67 commit c956a6f
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 3 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions relay-server/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ axum = { version = "0.6.15", features = ["headers", "macros", "matched-path", "m
axum-server = "0.4.7"
backoff = "0.4.0"
brotli = "3.3.4"
bytecount = "0.6.0"
bytes = { version = "1.4.0" }
chrono = { version = "0.4.24", default-features = false, features = ["clock", "std", "serde"] }
data-encoding = "2.3.3"
Expand Down
62 changes: 59 additions & 3 deletions relay-server/src/metrics_extraction/spans/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,18 @@ pub(crate) fn extract_span_metrics(
);

if let Some(scrubbed_desc) = sanitized_description {
span_tags.insert(SpanTagKey::Description, scrubbed_desc.to_owned());
// Truncating the span description's tag value is, for now,
// a temporary solution to not get large descriptions dropped. The
// group tag mustn't be affected by this, and still be
// computed from the full, untruncated description.

let mut span_group = format!("{:?}", md5::compute(scrubbed_desc));
let mut span_group = format!("{:?}", md5::compute(&scrubbed_desc));
span_group.truncate(16);
span_tags.insert(SpanTagKey::Group, span_group);

let truncated =
truncate_string(scrubbed_desc, aggregator_config.max_tag_value_length);
span_tags.insert(SpanTagKey::Description, truncated);
}
}

Expand Down Expand Up @@ -303,6 +310,32 @@ fn sanitized_span_description(
Some(sanitized)
}

/// Trims the given string with the given maximum bytes. Splitting only happens
/// on char boundaries.
///
/// If the string is short, it remains unchanged. If it's long, this method
/// truncates it to the maximum allowed size and sets the last character to
/// `*`.
fn truncate_string(mut string: String, max_bytes: usize) -> String {
if string.len() <= max_bytes {
return string;
}

if max_bytes == 0 {
return String::new();
}

let mut cutoff = max_bytes - 1; // Leave space for `*`

while cutoff > 0 && !string.is_char_boundary(cutoff) {
cutoff -= 1;
}

string.truncate(cutoff);
string.push('*');
string
}

/// Regex with a capture group to extract the database action from a query.
///
/// Currently, we're only interested in either `SELECT` or `INSERT` statements.
Expand Down Expand Up @@ -539,7 +572,30 @@ mod tests {

use relay_metrics::AggregatorConfig;

use crate::metrics_extraction::spans::extract_span_metrics;
use crate::metrics_extraction::spans::{extract_span_metrics, truncate_string};

#[test]
fn test_truncate_string_no_panic() {
let string = "ÆÆ".to_owned();

let truncated = truncate_string(string.clone(), 0);
assert_eq!(truncated, "");

let truncated = truncate_string(string.clone(), 1);
assert_eq!(truncated, "*");

let truncated = truncate_string(string.clone(), 2);
assert_eq!(truncated, "*");

let truncated = truncate_string(string.clone(), 3);
assert_eq!(truncated, "Æ*");

let truncated = truncate_string(string.clone(), 4);
assert_eq!(truncated, "ÆÆ");

let truncated = truncate_string(string, 5);
assert_eq!(truncated, "ÆÆ");
}

macro_rules! span_transaction_method_test {
// Tests transaction.method is picked from the right place.
Expand Down
82 changes: 82 additions & 0 deletions tests/integration/test_metrics.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import hashlib
import time
from collections import defaultdict
from datetime import datetime, timedelta, timezone
Expand Down Expand Up @@ -1066,3 +1067,84 @@ def test_limit_custom_measurements(
"d:transactions/measurements.foo@none",
"d:transactions/measurements.bar@none",
}


@pytest.mark.parametrize(
"sent_description, expected_description",
[
(
"SELECT column FROM table WHERE another_col = %s",
"SELECT column FROM table WHERE another_col = %s",
),
(
"SELECT column FROM table WHERE another_col = %s AND yet_another_col = something_very_longgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg",
"SELECT column FROM table WHERE another_col = %s AND yet_another_col = something_very_longgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg*",
),
],
ids=["Must not truncate short descriptions", "Must truncate long descriptions"],
)
def test_span_metrics(
transactions_consumer,
metrics_consumer,
mini_sentry,
relay_with_processing,
sent_description,
expected_description,
):
project_id = 42
mini_sentry.add_full_project_config(project_id)
config = mini_sentry.project_configs[project_id]["config"]
config["transactionMetrics"] = {
"version": 1,
}
config.setdefault("features", []).append("projects:span-metrics-extraction")

transaction = {
"event_id": "d2132d31b39445f1938d7e21b6bf0ec4",
"type": "transaction",
"transaction": "/organizations/:orgId/performance/:eventSlug/",
"transaction_info": {"source": "route"},
"start_timestamp": 1597976392.6542819,
"timestamp": 1597976400.6189718,
"user": {"id": "user123", "geo": {"country_code": "ES"}},
"contexts": {
"trace": {
"trace_id": "4C79F60C11214EB38604F4AE0781BFB2",
"span_id": "FA90FDEAD5F74052",
"type": "trace",
}
},
"spans": [
{
"description": sent_description,
"op": "db",
"parent_span_id": "8f5a2b8768cafb4e",
"span_id": "bd429c44b67a3eb4",
"start_timestamp": 1597976393.4619668,
"timestamp": 1597976393.4718769,
"trace_id": "ff62a8b040f340bda5d830223def1d81",
}
],
}
# Default timestamp is so old that relay drops metrics, setting a more recent one avoids the drop.
timestamp = datetime.now(tz=timezone.utc)
transaction["timestamp"] = timestamp.isoformat()

metrics_consumer = metrics_consumer()
tx_consumer = transactions_consumer()
processing = relay_with_processing(options=TEST_CONFIG)
processing.send_transaction(project_id, transaction)

transaction, _ = tx_consumer.get_event()
assert transaction["spans"][0]["description"] == sent_description

expected_group = hashlib.md5(sent_description.encode("utf-8")).hexdigest()[:16]

metrics = metrics_consumer.get_metrics()
span_metrics = [
metric for metric in metrics if metric["name"].startswith("spans", 2)
]
assert len(span_metrics) == 3
for metric in span_metrics:
assert metric["tags"]["span.description"] == expected_description
assert metric["tags"]["span.group"] == expected_group

0 comments on commit c956a6f

Please sign in to comment.