Skip to content

Commit

Permalink
fix(spans): Do not trim essential fields (#3670)
Browse files Browse the repository at this point in the history
Since #3645, we're seeing
incomplete or missing trace IDs on spans (see linked sentry issues).
What happens is that after visiting a long `span.description`, there is
no budget left for essential fields and the trimming processor drops or
trims them.

* Introduce `trim = "false"` field attribute to disallow trimming of
specific fields.
* Apply the new field to `span.trace_id` and other essential fields to
prevent them from being partially scrubbed or dropped entirely.

Current limitations:

- Not trimming these fields leads to over-accepting bytes. This should
not matter as long as the fields that have `trim = "false"` are limited
in size (which they are currently not, see
#3535 as follow-up).
- The more correct solution would be to subtract the length of `trim =
"false"` fields _before_ trimming any other fields.
- Alternatively, we could decide to never over-accept, and drop items
after visiting them if the budget is negative. I implemented this in
[1136f76](1136f76)
but it resulted in more failing tests, because the current trimming
processor already over-accepts a little bit (key lengths), and I did not
want to touch the default behavior. cc @Dav1dde.

Fixes
[RELAY-2NY7](https://sentry.my.sentry.io/organizations/sentry/issues/1005495/),
[RELAY-2NY8](https://sentry.my.sentry.io/organizations/sentry/issues/1005496/),
[SNUBA-552](https://sentry.sentry.io/issues/5429321769/).
  • Loading branch information
jjbayer authored Jun 4, 2024
1 parent 02ef222 commit 909fea6
Show file tree
Hide file tree
Showing 11 changed files with 193 additions and 38 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- Apply globally defined metric tags to legacy transaction metrics. ([#3615](https://github.com/getsentry/relay/pull/3615))
- Limit the maximum size of spans in an transaction to 800 kib. ([#3645](https://github.com/getsentry/relay/pull/3645))
- Scrub identifiers in spans with `op:db` and `db_system:redis`. ([#3642](https://github.com/getsentry/relay/pull/3642))
- Stop trimming important span fields by marking them `trim = "false"`. ([#3670](https://github.com/getsentry/relay/pull/3670))

**Features**:

Expand Down
21 changes: 21 additions & 0 deletions relay-event-derive/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ struct FieldAttrs {
max_chars_allowance: Option<TokenStream>,
max_depth: Option<TokenStream>,
max_bytes: Option<TokenStream>,
trim: Option<bool>,
}

impl FieldAttrs {
Expand Down Expand Up @@ -366,6 +367,14 @@ impl FieldAttrs {
quote!(crate::processor::Pii::False)
};

let trim = if let Some(trim) = self.trim {
quote!(#trim)
} else if let Some(ref parent_attrs) = inherit_from_field_attrs {
quote!(#parent_attrs.trim)
} else {
quote!(true)
};

let retain = self.retain;

let max_chars = if let Some(ref max_chars) = self.max_chars {
Expand Down Expand Up @@ -421,6 +430,7 @@ impl FieldAttrs {
max_bytes: #max_bytes,
pii: #pii,
retain: #retain,
trim: #trim,
}
})
}
Expand Down Expand Up @@ -595,6 +605,17 @@ fn parse_field_attributes(
panic!("Got non string literal for retain");
}
}
} else if ident == "trim" {
match name_value.lit {
Lit::Str(litstr) => match litstr.value().as_str() {
"true" => rv.trim = None,
"false" => rv.trim = Some(false),
other => panic!("Unknown value {other}"),
},
_ => {
panic!("Got non string literal for trim");
}
}
} else if ident == "legacy_alias" || ident == "skip_serialization" {
// Skip
} else {
Expand Down
6 changes: 3 additions & 3 deletions relay-event-normalization/src/normalize/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1706,14 +1706,14 @@ mod tests {
timestamp: ~,
start_timestamp: ~,
exclusive_time: ~,
description: ~,
op: "app_start_cold",
span_id: ~,
parent_span_id: ~,
trace_id: ~,
segment_id: ~,
is_segment: ~,
status: ~,
description: ~,
tags: ~,
origin: ~,
profile_id: ~,
Expand Down Expand Up @@ -1751,14 +1751,14 @@ mod tests {
timestamp: ~,
start_timestamp: ~,
exclusive_time: ~,
description: ~,
op: "app.start.cold",
span_id: ~,
parent_span_id: ~,
trace_id: ~,
segment_id: ~,
is_segment: ~,
status: ~,
description: ~,
tags: ~,
origin: ~,
profile_id: ~,
Expand Down Expand Up @@ -1796,14 +1796,14 @@ mod tests {
timestamp: ~,
start_timestamp: ~,
exclusive_time: ~,
description: ~,
op: "app.start.warm",
span_id: ~,
parent_span_id: ~,
trace_id: ~,
segment_id: ~,
is_segment: ~,
status: ~,
description: ~,
tags: ~,
origin: ~,
profile_id: ~,
Expand Down
151 changes: 139 additions & 12 deletions relay-event-normalization/src/trimming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,16 @@ impl Processor for TrimmingProcessor {
});
}

if self.remaining_size() == Some(0) {
// TODO: Create remarks (ensure they do not bloat event)
return Err(ProcessingAction::DeleteValueHard);
}
if self.remaining_depth(state) == Some(0) {
// TODO: Create remarks (ensure they do not bloat event)
return Err(ProcessingAction::DeleteValueHard);
if state.attrs().trim {
if self.remaining_size() == Some(0) {
// TODO: Create remarks (ensure they do not bloat event)
return Err(ProcessingAction::DeleteValueHard);
}
if self.remaining_depth(state) == Some(0) {
// TODO: Create remarks (ensure they do not bloat event)
return Err(ProcessingAction::DeleteValueHard);
}
}

Ok(())
}

Expand Down Expand Up @@ -131,6 +132,10 @@ impl Processor for TrimmingProcessor {
trim_string(value, meta, max_chars, state.attrs().max_chars_allowance);
}

if !state.attrs().trim {
return Ok(());
}

if let Some(size_state) = self.size_state.last() {
if let Some(size_remaining) = size_state.size_remaining {
trim_string(value, meta, size_remaining, 0);
Expand All @@ -149,6 +154,10 @@ impl Processor for TrimmingProcessor {
where
T: ProcessValue,
{
if !state.attrs().trim {
return Ok(());
}

// If we need to check the bag size, then we go down a different path
if !self.size_state.is_empty() {
let original_length = value.len();
Expand All @@ -159,7 +168,7 @@ impl Processor for TrimmingProcessor {

let mut split_index = None;
for (index, item) in value.iter_mut().enumerate() {
if self.remaining_size().unwrap() == 0 {
if self.remaining_size() == Some(0) {
split_index = Some(index);
break;
}
Expand Down Expand Up @@ -191,6 +200,10 @@ impl Processor for TrimmingProcessor {
where
T: ProcessValue,
{
if !state.attrs().trim {
return Ok(());
}

// If we need to check the bag size, then we go down a different path
if !self.size_state.is_empty() {
let original_length = value.len();
Expand All @@ -201,7 +214,7 @@ impl Processor for TrimmingProcessor {

let mut split_key = None;
for (key, item) in value.iter_mut() {
if self.remaining_size().unwrap() == 0 {
if self.remaining_size() == Some(0) {
split_key = Some(key.to_owned());
break;
}
Expand Down Expand Up @@ -230,6 +243,10 @@ impl Processor for TrimmingProcessor {
_meta: &mut Meta,
state: &ProcessingState<'_>,
) -> ProcessingResult {
if !state.attrs().trim {
return Ok(());
}

match value {
Value::Array(_) | Value::Object(_) => {
if self.remaining_depth(state) == Some(1) {
Expand All @@ -252,6 +269,10 @@ impl Processor for TrimmingProcessor {
_meta: &mut Meta,
state: &ProcessingState<'_>,
) -> ProcessingResult {
if !state.attrs().trim {
return Ok(());
}

processor::apply(&mut stacktrace.frames, |frames, meta| {
enforce_frame_hard_limit(frames, meta, 250);
Ok(())
Expand Down Expand Up @@ -393,9 +414,10 @@ mod tests {
use std::iter::repeat;

use relay_event_schema::protocol::{
Breadcrumb, Context, Contexts, Event, Exception, ExtraValue, Span, TagEntry, Tags, Values,
Breadcrumb, Context, Contexts, Event, Exception, ExtraValue, Span, SpanId, TagEntry, Tags,
TraceId, Values,
};
use relay_protocol::{Map, Remark, SerializableAnnotated};
use relay_protocol::{get_value, Map, Remark, SerializableAnnotated};
use similar_asserts::assert_eq;

use crate::MaxChars;
Expand Down Expand Up @@ -930,4 +952,109 @@ mod tests {

assert_eq!(event.0.unwrap().spans.0.unwrap().len(), 8);
}

#[test]
fn test_too_many_spans_trimmed_trace_id() {
let original_description = "a".repeat(819163);
let original_trace_id = TraceId("b".repeat(48));
let mut event = Annotated::new(Event {
spans: Annotated::new(vec![
Span {
description: original_description.clone().into(),
..Default::default()
}
.into(),
Span {
trace_id: original_trace_id.clone().into(),
..Default::default()
}
.into(),
]),
..Default::default()
});

let mut processor = TrimmingProcessor::new();
processor::process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();

assert_eq!(
get_value!(event.spans[0].description!),
&original_description
);
// Trace ID would be trimmed without `trim = "false"`
assert_eq!(get_value!(event.spans[1].trace_id!), &original_trace_id);
}

#[test]
fn test_too_many_spans_trimmed_trace_id_drop() {
let original_description = "a".repeat(819163);
let original_span_id = SpanId("b".repeat(48));
let original_trace_id = TraceId("c".repeat(48));
let original_segment_id = SpanId("d".repeat(48));
let original_op = "e".repeat(129);

let mut event = Annotated::new(Event {
spans: Annotated::new(vec![
Span {
description: original_description.clone().into(),
..Default::default()
}
.into(),
Span {
span_id: original_span_id.clone().into(),
trace_id: original_trace_id.clone().into(),
segment_id: original_segment_id.clone().into(),
is_segment: false.into(),
op: original_op.clone().into(),
..Default::default()
}
.into(),
]),
..Default::default()
});

let mut processor = TrimmingProcessor::new();
processor::process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();

assert_eq!(
get_value!(event.spans[0].description!),
&original_description
);
// These fields would be dropped without `trim = "false"`
assert_eq!(get_value!(event.spans[1].span_id!), &original_span_id);
assert_eq!(get_value!(event.spans[1].trace_id!), &original_trace_id);
assert_eq!(get_value!(event.spans[1].segment_id!), &original_segment_id);
assert_eq!(get_value!(event.spans[1].is_segment!), &false);

// span.op is trimmed to its max_chars, but not dropped:
assert_eq!(get_value!(event.spans[1].op!).len(), 128);
}

#[test]
fn test_trim_false_contributes_to_budget() {
for span_id in ["short", "looooooooooooooooooooooooooong"] {
let original_span_id = SpanId(span_id.to_owned());
let original_description = "a".repeat(900000);

let mut event = Annotated::new(Event {
spans: Annotated::new(vec![Span {
span_id: original_span_id.clone().into(),
description: original_description.clone().into(),
..Default::default()
}
.into()]),
..Default::default()
});

let mut processor = TrimmingProcessor::new();
processor::process_value(&mut event, &mut processor, ProcessingState::root()).unwrap();

assert_eq!(get_value!(event.spans[0].span_id!).as_ref(), span_id);

// The amount of trimming on the description depends on the length of the span id.
assert_eq!(
get_value!(event.spans[0].description!).len(),
1024 * 800 - 12 - span_id.len(),
);
}
}
}
3 changes: 3 additions & 0 deletions relay-event-schema/src/processor/attrs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ pub struct FieldAttrs {
pub pii: Pii,
/// Whether additional properties should be retained during normalization.
pub retain: bool,
/// Whether the trimming processor is allowed to shorten or drop this field.
pub trim: bool,
}

/// A set of characters allowed or denied for a (string) field.
Expand Down Expand Up @@ -167,6 +169,7 @@ impl FieldAttrs {
max_bytes: None,
pii: Pii::False,
retain: false,
trim: true,
}
}

Expand Down
Loading

0 comments on commit 909fea6

Please sign in to comment.