diff --git a/CHANGELOG.md b/CHANGELOG.md index 00cc735e4e..0469c95242 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Unreleased + +**Internal**: + +- Treat arrays of pairs as key-value mappings during PII scrubbing. ([#3639](https://github.com/getsentry/relay/pull/3639)) + ## 24.5.1 **Bug fixes**: diff --git a/relay-pii/src/convert.rs b/relay-pii/src/convert.rs index eadbf44497..f2243c2e91 100644 --- a/relay-pii/src/convert.rs +++ b/relay-pii/src/convert.rs @@ -1656,4 +1656,92 @@ THd+9FBxiHLGXNKhG/FRSyREXEt+NyYIf/0cyByc9tNksat794ddUqnLOg0vwSkv process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap(); assert_annotated_snapshot!(data); } + + #[test] + fn test_pairlist_scrubbed_with_matching_keys() { + let mut data = Event::from_value( + serde_json::json!({ + "threads": { + "values": [ + { + "stacktrace": { + "frames": [ + { + "vars": { + "request": { + "headers": [ + [ + "secret", + // Should be filtered because of key `secret`. + "A1BBC234QWERTY0987MNBV012765HJKL" + ], + [ + "passwd", + // Should be filtered because of key `passwd`. + "my_password" + ] + ] + } + } + } + + ] + } + } + ] + } + }) + .into(), + ); + + let pii_config = simple_enabled_pii_config(); + let mut pii_processor = PiiProcessor::new(pii_config.compiled()); + process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap(); + assert_annotated_snapshot!(data); + } + + #[test] + fn test_pairlist_scrubbed_with_matching_values() { + let mut data = Event::from_value( + serde_json::json!({ + "threads": { + "values": [ + { + "stacktrace": { + "frames": [ + { + "vars": { + "request": { + "headers": [ + [ + "some_random_value", + // Should be filtered because it's treated + // as individual value. + "my_password" + ], + [ + "some_random_value_2", + // Should not be filtered because the value + // is scraped by default. + "abc" + ] + ] + } + } + } + + ] + } + } + ] + } + }) + .into(), + ); + + let pii_config = simple_enabled_pii_config(); + let mut pii_processor = PiiProcessor::new(pii_config.compiled()); + process_value(&mut data, &mut pii_processor, ProcessingState::root()).unwrap(); + assert_annotated_snapshot!(data); + } } diff --git a/relay-pii/src/processor.rs b/relay-pii/src/processor.rs index 9e379db759..26fac7787d 100644 --- a/relay-pii/src/processor.rs +++ b/relay-pii/src/processor.rs @@ -5,13 +5,13 @@ use std::sync::OnceLock; use regex::Regex; use relay_event_schema::processor::{ - self, enum_set, Chunk, Pii, ProcessValue, ProcessingAction, ProcessingResult, ProcessingState, - Processor, ValueType, + self, enum_set, process_value, Chunk, Pii, ProcessValue, ProcessingAction, ProcessingResult, + ProcessingState, Processor, ValueType, }; use relay_event_schema::protocol::{ AsPair, Event, IpAddr, NativeImagePath, PairList, Replay, ResponseContext, User, }; -use relay_protocol::{Annotated, Meta, Remark, RemarkType, Value}; +use relay_protocol::{Annotated, Array, Meta, Remark, RemarkType, Value}; use crate::compiledconfig::{CompiledPiiConfig, RuleRef}; use crate::config::RuleType; @@ -101,6 +101,48 @@ impl<'a> Processor for PiiProcessor<'a> { self.apply_all_rules(meta, state, None) } + fn process_array( + &mut self, + array: &mut Array, + _meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult + where + T: ProcessValue, + { + if is_pairlist(array) { + for annotated in array { + let mut mapped = mem::take(annotated).map_value(T::into_value); + + if let Some(Value::Array(ref mut pair)) = mapped.value_mut() { + let mut value = mem::take(&mut pair[1]); + let value_type = ValueType::for_field(&value); + + if let Some(key_name) = &pair[0].as_str() { + // We enter the key of the first element of the array, since we treat it + // as a pair. + let key_state = + state.enter_borrowed(key_name, state.inner_attrs(), value_type); + // We process the value with a state that "simulates" the first value of the + // array as if it was the key of a dictionary. + process_value(&mut value, self, &key_state)?; + } + + // Put value back into pair. + pair[1] = value; + } + + // Put pair back into array. + *annotated = T::from_value(mapped); + } + + Ok(()) + } else { + // If we didn't find a pairlist, we can process child values as normal. + array.process_child_values(self, state) + } + } + fn process_string( &mut self, value: &mut String, @@ -202,6 +244,69 @@ impl<'a> Processor for PiiProcessor<'a> { } } +#[derive(Default)] +struct PairListProcessor { + is_pair: bool, + has_string_key: bool, +} + +impl PairListProcessor { + /// Returns true if the processor identified the supplied data as an array composed of + /// a key (string) and a value. + fn is_pair_array(&self) -> bool { + self.is_pair && self.has_string_key + } +} + +impl Processor for PairListProcessor { + fn process_array( + &mut self, + value: &mut Array, + _meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult + where + T: ProcessValue, + { + self.is_pair = state.depth() == 0 && value.len() == 2; + if self.is_pair { + let key_type = ValueType::for_field(&value[0]); + process_value( + &mut value[0], + self, + &state.enter_index(0, state.inner_attrs(), key_type), + )?; + } + + Ok(()) + } + + fn process_string( + &mut self, + _value: &mut String, + _meta: &mut Meta, + state: &ProcessingState<'_>, + ) -> ProcessingResult where { + if state.depth() == 1 && state.path().index() == Some(0) { + self.has_string_key = true; + } + + Ok(()) + } +} + +fn is_pairlist(array: &mut Array) -> bool { + for element in array.iter_mut() { + let mut visitor = PairListProcessor::default(); + process_value(element, &mut visitor, ProcessingState::root()).ok(); + if !visitor.is_pair_array() { + return false; + } + } + + !array.is_empty() +} + /// Scrubs GraphQL variables from the event. pub fn scrub_graphql(event: &mut Event) { let mut keys: BTreeSet<&str> = BTreeSet::new(); @@ -466,7 +571,7 @@ fn insert_replacement_chunks(rule: &RuleRef, text: &str, output: &mut Vec::from_json(case).unwrap(); + let Annotated(Some(Value::Array(mut a)), _) = v else { + panic!() + }; + assert_eq!(is_pairlist(&mut a), expected, "{case}"); + } + } + + #[test] + fn test_tuple_array_scrubbed_with_path_selector() { + // We expect that both of these configs express the same semantics. + let configs = vec![ + // This configuration matches on the authorization element (the 1st element of the array + // represents the key). + r##" + { + "applications": { + "exception.values.0.stacktrace.frames.0.vars.headers.authorization": ["@anything:replace"] + } + } + "##, + // This configuration matches on the 2nd element of the array. + r##" + { + "applications": { + "exception.values.0.stacktrace.frames.0.vars.headers.0.1": ["@anything:replace"] + } + } + "##, + ]; + + let mut event = Event::from_value( + serde_json::json!( + { + "message": "hi", + "exception": { + "values": [ + { + "type": "BrokenException", + "value": "Something failed", + "stacktrace": { + "frames": [ + { + "vars": { + "headers": [ + ["authorization", "Bearer abc123"] + ] + } + } + ] + } + } + ] + } + }) + .into(), + ); + + for config in configs { + let config = serde_json::from_str::(config).unwrap(); + let mut processor = PiiProcessor::new(config.compiled()); + process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); + + let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap(); + + allow_duplicates!(assert_debug_snapshot!(vars, @r#" + FrameVars( + { + "headers": Array( + [ + Array( + [ + String( + "authorization", + ), + Annotated( + String( + "[Filtered]", + ), + Meta { + remarks: [ + Remark { + ty: Substituted, + rule_id: "@anything:replace", + range: Some( + ( + 0, + 10, + ), + ), + }, + ], + errors: [], + original_length: Some( + 13, + ), + original_value: None, + }, + ), + ], + ), + ], + ), + }, + ) + "#)); + } + } + + #[test] + fn test_tuple_array_scrubbed_with_string_selector_and_password_matcher() { + let config = serde_json::from_str::( + r##" + { + "applications": { + "$string": ["@password:remove"] + } + } + "##, + ) + .unwrap(); + + let mut event = Event::from_value( + serde_json::json!( + { + "message": "hi", + "exception": { + "values": [ + { + "type": "BrokenException", + "value": "Something failed", + "stacktrace": { + "frames": [ + { + "vars": { + "headers": [ + ["authorization", "abc123"] + ] + } + } + ] + } + } + ] + } + }) + .into(), + ); + + let mut processor = PiiProcessor::new(config.compiled()); + process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); + + let vars = get_value!(event.exceptions.values[0].stacktrace.frames[0].vars).unwrap(); + + assert_debug_snapshot!(vars, @r###" + FrameVars( + { + "headers": Array( + [ + Array( + [ + String( + "authorization", + ), + Meta { + remarks: [ + Remark { + ty: Removed, + rule_id: "@password:remove", + range: None, + }, + ], + errors: [], + original_length: None, + original_value: None, + }, + ], + ), + ], + ), + }, + ) + "###); + } } diff --git a/relay-pii/src/selector.rs b/relay-pii/src/selector.rs index 481c2876d7..4732249d4a 100644 --- a/relay-pii/src/selector.rs +++ b/relay-pii/src/selector.rs @@ -154,7 +154,7 @@ impl SelectorPathItem { /// A selector that can match paths of processing states. /// -/// To use a a selector, you most likely want to check whether it matches the path of a +/// To use a selector, you most likely want to check whether it matches the path of a /// [`ProcessingState`]. For this you turn the state into a [`Path`] using /// [`ProcessingState::path`] and call [`SelectorSpec::matches_path`], which will iterate through /// the path items in the processing state and check whether the selector matches. diff --git a/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap b/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap index 162f954c18..12634a1f33 100644 --- a/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap +++ b/relay-pii/src/snapshots/relay_pii__convert__tests__bearer_tokens_scrubbed.snap @@ -13,7 +13,7 @@ expression: data "request": { "headers": [ [ - "[Filtered]", + "AuthToken", "[Filtered]" ] ] @@ -36,19 +36,6 @@ expression: data "request": { "headers": { "0": { - "0": { - "": { - "rem": [ - [ - "@password:filter", - "s", - 0, - 10 - ] - ], - "len": 9 - } - }, "1": { "": { "rem": [ diff --git a/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_keys.snap b/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_keys.snap new file mode 100644 index 0000000000..937416f08e --- /dev/null +++ b/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_keys.snap @@ -0,0 +1,82 @@ +--- +source: relay-pii/src/convert.rs +expression: data +--- +{ + "threads": { + "values": [ + { + "stacktrace": { + "frames": [ + { + "vars": { + "request": { + "headers": [ + [ + "secret", + "[Filtered]" + ], + [ + "passwd", + "[Filtered]" + ] + ] + } + } + } + ] + } + } + ] + }, + "_meta": { + "threads": { + "values": { + "0": { + "stacktrace": { + "frames": { + "0": { + "vars": { + "request": { + "headers": { + "0": { + "1": { + "": { + "rem": [ + [ + "@password:filter", + "s", + 0, + 10 + ] + ], + "len": 32 + } + } + }, + "1": { + "1": { + "": { + "rem": [ + [ + "@password:filter", + "s", + 0, + 10 + ] + ], + "len": 11 + } + } + } + } + } + } + } + } + } + } + } + } + } +} diff --git a/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_values.snap b/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_values.snap new file mode 100644 index 0000000000..27477e18e8 --- /dev/null +++ b/relay-pii/src/snapshots/relay_pii__convert__tests__pairlist_scrubbed_with_matching_values.snap @@ -0,0 +1,67 @@ +--- +source: relay-pii/src/convert.rs +expression: data +--- +{ + "threads": { + "values": [ + { + "stacktrace": { + "frames": [ + { + "vars": { + "request": { + "headers": [ + [ + "some_random_value", + "[Filtered]" + ], + [ + "some_random_value_2", + "abc" + ] + ] + } + } + } + ] + } + } + ] + }, + "_meta": { + "threads": { + "values": { + "0": { + "stacktrace": { + "frames": { + "0": { + "vars": { + "request": { + "headers": { + "0": { + "1": { + "": { + "rem": [ + [ + "@password:filter", + "s", + 0, + 10 + ] + ], + "len": 11 + } + } + } + } + } + } + } + } + } + } + } + } + } +}