From 81b3737d8046bdd42dcae68668caf0a7a6518a71 Mon Sep 17 00:00:00 2001 From: Reinaldy Rafli Date: Fri, 12 Jul 2024 09:29:40 +0700 Subject: [PATCH 1/9] feat(normalization): bypass host scrubbing via allow list --- relay-cabi/src/processing.rs | 1 + relay-dynamic-config/src/global.rs | 10 ++++ relay-event-normalization/src/event.rs | 6 ++- .../src/normalize/span/description/mod.rs | 52 ++++++++++++------- .../src/normalize/span/tag_extraction.rs | 19 +++---- relay-server/src/metrics_extraction/event.rs | 2 +- relay-server/src/services/processor.rs | 6 +++ relay-server/src/services/processor/span.rs | 4 +- 8 files changed, 69 insertions(+), 31 deletions(-) diff --git a/relay-cabi/src/processing.rs b/relay-cabi/src/processing.rs index eb9acfac7c..52cbaa018b 100644 --- a/relay-cabi/src/processing.rs +++ b/relay-cabi/src/processing.rs @@ -273,6 +273,7 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event( measurements: None, normalize_spans: config.normalize_spans, replay_id: config.replay_id, + http_scrubbing_allow_list: None, // only supported in relay }; normalize_event(&mut event, &normalization_config); diff --git a/relay-dynamic-config/src/global.rs b/relay-dynamic-config/src/global.rs index 35e950da7a..7ac0adef22 100644 --- a/relay-dynamic-config/src/global.rs +++ b/relay-dynamic-config/src/global.rs @@ -227,6 +227,16 @@ pub struct Options { )] pub extrapolation_duplication_limit: usize, + /// List of values on span description that are allowed to be sent to Sentry without being scrubbed. + /// + /// At this point, it doesn't accept IP addresses in CIDR format.. yet. + #[serde( + rename = "relay.scrubbing.http-host-allow-list", + deserialize_with = "default_on_error", + skip_serializing_if = "Vec::is_empty" + )] + pub http_scrubbing_allow_list: Vec, + /// All other unknown options. #[serde(flatten)] other: HashMap, diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index 86b0398128..696c642315 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -154,6 +154,9 @@ pub struct NormalizationConfig<'a> { /// /// It is persisted into the event payload for correlation. pub replay_id: Option, + + /// Controls list of hosts to be excluded from scrubbing + pub http_scrubbing_allow_list: Option>, } impl<'a> Default for NormalizationConfig<'a> { @@ -185,6 +188,7 @@ impl<'a> Default for NormalizationConfig<'a> { measurements: None, normalize_spans: true, replay_id: Default::default(), + http_scrubbing_allow_list: None, } } } @@ -317,7 +321,7 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) { } if config.enrich_spans { - extract_span_tags_from_event(event, config.max_tag_value_length); + extract_span_tags_from_event(event, config.max_tag_value_length, config.http_scrubbing_allow_list.clone()); } if let Some(context) = event.context_mut::() { diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 224f383dcc..14e13f439f 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -38,6 +38,7 @@ const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"]; /// Returns `None` if no scrubbing can be performed. pub(crate) fn scrub_span_description( span: &Span, + http_scrubbing_allow_list: Option>, ) -> (Option, Option>) { let Some(description) = span.description.as_str() else { return (None, None); @@ -56,7 +57,7 @@ pub(crate) fn scrub_span_description( .as_str() .map(|op| op.split_once('.').unwrap_or((op, ""))) .and_then(|(op, sub)| match (op, sub) { - ("http", _) => scrub_http(description), + ("http", _) => scrub_http(description, http_scrubbing_allow_list), ("cache", _) | ("db", "redis") => scrub_redis_keys(description), ("db", _) if db_system == Some("redis") => scrub_redis_keys(description), ("db", sub) => { @@ -166,7 +167,7 @@ fn scrub_supabase(string: &str) -> Option { Some(DB_SUPABASE_REGEX.replace_all(string, "{%s}").into()) } -fn scrub_http(string: &str) -> Option { +fn scrub_http(string: &str, allow_list: Option>) -> Option { let (method, url) = string.split_once(' ')?; if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) { return None; @@ -179,7 +180,11 @@ fn scrub_http(string: &str) -> Option { let scrubbed = match Url::parse(url) { Ok(url) => { let scheme = url.scheme(); - let scrubbed_host = url.host().map(scrub_host); + let scrubbed_host = if let Some(host) = url.host() { + Some(scrub_host(host, allow_list)) + } else { + None + }; let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port()); format!("{method} {scheme}://{domain}") @@ -222,10 +227,17 @@ fn scrub_file(description: &str) -> Option { /// use std::net::{Ipv4Addr, Ipv6Addr}; /// use relay_event_normalization::span::description::scrub_host; /// -/// assert_eq!(scrub_host(Host::Domain("foo.bar.baz")), "*.bar.baz"); -/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST)), "127.0.0.1"); +/// assert_eq!(scrub_host(Host::Domain("foo.bar.baz"), None), "*.bar.baz"); +/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST), None), "127.0.0.1"); +/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), Some(vec![String::from("8.8.8.8")])), "8.8.8.8"); /// ``` -pub fn scrub_host(host: Host<&str>) -> Cow<'_, str> { +pub fn scrub_host(host: Host<&str>, allow_list: Option>) -> Cow<'_, str> { + if let Some(allow_list) = allow_list { + if allow_list.contains(&host.to_string().into()) { + return host.to_string().into(); + } + } + match host { Host::Ipv4(ip) => Cow::Borrowed(scrub_ipv4(ip)), Host::Ipv6(ip) => Cow::Borrowed(scrub_ipv6(ip)), @@ -282,9 +294,9 @@ pub fn scrub_ipv6(ip: Ipv6Addr) -> &'static str { /// ``` /// use relay_event_normalization::span::description::scrub_domain_name; /// -/// assert_eq!(scrub_domain_name("my.domain.com"), "*.domain.com"); -/// assert_eq!(scrub_domain_name("data.bbc.co.uk"), "*.bbc.co.uk"); -/// assert_eq!(scrub_domain_name("hello world"), "hello world"); +/// assert_eq!(scrub_domain_name("my.domain.com", None), "*.domain.com"); +/// assert_eq!(scrub_domain_name("data.bbc.co.uk", None), "*.bbc.co.uk"); +/// assert_eq!(scrub_domain_name("hello world", None), "hello world"); /// ``` pub fn scrub_domain_name(domain: &str) -> Cow<'_, str> { if DOMAIN_ALLOW_LIST.contains(&domain) { @@ -394,7 +406,11 @@ fn scrub_resource(resource_type: &str, string: &str) -> Option { return Some("browser-extension://*".to_owned()); } scheme => { - let scrubbed_host = url.host().map(scrub_host); + let scrubbed_host = if let Some(host) = url.host() { + Some(scrub_host(host, None)) + } else { + None + }; let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port()); let segment_count = url.path_segments().map(|s| s.count()).unwrap_or_default(); @@ -562,7 +578,7 @@ mod tests { .description .set_value(Some($description_in.into())); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); if $expected == "" { assert!(scrubbed.0.is_none()); @@ -1121,7 +1137,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); let span = span.value_mut().as_mut().unwrap(); - let scrubbed = scrub_span_description(span); + let scrubbed = scrub_span_description(span, None); assert_eq!(scrubbed.0.as_deref(), Some("SELECT %s")); } @@ -1134,7 +1150,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); // When db.system is missing, no scrubbed description (i.e. no group) is set. assert!(scrubbed.0.is_none()); @@ -1152,7 +1168,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); // Can be scrubbed with db system. assert_eq!(scrubbed.0.as_deref(), Some("SELECT a FROM b")); @@ -1170,7 +1186,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); // NOTE: this should return `DEL *`, but we cannot detect lowercase command names yet. assert_eq!(scrubbed.0.as_deref(), Some("*")); @@ -1186,7 +1202,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); assert_eq!(scrubbed.0.as_deref(), Some("INSERTED * 'UAEventData'")); } @@ -1201,7 +1217,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); assert_eq!( scrubbed.0.as_deref(), @@ -1221,7 +1237,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); // Can be scrubbed with db system. assert_eq!(scrubbed.0.as_deref(), Some("my-component-name")); diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index 7e987ea400..bedac2ce44 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -176,13 +176,13 @@ impl std::fmt::Display for RenderBlockingStatus { /// Wrapper for [`extract_span_tags`]. /// /// Tags longer than `max_tag_value_size` bytes will be truncated. -pub(crate) fn extract_span_tags_from_event(event: &mut Event, max_tag_value_size: usize) { +pub(crate) fn extract_span_tags_from_event(event: &mut Event, max_tag_value_size: usize, http_scrubbing_allow_list: Option>) { // Temporarily take ownership to pass both an event reference and a mutable span reference to `extract_span_tags`. let mut spans = std::mem::take(&mut event.spans); let Some(spans_vec) = spans.value_mut() else { return; }; - extract_span_tags(event, spans_vec.as_mut_slice(), max_tag_value_size); + extract_span_tags(event, spans_vec.as_mut_slice(), max_tag_value_size, http_scrubbing_allow_list); event.spans = spans; } @@ -190,7 +190,7 @@ pub(crate) fn extract_span_tags_from_event(event: &mut Event, max_tag_value_size /// Extracts tags and measurements from event and spans and materializes them into the spans. /// /// Tags longer than `max_tag_value_size` bytes will be truncated. -pub fn extract_span_tags(event: &Event, spans: &mut [Annotated], max_tag_value_size: usize) { +pub fn extract_span_tags(event: &Event, spans: &mut [Annotated], max_tag_value_size: usize, http_scrubbing_allow_list: Option>) { // TODO: To prevent differences between metrics and payloads, we should not extract tags here // when they have already been extracted by a downstream relay. let shared_tags = extract_shared_tags(event); @@ -207,7 +207,7 @@ pub fn extract_span_tags(event: &Event, spans: &mut [Annotated], max_tag_v continue; }; - let tags = extract_tags(span, max_tag_value_size, ttid, ttfd, is_mobile, start_type); + let tags = extract_tags(span, max_tag_value_size, ttid, ttfd, is_mobile, start_type, http_scrubbing_allow_list.clone()); span.sentry_tags = Annotated::new( shared_tags @@ -449,6 +449,7 @@ pub fn extract_tags( full_display: Option, is_mobile: bool, start_type: Option<&str>, + http_scrubbing_allow_list: Option>, ) -> BTreeMap { let mut span_tags: BTreeMap = BTreeMap::new(); @@ -475,7 +476,7 @@ pub fn extract_tags( span_tags.insert(SpanTagKey::Category, category.to_owned()); } - let (scrubbed_description, parsed_sql) = scrub_span_description(span); + let (scrubbed_description, parsed_sql) = scrub_span_description(span, http_scrubbing_allow_list); let action = match (category, span_op.as_str(), &scrubbed_description) { (Some("http"), _, _) => span @@ -1418,7 +1419,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, None); let spans = event.spans.value().unwrap(); @@ -2075,7 +2076,7 @@ LIMIT 1 .unwrap() .into_value() .unwrap(); - let tags = extract_tags(&span, 200, None, None, false, None); + let tags = extract_tags(&span, 200, None, None, false, None, None); assert_eq!( tags.get(&SpanTagKey::BrowserName), @@ -2146,7 +2147,7 @@ LIMIT 1 .unwrap() .into_value() .unwrap(); - let tags = extract_tags(&span, 200, None, None, false, None); + let tags = extract_tags(&span, 200, None, None, false, None, None); assert_eq!( tags.get(&SpanTagKey::MessagingDestinationName), @@ -2369,7 +2370,7 @@ LIMIT 1 .unwrap(); span.description.set_value(Some(description.into())); - extract_tags(&span, 200, None, None, false, None) + extract_tags(&span, 200, None, None, false, None, None) } #[test] diff --git a/relay-server/src/metrics_extraction/event.rs b/relay-server/src/metrics_extraction/event.rs index 3528990009..1a157e7746 100644 --- a/relay-server/src/metrics_extraction/event.rs +++ b/relay-server/src/metrics_extraction/event.rs @@ -69,7 +69,7 @@ fn extract_span_metrics_for_event( output: &mut Vec, ) { relay_statsd::metric!(timer(RelayTimers::EventProcessingSpanMetricsExtraction), { - if let Some(transaction_span) = extract_transaction_span(event, max_tag_value_size) { + if let Some(transaction_span) = extract_transaction_span(event, max_tag_value_size, None) { let (metrics, metrics_summary) = metrics_summary::extract_and_summarize_metrics(&transaction_span, config); metrics_summary.apply_on(&mut event._metrics_summary); diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index d7dfeeafd0..374d9c38f5 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1481,6 +1481,7 @@ impl EnvelopeProcessorService { let global_config = self.inner.global_config.current(); let ai_model_costs = global_config.ai_model_costs.clone().ok(); + let http_scrubbing_allow_list = global_config.options.http_scrubbing_allow_list.clone(); utils::log_transaction_name_metrics(&mut state.event, |event| { let event_validation_config = EventValidationConfig { @@ -1564,6 +1565,11 @@ impl EnvelopeProcessorService { .envelope() .dsc() .and_then(|ctx| ctx.replay_id), + http_scrubbing_allow_list: if http_scrubbing_allow_list.is_empty() { + None + } else { + Some(http_scrubbing_allow_list.clone()) + }, }; metric!(timer(RelayTimers::EventProcessingNormalization), { diff --git a/relay-server/src/services/processor/span.rs b/relay-server/src/services/processor/span.rs index a58830543f..caa5f93b80 100644 --- a/relay-server/src/services/processor/span.rs +++ b/relay-server/src/services/processor/span.rs @@ -30,10 +30,10 @@ pub fn filter(state: &mut ProcessEnvelopeState) { /// Creates a span from the transaction and applies tag extraction on it. /// /// Returns `None` when [`tag_extraction::extract_span_tags`] clears the span, which it shouldn't. -pub fn extract_transaction_span(event: &Event, max_tag_value_size: usize) -> Option { +pub fn extract_transaction_span(event: &Event, max_tag_value_size: usize, http_scrubbing_allow_list: Option>) -> Option { let mut spans = [Span::from(event).into()]; - tag_extraction::extract_span_tags(event, &mut spans, max_tag_value_size); + tag_extraction::extract_span_tags(event, &mut spans, max_tag_value_size, http_scrubbing_allow_list); tag_extraction::extract_segment_span_tags(event, &mut spans); spans.into_iter().next().and_then(Annotated::into_value) From 5104b8c82a29d82e267439ae8c611b669b179d04 Mon Sep 17 00:00:00 2001 From: Reinaldy Rafli Date: Fri, 12 Jul 2024 09:33:18 +0700 Subject: [PATCH 2/9] chore: changelog entry --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 404a28170c..9085a68367 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,9 @@ - Incorrect span outcomes when generated from a indexed transaction quota. ([#3793](https://github.com/getsentry/relay/pull/3793)) - Report outcomes for spans when transactions are rate limited. ([#3749](https://github.com/getsentry/relay/pull/3749)) +**Features**: +- Allow list for excluding certain host/IPs from scrubbing in spans. ([#3813](https://github.com/getsentry/relay/pull/3813)) + **Internal**: - Aggregate metrics before rate limiting. ([#3746](https://github.com/getsentry/relay/pull/3746)) From 29b777297c8709525f814ecd7978d28f189f6484 Mon Sep 17 00:00:00 2001 From: Reinaldy Rafli Date: Fri, 12 Jul 2024 18:57:07 +0700 Subject: [PATCH 3/9] Apply suggestions from code review Co-authored-by: Joris Bayer --- relay-dynamic-config/src/global.rs | 4 ++-- relay-event-normalization/src/event.rs | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/relay-dynamic-config/src/global.rs b/relay-dynamic-config/src/global.rs index 7ac0adef22..ae5e70565d 100644 --- a/relay-dynamic-config/src/global.rs +++ b/relay-dynamic-config/src/global.rs @@ -231,11 +231,11 @@ pub struct Options { /// /// At this point, it doesn't accept IP addresses in CIDR format.. yet. #[serde( - rename = "relay.scrubbing.http-host-allow-list", + rename = "relay.span-normalization.allowed_hosts", deserialize_with = "default_on_error", skip_serializing_if = "Vec::is_empty" )] - pub http_scrubbing_allow_list: Vec, + pub http_span_allowed_hosts: Vec, /// All other unknown options. #[serde(flatten)] diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index 696c642315..44b037c59c 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -156,7 +156,7 @@ pub struct NormalizationConfig<'a> { pub replay_id: Option, /// Controls list of hosts to be excluded from scrubbing - pub http_scrubbing_allow_list: Option>, + pub span_allowed_hosts: &'a [String]>, } impl<'a> Default for NormalizationConfig<'a> { From a6f8e902ec8e7df13d9667c7941b8f8cfe24f28e Mon Sep 17 00:00:00 2001 From: Reinaldy Rafli Date: Fri, 12 Jul 2024 19:28:00 +0700 Subject: [PATCH 4/9] feat: remove Option> into regular rust slice --- relay-cabi/src/processing.rs | 2 +- relay-event-normalization/src/event.rs | 10 ++++-- .../src/normalize/span/description/mod.rs | 24 ++++++------- .../src/normalize/span/tag_extraction.rs | 36 +++++++++++++++---- relay-server/src/metrics_extraction/event.rs | 2 +- relay-server/src/services/processor.rs | 8 ++--- relay-server/src/services/processor/span.rs | 8 +++-- 7 files changed, 57 insertions(+), 33 deletions(-) diff --git a/relay-cabi/src/processing.rs b/relay-cabi/src/processing.rs index 52cbaa018b..4c9761e0bd 100644 --- a/relay-cabi/src/processing.rs +++ b/relay-cabi/src/processing.rs @@ -273,7 +273,7 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event( measurements: None, normalize_spans: config.normalize_spans, replay_id: config.replay_id, - http_scrubbing_allow_list: None, // only supported in relay + span_allowed_hosts: &[], // only supported in relay }; normalize_event(&mut event, &normalization_config); diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index 44b037c59c..6f8464f033 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -156,7 +156,7 @@ pub struct NormalizationConfig<'a> { pub replay_id: Option, /// Controls list of hosts to be excluded from scrubbing - pub span_allowed_hosts: &'a [String]>, + pub span_allowed_hosts: &'a [String], } impl<'a> Default for NormalizationConfig<'a> { @@ -188,7 +188,7 @@ impl<'a> Default for NormalizationConfig<'a> { measurements: None, normalize_spans: true, replay_id: Default::default(), - http_scrubbing_allow_list: None, + span_allowed_hosts: Default::default(), } } } @@ -321,7 +321,11 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) { } if config.enrich_spans { - extract_span_tags_from_event(event, config.max_tag_value_length, config.http_scrubbing_allow_list.clone()); + extract_span_tags_from_event( + event, + config.max_tag_value_length, + config.span_allowed_hosts, + ); } if let Some(context) = event.context_mut::() { diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 14e13f439f..5579b2a530 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -36,9 +36,9 @@ const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"]; /// Attempts to replace identifiers in the span description with placeholders. /// /// Returns `None` if no scrubbing can be performed. -pub(crate) fn scrub_span_description( +pub(crate) fn scrub_span_description<'a>( span: &Span, - http_scrubbing_allow_list: Option>, + span_allowed_hosts: &'a [String], ) -> (Option, Option>) { let Some(description) = span.description.as_str() else { return (None, None); @@ -57,7 +57,7 @@ pub(crate) fn scrub_span_description( .as_str() .map(|op| op.split_once('.').unwrap_or((op, ""))) .and_then(|(op, sub)| match (op, sub) { - ("http", _) => scrub_http(description, http_scrubbing_allow_list), + ("http", _) => scrub_http(description, span_allowed_hosts), ("cache", _) | ("db", "redis") => scrub_redis_keys(description), ("db", _) if db_system == Some("redis") => scrub_redis_keys(description), ("db", sub) => { @@ -167,7 +167,7 @@ fn scrub_supabase(string: &str) -> Option { Some(DB_SUPABASE_REGEX.replace_all(string, "{%s}").into()) } -fn scrub_http(string: &str, allow_list: Option>) -> Option { +fn scrub_http(string: &str, allow_list: &[String]) -> Option { let (method, url) = string.split_once(' ')?; if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) { return None; @@ -227,15 +227,13 @@ fn scrub_file(description: &str) -> Option { /// use std::net::{Ipv4Addr, Ipv6Addr}; /// use relay_event_normalization::span::description::scrub_host; /// -/// assert_eq!(scrub_host(Host::Domain("foo.bar.baz"), None), "*.bar.baz"); -/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST), None), "127.0.0.1"); -/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), Some(vec![String::from("8.8.8.8")])), "8.8.8.8"); +/// assert_eq!(scrub_host(Host::Domain("foo.bar.baz"), &*[]), "*.bar.baz"); +/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST), &*[]), "127.0.0.1"); +/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &*[String::from("8.8.8.8")], "8.8.8.8"); /// ``` -pub fn scrub_host(host: Host<&str>, allow_list: Option>) -> Cow<'_, str> { - if let Some(allow_list) = allow_list { - if allow_list.contains(&host.to_string().into()) { - return host.to_string().into(); - } +pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [String]) -> Cow<'a, str> { + if allow_list.contains(&host.to_string().into()) { + return host.to_string().into(); } match host { @@ -407,7 +405,7 @@ fn scrub_resource(resource_type: &str, string: &str) -> Option { } scheme => { let scrubbed_host = if let Some(host) = url.host() { - Some(scrub_host(host, None)) + Some(scrub_host(host, &[])) } else { None }; diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index bedac2ce44..adfd153838 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -176,13 +176,22 @@ impl std::fmt::Display for RenderBlockingStatus { /// Wrapper for [`extract_span_tags`]. /// /// Tags longer than `max_tag_value_size` bytes will be truncated. -pub(crate) fn extract_span_tags_from_event(event: &mut Event, max_tag_value_size: usize, http_scrubbing_allow_list: Option>) { +pub(crate) fn extract_span_tags_from_event( + event: &mut Event, + max_tag_value_size: usize, + http_scrubbing_allow_list: &[String], +) { // Temporarily take ownership to pass both an event reference and a mutable span reference to `extract_span_tags`. let mut spans = std::mem::take(&mut event.spans); let Some(spans_vec) = spans.value_mut() else { return; }; - extract_span_tags(event, spans_vec.as_mut_slice(), max_tag_value_size, http_scrubbing_allow_list); + extract_span_tags( + event, + spans_vec.as_mut_slice(), + max_tag_value_size, + http_scrubbing_allow_list, + ); event.spans = spans; } @@ -190,7 +199,12 @@ pub(crate) fn extract_span_tags_from_event(event: &mut Event, max_tag_value_size /// Extracts tags and measurements from event and spans and materializes them into the spans. /// /// Tags longer than `max_tag_value_size` bytes will be truncated. -pub fn extract_span_tags(event: &Event, spans: &mut [Annotated], max_tag_value_size: usize, http_scrubbing_allow_list: Option>) { +pub fn extract_span_tags<'a>( + event: &Event, + spans: &mut [Annotated], + max_tag_value_size: usize, + span_allowed_hosts: &'a [String], +) { // TODO: To prevent differences between metrics and payloads, we should not extract tags here // when they have already been extracted by a downstream relay. let shared_tags = extract_shared_tags(event); @@ -207,7 +221,15 @@ pub fn extract_span_tags(event: &Event, spans: &mut [Annotated], max_tag_v continue; }; - let tags = extract_tags(span, max_tag_value_size, ttid, ttfd, is_mobile, start_type, http_scrubbing_allow_list.clone()); + let tags = extract_tags( + span, + max_tag_value_size, + ttid, + ttfd, + is_mobile, + start_type, + span_allowed_hosts, + ); span.sentry_tags = Annotated::new( shared_tags @@ -442,14 +464,14 @@ fn extract_segment_tags(event: &Event) -> BTreeMap { /// [span operations](https://develop.sentry.dev/sdk/performance/span-operations/) and /// existing [span data](https://develop.sentry.dev/sdk/performance/span-data-conventions/) fields, /// and rely on Sentry conventions and heuristics. -pub fn extract_tags( +pub fn extract_tags<'a>( span: &Span, max_tag_value_size: usize, initial_display: Option, full_display: Option, is_mobile: bool, start_type: Option<&str>, - http_scrubbing_allow_list: Option>, + span_allowed_hosts: &'a [String], ) -> BTreeMap { let mut span_tags: BTreeMap = BTreeMap::new(); @@ -476,7 +498,7 @@ pub fn extract_tags( span_tags.insert(SpanTagKey::Category, category.to_owned()); } - let (scrubbed_description, parsed_sql) = scrub_span_description(span, http_scrubbing_allow_list); + let (scrubbed_description, parsed_sql) = scrub_span_description(span, span_allowed_hosts); let action = match (category, span_op.as_str(), &scrubbed_description) { (Some("http"), _, _) => span diff --git a/relay-server/src/metrics_extraction/event.rs b/relay-server/src/metrics_extraction/event.rs index 1a157e7746..17da2f1842 100644 --- a/relay-server/src/metrics_extraction/event.rs +++ b/relay-server/src/metrics_extraction/event.rs @@ -69,7 +69,7 @@ fn extract_span_metrics_for_event( output: &mut Vec, ) { relay_statsd::metric!(timer(RelayTimers::EventProcessingSpanMetricsExtraction), { - if let Some(transaction_span) = extract_transaction_span(event, max_tag_value_size, None) { + if let Some(transaction_span) = extract_transaction_span(event, max_tag_value_size, &[]) { let (metrics, metrics_summary) = metrics_summary::extract_and_summarize_metrics(&transaction_span, config); metrics_summary.apply_on(&mut event._metrics_summary); diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 374d9c38f5..87dc6beedc 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1481,7 +1481,7 @@ impl EnvelopeProcessorService { let global_config = self.inner.global_config.current(); let ai_model_costs = global_config.ai_model_costs.clone().ok(); - let http_scrubbing_allow_list = global_config.options.http_scrubbing_allow_list.clone(); + let http_span_allowed_hosts = global_config.options.http_span_allowed_hosts.clone(); utils::log_transaction_name_metrics(&mut state.event, |event| { let event_validation_config = EventValidationConfig { @@ -1565,11 +1565,7 @@ impl EnvelopeProcessorService { .envelope() .dsc() .and_then(|ctx| ctx.replay_id), - http_scrubbing_allow_list: if http_scrubbing_allow_list.is_empty() { - None - } else { - Some(http_scrubbing_allow_list.clone()) - }, + span_allowed_hosts: http_span_allowed_hosts.as_ref(), }; metric!(timer(RelayTimers::EventProcessingNormalization), { diff --git a/relay-server/src/services/processor/span.rs b/relay-server/src/services/processor/span.rs index caa5f93b80..9554ad88f2 100644 --- a/relay-server/src/services/processor/span.rs +++ b/relay-server/src/services/processor/span.rs @@ -30,10 +30,14 @@ pub fn filter(state: &mut ProcessEnvelopeState) { /// Creates a span from the transaction and applies tag extraction on it. /// /// Returns `None` when [`tag_extraction::extract_span_tags`] clears the span, which it shouldn't. -pub fn extract_transaction_span(event: &Event, max_tag_value_size: usize, http_scrubbing_allow_list: Option>) -> Option { +pub fn extract_transaction_span( + event: &Event, + max_tag_value_size: usize, + span_allowed_hosts: &[String], +) -> Option { let mut spans = [Span::from(event).into()]; - tag_extraction::extract_span_tags(event, &mut spans, max_tag_value_size, http_scrubbing_allow_list); + tag_extraction::extract_span_tags(event, &mut spans, max_tag_value_size, span_allowed_hosts); tag_extraction::extract_segment_span_tags(event, &mut spans); spans.into_iter().next().and_then(Annotated::into_value) From 5fa93c5ab9d120c3ca80aedb8aa438976affb16e Mon Sep 17 00:00:00 2001 From: Reinaldy Rafli Date: Fri, 12 Jul 2024 20:25:53 +0700 Subject: [PATCH 5/9] fix(normalization): apply formatting & lint from clippy --- .../src/normalize/span/description/mod.rs | 40 ++++++++----------- .../src/normalize/span/tag_extraction.rs | 8 ++-- .../src/services/processor/span/processing.rs | 4 +- 3 files changed, 23 insertions(+), 29 deletions(-) diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 5579b2a530..212bd21655 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -36,9 +36,9 @@ const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"]; /// Attempts to replace identifiers in the span description with placeholders. /// /// Returns `None` if no scrubbing can be performed. -pub(crate) fn scrub_span_description<'a>( +pub(crate) fn scrub_span_description( span: &Span, - span_allowed_hosts: &'a [String], + span_allowed_hosts: &[String], ) -> (Option, Option>) { let Some(description) = span.description.as_str() else { return (None, None); @@ -180,11 +180,7 @@ fn scrub_http(string: &str, allow_list: &[String]) -> Option { let scrubbed = match Url::parse(url) { Ok(url) => { let scheme = url.scheme(); - let scrubbed_host = if let Some(host) = url.host() { - Some(scrub_host(host, allow_list)) - } else { - None - }; + let scrubbed_host = url.host().map(|host| scrub_host(host, allow_list)); let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port()); format!("{method} {scheme}://{domain}") @@ -227,12 +223,12 @@ fn scrub_file(description: &str) -> Option { /// use std::net::{Ipv4Addr, Ipv6Addr}; /// use relay_event_normalization::span::description::scrub_host; /// -/// assert_eq!(scrub_host(Host::Domain("foo.bar.baz"), &*[]), "*.bar.baz"); -/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST), &*[]), "127.0.0.1"); -/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &*[String::from("8.8.8.8")], "8.8.8.8"); +/// assert_eq!(scrub_host(Host::Domain("foo.bar.baz"), &[]), "*.bar.baz"); +/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST), &[]), "127.0.0.1"); +/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[String::from("8.8.8.8")]), "8.8.8.8"); /// ``` pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [String]) -> Cow<'a, str> { - if allow_list.contains(&host.to_string().into()) { + if allow_list.contains(&host.to_string()) { return host.to_string().into(); } @@ -404,11 +400,7 @@ fn scrub_resource(resource_type: &str, string: &str) -> Option { return Some("browser-extension://*".to_owned()); } scheme => { - let scrubbed_host = if let Some(host) = url.host() { - Some(scrub_host(host, &[])) - } else { - None - }; + let scrubbed_host = url.host().map(|host| scrub_host(host, &[])); let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port()); let segment_count = url.path_segments().map(|s| s.count()).unwrap_or_default(); @@ -576,7 +568,7 @@ mod tests { .description .set_value(Some($description_in.into())); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); if $expected == "" { assert!(scrubbed.0.is_none()); @@ -1135,7 +1127,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); let span = span.value_mut().as_mut().unwrap(); - let scrubbed = scrub_span_description(span, None); + let scrubbed = scrub_span_description(span, &[]); assert_eq!(scrubbed.0.as_deref(), Some("SELECT %s")); } @@ -1148,7 +1140,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); // When db.system is missing, no scrubbed description (i.e. no group) is set. assert!(scrubbed.0.is_none()); @@ -1166,7 +1158,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); // Can be scrubbed with db system. assert_eq!(scrubbed.0.as_deref(), Some("SELECT a FROM b")); @@ -1184,7 +1176,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); // NOTE: this should return `DEL *`, but we cannot detect lowercase command names yet. assert_eq!(scrubbed.0.as_deref(), Some("*")); @@ -1200,7 +1192,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); assert_eq!(scrubbed.0.as_deref(), Some("INSERTED * 'UAEventData'")); } @@ -1215,7 +1207,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); assert_eq!( scrubbed.0.as_deref(), @@ -1235,7 +1227,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), None); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); // Can be scrubbed with db system. assert_eq!(scrubbed.0.as_deref(), Some("my-component-name")); diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index adfd153838..1b2963e533 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -199,11 +199,11 @@ pub(crate) fn extract_span_tags_from_event( /// Extracts tags and measurements from event and spans and materializes them into the spans. /// /// Tags longer than `max_tag_value_size` bytes will be truncated. -pub fn extract_span_tags<'a>( +pub fn extract_span_tags( event: &Event, spans: &mut [Annotated], max_tag_value_size: usize, - span_allowed_hosts: &'a [String], + span_allowed_hosts: &[String], ) { // TODO: To prevent differences between metrics and payloads, we should not extract tags here // when they have already been extracted by a downstream relay. @@ -464,14 +464,14 @@ fn extract_segment_tags(event: &Event) -> BTreeMap { /// [span operations](https://develop.sentry.dev/sdk/performance/span-operations/) and /// existing [span data](https://develop.sentry.dev/sdk/performance/span-data-conventions/) fields, /// and rely on Sentry conventions and heuristics. -pub fn extract_tags<'a>( +pub fn extract_tags( span: &Span, max_tag_value_size: usize, initial_display: Option, full_display: Option, is_mobile: bool, start_type: Option<&str>, - span_allowed_hosts: &'a [String], + span_allowed_hosts: &[String], ) -> BTreeMap { let mut span_tags: BTreeMap = BTreeMap::new(); diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index aa9407e164..622ee9223f 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -292,6 +292,7 @@ pub fn extract_from_event( .aggregator_config_for(MetricNamespace::Spans) .aggregator .max_tag_value_length, + &[], ) else { return; }; @@ -498,7 +499,8 @@ fn normalize( // Tag extraction: let is_mobile = false; // TODO: find a way to determine is_mobile from a standalone span. - let tags = tag_extraction::extract_tags(span, max_tag_value_size, None, None, is_mobile, None); + let tags = + tag_extraction::extract_tags(span, max_tag_value_size, None, None, is_mobile, None, &[]); span.sentry_tags = Annotated::new( tags.into_iter() .map(|(k, v)| (k.sentry_tag_key().to_owned(), Annotated::new(v))) From 02d488ccf4dbbe7b0958262f2ec9abd94e8fc2bc Mon Sep 17 00:00:00 2001 From: Reinaldy Rafli Date: Sat, 20 Jul 2024 19:59:08 +0700 Subject: [PATCH 6/9] fix(normalization): another attempt to fix cargo check --- .../src/normalize/span/tag_extraction.rs | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index 1b2963e533..212701ee09 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -1441,7 +1441,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200, None); + extract_span_tags_from_event(&mut event, 200, &[]); let spans = event.spans.value().unwrap(); @@ -1503,7 +1503,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span = &event.spans.value().unwrap()[0]; @@ -1621,7 +1621,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span_1 = &event.spans.value().unwrap()[0]; let span_2 = &event.spans.value().unwrap()[1]; @@ -1674,7 +1674,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span = &event .spans @@ -1782,7 +1782,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span_1 = &event.spans.value().unwrap()[0]; let span_2 = &event.spans.value().unwrap()[1]; @@ -1905,7 +1905,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span_1 = &event.spans.value().unwrap()[0]; let span_2 = &event.spans.value().unwrap()[1]; @@ -2013,7 +2013,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span = &event.spans.value().unwrap()[0]; @@ -2070,7 +2070,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span = &event.spans.value().unwrap()[0]; let tags = span.value().unwrap().sentry_tags.value().unwrap(); @@ -2098,7 +2098,7 @@ LIMIT 1 .unwrap() .into_value() .unwrap(); - let tags = extract_tags(&span, 200, None, None, false, None, None); + let tags = extract_tags(&span, 200, None, None, false, None, &[]); assert_eq!( tags.get(&SpanTagKey::BrowserName), @@ -2138,7 +2138,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span = &event.spans.value().unwrap()[0]; let tags = span.value().unwrap().sentry_tags.value().unwrap(); @@ -2169,7 +2169,7 @@ LIMIT 1 .unwrap() .into_value() .unwrap(); - let tags = extract_tags(&span, 200, None, None, false, None, None); + let tags = extract_tags(&span, 200, None, None, false, None, &[]); assert_eq!( tags.get(&SpanTagKey::MessagingDestinationName), @@ -2294,7 +2294,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span = &event.spans.value().unwrap()[0]; let tags = span.value().unwrap().sentry_tags.value().unwrap(); @@ -2392,7 +2392,7 @@ LIMIT 1 .unwrap(); span.description.set_value(Some(description.into())); - extract_tags(&span, 200, None, None, false, None, None) + extract_tags(&span, 200, None, None, false, None, &[]) } #[test] From 311dc6a8fcc95b65dc93cdc7ac659a8f9143a5a2 Mon Sep 17 00:00:00 2001 From: Reinaldy Rafli Date: Sat, 27 Jul 2024 19:04:37 +0700 Subject: [PATCH 7/9] chore: update tests --- .../src/normalize/span/description/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 212bd21655..1d88e7fe14 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -288,9 +288,9 @@ pub fn scrub_ipv6(ip: Ipv6Addr) -> &'static str { /// ``` /// use relay_event_normalization::span::description::scrub_domain_name; /// -/// assert_eq!(scrub_domain_name("my.domain.com", None), "*.domain.com"); -/// assert_eq!(scrub_domain_name("data.bbc.co.uk", None), "*.bbc.co.uk"); -/// assert_eq!(scrub_domain_name("hello world", None), "hello world"); +/// assert_eq!(scrub_domain_name("my.domain.com"), "*.domain.com"); +/// assert_eq!(scrub_domain_name("data.bbc.co.uk"), "*.bbc.co.uk"); +/// assert_eq!(scrub_domain_name("hello world"), "hello world"); /// ``` pub fn scrub_domain_name(domain: &str) -> Cow<'_, str> { if DOMAIN_ALLOW_LIST.contains(&domain) { From 67689930224b7b7ef6ccfcc98e52aab16092aaad Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Mon, 29 Jul 2024 08:12:33 +0200 Subject: [PATCH 8/9] ref: Use allow list for standalone spans, deserialize hosts directly --- Cargo.lock | 1 + relay-dynamic-config/Cargo.toml | 1 + relay-dynamic-config/src/global.rs | 3 ++- relay-event-normalization/src/event.rs | 3 ++- .../src/normalize/span/description/mod.rs | 8 ++++---- .../src/normalize/span/tag_extraction.rs | 8 ++++---- relay-server/src/services/processor.rs | 4 ++-- relay-server/src/services/processor/span.rs | 3 ++- .../src/services/processor/span/processing.rs | 17 +++++++++++++++-- 9 files changed, 33 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 205df1640a..489eb44636 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3744,6 +3744,7 @@ dependencies = [ "serde", "serde_json", "similar-asserts", + "url", ] [[package]] diff --git a/relay-dynamic-config/Cargo.toml b/relay-dynamic-config/Cargo.toml index 39d9a1a9d2..e1be64eb4a 100644 --- a/relay-dynamic-config/Cargo.toml +++ b/relay-dynamic-config/Cargo.toml @@ -30,6 +30,7 @@ relay-quotas = { workspace = true } relay-sampling = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +url = { workspace = true } [dev-dependencies] similar-asserts = { workspace = true } diff --git a/relay-dynamic-config/src/global.rs b/relay-dynamic-config/src/global.rs index 291a570b56..321c4cbfc1 100644 --- a/relay-dynamic-config/src/global.rs +++ b/relay-dynamic-config/src/global.rs @@ -10,6 +10,7 @@ use relay_filter::GenericFiltersConfig; use relay_quotas::Quota; use serde::{de, Deserialize, Serialize}; use serde_json::Value; +use url::Host; use crate::{defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups}; @@ -233,7 +234,7 @@ pub struct Options { deserialize_with = "default_on_error", skip_serializing_if = "Vec::is_empty" )] - pub http_span_allowed_hosts: Vec, + pub http_span_allowed_hosts: Vec, /// All other unknown options. #[serde(flatten)] diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index 4437415dc3..bb859fbcfa 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -25,6 +25,7 @@ use relay_protocol::{ RemarkType, Value, }; use smallvec::SmallVec; +use url::Host; use uuid::Uuid; use crate::normalize::request; @@ -156,7 +157,7 @@ pub struct NormalizationConfig<'a> { pub replay_id: Option, /// Controls list of hosts to be excluded from scrubbing - pub span_allowed_hosts: &'a [String], + pub span_allowed_hosts: &'a [Host], } impl<'a> Default for NormalizationConfig<'a> { diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 1d88e7fe14..277d3aa775 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -38,7 +38,7 @@ const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"]; /// Returns `None` if no scrubbing can be performed. pub(crate) fn scrub_span_description( span: &Span, - span_allowed_hosts: &[String], + span_allowed_hosts: &[Host], ) -> (Option, Option>) { let Some(description) = span.description.as_str() else { return (None, None); @@ -167,7 +167,7 @@ fn scrub_supabase(string: &str) -> Option { Some(DB_SUPABASE_REGEX.replace_all(string, "{%s}").into()) } -fn scrub_http(string: &str, allow_list: &[String]) -> Option { +fn scrub_http(string: &str, allow_list: &[Host]) -> Option { let (method, url) = string.split_once(' ')?; if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) { return None; @@ -227,8 +227,8 @@ fn scrub_file(description: &str) -> Option { /// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST), &[]), "127.0.0.1"); /// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[String::from("8.8.8.8")]), "8.8.8.8"); /// ``` -pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [String]) -> Cow<'a, str> { - if allow_list.contains(&host.to_string()) { +pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [Host]) -> Cow<'a, str> { + if allow_list.iter().any(|allowed_host| &host == allowed_host) { return host.to_string().into(); } diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index 212701ee09..aa917d354a 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -16,7 +16,7 @@ use relay_event_schema::protocol::{ use relay_protocol::{Annotated, Value}; use sqlparser::ast::Visit; use sqlparser::ast::{ObjectName, Visitor}; -use url::Url; +use url::{Host, Url}; use crate::span::description::{ concatenate_host_and_port, scrub_domain_name, scrub_span_description, @@ -179,7 +179,7 @@ impl std::fmt::Display for RenderBlockingStatus { pub(crate) fn extract_span_tags_from_event( event: &mut Event, max_tag_value_size: usize, - http_scrubbing_allow_list: &[String], + http_scrubbing_allow_list: &[Host], ) { // Temporarily take ownership to pass both an event reference and a mutable span reference to `extract_span_tags`. let mut spans = std::mem::take(&mut event.spans); @@ -203,7 +203,7 @@ pub fn extract_span_tags( event: &Event, spans: &mut [Annotated], max_tag_value_size: usize, - span_allowed_hosts: &[String], + span_allowed_hosts: &[Host], ) { // TODO: To prevent differences between metrics and payloads, we should not extract tags here // when they have already been extracted by a downstream relay. @@ -471,7 +471,7 @@ pub fn extract_tags( full_display: Option, is_mobile: bool, start_type: Option<&str>, - span_allowed_hosts: &[String], + span_allowed_hosts: &[Host], ) -> BTreeMap { let mut span_tags: BTreeMap = BTreeMap::new(); diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 98a8b6a28b..cc9f541108 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1484,7 +1484,7 @@ impl EnvelopeProcessorService { let global_config = self.inner.global_config.current(); let ai_model_costs = global_config.ai_model_costs.clone().ok(); - let http_span_allowed_hosts = global_config.options.http_span_allowed_hosts.clone(); + let http_span_allowed_hosts = global_config.options.http_span_allowed_hosts.as_slice(); utils::log_transaction_name_metrics(&mut state.event, |event| { let event_validation_config = EventValidationConfig { @@ -1568,7 +1568,7 @@ impl EnvelopeProcessorService { .envelope() .dsc() .and_then(|ctx| ctx.replay_id), - span_allowed_hosts: http_span_allowed_hosts.as_ref(), + span_allowed_hosts: http_span_allowed_hosts, }; metric!(timer(RelayTimers::EventProcessingNormalization), { diff --git a/relay-server/src/services/processor/span.rs b/relay-server/src/services/processor/span.rs index 9554ad88f2..0796b4f9af 100644 --- a/relay-server/src/services/processor/span.rs +++ b/relay-server/src/services/processor/span.rs @@ -4,6 +4,7 @@ use relay_dynamic_config::Feature; use relay_event_normalization::span::tag_extraction; use relay_event_schema::protocol::{Event, Span}; use relay_protocol::Annotated; +use url::Host; use crate::services::processor::SpanGroup; use crate::{services::processor::ProcessEnvelopeState, utils::ItemAction}; @@ -33,7 +34,7 @@ pub fn filter(state: &mut ProcessEnvelopeState) { pub fn extract_transaction_span( event: &Event, max_tag_value_size: usize, - span_allowed_hosts: &[String], + span_allowed_hosts: &[Host], ) -> Option { let mut spans = [Span::from(event).into()]; diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index dc032a3ade..e9cc0b956d 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -26,6 +26,7 @@ use relay_pii::PiiProcessor; use relay_protocol::{Annotated, Empty}; use relay_quotas::DataCategory; use relay_spans::{otel_to_sentry_span, otel_trace::Span as OtelSpan}; +use url::Host; use crate::envelope::{ContentType, Item, ItemType}; use crate::metrics_extraction::metrics_summary; @@ -342,6 +343,8 @@ struct NormalizeSpanConfig<'a> { user_agent: Option, /// Client hints parsed from the request. client_hints: ClientHints, + /// Hosts that are not replaced by "*" in HTTP span grouping. + allowed_hosts: &'a [Host], } impl<'a> NormalizeSpanConfig<'a> { @@ -352,6 +355,7 @@ impl<'a> NormalizeSpanConfig<'a> { managed_envelope: &ManagedEnvelope, ) -> Self { let aggregator_config = config.aggregator_config_for(MetricNamespace::Spans); + Self { received_at: managed_envelope.received_at(), timestamp_range: aggregator_config.aggregator.timestamp_range(), @@ -377,6 +381,7 @@ impl<'a> NormalizeSpanConfig<'a> { .user_agent() .map(String::from), client_hints: managed_envelope.meta().client_hints().clone(), + allowed_hosts: global_config.options.http_span_allowed_hosts.as_slice(), } } } @@ -428,6 +433,7 @@ fn normalize( tx_name_rules, user_agent, client_hints, + allowed_hosts, } = config; set_segment_attributes(annotated_span); @@ -487,8 +493,15 @@ fn normalize( // Tag extraction: let is_mobile = false; // TODO: find a way to determine is_mobile from a standalone span. - let tags = - tag_extraction::extract_tags(span, max_tag_value_size, None, None, is_mobile, None, &[]); + let tags = tag_extraction::extract_tags( + span, + max_tag_value_size, + None, + None, + is_mobile, + None, + allowed_hosts, + ); span.sentry_tags = Annotated::new( tags.into_iter() .map(|(k, v)| (k.sentry_tag_key().to_owned(), Annotated::new(v))) From e4470318be1b79bc4607118eaf68833ab5d8dcec Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Mon, 29 Jul 2024 08:18:18 +0200 Subject: [PATCH 9/9] fix: doctest --- relay-event-normalization/src/normalize/span/description/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 277d3aa775..31cc0f0002 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -225,7 +225,7 @@ fn scrub_file(description: &str) -> Option { /// /// assert_eq!(scrub_host(Host::Domain("foo.bar.baz"), &[]), "*.bar.baz"); /// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST), &[]), "127.0.0.1"); -/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[String::from("8.8.8.8")]), "8.8.8.8"); +/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[Host::parse("8.8.8.8").unwrap()]), "8.8.8.8"); /// ``` pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [Host]) -> Cow<'a, str> { if allow_list.iter().any(|allowed_host| &host == allowed_host) {