diff --git a/CHANGELOG.md b/CHANGELOG.md index 6ae0835420..52d616045f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,9 @@ - Report outcomes for spans when transactions are rate limited. ([#3749](https://github.com/getsentry/relay/pull/3749)) - Only transfer valid profile ids. ([#3809](https://github.com/getsentry/relay/pull/3809)) +**Features**: +- Allow list for excluding certain host/IPs from scrubbing in spans. ([#3813](https://github.com/getsentry/relay/pull/3813)) + **Internal**: - Aggregate metrics before rate limiting. ([#3746](https://github.com/getsentry/relay/pull/3746)) diff --git a/Cargo.lock b/Cargo.lock index 217027fb9a..320b43cf55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3744,6 +3744,7 @@ dependencies = [ "serde", "serde_json", "similar-asserts", + "url", ] [[package]] diff --git a/relay-cabi/src/processing.rs b/relay-cabi/src/processing.rs index eb9acfac7c..4c9761e0bd 100644 --- a/relay-cabi/src/processing.rs +++ b/relay-cabi/src/processing.rs @@ -273,6 +273,7 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event( measurements: None, normalize_spans: config.normalize_spans, replay_id: config.replay_id, + span_allowed_hosts: &[], // only supported in relay }; normalize_event(&mut event, &normalization_config); diff --git a/relay-dynamic-config/Cargo.toml b/relay-dynamic-config/Cargo.toml index 39d9a1a9d2..e1be64eb4a 100644 --- a/relay-dynamic-config/Cargo.toml +++ b/relay-dynamic-config/Cargo.toml @@ -30,6 +30,7 @@ relay-quotas = { workspace = true } relay-sampling = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +url = { workspace = true } [dev-dependencies] similar-asserts = { workspace = true } diff --git a/relay-dynamic-config/src/global.rs b/relay-dynamic-config/src/global.rs index 982eb86024..321c4cbfc1 100644 --- a/relay-dynamic-config/src/global.rs +++ b/relay-dynamic-config/src/global.rs @@ -10,6 +10,7 @@ use relay_filter::GenericFiltersConfig; use relay_quotas::Quota; use serde::{de, Deserialize, Serialize}; use serde_json::Value; +use url::Host; use crate::{defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups}; @@ -225,6 +226,16 @@ pub struct Options { )] pub extrapolation_duplication_limit: usize, + /// List of values on span description that are allowed to be sent to Sentry without being scrubbed. + /// + /// At this point, it doesn't accept IP addresses in CIDR format.. yet. + #[serde( + rename = "relay.span-normalization.allowed_hosts", + deserialize_with = "default_on_error", + skip_serializing_if = "Vec::is_empty" + )] + pub http_span_allowed_hosts: Vec, + /// All other unknown options. #[serde(flatten)] other: HashMap, diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index 5a0f4266a1..bb859fbcfa 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -25,6 +25,7 @@ use relay_protocol::{ RemarkType, Value, }; use smallvec::SmallVec; +use url::Host; use uuid::Uuid; use crate::normalize::request; @@ -154,6 +155,9 @@ pub struct NormalizationConfig<'a> { /// /// It is persisted into the event payload for correlation. pub replay_id: Option, + + /// Controls list of hosts to be excluded from scrubbing + pub span_allowed_hosts: &'a [Host], } impl<'a> Default for NormalizationConfig<'a> { @@ -185,6 +189,7 @@ impl<'a> Default for NormalizationConfig<'a> { measurements: None, normalize_spans: true, replay_id: Default::default(), + span_allowed_hosts: Default::default(), } } } @@ -323,7 +328,11 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) { } if config.enrich_spans { - extract_span_tags_from_event(event, config.max_tag_value_length); + extract_span_tags_from_event( + event, + config.max_tag_value_length, + config.span_allowed_hosts, + ); } if let Some(context) = event.context_mut::() { diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 224f383dcc..31cc0f0002 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -38,6 +38,7 @@ const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"]; /// Returns `None` if no scrubbing can be performed. pub(crate) fn scrub_span_description( span: &Span, + span_allowed_hosts: &[Host], ) -> (Option, Option>) { let Some(description) = span.description.as_str() else { return (None, None); @@ -56,7 +57,7 @@ pub(crate) fn scrub_span_description( .as_str() .map(|op| op.split_once('.').unwrap_or((op, ""))) .and_then(|(op, sub)| match (op, sub) { - ("http", _) => scrub_http(description), + ("http", _) => scrub_http(description, span_allowed_hosts), ("cache", _) | ("db", "redis") => scrub_redis_keys(description), ("db", _) if db_system == Some("redis") => scrub_redis_keys(description), ("db", sub) => { @@ -166,7 +167,7 @@ fn scrub_supabase(string: &str) -> Option { Some(DB_SUPABASE_REGEX.replace_all(string, "{%s}").into()) } -fn scrub_http(string: &str) -> Option { +fn scrub_http(string: &str, allow_list: &[Host]) -> Option { let (method, url) = string.split_once(' ')?; if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) { return None; @@ -179,7 +180,7 @@ fn scrub_http(string: &str) -> Option { let scrubbed = match Url::parse(url) { Ok(url) => { let scheme = url.scheme(); - let scrubbed_host = url.host().map(scrub_host); + let scrubbed_host = url.host().map(|host| scrub_host(host, allow_list)); let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port()); format!("{method} {scheme}://{domain}") @@ -222,10 +223,15 @@ fn scrub_file(description: &str) -> Option { /// use std::net::{Ipv4Addr, Ipv6Addr}; /// use relay_event_normalization::span::description::scrub_host; /// -/// assert_eq!(scrub_host(Host::Domain("foo.bar.baz")), "*.bar.baz"); -/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST)), "127.0.0.1"); +/// assert_eq!(scrub_host(Host::Domain("foo.bar.baz"), &[]), "*.bar.baz"); +/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST), &[]), "127.0.0.1"); +/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[Host::parse("8.8.8.8").unwrap()]), "8.8.8.8"); /// ``` -pub fn scrub_host(host: Host<&str>) -> Cow<'_, str> { +pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [Host]) -> Cow<'a, str> { + if allow_list.iter().any(|allowed_host| &host == allowed_host) { + return host.to_string().into(); + } + match host { Host::Ipv4(ip) => Cow::Borrowed(scrub_ipv4(ip)), Host::Ipv6(ip) => Cow::Borrowed(scrub_ipv6(ip)), @@ -394,7 +400,7 @@ fn scrub_resource(resource_type: &str, string: &str) -> Option { return Some("browser-extension://*".to_owned()); } scheme => { - let scrubbed_host = url.host().map(scrub_host); + let scrubbed_host = url.host().map(|host| scrub_host(host, &[])); let domain = concatenate_host_and_port(scrubbed_host.as_deref(), url.port()); let segment_count = url.path_segments().map(|s| s.count()).unwrap_or_default(); @@ -562,7 +568,7 @@ mod tests { .description .set_value(Some($description_in.into())); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); if $expected == "" { assert!(scrubbed.0.is_none()); @@ -1121,7 +1127,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); let span = span.value_mut().as_mut().unwrap(); - let scrubbed = scrub_span_description(span); + let scrubbed = scrub_span_description(span, &[]); assert_eq!(scrubbed.0.as_deref(), Some("SELECT %s")); } @@ -1134,7 +1140,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); // When db.system is missing, no scrubbed description (i.e. no group) is set. assert!(scrubbed.0.is_none()); @@ -1152,7 +1158,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); // Can be scrubbed with db system. assert_eq!(scrubbed.0.as_deref(), Some("SELECT a FROM b")); @@ -1170,7 +1176,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); // NOTE: this should return `DEL *`, but we cannot detect lowercase command names yet. assert_eq!(scrubbed.0.as_deref(), Some("*")); @@ -1186,7 +1192,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); assert_eq!(scrubbed.0.as_deref(), Some("INSERTED * 'UAEventData'")); } @@ -1201,7 +1207,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); assert_eq!( scrubbed.0.as_deref(), @@ -1221,7 +1227,7 @@ mod tests { let mut span = Annotated::::from_json(json).unwrap(); - let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap()); + let scrubbed = scrub_span_description(span.value_mut().as_mut().unwrap(), &[]); // Can be scrubbed with db system. assert_eq!(scrubbed.0.as_deref(), Some("my-component-name")); diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index ef3e1e0676..9fb4f4b6a2 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -16,7 +16,7 @@ use relay_event_schema::protocol::{ use relay_protocol::{Annotated, Empty, Value}; use sqlparser::ast::Visit; use sqlparser::ast::{ObjectName, Visitor}; -use url::Url; +use url::{Host, Url}; use crate::span::description::{ concatenate_host_and_port, scrub_domain_name, scrub_span_description, @@ -176,13 +176,22 @@ impl std::fmt::Display for RenderBlockingStatus { /// Wrapper for [`extract_span_tags`]. /// /// Tags longer than `max_tag_value_size` bytes will be truncated. -pub(crate) fn extract_span_tags_from_event(event: &mut Event, max_tag_value_size: usize) { +pub(crate) fn extract_span_tags_from_event( + event: &mut Event, + max_tag_value_size: usize, + http_scrubbing_allow_list: &[Host], +) { // Temporarily take ownership to pass both an event reference and a mutable span reference to `extract_span_tags`. let mut spans = std::mem::take(&mut event.spans); let Some(spans_vec) = spans.value_mut() else { return; }; - extract_span_tags(event, spans_vec.as_mut_slice(), max_tag_value_size); + extract_span_tags( + event, + spans_vec.as_mut_slice(), + max_tag_value_size, + http_scrubbing_allow_list, + ); event.spans = spans; } @@ -190,7 +199,12 @@ pub(crate) fn extract_span_tags_from_event(event: &mut Event, max_tag_value_size /// Extracts tags and measurements from event and spans and materializes them into the spans. /// /// Tags longer than `max_tag_value_size` bytes will be truncated. -pub fn extract_span_tags(event: &Event, spans: &mut [Annotated], max_tag_value_size: usize) { +pub fn extract_span_tags( + event: &Event, + spans: &mut [Annotated], + max_tag_value_size: usize, + span_allowed_hosts: &[Host], +) { // TODO: To prevent differences between metrics and payloads, we should not extract tags here // when they have already been extracted by a downstream relay. let shared_tags = extract_shared_tags(event); @@ -208,7 +222,15 @@ pub fn extract_span_tags(event: &Event, spans: &mut [Annotated], max_tag_v continue; }; - let tags = extract_tags(span, max_tag_value_size, ttid, ttfd, is_mobile, start_type); + let tags = extract_tags( + span, + max_tag_value_size, + ttid, + ttfd, + is_mobile, + start_type, + span_allowed_hosts, + ); span.sentry_tags = Annotated::new( shared_tags @@ -461,6 +483,7 @@ pub fn extract_tags( full_display: Option, is_mobile: bool, start_type: Option<&str>, + span_allowed_hosts: &[Host], ) -> BTreeMap { let mut span_tags: BTreeMap = BTreeMap::new(); @@ -487,7 +510,7 @@ pub fn extract_tags( span_tags.insert(SpanTagKey::Category, category.to_owned()); } - let (scrubbed_description, parsed_sql) = scrub_span_description(span); + let (scrubbed_description, parsed_sql) = scrub_span_description(span, span_allowed_hosts); let action = match (category, span_op.as_str(), &scrubbed_description) { (Some("http"), _, _) => span @@ -1451,7 +1474,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let spans = event.spans.value().unwrap(); @@ -1513,7 +1536,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span = &event.spans.value().unwrap()[0]; @@ -1631,7 +1654,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span_1 = &event.spans.value().unwrap()[0]; let span_2 = &event.spans.value().unwrap()[1]; @@ -1689,7 +1712,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span = &event .spans @@ -1803,7 +1826,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span_1 = &event.spans.value().unwrap()[0]; let span_2 = &event.spans.value().unwrap()[1]; @@ -1926,7 +1949,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span_1 = &event.spans.value().unwrap()[0]; let span_2 = &event.spans.value().unwrap()[1]; @@ -2034,7 +2057,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span = &event.spans.value().unwrap()[0]; @@ -2091,7 +2114,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span = &event.spans.value().unwrap()[0]; let tags = span.value().unwrap().sentry_tags.value().unwrap(); @@ -2119,7 +2142,7 @@ LIMIT 1 .unwrap() .into_value() .unwrap(); - let tags = extract_tags(&span, 200, None, None, false, None); + let tags = extract_tags(&span, 200, None, None, false, None, &[]); assert_eq!( tags.get(&SpanTagKey::BrowserName), @@ -2159,7 +2182,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span = &event.spans.value().unwrap()[0]; let tags = span.value().unwrap().sentry_tags.value().unwrap(); @@ -2190,7 +2213,7 @@ LIMIT 1 .unwrap() .into_value() .unwrap(); - let tags = extract_tags(&span, 200, None, None, false, None); + let tags = extract_tags(&span, 200, None, None, false, None, &[]); assert_eq!( tags.get(&SpanTagKey::MessagingDestinationName), @@ -2315,7 +2338,7 @@ LIMIT 1 .into_value() .unwrap(); - extract_span_tags_from_event(&mut event, 200); + extract_span_tags_from_event(&mut event, 200, &[]); let span = &event.spans.value().unwrap()[0]; let tags = span.value().unwrap().sentry_tags.value().unwrap(); @@ -2413,7 +2436,7 @@ LIMIT 1 .unwrap(); span.description.set_value(Some(description.into())); - extract_tags(&span, 200, None, None, false, None) + extract_tags(&span, 200, None, None, false, None, &[]) } #[test] diff --git a/relay-server/src/metrics_extraction/event.rs b/relay-server/src/metrics_extraction/event.rs index a3f80e810e..701318a635 100644 --- a/relay-server/src/metrics_extraction/event.rs +++ b/relay-server/src/metrics_extraction/event.rs @@ -69,7 +69,7 @@ fn extract_span_metrics_for_event( output: &mut Vec, ) { relay_statsd::metric!(timer(RelayTimers::EventProcessingSpanMetricsExtraction), { - if let Some(transaction_span) = extract_transaction_span(event, max_tag_value_size) { + if let Some(transaction_span) = extract_transaction_span(event, max_tag_value_size, &[]) { let (metrics, metrics_summary) = metrics_summary::extract_and_summarize_metrics(&transaction_span, config); metrics_summary.apply_on(&mut event._metrics_summary); diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 678104294a..f05745e9a8 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1572,6 +1572,7 @@ impl EnvelopeProcessorService { let global_config = self.inner.global_config.current(); let ai_model_costs = global_config.ai_model_costs.clone().ok(); + let http_span_allowed_hosts = global_config.options.http_span_allowed_hosts.as_slice(); utils::log_transaction_name_metrics(&mut state.event, |event| { let event_validation_config = EventValidationConfig { @@ -1655,6 +1656,7 @@ impl EnvelopeProcessorService { .envelope() .dsc() .and_then(|ctx| ctx.replay_id), + span_allowed_hosts: http_span_allowed_hosts, }; metric!(timer(RelayTimers::EventProcessingNormalization), { diff --git a/relay-server/src/services/processor/span.rs b/relay-server/src/services/processor/span.rs index a58830543f..0796b4f9af 100644 --- a/relay-server/src/services/processor/span.rs +++ b/relay-server/src/services/processor/span.rs @@ -4,6 +4,7 @@ use relay_dynamic_config::Feature; use relay_event_normalization::span::tag_extraction; use relay_event_schema::protocol::{Event, Span}; use relay_protocol::Annotated; +use url::Host; use crate::services::processor::SpanGroup; use crate::{services::processor::ProcessEnvelopeState, utils::ItemAction}; @@ -30,10 +31,14 @@ pub fn filter(state: &mut ProcessEnvelopeState) { /// Creates a span from the transaction and applies tag extraction on it. /// /// Returns `None` when [`tag_extraction::extract_span_tags`] clears the span, which it shouldn't. -pub fn extract_transaction_span(event: &Event, max_tag_value_size: usize) -> Option { +pub fn extract_transaction_span( + event: &Event, + max_tag_value_size: usize, + span_allowed_hosts: &[Host], +) -> Option { let mut spans = [Span::from(event).into()]; - tag_extraction::extract_span_tags(event, &mut spans, max_tag_value_size); + tag_extraction::extract_span_tags(event, &mut spans, max_tag_value_size, span_allowed_hosts); tag_extraction::extract_segment_span_tags(event, &mut spans); spans.into_iter().next().and_then(Annotated::into_value) diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index 3fe882be44..e9cc0b956d 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -26,6 +26,7 @@ use relay_pii::PiiProcessor; use relay_protocol::{Annotated, Empty}; use relay_quotas::DataCategory; use relay_spans::{otel_to_sentry_span, otel_trace::Span as OtelSpan}; +use url::Host; use crate::envelope::{ContentType, Item, ItemType}; use crate::metrics_extraction::metrics_summary; @@ -272,6 +273,7 @@ pub fn extract_from_event( .aggregator_config_for(MetricNamespace::Spans) .aggregator .max_tag_value_length, + &[], ) else { return; }; @@ -341,6 +343,8 @@ struct NormalizeSpanConfig<'a> { user_agent: Option, /// Client hints parsed from the request. client_hints: ClientHints, + /// Hosts that are not replaced by "*" in HTTP span grouping. + allowed_hosts: &'a [Host], } impl<'a> NormalizeSpanConfig<'a> { @@ -351,6 +355,7 @@ impl<'a> NormalizeSpanConfig<'a> { managed_envelope: &ManagedEnvelope, ) -> Self { let aggregator_config = config.aggregator_config_for(MetricNamespace::Spans); + Self { received_at: managed_envelope.received_at(), timestamp_range: aggregator_config.aggregator.timestamp_range(), @@ -376,6 +381,7 @@ impl<'a> NormalizeSpanConfig<'a> { .user_agent() .map(String::from), client_hints: managed_envelope.meta().client_hints().clone(), + allowed_hosts: global_config.options.http_span_allowed_hosts.as_slice(), } } } @@ -427,6 +433,7 @@ fn normalize( tx_name_rules, user_agent, client_hints, + allowed_hosts, } = config; set_segment_attributes(annotated_span); @@ -486,7 +493,15 @@ fn normalize( // Tag extraction: let is_mobile = false; // TODO: find a way to determine is_mobile from a standalone span. - let tags = tag_extraction::extract_tags(span, max_tag_value_size, None, None, is_mobile, None); + let tags = tag_extraction::extract_tags( + span, + max_tag_value_size, + None, + None, + is_mobile, + None, + allowed_hosts, + ); span.sentry_tags = Annotated::new( tags.into_iter() .map(|(k, v)| (k.sentry_tag_key().to_owned(), Annotated::new(v)))