diff --git a/Cargo.lock b/Cargo.lock index 205df1640a..489eb44636 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3744,6 +3744,7 @@ dependencies = [ "serde", "serde_json", "similar-asserts", + "url", ] [[package]] diff --git a/relay-dynamic-config/Cargo.toml b/relay-dynamic-config/Cargo.toml index 39d9a1a9d2..e1be64eb4a 100644 --- a/relay-dynamic-config/Cargo.toml +++ b/relay-dynamic-config/Cargo.toml @@ -30,6 +30,7 @@ relay-quotas = { workspace = true } relay-sampling = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +url = { workspace = true } [dev-dependencies] similar-asserts = { workspace = true } diff --git a/relay-dynamic-config/src/global.rs b/relay-dynamic-config/src/global.rs index 291a570b56..321c4cbfc1 100644 --- a/relay-dynamic-config/src/global.rs +++ b/relay-dynamic-config/src/global.rs @@ -10,6 +10,7 @@ use relay_filter::GenericFiltersConfig; use relay_quotas::Quota; use serde::{de, Deserialize, Serialize}; use serde_json::Value; +use url::Host; use crate::{defaults, ErrorBoundary, MetricExtractionGroup, MetricExtractionGroups}; @@ -233,7 +234,7 @@ pub struct Options { deserialize_with = "default_on_error", skip_serializing_if = "Vec::is_empty" )] - pub http_span_allowed_hosts: Vec<String>, + pub http_span_allowed_hosts: Vec<Host>, /// All other unknown options. 
#[serde(flatten)] diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index 4437415dc3..bb859fbcfa 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -25,6 +25,7 @@ use relay_protocol::{ RemarkType, Value, }; use smallvec::SmallVec; +use url::Host; use uuid::Uuid; use crate::normalize::request; @@ -156,7 +157,7 @@ pub struct NormalizationConfig<'a> { pub replay_id: Option, /// Controls list of hosts to be excluded from scrubbing - pub span_allowed_hosts: &'a [String], + pub span_allowed_hosts: &'a [Host], } impl<'a> Default for NormalizationConfig<'a> { diff --git a/relay-event-normalization/src/normalize/span/description/mod.rs b/relay-event-normalization/src/normalize/span/description/mod.rs index 1d88e7fe14..277d3aa775 100644 --- a/relay-event-normalization/src/normalize/span/description/mod.rs +++ b/relay-event-normalization/src/normalize/span/description/mod.rs @@ -38,7 +38,7 @@ const DOMAIN_ALLOW_LIST: &[&str] = &["localhost"]; /// Returns `None` if no scrubbing can be performed. 
pub(crate) fn scrub_span_description( span: &Span, - span_allowed_hosts: &[String], + span_allowed_hosts: &[Host], ) -> (Option, Option>) { let Some(description) = span.description.as_str() else { return (None, None); @@ -167,7 +167,7 @@ fn scrub_supabase(string: &str) -> Option { Some(DB_SUPABASE_REGEX.replace_all(string, "{%s}").into()) } -fn scrub_http(string: &str, allow_list: &[String]) -> Option { +fn scrub_http(string: &str, allow_list: &[Host]) -> Option { let (method, url) = string.split_once(' ')?; if !HTTP_METHOD_EXTRACTOR_REGEX.is_match(method) { return None; @@ -227,8 +227,8 @@ fn scrub_file(description: &str) -> Option { /// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::LOCALHOST), &[]), "127.0.0.1"); -/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[String::from("8.8.8.8")]), "8.8.8.8"); +/// assert_eq!(scrub_host(Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8)), &[Host::Ipv4(Ipv4Addr::new(8, 8, 8, 8))]), "8.8.8.8"); /// ``` -pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [String]) -> Cow<'a, str> { - if allow_list.contains(&host.to_string()) { +pub fn scrub_host<'a>(host: Host<&'a str>, allow_list: &'a [Host]) -> Cow<'a, str> { + if allow_list.iter().any(|allowed_host| &host == allowed_host) { return host.to_string().into(); } diff --git a/relay-event-normalization/src/normalize/span/tag_extraction.rs b/relay-event-normalization/src/normalize/span/tag_extraction.rs index 212701ee09..aa917d354a 100644 --- a/relay-event-normalization/src/normalize/span/tag_extraction.rs +++ b/relay-event-normalization/src/normalize/span/tag_extraction.rs @@ -16,7 +16,7 @@ use relay_event_schema::protocol::{ use relay_protocol::{Annotated, Value}; use sqlparser::ast::Visit; use sqlparser::ast::{ObjectName, Visitor}; -use url::Url; +use url::{Host, Url}; use crate::span::description::{ concatenate_host_and_port, scrub_domain_name, scrub_span_description, @@ -179,7 +179,7 @@ impl std::fmt::Display for RenderBlockingStatus { pub(crate) fn extract_span_tags_from_event( event: &mut Event, max_tag_value_size: usize, - http_scrubbing_allow_list: &[String], + 
http_scrubbing_allow_list: &[Host], ) { // Temporarily take ownership to pass both an event reference and a mutable span reference to `extract_span_tags`. let mut spans = std::mem::take(&mut event.spans); @@ -203,7 +203,7 @@ pub fn extract_span_tags( event: &Event, spans: &mut [Annotated], max_tag_value_size: usize, - span_allowed_hosts: &[String], + span_allowed_hosts: &[Host], ) { // TODO: To prevent differences between metrics and payloads, we should not extract tags here // when they have already been extracted by a downstream relay. @@ -471,7 +471,7 @@ pub fn extract_tags( full_display: Option, is_mobile: bool, start_type: Option<&str>, - span_allowed_hosts: &[String], + span_allowed_hosts: &[Host], ) -> BTreeMap { let mut span_tags: BTreeMap = BTreeMap::new(); diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 98a8b6a28b..cc9f541108 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1484,7 +1484,7 @@ impl EnvelopeProcessorService { let global_config = self.inner.global_config.current(); let ai_model_costs = global_config.ai_model_costs.clone().ok(); - let http_span_allowed_hosts = global_config.options.http_span_allowed_hosts.clone(); + let http_span_allowed_hosts = global_config.options.http_span_allowed_hosts.as_slice(); utils::log_transaction_name_metrics(&mut state.event, |event| { let event_validation_config = EventValidationConfig { @@ -1568,7 +1568,7 @@ impl EnvelopeProcessorService { .envelope() .dsc() .and_then(|ctx| ctx.replay_id), - span_allowed_hosts: http_span_allowed_hosts.as_ref(), + span_allowed_hosts: http_span_allowed_hosts, }; metric!(timer(RelayTimers::EventProcessingNormalization), { diff --git a/relay-server/src/services/processor/span.rs b/relay-server/src/services/processor/span.rs index 9554ad88f2..0796b4f9af 100644 --- a/relay-server/src/services/processor/span.rs +++ b/relay-server/src/services/processor/span.rs @@ -4,6 +4,7 @@ use 
relay_dynamic_config::Feature; use relay_event_normalization::span::tag_extraction; use relay_event_schema::protocol::{Event, Span}; use relay_protocol::Annotated; +use url::Host; use crate::services::processor::SpanGroup; use crate::{services::processor::ProcessEnvelopeState, utils::ItemAction}; @@ -33,7 +34,7 @@ pub fn filter(state: &mut ProcessEnvelopeState) { pub fn extract_transaction_span( event: &Event, max_tag_value_size: usize, - span_allowed_hosts: &[String], + span_allowed_hosts: &[Host], ) -> Option { let mut spans = [Span::from(event).into()]; diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index dc032a3ade..e9cc0b956d 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -26,6 +26,7 @@ use relay_pii::PiiProcessor; use relay_protocol::{Annotated, Empty}; use relay_quotas::DataCategory; use relay_spans::{otel_to_sentry_span, otel_trace::Span as OtelSpan}; +use url::Host; use crate::envelope::{ContentType, Item, ItemType}; use crate::metrics_extraction::metrics_summary; @@ -342,6 +343,8 @@ struct NormalizeSpanConfig<'a> { user_agent: Option, /// Client hints parsed from the request. client_hints: ClientHints, + /// Hosts that are not replaced by "*" in HTTP span grouping. 
+ allowed_hosts: &'a [Host], } impl<'a> NormalizeSpanConfig<'a> { @@ -352,6 +355,7 @@ impl<'a> NormalizeSpanConfig<'a> { managed_envelope: &ManagedEnvelope, ) -> Self { let aggregator_config = config.aggregator_config_for(MetricNamespace::Spans); + Self { received_at: managed_envelope.received_at(), timestamp_range: aggregator_config.aggregator.timestamp_range(), @@ -377,6 +381,7 @@ impl<'a> NormalizeSpanConfig<'a> { .user_agent() .map(String::from), client_hints: managed_envelope.meta().client_hints().clone(), + allowed_hosts: global_config.options.http_span_allowed_hosts.as_slice(), } } } @@ -428,6 +433,7 @@ fn normalize( tx_name_rules, user_agent, client_hints, + allowed_hosts, } = config; set_segment_attributes(annotated_span); @@ -487,8 +493,15 @@ fn normalize( // Tag extraction: let is_mobile = false; // TODO: find a way to determine is_mobile from a standalone span. - let tags = - tag_extraction::extract_tags(span, max_tag_value_size, None, None, is_mobile, None, &[]); + let tags = tag_extraction::extract_tags( + span, + max_tag_value_size, + None, + None, + is_mobile, + None, + allowed_hosts, + ); span.sentry_tags = Annotated::new( tags.into_iter() .map(|(k, v)| (k.sentry_tag_key().to_owned(), Annotated::new(v)))