diff --git a/CHANGELOG.md b/CHANGELOG.md index 696b1aa22f..56b55decb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ - Add a config option to add default tags to all Relay Sentry events. ([#3944](https://github.com/getsentry/relay/pull/3944)) - Automatically derive `client.address` and `user.geo` for standalone spans. ([#4047](https://github.com/getsentry/relay/pull/4047)) +- Add support for uploading compressed (gzip, xz, zstd, bzip2) minidumps. ([#4029](https://github.com/getsentry/relay/pull/4029)) - Configurable span.op inference. ([#4056](https://github.com/getsentry/relay/pull/4056)) **Internal:** diff --git a/Cargo.lock b/Cargo.lock index 1b383fb836..96c3d76c99 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -520,6 +520,27 @@ dependencies = [ "serde", ] +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "cadence" version = "1.4.0" @@ -2084,6 +2105,26 @@ dependencies = [ "winapi", ] +[[package]] +name = "liblzma" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7c45fc6fcf5b527d3cf89c1dee8c327943984b0dc8bfcf6e100473b00969e63" +dependencies = [ + "liblzma-sys", +] + +[[package]] +name = "liblzma-sys" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63117d31458acdb7b406f6c60090aa8e1e7cd6e283f8ee02ce585ed68c53fe39" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + [[package]] name = "libm" version = "0.2.7" @@ -3702,6 +3743,7 @@ dependencies = [ "backoff", "brotli", "bytes", + "bzip2", 
"chrono", "criterion", "data-encoding", @@ -3713,6 +3755,7 @@ dependencies = [ "insta", "itertools 0.13.0", "json-forensics", + "liblzma", "mime", "minidump", "multer", @@ -5647,7 +5690,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index c53dcf749a..f582dbfb1f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -69,6 +69,7 @@ bindgen = "0.70.1" brotli = "6.0.0" bytecount = "0.6.0" bytes = "1.4.0" +bzip2 = "0.4.4" cadence = "1.4.0" chrono = { version = "0.4.31", default-features = false, features = [ "std", @@ -104,6 +105,7 @@ ipnetwork = "0.20.0" itertools = "0.13.0" json-forensics = "0.1.1" lru = "0.12.4" +liblzma = "0.3.4" maxminddb = "0.24.0" memchr = "2.7.4" md5 = "0.7.0" diff --git a/relay-server/Cargo.toml b/relay-server/Cargo.toml index 1a787ecde4..f079baea06 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -16,7 +16,6 @@ processing = [ "dep:minidump", "dep:symbolic-common", "dep:symbolic-unreal", - "dep:zstd", "relay-cardinality/redis", "relay-config/processing", "relay-kafka/producer", @@ -39,6 +38,7 @@ arc-swap = { workspace = true } backoff = { workspace = true } brotli = { workspace = true } bytes = { workspace = true, features = ["serde"] } +bzip2 = { workspace = true } chrono = { workspace = true, features = ["clock"] } data-encoding = { workspace = true } flate2 = { workspace = true } @@ -48,6 +48,7 @@ hashbrown = { workspace = true } hyper-util = { workspace = true } itertools = { workspace = true } json-forensics = { workspace = true } +liblzma = { workspace = true } mime = { workspace = true } minidump = { workspace = true, optional = true } multer = { workspace = true } @@ -121,7 +122,7 @@ tower-http = { workspace = true, default-features = false, features = [ ] } url = { workspace = true, features = 
["serde"] } uuid = { workspace = true, features = ["v5"] } -zstd = { workspace = true, optional = true } +zstd = { workspace = true } semver = { workspace = true } [dev-dependencies] diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 8ca600ef7c..e16afb6025 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -67,6 +67,9 @@ pub enum BadStoreRequest { #[error("missing minidump")] MissingMinidump, + #[error("invalid compression container")] + InvalidCompressionContainer(#[source] std::io::Error), + #[error("invalid event id")] InvalidEventId, diff --git a/relay-server/src/endpoints/minidump.rs b/relay-server/src/endpoints/minidump.rs index 80d513d709..577b5bc6d6 100644 --- a/relay-server/src/endpoints/minidump.rs +++ b/relay-server/src/endpoints/minidump.rs @@ -1,17 +1,24 @@ -use std::convert::Infallible; - use axum::extract::{DefaultBodyLimit, Request}; use axum::response::IntoResponse; use axum::routing::{post, MethodRouter}; use axum::RequestExt; use bytes::Bytes; +use bzip2::read::BzDecoder; +use flate2::read::GzDecoder; +use liblzma::read::XzDecoder; use multer::Multipart; use relay_config::Config; use relay_event_schema::protocol::EventId; +use std::convert::Infallible; +use std::error::Error; +use std::io::Cursor; +use std::io::Read; +use zstd::stream::Decoder as ZstdDecoder; use crate::constants::{ITEM_NAME_BREADCRUMBS1, ITEM_NAME_BREADCRUMBS2, ITEM_NAME_EVENT}; use crate::endpoints::common::{self, BadStoreRequest, TextResponse}; -use crate::envelope::{AttachmentType, ContentType, Envelope, Item, ItemType}; +use crate::envelope::ContentType::Minidump; +use crate::envelope::{AttachmentType, Envelope, Item, ItemType}; use crate::extractors::{RawContentType, Remote, RequestMeta}; use crate::service::ServiceState; use crate::utils; @@ -31,6 +38,15 @@ const MINIDUMP_FILE_NAME: &str = "Minidump"; const MINIDUMP_MAGIC_HEADER_LE: &[u8] = b"MDMP"; const MINIDUMP_MAGIC_HEADER_BE: 
&[u8] = b"PMDM"; +/// Magic bytes for gzip compressed minidump containers. +const GZIP_MAGIC_HEADER: &[u8] = b"\x1F\x8B"; +/// Magic bytes for xz compressed minidump containers. +const XZ_MAGIC_HEADER: &[u8] = b"\xFD\x37\x7A\x58\x5A\x00"; +/// Magic bytes for bzip2 compressed minidump containers. +const BZIP2_MAGIC_HEADER: &[u8] = b"\x42\x5A\x68"; +/// Magic bytes for zstd compressed minidump containers. +const ZSTD_MAGIC_HEADER: &[u8] = b"\x28\xB5\x2F\xFD"; + /// Content types by which standalone uploads can be recognized. const MINIDUMP_RAW_CONTENT_TYPES: &[&str] = &["application/octet-stream", "application/x-dmp"]; @@ -43,6 +59,66 @@ fn validate_minidump(data: &[u8]) -> Result<(), BadStoreRequest> { Ok(()) } +/// Convenience wrapper to let a decoder decode its full input into a buffer +fn run_decoder(decoder: &mut Box) -> std::io::Result> { + let mut buffer = Vec::new(); + decoder.read_to_end(&mut buffer)?; + Ok(buffer) +} + +/// Creates a decoder based on the magic bytes of the minidump payload +fn decoder_from(minidump_data: Bytes) -> Option> { + if minidump_data.starts_with(GZIP_MAGIC_HEADER) { + return Some(Box::new(GzDecoder::new(Cursor::new(minidump_data)))); + } else if minidump_data.starts_with(XZ_MAGIC_HEADER) { + return Some(Box::new(XzDecoder::new(Cursor::new(minidump_data)))); + } else if minidump_data.starts_with(BZIP2_MAGIC_HEADER) { + return Some(Box::new(BzDecoder::new(Cursor::new(minidump_data)))); + } else if minidump_data.starts_with(ZSTD_MAGIC_HEADER) { + return match ZstdDecoder::new(Cursor::new(minidump_data)) { + Ok(decoder) => Some(Box::new(decoder)), + Err(ref err) => { + relay_log::error!(error = err as &dyn Error, "failed to create ZstdDecoder"); + None + } + }; + } + + None +} + +/// Tries to decode a minidump using any of the supported compression formats +/// or returns the provided minidump payload untouched if no format was detected +fn decode_minidump(minidump_data: Bytes) -> Result { + match decoder_from(minidump_data.clone()) {
+ Some(mut decoder) => { + match run_decoder(&mut decoder) { + Ok(decoded) => Ok(Bytes::from(decoded)), + Err(err) => { + // we detected a compression container but failed to decode it + relay_log::trace!("invalid compression container"); + Err(BadStoreRequest::InvalidCompressionContainer(err)) + } + } + } + None => { + // this means we haven't detected any compression container + // proceed to process the payload untouched (as a plain minidump). + Ok(minidump_data) + } + } +} + +/// Removes any compression container file extensions from the minidump +/// filename so it can be updated in the item. Otherwise, attachments that +/// have been decoded would still show the extension in the UI, which is misleading. +fn remove_container_extension(filename: &str) -> &str { + [".gz", ".xz", ".bz2", ".zst"] + .into_iter() + .find_map(|suffix| filename.strip_suffix(suffix)) + .unwrap_or(filename) +} + fn infer_attachment_type(field_name: Option<&str>) -> AttachmentType { match field_name.unwrap_or("") { MINIDUMP_FIELD_NAME => AttachmentType::Minidump, @@ -94,11 +170,17 @@ async fn extract_multipart( let embedded_opt = extract_embedded_minidump(minidump_item.payload()).await?; if let Some(embedded) = embedded_opt { - minidump_item.set_payload(ContentType::Minidump, embedded); + minidump_item.set_payload(Minidump, embedded); } + minidump_item.set_payload(Minidump, decode_minidump(minidump_item.payload())?); + validate_minidump(&minidump_item.payload())?; + if let Some(minidump_filename) = minidump_item.filename() { + minidump_item.set_filename(remove_container_extension(minidump_filename).to_owned()); + } + let event_id = common::event_id_from_items(&items)?.unwrap_or_else(EventId::new); let mut envelope = Envelope::from_request(Some(event_id), meta); @@ -110,10 +192,10 @@ async fn extract_multipart( } fn extract_raw_minidump(data: Bytes, meta: RequestMeta) -> Result, BadStoreRequest> { - validate_minidump(&data)?; - let mut item = Item::new(ItemType::Attachment); - 
item.set_payload(ContentType::Minidump, data); + + item.set_payload(Minidump, decode_minidump(data)?); + validate_minidump(&item.payload())?; item.set_filename(MINIDUMP_FILE_NAME); item.set_attachment_type(AttachmentType::Minidump); @@ -162,10 +244,17 @@ pub fn route(config: &Config) -> MethodRouter { #[cfg(test)] mod tests { + use crate::envelope::ContentType; + use crate::utils::{multipart_items, FormDataIter}; use axum::body::Body; + use bzip2::write::BzEncoder; + use bzip2::Compression as BzCompression; + use flate2::write::GzEncoder; + use flate2::Compression as GzCompression; + use liblzma::write::XzEncoder; use relay_config::Config; - - use crate::utils::{multipart_items, FormDataIter}; + use std::io::Write; + use zstd::stream::Encoder as ZstdEncoder; use super::*; @@ -181,6 +270,87 @@ mod tests { assert!(validate_minidump(garbage).is_err()); } + type EncodeFunction = fn(&[u8]) -> Result>; + + fn encode_gzip(be_minidump: &[u8]) -> Result> { + let mut encoder = GzEncoder::new(Vec::new(), GzCompression::default()); + encoder.write_all(be_minidump)?; + let compressed = encoder.finish()?; + Ok(Bytes::from(compressed)) + } + fn encode_bzip(be_minidump: &[u8]) -> Result> { + let mut encoder = BzEncoder::new(Vec::new(), BzCompression::default()); + encoder.write_all(be_minidump)?; + let compressed = encoder.finish()?; + Ok(Bytes::from(compressed)) + } + fn encode_xz(be_minidump: &[u8]) -> Result> { + let mut encoder = XzEncoder::new(Vec::new(), 6); + encoder.write_all(be_minidump)?; + let compressed = encoder.finish()?; + Ok(Bytes::from(compressed)) + } + fn encode_zst(be_minidump: &[u8]) -> Result> { + let mut encoder = ZstdEncoder::new(Vec::new(), 0)?; + encoder.write_all(be_minidump)?; + let compressed = encoder.finish()?; + Ok(Bytes::from(compressed)) + } + + #[test] + fn test_validate_encoded_minidump() -> Result<(), Box> { + let encoders: Vec = vec![encode_gzip, encode_zst, encode_bzip, encode_xz]; + + for encoder in &encoders { + let be_minidump = 
b"PMDMxxxxxx"; + let compressed = encoder(be_minidump)?; + let mut decoder = decoder_from(compressed).unwrap(); + assert!(run_decoder(&mut decoder).is_ok()); + + let le_minidump = b"MDMPxxxxxx"; + let compressed = encoder(le_minidump)?; + let mut decoder = decoder_from(compressed).unwrap(); + assert!(run_decoder(&mut decoder).is_ok()); + + let garbage = b"xxxxxx"; + let compressed = encoder(garbage)?; + let mut decoder = decoder_from(compressed).unwrap(); + let decoded = run_decoder(&mut decoder); + assert!(decoded.is_ok()); + assert!(validate_minidump(&decoded.unwrap()).is_err()); + } + + Ok(()) + } + + #[test] + fn test_remove_container_extension() -> Result<(), Box> { + assert_eq!(remove_container_extension("minidump"), "minidump"); + assert_eq!(remove_container_extension("minidump.gz"), "minidump"); + assert_eq!(remove_container_extension("minidump.bz2"), "minidump"); + assert_eq!(remove_container_extension("minidump.xz"), "minidump"); + assert_eq!(remove_container_extension("minidump.zst"), "minidump"); + assert_eq!(remove_container_extension("minidump.dmp"), "minidump.dmp"); + assert_eq!( + remove_container_extension("minidump.dmp.gz"), + "minidump.dmp" + ); + assert_eq!( + remove_container_extension("minidump.dmp.bz2"), + "minidump.dmp" + ); + assert_eq!( + remove_container_extension("minidump.dmp.xz"), + "minidump.dmp" + ); + assert_eq!( + remove_container_extension("minidump.dmp.zst"), + "minidump.dmp" + ); + + Ok(()) + } + #[tokio::test] async fn test_minidump_multipart_attachments() -> anyhow::Result<()> { let multipart_body: &[u8] = @@ -210,8 +380,7 @@ mod tests { "content-type", "multipart/form-data; boundary=---MultipartBoundary-sQ95dYmFvVzJ2UcOSdGPBkqrW0syf0Uw---", ) - .body(Body::from(multipart_body)) - .unwrap(); + .body(Body::from(multipart_body))?; let config = Config::default(); diff --git a/tests/integration/fixtures/native/minidump.dmp.bz2 b/tests/integration/fixtures/native/minidump.dmp.bz2 new file mode 100644 index 0000000000..3531c59567 
Binary files /dev/null and b/tests/integration/fixtures/native/minidump.dmp.bz2 differ diff --git a/tests/integration/fixtures/native/minidump.dmp.gz b/tests/integration/fixtures/native/minidump.dmp.gz new file mode 100644 index 0000000000..7f7025f8a5 Binary files /dev/null and b/tests/integration/fixtures/native/minidump.dmp.gz differ diff --git a/tests/integration/fixtures/native/minidump.dmp.xz b/tests/integration/fixtures/native/minidump.dmp.xz new file mode 100644 index 0000000000..7f3b6db7e6 Binary files /dev/null and b/tests/integration/fixtures/native/minidump.dmp.xz differ diff --git a/tests/integration/fixtures/native/minidump.dmp.zst b/tests/integration/fixtures/native/minidump.dmp.zst new file mode 100644 index 0000000000..1cfc531ef9 Binary files /dev/null and b/tests/integration/fixtures/native/minidump.dmp.zst differ diff --git a/tests/integration/test_minidump.py b/tests/integration/test_minidump.py index ad1368ac97..b4c4bba378 100644 --- a/tests/integration/test_minidump.py +++ b/tests/integration/test_minidump.py @@ -359,14 +359,37 @@ def test_minidump_invalid_nested_formdata(mini_sentry, relay): relay.send_minidump(project_id=project_id, files=attachments) -@pytest.mark.parametrize("rate_limit", [None, "attachment", "transaction"]) +@pytest.mark.parametrize( + "rate_limit,minidump_filename", + [ + (None, "minidump.dmp"), + ("attachment", "minidump.dmp"), + ("transaction", "minidump.dmp"), + (None, "minidump.dmp.gz"), + (None, "minidump.dmp.xz"), + (None, "minidump.dmp.bz2"), + (None, "minidump.dmp.zst"), + ], +) def test_minidump_with_processing( - mini_sentry, relay_with_processing, attachments_consumer, rate_limit + mini_sentry, + relay_with_processing, + attachments_consumer, + rate_limit, + minidump_filename, ): dmp_path = os.path.join(os.path.dirname(__file__), "fixtures/native/minidump.dmp") with open(dmp_path, "rb") as f: content = f.read() + # if we test a compressed minidump fixture we load both, the plain dump and the compressed one. 
+ if minidump_filename != "minidump.dmp": + compressed_dmp_path = os.path.join( + os.path.dirname(__file__), f"fixtures/native/{minidump_filename}" + ) + with open(compressed_dmp_path, "rb") as f: + compressed_content = f.read() + relay = relay_with_processing() project_id = 42 @@ -392,7 +415,16 @@ def test_minidump_with_processing( attachments_consumer = attachments_consumer() - attachments = [(MINIDUMP_ATTACHMENT_NAME, "minidump.dmp", content)] + # if we test a compressed minidump fixture we upload the compressed content + # but retrieve the uncompressed minidump content from the `attachments_consumer` below. + attachments = [ + ( + MINIDUMP_ATTACHMENT_NAME, + minidump_filename, + content if minidump_filename == "minidump.dmp" else compressed_content, + ) + ] + response = relay.send_minidump(project_id=project_id, files=attachments) attachment = b"" @@ -421,6 +453,7 @@ def test_minidump_with_processing( "id": attachment_id, "name": "minidump.dmp", "attachment_type": "event.minidump", + "content_type": "application/x-dmp", "chunks": num_chunks, "size": len(content), "rate_limited": rate_limit == "attachment",