Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(server): accept minidumps in a gzip container #4029

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 78 additions & 4 deletions relay-server/src/endpoints/minidump.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
use std::convert::Infallible;
use std::io::Cursor;
use std::io::Read;

use axum::extract::{DefaultBodyLimit, Request};
use axum::response::IntoResponse;
use axum::routing::{post, MethodRouter};
use axum::RequestExt;
use bytes::Bytes;
use flate2::read::GzDecoder;
use multer::Multipart;
use relay_config::Config;
use relay_event_schema::protocol::EventId;
Expand All @@ -31,6 +34,8 @@ const MINIDUMP_FILE_NAME: &str = "Minidump";
const MINIDUMP_MAGIC_HEADER_LE: &[u8] = b"MDMP";
const MINIDUMP_MAGIC_HEADER_BE: &[u8] = b"PMDM";

const GZIP_MAGIC_HEADER: &[u8] = &[0x1f, 0x8b];

/// Content types by which standalone uploads can be recognized.
const MINIDUMP_RAW_CONTENT_TYPES: &[&str] = &["application/octet-stream", "application/x-dmp"];

Expand All @@ -43,6 +48,24 @@ fn validate_minidump(data: &[u8]) -> Result<(), BadStoreRequest> {
Ok(())
}

fn validate_gzip_minidump(data: &[u8]) -> Result<(), BadStoreRequest> {
if !data.starts_with(GZIP_MAGIC_HEADER) {
relay_log::trace!("invalid minidump file");
return Err(BadStoreRequest::InvalidMinidump);
}

let mut decoder = GzDecoder::new(data);
let mut magic_bytes = [0u8; 4];

match decoder.read_exact(&mut magic_bytes) {
Ok(_) => validate_minidump(&magic_bytes),
Err(_) => {
relay_log::trace!("invalid minidump file");
Err(BadStoreRequest::InvalidMinidump)
}
}
}

fn infer_attachment_type(field_name: Option<&str>) -> AttachmentType {
match field_name.unwrap_or("") {
MINIDUMP_FIELD_NAME => AttachmentType::Minidump,
Expand Down Expand Up @@ -97,7 +120,11 @@ async fn extract_multipart(
minidump_item.set_payload(ContentType::Minidump, embedded);
}

validate_minidump(&minidump_item.payload())?;
if validate_minidump(&minidump_item.payload()).is_err() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Small suggestion: Similar to how we check for embedded minidumps, could we invert the logic slightly to fully separate concerns? Right now, validate_gzip_minidump fails with an error "invalid minidump" where actually it tests for a gzip header.

I'm thinking about control flow like this:

  1. Check for a compression header
    1. If there is compression, then try to decompress and propagate all errors
    2. Replace the payload bytes with the decompressed bytes
  2. Now check the minidump header once.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, of course, this makes sense. I was only prepping a PoC to check for basic warning signs and whether you want automatic detection/decoding in the endpoint. I am also unhappy with the gzip being the fallback to the typical case 😅.

validate_gzip_minidump(&minidump_item.payload())?;
let unzipped = extract_minidump_from_gzip(minidump_item.payload())?;
minidump_item.set_payload(ContentType::Minidump, unzipped);
}

let event_id = common::event_id_from_items(&items)?.unwrap_or_else(EventId::new);
let mut envelope = Envelope::from_request(Some(event_id), meta);
Expand All @@ -109,11 +136,31 @@ async fn extract_multipart(
Ok(envelope)
}

fn extract_raw_minidump(data: Bytes, meta: RequestMeta) -> Result<Box<Envelope>, BadStoreRequest> {
validate_minidump(&data)?;
fn extract_minidump_from_gzip(data: Bytes) -> Result<Bytes, BadStoreRequest> {
let cursor = Cursor::new(data);
let mut decoder = GzDecoder::new(cursor);
let mut buffer = Vec::new();
match decoder.read_to_end(&mut buffer) {
Ok(_) => Ok(Bytes::from(buffer)),
Err(_) => {
relay_log::trace!("invalid minidump file");
Err(BadStoreRequest::InvalidMinidump)
}
}
}

fn extract_raw_minidump(data: Bytes, meta: RequestMeta) -> Result<Box<Envelope>, BadStoreRequest> {
let mut item = Item::new(ItemType::Attachment);
item.set_payload(ContentType::Minidump, data);

match validate_minidump(&data) {
Ok(_) => item.set_payload(ContentType::Minidump, data),
Err(_) => {
validate_gzip_minidump(&data)?;
let unzipped = extract_minidump_from_gzip(data)?;
item.set_payload(ContentType::Minidump, unzipped);
}
}

item.set_filename(MINIDUMP_FILE_NAME);
item.set_attachment_type(AttachmentType::Minidump);

Expand Down Expand Up @@ -163,7 +210,10 @@ pub fn route(config: &Config) -> MethodRouter<ServiceState> {
#[cfg(test)]
mod tests {
use axum::body::Body;
use flate2::write::GzEncoder;
use flate2::Compression;
use relay_config::Config;
use std::io::Write;

use crate::utils::{multipart_items, FormDataIter};

Expand All @@ -181,6 +231,30 @@ mod tests {
assert!(validate_minidump(garbage).is_err());
}

fn encode_gzip(be_minidump: &[u8]) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
encoder.write_all(be_minidump)?;
let compressed = encoder.finish()?;
Ok(compressed)
}

#[test]
fn test_validate_gzip_minidump() -> Result<(), Box<dyn std::error::Error>> {
let be_minidump = b"PMDMxxxxxx";
let compressed = encode_gzip(be_minidump).unwrap();
assert!(validate_gzip_minidump(&compressed).is_ok());

let le_minidump = b"MDMPxxxxxx";
let compressed = encode_gzip(le_minidump).unwrap();
assert!(validate_gzip_minidump(&compressed).is_ok());

let garbage = b"xxxxxx";
let compressed = encode_gzip(garbage).unwrap();
assert!(validate_gzip_minidump(&compressed).is_err());

Ok(())
}

#[tokio::test]
async fn test_minidump_multipart_attachments() -> anyhow::Result<()> {
let multipart_body: &[u8] =
Expand Down
24 changes: 19 additions & 5 deletions tests/integration/test_minidump.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,11 +359,25 @@ def test_minidump_invalid_nested_formdata(mini_sentry, relay):
relay.send_minidump(project_id=project_id, files=attachments)


@pytest.mark.parametrize("rate_limit", [None, "attachment", "transaction"])
@pytest.mark.parametrize(
"rate_limit,minidump_filename",
[
(None, "minidump.dmp"),
("attachment", "minidump.dmp"),
("transaction", "minidump.dmp"),
(None, "minidump.dmp.gz"),
],
)
def test_minidump_with_processing(
mini_sentry, relay_with_processing, attachments_consumer, rate_limit
mini_sentry,
relay_with_processing,
attachments_consumer,
rate_limit,
minidump_filename,
):
dmp_path = os.path.join(os.path.dirname(__file__), "fixtures/native/minidump.dmp")
dmp_path = os.path.join(
os.path.dirname(__file__), f"fixtures/native/{minidump_filename}"
)
with open(dmp_path, "rb") as f:
content = f.read()

Expand Down Expand Up @@ -392,7 +406,7 @@ def test_minidump_with_processing(

attachments_consumer = attachments_consumer()

attachments = [(MINIDUMP_ATTACHMENT_NAME, "minidump.dmp", content)]
attachments = [(MINIDUMP_ATTACHMENT_NAME, minidump_filename, content)]
response = relay.send_minidump(project_id=project_id, files=attachments)

attachment = b""
Expand All @@ -419,7 +433,7 @@ def test_minidump_with_processing(
assert list(message["attachments"]) == [
{
"id": attachment_id,
"name": "minidump.dmp",
"name": minidump_filename,
"attachment_type": "event.minidump",
"chunks": num_chunks,
"size": len(content),
Expand Down
Loading