From 4c7c08d0673a118d50f8262df0facd0a0e98bbbd Mon Sep 17 00:00:00 2001 From: Jan Michael Auer Date: Fri, 6 Sep 2024 09:35:53 +0200 Subject: [PATCH] ref(server): Use multer directly (#3978) Switches to direct use of `multer` instead of `axum`'s wrapper. This allows us to use constraints to configure the maximum body size and maximum field size directly on the `Multipart` instance instead of handling this manually. In order to keep using the multipart type as an extractor, we define `Remote` which is a shallow transparent wrapper around any remote type we would like to implement `FromRequest`, `FromRequestParts`, or `IntoResponse` for. This way, we can use the `Multipart` and `multer::Error` types directly in our signatures. Usage of this type is documented on the type. --- Cargo.lock | 1 - relay-server/Cargo.toml | 7 +-- relay-server/src/endpoints/attachments.rs | 26 +++----- relay-server/src/endpoints/common.rs | 5 +- relay-server/src/endpoints/minidump.rs | 37 +++++------ relay-server/src/endpoints/mod.rs | 2 +- relay-server/src/extractors/mod.rs | 2 + relay-server/src/extractors/remote.rs | 71 +++++++++++++++++++++ relay-server/src/utils/multipart.rs | 77 +++++++++++------------ tests/integration/test_basic.py | 21 ++++--- 10 files changed, 149 insertions(+), 100 deletions(-) create mode 100644 relay-server/src/extractors/remote.rs diff --git a/Cargo.lock b/Cargo.lock index 62739692b2..366e49a289 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -262,7 +262,6 @@ dependencies = [ "matchit", "memchr", "mime", - "multer", "percent-encoding", "pin-project-lite", "rustversion", diff --git a/relay-server/Cargo.toml b/relay-server/Cargo.toml index de5d50d311..fb0b7c56f9 100644 --- a/relay-server/Cargo.toml +++ b/relay-server/Cargo.toml @@ -32,12 +32,7 @@ workspace = true [dependencies] anyhow = { workspace = true } serde_path_to_error = { workspace = true } -axum = { workspace = true, features = [ - "macros", - "matched-path", - "multipart", - "tracing", -] } +axum = { workspace = true, features = ["macros", "matched-path", "tracing"] } axum-extra = { workspace = true } axum-server = { workspace = true } arc-swap = { workspace = true } diff --git a/relay-server/src/endpoints/attachments.rs b/relay-server/src/endpoints/attachments.rs index b112ab113f..7e1d33bd1d 100644 --- a/relay-server/src/endpoints/attachments.rs +++ b/relay-server/src/endpoints/attachments.rs @@ -1,49 +1,43 @@ -use axum::extract::{DefaultBodyLimit, Multipart, Path}; +use axum::extract::Path; use axum::http::StatusCode; use axum::response::IntoResponse; -use axum::routing::{post, MethodRouter}; -use relay_config::Config; +use multer::Multipart; use relay_event_schema::protocol::EventId; use serde::Deserialize; use crate::endpoints::common::{self, BadStoreRequest}; use crate::envelope::{AttachmentType, Envelope}; -use crate::extractors::RequestMeta; +use crate::extractors::{Remote, RequestMeta}; use crate::service::ServiceState; use crate::utils; #[derive(Debug, Deserialize)] -struct AttachmentPath { +pub struct AttachmentPath { event_id: EventId, } async fn extract_envelope( - config: &Config, meta: RequestMeta, path: AttachmentPath, - multipart: Multipart, + multipart: Multipart<'static>, ) -> Result, BadStoreRequest> { - let max_size = config.max_attachment_size(); - let items = utils::multipart_items(multipart, max_size, |_| AttachmentType::default()).await?; + let items = utils::multipart_items(multipart, |_| AttachmentType::default()).await?; let mut envelope = Envelope::from_request(Some(path.event_id), meta); for item in items { envelope.add_item(item); } + Ok(envelope) } -async fn handle( +pub async fn handle( state: ServiceState, meta: RequestMeta, Path(path): Path, - multipart: Multipart, + Remote(multipart): Remote>, ) -> Result { - let envelope = extract_envelope(state.config(), meta, path, multipart).await?; + let envelope = extract_envelope(meta, path, multipart).await?; common::handle_envelope(&state, envelope).await?; Ok(StatusCode::CREATED) } - -pub fn route(config: &Config) -> MethodRouter { - post(handle).route_layer(DefaultBodyLimit::max(config.max_attachments_size())) -} diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index bec9ebfd5c..d7ea170161 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -15,7 +15,7 @@ use crate::services::outcome::{DiscardReason, Outcome}; use crate::services::processor::{MetricData, ProcessMetricMeta, ProcessingGroup}; use crate::services::project_cache::{CheckEnvelope, ProcessMetrics, ValidateEnvelope}; use crate::statsd::{RelayCounters, RelayHistograms}; -use crate::utils::{self, ApiErrorResponse, FormDataIter, ManagedEnvelope, MultipartError}; +use crate::utils::{self, ApiErrorResponse, FormDataIter, ManagedEnvelope}; #[derive(Clone, Copy, Debug, thiserror::Error)] #[error("the service is overloaded")] @@ -61,9 +61,6 @@ pub enum BadStoreRequest { #[error("invalid multipart data")] InvalidMultipart(#[from] multer::Error), - #[error("invalid multipart data")] - InvalidMultipartAxum(#[from] MultipartError), - #[error("invalid minidump")] InvalidMinidump, diff --git a/relay-server/src/endpoints/minidump.rs b/relay-server/src/endpoints/minidump.rs index e1b78c68ee..80d513d709 100644 --- a/relay-server/src/endpoints/minidump.rs +++ b/relay-server/src/endpoints/minidump.rs @@ -1,18 +1,18 @@ use std::convert::Infallible; -use axum::extract::{DefaultBodyLimit, Multipart, Request}; +use axum::extract::{DefaultBodyLimit, Request}; use axum::response::IntoResponse; use axum::routing::{post, MethodRouter}; use axum::RequestExt; use bytes::Bytes; -use futures::{future, FutureExt}; +use multer::Multipart; use relay_config::Config; use relay_event_schema::protocol::EventId; use crate::constants::{ITEM_NAME_BREADCRUMBS1, ITEM_NAME_BREADCRUMBS2, ITEM_NAME_EVENT}; use crate::endpoints::common::{self, BadStoreRequest, TextResponse}; use crate::envelope::{AttachmentType, ContentType, Envelope, Item, ItemType}; -use crate::extractors::{RawContentType, RequestMeta}; +use crate::extractors::{RawContentType, Remote, RequestMeta}; use crate::service::ServiceState; use crate::utils; @@ -69,8 +69,8 @@ async fn extract_embedded_minidump(payload: Bytes) -> Result, BadS None => return Ok(None), }; - let stream = future::ok::<_, Infallible>(payload.clone()).into_stream(); - let mut multipart = multer::Multipart::new(stream, boundary); + let stream = futures::stream::once(async { Ok::<_, Infallible>(payload.clone()) }); + let mut multipart = Multipart::new(stream, boundary); while let Some(field) = multipart.next_field().await? { if field.name() == Some(MINIDUMP_FIELD_NAME) { @@ -82,12 +82,10 @@ async fn extract_embedded_minidump(payload: Bytes) -> Result, BadS } async fn extract_multipart( - config: &Config, - multipart: Multipart, + multipart: Multipart<'static>, meta: RequestMeta, ) -> Result, BadStoreRequest> { - let max_size = config.max_attachment_size(); - let mut items = utils::multipart_items(multipart, max_size, infer_attachment_type).await?; + let mut items = utils::multipart_items(multipart, infer_attachment_type).await?; let minidump_item = items .iter_mut() @@ -139,7 +137,8 @@ async fn handle( let envelope = if MINIDUMP_RAW_CONTENT_TYPES.contains(&content_type.as_ref()) { extract_raw_minidump(request.extract().await?, meta)? } else { - extract_multipart(state.config(), request.extract().await?, meta).await? + let Remote(multipart) = request.extract_with_state(&state).await?; + extract_multipart(multipart, meta).await? }; let id = envelope.event_id(); @@ -156,15 +155,15 @@ async fn handle( } pub fn route(config: &Config) -> MethodRouter { - post(handle).route_layer(DefaultBodyLimit::max(config.max_attachments_size())) + // Set the single-attachment limit that applies only for raw minidumps. Multipart bypasses the + // limited body and applies its own limits. + post(handle).route_layer(DefaultBodyLimit::max(config.max_attachment_size())) } #[cfg(test)] mod tests { use axum::body::Body; - use axum::extract::FromRequest; - - use relay_config::ByteSize; + use relay_config::Config; use crate::utils::{multipart_items, FormDataIter}; @@ -214,14 +213,10 @@ mod tests { .body(Body::from(multipart_body)) .unwrap(); - let multipart = Multipart::from_request(request, &()).await?; + let config = Config::default(); - let items = multipart_items( - multipart, - ByteSize::mebibytes(100).as_bytes(), - infer_attachment_type, - ) - .await?; + let multipart = utils::multipart_from_request(request, &config)?; + let items = multipart_items(multipart, infer_attachment_type).await?; // we expect the multipart body to contain // * one arbitrary attachment from the user (a `config.json`) diff --git a/relay-server/src/endpoints/mod.rs b/relay-server/src/endpoints/mod.rs index c0e7a6b740..d590c529a1 100644 --- a/relay-server/src/endpoints/mod.rs +++ b/relay-server/src/endpoints/mod.rs @@ -72,7 +72,7 @@ pub fn routes(config: &Config) -> Router{ // No mandatory trailing slash here because people already use it like this. .route("/api/:project_id/minidump", minidump::route(config)) .route("/api/:project_id/minidump/", minidump::route(config)) - .route("/api/:project_id/events/:event_id/attachments/", attachments::route(config)) + .route("/api/:project_id/events/:event_id/attachments/", post(attachments::handle)) .route("/api/:project_id/unreal/:sentry_key/", unreal::route(config)) // NOTE: If you add a new (non-experimental) route here, please also list it in // https://github.com/getsentry/sentry-docs/blob/master/docs/product/relay/operating-guidelines.mdx diff --git a/relay-server/src/extractors/mod.rs b/relay-server/src/extractors/mod.rs index 7d4b621451..4878817d1f 100644 --- a/relay-server/src/extractors/mod.rs +++ b/relay-server/src/extractors/mod.rs @@ -1,6 +1,7 @@ mod content_type; mod forwarded_for; mod mime; +mod remote; mod request_meta; mod signed_json; mod start_time; @@ -8,6 +9,7 @@ mod start_time; pub use self::content_type::*; pub use self::forwarded_for::*; pub use self::mime::*; +pub use self::remote::*; pub use self::request_meta::*; pub use self::signed_json::*; pub use self::start_time::*; diff --git a/relay-server/src/extractors/remote.rs b/relay-server/src/extractors/remote.rs new file mode 100644 index 0000000000..f611476abc --- /dev/null +++ b/relay-server/src/extractors/remote.rs @@ -0,0 +1,71 @@ +//! Extractors for types from other crates via [`Remote`]. + +use axum::extract::{FromRequest, Request}; +use axum::http::StatusCode; +use axum::response::{IntoResponse, Response}; +use multer::Multipart; + +use crate::service::ServiceState; +use crate::utils::{self, ApiErrorResponse}; + +/// A transparent wrapper around a remote type that implements [`FromRequest`] or [`IntoResponse`]. +/// +/// # Example +/// +/// ```ignore +/// use std::convert::Infallible; +/// +/// use axum::extract::{FromRequest, Request}; +/// use axum::response::IntoResponse; +/// +/// use crate::extractors::Remote; +/// +/// // Derive `FromRequest` for `bool` for illustration purposes: +/// #[axum::async_trait] +/// impl axum::extract::FromRequest for Remote { +/// type Rejection = Remote; +/// +/// async fn from_request(request: Request) -> Result { +/// Ok(Remote(true)) +/// } +/// } +/// +/// impl IntoResponse for Remote { +/// fn into_response(self) -> axum::response::Response { +/// match self.0 {} +/// } +/// } +/// ``` +#[derive(Debug)] +pub struct Remote(pub T); + +impl From for Remote { + fn from(inner: T) -> Self { + Self(inner) + } +} + +#[axum::async_trait] +impl FromRequest for Remote> { + type Rejection = Remote; + + async fn from_request(request: Request, state: &ServiceState) -> Result { + utils::multipart_from_request(request, state.config()) + .map(Remote) + .map_err(Remote) + } +} + +impl IntoResponse for Remote { + fn into_response(self) -> Response { + let Self(ref error) = self; + + let status_code = match error { + multer::Error::FieldSizeExceeded { .. } => StatusCode::PAYLOAD_TOO_LARGE, + multer::Error::StreamSizeExceeded { .. } => StatusCode::PAYLOAD_TOO_LARGE, + _ => StatusCode::BAD_REQUEST, + }; + + (status_code, ApiErrorResponse::from_error(error)).into_response() + } +} diff --git a/relay-server/src/utils/multipart.rs b/relay-server/src/utils/multipart.rs index 8952d0b257..990d41f309 100644 --- a/relay-server/src/utils/multipart.rs +++ b/relay-server/src/utils/multipart.rs @@ -1,6 +1,8 @@ use std::io; -use axum::extract::multipart::{Field, Multipart}; +use axum::extract::Request; +use multer::Multipart; +use relay_config::Config; use serde::{Deserialize, Serialize}; use crate::envelope::{AttachmentType, ContentType, Item, ItemType, Items}; @@ -147,58 +149,32 @@ pub fn get_multipart_boundary(data: &[u8]) -> Option<&str> { .and_then(|slice| std::str::from_utf8(&slice[2..]).ok()) } -#[derive(Debug, thiserror::Error)] -pub enum MultipartError { - #[error("field exceeded the size limit")] - FieldSizeExceeded, - #[error(transparent)] - Raw(#[from] axum::extract::multipart::MultipartError), -} - -async fn field_data<'a>(field: &mut Field<'a>, limit: usize) -> Result, MultipartError> { - let mut body = Vec::new(); - - while let Some(chunk) = field.chunk().await? { - if body.len() + chunk.len() > limit { - return Err(MultipartError::FieldSizeExceeded); - } - body.extend_from_slice(&chunk); - } - - Ok(body) -} - pub async fn multipart_items( - mut multipart: Multipart, - item_limit: usize, + mut multipart: Multipart<'_>, mut infer_type: F, -) -> Result +) -> Result where F: FnMut(Option<&str>) -> AttachmentType, { let mut items = Items::new(); let mut form_data = FormDataWriter::new(); - while let Some(mut field) = multipart.next_field().await? { + while let Some(field) = multipart.next_field().await? { if let Some(file_name) = field.file_name() { let mut item = Item::new(ItemType::Attachment); item.set_attachment_type(infer_type(field.name())); item.set_filename(file_name); // Extract the body after the immutable borrow on `file_name` is gone. if let Some(content_type) = field.content_type() { - item.set_payload( - content_type.into(), - field_data(&mut field, item_limit).await?, - ); + item.set_payload(content_type.as_ref().into(), field.bytes().await?); } else { - item.set_payload_without_content_type(field_data(&mut field, item_limit).await?); + item.set_payload_without_content_type(field.bytes().await?); } items.push(item); } else if let Some(field_name) = field.name().map(str::to_owned) { - let data = field_data(&mut field, item_limit).await?; - // Ensure to decode this safely to match Django's POST data behavior. This allows us to + // Ensure to decode this SAFELY to match Django's POST data behavior. This allows us to // process sentry event payloads even if they contain invalid encoding. - let string = String::from_utf8_lossy(&data); + let string = field.text().await?; form_data.append(&field_name, &string); } else { relay_log::trace!("multipart content without name or file_name"); @@ -217,11 +193,31 @@ where Ok(items) } +pub fn multipart_from_request( + request: Request, + config: &Config, +) -> Result, multer::Error> { + let content_type = request + .headers() + .get("content-type") + .and_then(|v| v.to_str().ok()) + .unwrap_or(""); + let boundary = multer::parse_boundary(content_type)?; + + let limits = multer::SizeLimit::new() + .whole_stream(config.max_attachments_size() as u64) + .per_field(config.max_attachment_size() as u64); + + Ok(Multipart::with_constraints( + request.into_body().into_data_stream(), + boundary, + multer::Constraints::new().size_limit(limits), + )) +} + #[cfg(test)] mod tests { - use axum::body::Body; - use axum::extract::FromRequest; - use axum::http::Request; + use std::convert::Infallible; use super::*; @@ -283,12 +279,9 @@ mod tests { let data = "--X-BOUNDARY\r\nContent-Disposition: form-data; \ name=\"my_text_field\"\r\n\r\nabcd\r\n--X-BOUNDARY--"; // No trailing newline - let request = Request::builder() - .header("content-type", "multipart/form-data; boundary=X-BOUNDARY") - .body(Body::from(data)) - .unwrap(); + let stream = futures::stream::once(async { Ok::<_, Infallible>(data) }); + let mut multipart = Multipart::new(stream, "X-BOUNDARY"); - let mut multipart = Multipart::from_request(request, &()).await?; assert!(multipart.next_field().await?.is_some()); assert!(multipart.next_field().await?.is_none()); diff --git a/tests/integration/test_basic.py b/tests/integration/test_basic.py index 3783bdaf26..6b6a943b20 100644 --- a/tests/integration/test_basic.py +++ b/tests/integration/test_basic.py @@ -153,16 +153,15 @@ def test_store_allowed_origins_passes(mini_sentry, relay, allowed_origins): @pytest.mark.parametrize( - "route,status_code", + "route", [ - ("/api/42/store/", 413), - ("/api/42/envelope/", 413), - ("/api/42/attachment/", 413), - # Minidumps attempt to read the first line (using dedicated limits) and parse it - ("/api/42/minidump/", 400), + "/api/42/store/", + "/api/42/envelope/", + "/api/42/attachment/", + "/api/42/minidump/", ], ) -def test_zipbomb_content_encoding(mini_sentry, relay, route, status_code): +def test_zipbomb_content_encoding(mini_sentry, relay, route): project_id = 42 mini_sentry.add_basic_project_config(project_id) relay = relay( @@ -189,11 +188,15 @@ def test_zipbomb_content_encoding(mini_sentry, relay, route, status_code): route, mini_sentry.get_dsn_public_key(project_id), ), - headers={"content-encoding": "gzip", "content-length": str(size)}, + headers={ + "content-encoding": "gzip", + "content-length": str(size), + "content-type": "application/octet-stream", + }, data=f, ) - assert response.status_code == status_code + assert response.status_code == 413 @pytest.mark.parametrize("content_encoding", ["gzip", "deflate", "identity", ""])