From 1962846f7a05f186448a96bccb4e6250392e08cb Mon Sep 17 00:00:00 2001
From: TheBobBobs <84781603+TheBobBobs@users.noreply.github.com>
Date: Wed, 5 Jul 2023 19:48:52 +0000
Subject: [PATCH] feat: allow setting max bytes to proxy/embed

---
 Cargo.toml            |  1 +
 src/routes/proxy.rs   | 11 ++++-----
 src/util/request.rs   | 54 +++++++++++++++++++++++++++++++++++--------
 src/util/result.rs    |  2 ++
 src/util/variables.rs |  2 ++
 5 files changed, 53 insertions(+), 17 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index b2f0b8e..7cb4757 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -20,5 +20,6 @@ env_logger = "0.8"
 dotenv = "0.15"
 tempfile = "3.2.0"
 ffprobe = "0.3.0"
+encoding_rs = "0.8"
 
 validator = { version = "0.15.0", features = ["derive"] }
diff --git a/src/routes/proxy.rs b/src/routes/proxy.rs
index 3e7dcfd..78b67b0 100644
--- a/src/routes/proxy.rs
+++ b/src/routes/proxy.rs
@@ -1,7 +1,7 @@
 use actix_web::{web::Query, HttpResponse, Responder};
 use serde::Deserialize;
 
-use crate::util::request::fetch;
+use crate::util::request::{fetch, get_bytes};
 use crate::util::result::Error;
 
 #[derive(Deserialize)]
@@ -11,14 +11,11 @@ pub struct Parameters {
 
 pub async fn get(info: Query<Parameters>) -> Result<impl Responder, Error> {
     let url = info.into_inner().url;
-    let (resp, mime) = fetch(&url).await?;
+    let (mut resp, mime) = fetch(&url).await?;
 
     if matches!(mime.type_(), mime::IMAGE | mime::VIDEO) {
-        let body = resp
-            .bytes()
-            .await
-            .map_err(|_| Error::FailedToConsumeBytes)?;
-        Ok(HttpResponse::Ok().body(body))
+        let bytes = get_bytes(&mut resp).await?;
+        Ok(HttpResponse::Ok().body(bytes))
     } else {
         Err(Error::NotAllowedToProxy)
     }
diff --git a/src/util/request.rs b/src/util/request.rs
index 10a202b..f89ce38 100644
--- a/src/util/request.rs
+++ b/src/util/request.rs
@@ -1,12 +1,17 @@
 use std::time::Duration;
 
+use actix_web::web::Bytes;
+use encoding_rs::{Encoding, UTF_8_INIT};
 use mime::Mime;
-use reqwest::{header::CONTENT_TYPE, Client, Response};
+use reqwest::{
+    header::{self, CONTENT_TYPE},
+    Client, Response,
+};
 use scraper::Html;
 use std::io::Write;
 use tempfile::NamedTempFile;
 
-use super::result::Error;
+use super::{result::Error, variables::MAX_BYTES};
 
 lazy_static! {
     static ref CLIENT: Client = reqwest::Client::builder()
@@ -41,9 +46,41 @@ pub async fn fetch(url: &str) -> Result<(Response, Mime), Error> {
     Ok((resp, mime))
 }
 
-pub async fn consume_fragment(resp: Response) -> Result<Html, Error> {
-    let body = resp.text().await.map_err(|_| Error::FailedToConsumeText)?;
-    Ok(Html::parse_document(&body))
+pub async fn get_bytes(resp: &mut Response) -> Result<Bytes, Error> {
+    let content_length = resp.content_length().unwrap_or(0) as usize;
+    if content_length > *MAX_BYTES {
+        return Err(Error::ExceedsMaxBytes);
+    }
+    let mut bytes = Vec::with_capacity(content_length);
+    while let Some(chunk) = resp
+        .chunk()
+        .await
+        .map_err(|_| Error::FailedToConsumeBytes)?
+    {
+        if bytes.len() + chunk.len() > *MAX_BYTES {
+            return Err(Error::ExceedsMaxBytes);
+        }
+        bytes.extend(chunk)
+    }
+    Ok(Bytes::from(bytes))
+}
+
+pub async fn consume_fragment(mut resp: Response) -> Result<Html, Error> {
+    let bytes = get_bytes(&mut resp).await?;
+
+    let content_type = resp
+        .headers()
+        .get(header::CONTENT_TYPE)
+        .and_then(|value| value.to_str().ok())
+        .and_then(|value| value.parse::<Mime>().ok());
+    let encoding_name = content_type
+        .as_ref()
+        .and_then(|mime| mime.get_param("charset").map(|charset| charset.as_str()))
+        .unwrap_or("utf-8");
+    let encoding = Encoding::for_label(encoding_name.as_bytes()).unwrap_or(&UTF_8_INIT);
+
+    let (text, _, _) = encoding.decode(&bytes);
+    Ok(Html::parse_document(&text))
 }
 
 pub fn determine_video_size(path: &std::path::Path) -> Result<(isize, isize), Error> {
@@ -61,11 +98,8 @@ pub fn determine_video_size(path: &std::path::Path) -> Result<(isize, isize), Er
     Err(Error::ProbeError)
 }
 
-pub async fn consume_size(resp: Response, mime: Mime) -> Result<(isize, isize), Error> {
-    let bytes = resp
-        .bytes()
-        .await
-        .map_err(|_| Error::FailedToConsumeBytes)?;
+pub async fn consume_size(mut resp: Response, mime: Mime) -> Result<(isize, isize), Error> {
+    let bytes = get_bytes(&mut resp).await?;
 
     match mime.type_() {
         mime::IMAGE => {
diff --git a/src/util/result.rs b/src/util/result.rs
index 0af7aef..0dbe4f6 100644
--- a/src/util/result.rs
+++ b/src/util/result.rs
@@ -17,6 +17,7 @@ pub enum Error {
     MissingContentType,
     NotAllowedToProxy,
     ConversionFailed,
+    ExceedsMaxBytes,
     ReqwestFailed,
     RequestFailed,
     ProbeError,
@@ -45,6 +46,7 @@ impl ResponseError for Error {
             Error::MissingContentType => StatusCode::BAD_REQUEST,
             Error::NotAllowedToProxy => StatusCode::BAD_REQUEST,
             Error::ConversionFailed => StatusCode::INTERNAL_SERVER_ERROR,
+            Error::ExceedsMaxBytes => StatusCode::BAD_REQUEST,
             Error::ReqwestFailed => StatusCode::INTERNAL_SERVER_ERROR,
             Error::RequestFailed => StatusCode::BAD_REQUEST,
             Error::ProbeError => StatusCode::INTERNAL_SERVER_ERROR,
diff --git a/src/util/variables.rs b/src/util/variables.rs
index db07a70..c472715 100644
--- a/src/util/variables.rs
+++ b/src/util/variables.rs
@@ -4,4 +4,6 @@ lazy_static! {
     // Application Settings
     pub static ref HOST: String =
         env::var("JANUARY_HOST").expect("Missing JANUARY_HOST environment variable.");
+    pub static ref MAX_BYTES: usize =
+        env::var("JANUARY_MAX_BYTES").unwrap_or("104857600".to_string()).parse().expect("Invalid JANUARY_MAX_BYTES environment variable.");
 }