Skip to content

Commit

Permalink
fix: remove dependency on regex to reduce WASM size (#29)
Browse files Browse the repository at this point in the history
Using the `regex` crate increases the WASM size of canisters by over 100 kB.
This PR replaces `regex` with a custom function that parses the
`Content-Range` header manually. For example, the canister
`http_gateway_canister_custom_assets` was 466'631 bytes with `regex`
and is only 344'593 bytes with the custom function.

[TT-440](https://dfinity.atlassian.net/browse/TT-440)

[TT-440]:
https://dfinity.atlassian.net/browse/TT-440?atlOrigin=eyJpIjoiNWRkNTljNzYxNjVmNDY3MDlhMDU5Y2ZhYzA5YTRkZjUiLCJwIjoiZ2l0aHViLWNvbS1KU1cifQ
  • Loading branch information
przydatek authored Nov 12, 2024
1 parent 7647269 commit 1de90ba
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 47 deletions.
2 changes: 0 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ bytes = "1"
base64 = "0.22"
lazy_static = "1"
rand_chacha = "0.3"
regex = "1"
serde = "1"
serde_cbor = "0.11"
sha2 = "0.10"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ candid.workspace = true
http.workspace = true
ic-cdk.workspace = true
ic-cdk-macros.workspace = true
regex.workspace = true
serde.workspace = true
serde_cbor.workspace = true
sha2.workspace = true
Expand Down
31 changes: 22 additions & 9 deletions examples/http-gateway/canister/src/custom_assets/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,15 +162,28 @@ fn get_header_value(headers: &[HeaderField], header_name: &str) -> Option<String

/// Returns the first byte position (`range-begin`) of a `Content-Range` header value.
///
/// The expected format is `bytes <begin>-<end>/<total>`, e.g. `bytes 21010-47021/47022`.
/// Leading and trailing whitespace around the whole value is ignored. Only `<begin>` is
/// parsed; `<end>` and `<total>` are not validated beyond requiring exactly one `-`.
///
/// # Panics
///
/// Panics if the value does not start with `bytes `, does not contain exactly one `-`,
/// or if `<begin>` is not a plain unsigned decimal number.
fn get_content_range_begin(content_range_header_value: &str) -> usize {
    // `strip_prefix` removes the unit exactly once; `trim_start_matches` would also
    // swallow repeated prefixes and accept e.g. `bytes bytes 1-2/3`.
    let range_spec = content_range_header_value
        .trim()
        .strip_prefix("bytes ")
        .unwrap_or_else(|| {
            panic!(
                "Invalid Content-Range header: {}",
                content_range_header_value
            )
        });

    // Exactly one `-` must separate range-begin from the remainder of the spec.
    let mut parts = range_spec.split('-');
    let range_begin = match (parts.next(), parts.next(), parts.next()) {
        (Some(begin), Some(_), None) => begin,
        _ => panic!(
            "Invalid bytes spec in Content-Range header: {}",
            content_range_header_value
        ),
    };

    // `usize::from_str` accepts a leading `+`, which is not a valid byte position.
    if range_begin.starts_with('+') {
        panic!("Invalid range_begin in: {}", content_range_header_value);
    }

    // Note: skipping the check whether range_end and total_length are sane.
    // The message is only formatted on the failure path.
    range_begin.parse::<usize>().unwrap_or_else(|e| {
        panic!(
            "Invalid range_begin in: {}: {:?}",
            content_range_header_value, e
        )
    })
}

thread_local! {
Expand Down
1 change: 0 additions & 1 deletion packages/ic-http-gateway/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ http.workspace = true
http-body.workspace = true
http-body-util.workspace = true
bytes.workspace = true
regex.workspace = true

ic-agent.workspace = true
ic-utils.workspace = true
Expand Down
89 changes: 56 additions & 33 deletions packages/ic-http-gateway/src/response/response_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ use ic_utils::{
StreamingCallbackHttpResponse, StreamingStrategy, Token,
},
};
use regex::Regex;

// Limit the total number of calls to an HTTP Request loop to 1000 for now.
static MAX_HTTP_REQUEST_STREAM_CALLBACK_CALL_COUNT: usize = 1000;
Expand Down Expand Up @@ -175,44 +174,68 @@ struct ContentRangeValues {
pub total_length: usize,
}

fn parse_content_range_header_str(str_value: &str) -> Result<ContentRangeValues, AgentError> {
fn parse_content_range_header_str(
content_range_str: &str,
) -> Result<ContentRangeValues, AgentError> {
// expected format: `bytes 21010-47021/47022`
let re = Regex::new(r"bytes\s+(\d+)-(\d+)/(\d+)").unwrap();
let Some(caps) = re.captures(str_value) else {
return Err(AgentError::InvalidHttpResponse(
"malformed Content-Range header".to_string(),
));
};
let range_begin: usize = caps
.get(1)
.ok_or_else(|| AgentError::InvalidHttpResponse("missing range-begin".to_string()))?
.as_str()
.parse()
.map_err(|_| AgentError::InvalidHttpResponse("malformed range-begin".to_string()))?;
let range_end: usize = caps
.get(2)
.ok_or_else(|| AgentError::InvalidHttpResponse("missing range-end".to_string()))?
.as_str()
.parse()
.map_err(|_| AgentError::InvalidHttpResponse("malformed range-end".to_string()))?;
let total_length: usize = caps
.get(3)
.ok_or_else(|| AgentError::InvalidHttpResponse("missing size".to_string()))?
.as_str()
.parse()
.map_err(|_| AgentError::InvalidHttpResponse("malformed size".to_string()))?;
let range_values = ContentRangeValues {
let str_value = content_range_str.trim();
if !str_value.starts_with("bytes ") {
return Err(AgentError::InvalidHttpResponse(format!(
"Invalid Content-Range header '{}'",
content_range_str
)));
}
let str_value = str_value.trim_start_matches("bytes ");

let str_value_parts = str_value.split('-').collect::<Vec<_>>();
if str_value_parts.len() != 2 {
return Err(AgentError::InvalidHttpResponse(format!(
"Invalid bytes spec in Content-Range header '{}'",
content_range_str
)));
}
let range_begin = str_value_parts[0].parse::<usize>().map_err(|e| {
AgentError::InvalidHttpResponse(format!(
"Invalid range_begin in '{}': {}",
content_range_str, e
))
})?;

let other_value_parts = str_value_parts[1].split('/').collect::<Vec<_>>();
if other_value_parts.len() != 2 {
return Err(AgentError::InvalidHttpResponse(format!(
"Invalid bytes spec in Content-Range header '{}'",
content_range_str
)));
}
let range_end = other_value_parts[0].parse::<usize>().map_err(|e| {
AgentError::InvalidHttpResponse(format!(
"Invalid range_end in '{}': {}",
content_range_str, e
))
})?;
let total_length = other_value_parts[1].parse::<usize>().map_err(|e| {
AgentError::InvalidHttpResponse(format!(
"Invalid total_length in '{}': {}",
content_range_str, e
))
})?;

let rv = ContentRangeValues {
range_begin,
range_end,
total_length,
};
if range_begin > range_end || range_begin >= total_length || range_end >= total_length {
if rv.range_begin > rv.range_end
|| rv.range_begin >= rv.total_length
|| rv.range_end >= rv.total_length
{
Err(AgentError::InvalidHttpResponse(format!(
"inconsistent Content-Range header {:?}",
range_values
"inconsistent Content-Range header {}: {:?}",
content_range_str, rv
)))
} else {
Ok(range_values)
Ok(rv)
}
}

Expand Down Expand Up @@ -428,7 +451,7 @@ mod tests {
];
for input in malformed_inputs {
let result = parse_content_range_header_str(input);
assert_matches!(result, Err(e) if format!("{}", e).contains("malformed Content-Range header"));
assert_matches!(result, Err(e) if format!("{}", e).contains("Invalid "));
}
}

Expand Down Expand Up @@ -494,7 +517,7 @@ mod tests {
Cow::from("bytes 42/10"),
)];
let result = get_initial_stream_state(http_request, canister_id, &response_headers, false);
assert_matches!(result, Err(e) if format!("{}", e).contains("malformed Content-Range header"));
assert_matches!(result, Err(e) if format!("{}", e).contains("Invalid bytes spec in Content-Range header"));
}

#[test]
Expand Down

0 comments on commit 1de90ba

Please sign in to comment.