From 4a5f85ba97f8a688e13e2faaf29c758b1f5805ed Mon Sep 17 00:00:00 2001
From: Sunshine
Date: Mon, 2 Dec 2024 20:23:11 -0100
Subject: [PATCH] add support for parsing multiple link type (rel) attribute values

---
 src/html.rs                   | 65 +++++++++++++++++++++--------------
 tests/html/mod.rs             |  1 +
 tests/html/parse_link_type.rs | 58 +++++++++++++++++++++++++++++++
 3 files changed, 99 insertions(+), 25 deletions(-)
 create mode 100644 tests/html/parse_link_type.rs

diff --git a/src/html.rs b/src/html.rs
index 6a80626b..2eb2e9d5 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -23,6 +23,15 @@ use crate::url::{
 };
 use crate::utils::{parse_content_type, retrieve_asset};
 
+#[derive(PartialEq, Eq)]
+pub enum LinkType {
+    Alternate,
+    DnsPrefetch,
+    Icon,
+    Preload,
+    Stylesheet,
+}
+
 struct SrcSetItem<'a> {
     path: &'a str,
     descriptor: &'a str,
@@ -141,26 +150,6 @@ pub fn create_metadata_tag(url: &Url) -> String {
     )
 }
 
-pub fn determine_link_node_type(node: &Handle) -> &str {
-    let mut link_type: &str = "unknown";
-
-    if let Some(link_attr_rel_value) = get_node_attr(node, "rel") {
-        if is_icon(&link_attr_rel_value) {
-            link_type = "icon";
-        } else if link_attr_rel_value.eq_ignore_ascii_case("stylesheet")
-            || link_attr_rel_value.eq_ignore_ascii_case("alternate stylesheet")
-        {
-            link_type = "stylesheet";
-        } else if link_attr_rel_value.eq_ignore_ascii_case("preload") {
-            link_type = "preload";
-        } else if link_attr_rel_value.eq_ignore_ascii_case("dns-prefetch") {
-            link_type = "dns-prefetch";
-        }
-    }
-
-    link_type
-}
-
 pub fn embed_srcset(
     cache: &mut HashMap<String, Vec<u8>>,
     client: &Client,
@@ -454,6 +443,26 @@ pub fn is_icon(attr_value: &str) -> bool {
     ICON_VALUES.contains(&attr_value.to_lowercase().as_str())
 }
 
+pub fn parse_link_type(link_attr_rel_value: &str) -> Vec<LinkType> {
+    let mut types: Vec<LinkType> = vec![];
+
+    for link_attr_rel_type in link_attr_rel_value.split_whitespace() {
+        if link_attr_rel_type.eq_ignore_ascii_case("alternate") {
+            types.push(LinkType::Alternate);
+        } else if link_attr_rel_type.eq_ignore_ascii_case("dns-prefetch") {
+            types.push(LinkType::DnsPrefetch);
+        } else if link_attr_rel_type.eq_ignore_ascii_case("preload") {
+            types.push(LinkType::Preload);
+        } else if link_attr_rel_type.eq_ignore_ascii_case("stylesheet") {
+            types.push(LinkType::Stylesheet);
+        } else if is_icon(&link_attr_rel_type) {
+            types.push(LinkType::Icon);
+        }
+    }
+
+    types
+}
+
 pub fn set_base_url(document: &Handle, desired_base_href: String) -> RcDom {
     let mut buf: Vec<u8> = Vec::new();
     serialize(
@@ -665,7 +674,10 @@ pub fn retrieve_and_embed_asset(
                     s = String::from_utf8_lossy(&data).to_string();
                 }
 
-                if node_name == "link" && determine_link_node_type(node) == "stylesheet" {
+                if node_name == "link"
+                    && parse_link_type(&get_node_attr(node, "rel").unwrap_or(String::from("")))
+                        .contains(&LinkType::Stylesheet)
+                {
                     // Stylesheet LINK elements require special treatment
                     let css: String = embed_css(cache, client, &final_url, &s, options);
 
@@ -757,9 +769,10 @@ pub fn walk_and_embed_assets(
                     }
                 }
                 "link" => {
-                    let link_type: &str = determine_link_node_type(node);
+                    let link_node_types: Vec<LinkType> =
+                        parse_link_type(&get_node_attr(node, "rel").unwrap_or(String::from("")));
 
-                    if link_type == "icon" {
+                    if link_node_types.contains(&LinkType::Icon) {
                         // Find and resolve LINK's href attribute
                         if let Some(link_attr_href_value) = get_node_attr(node, "href") {
                             if !options.no_images && !link_attr_href_value.is_empty() {
@@ -776,7 +789,7 @@ pub fn walk_and_embed_assets(
                                 set_node_attr(node, "href", None);
                             }
                         }
-                    } else if link_type == "stylesheet" {
+                    } else if link_node_types.contains(&LinkType::Stylesheet) {
                         // Resolve LINK's href attribute
                         if let Some(link_attr_href_value) = get_node_attr(node, "href") {
                             if options.no_css {
@@ -797,7 +810,9 @@ pub fn walk_and_embed_assets(
                                 }
                             }
                         }
-                    } else if link_type == "preload" || link_type == "dns-prefetch" {
+                    } else if link_node_types.contains(&LinkType::Preload)
+                        || link_node_types.contains(&LinkType::DnsPrefetch)
+                    {
                         // Since all resources are embedded as data URLs, preloading and prefetching are not necessary
                         set_node_attr(node, "rel", None);
                     } else {
diff --git a/tests/html/mod.rs b/tests/html/mod.rs
index 955d28de..91e4354c 100644
--- a/tests/html/mod.rs
+++ b/tests/html/mod.rs
@@ -9,6 +9,7 @@ mod get_node_attr;
 mod get_node_name;
 mod has_favicon;
 mod is_icon;
+mod parse_link_type;
 mod serialize_document;
 mod set_node_attr;
 mod walk_and_embed_assets;
diff --git a/tests/html/parse_link_type.rs b/tests/html/parse_link_type.rs
new file mode 100644
index 00000000..230b427b
--- /dev/null
+++ b/tests/html/parse_link_type.rs
@@ -0,0 +1,58 @@
+//  ██████╗  █████╗ ███████╗███████╗██╗███╗   ██╗ ██████╗
+//  ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗  ██║██╔════╝
+//  ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║  ███╗
+//  ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║   ██║
+//  ██║     ██║  ██║███████║███████║██║██║ ╚████║╚██████╔╝
+//  ╚═╝     ╚═╝  ╚═╝╚══════╝╚══════╝╚═╝╚═╝  ╚═══╝ ╚═════╝
+
+#[cfg(test)]
+mod passing {
+    use monolith::html;
+
+    #[test]
+    fn icon() {
+        assert!(html::parse_link_type("icon").contains(&html::LinkType::Icon));
+    }
+
+    #[test]
+    fn shortcut_icon_capitalized() {
+        assert!(html::parse_link_type("Shortcut Icon").contains(&html::LinkType::Icon));
+    }
+
+    #[test]
+    fn stylesheet() {
+        assert!(html::parse_link_type("stylesheet").contains(&html::LinkType::Stylesheet));
+    }
+
+    #[test]
+    fn preload_stylesheet() {
+        assert!(html::parse_link_type("preload stylesheet").contains(&html::LinkType::Stylesheet));
+    }
+}
+
+//  ███████╗ █████╗ ██╗██╗     ██╗███╗   ██╗ ██████╗
+//  ██╔════╝██╔══██╗██║██║     ██║████╗  ██║██╔════╝
+//  █████╗  ███████║██║██║     ██║██╔██╗ ██║██║  ███╗
+//  ██╔══╝  ██╔══██║██║██║     ██║██║╚██╗██║██║   ██║
+//  ██║     ██║  ██║██║███████╗██║██║ ╚████║╚██████╔╝
+//  ╚═╝     ╚═╝  ╚═╝╚═╝╚══════╝╚═╝╚═╝  ╚═══╝ ╚═════╝
+
+#[cfg(test)]
+mod failing {
+    use monolith::html;
+
+    #[test]
+    fn mask_icon() {
+        assert!(html::parse_link_type("mask-icon").is_empty());
+    }
+
+    #[test]
+    fn fluid_icon() {
+        assert!(html::parse_link_type("fluid-icon").is_empty());
+    }
+
+    #[test]
+    fn empty_string() {
+        assert!(html::parse_link_type("").is_empty());
+    }
+}