Skip to content

Commit

Permalink
do not indent links based on depth in the output
Browse files (browse the repository at this point in the history)
  • Loading branch information
snshn committed May 17, 2024
1 parent 6798cad commit 87eb197
Show file tree
Hide file tree
Showing 13 changed files with 68 additions and 216 deletions.
35 changes: 3 additions & 32 deletions src/css.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -36,7 +36,6 @@ pub fn embed_css(
document_url: &Url,
css: &str,
options: &Options,
depth: u32,
) -> String {
let mut input = ParserInput::new(&css);
let mut parser = Parser::new(&mut input);
Expand All @@ -47,7 +46,6 @@ pub fn embed_css(
document_url,
&mut parser,
options,
depth,
"",
"",
"",
Expand Down Expand Up @@ -81,7 +79,6 @@ pub fn process_css<'a>(
document_url: &Url,
parser: &mut Parser,
options: &Options,
depth: u32,
rule_name: &str,
prop_name: &str,
func_name: &str,
Expand Down Expand Up @@ -135,7 +132,6 @@ pub fn process_css<'a>(
document_url,
parser,
options,
depth,
rule_name,
curr_prop.as_str(),
func_name,
Expand Down Expand Up @@ -190,14 +186,7 @@ pub fn process_css<'a>(
}

let import_full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&document_url,
&import_full_url,
options,
depth + 1,
) {
match retrieve_asset(cache, client, &document_url, &import_full_url, options) {
Ok((
import_contents,
import_final_url,
Expand All @@ -213,7 +202,6 @@ pub fn process_css<'a>(
&import_final_url,
&String::from_utf8_lossy(&import_contents),
options,
depth + 1,
)
.as_bytes(),
&import_final_url,
Expand Down Expand Up @@ -251,7 +239,6 @@ pub fn process_css<'a>(
&document_url,
&resolved_url,
options,
depth + 1,
) {
Ok((data, final_url, media_type, charset)) => {
let mut data_url =
Expand Down Expand Up @@ -341,14 +328,7 @@ pub fn process_css<'a>(
result.push_str("url(");
if is_import {
let full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&document_url,
&full_url,
options,
depth + 1,
) {
match retrieve_asset(cache, client, &document_url, &full_url, options) {
Ok((css, final_url, media_type, charset)) => {
let mut data_url = create_data_url(
&media_type,
Expand All @@ -359,7 +339,6 @@ pub fn process_css<'a>(
&final_url,
&String::from_utf8_lossy(&css),
options,
depth + 1,
)
.as_bytes(),
&final_url,
Expand All @@ -380,14 +359,7 @@ pub fn process_css<'a>(
result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
} else {
let full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&document_url,
&full_url,
options,
depth + 1,
) {
match retrieve_asset(cache, client, &document_url, &full_url, options) {
Ok((data, final_url, media_type, charset)) => {
let mut data_url =
create_data_url(&media_type, &charset, &data, &final_url);
Expand Down Expand Up @@ -423,7 +395,6 @@ pub fn process_css<'a>(
document_url,
parser,
options,
depth,
curr_rule.as_str(),
curr_prop.as_str(),
function_name,
Expand Down
63 changes: 8 additions & 55 deletions src/html.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -165,7 +165,6 @@ pub fn embed_srcset(
document_url: &Url,
srcset: &str,
options: &Options,
depth: u32,
) -> String {
let mut array: Vec<SrcSetItem> = vec![];
let re = Regex::new(r",\s+").unwrap();
Expand All @@ -186,14 +185,7 @@ pub fn embed_srcset(
result.push_str(EMPTY_IMAGE_DATA_URL);
} else {
let image_full_url: Url = resolve_url(&document_url, part.path);
match retrieve_asset(
cache,
client,
&document_url,
&image_full_url,
options,
depth + 1,
) {
match retrieve_asset(cache, client, &document_url, &image_full_url, options) {
Ok((image_data, image_final_url, image_media_type, image_charset)) => {
let mut image_data_url = create_data_url(
&image_media_type,
Expand Down Expand Up @@ -611,18 +603,10 @@ pub fn retrieve_and_embed_asset(
attr_name: &str,
attr_value: &str,
options: &Options,
depth: u32,
) {
let resolved_url: Url = resolve_url(document_url, attr_value);

match retrieve_asset(
cache,
client,
&document_url.clone(),
&resolved_url,
options,
depth + 1,
) {
match retrieve_asset(cache, client, &document_url.clone(), &resolved_url, options) {
Ok((data, final_url, mut media_type, charset)) => {
let node_name: &str = get_node_name(&node).unwrap();

Expand Down Expand Up @@ -651,7 +635,7 @@ pub fn retrieve_and_embed_asset(

if node_name == "link" && determine_link_node_type(node) == "stylesheet" {
// Stylesheet LINK elements require special treatment
let css: String = embed_css(cache, client, &final_url, &s, options, depth + 1);
let css: String = embed_css(cache, client, &final_url, &s, options);

// Create and embed data URL
let css_data_url =
Expand All @@ -660,14 +644,7 @@ pub fn retrieve_and_embed_asset(
} else if node_name == "frame" || node_name == "iframe" {
// (I)FRAMEs are also quite different from conventional resources
let frame_dom = html_to_dom(&data, charset.clone());
walk_and_embed_assets(
cache,
client,
&final_url,
&frame_dom.document,
&options,
depth + 1,
);
walk_and_embed_assets(cache, client, &final_url, &frame_dom.document, &options);

let mut frame_data: Vec<u8> = Vec::new();
serialize(
Expand Down Expand Up @@ -722,13 +699,12 @@ pub fn walk_and_embed_assets(
document_url: &Url,
node: &Handle,
options: &Options,
depth: u32,
) {
match node.data {
NodeData::Document => {
// Dig deeper
for child in node.children.borrow().iter() {
walk_and_embed_assets(cache, client, &document_url, child, options, depth);
walk_and_embed_assets(cache, client, &document_url, child, options);
}
}
NodeData::Element {
Expand Down Expand Up @@ -763,7 +739,6 @@ pub fn walk_and_embed_assets(
"href",
&link_attr_href_value,
options,
depth,
);
} else {
set_node_attr(node, "href", None);
Expand All @@ -786,7 +761,6 @@ pub fn walk_and_embed_assets(
"href",
&link_attr_href_value,
options,
depth,
);
}
}
Expand Down Expand Up @@ -828,7 +802,6 @@ pub fn walk_and_embed_assets(
"background",
&body_attr_background_value,
options,
depth,
);
}
}
Expand Down Expand Up @@ -874,22 +847,15 @@ pub fn walk_and_embed_assets(
"src",
&img_full_url,
options,
depth,
);
}
}

// Resolve srcset attribute
if let Some(img_srcset) = get_node_attr(node, "srcset") {
if !img_srcset.is_empty() {
let resolved_srcset: String = embed_srcset(
cache,
client,
&document_url,
&img_srcset,
options,
depth,
);
let resolved_srcset: String =
embed_srcset(cache, client, &document_url, &img_srcset, options);
set_node_attr(node, "srcset", Some(resolved_srcset));
}
}
Expand Down Expand Up @@ -919,7 +885,6 @@ pub fn walk_and_embed_assets(
"src",
&input_attr_src_value,
options,
depth,
);
}
}
Expand Down Expand Up @@ -952,7 +917,6 @@ pub fn walk_and_embed_assets(
"href",
&image_href,
options,
depth,
);
}
}
Expand All @@ -973,7 +937,6 @@ pub fn walk_and_embed_assets(
"src",
&source_attr_src_value,
options,
depth,
);
}
} else if parent_node_name == "video" {
Expand All @@ -988,7 +951,6 @@ pub fn walk_and_embed_assets(
"src",
&source_attr_src_value,
options,
depth,
);
}
}
Expand All @@ -1010,7 +972,6 @@ pub fn walk_and_embed_assets(
&document_url,
&source_attr_srcset_value,
options,
depth,
);
set_node_attr(node, "srcset", Some(resolved_srcset));
}
Expand Down Expand Up @@ -1063,7 +1024,6 @@ pub fn walk_and_embed_assets(
"src",
&script_attr_src.unwrap_or_default(),
options,
depth,
);
}
}
Expand All @@ -1081,7 +1041,6 @@ pub fn walk_and_embed_assets(
&document_url,
tendril.as_ref(),
options,
depth,
);
tendril.clear();
tendril.push_slice(&replacement);
Expand Down Expand Up @@ -1113,7 +1072,6 @@ pub fn walk_and_embed_assets(
"src",
&frame_attr_src_value,
options,
depth,
);
}
}
Expand All @@ -1133,7 +1091,6 @@ pub fn walk_and_embed_assets(
"src",
&audio_attr_src_value,
options,
depth,
);
}
}
Expand All @@ -1152,7 +1109,6 @@ pub fn walk_and_embed_assets(
"src",
&video_attr_src_value,
options,
depth,
);
}
}
Expand All @@ -1176,7 +1132,6 @@ pub fn walk_and_embed_assets(
"poster",
&video_attr_poster_value,
options,
depth,
);
}
}
Expand All @@ -1200,7 +1155,6 @@ pub fn walk_and_embed_assets(
&document_url,
&noscript_contents_dom.document,
&options,
depth,
);
// Get rid of original contents
noscript_contents.clear();
Expand Down Expand Up @@ -1241,7 +1195,6 @@ pub fn walk_and_embed_assets(
&document_url,
&node_attr_style_value,
options,
depth,
);
set_node_attr(node, "style", Some(embedded_style));
}
Expand All @@ -1265,7 +1218,7 @@ pub fn walk_and_embed_assets(

// Dig deeper
for child in node.children.borrow().iter() {
walk_and_embed_assets(cache, client, &document_url, child, options, depth);
walk_and_embed_assets(cache, client, &document_url, child, options);
}
}
_ => {
Expand Down
13 changes: 3 additions & 10 deletions src/main.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -198,7 +198,7 @@ fn main() {
|| (target_url.scheme() == "http" || target_url.scheme() == "https")
|| target_url.scheme() == "data"
{
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options, 0) {
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options) {
Ok((retrieved_data, final_url, media_type, charset)) => {
// Provide output as text without processing it, the way browsers do
if !media_type.eq_ignore_ascii_case("text/html")
Expand Down Expand Up @@ -306,7 +306,7 @@ fn main() {
}

// Traverse through the document and embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options);

// Update or add new BASE element to reroute network requests and hash-links
if let Some(new_base_url) = options.base_url.clone() {
Expand All @@ -320,14 +320,7 @@ fn main() {
{
let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico");

match retrieve_asset(
&mut cache,
&client,
&target_url,
&favicon_ico_url,
&options,
0,
) {
match retrieve_asset(&mut cache, &client, &target_url, &favicon_ico_url, &options) {
Ok((data, final_url, media_type, charset)) => {
let favicon_data_url: Url =
create_data_url(&media_type, &charset, &data, &final_url);
Expand Down
Loading

0 comments on commit 87eb197

Please sign in to comment.