diff --git a/README.md b/README.md index 3d1198d7..edf934a1 100644 --- a/README.md +++ b/README.md @@ -119,9 +119,10 @@ cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html - `-b`: Use custom `base URL` - `-B`: Forbid retrieving assets from specified domain(s) - `-c`: Exclude CSS - - `-C`: Save document using custom `charset` + - `-C`: Read cookies from `file` - `-d`: Allow retrieving assets only from specified `domain(s)` - `-e`: Ignore network errors + - `-E`: Save document using custom `encoding` - `-f`: Omit frames - `-F`: Exclude web fonts - `-i`: Remove images diff --git a/src/cookies.rs b/src/cookies.rs new file mode 100644 index 00000000..f961aa04 --- /dev/null +++ b/src/cookies.rs @@ -0,0 +1,55 @@ +use std::time::{SystemTime, UNIX_EPOCH}; + +pub struct Cookie { + pub domain: String, + pub tailmatch: bool, + pub path: String, + pub secure: bool, + pub expires: u64, + pub name: String, + pub value: String, +} + +pub enum CookieFileContentsParseError { + InvalidHeader, +} + +impl Cookie { + pub fn is_expired(&self) -> bool { + let start = SystemTime::now(); + let since_the_epoch = start + .duration_since(UNIX_EPOCH) + .expect("Time went backwards"); + + self.expires < since_the_epoch.as_secs() + } +} + +pub fn parse_cookie_file_contents( + cookie_file_contents: &str, +) -> Result, CookieFileContentsParseError> { + let mut cookies: Vec = Vec::new(); + + for (i, line) in cookie_file_contents.lines().enumerate() { + if i == 0 { + if !line.eq_ignore_ascii_case("# HTTP Cookie File") + && !line.eq_ignore_ascii_case("# Netscape HTTP Cookie File") + { + return Err(CookieFileContentsParseError::InvalidHeader); + } + } else { + let mut fields = line.split("\t"); + cookies.push(Cookie { + domain: fields.next().unwrap().to_string(), + tailmatch: fields.next().unwrap().to_string() == "TRUE", + path: fields.next().unwrap().to_string(), + secure: fields.next().unwrap().to_string() == "TRUE", + expires: fields.next().unwrap().parse::().unwrap(), + name: fields.next().unwrap().to_string(), + value: fields.next().unwrap().to_string(), + }); + } + } + + Ok(cookies) +} diff --git a/src/lib.rs b/src/lib.rs index 57cd530f..460d3888 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +pub mod cookies; pub mod css; pub mod html; pub mod js; diff --git a/src/main.rs b/src/main.rs index 0517d57c..aecbb741 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,6 +10,7 @@ use std::process; use std::time::Duration; use url::Url; +use monolith::cookies::parse_cookie_file_contents; use monolith::html::{ add_favicon, create_metadata_tag, get_base_url, get_charset, has_favicon, html_to_dom, serialize_document, set_base_url, set_charset, walk_and_embed_assets, @@ -74,10 +75,10 @@ fn main() { process::exit(1); } - // Check if custom charset is valid - if let Some(custom_charset) = options.charset.clone() { - if !Encoding::for_label_no_replacement(custom_charset.as_bytes()).is_some() { - eprintln!("Unknown encoding: {}", &custom_charset); + // Check if custom encoding is valid + if let Some(custom_encoding) = options.encoding.clone() { + if !Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_some() { + eprintln!("Unknown encoding: {}", &custom_encoding); process::exit(1); } } @@ -139,6 +140,31 @@ fn main() { }, }; + // Deal with cookie file + if let Some(opt_cookies) = options.cookies.clone() { + match std::fs::read_to_string(opt_cookies) { + Ok(str) => match parse_cookie_file_contents(&str) { + Ok(cookies) => { + for c in &cookies { + println!( + "{} {} {} {} {} {} {}", + c.domain, c.tailmatch, c.path, c.secure, c.expires, c.name, c.value + ); + println!("^ is expired: {}", c.is_expired()); + } + } + Err(_) => { + eprintln!("Could not parse specified cookie file"); + process::exit(1); + } + }, + Err(_) => { + eprintln!("Could not read specified cookie file"); + process::exit(1); + } + } + } + // Initialize client let mut cache = HashMap::new(); let mut header_map = HeaderMap::new(); @@ -315,8 +341,8 @@ fn main() { } // Save using specified charset, if given - if let Some(custom_charset) = options.charset.clone() { - document_encoding = custom_charset; + if let Some(custom_encoding) = options.encoding.clone() { + document_encoding = custom_encoding; dom = set_charset(dom, document_encoding.clone()); } diff --git a/src/opts.rs b/src/opts.rs index bee5d86c..d54f9f1d 100644 --- a/src/opts.rs +++ b/src/opts.rs @@ -7,9 +7,10 @@ pub struct Options { pub base_url: Option, pub blacklist_domains: bool, pub no_css: bool, - pub charset: Option, + pub cookies: Option, pub domains: Option>, pub ignore_errors: bool, + pub encoding: Option, pub no_frames: bool, pub no_fonts: bool, pub no_images: bool, @@ -48,13 +49,13 @@ impl Options { .version(env!("CARGO_PKG_VERSION")) .author(format!("\n{}\n\n", env!("CARGO_PKG_AUTHORS").replace(':', "\n")).as_str()) .about(format!("{}\n{}", ASCII, env!("CARGO_PKG_DESCRIPTION")).as_str()) - .args_from_usage("-a, --no-audio 'Removes audio sources'") - .args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'") + .args_from_usage("-a, --no-audio 'Remove audio sources'") + .args_from_usage("-b, --base-url=[http://localhost/] 'Set custom base URL'") .args_from_usage( "-B, --blacklist-domains 'Treat list of specified domains as blacklist'", ) - .args_from_usage("-c, --no-css 'Removes CSS'") - .args_from_usage("-C, --charset=[UTF-8] 'Enforces custom encoding'") + .args_from_usage("-c, --no-css 'Remove CSS'") + .args_from_usage("-C, --cookies=[cookies.txt] 'Specify cookie file'") .arg( Arg::with_name("domains") .short('d') @@ -65,23 +66,24 @@ impl Options { .help("Specify domains to use for white/black-listing"), ) .args_from_usage("-e, --ignore-errors 'Ignore network errors'") - .args_from_usage("-f, --no-frames 'Removes frames and iframes'") - .args_from_usage("-F, --no-fonts 'Removes fonts'") - .args_from_usage("-i, --no-images 'Removes images'") - .args_from_usage("-I, --isolate 'Cuts off document from the Internet'") - .args_from_usage("-j, --no-js 'Removes JavaScript'") - .args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'") - .args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'") + .args_from_usage("-E, --encoding=[UTF-8] 'Enforce custom charset'") + .args_from_usage("-f, --no-frames 'Remove frames and iframes'") + .args_from_usage("-F, --no-fonts 'Remove fonts'") + .args_from_usage("-i, --no-images 'Remove images'") + .args_from_usage("-I, --isolate 'Cut off document from the Internet'") + .args_from_usage("-j, --no-js 'Remove JavaScript'") + .args_from_usage("-k, --insecure 'Allow invalid X.509 (TLS) certificates'") + .args_from_usage("-M, --no-metadata 'Exclude timestamp and source information'") .args_from_usage( - "-n, --unwrap-noscript 'Replaces NOSCRIPT elements with their contents'", + "-n, --unwrap-noscript 'Replace NOSCRIPT elements with their contents'", ) .args_from_usage( - "-o, --output=[document.html] 'Writes output to , use - for STDOUT'", + "-o, --output=[document.html] 'Write output to , use - for STDOUT'", ) - .args_from_usage("-s, --silent 'Suppresses verbosity'") - .args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'") - .args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'") - .args_from_usage("-v, --no-video 'Removes video sources'") + .args_from_usage("-s, --silent 'Suppress verbosity'") + .args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'") + .args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'") + .args_from_usage("-v, --no-video 'Remove video sources'") .arg( Arg::with_name("target") .required(true) @@ -103,8 +105,8 @@ impl Options { } options.blacklist_domains = app.is_present("blacklist-domains"); options.no_css = app.is_present("no-css"); - if let Some(charset) = app.value_of("charset") { - options.charset = Some(charset.to_string()); + if let Some(encoding) = app.value_of("encoding") { + options.encoding = Some(encoding.to_string()); } if let Some(domains) = app.get_many::("domains") { let list_of_domains: Vec = domains.map(|v| v.clone()).collect::>(); @@ -119,6 +121,9 @@ impl Options { options.insecure = app.is_present("insecure"); options.no_metadata = app.is_present("no-metadata"); options.output = app.value_of("output").unwrap_or("").to_string(); + if let Some(cookies) = app.value_of("cookies") { + options.cookies = Some(cookies.to_string()); + } options.silent = app.is_present("silent"); options.timeout = app .value_of("timeout")