Skip to content

Commit

Permalink
add option for reading Netscape cookie file
Browse files Browse the repository at this point in the history
  • Loading branch information
Sunshine committed Nov 25, 2022
1 parent db04d11 commit 35e3b3e
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 27 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,10 @@ cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
- `-b`: Use custom `base URL`
- `-B`: Forbid retrieving assets from specified domain(s)
- `-c`: Exclude CSS
- `-C`: Save document using custom `charset`
- `-C`: Read cookies from `file`
- `-d`: Allow retrieving assets only from specified `domain(s)`
- `-e`: Ignore network errors
- `-E`: Save document using custom `encoding`
- `-f`: Omit frames
- `-F`: Exclude web fonts
- `-i`: Remove images
Expand Down
55 changes: 55 additions & 0 deletions src/cookies.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use std::time::{SystemTime, UNIX_EPOCH};

/// A single record read from a Netscape-format cookie file.
///
/// The fields appear in the same order as the tab-separated columns of one
/// cookie line in the file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Cookie {
    /// Domain the cookie belongs to (first column).
    pub domain: String,
    /// `true` when the second column is the literal `TRUE`.
    // NOTE(review): in the Netscape format this flag means "also match
    // subdomains" — confirm against the code that will consume it.
    pub tailmatch: bool,
    /// URL path the cookie is scoped to (third column).
    pub path: String,
    /// `true` when the fourth column is the literal `TRUE`.
    pub secure: bool,
    /// Expiry time in seconds since the Unix epoch (fifth column).
    pub expires: u64,
    /// Cookie name (sixth column).
    pub name: String,
    /// Cookie value (seventh column).
    pub value: String,
}

/// Reasons why the contents of a cookie file could not be parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CookieFileContentsParseError {
    /// The first line of the file is not one of the recognised header lines.
    InvalidHeader,
}

impl std::fmt::Display for CookieFileContentsParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidHeader => {
                f.write_str("cookie file does not start with a valid header line")
            }
        }
    }
}

// Lets the error be boxed as `dyn Error` and propagated with `?`.
impl std::error::Error for CookieFileContentsParseError {}

impl Cookie {
    /// Reports whether this cookie's expiry time lies in the past.
    ///
    /// An `expires` value of `0` denotes a session cookie, which carries no
    /// expiry time and is therefore never considered expired.
    ///
    /// Otherwise `expires` is compared against the current system time in
    /// whole seconds since the Unix epoch.
    pub fn is_expired(&self) -> bool {
        // Session cookie: no expiry time recorded
        if self.expires == 0 {
            return false;
        }

        // A system clock set before the epoch is treated as "time zero"
        // rather than a reason to abort the whole program.
        let now_secs = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .map_or(0, |elapsed| elapsed.as_secs());

        self.expires < now_secs
    }
}

/// Parses the text of a Netscape-format cookie file into a list of cookies.
///
/// The first line must be `# HTTP Cookie File` or
/// `# Netscape HTTP Cookie File` (compared ASCII case-insensitively).
/// Every following line is expected to hold seven tab-separated fields:
/// domain, tailmatch flag, path, secure flag, expiry, name and value.
///
/// Lines that cannot describe a cookie are skipped instead of aborting the
/// program: blank lines, `#` comment lines, lines that do not have exactly
/// seven fields, and lines whose expiry is not an unsigned integer.
/// Lines prefixed with `#HttpOnly_` are treated as cookie lines with the
/// prefix removed from the domain.
///
/// An empty input yields an empty list.
///
/// # Errors
///
/// Returns [`CookieFileContentsParseError::InvalidHeader`] when the first
/// line is not one of the recognised headers.
pub fn parse_cookie_file_contents(
    cookie_file_contents: &str,
) -> Result<Vec<Cookie>, CookieFileContentsParseError> {
    let mut lines = cookie_file_contents.lines();

    // Only the very first line is checked against the known headers
    if let Some(header) = lines.next() {
        let is_known_header = header.eq_ignore_ascii_case("# HTTP Cookie File")
            || header.eq_ignore_ascii_case("# Netscape HTTP Cookie File");
        if !is_known_header {
            return Err(CookieFileContentsParseError::InvalidHeader);
        }
    }

    Ok(lines.filter_map(parse_cookie_line).collect())
}

/// Parses one non-header line of a cookie file; `None` means "not a cookie".
fn parse_cookie_line(line: &str) -> Option<Cookie> {
    // `#HttpOnly_` marks a real cookie entry; any other `#` line is a comment
    let record = match line.strip_prefix("#HttpOnly_") {
        Some(rest) => rest,
        None if line.starts_with('#') => return None,
        None => line,
    };

    if record.trim().is_empty() {
        return None;
    }

    let fields: Vec<&str> = record.split('\t').collect();
    match fields.as_slice() {
        [domain, tailmatch, path, secure, expires, name, value] => Some(Cookie {
            domain: domain.to_string(),
            tailmatch: *tailmatch == "TRUE",
            path: path.to_string(),
            secure: *secure == "TRUE",
            // A non-numeric expiry makes the whole line unusable
            expires: expires.parse::<u64>().ok()?,
            name: name.to_string(),
            value: value.to_string(),
        }),
        // Wrong number of fields: not a cookie line
        _ => None,
    }
}
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
pub mod cookies;
pub mod css;
pub mod html;
pub mod js;
Expand Down
38 changes: 32 additions & 6 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::process;
use std::time::Duration;
use url::Url;

use monolith::cookies::parse_cookie_file_contents;
use monolith::html::{
add_favicon, create_metadata_tag, get_base_url, get_charset, has_favicon, html_to_dom,
serialize_document, set_base_url, set_charset, walk_and_embed_assets,
Expand Down Expand Up @@ -74,10 +75,10 @@ fn main() {
process::exit(1);
}

// Check if custom charset is valid
if let Some(custom_charset) = options.charset.clone() {
if !Encoding::for_label_no_replacement(custom_charset.as_bytes()).is_some() {
eprintln!("Unknown encoding: {}", &custom_charset);
// Check if custom encoding is valid
if let Some(custom_encoding) = options.encoding.clone() {
if !Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_some() {
eprintln!("Unknown encoding: {}", &custom_encoding);
process::exit(1);
}
}
Expand Down Expand Up @@ -139,6 +140,31 @@ fn main() {
},
};

// Deal with cookie file
if let Some(opt_cookies) = options.cookies.clone() {
match std::fs::read_to_string(opt_cookies) {
Ok(str) => match parse_cookie_file_contents(&str) {
Ok(cookies) => {
for c in &cookies {
println!(
"{} {} {} {} {} {} {}",
c.domain, c.tailmatch, c.path, c.secure, c.expires, c.name, c.value
);
println!("^ is expired: {}", c.is_expired());
}
}
Err(_) => {
eprintln!("Could not parse specified cookie file");
process::exit(1);
}
},
Err(_) => {
eprintln!("Could not read specified cookie file");
process::exit(1);
}
}
}

// Initialize client
let mut cache = HashMap::new();
let mut header_map = HeaderMap::new();
Expand Down Expand Up @@ -315,8 +341,8 @@ fn main() {
}

// Save using specified charset, if given
if let Some(custom_charset) = options.charset.clone() {
document_encoding = custom_charset;
if let Some(custom_encoding) = options.encoding.clone() {
document_encoding = custom_encoding;
dom = set_charset(dom, document_encoding.clone());
}

Expand Down
45 changes: 25 additions & 20 deletions src/opts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ pub struct Options {
pub base_url: Option<String>,
pub blacklist_domains: bool,
pub no_css: bool,
pub charset: Option<String>,
pub cookies: Option<String>,
pub domains: Option<Vec<String>>,
pub ignore_errors: bool,
pub encoding: Option<String>,
pub no_frames: bool,
pub no_fonts: bool,
pub no_images: bool,
Expand Down Expand Up @@ -48,13 +49,13 @@ impl Options {
.version(env!("CARGO_PKG_VERSION"))
.author(format!("\n{}\n\n", env!("CARGO_PKG_AUTHORS").replace(':', "\n")).as_str())
.about(format!("{}\n{}", ASCII, env!("CARGO_PKG_DESCRIPTION")).as_str())
.args_from_usage("-a, --no-audio 'Removes audio sources'")
.args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
.args_from_usage("-a, --no-audio 'Remove audio sources'")
.args_from_usage("-b, --base-url=[http://localhost/] 'Set custom base URL'")
.args_from_usage(
"-B, --blacklist-domains 'Treat list of specified domains as blacklist'",
)
.args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-C, --charset=[UTF-8] 'Enforces custom encoding'")
.args_from_usage("-c, --no-css 'Remove CSS'")
.args_from_usage("-C, --cookies=[cookies.txt] 'Specify cookie file'")
.arg(
Arg::with_name("domains")
.short('d')
Expand All @@ -65,23 +66,24 @@ impl Options {
.help("Specify domains to use for white/black-listing"),
)
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-F, --no-fonts 'Removes fonts'")
.args_from_usage("-i, --no-images 'Removes images'")
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
.args_from_usage("-E, --encoding=[UTF-8] 'Enforce custom charset'")
.args_from_usage("-f, --no-frames 'Remove frames and iframes'")
.args_from_usage("-F, --no-fonts 'Remove fonts'")
.args_from_usage("-i, --no-images 'Remove images'")
.args_from_usage("-I, --isolate 'Cut off document from the Internet'")
.args_from_usage("-j, --no-js 'Remove JavaScript'")
.args_from_usage("-k, --insecure 'Allow invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Exclude timestamp and source information'")
.args_from_usage(
"-n, --unwrap-noscript 'Replaces NOSCRIPT elements with their contents'",
"-n, --unwrap-noscript 'Replace NOSCRIPT elements with their contents'",
)
.args_from_usage(
"-o, --output=[document.html] 'Writes output to <file>, use - for STDOUT'",
"-o, --output=[document.html] 'Write output to <file>, use - for STDOUT'",
)
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
.args_from_usage("-v, --no-video 'Removes video sources'")
.args_from_usage("-s, --silent 'Suppress verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
.args_from_usage("-v, --no-video 'Remove video sources'")
.arg(
Arg::with_name("target")
.required(true)
Expand All @@ -103,8 +105,8 @@ impl Options {
}
options.blacklist_domains = app.is_present("blacklist-domains");
options.no_css = app.is_present("no-css");
if let Some(charset) = app.value_of("charset") {
options.charset = Some(charset.to_string());
if let Some(encoding) = app.value_of("encoding") {
options.encoding = Some(encoding.to_string());
}
if let Some(domains) = app.get_many::<String>("domains") {
let list_of_domains: Vec<String> = domains.map(|v| v.clone()).collect::<Vec<_>>();
Expand All @@ -119,6 +121,9 @@ impl Options {
options.insecure = app.is_present("insecure");
options.no_metadata = app.is_present("no-metadata");
options.output = app.value_of("output").unwrap_or("").to_string();
if let Some(cookies) = app.value_of("cookies") {
options.cookies = Some(cookies.to_string());
}
options.silent = app.is_present("silent");
options.timeout = app
.value_of("timeout")
Expand Down

0 comments on commit 35e3b3e

Please sign in to comment.