diff --git a/.gitignore b/.gitignore index c41cc9e..ba0edcf 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -/target \ No newline at end of file +/target +*.pem +*.sh \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index ecfba8e..2819fd5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2187,7 +2187,7 @@ dependencies = [ [[package]] name = "wp" -version = "0.1.8" +version = "0.1.9" dependencies = [ "clap", "indicatif", diff --git a/Cargo.toml b/Cargo.toml index e12097d..356707c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "wp" license = "GPLv3" -version = "0.1.8" +version = "0.1.9" description = "Wikipedia tools on your terminal." edition = "2021" authors = ["Aniruddha Mukherjee "] diff --git a/src/core.rs b/src/core.rs index 856e009..a6a532f 100644 --- a/src/core.rs +++ b/src/core.rs @@ -34,11 +34,9 @@ fn remove_nested_braces(input: &str) -> String { let mut chars = input.chars().peekable(); while let Some(c) = chars.next() { if c == '{' && chars.peek() == Some(&'{') { - // Skip the next character since we found {{ chars.next(); stack += 1; } else if c == '}' && chars.peek() == Some(&'}') { - // Skip the next character since we found }} chars.next(); if stack > 0 { stack -= 1; @@ -95,22 +93,24 @@ fn parse_text(characters: &Vec) -> Option> { let mut current = 0; let mut tokens: Vec = Vec::new(); let mut is_bullet = false; - // const MAX_ITERATIONS = 1000000; - println!("Charatcers: {}", characters.len()); + // Very nutty, bad will have to do for now + const MAX_ITERATINS: i32 = 150000; + let mut iter_count = 0; - while current < characters.len() { + while current < characters.len() && iter_count < MAX_ITERATINS { + iter_count += 1; match characters[current] { '{' => { // Assuming we can only have three levels of nesting // This is some convoluted shit thanks to wikipedia's format:( current += 2; - while advance(characters, &mut current) != '}' { + while advance(characters, &mut current, &mut iter_count) != '}' { if characters[current] == '{' { current += 1; - while advance(characters, &mut current) != '}' { + while advance(characters, &mut current, &mut iter_count) != '}' { if characters[current] == '{' { current += 1; - while advance(characters, &mut current) != '}' {} + while advance(characters, &mut current, &mut iter_count) != '}' {} current += 1; } } @@ -123,7 +123,7 @@ fn parse_text(characters: &Vec) -> Option> { if peek_ahead(&characters, current) == '\'' { let mut apostrophe_count = 0; let mut format = FormatType::Bold; - while advance(&characters, &mut current) == '\'' { + while advance(&characters, &mut current, &mut iter_count) == '\'' { apostrophe_count += 1; } if apostrophe_count == 2 { @@ -141,7 +141,7 @@ fn parse_text(characters: &Vec) -> Option> { } start = current - 1; - while advance(&characters, &mut current) != '\'' {} + while advance(&characters, &mut current, &mut iter_count) != '\'' {} add_token(&mut tokens, start, current, format); current += apostrophe_count - 1; } else { @@ -156,10 +156,10 @@ fn parse_text(characters: &Vec) -> Option> { let mut has_pipe = false; current += 2; start = current; - while advance(characters, &mut current) != ']' { + while advance(characters, &mut current, &mut iter_count) != ']' { if characters[current] == '[' { has_nesting = true; - while advance(characters, &mut current) != ']' {} + while advance(characters, &mut current, &mut iter_count) != ']' {} current += 1; } else if characters[current] == '|' { has_pipe = true; @@ -177,14 +177,14 @@ fn parse_text(characters: &Vec) -> Option> { add_space(&mut tokens, current); current += 1; } - '<' => while advance(characters, &mut current) != '>' {}, + '<' => while advance(characters, &mut current, &mut iter_count) != '>' {}, '=' => { let mut equals_count = 0; - while advance(characters, &mut current) == '=' { + while advance(characters, &mut current, &mut iter_count) == '=' { equals_count += 1; } start = current - 1; - while advance(characters, &mut current) != '=' {} + while advance(characters, &mut current, &mut iter_count) != '=' {} match equals_count { 2 => { add_token(&mut tokens, start, current, FormatType::Title); @@ -208,7 +208,7 @@ fn parse_text(characters: &Vec) -> Option> { } else if characters[current] == '"' { current += 1; start = current; - while advance(characters, &mut current) != '\\' {} + while advance(characters, &mut current, &mut iter_count) != '\\' {} add_token(&mut tokens, start, current, FormatType::InlineQuote); } current += 1; diff --git a/src/main.rs b/src/main.rs index 71369a0..b53894e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -45,6 +45,7 @@ fn main() { let (plaintext, url_title) = plaintext_from_link(&link); if args.save { let mut hasher = DefaultHasher::new(); + save_to_disk(&plaintext, &url_title, &mut hasher, false); } else { output_to_stdout(&plaintext); diff --git a/src/scraper.rs b/src/scraper.rs index a34c272..8a29281 100644 --- a/src/scraper.rs +++ b/src/scraper.rs @@ -82,7 +82,6 @@ pub fn bulk_download_or_save_links( let url = Url::parse(start_url)?; let main_url = url.host_str().ok_or("Invalid URL")?; - println!("šŸ’­ Links will be saved to your current directory as zip."); println!("āš” Scraping links..."); let mut next_batch_link = start_url.to_string(); @@ -136,6 +135,10 @@ pub fn bulk_download_or_save_links( } else { // Download straight from the links! println!("āš” Proceeding with the downloads..."); + match fs::create_dir("wp_downloads") { + Ok(()) => println!("Directory created successfully"), + Err(err) => println!("Error creating directory: {}", err), + } let dir_path = Path::new("."); let files: Vec<_> = fs::read_dir(dir_path)? .filter_map(|entry| entry.ok()) @@ -144,6 +147,7 @@ pub fn bulk_download_or_save_links( && entry.path().extension().and_then(|s| s.to_str()) == Some("links") }) .collect(); + for each_file in files { let file_path = each_file.path(); match download_from_file(file_path.to_str().unwrap()) { diff --git a/src/utils.rs b/src/utils.rs index aba981c..07ff74a 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -11,12 +11,19 @@ use regex::Regex; use crate::{plaintext_from_link, FormatType, Token}; -pub fn advance(text: &Vec, current: &mut usize) -> char { - *current += 1; - if *current < text.len() { - return text[*current - 1]; +pub fn advance(text: &Vec, current: &mut usize, iter: &mut i32) -> char { + let max = 150000; + if *iter < max { + *iter += 1; + *current += 1; + if *current < text.len() { + return text[*current - 1]; + } + return '\0'; + } else { + panic!("Infinite loop"); + // exit(1); } - return '\0'; } pub fn add_token(tokens: &mut Vec, start: usize, current: usize, format: FormatType) { @@ -96,7 +103,13 @@ pub fn save_to_disk( article_title.hash(hasher); let hash = hasher.finish(); let hash = format!("{:x}.txt", hash); - let path = Path::new(&hash); + let file_path; + if is_bulk { + file_path = format!("./wp_downloads/{}", hash); + } else { + file_path = format!("{}", hash); + } + let path = Path::new(&file_path); let mut file = match File::create(&path) { Err(why) => panic!("Error: Couldn't create {}: {}", path.display(), why), @@ -138,7 +151,8 @@ pub fn download_from_file(link: &str) -> Option { ))); println!("šŸ” Total links found: {}", total_count); - println!("šŸ—ƒļø Downloading articles in bulk...\n"); + println!("šŸ—ƒļø Downloading articles in bulk in wp_downlods...\n"); + for link in list_of_links { let bar = Arc::clone(&bar); let handle = spawn(move || { @@ -149,10 +163,13 @@ pub fn download_from_file(link: &str) -> Option { }); handles.push(handle); } + // Just pure evil for handle in handles { - handle.join().unwrap(); + match handle.join() { + Ok(_) => (), + Err(err) => println!("Thread returned an error: {:?}", err), + } } - bar.lock().unwrap().finish_and_clear(); println!("\nāœ… Download complete.");