Skip to content

Commit

Permalink
password extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
domai-tb committed Aug 15, 2024
1 parent 87b0dbc commit 0074fe7
Show file tree
Hide file tree
Showing 4 changed files with 276 additions and 1 deletion.
12 changes: 12 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[package]
name = "leakh"
version = "0.1.0"
edition = "2021"

[dependencies]
serde = "1.0"
serde_derive = "1.0"
toml = "0.5"
regex = "1.8"
clap = "4.3.10"
crossbeam = "0.8.4"
52 changes: 51 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,52 @@
# leakh
Small utility and helper command to handle password leakage files.

leakh is a multi-threaded command line utility and helper tool to handle password leakage files.

It uses regular expressions to extract passwords from `.txt` or `.csv` files. Each file inside the given `directory` is read by a separate thread that returns the list of all passwords it found, together with the number of times each one appeared. After all passwords have been extracted, the per-file results are merged so that duplicates are combined, and the resulting list is sorted by count. The passwords are written to the `output` file, and each password with its count is written to a separate `$(output).stats.csv` file.

## Usage

```bash
Extracts passwords from files

Usage: leakh [OPTIONS] --config <FILE> --directory <DIR> --output <FILE>

Options:
-c, --config <FILE> Specifies the config file
-d, --directory <DIR> Specifies the directory to scan for files
-o, --output <FILE> Specifies the output file location
-v, --verbose Enables verbose output
-h, --help Print help
-V, --version Print version
```

## Configuration

leakh uses a configuration file that follows the `.toml` syntax.

```toml
# Default configuration for all files
[default]
# Regex pattern to extract password (e.g., password is everything after the first ":")
pattern = "^[^:]+:(\\S[^\n]*)"
# Minimum length for passwords to be considered valid
min_length = 6
# List of unwanted strings to filter out using regular expressions
unwanted_strings = [
"imap\\.[^\\s]+",
"smtp\\.[^\\s]+",
"NULL",
"^#file_links.*",
"^lUCKY&quot;=&quot;=STEVEN.*"
]

# Optional specific configurations for individual files
[files]

# Custom configuration for "special_file.txt"
[files."special_file.txt"]
pattern = "\\|\\s*(\\S+)"
unwanted_strings = ["domain\\.com", "test\\.com"]
min_length = 8

```
23 changes: 23 additions & 0 deletions leakh.config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Default configuration for all files
[default]
# Regex pattern to extract password (e.g., password is everything after the first ":")
pattern = "^[^:]+:(\\S[^\n]*)"
# Minimum length for passwords to be considered valid
min_length = 6
# List of unwanted strings to filter out using regular expressions
unwanted_strings = [
"imap\\.[^\\s]+",
"smtp\\.[^\\s]+",
"NULL",
"^#file_links.*",
"^lUCKY&quot;=&quot;=STEVEN.*"
]

# Optional specific configurations for individual files
[files]

# Custom configuration for "special_file.txt"
# [files."special_file.txt"]
# pattern = "\\|\\s*(\\S+)"
# unwanted_strings = ["domain\\.com", "test\\.com"]
# min_length = 8
190 changes: 190 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
use clap::{Arg, Command};
use std::fs::{self, File};
use std::io::{BufReader, BufRead, Write};
use std::path::Path;
use std::collections::HashMap;
use serde_derive::Deserialize;
use regex::Regex;
use crossbeam::channel;
use std::thread;

/// Top-level layout of the TOML configuration file.
#[derive(Debug, Clone, Deserialize)]
struct Config {
    /// Settings applied to every file that has no entry of its own in `files`.
    default: FileConfig,
    /// Optional per-file overrides, keyed by file name.
    files: Option<HashMap<String, FileConfig>>,
}

/// Extraction and filtering rules for one file (or for the `[default]` section).
#[derive(Debug, Clone, Deserialize)]
struct FileConfig {
    /// Regex pattern used to extract the password from a line.
    pattern: String,
    /// Regex patterns; a password matching any of them is discarded.
    unwanted_strings: Vec<String>,
    /// Minimum length a password must have to be kept.
    min_length: usize,
}

/// Entry point of the password extractor.
///
/// Parses the command line, loads the TOML configuration, spawns one worker thread per
/// `.txt`/`.csv` file found in the given directory, merges the per-file password counts that
/// the workers send over a channel, and writes the passwords ordered by descending count.
///
/// A worker that panics (for example because its file cannot be opened) is reported on
/// stderr instead of being silently dropped; the remaining files are still written.
///
/// # Panics
///
/// Panics if the directory or one of its entries cannot be read.
fn main() {
    // Define command-line arguments using clap
    let matches = Command::new("Password Extractor")
        .version("1.0")
        .author("Your Name <your.email@example.com>")
        .about("Extracts passwords from files")
        .arg(
            Arg::new("config")
                .short('c')
                .long("config")
                .value_name("FILE")
                .help("Specifies the config file")
                .required(true),
        )
        .arg(
            Arg::new("directory")
                .short('d')
                .long("directory")
                .value_name("DIR")
                .help("Specifies the directory to scan for files")
                .required(true),
        )
        .arg(
            Arg::new("output")
                .short('o')
                .long("output")
                .value_name("FILE")
                .help("Specifies the output file location")
                .required(true),
        )
        .arg(
            Arg::new("verbose")
                .short('v')
                .long("verbose")
                .help("Enables verbose output")
                .action(clap::ArgAction::SetTrue),
        )
        .get_matches();

    // The three path arguments are declared `required(true)`, so clap has already rejected
    // any invocation in which one of them is missing.
    let config_path = matches
        .get_one::<String>("config")
        .expect("clap enforces the required --config argument");
    let directory_path = matches
        .get_one::<String>("directory")
        .expect("clap enforces the required --directory argument");
    let output_path = matches
        .get_one::<String>("output")
        .expect("clap enforces the required --output argument");
    let verbose = *matches
        .get_one::<bool>("verbose")
        .expect("a SetTrue flag always carries a value");

    // Load and parse the configuration file
    if verbose {
        println!("Loading configuration from: {}", config_path);
    }
    let config: Config = load_config(config_path);

    // Set up a channel to communicate between threads
    let (sender, receiver) = channel::unbounded();

    // Keep every worker's handle (with a printable file name) so that a panic inside a
    // worker can be detected and reported once all results have been collected.
    let mut workers = Vec::new();

    // Process the directory
    for entry in fs::read_dir(directory_path).expect("Unable to read directory") {
        let entry = entry.expect("Failed to read directory entry");
        let path = entry.path();
        if let Some(extension) = path.extension() {
            match extension.to_str() {
                Some("txt") | Some("csv") => {
                    println!("Processing file: {}", path.display());

                    let c_config = config.clone();
                    let c_path = path.clone();
                    let c_sender = sender.clone();

                    let handle = thread::spawn(move || {
                        let mut local_password_counts: HashMap<String, usize> = HashMap::new();
                        process_file(&c_path, &c_config, &mut local_password_counts, verbose);
                        c_sender
                            .send(local_password_counts)
                            .expect("Failed to send results from thread");
                    });
                    workers.push((path.display().to_string(), handle));
                }
                _ => {
                    println!("Ignore file: {}", path.display());
                }
            }
        }
    }

    // Close the sending side of the channel so the receiver will know when to stop
    drop(sender);

    // Collect all the results from the threads. The loop ends once every worker has dropped
    // its sender, which happens both on normal completion and when a worker panics.
    let mut password_counts: HashMap<String, usize> = HashMap::new();
    for local_counts in receiver {
        for (password, count) in local_counts {
            *password_counts.entry(password).or_insert(0) += count;
        }
    }

    // A panicked worker never sent its counts; surface that instead of pretending the
    // output is complete.
    let mut failed_files = 0usize;
    for (file, handle) in workers {
        if handle.join().is_err() {
            failed_files += 1;
            eprintln!("Failed to process file: {}", file);
        }
    }

    // Sort passwords by descending count. Ties are broken by the password itself so the
    // output does not depend on the HashMap's iteration order and is identical across runs.
    let mut sorted_passwords: Vec<(String, usize)> = password_counts.into_iter().collect();
    sorted_passwords.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
    write_output(output_path, &sorted_passwords);

    println!("Password extraction complete. Output written to: {}", output_path);
    if failed_files > 0 {
        eprintln!(
            "Warning: {} file(s) could not be processed; the output is incomplete.",
            failed_files
        );
    }
}

/// Reads the file at `config_path` and deserializes its TOML content into a `Config`.
///
/// # Panics
///
/// Panics with "Unable to read config file" if the file cannot be read, and with
/// "Invalid TOML format" if its content does not deserialize into a `Config`.
fn load_config(config_path: &str) -> Config {
    let raw_toml = fs::read_to_string(config_path).expect("Unable to read config file");
    toml::from_str::<Config>(&raw_toml).expect("Invalid TOML format")
}

/// Processes a single file and adds every accepted password to `password_counts`.
///
/// The per-file configuration is looked up in `config.files` by file name; when there is no
/// entry, `config.default` is used. All regular expressions are compiled once per file, then
/// the file is read line by line.
///
/// Lines that are not valid UTF-8 are skipped (and reported when `verbose` is set) so that a
/// single stray byte does not abort the whole file.
///
/// # Panics
///
/// Panics if a configured regex is invalid, if the extraction pattern has no capture group,
/// or if the file cannot be opened or read.
fn process_file(path: &Path, config: &Config, password_counts: &mut HashMap<String, usize>, verbose: bool) {
    // A lossy conversion keeps non-UTF-8 file names from panicking; such a name simply
    // cannot match a key in `files` and therefore falls back to the default configuration.
    let file_name = path
        .file_name()
        .map(|name| name.to_string_lossy())
        .unwrap_or_default();
    let file_config = config
        .files
        .as_ref()
        .and_then(|files| files.get(&*file_name))
        .unwrap_or(&config.default);

    let pattern = Regex::new(&file_config.pattern).expect("Invalid regex pattern");
    // Group 0 is the whole match, so a usable pattern needs at least two groups. Checking
    // once here gives a clear message instead of a failure on the first matching line.
    assert!(
        pattern.captures_len() > 1,
        "Regex pattern must contain a capture group for the password"
    );
    // Compile the filters once per file rather than once per line.
    let unwanted = compile_unwanted(file_config);

    let file = File::open(path).expect("Unable to open file");
    let mut reader = BufReader::new(file);
    // One byte buffer reused for every line.
    let mut raw_line: Vec<u8> = Vec::new();

    loop {
        raw_line.clear();
        let bytes_read = reader
            .read_until(b'\n', &mut raw_line)
            .expect("Unable to read line");
        if bytes_read == 0 {
            break; // end of file
        }

        // Strip the line terminator ("\n" or "\r\n"), as `BufRead::lines` does.
        if raw_line.last() == Some(&b'\n') {
            raw_line.pop();
            if raw_line.last() == Some(&b'\r') {
                raw_line.pop();
            }
        }

        let line = match std::str::from_utf8(&raw_line) {
            Ok(line) => line,
            Err(_) => {
                if verbose {
                    println!("Skipped line with invalid UTF-8 in: {}", path.display());
                }
                continue;
            }
        };

        if let Some(password) =
            extract_with_filters(line, &pattern, &unwanted, file_config.min_length, verbose)
        {
            *password_counts.entry(password).or_insert(0) += 1;
        }
    }
}

/// Compiles every entry of `config.unwanted_strings` into a regex.
///
/// # Panics
///
/// Panics with "Invalid unwanted string pattern" if an entry is not a valid regex.
fn compile_unwanted(config: &FileConfig) -> Vec<Regex> {
    config
        .unwanted_strings
        .iter()
        .map(|unwanted| Regex::new(unwanted).expect("Invalid unwanted string pattern"))
        .collect()
}

/// Extracts capture group 1 of `pattern` from `line` and applies the filters.
///
/// Returns `None` when the line does not match, when group 1 did not take part in the match,
/// when the password matches one of the `unwanted` regexes, or when it is shorter than
/// `min_length`. Rejections are printed when `verbose` is set.
fn extract_with_filters(
    line: &str,
    pattern: &Regex,
    unwanted: &[Regex],
    min_length: usize,
    verbose: bool,
) -> Option<String> {
    let password = pattern.captures(line)?.get(1)?.as_str();

    if unwanted.iter().any(|filter| filter.is_match(password)) {
        if verbose {
            println!("Filtered out unwanted password: {}", password);
        }
        return None;
    }

    // Filter out passwords that are shorter than the minimum length.
    // Note: `len()` is the length in bytes, not in characters.
    if password.len() < min_length {
        if verbose {
            println!("Filtered out short password: {}", password);
        }
        return None;
    }

    Some(password.to_string())
}

/// Extracts the password from a line using the given pattern and the filters in `config`.
///
/// This compiles `config.unwanted_strings` on every call; code that handles many lines
/// should compile the filters once and call `extract_with_filters` instead, as
/// `process_file` does.
// Kept with its original signature for any caller that only has a `FileConfig` at hand;
// `process_file` no longer goes through it, hence the lint exemption.
#[allow(dead_code)]
fn extract_password(line: &str, pattern: &Regex, config: &FileConfig, verbose: bool) -> Option<String> {
    let unwanted = compile_unwanted(config);
    extract_with_filters(line, pattern, &unwanted, config.min_length, verbose)
}

/// Writes the sorted passwords to the output file and their counts to a stats file.
///
/// `output_path` receives one password per line, in the order given. A second file named
/// `<output_path>.stats.csv` receives a `Password,Count` header followed by one row per
/// password. Password fields containing a comma, a double quote, or a line break are quoted
/// as described in RFC 4180 so the stats file stays parseable as CSV.
///
/// Both files are written through a buffer and flushed explicitly, so that write errors are
/// reported rather than swallowed when the writer is dropped.
///
/// # Panics
///
/// Panics if either file cannot be created or written.
fn write_output(output_path: &str, sorted_passwords: &[(String, usize)]) {
    let mut file = std::io::BufWriter::new(
        File::create(output_path).expect("Unable to create output file"),
    );
    let mut stats_file = std::io::BufWriter::new(
        File::create(format!("{}.stats.csv", output_path)).expect("Unable to create stats file"),
    );

    // stats file header
    writeln!(stats_file, "Password,Count").expect("Unable to write to stats file");

    for (password, count) in sorted_passwords {
        writeln!(file, "{}", password).expect("Unable to write to output file");
        writeln!(stats_file, "{},{}", csv_field(password), count)
            .expect("Unable to write to stats file");
    }

    file.flush().expect("Unable to write to output file");
    stats_file.flush().expect("Unable to write to stats file");
}

/// Returns `field` ready to be placed in a CSV row: unchanged when it needs no quoting,
/// otherwise wrapped in double quotes with embedded double quotes doubled.
fn csv_field(field: &str) -> std::borrow::Cow<'_, str> {
    let needs_quoting = field.contains(|c: char| matches!(c, ',' | '"' | '\n' | '\r'));
    if needs_quoting {
        std::borrow::Cow::Owned(format!("\"{}\"", field.replace('"', "\"\"")))
    } else {
        std::borrow::Cow::Borrowed(field)
    }
}

0 comments on commit 0074fe7

Please sign in to comment.