From 166dbd2cc878e30c51538b919abc1aaea4465c45 Mon Sep 17 00:00:00 2001 From: Sylvain Reynaud Date: Wed, 15 Feb 2023 23:02:48 +0100 Subject: [PATCH] feat(#289): add haveibeenpwned check (#1253) --- Cargo.lock | 107 +++++++++++++++++++++++++++++++++- backend/tests/check_email.rs | 4 +- cli/src/main.rs | 7 ++- core/Cargo.toml | 1 + core/src/haveibeenpwned.rs | 55 +++++++++++++++++ core/src/lib.rs | 8 ++- core/src/misc/mod.rs | 17 +++++- core/src/util/input_output.rs | 17 +++++- 8 files changed, 206 insertions(+), 10 deletions(-) create mode 100644 core/src/haveibeenpwned.rs diff --git a/Cargo.lock b/Cargo.lock index b366a54d2..fc20e4a0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -424,6 +424,7 @@ dependencies = [ "log", "mailchecker", "md5", + "pwned", "rand", "regex", "reqwest", @@ -475,7 +476,7 @@ dependencies = [ "clap_lex", "indexmap", "once_cell", - "strsim", + "strsim 0.10.0", "termcolor", "textwrap", ] @@ -624,6 +625,41 @@ dependencies = [ "syn", ] +[[package]] +name = "darling" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim 0.9.3", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" +dependencies = [ + "darling_core", + "quote", + "syn", +] + [[package]] name = "data-encoding" version = "2.3.2" @@ -640,6 +676,31 @@ dependencies = [ "uuid 0.8.2", ] +[[package]] +name = "derive_builder" +version = "0.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2658621297f2cf68762a6f7dc0bb7e1ff2cfd6583daef8ee0fed6f7ec468ec0" +dependencies = [ + "darling", + "derive_builder_core", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_core" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2791ea3e372c8495c0bc2033991d76b512cd799d07491fbd6890124db9458bef" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "digest" version = "0.10.3" @@ -1208,6 +1269,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "0.2.3" @@ -1778,6 +1845,21 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pwned" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f75258f75c681eb691607acdb325d0150907bc68826365b07476834df2664974" +dependencies = [ + "derive_builder", + "reqwest", + "serde", + "serde_derive", + "serde_json", + "sha1 0.6.1", + "thiserror", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -2180,6 +2262,15 @@ dependencies = [ "digest", ] +[[package]] +name = "sha1" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1da05c97445caa12d05e848c4a4fcbbea29e748ac28f7e80e9b010392063770" +dependencies = [ + "sha1_smol", +] + [[package]] name = "sha1" version = "0.10.4" @@ -2191,6 +2282,12 @@ dependencies = [ "digest", ] +[[package]] +name = "sha1_smol" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" + [[package]] name = "sha2" version = "0.10.2" @@ -2306,7 +2403,7 @@ dependencies = [ "rand", "serde", "serde_json", - "sha1", + 
"sha1 0.10.4", "sha2", "smallvec", "sqlformat", @@ -2392,6 +2489,12 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "strsim" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c" + [[package]] name = "strsim" version = "0.10.0" diff --git a/backend/tests/check_email.rs b/backend/tests/check_email.rs index a9d217487..ca857d5ad 100644 --- a/backend/tests/check_email.rs +++ b/backend/tests/check_email.rs @@ -23,8 +23,8 @@ use reacher_backend::routes::create_routes; use warp::http::StatusCode; use warp::test::request; -const FOO_BAR_RESPONSE: &str = r#"{"input":"foo@bar","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#; -const FOO_BAR_BAZ_RESPONSE: &str = r#"{"input":"foo@bar.baz","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":"foo@bar.baz","domain":"bar.baz","is_valid_syntax":true,"username":"foo","normalized_email":"foo@bar.baz","suggestion":null}}"#; +const FOO_BAR_RESPONSE: &str = 
r#"{"input":"foo@bar","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null,"haveibeenpwned":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#; +const FOO_BAR_BAZ_RESPONSE: &str = r#"{"input":"foo@bar.baz","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null,"haveibeenpwned":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":"foo@bar.baz","domain":"bar.baz","is_valid_syntax":true,"username":"foo","normalized_email":"foo@bar.baz","suggestion":null}}"#; #[tokio::test] async fn test_input_foo_bar() { diff --git a/cli/src/main.rs b/cli/src/main.rs index 9779f9cfd..1e6d2acfa 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -77,6 +77,10 @@ pub struct Cli { #[clap(long, env, default_value = "false", parse(try_from_str))] pub check_gravatar: bool, + /// HaveIBeenPwned API key, ignored if not provided. + #[clap(long, env, parse(try_from_str))] + pub haveibeenpwned_api_key: Option, + /// The email to check.
pub to_email: String, } @@ -99,7 +103,8 @@ async fn main() -> Result<(), Box> { .set_gmail_use_api(CONF.gmail_use_api) .set_microsoft365_use_api(CONF.microsoft365_use_api) .set_check_gravatar(CONF.check_gravatar) - .set_hotmail_use_headless(CONF.hotmail_use_headless.clone()); + .set_hotmail_use_headless(CONF.hotmail_use_headless.clone()) + .set_haveibeenpwned_api_key(CONF.haveibeenpwned_api_key.clone()); if let Some(proxy_host) = &CONF.proxy_host { input.set_proxy(CheckEmailInputProxy { diff --git a/core/Cargo.toml b/core/Cargo.toml index 3fe6026b9..704a48306 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -31,6 +31,7 @@ serde_json = "1.0.93" trust-dns-proto = "0.21.2" md5 = "0.7.0" levenshtein = "1.0.5" +pwned = "0.5.0" [dev-dependencies] tokio = { version = "1.25.0" } diff --git a/core/src/haveibeenpwned.rs b/core/src/haveibeenpwned.rs new file mode 100644 index 000000000..c279466f7 --- /dev/null +++ b/core/src/haveibeenpwned.rs @@ -0,0 +1,55 @@ +// check-if-email-exists +// Copyright (C) 2018-2022 Reacher + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use crate::util::constants::LOG_TARGET; +use pwned::api::PwnedBuilder; + +/// Check if the email has been found in any breach or paste using the +/// HaveIBeenPwned API. +/// Returns `Some(true)` if the email appears in at least one breach, +/// `Some(false)` if it appears in none, and `None` if the check failed.
+pub async fn check_haveibeenpwned(to_email: &str, api_key: Option) -> Option { + let pwned = PwnedBuilder::default() + .user_agent("reacher") + .api_key(api_key) + .build() + .unwrap(); + + match pwned.check_email(to_email).await { + Ok(answer) => { + log::debug!( + target: LOG_TARGET, + "Email found in {} breaches", + answer.len() + ); + Some(!answer.is_empty()) + } + Err(e) => { + log::error!( + target: LOG_TARGET, + "Error while checking if email has been pwned: {}", + e + ); + match e { + pwned::errors::Error::IoError(e) => match e.kind() { + std::io::ErrorKind::NotFound => Some(false), + _ => None, + }, + _ => None, + } + } + } +} diff --git a/core/src/lib.rs b/core/src/lib.rs index 8252a0c25..ae1359e67 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -62,6 +62,7 @@ //! } //! ``` +mod haveibeenpwned; pub mod misc; pub mod mx; pub mod smtp; @@ -177,7 +178,12 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput { .collect::>() ); - let my_misc = check_misc(&my_syntax, input.check_gravatar).await; + let my_misc = check_misc( + &my_syntax, + input.check_gravatar, + input.haveibeenpwned_api_key.clone(), + ) + .await; log::debug!( target: LOG_TARGET, "[email={}] Found the following misc details: {:?}", diff --git a/core/src/misc/mod.rs b/core/src/misc/mod.rs index d3a25a991..0fd72362e 100644 --- a/core/src/misc/mod.rs +++ b/core/src/misc/mod.rs @@ -15,6 +15,7 @@ // along with this program. If not, see . mod gravatar; +use crate::haveibeenpwned::check_haveibeenpwned; use serde::{Deserialize, Serialize}; use std::default::Default; @@ -32,6 +33,9 @@ pub struct MiscDetails { /// Is this email a role-based account? pub is_role_account: bool, pub gravatar_url: Option, + /// Is this email address listed in the haveibeenpwned database for + /// previous breaches? + pub haveibeenpwned: Option, } /// Error occured connecting to this email server via SMTP. 
Right now this @@ -42,7 +46,11 @@ pub struct MiscDetails { pub enum MiscError {} /// Fetch misc details about the email address, such as whether it's disposable. -pub async fn check_misc(syntax: &SyntaxDetails, cfg_check_gravatar: bool) -> MiscDetails { +pub async fn check_misc( + syntax: &SyntaxDetails, + cfg_check_gravatar: bool, + haveibeenpwned_api_key: Option, +) -> MiscDetails { let role_accounts: Vec<&str> = serde_json::from_str(ROLE_ACCOUNTS).expect("roles.json is a valid json. qed."); @@ -58,6 +66,12 @@ pub async fn check_misc(syntax: &SyntaxDetails, cfg_check_gravatar: bool) -> Mis gravatar_url = check_gravatar(address.as_ref()).await; } + let mut haveibeenpwned: Option = None; + + if haveibeenpwned_api_key.is_some() { + haveibeenpwned = check_haveibeenpwned(address.as_ref(), haveibeenpwned_api_key).await; + } + MiscDetails { // mailchecker::is_valid checks also if the syntax is valid. But if // we're here, it means we're sure the syntax is valid, so is_valid @@ -65,5 +79,6 @@ pub async fn check_misc(syntax: &SyntaxDetails, cfg_check_gravatar: bool) -> Mis is_disposable: !mailchecker::is_valid(address.as_ref()), is_role_account: role_accounts.contains(&syntax.username.to_lowercase().as_ref()), gravatar_url, + haveibeenpwned, } } diff --git a/core/src/util/input_output.rs b/core/src/util/input_output.rs index cdf9d91e0..93ff74529 100644 --- a/core/src/util/input_output.rs +++ b/core/src/util/input_output.rs @@ -117,6 +117,9 @@ pub struct CheckEmailInput { // // Defaults to false. pub check_gravatar: bool, + /// Check if the email address is present in the HaveIBeenPwned API. + // If the api_key is filled, the HaveIBeenPwned API is checked. + pub haveibeenpwned_api_key: Option, /// For Hotmail/Outlook email addresses, use a headless navigator /// connecting to the password recovery page instead of the SMTP server.
/// This assumes you have a WebDriver compatible process running, then pass @@ -152,6 +155,7 @@ impl Default for CheckEmailInput { gmail_use_api: false, microsoft365_use_api: false, check_gravatar: false, + haveibeenpwned_api_key: None, retries: 2, } } @@ -282,6 +286,13 @@ impl CheckEmailInput { self } + /// Set the HaveIBeenPwned API key. The HaveIBeenPwned API is queried for + /// the given email only if the api_key is set. + pub fn set_haveibeenpwned_api_key(&mut self, api_key: Option) -> &mut CheckEmailInput { + self.haveibeenpwned_api_key = api_key; + self + } + /// Set whether or not to use a headless navigator to navigate to Hotmail's /// password recovery page to check if an email exists. If set to /// `Some()`, this endpoint must point to a WebDriver process, @@ -433,20 +444,20 @@ mod tests { let res = dummy_response_with_message("blacklist"); let actual = serde_json::to_string(&res).unwrap(); // Make sure the `description` is present with IpBlacklisted. - let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: blacklist"},"description":"IpBlacklisted"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#; + let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null,"haveibeenpwned":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: blacklist"},"description":"IpBlacklisted"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#; assert_eq!(expected, actual); let res = dummy_response_with_message("Client host rejected: cannot find your reverse hostname"); let actual = serde_json::to_string(&res).unwrap(); // Make sure the `description` is present with
NeedsRDNs. - let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: Client host rejected: cannot find your reverse hostname"},"description":"NeedsRDNS"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#; + let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null,"haveibeenpwned":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: Client host rejected: cannot find your reverse hostname"},"description":"NeedsRDNS"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#; assert_eq!(expected, actual); let res = dummy_response_with_message("foobar"); let actual = serde_json::to_string(&res).unwrap(); // Make sure the `description` is NOT present. - let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: foobar"}},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#; + let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null,"haveibeenpwned":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: foobar"}},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":"","normalized_email":null,"suggestion":null}}"#; assert_eq!(expected, actual); } }