From 6a26035327ab681a65a4f4ba284e155f00680e89 Mon Sep 17 00:00:00 2001 From: Daniel Huth <4455258+Agreon@users.noreply.github.com> Date: Wed, 5 Oct 2022 10:11:21 +1300 Subject: [PATCH] feat(core): Add check gravatar image (#1188) * feat(core): Add check gravatar image * Add missing changes * Fix compilation * Fix tests * Apply PR feedback * Don't panic --- Cargo.lock | 7 ++++ README.md | 2 +- backend/openapi.json | 66 ++++++++++++++++++++++++------ backend/src/routes/bulk/results.rs | 9 ++++ backend/tests/check_email.rs | 4 +- cli/README.md | 3 ++ cli/src/main.rs | 7 +++- core/Cargo.toml | 3 +- core/src/gravatar.rs | 61 +++++++++++++++++++++++++++ core/src/lib.rs | 3 +- core/src/misc.rs | 27 ++++++++---- core/src/util/input_output.rs | 18 ++++++-- 12 files changed, 180 insertions(+), 30 deletions(-) create mode 100644 core/src/gravatar.rs diff --git a/Cargo.lock b/Cargo.lock index 40b97ae9c..f4678bf96 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -416,6 +416,7 @@ dependencies = [ "futures", "log", "mailchecker", + "md5", "rand", "regex", "reqwest", @@ -1335,6 +1336,12 @@ dependencies = [ "digest", ] +[[package]] +name = "md5" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" + [[package]] name = "memchr" version = "2.5.0" diff --git a/README.md b/README.md index 91b038f2f..c52422d3e 100644 --- a/README.md +++ b/README.md @@ -146,10 +146,10 @@ The output will be a JSON with the below format, the fields should be self-expla | ✅ | **Full inbox** | Is the inbox of this mailbox full? | `smtp.has_full_inbox` | | ✅ | **Catch-all address** | Is this email address a [catch-all](https://debounce.io/blog/help/what-is-a-catch-all-or-accept-all/) address? | `smtp.is_catch_all` | | ✅ | **Role account validation** | Is the email address a well-known role account? 
| `misc.is_role_account` | +| ✅ | **Gravatar URL** | The URL of the [Gravatar](https://gravatar.com/) profile picture for this email address, if one exists | `misc.gravatar_url` | | 🔜 | **Free email provider check** | Is the email address bound to a known free email provider? | [Issue #89](https://github.com/reacherhq/check-if-email-exists/issues/89) | | 🔜 | **Syntax validation, provider-specific** | According to the syntactic rules of the target mail provider, is the address syntactically valid? | [Issue #90](https://github.com/reacherhq/check-if-email-exists/issues/90) | | 🔜 | **Honeypot detection** | Does email address under test hide a [honeypot](https://en.wikipedia.org/wiki/Spamtrap)? | [Issue #91](https://github.com/reacherhq/check-if-email-exists/issues/91) | -| 🔜 | **Gravatar** | Does this email address have a [Gravatar](https://gravatar.com/) profile picture? | [Issue #92](https://github.com/reacherhq/check-if-email-exists/issues/92) | | 🔜 | **Have I Been Pwned?** | Has this email been compromised in a [data breach](https://haveibeenpwned.com/)? | [Issue #289](https://github.com/reacherhq/check-if-email-exists/issues/289) | ## 🤔 Why? 
diff --git a/backend/openapi.json b/backend/openapi.json index d42282009..5b04553bf 100644 --- a/backend/openapi.json +++ b/backend/openapi.json @@ -40,7 +40,8 @@ "is_reachable": "invalid", "misc": { "is_disposable": false, - "is_role_account": true + "is_role_account": true, + "gravatar_url": null }, "mx": { "accepts_mail": true, @@ -109,7 +110,8 @@ "is_reachable": "invalid", "misc": { "is_disposable": false, - "is_role_account": true + "is_role_account": true, + "gravatar_url": null }, "mx": { "accepts_mail": true, @@ -182,7 +184,14 @@ "$ref": "#/components/schemas/SyntaxDetails" } }, - "required": ["input", "misc", "mx", "smtp", "syntax", "is_reachable"] + "required": [ + "input", + "misc", + "mx", + "smtp", + "syntax", + "is_reachable" + ] }, "Error": { "title": "Error", @@ -198,7 +207,10 @@ "description": "A human-readable description of the error." } }, - "required": ["type", "message"] + "required": [ + "type", + "message" + ] }, "MiscDetails": { "title": "MiscDetails", @@ -212,9 +224,16 @@ "is_role_account": { "type": "boolean", "description": "Is this email a role-based account?" + }, + "gravatar_url": { + "type": "string", + "description": "The Gravatar url of the image belonging to the given email." } }, - "required": ["is_disposable", "is_role_account"] + "required": [ + "is_disposable", + "is_role_account" + ] }, "MxDetails": { "title": "MxDetails", @@ -232,7 +251,10 @@ } } }, - "required": ["accepts_mail", "records"], + "required": [ + "accepts_mail", + "records" + ], "description": "Object holding the MX details of the mail server." }, "SmtpDetails": { @@ -261,7 +283,13 @@ "description": "Has this email address been disabled by the email provider?" 
} }, - "required": ["can_connect_smtp", "has_full_inbox", "is_catch_all", "is_deliverable", "is_disabled"] + "required": [ + "can_connect_smtp", + "has_full_inbox", + "is_catch_all", + "is_deliverable", + "is_disabled" + ] }, "SyntaxDetails": { "title": "SyntaxDetails", @@ -281,12 +309,21 @@ "description": "The username of the email, i.e. the part before the \"@\" symbol." } }, - "required": ["domain", "is_valid_syntax", "username"] + "required": [ + "domain", + "is_valid_syntax", + "username" + ] }, "Reachable": { "type": "string", "title": "Reachable", - "enum": ["invalid", "unknown", "safe", "risky"], + "enum": [ + "invalid", + "unknown", + "safe", + "risky" + ], "description": "An enum to describe how confident we are that the recipient address is real: `safe`, `risky`, `invalid` and `unknown`. Check our FAQ to know the meanings of the 4 possibilities: https://help.reacher.email/email-attributes-inside-json." }, "CheckEmailInput": { @@ -310,7 +347,9 @@ "$ref": "#/components/schemas/CheckEmailInputProxy" } }, - "required": ["to_email"] + "required": [ + "to_email" + ] }, "CheckEmailInputProxy": { "title": "CheckEmailInputProxy", @@ -333,7 +372,10 @@ "description": "The proxy port." 
} }, - "required": ["host", "port"] + "required": [ + "host", + "port" + ] } }, "securitySchemes": { @@ -345,4 +387,4 @@ } } } -} +} \ No newline at end of file diff --git a/backend/src/routes/bulk/results.rs b/backend/src/routes/bulk/results.rs index 2e0952a61..7f372fef5 100644 --- a/backend/src/routes/bulk/results.rs +++ b/backend/src/routes/bulk/results.rs @@ -67,6 +67,8 @@ struct JobResultCsvResponse { misc_is_disposable: bool, #[serde(rename = "misc.is_role_account")] misc_is_role_account: bool, + #[serde(rename = "misc.gravatar_url")] + misc_gravatar_url: Option, #[serde(rename = "mx.accepts_mail")] mx_accepts_mail: bool, #[serde(rename = "smtp.can_connect")] @@ -99,6 +101,7 @@ impl TryFrom for JobResultCsvResponse { let mut is_reachable: String = String::default(); let mut misc_is_disposable: bool = false; let mut misc_is_role_account: bool = false; + let mut misc_gravatar_url: Option = None; let mut mx_accepts_mail: bool = false; let mut smtp_can_connect: bool = false; let mut smtp_has_full_inbox: bool = false; @@ -136,6 +139,11 @@ impl TryFrom for JobResultCsvResponse { misc_is_role_account = val.as_bool().ok_or("is_role_account should be a boolean")? 
} + "gravatar_url" => { + // Copy the raw string: `Value::to_string()` would yield the JSON-quoted + // form, and a `null` value simply leaves the column empty. + misc_gravatar_url = val.as_str().map(str::to_owned) + } _ => {} } } @@ -216,6 +224,7 @@ impl TryFrom for JobResultCsvResponse { is_reachable, misc_is_disposable, misc_is_role_account, + misc_gravatar_url, mx_accepts_mail, smtp_can_connect, smtp_has_full_inbox, diff --git a/backend/tests/check_email.rs b/backend/tests/check_email.rs index cf6278bc3..99e1089eb 100644 --- a/backend/tests/check_email.rs +++ b/backend/tests/check_email.rs @@ -22,8 +22,8 @@ use serde_json; use warp::http::StatusCode; use warp::test::request; -const FOO_BAR_RESPONSE: &str = r#"{"input":"foo@bar","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":""}}"#; -const FOO_BAR_BAZ_RESPONSE: &str = r#"{"input":"foo@bar.baz","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":"foo@bar.baz","domain":"bar.baz","is_valid_syntax":true,"username":"foo"}}"#; +const FOO_BAR_RESPONSE: &str = r#"{"input":"foo@bar","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":""}}"#; +const FOO_BAR_BAZ_RESPONSE: &str = 
r#"{"input":"foo@bar.baz","is_reachable":"invalid","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"can_connect_smtp":false,"has_full_inbox":false,"is_catch_all":false,"is_deliverable":false,"is_disabled":false},"syntax":{"address":"foo@bar.baz","domain":"bar.baz","is_valid_syntax":true,"username":"foo"}}"#; #[tokio::test] async fn test_input_foo_bar() { diff --git a/cli/README.md b/cli/README.md index 8bda86bc6..ba3115fa0 100644 --- a/cli/README.md +++ b/cli/README.md @@ -25,6 +25,9 @@ ARGS: The email to check OPTIONS: + --check-gravatar + Whether to check for an existing gravatar image [env: CHECK_GRAVATAR=] [default: false] + --from-email The email to use in the `MAIL FROM:` SMTP command [env: FROM_EMAIL=] [default: user@example.org] diff --git a/cli/src/main.rs b/cli/src/main.rs index a21278ea6..5a9ea8bf7 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -58,6 +58,10 @@ pub struct Cli { #[clap(long, env, default_value = "true", parse(try_from_str))] pub yahoo_use_api: bool, + /// Whether to check if a gravatar image is existing for the given email. + #[clap(long, env, default_value = "false", parse(try_from_str))] + pub check_gravatar: bool, + /// The email to check. 
pub to_email: String, } @@ -76,7 +80,8 @@ async fn main() -> Result<(), Box> { .set_from_email(CONF.from_email.clone()) .set_hello_name(CONF.hello_name.clone()) .set_smtp_port(CONF.smtp_port) - .set_yahoo_use_api(CONF.yahoo_use_api); + .set_yahoo_use_api(CONF.yahoo_use_api) + .set_check_gravatar(CONF.check_gravatar); if let Some(proxy_host) = &CONF.proxy_host { input.set_proxy(CheckEmailInputProxy { host: proxy_host.clone(), diff --git a/core/Cargo.toml b/core/Cargo.toml index e37b66d5b..2ff79030f 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -23,12 +23,13 @@ futures = { version = "0.3.24", optional = true } fast-socks5 = "0.8.1" log = "0.4.17" mailchecker = "5.0.1" -rand = {version = "0.8.5", features = ["small_rng"] } +rand = { version = "0.8.5", features = ["small_rng"] } regex = "1.6.0" reqwest = { version = "0.11.11", features = ["json", "socks"] } serde = { version = "1.0.145", features = ["derive"] } serde_json = "1.0.85" trust-dns-proto = "0.21.2" +md5 = "0.7.0" [dev-dependencies] tokio = { version = "1.21.2" } diff --git a/core/src/gravatar.rs b/core/src/gravatar.rs new file mode 100644 index 000000000..62145756f --- /dev/null +++ b/core/src/gravatar.rs @@ -0,0 +1,61 @@ +// check-if-email-exists +// Copyright (C) 2018-2022 Reacher + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
+ +use crate::util::constants::LOG_TARGET; +use md5; +use md5::Digest; + +const API_BASE_URL: &str = "https://www.gravatar.com/avatar/"; + +/// Returns the URL of the Gravatar image registered for `to_email`, or `None` +/// when Gravatar does not answer `200 OK` or the request itself fails. +pub async fn check_gravatar(to_email: &str) -> Option<String> { + let client = reqwest::Client::new(); + + // Gravatar hashes the trimmed, lower-cased address, so normalize before + // hashing; otherwise `Foo@Bar.com` and `foo@bar.com` give different URLs. + let mail_hash: Digest = md5::compute(to_email.trim().to_lowercase()); + + let url = format!("{}{:x}", API_BASE_URL, mail_hash); + + log::debug!( + target: LOG_TARGET, + "[email={}] Request Gravatar API with url: {:?}", + to_email, + url + ); + + let response = client + .get(&url) + // This option is necessary to return a NotFound exception instead of the default gravatar + // image if none for the given email is found. + .query(&[("d", "404")]) + .send() + .await; + + log::debug!( + target: LOG_TARGET, + "[email={}] Gravatar response: {:?}", + to_email, + response + ); + + // A failed request is treated like a missing image: the check is best-effort. + match response { + Ok(response) if response.status() == reqwest::StatusCode::OK => Some(url), + _ => None, + } +} diff --git a/core/src/lib.rs b/core/src/lib.rs index 757ba00e9..5b988048a 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -62,6 +62,7 @@ //! } //! ``` +pub mod gravatar; pub mod misc; pub mod mx; pub mod smtp; @@ -171,7 +172,7 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput { .collect::>() ); - let my_misc = check_misc(&my_syntax); + let my_misc = check_misc(&my_syntax, input.check_gravatar).await; log::debug!( target: LOG_TARGET, "[email={}] Found the following misc details: {:?}", diff --git a/core/src/misc.rs b/core/src/misc.rs index 3fc94f4e1..83d9ba78a 100644 --- a/core/src/misc.rs +++ b/core/src/misc.rs @@ -14,6 +14,8 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . 
+use crate::gravatar::check_gravatar; + use super::syntax::SyntaxDetails; use serde::{Deserialize, Serialize}; use std::default::Default; @@ -27,6 +29,8 @@ pub struct MiscDetails { pub is_disposable: bool, /// Is this email a role-based account? pub is_role_account: bool, + /// URL of the Gravatar image for this email; `None` if the check was not requested or no image was found. + pub gravatar_url: Option<String>, } /// Error occured connecting to this email server via SMTP. Right now this @@ -37,22 +41,29 @@ pub struct MiscDetails { pub enum MiscError {} /// Fetch misc details about the email address, such as whether it's disposable. -pub fn check_misc(syntax: &SyntaxDetails) -> MiscDetails { +pub async fn check_misc(syntax: &SyntaxDetails, cfg_check_gravatar: bool) -> MiscDetails { let role_accounts: Vec<&str> = serde_json::from_str(ROLE_ACCOUNTS).expect("roles.json is a valid json. qed."); + let address = syntax + .address + .as_ref() + .expect("We already checked that the syntax was valid. qed.") + .to_string(); + + // The Gravatar lookup costs a network round-trip, so it stays opt-in. + let gravatar_url = if cfg_check_gravatar { + check_gravatar(address.as_ref()).await + } else { + None + }; + MiscDetails { // mailchecker::is_valid checks also if the syntax is valid. But if // we're here, it means we're sure the syntax is valid, so is_valid // actually will only check if it's disposable. - is_disposable: !mailchecker::is_valid( - syntax - .address - .as_ref() - .expect("We already checked that the syntax was valid. qed.") - .to_string() - .as_ref(), - ), + is_disposable: !mailchecker::is_valid(address.as_ref()), is_role_account: role_accounts.contains(&syntax.username.to_lowercase().as_ref()), + gravatar_url, } } diff --git a/core/src/util/input_output.rs b/core/src/util/input_output.rs index 48fe8d904..9b21cc873 100644 --- a/core/src/util/input_output.rs +++ b/core/src/util/input_output.rs @@ -90,6 +90,10 @@ pub struct CheckEmailInput { /// /// Defaults to true. pub yahoo_use_api: bool, + /// Whether to check if a Gravatar image exists for the given email. 
+ /// + /// Defaults to false. + pub check_gravatar: bool, /// For Hotmail/Outlook email addresses, use a headless navigator /// connecting to the password recovery page instead of the SMTP server. /// This assumes you have a WebDriver compatible process running, then pass @@ -122,6 +126,7 @@ impl Default for CheckEmailInput { smtp_security: SmtpSecurity::Opportunistic, smtp_timeout: None, yahoo_use_api: true, + check_gravatar: false, retries: 2, } } @@ -229,6 +234,13 @@ impl CheckEmailInput { self } + /// Set whether to check if a Gravatar image exists for the given email. + /// Defaults to false. + pub fn set_check_gravatar(&mut self, check_gravatar: bool) -> &mut CheckEmailInput { + self.check_gravatar = check_gravatar; + self + } + /// Set whether or not to use a headless navigator to navigate to Hotmail's /// password recovery page to check if an email exists. If set to /// `Some()`, this endpoint must point to a WebDriver process, @@ -380,20 +392,20 @@ mod tests { let res = dummy_response_with_message("blacklist"); let actual = serde_json::to_string(&res).unwrap(); // Make sure the `description` is present with IpBlacklisted. 
- let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: blacklist"},"description":"IpBlacklisted"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":""}}"#; + let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: blacklist"},"description":"IpBlacklisted"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":""}}"#; assert_eq!(expected, actual); let res = dummy_response_with_message("Client host rejected: cannot find your reverse hostname"); let actual = serde_json::to_string(&res).unwrap(); // Make sure the `description` is present with NeedsRDNs. - let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: Client host rejected: cannot find your reverse hostname"},"description":"NeedsRDNS"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":""}}"#; + let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: Client host rejected: cannot find your reverse hostname"},"description":"NeedsRDNS"},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":""}}"#; assert_eq!(expected, actual); let res = dummy_response_with_message("foobar"); let actual = serde_json::to_string(&res).unwrap(); // Make sure the `description` is NOT present. 
- let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: foobar"}},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":""}}"#; + let expected = r#"{"input":"foo","is_reachable":"unknown","misc":{"is_disposable":false,"is_role_account":false,"gravatar_url":null},"mx":{"accepts_mail":false,"records":[]},"smtp":{"error":{"type":"SmtpError","message":"transient: foobar"}},"syntax":{"address":null,"domain":"","is_valid_syntax":false,"username":""}}"#; assert_eq!(expected, actual); } }