Skip to content

Commit

Permalink
refactor!: Use verify method for known providers (#1366)
Browse files Browse the repository at this point in the history
BREAKING CHANGE: For Hotmail, Gmail and Yahoo addresses, the `*_use_api` and `*_use_headless` parameters have been removed and replaced with a `*VerifyMethod`, an enum which can take value Api, Headless or Smtp. If using headless, pass a webdriver address to env variable RCH_WEBDRIVER_ADDR.
  • Loading branch information
amaury1093 authored Oct 25, 2023
1 parent 628b564 commit 5ca4dfa
Show file tree
Hide file tree
Showing 9 changed files with 244 additions and 203 deletions.
42 changes: 25 additions & 17 deletions backend/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,25 @@
"enum": ["invalid", "unknown", "safe", "risky"],
"description": "An enum to describe how confident we are that the recipient address is real: `safe`, `risky`, `invalid` and `unknown`. Check our FAQ to know the meanings of the 4 possibilities: https://help.reacher.email/email-attributes-inside-json."
},
"YahooVerifyMethod": {
"type": "string",
"title": "YahooVerifyMethod",
"enum": ["Api", "Headless", "Smtp"],
"description": "An enum to describe how we verify Yahoo emails."
},
"HotmailVerifyMethod": {
"type": "string",
"title": "HotmailVerifyMethod",
"enum": ["Api", "Headless", "Smtp"],
"description": "An enum to describe how we verify Hotmail emails."
},
"GmailVerifyMethod": {
"type": "string",
"title": "GmailVerifyMethod",
"enum": ["Api", "Smtp"],
"description": "An enum to describe how we verify Gmail emails.",
"x-internal": false
},
"CheckEmailInput": {
"title": "CheckEmailInput",
"type": "object",
Expand All @@ -330,30 +349,19 @@
"type": "number",
"description": "SMTP port to use for email validation. Generally, ports 25, 465, 587 and 2525 are used."
},
"yahoo_use_api": {
"type": "boolean",
"description": "For Yahoo email addresses, use Yahoo's API instead of connecting directly to their SMTP servers."
"yahoo_verify_method": {
"$ref": "#/components/schemas/YahooVerifyMethod"
},
"yahoo_use_headless": {
"type": "boolean",
"description": "For Yahoo email addresses, use Yahoo's account recovery page instead of connecting directly to their SMTP servers."
},
"gmail_use_api": {
"type": "boolean",
"description": "For Gmail email addresses, use Gmail's API instead of connecting directly to their SMTP servers."
"gmail_verify_method": {
"$ref": "#/components/schemas/GmailVerifyMethod"
},
"microsoft365_use_api": {
"type": "boolean",
"description": "For Microsoft 365 email addresses, use OneDrive's API instead of connecting directly to their SMTP servers."
"hotmail_verify_method": {
"$ref": "#/components/schemas/HotmailVerifyMethod"
},
"check_gravatar": {
"type": "boolean",
"description": "Whether to check if a gravatar image is existing for the given email."
},
"hotmail_use_headless": {
"type": "boolean",
"description": "For Hotmail/Outlook email addresses, use a headless navigator connecting to the password recovery page instead of the SMTP server. This assumes you have a WebDriver compatible process running at the address provided by the environment variable `RCH_WEBDRIVER_ADDR`, usually http://localhost:9515. We recommend running chromedriver (and not geckodriver) as it allows parallel requests."
},
"retries": {
"type": "number",
"default": 2,
Expand Down
28 changes: 18 additions & 10 deletions cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,29 @@ ARGS:

OPTIONS:
--check-gravatar <CHECK_GRAVATAR>
Whether to check for an existing gravatar image [env: CHECK_GRAVATAR=] [default: false]
Whether to check if a gravatar image is existing for the given email [env:
CHECK_GRAVATAR=] [default: false]

--from-email <FROM_EMAIL>
The email to use in the `MAIL FROM:` SMTP command [env: FROM_EMAIL=] [default:
user@example.org]
reacher.email@gmail.com]

--gmail-verify-method <GMAIL_VERIFY_METHOD>
Select how to verify Gmail email addresses: Api or Smtp [env: GMAIL_VERIFY_METHOD=]
[default: Smtp]

-h, --help
Print help information

--haveibeenpwned-api-key <HAVEIBEENPWNED_API_KEY>
HaveIBeenPwned API key, ignore if not provided [env: HAVEIBEENPWNED_API_KEY=]

--hello-name <HELLO_NAME>
The name to use in the `EHLO:` SMTP command [env: HELLO_NAME=] [default: localhost]
The name to use in the `EHLO:` SMTP command [env: HELLO_NAME=] [default: gmail.com]

--hotmail-verify-method <HOTMAIL_VERIFY_METHOD>
Select how to verify Hotmail email addresses: Api, Headless or Smtp [env:
HOTMAIL_VERIFY_METHOD=] [default: Headless]

--proxy-host <PROXY_HOST>
Use the specified SOCKS5 proxy host to perform email verification [env: PROXY_HOST=]
Expand All @@ -59,13 +71,9 @@ OPTIONS:
-V, --version
Print version information

--yahoo-use-api <YAHOO_USE_API>
For Yahoo email addresses, use Yahoo's API instead of connecting directly to their SMTP
servers [env: YAHOO_USE_API=] [default: true]
--gmail-use-api <GMAIL_USE_API>
For Gmail email addresses, use Gmail's API instead of connecting directly to their SMTP
servers [env: GMAIL_USE_API=] [default: false]
--yahoo-verify-method <YAHOO_VERIFY_METHOD>
Select how to verify Yahoo email addresses: Api, Headless or Smtp [env:
YAHOO_VERIFY_METHOD=] [default: Headless]
```
**💡 PRO TIP:** To show debug logs when running the binary, run:
Expand Down
46 changes: 16 additions & 30 deletions cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

use check_if_email_exists::{check_email, CheckEmailInput, CheckEmailInputProxy};
use check_if_email_exists::{
check_email, CheckEmailInput, CheckEmailInputProxy, GmailVerifyMethod, HotmailVerifyMethod,
YahooVerifyMethod,
};
use clap::Parser;
use once_cell::sync::Lazy;

Expand Down Expand Up @@ -53,32 +56,17 @@ pub struct Cli {
#[clap(long, env, default_value = "25")]
pub smtp_port: u16,

/// For Yahoo email addresses, use Yahoo's API instead of connecting
/// directly to their SMTP servers.
#[clap(long, env, default_value = "true", parse(try_from_str))]
pub yahoo_use_api: bool,
/// Select how to verify Yahoo email addresses: Api, Headless or Smtp.
#[clap(long, env, default_value = "Headless", parse(try_from_str))]
pub yahoo_verify_method: YahooVerifyMethod,

/// For Yahoo addresses, use a headless browser to connect to the
/// Yahoo account recovery page. Requires a webdriver instance
/// listening on RCH_WEBDRIVER_ADDR.
#[clap(long, env)]
pub yahoo_use_headless: bool,

/// For Gmail email addresses, use Gmail's API instead of connecting
/// directly to their SMTP servers.
#[clap(long, env, default_value = "false", parse(try_from_str))]
pub gmail_use_api: bool,
/// Select how to verify Gmail email addresses: Api or Smtp.
#[clap(long, env, default_value = "Smtp", parse(try_from_str))]
pub gmail_verify_method: GmailVerifyMethod,

/// For Hotmail addresses, use a headless browser to connect to the
/// Microsoft account recovery page. Requires a webdriver instance
/// listening on RCH_WEBDRIVER_ADDR.
#[clap(long, env)]
pub hotmail_use_headless: bool,

/// For Microsoft 365 email addresses, use OneDrive's API instead of
/// connecting directly to their SMTP servers.
#[clap(long, env, default_value = "false", parse(try_from_str))]
pub microsoft365_use_api: bool,
/// Select how to verify Hotmail email addresses: Api, Headless or Smtp.
#[clap(long, env, default_value = "Headless", parse(try_from_str))]
pub hotmail_verify_method: HotmailVerifyMethod,

/// Whether to check if a gravatar image is existing for the given email.
#[clap(long, env, default_value = "false", parse(try_from_str))]
Expand Down Expand Up @@ -106,12 +94,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
.set_from_email(CONF.from_email.clone())
.set_hello_name(CONF.hello_name.clone())
.set_smtp_port(CONF.smtp_port)
.set_yahoo_use_api(CONF.yahoo_use_api)
.set_yahoo_use_headless(CONF.yahoo_use_headless)
.set_gmail_use_api(CONF.gmail_use_api)
.set_microsoft365_use_api(CONF.microsoft365_use_api)
.set_yahoo_verify_method(CONF.yahoo_verify_method)
.set_gmail_verify_method(CONF.gmail_verify_method)
.set_hotmail_verify_method(CONF.hotmail_verify_method)
.set_check_gravatar(CONF.check_gravatar)
.set_hotmail_use_headless(CONF.hotmail_use_headless)
.set_haveibeenpwned_api_key(CONF.haveibeenpwned_api_key.clone());

if let Some(proxy_host) = &CONF.proxy_host {
Expand Down
2 changes: 0 additions & 2 deletions core/src/smtp/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ pub enum SmtpError {
HeadlessError(HeadlessError),
/// Error when verifying a Microsoft 365 email via HTTP request.
Microsoft365Error(Microsoft365Error),
/// Headless Navigator not running.
NoHeadlessNavigator,
/// Email is in the `skipped_domains` parameter.
SkippedDomain(String),
}
Expand Down
99 changes: 49 additions & 50 deletions core/src/smtp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,15 @@ use async_smtp::EmailAddress;
use serde::{Deserialize, Serialize};
use trust_dns_proto::rr::Name;

use crate::{util::input_output::CheckEmailInput, LOG_TARGET};
use crate::{
util::input_output::CheckEmailInput, GmailVerifyMethod, HotmailVerifyMethod, YahooVerifyMethod,
};
use connect::check_smtp_with_retry;
pub use error::*;

use self::{
gmail::is_gmail,
outlook::{is_microsoft365, is_outlook},
outlook::{is_hotmail, is_microsoft365},
yahoo::is_yahoo,
};

Expand Down Expand Up @@ -65,66 +67,63 @@ pub async fn check_smtp(
domain: &str,
input: &CheckEmailInput,
) -> Result<SmtpDetails, SmtpError> {
let host: String = host.to_string();
let host = host.to_string();
let to_email_str = to_email.to_string();

if input.skipped_domains.iter().any(|d| host.contains(d)) {
return Err(SmtpError::SkippedDomain(format!(
"Reacher currently cannot verify emails from @{domain}"
)));
}

// Headless checks. Please note that they take precedence over API checks.
#[cfg(feature = "headless")]
{
let webdriver_addr = env::var("RCH_WEBDRIVER_ADDR");

if is_outlook(&host) {
match &webdriver_addr {
Ok(a) => {
return outlook::headless::check_password_recovery(
to_email.to_string().as_str(),
a,
)
.await
.map_err(|err| err.into());
let webdriver_addr = env::var("RCH_WEBDRIVER_ADDR");

if is_hotmail(&host) {
match (&input.hotmail_verify_method, webdriver_addr) {
(HotmailVerifyMethod::OneDriveApi, _) => {
if is_microsoft365(&host) {
match outlook::microsoft365::check_microsoft365_api(to_email, input).await {
Ok(Some(smtp_details)) => return Ok(smtp_details),
// Errors are returned to the caller; only an ambiguous result (`Ok(None)`) continues.
Err(err) => {
return Err(err.into());
}
_ => {}
}
}
_ => return Err(SmtpError::NoHeadlessNavigator),
}
} else if is_yahoo(&host) {
match &webdriver_addr {
Ok(a) => {
return yahoo::check_headless(to_email.to_string().as_str(), a)
.await
.map_err(|err| err.into());
}
_ => return Err(SmtpError::NoHeadlessNavigator),
#[cfg(feature = "headless")]
(HotmailVerifyMethod::Headless, Ok(a)) => {
return outlook::headless::check_password_recovery(
to_email.to_string().as_str(),
&a,
)
.await
.map_err(|err| err.into());
}
}
}

// API checks
if input.gmail_use_api && is_gmail(&host) {
return gmail::check_gmail(to_email, input)
.await
.map_err(|err| err.into());
} else if input.yahoo_use_api && is_yahoo(&host) {
return yahoo::check_api(to_email, input)
.await
.map_err(|err| err.into());
} else if input.microsoft365_use_api && is_microsoft365(&host) {
match outlook::microsoft365::check_microsoft365_api(to_email, input).await {
Ok(Some(smtp_details)) => return Ok(smtp_details),
// Continue in the event of an error/ambiguous result.
Err(err) => {
log::debug!(
target: LOG_TARGET,
"[email={}] microsoft365 error: {:?}",
to_email,
err,
);
_ => {}
};
} else if is_gmail(&host) {
if let GmailVerifyMethod::Api = &input.gmail_verify_method {
return gmail::check_gmail(to_email, input)
.await
.map_err(|err| err.into());
};
} else if is_yahoo(&host) {
match (&input.yahoo_verify_method, webdriver_addr) {
(YahooVerifyMethod::Api, _) => {
return yahoo::check_api(&to_email_str, input)
.await
.map_err(|e| e.into())
}
#[cfg(feature = "headless")]
(YahooVerifyMethod::Headless, Ok(a)) => {
return yahoo::check_headless(&to_email_str, &a)
.await
.map_err(|e| e.into())
}
_ => {}
}
};
}

check_smtp_with_retry(to_email, &host, port, domain, input, input.retries).await
Expand Down
17 changes: 5 additions & 12 deletions core/src/smtp/outlook/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,6 @@ pub mod microsoft365;
/// all Microsoft 365 addresses).
///
/// After some testing I got:
/// - @outlook.* and @hotmail.* -> end with ".olc.protection.outlook.com."
/// - Microsoft 365 emails -> end with ".mail.protection.outlook.com."
pub fn is_outlook(host: &str) -> bool {
host.to_lowercase().ends_with(".protection.outlook.com.")
}

/// Check if a MX host is an @hotmail.* or @outlook.* email.
///
/// After some testing, I got:
/// - *@outlook.com -> `outlook-com.olc.protection.outlook.com.`
/// - *@outlook.fr -> `eur.olc.protection.outlook.com.`
Expand All @@ -23,13 +15,14 @@ pub fn is_outlook(host: &str) -> bool {
///
/// But Microsoft 365 addresses end with "mail.protection.outlook.com."
///
/// So it seems that outlook/hotmail addresses end with `olc.protection.outlook.com.`
/// TL;DR:
/// - @outlook.* and @hotmail.* -> end with ".olc.protection.outlook.com."
/// - Microsoft 365 emails -> end with ".mail.protection.outlook.com."
pub fn is_hotmail(host: &str) -> bool {
host.to_lowercase()
.ends_with(".olc.protection.outlook.com.")
host.to_lowercase().ends_with(".protection.outlook.com.")
}

/// Check if an address is a Microsoft365 email address.
pub fn is_microsoft365(host: &str) -> bool {
is_outlook(host) && !is_hotmail(host)
is_hotmail(host) && !host.ends_with(".olc.protection.outlook.com.")
}
7 changes: 1 addition & 6 deletions core/src/smtp/yahoo/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ use crate::{
smtp::{http_api::create_client, SmtpDetails},
util::{constants::LOG_TARGET, input_output::CheckEmailInput},
};
use async_smtp::EmailAddress;
use regex::Regex;
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -76,10 +75,7 @@ struct FormResponse {

/// Use well-crafted HTTP requests to verify if a Yahoo email address exists.
/// Inspired by https://github.com/hbattat/verifyEmail.
pub async fn check_api(
to_email: &EmailAddress,
input: &CheckEmailInput,
) -> Result<SmtpDetails, YahooError> {
pub async fn check_api(to_email: &str, input: &CheckEmailInput) -> Result<SmtpDetails, YahooError> {
let res = create_client(input, "yahoo")?
.get(SIGNUP_PAGE)
.header("User-Agent", USER_AGENT)
Expand All @@ -94,7 +90,6 @@ pub async fn check_api(
}
};

let to_email = to_email.to_string();
log::debug!(
target: LOG_TARGET,
"[email={}] Yahoo succesfully got cookies after response",
Expand Down
Loading

0 comments on commit 5ca4dfa

Please sign in to comment.