Skip to content

Commit

Permalink
feat(core): Add domain-specific rules as JSON file (reacherhq#1347)
Browse files Browse the repository at this point in the history
* feat(core): Add domain-specific rules as JSON file

* Remove timeout for now

* Add has_rule()

* log debug
  • Loading branch information
amaury1093 authored and juhniorsantos committed Apr 11, 2024
1 parent 9e72335 commit 59ed4f6
Show file tree
Hide file tree
Showing 8 changed files with 148 additions and 29 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,18 @@ async-std-resolver = "0.21.2"
fantoccini = { version = "0.19.3", optional = true }
futures = { version = "0.3.27", optional = true }
fast-socks5 = "0.9.1"
levenshtein = "1.0.5"
log = "0.4.20"
mailchecker = "5.0.7"
md5 = "0.7.0"
once_cell = "1.17.1"
pwned = "0.5.0"
rand = { version = "0.8.5", features = ["small_rng"] }
regex = "1.9.4"
reqwest = { version = "0.11.16", features = ["json", "socks"] }
serde = { version = "1.0.157", features = ["derive"] }
serde_json = "1.0.95"
trust-dns-proto = "0.21.2"
md5 = "0.7.0"
levenshtein = "1.0.5"
pwned = "0.5.0"

[dev-dependencies]
tokio = { version = "1.28.2" }
Expand Down
1 change: 1 addition & 0 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
mod haveibeenpwned;
pub mod misc;
pub mod mx;
mod rules;
pub mod smtp;
pub mod syntax;
mod util;
Expand Down
19 changes: 19 additions & 0 deletions core/src/rules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"by_domain": {
"gmail.com": { "rules": ["SkipCatchAll"] },
"hotmail.com": { "rules": ["SkipCatchAll"] },
"hotmail.fr": { "rules": ["SkipCatchAll"] },
"hotmail.nl": { "rules": ["SkipCatchAll"] },
"yahoo.com": { "rules": ["SkipCatchAll"] },
"yahoo.fr": { "rules": ["SkipCatchAll"] }
},
"by_mx_suffix": {
".antispamcloud.com.": {
"rules": ["SkipCatchAll"],
"_comment": "Some <RCPT TO> commands take exactly 30s to respond, so we skip the catch-all check. Bumping the timeout is not implemented as a rule yet."
}
},
"rules": {
"SkipCatchAll": { "_comment": "Don't perform catch-all check." }
}
}
71 changes: 71 additions & 0 deletions core/src/rules.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// check-if-email-exists
// Copyright (C) 2018-2022 Reacher

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published
// by the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.

// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

//! Read provider- and domain-specific rules from a JSON file, then match each
//! email verification to its domain/provider, and translate those rules into
//! code.
//!
//! IMPORTANT: This is still a beta feature, and probably needs refining.
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;

/// A provider- or domain-specific adjustment to the verification flow.
///
/// The variant names are the strings listed in the `"rules"` arrays of
/// `rules.json`. The enum is fieldless, so it is cheap to copy.
#[derive(Clone, Copy, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub enum Rule {
    /// Don't perform catch-all check.
    SkipCatchAll,
}

/// The rules attached to a single entry of `rules.json`, i.e. to one domain
/// or to one MX host suffix.
#[derive(Debug, Serialize, Deserialize)]
struct RulesByDomain {
    /// Every rule that applies to this entry.
    rules: Vec<Rule>,
}

/// In-memory form of `rules.json`: two lookup tables from a key to the rules
/// attached to it.
#[derive(Debug, Serialize, Deserialize)]
struct AllRules {
    /// Rules keyed by domain name, i.e. the part of the email address after
    /// the @ symbol.
    by_domain: HashMap<String, RulesByDomain>,
    /// Rules keyed by MX host. A domain potentially has multiple MX records,
    /// so hosts are matched by their suffix rather than by exact name.
    by_mx_suffix: HashMap<String, RulesByDomain>,
}

/// All rules, deserialized on first access from the `rules.json` file that
/// `include_str!` embeds at compile time.
///
/// # Panics
///
/// The first access panics if the embedded JSON does not deserialize into
/// [`AllRules`]. The file ships with the crate, so such a failure is a bug in
/// `rules.json` (e.g. an unknown rule name), not a runtime condition.
static ALL_RULES: Lazy<AllRules> = Lazy::new(|| {
    serde_json::from_str::<AllRules>(include_str!("rules.json"))
        .expect("embedded rules.json must deserialize into AllRules")
});

/// Check whether the `by_domain` table attaches `rule` to `domain`.
///
/// `domain` is the part of the email address after the @ symbol. Domain names
/// are case-insensitive, so the lookup is done on the lowercased domain:
/// `Gmail.com` finds the `gmail.com` entry. Keys in `rules.json` are therefore
/// expected to be lowercase.
///
/// Returns `false` when the domain has no entry at all.
fn does_domain_have_rule(domain: &str, rule: &Rule) -> bool {
    ALL_RULES
        .by_domain
        .get(domain.to_lowercase().as_str())
        .map_or(false, |entry| entry.rules.contains(rule))
}

/// Check whether any `by_mx_suffix` entry whose key is a suffix of `host`
/// carries `rule`.
///
/// Every matching suffix is considered. `HashMap` iteration order is
/// unspecified, so stopping at the first suffix that matches would make the
/// answer depend on that order whenever more than one suffix matches the
/// host.
///
/// The comparison is case-insensitive (host names are), and suffixes are
/// matched literally, including the trailing dot used in `rules.json` keys
/// such as `.antispamcloud.com.`.
fn does_mx_have_rule(host: &str, rule: &Rule) -> bool {
    let host = host.to_lowercase();

    ALL_RULES.by_mx_suffix.iter().any(|(suffix, entry)| {
        host.ends_with(suffix.to_lowercase().as_str()) && entry.rules.contains(rule)
    })
}

/// Tell whether `rule` applies to an email verification, either because of
/// the email's `domain` or because of the MX `host` being contacted.
///
/// The domain table is consulted first; the MX suffix table is only checked
/// when the domain does not carry the rule.
pub fn has_rule(domain: &str, host: &str, rule: &Rule) -> bool {
    if does_domain_have_rule(domain, rule) {
        return true;
    }

    does_mx_have_rule(host, rule)
}
57 changes: 43 additions & 14 deletions core/src/smtp/connect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@ use std::iter;
use std::str::FromStr;
use std::time::Duration;

use trust_dns_proto::rr::Name;

use super::{gmail::is_gmail, outlook::is_hotmail, parser, yahoo::is_yahoo};
use super::parser;
use super::{SmtpDetails, SmtpError};
use crate::util::{constants::LOG_TARGET, input_output::CheckEmailInput};
use crate::{
rules::{has_rule, Rule},
util::{constants::LOG_TARGET, input_output::CheckEmailInput},
};

/// Try to send an smtp command, close and return Err if fails.
macro_rules! try_smtp (
Expand All @@ -48,13 +49,12 @@ macro_rules! try_smtp (

/// Attempt to connect to host via SMTP, and return SMTP client on success.
async fn connect_to_host(
host: &Name,
host: &str,
port: u16,
input: &CheckEmailInput,
) -> Result<SmtpTransport, SmtpError> {
// hostname verification fails if it ends with '.', for example, using
// SOCKS5 proxies we can `io: incomplete` error.
let host = host.to_string();
let host = host.trim_end_matches('.').to_string();

let security = {
Expand Down Expand Up @@ -220,11 +220,16 @@ async fn email_deliverable(
async fn smtp_is_catch_all(
smtp_transport: &mut SmtpTransport,
domain: &str,
host: &Name,
host: &str,
input: &CheckEmailInput,
) -> Result<bool, SmtpError> {
// Skip catch-all check for known providers.
let host = host.to_string();
if is_gmail(&host) || is_hotmail(&host) || is_yahoo(&host) {
if has_rule(domain, host, &Rule::SkipCatchAll) {
log::debug!(
target: LOG_TARGET,
"[email={}] Skipping catch-all check for [domain={domain}]",
input.to_email
);
return Ok(false);
}

Expand All @@ -247,7 +252,7 @@ async fn smtp_is_catch_all(

async fn create_smtp_future(
to_email: &EmailAddress,
host: &Name,
host: &str,
port: u16,
domain: &str,
input: &CheckEmailInput,
Expand All @@ -256,7 +261,7 @@ async fn create_smtp_future(
// Ok(SmtpDetails { can_connect_smtp: false, ... }).
let mut smtp_transport = connect_to_host(host, port, input).await?;

let is_catch_all = smtp_is_catch_all(&mut smtp_transport, domain, host)
let is_catch_all = smtp_is_catch_all(&mut smtp_transport, domain, host, input)
.await
.unwrap_or(false);
let deliverability = if is_catch_all {
Expand All @@ -278,7 +283,8 @@ async fn create_smtp_future(
if parser::is_err_io_errors(e) {
log::debug!(
target: LOG_TARGET,
"Got `io: incomplete` error, reconnecting."
"[email={}] Got `io: incomplete` error, reconnecting.",
input.to_email
);

let _ = smtp_transport.close().await;
Expand All @@ -299,7 +305,7 @@ async fn create_smtp_future(
/// retries.
async fn check_smtp_without_retry(
to_email: &EmailAddress,
host: &Name,
host: &str,
port: u16,
domain: &str,
input: &CheckEmailInput,
Expand All @@ -325,7 +331,7 @@ async fn check_smtp_without_retry(
#[async_recursion]
pub async fn check_smtp_with_retry(
to_email: &EmailAddress,
host: &Name,
host: &str,
port: u16,
domain: &str,
input: &CheckEmailInput,
Expand Down Expand Up @@ -376,3 +382,26 @@ pub async fn check_smtp_with_retry(
_ => result,
}
}

#[cfg(test)]
mod tests {
    use super::*;

    /// `gmail.com` carries the `SkipCatchAll` rule, so the catch-all check
    /// must answer `Ok(false)` without the transport ever being connected.
    #[tokio::test]
    async fn should_skip_catch_all() {
        let smtp_client = SmtpClient::new("gmail.com".into());
        let mut smtp_transport = smtp_client.into_transport();

        let r = smtp_is_catch_all(
            &mut smtp_transport,
            "gmail.com",
            "alt4.aspmx.l.google.com.",
            &CheckEmailInput::default(),
        )
        .await;

        assert!(!smtp_transport.is_connected()); // We shouldn't connect to google servers.

        let is_catch_all = r.expect("a skipped catch-all check must not return an SMTP error");
        assert!(!is_catch_all);
    }
}
3 changes: 2 additions & 1 deletion core/src/smtp/gmail.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ pub async fn check_gmail(
})
}

/// Check if the MX host is from Google, i.e. either a @gmail.com address, or
/// a Google Suite email.
///
/// The host is lowercased before comparison, and it must end with the
/// fully-qualified suffix `.google.com.` (trailing dot included).
pub fn is_gmail(host: &str) -> bool {
    const GOOGLE_MX_SUFFIX: &str = ".google.com.";

    let lowered = host.to_lowercase();
    lowered.ends_with(GOOGLE_MX_SUFFIX)
}
Expand Down
18 changes: 7 additions & 11 deletions core/src/smtp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,29 +62,25 @@ pub async fn check_smtp(
domain: &str,
input: &CheckEmailInput,
) -> Result<SmtpDetails, SmtpError> {
let host_lowercase = host.to_lowercase().to_string();
let host = host.to_string();

if input
.skipped_domains
.iter()
.any(|d| host_lowercase.contains(d))
{
if input.skipped_domains.iter().any(|d| host.contains(d)) {
return Err(SmtpError::SkippedDomain(format!(
"Reacher currently cannot verify emails from @{domain}"
)));
}

if input.yahoo_use_api && is_yahoo(&host_lowercase) {
if input.yahoo_use_api && is_yahoo(&host) {
return yahoo::check_yahoo(to_email, input)
.await
.map_err(|err| err.into());
}
if input.gmail_use_api && is_gmail(&host_lowercase) {
if input.gmail_use_api && is_gmail(&host) {
return gmail::check_gmail(to_email, input)
.await
.map_err(|err| err.into());
}
if input.microsoft365_use_api && is_microsoft365(&host_lowercase) {
if input.microsoft365_use_api && is_microsoft365(&host) {
match outlook::microsoft365::check_microsoft365_api(to_email, input).await {
Ok(Some(smtp_details)) => return Ok(smtp_details),
// Continue in the event of an error/ambiguous result.
Expand All @@ -101,14 +97,14 @@ pub async fn check_smtp(
}
#[cfg(feature = "headless")]
if let Some(webdriver) = &input.hotmail_use_headless {
if is_outlook(&host_lowercase) {
if is_outlook(&host) {
return outlook::hotmail::check_password_recovery(to_email, webdriver)
.await
.map_err(|err| err.into());
}
}

check_smtp_with_retry(to_email, host, port, domain, input, input.retries).await
check_smtp_with_retry(to_email, &host, port, domain, input, input.retries).await
}

#[cfg(test)]
Expand Down

0 comments on commit 59ed4f6

Please sign in to comment.