Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Improve logging, add retries for Yahoo headless, switch to rustls #1549

Merged
merged 10 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,979 changes: 1,038 additions & 941 deletions Cargo.lock

Large diffs are not rendered by default.

16 changes: 11 additions & 5 deletions backend/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,19 @@ lapin = { version = "2.3.1" }
tokio-executor-trait = { version = "2.1.1" }
tokio-reactor-trait = { version = "1.1.0" }
openssl = { version = "0.10.64", features = ["vendored"] }
reqwest = { version = "0.12.5", features = ["json", "socks"] }
sentry = "0.23"
sentry-anyhow = "0.23"
reqwest = { version = "0.12.5", default-features = false, features = [
"json",
"rustls-tls",
] }
sentry = { version = "0.32", default-features = false, features = [
"reqwest",
"rustls",
] }
sentry-anyhow = "0.32"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sqlx = { version = "0.7", features = [
"runtime-tokio-native-tls",
sqlx = { version = "0.7", default-features = false, features = [
"runtime-tokio-rustls",
"postgres",
"uuid",
"chrono",
Expand Down
10 changes: 5 additions & 5 deletions backend/src/http/v0/bulk/task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,11 @@ pub async fn email_verification_task(

debug!(
target: LOG_TARGET,
"Got task result [email={}] for [job={}] and [uuid={}] with [is_reachable={:?}]",
to_email,
task_payload.id,
current_job.id(),
response.is_reachable,
email=to_email,
job_id=task_payload.id,
uuid=?current_job.id(),
is_reachable=?response.is_reachable,
"Task result received"
);

let is_reachable = response.is_reachable == Reachable::Unknown;
Expand Down
1 change: 1 addition & 0 deletions backend/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
async fn main() -> Result<(), anyhow::Error> {
// Initialize logging.
tracing_subscriber::fmt::init();

info!(target: LOG_TARGET, version=?CARGO_PKG_VERSION, "Running Reacher");
let mut config = load_config().await?;
config.connect().await?;
Expand Down
14 changes: 12 additions & 2 deletions backend/src/worker/consume.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,22 @@ async fn consume_check_email(config: Arc<BackendConfig>) -> Result<(), anyhow::E
let config_clone2 = Arc::clone(&config_clone);
let channel_clone2 = Arc::clone(&channel);

info!(target: LOG_TARGET, email=payload.input.to_email, job_id=?payload.job_id, "Starting task");
info!(
target: LOG_TARGET,
email=payload.input.to_email,
job_id=?payload.job_id,
"Starting task"
);
tokio::spawn(async move {
if let Err(e) =
do_check_email_work(&payload, delivery, channel_clone2, config_clone2).await
{
error!(target: LOG_TARGET, email=payload.input.to_email, error=?e, "Error processing message");
error!(
target: LOG_TARGET,
email=payload.input.to_email,
error=?e,
"Error processing message"
);
capture_anyhow(&e);
}
});
Expand Down
2 changes: 1 addition & 1 deletion cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ path = "src/main.rs"
anyhow = "1.0"
check-if-email-exists = { path = "../core" }
clap = { version = "3.2", features = ["derive", "env"] }
env_logger = "0.11"
once_cell = "1.19"
openssl = { version = "0.10", features = ["vendored"] }
serde = "1.0"
serde_json = "1.0"
tracing-subscriber = "0.3.18"

[dependencies.tokio]
version = "1.40.0"
Expand Down
2 changes: 1 addition & 1 deletion cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ pub(crate) static CONF: Lazy<Cli> = Lazy::new(Cli::parse);

#[tokio::main]
async fn main() -> Result<(), anyhow::Error> {
env_logger::init();
tracing_subscriber::fmt::init();

let to_email = &CONF.to_email;

Expand Down
17 changes: 13 additions & 4 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,29 @@ chrono = { version = "0.4.31", features = ["serde"] }
config = "0.14"
derive_builder = "0.20"
fast-socks5 = "0.9"
fantoccini = { version = "0.21.2" }
fantoccini = { version = "0.21.2", default-features = false, features = [
"rustls-tls",
] }
futures = { version = "0.3.30" }
hickory-proto = "0.24.0"
hickory-resolver = "0.24.0"
levenshtein = "1.0.5"
lettre = { version = "0.11", features = ["smtp-transport"] }
log = "0.4.22"
mailchecker = "6.0.11"
md5 = "0.7.0"
once_cell = "1.19.0"
pwned = "0.5.0"
rand = { version = "0.8.5", features = ["small_rng"] }
regex = "1.11.1"
reqwest = { version = "0.12.5", features = ["json", "socks"] }
sentry = { version = "0.23", optional = true }
reqwest = { version = "0.12.5", default-features = false, features = [
"json",
"rustls-tls",
] }
rustls = { version = "0.23", features = ["ring"] }
sentry = { version = "0.32", default-features = false, features = [
"reqwest",
"rustls",
], optional = true }
serde = { version = "1.0.214", features = ["derive"] }
serde_json = "1.0.133"
thiserror = "1.0"
Expand Down
65 changes: 40 additions & 25 deletions core/src/haveibeenpwned.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,41 +15,56 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>.

use crate::LOG_TARGET;
use pwned::api::PwnedBuilder;
use reqwest::Client;

const MAIN_API_URL: &str = "https://haveibeenpwned.com/api/v3/";

/// Check if the email has been found in any breach or paste using the
/// HaveIBeenPwned API.
/// Returns `Some(true)` if the email has been found in at least one breach,
/// `Some(false)` if it has not, and `None` if the check itself failed.
pub async fn check_haveibeenpwned(to_email: &str, api_key: Option<String>) -> Option<bool> {
let pwned = PwnedBuilder::default()
.user_agent("reacher")
.api_key(api_key)
.build()
.unwrap();

match pwned.check_email(to_email).await {
Ok(answer) => {
log::debug!(
target: LOG_TARGET,
"Email found in {} breaches",
answer.len()
);
Some(!answer.is_empty())
let client = Client::new();
let url = format!(
"{}breachedaccount/{}?truncateResponse=false",
MAIN_API_URL, to_email
);

let request = client
.get(&url)
.header("User-Agent", "reacher")
.header("hibp-api-key", api_key.unwrap_or_default())
.send()
.await;

match request {
Ok(response) => {
if response.status().is_success() {
let breaches: Vec<serde_json::Value> = response.json().await.unwrap_or_default();
tracing::debug!(
target: LOG_TARGET,
breach_count=breaches.len(),
"HaveIBeenPwned check completed"
);
Some(!breaches.is_empty())
} else if response.status() == reqwest::StatusCode::NOT_FOUND {
Some(false)
} else {
tracing::error!(
target: LOG_TARGET,
status = %response.status(),
"Error checking HaveIBeenPwned"
);
None
}
}
Err(e) => {
log::error!(
tracing::error!(
target: LOG_TARGET,
"Error while checking if email has been pwned: {}",
e
error=?e,
"Error checking HaveIBeenPwned"
);
match e {
pwned::errors::Error::IoError(e) => match e.kind() {
std::io::ErrorKind::NotFound => Some(false),
_ => None,
},
_ => None,
}
None
}
}
}
60 changes: 38 additions & 22 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,10 @@ use hickory_proto::rr::rdata::MX;
use misc::{check_misc, MiscDetails};
use mx::check_mx;
use rand::Rng;
use rustls::crypto::ring;
use smtp::{check_smtp, SmtpDetails, SmtpError};
pub use smtp::{is_gmail, is_hotmail, is_hotmail_b2b, is_hotmail_b2c, is_yahoo};
use std::sync::Once;
use std::time::{Duration, SystemTime};
use syntax::{check_syntax, get_similar_mail_provider};
pub use util::input_output::*;
Expand All @@ -88,6 +90,16 @@ use crate::rules::{has_rule, Rule};
/// The target where to log check-if-email-exists logs.
pub const LOG_TARGET: &str = "reacher";

static INIT: Once = Once::new();

/// check-if-email-exists uses rustls for its TLS connections. This function
/// installs the `ring` crypto provider as the process-wide rustls default.
///
/// It can be called any number of times: the installation is attempted only
/// once, guarded by `INIT`. If a default provider has already been installed
/// elsewhere in the process (for example by the host application), that
/// provider is kept and this function does nothing instead of panicking.
pub fn initialize_crypto_provider() {
    INIT.call_once(|| {
        // `install_default` returns `Err` when a process-wide default is
        // already set. That is not a failure for us: a provider is in place,
        // which is all the TLS stack needs, so the result is ignored on
        // purpose rather than unwrapped.
        let _ = ring::default_provider().install_default();
    });
}

/// Given an email's misc and smtp details, calculate an estimate of our
/// confidence on how reachable the email is.
///
Expand Down Expand Up @@ -120,14 +132,14 @@ fn calculate_reachable(misc: &MiscDetails, smtp: &Result<SmtpDetails, SmtpError>
/// Returns a `CheckEmailOutput` output, whose `is_reachable` field is one of
/// `Safe`, `Invalid`, `Risky` or `Unknown`.
pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
initialize_crypto_provider();
let start_time = SystemTime::now();
let to_email = &input.to_email;

log::debug!(
tracing::debug!(
target: LOG_TARGET,
"[email={}] Checking email \"{}\"",
to_email,
to_email
email=%to_email,
"Checking email"
);
let mut my_syntax = check_syntax(to_email.as_ref());
if !my_syntax.is_valid_syntax {
Expand All @@ -139,11 +151,11 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
};
}

log::debug!(
tracing::debug!(
target: LOG_TARGET,
"[email={}] Found the following syntax validation: {:?}",
to_email,
my_syntax
email=%to_email,
syntax=?my_syntax,
"Found syntax validation"
);

let my_mx = match check_mx(&my_syntax).await {
Expand Down Expand Up @@ -176,17 +188,19 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
};
}

log::debug!(
let mx_hosts: Vec<String> = my_mx
.lookup
.as_ref()
.expect("If lookup is error, we already returned. qed.")
.iter()
.map(|host| host.to_string())
.collect();

tracing::debug!(
target: LOG_TARGET,
"[email={}] Found the following MX hosts: {:?}",
to_email,
my_mx
.lookup
.as_ref()
.expect("If lookup is error, we already returned. qed.")
.iter()
.map(|host| host.to_string())
.collect::<Vec<String>>()
email=%to_email,
mx_hosts=?mx_hosts,
"Found MX hosts"
);

let my_misc = check_misc(
Expand All @@ -195,11 +209,12 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
input.haveibeenpwned_api_key.clone(),
)
.await;
log::debug!(

tracing::debug!(
target: LOG_TARGET,
"[email={}] Found the following misc details: {:?}",
to_email,
my_misc
email=%to_email,
misc=?my_misc,
"Found misc details"
);

// From the list of MX records, we only choose one: we don't choose the
Expand Down Expand Up @@ -271,6 +286,7 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
},
};

#[cfg(feature = "sentry")]
log_unknown_errors(&output, &input.backend_name);

output
Expand Down
10 changes: 5 additions & 5 deletions core/src/misc/gravatar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ pub async fn check_gravatar(to_email: &str) -> Option<String> {

let url = format!("{API_BASE_URL}{mail_hash:x}");

log::debug!(
tracing::debug!(
target: LOG_TARGET,
"[email={}] Request Gravatar API with url: {:?}",
to_email,
url
email=to_email,
url=url,
"Request Gravatar API"
);

let response = client
Expand All @@ -41,7 +41,7 @@ pub async fn check_gravatar(to_email: &str) -> Option<String> {
.send()
.await;

log::debug!(
tracing::debug!(
target: LOG_TARGET,
"[email={}] Gravatar response: {:?}",
to_email,
Expand Down
Loading
Loading