Skip to content

Commit

Permalink
fix: Improve logging, add retries for Yahoo headless, switch to rustls (#1549)
Browse files Browse the repository at this point in the history

* first fixes

* Switch some logs

* fix more

* use tracing everywhere

* update deps

* Remove native-tls

* fix bugs

* Add rustls

* Fix tests

* ignoring test
  • Loading branch information
amaury1093 authored Dec 20, 2024
1 parent 0d3faff commit b1377db
Show file tree
Hide file tree
Showing 20 changed files with 1,285 additions and 1,089 deletions.
1,979 changes: 1,038 additions & 941 deletions Cargo.lock

Large diffs are not rendered by default.

16 changes: 11 additions & 5 deletions backend/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,19 @@ lapin = { version = "2.3.1" }
tokio-executor-trait = { version = "2.1.1" }
tokio-reactor-trait = { version = "1.1.0" }
openssl = { version = "0.10.64", features = ["vendored"] }
reqwest = { version = "0.12.5", features = ["json", "socks"] }
sentry = "0.23"
sentry-anyhow = "0.23"
reqwest = { version = "0.12.5", default-features = false, features = [
"json",
"rustls-tls",
] }
sentry = { version = "0.32", default-features = false, features = [
"reqwest",
"rustls",
] }
sentry-anyhow = "0.32"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
sqlx = { version = "0.7", features = [
"runtime-tokio-native-tls",
sqlx = { version = "0.7", default-features = false, features = [
"runtime-tokio-rustls",
"postgres",
"uuid",
"chrono",
Expand Down
10 changes: 5 additions & 5 deletions backend/src/http/v0/bulk/task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,11 +179,11 @@ pub async fn email_verification_task(

debug!(
target: LOG_TARGET,
"Got task result [email={}] for [job={}] and [uuid={}] with [is_reachable={:?}]",
to_email,
task_payload.id,
current_job.id(),
response.is_reachable,
email=to_email,
job_id=task_payload.id,
uuid=?current_job.id(),
is_reachable=?response.is_reachable,
"Task result received"
);

let is_reachable = response.is_reachable == Reachable::Unknown;
Expand Down
1 change: 1 addition & 0 deletions backend/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
async fn main() -> Result<(), anyhow::Error> {
// Initialize logging.
tracing_subscriber::fmt::init();

info!(target: LOG_TARGET, version=?CARGO_PKG_VERSION, "Running Reacher");
let mut config = load_config().await?;
config.connect().await?;
Expand Down
14 changes: 12 additions & 2 deletions backend/src/worker/consume.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,12 +149,22 @@ async fn consume_check_email(config: Arc<BackendConfig>) -> Result<(), anyhow::E
let config_clone2 = Arc::clone(&config_clone);
let channel_clone2 = Arc::clone(&channel);

info!(target: LOG_TARGET, email=payload.input.to_email, job_id=?payload.job_id, "Starting task");
info!(
target: LOG_TARGET,
email=payload.input.to_email,
job_id=?payload.job_id,
"Starting task"
);
tokio::spawn(async move {
if let Err(e) =
do_check_email_work(&payload, delivery, channel_clone2, config_clone2).await
{
error!(target: LOG_TARGET, email=payload.input.to_email, error=?e, "Error processing message");
error!(
target: LOG_TARGET,
email=payload.input.to_email,
error=?e,
"Error processing message"
);
capture_anyhow(&e);
}
});
Expand Down
2 changes: 1 addition & 1 deletion cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ path = "src/main.rs"
anyhow = "1.0"
check-if-email-exists = { path = "../core" }
clap = { version = "3.2", features = ["derive", "env"] }
env_logger = "0.11"
once_cell = "1.19"
openssl = { version = "0.10", features = ["vendored"] }
serde = "1.0"
serde_json = "1.0"
tracing-subscriber = "0.3.18"

[dependencies.tokio]
version = "1.40.0"
Expand Down
2 changes: 1 addition & 1 deletion cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ pub(crate) static CONF: Lazy<Cli> = Lazy::new(Cli::parse);

#[tokio::main]
async fn main() -> Result<(), anyhow::Error> {
env_logger::init();
tracing_subscriber::fmt::init();

let to_email = &CONF.to_email;

Expand Down
17 changes: 13 additions & 4 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,29 @@ chrono = { version = "0.4.31", features = ["serde"] }
config = "0.14"
derive_builder = "0.20"
fast-socks5 = "0.9"
fantoccini = { version = "0.21.2" }
fantoccini = { version = "0.21.2", default-features = false, features = [
"rustls-tls",
] }
futures = { version = "0.3.30" }
hickory-proto = "0.24.0"
hickory-resolver = "0.24.0"
levenshtein = "1.0.5"
lettre = { version = "0.11", features = ["smtp-transport"] }
log = "0.4.22"
mailchecker = "6.0.11"
md5 = "0.7.0"
once_cell = "1.19.0"
pwned = "0.5.0"
rand = { version = "0.8.5", features = ["small_rng"] }
regex = "1.11.1"
reqwest = { version = "0.12.5", features = ["json", "socks"] }
sentry = { version = "0.23", optional = true }
reqwest = { version = "0.12.5", default-features = false, features = [
"json",
"rustls-tls",
] }
rustls = { version = "0.23", features = ["ring"] }
sentry = { version = "0.32", default-features = false, features = [
"reqwest",
"rustls",
], optional = true }
serde = { version = "1.0.214", features = ["derive"] }
serde_json = "1.0.133"
thiserror = "1.0"
Expand Down
65 changes: 40 additions & 25 deletions core/src/haveibeenpwned.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,41 +15,56 @@
// along with this program. If not, see <https://www.gnu.org/licenses/>.

use crate::LOG_TARGET;
use pwned::api::PwnedBuilder;
use reqwest::Client;

const MAIN_API_URL: &str = "https://haveibeenpwned.com/api/v3/";

/// Check if the email has been found in any breach or paste using the
/// HaveIBeenPwned API.
/// Returns `Some(true)` if the email has been found in at least one breach,
/// `Some(false)` if it has not, and `None` if the check itself failed.
pub async fn check_haveibeenpwned(to_email: &str, api_key: Option<String>) -> Option<bool> {
let pwned = PwnedBuilder::default()
.user_agent("reacher")
.api_key(api_key)
.build()
.unwrap();

match pwned.check_email(to_email).await {
Ok(answer) => {
log::debug!(
target: LOG_TARGET,
"Email found in {} breaches",
answer.len()
);
Some(!answer.is_empty())
let client = Client::new();
let url = format!(
"{}breachedaccount/{}?truncateResponse=false",
MAIN_API_URL, to_email
);

let request = client
.get(&url)
.header("User-Agent", "reacher")
.header("hibp-api-key", api_key.unwrap_or_default())
.send()
.await;

match request {
Ok(response) => {
if response.status().is_success() {
let breaches: Vec<serde_json::Value> = response.json().await.unwrap_or_default();
tracing::debug!(
target: LOG_TARGET,
breach_count=breaches.len(),
"HaveIBeenPwned check completed"
);
Some(!breaches.is_empty())
} else if response.status() == reqwest::StatusCode::NOT_FOUND {
Some(false)
} else {
tracing::error!(
target: LOG_TARGET,
status = %response.status(),
"Error checking HaveIBeenPwned"
);
None
}
}
Err(e) => {
log::error!(
tracing::error!(
target: LOG_TARGET,
"Error while checking if email has been pwned: {}",
e
error=?e,
"Error checking HaveIBeenPwned"
);
match e {
pwned::errors::Error::IoError(e) => match e.kind() {
std::io::ErrorKind::NotFound => Some(false),
_ => None,
},
_ => None,
}
None
}
}
}
60 changes: 38 additions & 22 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,10 @@ use hickory_proto::rr::rdata::MX;
use misc::{check_misc, MiscDetails};
use mx::check_mx;
use rand::Rng;
use rustls::crypto::ring;
use smtp::{check_smtp, SmtpDetails, SmtpError};
pub use smtp::{is_gmail, is_hotmail, is_hotmail_b2b, is_hotmail_b2c, is_yahoo};
use std::sync::Once;
use std::time::{Duration, SystemTime};
use syntax::{check_syntax, get_similar_mail_provider};
pub use util::input_output::*;
Expand All @@ -88,6 +90,16 @@ use crate::rules::{has_rule, Rule};
/// The target where to log check-if-email-exists logs.
pub const LOG_TARGET: &str = "reacher";

static INIT: Once = Once::new();

/// check-if-email-exists uses rustls for its TLS connections. This function
/// installs the `ring`-based crypto provider as the process-wide rustls
/// default.
///
/// It is safe to call this function any number of times: the installation
/// is attempted at most once, guarded by `INIT`.
///
/// This function never panics. If a default crypto provider has already been
/// installed (for example by the application embedding this crate), the
/// existing provider is kept.
pub fn initialize_crypto_provider() {
    INIT.call_once(|| {
        // `install_default` returns `Err` only when a process-wide default
        // provider is already set. A provider is available either way, so
        // the result is deliberately ignored. Unwrapping here would panic,
        // poison `INIT`, and make every subsequent call panic as well.
        let _ = ring::default_provider().install_default();
    });
}

/// Given an email's misc and smtp details, calculate an estimate of our
/// confidence on how reachable the email is.
///
Expand Down Expand Up @@ -120,14 +132,14 @@ fn calculate_reachable(misc: &MiscDetails, smtp: &Result<SmtpDetails, SmtpError>
/// Returns a `CheckEmailOutput` output, whose `is_reachable` field is one of
/// `Safe`, `Invalid`, `Risky` or `Unknown`.
pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
initialize_crypto_provider();
let start_time = SystemTime::now();
let to_email = &input.to_email;

log::debug!(
tracing::debug!(
target: LOG_TARGET,
"[email={}] Checking email \"{}\"",
to_email,
to_email
email=%to_email,
"Checking email"
);
let mut my_syntax = check_syntax(to_email.as_ref());
if !my_syntax.is_valid_syntax {
Expand All @@ -139,11 +151,11 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
};
}

log::debug!(
tracing::debug!(
target: LOG_TARGET,
"[email={}] Found the following syntax validation: {:?}",
to_email,
my_syntax
email=%to_email,
syntax=?my_syntax,
"Found syntax validation"
);

let my_mx = match check_mx(&my_syntax).await {
Expand Down Expand Up @@ -176,17 +188,19 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
};
}

log::debug!(
let mx_hosts: Vec<String> = my_mx
.lookup
.as_ref()
.expect("If lookup is error, we already returned. qed.")
.iter()
.map(|host| host.to_string())
.collect();

tracing::debug!(
target: LOG_TARGET,
"[email={}] Found the following MX hosts: {:?}",
to_email,
my_mx
.lookup
.as_ref()
.expect("If lookup is error, we already returned. qed.")
.iter()
.map(|host| host.to_string())
.collect::<Vec<String>>()
email=%to_email,
mx_hosts=?mx_hosts,
"Found MX hosts"
);

let my_misc = check_misc(
Expand All @@ -195,11 +209,12 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
input.haveibeenpwned_api_key.clone(),
)
.await;
log::debug!(

tracing::debug!(
target: LOG_TARGET,
"[email={}] Found the following misc details: {:?}",
to_email,
my_misc
email=%to_email,
misc=?my_misc,
"Found misc details"
);

// From the list of MX records, we only choose one: we don't choose the
Expand Down Expand Up @@ -271,6 +286,7 @@ pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput {
},
};

#[cfg(feature = "sentry")]
log_unknown_errors(&output, &input.backend_name);

output
Expand Down
10 changes: 5 additions & 5 deletions core/src/misc/gravatar.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ pub async fn check_gravatar(to_email: &str) -> Option<String> {

let url = format!("{API_BASE_URL}{mail_hash:x}");

log::debug!(
tracing::debug!(
target: LOG_TARGET,
"[email={}] Request Gravatar API with url: {:?}",
to_email,
url
email=to_email,
url=url,
"Request Gravatar API"
);

let response = client
Expand All @@ -41,7 +41,7 @@ pub async fn check_gravatar(to_email: &str) -> Option<String> {
.send()
.await;

log::debug!(
tracing::debug!(
target: LOG_TARGET,
"[email={}] Gravatar response: {:?}",
to_email,
Expand Down
Loading

0 comments on commit b1377db

Please sign in to comment.