From 228d8203bda72844e3dd0568cf238a68a91bd9a7 Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1093@users.noreply.github.com> Date: Mon, 25 Nov 2024 11:00:46 +0100 Subject: [PATCH 1/7] feat: Remove ReacherConfig, use CheckEmailInput --- .github/workflows/deploy_cli.yml | 1 + backend/Dockerfile | 2 +- backend/backend_config.toml | 11 +++++++ backend/scripts/debian11.sh | 30 +++++++++---------- backend/src/config.rs | 19 ++++-------- backend/src/http/v0/bulk/task.rs | 38 +++++++++++------------ backend/src/http/v0/check_email/post.rs | 9 +++--- backend/src/main.rs | 2 +- backend/src/worker/check_email.rs | 9 ++---- core/src/config.rs | 40 ------------------------- core/src/lib.rs | 15 ++-------- core/src/misc/mod.rs | 9 +++--- core/src/smtp/error.rs | 9 +++++- core/src/smtp/headless.rs | 8 +++-- core/src/smtp/mod.rs | 23 +++++--------- core/src/smtp/outlook/microsoft365.rs | 11 +++---- core/src/util/input_output.rs | 21 +++++++++++-- core/src/util/sentry.rs | 38 ++++++++++------------- 18 files changed, 127 insertions(+), 168 deletions(-) delete mode 100644 core/src/config.rs diff --git a/.github/workflows/deploy_cli.yml b/.github/workflows/deploy_cli.yml index 151f4e353..23de83e05 100644 --- a/.github/workflows/deploy_cli.yml +++ b/.github/workflows/deploy_cli.yml @@ -37,6 +37,7 @@ jobs: windows: runs-on: windows-latest + if: false # Disable Windows tests for now, because Reacher Worker is not supported on Windows needs: install-cross steps: - uses: actions/checkout@v2 diff --git a/backend/Dockerfile b/backend/Dockerfile index f142de342..e5cb8ddb0 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -41,7 +41,7 @@ RUN chown chrome:chrome docker.sh USER chrome ENV RUST_LOG=reacher=info -ENV RCH_HTTP_HOST=0.0.0.0 +ENV RCH__HTTP_HOST=0.0.0.0 EXPOSE 8080 diff --git a/backend/backend_config.toml b/backend/backend_config.toml index 214e57911..66ed9cd23 100644 --- a/backend/backend_config.toml +++ b/backend/backend_config.toml @@ -24,12 +24,17 @@ from_email = 
"reacher@gmail.com" # Address of the Chrome WebDriver server for headless email verifications. webdriver_addr = "http://localhost:9515" +# Timeout for each SMTP connection, in seconds. Leaving it commented out will +# not set a timeout, i.e. the connection will wait indefinitely. +# smtp_timeout = 45 + # Uncomment the following lines to route all SMTP verification requests through # a specified proxy. Note that the proxy must be a SOCKS5 proxy to work with # the SMTP protocol. This proxy will not be used for headless verifications. # # The username and password are optional and only needed if the proxy requires # authentication. +# # [proxy] # host = "my.proxy.com" # port = 1080 @@ -102,3 +107,9 @@ db_url = "postgresql://localhost/reacherdb" # per email verification, with the result in the body. # [worker.webhook.on_each_email] # url = "http://localhost:8080/webhook" + +# Optional Sentry configuration. If set, all errors will be sent to Sentry. +# [sentry] +# dsn = "" +# Identifier sent to Sentry, usually the same as the top-level backend_name. +# backend_name = "backend-dev" diff --git a/backend/scripts/debian11.sh b/backend/scripts/debian11.sh index 9d7389f73..197e7febf 100644 --- a/backend/scripts/debian11.sh +++ b/backend/scripts/debian11.sh @@ -9,27 +9,27 @@ set -e # You can change the default values of these variables inline here, or by # setting them in the environment before running this script, e.g.: -# RCH_BACKEND_NAME="my-own-name" ./debian11.sh +# RCH__BACKEND_NAME="my-own-name" ./debian11.sh # An unique identifier for the backend. -RCH_BACKEND_NAME=${RCH_BACKEND_NAME:-"backend1.mycompany.com"} +RCH__BACKEND_NAME=${RCH__BACKEND_NAME:-"backend1.mycompany.com"} # Docker Hub tag for reacherhq/backend. -RCH_VERSION=${RCH_VERSION:-"v0.7.0"} +RCH_VERSION=${RCH_VERSION:-"v0.10.0-beta.1"} # Optional: Send bug reports to a Sentry.io dashboard.
-RCH_SENTRY_DSN=${RCH_SENTRY_DSN:-} +RCH__SENTRY_DSN=${RCH__SENTRY_DSN:-} # Protect the backend from the public via a `x-reacher-secret` header. -RCH_HEADER_SECRET=${RCH_HEADER_SECRET:-} +RCH__HEADER_SECRET=${RCH__HEADER_SECRET:-} # For the "FROM" field in emails. -RCH_FROM_EMAIL=${RCH_FROM_EMAIL:-"hello@mycompany.com"} +RCH__FROM_EMAIL=${RCH__FROM_EMAIL:-"hello@mycompany.com"} # For the "EHLO" field in emails. This should ideally match the server's # reverse DNS entry for optimal results. -RCH_HELLO_NAME=${RCH_HELLO_NAME:-"backend1.mycompany.com"} +RCH__HELLO_NAME=${RCH__HELLO_NAME:-"backend1.mycompany.com"} # Timeout for SMTP connections in seconds. -RCH_SMTP_TIMEOUT=${RCH_SMTP_TIMEOUT:-"90"} +RCH__SMTP_TIMEOUT=${RCH__SMTP_TIMEOUT:-"90"} # Logging. Setup to "debug" to show all logs. RUST_LOG=${RUST_LOG:-"info"} -echo "Installing Reacher backend $RCH_VERSION on host $RCH_BACKEND_NAME..." +echo "Installing Reacher backend $RCH_VERSION on host $RCH__BACKEND_NAME..." # Install Docker # https://docs.docker.com/engine/install/debian/ @@ -64,12 +64,12 @@ docker rm reacher_backend # Run the backend docker run -d \ -e RUST_LOG=$RUST_LOG \ - -e RCH_BACKEND_NAME=$RCH_BACKEND_NAME \ - -e RCH_SENTRY_DSN=$RCH_SENTRY_DSN \ - -e RCH_HEADER_SECRET=$RCH_HEADER_SECRET \ - -e RCH_FROM_EMAIL=$RCH_FROM_EMAIL \ - -e RCH_HELLO_NAME=$RCH_HELLO_NAME \ - -e RCH_SMTP_TIMEOUT=$RCH_SMTP_TIMEOUT \ + -e RCH__BACKEND_NAME=$RCH__BACKEND_NAME \ + -e RCH__SENTRY_DSN=$RCH__SENTRY_DSN \ + -e RCH__HEADER_SECRET=$RCH__HEADER_SECRET \ + -e RCH__FROM_EMAIL=$RCH__FROM_EMAIL \ + -e RCH__HELLO_NAME=$RCH__HELLO_NAME \ + -e RCH__SMTP_TIMEOUT=$RCH__SMTP_TIMEOUT \ -p 80:8080 \ --name reacher_backend \ reacherhq/backend:$RCH_VERSION diff --git a/backend/src/config.rs b/backend/src/config.rs index 30d4537f5..12900ce25 100644 --- a/backend/src/config.rs +++ b/backend/src/config.rs @@ -20,10 +20,9 @@ use crate::worker::check_email::TaskWebhook; #[cfg(feature = "worker")] use crate::worker::setup_rabbit_mq; use 
anyhow::bail; -use check_if_email_exists::config::ReacherConfig; use check_if_email_exists::{ CheckEmailInputProxy, GmailVerifMethod, HotmailB2BVerifMethod, HotmailB2CVerifMethod, - SentryConfig, YahooVerifMethod, + YahooVerifMethod, }; use config::Config; #[cfg(feature = "worker")] @@ -56,13 +55,15 @@ pub struct BackendConfig { pub http_port: u16, /// Shared secret between a trusted client and the backend. pub header_secret: Option, + /// Timeout for each SMTP connection, in seconds. Leaving it commented out + /// will not set a timeout, i.e. the connection will wait indefinitely. + pub smtp_timeout: Option, + /// Sentry DSN to report errors to + pub sentry_dsn: Option, /// Worker configuration, only present if the backend is a worker. pub worker: WorkerConfig, - /// Sentry configuration to report errors. - pub sentry: Option, - // Internal fields, not part of the configuration. #[serde(skip)] pg_pool: Option, @@ -75,14 +76,6 @@ pub struct BackendConfig { } impl BackendConfig { - pub fn get_reacher_config(&self) -> ReacherConfig { - ReacherConfig { - backend_name: self.backend_name.clone(), - sentry: self.sentry.clone(), - webdriver_addr: self.webdriver_addr.clone(), - } - } - /// Get the worker configuration. /// /// # Panics diff --git a/backend/src/http/v0/bulk/task.rs b/backend/src/http/v0/bulk/task.rs index f443019e0..83b1e97f6 100644 --- a/backend/src/http/v0/bulk/task.rs +++ b/backend/src/http/v0/bulk/task.rs @@ -17,8 +17,8 @@ //! This file implements the `POST /bulk` endpoint. 
use check_if_email_exists::{ - check_email, config::ReacherConfig, CheckEmailInput, CheckEmailInputBuilder, - CheckEmailInputProxy, CheckEmailOutput, Reachable, SentryConfig, LOG_TARGET, + check_email, CheckEmailInput, CheckEmailInputBuilder, CheckEmailInputProxy, CheckEmailOutput, + Reachable, LOG_TARGET, }; use serde::{Deserialize, Serialize}; use sqlx::{Pool, Postgres}; @@ -64,7 +64,7 @@ impl Iterator for TaskInputIterator { fn next(&mut self) -> Option { if self.index < self.body.smtp_ports.len() { let mut item = CheckEmailInputBuilder::default(); - let mut item = item.to_email(self.body.to_email.clone()); + let mut item: &mut CheckEmailInputBuilder = item.to_email(self.body.to_email.clone()); if let Some(name) = &self.body.hello_name { item = item.hello_name(name.clone()); @@ -80,6 +80,19 @@ impl Iterator for TaskInputIterator { item = item.proxy(Some(proxy.clone())); } + // Currently, for the legacy and deprecated /v0/bulk endpoints, we + // don't pass in a BackendConfig to the job. Therefore, we must read + // these settings ad hoc here, using the legacy env::var() method. + // This is a temporary solution until the /v0/bulk endpoints are + // removed. + let backend_name = env::var("RCH_BACKEND_NAME").unwrap_or_else(|_| "reacher".into()); + let sentry_dsn = env::var("RCH_SENTRY_DSN"); + let webdriver_addr = + env::var("RCH_WEBDRIVER_ADDR").unwrap_or_else(|_| "http://localhost:9515".into()); + item.backend_name(backend_name); + item.sentry_dsn(sentry_dsn.ok()); + item.webdriver_addr(webdriver_addr); + self.index += 1; Some(item.build().unwrap()) } else { @@ -161,25 +174,8 @@ pub async fn email_verification_task( current_job.id(), ); - // Currently, for the legacy and deprecated /v0/bulk endpoints, we - // don't pass in a BackendConfig to the job. Therefore, we must create - // an ad-hoc ReacherConfig here, using the legacy env::var() method. - // This is a temporary solution until the /v0/bulk endpoints are - // removed.
- let backend_name = env::var("RCH_BACKEND_NAME").unwrap_or_else(|_| "reacher".into()); - let sentry_dsn = env::var("RCH_SENTRY_DSN"); - let webdriver_addr = - env::var("RCH_WEBDRIVER_ADDR").unwrap_or_else(|_| "localhost:9515".into()); - let config = ReacherConfig { - backend_name: backend_name.clone(), - webdriver_addr, - sentry: sentry_dsn - .ok() - .map(|dsn| SentryConfig { dsn, backend_name }), - }; - let to_email = check_email_input.to_email.clone(); - let response = check_email(&check_email_input, &config).await; + let response = check_email(&check_email_input).await; debug!( target: LOG_TARGET, diff --git a/backend/src/http/v0/check_email/post.rs b/backend/src/http/v0/check_email/post.rs index 29a4e68ee..8189ccfdf 100644 --- a/backend/src/http/v0/check_email/post.rs +++ b/backend/src/http/v0/check_email/post.rs @@ -21,6 +21,7 @@ use check_if_email_exists::{ }; use serde::{Deserialize, Serialize}; use std::sync::Arc; +use std::time::Duration; use warp::{http, Filter}; use crate::config::BackendConfig; @@ -54,6 +55,8 @@ impl CheckEmailRequest { .as_ref() .or_else(|| config.proxy.as_ref()) .cloned(), + smtp_timeout: config.smtp_timeout.map(Duration::from_secs), + sentry_dsn: config.sentry_dsn.clone(), ..Default::default() } } @@ -73,11 +76,7 @@ async fn http_handler( } else { // Run the future to check an email. Ok(warp::reply::json( - &check_email( - &body.to_check_email_input(Arc::clone(&config)), - &config.get_reacher_config(), - ) - .await, + &check_email(&body.to_check_email_input(Arc::clone(&config))).await, )) } } diff --git a/backend/src/main.rs b/backend/src/main.rs index f444ba590..27d2add8e 100644 --- a/backend/src/main.rs +++ b/backend/src/main.rs @@ -38,7 +38,7 @@ async fn main() -> Result<(), anyhow::Error> { // Setup sentry bug tracking. 
let _guard: sentry::ClientInitGuard; - if let Some(sentry_config) = &config.sentry { + if let Some(sentry_config) = &config.sentry_dsn { _guard = setup_sentry(sentry_config); } diff --git a/backend/src/worker/check_email.rs b/backend/src/worker/check_email.rs index fd83c7a7a..40c2be4ab 100644 --- a/backend/src/worker/check_email.rs +++ b/backend/src/worker/check_email.rs @@ -117,7 +117,7 @@ pub(crate) async fn do_check_email_work( channel: Arc, config: Arc, ) -> Result<(), anyhow::Error> { - let worker_output = inner_check_email(payload, Arc::clone(&config)).await; + let worker_output = inner_check_email(payload).await; match (&worker_output, delivery.redelivered) { (Ok(output), false) if output.is_reachable == Reachable::Unknown => { @@ -166,11 +166,8 @@ pub(crate) async fn do_check_email_work( Ok(()) } -async fn inner_check_email( - payload: &CheckEmailTask, - config: Arc, -) -> Result { - let output = check_email(&payload.input, &config.get_reacher_config()).await; +async fn inner_check_email(payload: &CheckEmailTask) -> Result { + let output = check_email(&payload.input).await; // Check if we have a webhook to send the output to. if let Some(TaskWebhook { diff --git a/core/src/config.rs b/core/src/config.rs deleted file mode 100644 index d2ef55a1e..000000000 --- a/core/src/config.rs +++ /dev/null @@ -1,40 +0,0 @@ -// check-if-email-exists -// Copyright (C) 2018-2023 Reacher - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published -// by the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. 
- -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -#[cfg(feature = "sentry")] -use crate::util::sentry::SentryConfig; - -/// Configuration needed to run Reacher. -#[derive(Debug)] -pub struct ReacherConfig { - /// Identifier for the service currently running Reacher. - pub backend_name: String, - /// The address of the WebDriver server. - pub webdriver_addr: String, - #[cfg(feature = "sentry")] - pub sentry: Option, -} - -impl Default for ReacherConfig { - fn default() -> Self { - ReacherConfig { - backend_name: "backend-dev".into(), - webdriver_addr: "http://localhost:9515".into(), - #[cfg(feature = "sentry")] - sentry: None, - } - } -} diff --git a/core/src/lib.rs b/core/src/lib.rs index 9a55076db..0cb0415f5 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -55,14 +55,8 @@ //! .build() //! .unwrap(); //! -//! // We also need to set some configuration parameters. -//! let config = ReacherConfig { -//! backend_name: "my-backend".into(), -//! ..Default::default() -//! }; -//! //! // Verify this input, using async/await syntax. -//! let result = check_email(&input, &config).await; +//! let result = check_email(&input).await; //! //! // `result` is a `Vec`, where the CheckEmailOutput //! // struct contains all information about one email. @@ -70,7 +64,6 @@ //! } //! ``` -pub mod config; mod haveibeenpwned; pub mod misc; pub mod mx; @@ -79,7 +72,6 @@ pub mod smtp; pub mod syntax; mod util; -use config::ReacherConfig; use hickory_proto::rr::rdata::MX; use misc::{check_misc, MiscDetails}; use mx::check_mx; @@ -128,7 +120,7 @@ fn calculate_reachable(misc: &MiscDetails, smtp: &Result /// /// Returns a `CheckEmailOutput` output, whose `is_reachable` field is one of /// `Safe`, `Invalid`, `Risky` or `Unknown`. 
-pub async fn check_email(input: &CheckEmailInput, config: &ReacherConfig) -> CheckEmailOutput { +pub async fn check_email(input: &CheckEmailInput) -> CheckEmailOutput { let start_time = SystemTime::now(); let to_email = &input.to_email; @@ -253,7 +245,6 @@ pub async fn check_email(input: &CheckEmailInput, config: &ReacherConfig) -> Che input.smtp_port, my_syntax.domain.as_ref(), input, - config, ) .await; @@ -277,7 +268,7 @@ pub async fn check_email(input: &CheckEmailInput, config: &ReacherConfig) -> Che .duration_since(start_time) .unwrap_or(Duration::from_secs(0)), smtp: smtp_debug, - backend_name: config.backend_name.clone(), + backend_name: input.backend_name.clone(), }, } } diff --git a/core/src/misc/mod.rs b/core/src/misc/mod.rs index 1ada3b011..9991465ca 100644 --- a/core/src/misc/mod.rs +++ b/core/src/misc/mod.rs @@ -16,12 +16,11 @@ mod gravatar; use crate::haveibeenpwned::check_haveibeenpwned; - -use serde::{Deserialize, Serialize}; -use std::default::Default; - use crate::syntax::SyntaxDetails; use gravatar::check_gravatar; +use serde::{Deserialize, Serialize}; +use std::default::Default; +use thiserror::Error; const ROLE_ACCOUNTS: &str = include_str!("./roles.json"); @@ -41,7 +40,7 @@ pub struct MiscDetails { /// Error occured connecting to this email server via SMTP. Right now this /// enum has no variant, as `check_misc` cannot fail. But putting a placeholder /// right now to avoid future breaking changes. -#[derive(Debug, Serialize)] +#[derive(Debug, Error, Serialize)] #[serde(tag = "type", content = "message")] pub enum MiscError {} diff --git a/core/src/smtp/error.rs b/core/src/smtp/error.rs index a41ed55ff..27e1bab63 100644 --- a/core/src/smtp/error.rs +++ b/core/src/smtp/error.rs @@ -24,24 +24,31 @@ use crate::util::ser_with_display::ser_with_display; use async_smtp::smtp::error::Error as AsyncSmtpError; use fast_socks5::SocksError; use serde::Serialize; +use thiserror::Error; /// Error occured connecting to this email server via SMTP. 
-#[derive(Debug, Serialize)] +#[derive(Debug, Error, Serialize)] #[serde(tag = "type", content = "message")] pub enum SmtpError { /// Error if we're using a SOCKS5 proxy. #[serde(serialize_with = "ser_with_display")] + #[error("SOCKS5 error: {0}")] SocksError(SocksError), /// Error when communicating with SMTP server. #[serde(serialize_with = "ser_with_display")] + #[error("SMTP error: {0}")] SmtpError(AsyncSmtpError), /// Error when verifying a Yahoo email via HTTP requests. + #[error("Yahoo error: {0}")] YahooError(YahooError), /// Error when verifying a Gmail email via a HTTP request. + #[error("Gmail error: {0}")] GmailError(GmailError), /// Error when verifying a Hotmail email via headless browser. + #[error("Headless verification error: {0}")] HeadlessError(HeadlessError), /// Error when verifying a Microsoft 365 email via HTTP request. + #[error("Microsoft 365 API error: {0}")] Microsoft365Error(Microsoft365Error), } diff --git a/core/src/smtp/headless.rs b/core/src/smtp/headless.rs index 72ef4256a..52a3df9d7 100644 --- a/core/src/smtp/headless.rs +++ b/core/src/smtp/headless.rs @@ -14,20 +14,22 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . 
+use crate::util::ser_with_display::ser_with_display; use fantoccini::{ error::{CmdError, NewSessionError}, Client, ClientBuilder, }; use serde::Serialize; use serde_json::Map; +use thiserror::Error; -use crate::util::ser_with_display::ser_with_display; - -#[derive(Debug, Serialize)] +#[derive(Debug, Error, Serialize)] pub enum HeadlessError { #[serde(serialize_with = "ser_with_display")] + #[error("Cmd error: {0}")] Cmd(CmdError), #[serde(serialize_with = "ser_with_display")] + #[error("New session error: {0}")] NewSession(NewSessionError), } diff --git a/core/src/smtp/mod.rs b/core/src/smtp/mod.rs index c6d7c49d3..14c208ec9 100644 --- a/core/src/smtp/mod.rs +++ b/core/src/smtp/mod.rs @@ -31,8 +31,7 @@ use hickory_proto::rr::Name; use serde::{Deserialize, Serialize}; use crate::{ - config::ReacherConfig, util::input_output::CheckEmailInput, GmailVerifMethod, - HotmailB2CVerifMethod, YahooVerifMethod, + util::input_output::CheckEmailInput, GmailVerifMethod, HotmailB2CVerifMethod, YahooVerifMethod, }; use connect::check_smtp_with_retry; pub use error::*; @@ -96,7 +95,6 @@ pub async fn check_smtp( port: u16, domain: &str, input: &CheckEmailInput, - config: &ReacherConfig, ) -> (Result, SmtpDebug) { let host_str = host.to_string(); let to_email_str = to_email.to_string(); @@ -104,7 +102,7 @@ pub async fn check_smtp( if is_hotmail_b2c(&host_str) { if let HotmailB2CVerifMethod::Headless = &input.hotmailb2c_verif_method { return ( - outlook::headless::check_password_recovery(&to_email_str, &config.webdriver_addr) + outlook::headless::check_password_recovery(&to_email_str, &input.webdriver_addr) .await .map_err(Into::into), SmtpDebug { @@ -137,7 +135,7 @@ pub async fn check_smtp( } YahooVerifMethod::Headless => { return ( - yahoo::check_headless(&to_email_str, &config.webdriver_addr) + yahoo::check_headless(&to_email_str, &input.webdriver_addr) .await .map_err(Into::into), SmtpDebug { @@ -164,7 +162,7 @@ pub async fn check_smtp( #[cfg(test)] mod tests { use 
super::{check_smtp, SmtpConnection, SmtpError}; - use crate::{config::ReacherConfig, CheckEmailInputBuilder}; + use crate::CheckEmailInputBuilder; use async_smtp::{smtp::error::Error, EmailAddress}; use hickory_proto::rr::Name; use std::{str::FromStr, time::Duration}; @@ -182,16 +180,9 @@ mod tests { .smtp_timeout(Some(Duration::from_millis(1))) .build() .unwrap(); - let config = ReacherConfig::default(); - - let (res, smtp_debug) = runtime.block_on(check_smtp( - &to_email, - &host, - 25, - "gmail.com", - &input, - &config, - )); + + let (res, smtp_debug) = + runtime.block_on(check_smtp(&to_email, &host, 25, "gmail.com", &input)); assert_eq!( smtp_debug.verif_method, super::VerifMethod::Smtp(SmtpConnection { diff --git a/core/src/smtp/outlook/microsoft365.rs b/core/src/smtp/outlook/microsoft365.rs index 2948fee43..0e8d942b2 100644 --- a/core/src/smtp/outlook/microsoft365.rs +++ b/core/src/smtp/outlook/microsoft365.rs @@ -14,19 +14,20 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -use async_smtp::EmailAddress; -use reqwest::Error as ReqwestError; -use serde::Serialize; - use crate::{ smtp::{http_api::create_client, SmtpDetails}, util::ser_with_display::ser_with_display, CheckEmailInput, LOG_TARGET, }; +use async_smtp::EmailAddress; +use reqwest::Error as ReqwestError; +use serde::Serialize; +use thiserror::Error; -#[derive(Debug, Serialize)] +#[derive(Debug, Error, Serialize)] pub enum Microsoft365Error { #[serde(serialize_with = "ser_with_display")] + #[error("Reqwest error: {0}")] ReqwestError(ReqwestError), } diff --git a/core/src/util/input_output.rs b/core/src/util/input_output.rs index 8a143b723..0eaa145b4 100644 --- a/core/src/util/input_output.rs +++ b/core/src/util/input_output.rs @@ -197,7 +197,7 @@ pub struct CheckEmailInput { /// Add timeout for the SMTP verification step. Set to None if you don't /// want to use a timeout. /// - /// Defaults to 30s. + /// Defaults to None. 
pub smtp_timeout: Option, /// Select how to verify Yahoo emails. /// @@ -232,6 +232,20 @@ pub struct CheckEmailInput { /// /// Defaults to Opportunistic. pub smtp_security: SmtpSecurity, + /// The WebDriver address to use for headless verifications. + /// + /// Defaults to http://localhost:9515. + pub webdriver_addr: String, + /// Identifier for the service currently running Reacher. We recommend + /// setting this to a unique identifier of the server on which Reacher is + /// installed. + /// + /// Defaults to "backend-dev". + pub backend_name: String, + /// Sentry DSN to send errors to Sentry. + /// + /// Defaults to None. + pub sentry_dsn: Option, } impl Default for CheckEmailInput { @@ -243,7 +257,7 @@ impl Default for CheckEmailInput { proxy: None, smtp_port: 25, smtp_security: SmtpSecurity::default(), - smtp_timeout: Some(Duration::from_secs(30)), + smtp_timeout: None, yahoo_verif_method: YahooVerifMethod::default(), gmail_verif_method: GmailVerifMethod::default(), hotmailb2b_verif_method: HotmailB2BVerifMethod::default(), @@ -251,6 +265,9 @@ impl Default for CheckEmailInput { check_gravatar: false, haveibeenpwned_api_key: None, retries: 1, + webdriver_addr: "http://localhost:9515".into(), + backend_name: "backend-dev".into(), + sentry_dsn: None, } } } diff --git a/core/src/util/sentry.rs b/core/src/util/sentry.rs index 6bc184dd8..391cf2761 100644 --- a/core/src/util/sentry.rs +++ b/core/src/util/sentry.rs @@ -20,29 +20,22 @@ //! `check-if-email-exists` are known errors, in which case we don't log them //! to Sentry.
-use async_smtp::smtp::error::Error as AsyncSmtpError; -use sentry::protocol::{Event, Exception, Level, Values}; -use serde::Deserialize; -use tracing::{debug, info}; - use crate::misc::MiscError; use crate::mx::MxError; use crate::LOG_TARGET; use crate::{smtp::SmtpError, CheckEmailOutput}; +use async_smtp::smtp::error::Error as AsyncSmtpError; +use sentry::protocol::{Event, Exception, Level, Values}; +use thiserror::Error; +use tracing::{debug, info}; const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION"); -#[derive(Clone, Debug, Default, Deserialize)] -pub struct SentryConfig { - pub dsn: String, - pub backend_name: String, -} - /// Setup Sentry. -pub fn setup_sentry(config: &SentryConfig) -> sentry::ClientInitGuard { +pub fn setup_sentry(sentry_dsn: &str) -> sentry::ClientInitGuard { // Use an empty string if we don't have any env variable for sentry. Sentry // will just silently ignore. - let sentry = sentry::init(config.dsn.clone()); + let sentry = sentry::init(sentry_dsn); if sentry.is_enabled() { info!(target: LOG_TARGET, "Sentry is successfully set up.") } @@ -50,12 +43,13 @@ pub fn setup_sentry(config: &SentryConfig) -> sentry::ClientInitGuard { sentry } -#[derive(Debug)] +#[derive(Debug, Error)] enum SentryError<'a> { - // TODO: Probably a good idea would be to `impl std:error:Error` for the - // three errors below. + #[error("MiscError: {0}")] Misc(&'a MiscError), + #[error("MxError: {0}")] Mx(&'a MxError), + #[error("SmtpError: {0}")] Smtp(&'a SmtpError), } @@ -72,7 +66,7 @@ impl<'a> SentryError<'a> { /// Helper function to send an Error event to Sentry. We redact all sensitive /// info before sending to Sentry, by removing all instances of `username`. 
-fn error(err: SentryError, result: &CheckEmailOutput, config: &SentryConfig) { +fn error(err: SentryError, result: &CheckEmailOutput, backend_name: &str) { let exception_value = redact(format!("{err:?}").as_str(), &result.syntax.username); debug!(target: LOG_TARGET, "Sending error to Sentry: {}", exception_value); @@ -90,7 +84,7 @@ fn error(err: SentryError, result: &CheckEmailOutput, config: &SentryConfig) { environment: Some("production".into()), release: Some(CARGO_PKG_VERSION.into()), message: Some(format!("{result:#?}")), - server_name: Some(config.backend_name.clone().into()), + server_name: Some(backend_name.to_string().into()), transaction: Some(format!("check_email:{}", result.syntax.domain)), ..Default::default() }); @@ -114,15 +108,15 @@ fn skip_smtp_transient_errors(message: &[String]) -> bool { /// Checks if the output from `check-if-email-exists` has a known error, in /// which case we don't log to Sentry to avoid spamming it. -pub fn log_unknown_errors(result: &CheckEmailOutput, config: &SentryConfig) { +pub fn log_unknown_errors(result: &CheckEmailOutput, backend_name: &str) { match (&result.misc, &result.mx, &result.smtp) { (Err(err), _, _) => { // We log all misc errors. - error(SentryError::Misc(err), result, config); + error(SentryError::Misc(err), result, backend_name); } (_, Err(err), _) => { // We log all mx errors. - error(SentryError::Mx(err), result, config); + error(SentryError::Mx(err), result, backend_name); } (_, _, Err(err)) if err.get_description().is_some() => { // If the SMTP error is known, we don't track it in Sentry. @@ -145,7 +139,7 @@ pub fn log_unknown_errors(result: &CheckEmailOutput, config: &SentryConfig) { // Sentry, to be able to debug them better. We don't want to // spam Sentry and log all instances of the error, hence the // `count` check. - error(SentryError::Smtp(err), result, config); + error(SentryError::Smtp(err), result, backend_name); } // If everything is ok, we just return the result. 
(Ok(_), Ok(_), Ok(_)) => {} From 3bb5052791fe3fa8c03484d4aaa4001b03c2f5b9 Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1093@users.noreply.github.com> Date: Mon, 25 Nov 2024 21:37:22 +0100 Subject: [PATCH 2/7] Fixed --- backend/Dockerfile | 3 +++ backend/backend_config.toml | 37 ++++++++++++--------------- backend/src/config.rs | 18 ++++++------- backend/src/main.rs | 8 +++--- core/src/smtp/outlook/microsoft365.rs | 2 ++ docker-compose.yaml | 4 +-- 6 files changed, 36 insertions(+), 36 deletions(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index e5cb8ddb0..7da16a42d 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -42,6 +42,9 @@ USER chrome ENV RUST_LOG=reacher=info ENV RCH__HTTP_HOST=0.0.0.0 +# Currently this Dockerfile is mainly used for single-shot verifications, so we +# disable the worker by default. +ENV RCH__WORKER__ENABLED=false EXPOSE 8080 diff --git a/backend/backend_config.toml b/backend/backend_config.toml index 66ed9cd23..a836bb69f 100644 --- a/backend/backend_config.toml +++ b/backend/backend_config.toml @@ -57,22 +57,9 @@ hotmailb2c = "headless" yahoo = "headless" [worker] -enable = false - -# Fields below are only used if the worker is enabled. - -# Throttle the maximum number of requests per second, per minute, per hour, and -# per day for this worker. -# All fields are optional; comment them out to disable the limit. -# -# Important: these throttle configurations only apply to bulk verification and -# not to the single /v0/check_email endpoint. The latter endpoint always -# executes the verification immediately, regardless of the throttle settings. -[worker.throttle] -# max_requests_per_second = 20 -# max_requests_per_minute = 100 -# max_requests_per_hour = 1000 -# max_requests_per_day = 20000 +# Enable the worker to consume emails from the RabbitMQ queues. If set, the +# RabbitMQ configuration below must be set as well. +enable = true # RabbitMQ configuration. 
[worker.rabbitmq] @@ -97,17 +84,25 @@ queues = "all" # Number of concurrent emails to verify for this worker across all queues. concurrency = 20 +# Throttle the maximum number of requests per second, per minute, per hour, and +# per day for this worker. +# All fields are optional; comment them out to disable the limit. +# +# Important: these throttle configurations only apply to /v1/* endpoints, and +# not to the previous /v0/check_email endpoint. The latter endpoint always +# executes the verification immediately, regardless of the throttle settings. +[worker.throttle] +# max_requests_per_second = 20 +# max_requests_per_minute = 100 +# max_requests_per_hour = 1000 +# max_requests_per_day = 20000 + # Postgres configuration. Currently, a Postgres database is required to store # the results of the verifications. This might change in the future, allowing # for pluggable storage. [worker.postgres] db_url = "postgresql://localhost/reacherdb" -# Optional webhook URL to send the results to. This will send one POST request -# per email verification, with the result in the body. -# [worker.webhook.on_each_email] -# url = "http://localhost:8080/webhook" - # Optional Sentry configuration. If set, all errors will be sent to Sentry. # [sentry] # dsn = "" diff --git a/backend/src/config.rs b/backend/src/config.rs index 12900ce25..5b60184a0 100644 --- a/backend/src/config.rs +++ b/backend/src/config.rs @@ -28,13 +28,13 @@ use config::Config; #[cfg(feature = "worker")] use lapin::Channel; use serde::de::{self, Deserializer, Visitor}; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use sqlx::PgPool; #[cfg(feature = "worker")] use std::sync::Arc; use std::{env, fmt}; -#[derive(Debug, Default, Deserialize)] +#[derive(Debug, Default, Serialize, Deserialize)] pub struct BackendConfig { /// Name of the backend. 
pub backend_name: String, @@ -135,7 +135,7 @@ impl BackendConfig { } } -#[derive(Debug, Default, Deserialize, Clone)] +#[derive(Debug, Default, Deserialize, Clone, Serialize)] pub struct VerifMethodConfig { /// Verification method for Gmail emails. pub gmail: GmailVerifMethod, @@ -147,7 +147,7 @@ pub struct VerifMethodConfig { pub yahoo: YahooVerifMethod, } -#[derive(Debug, Default, Deserialize, Clone)] +#[derive(Debug, Default, Deserialize, Clone, Serialize)] pub struct WorkerConfig { pub enable: bool, @@ -179,7 +179,7 @@ pub struct MustWorkerConfig { pub postgres: PostgresConfig, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize)] pub enum RabbitMQQueues { All, Only(Vec), @@ -244,7 +244,7 @@ impl RabbitMQQueues { } } -#[derive(Debug, Deserialize, Clone)] +#[derive(Debug, Deserialize, Clone, Serialize)] pub struct RabbitMQConfig { pub url: String, /// Queues to consume emails from. By default the worker consumes from all @@ -271,7 +271,7 @@ pub struct RabbitMQConfig { /// Queue names that the worker can consume from. Each email is routed to a /// one and only one queue, based on the email provider. A single worker can /// consume from multiple queues. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize)] pub enum Queue { Gmail, HotmailB2B, @@ -335,12 +335,12 @@ impl<'de> Deserialize<'de> for Queue { } } -#[derive(Debug, Deserialize, Clone)] +#[derive(Debug, Deserialize, Clone, Serialize)] pub struct PostgresConfig { pub db_url: String, } -#[derive(Debug, Deserialize, Clone)] +#[derive(Debug, Deserialize, Clone, Serialize)] pub struct ThrottleConfig { pub max_requests_per_second: Option, pub max_requests_per_minute: Option, diff --git a/backend/src/main.rs b/backend/src/main.rs index 27d2add8e..fcdea71f1 100644 --- a/backend/src/main.rs +++ b/backend/src/main.rs @@ -18,13 +18,12 @@ //! functions, depending on whether the `bulk` feature is enabled or not. 
use check_if_email_exists::{setup_sentry, LOG_TARGET}; +use reacher_backend::config::load_config; +use reacher_backend::http::run_warp_server; #[cfg(feature = "worker")] use reacher_backend::worker::run_worker; use std::sync::Arc; -use tracing::info; - -use reacher_backend::config::load_config; -use reacher_backend::http::run_warp_server; +use tracing::{debug, info}; const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION"); @@ -35,6 +34,7 @@ async fn main() -> Result<(), anyhow::Error> { tracing_subscriber::fmt::init(); info!(target: LOG_TARGET, version=?CARGO_PKG_VERSION, "Running Reacher"); let config = load_config().await?; + debug!(target: LOG_TARGET, "{:#?}", config); // Setup sentry bug tracking. let _guard: sentry::ClientInitGuard; diff --git a/core/src/smtp/outlook/microsoft365.rs b/core/src/smtp/outlook/microsoft365.rs index 0e8d942b2..894dc5278 100644 --- a/core/src/smtp/outlook/microsoft365.rs +++ b/core/src/smtp/outlook/microsoft365.rs @@ -38,6 +38,7 @@ impl From for Microsoft365Error { } /// Convert an email address to its corresponding OneDrive URL. +#[allow(dead_code)] fn get_onedrive_url(email_address: &str) -> String { let (username, domain) = email_address .split_once('@') @@ -64,6 +65,7 @@ fn get_onedrive_url(email_address: &str) -> String { /// a reliable indicator that an email-address is valid. However, a negative /// response is ambigious: the email address may or may not be valid but this /// cannot be determined by the method outlined here. 
+#[allow(dead_code)] pub async fn check_microsoft365_api( to_email: &EmailAddress, input: &CheckEmailInput, diff --git a/docker-compose.yaml b/docker-compose.yaml index 24bd3458c..a95d35dcc 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -24,7 +24,7 @@ services: restart: always worker1: - image: reacherhq/backend:v0.10.0-beta.0 + image: reacherhq/backend:v0.10.0-beta.2 container_name: worker1 ports: - "8080:8080" @@ -41,7 +41,7 @@ services: restart: always worker2: - image: reacherhq/backend:v0.10.0-beta.0 + image: reacherhq/backend:v0.10.0-beta.2 container_name: worker2 ports: - "8081:8080" From 0f5cb4858e015db10249bf6c5d65375e499322d9 Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1093@users.noreply.github.com> Date: Mon, 25 Nov 2024 21:43:18 +0100 Subject: [PATCH 3/7] Fix request --- backend/src/http/v0/check_email/post.rs | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/backend/src/http/v0/check_email/post.rs b/backend/src/http/v0/check_email/post.rs index 8189ccfdf..ddd9e372d 100644 --- a/backend/src/http/v0/check_email/post.rs +++ b/backend/src/http/v0/check_email/post.rs @@ -17,7 +17,8 @@ //! This file implements the `POST /v0/check_email` endpoint. 
use check_if_email_exists::{ - check_email, CheckEmailInput, CheckEmailInputProxy, GmailVerifMethod, LOG_TARGET, + check_email, CheckEmailInput, CheckEmailInputProxy, GmailVerifMethod, HotmailB2BVerifMethod, + HotmailB2CVerifMethod, YahooVerifMethod, LOG_TARGET, }; use serde::{Deserialize, Serialize}; use std::sync::Arc; @@ -34,10 +35,11 @@ pub struct CheckEmailRequest { pub from_email: Option, pub hello_name: Option, pub gmail_verif_method: Option, - pub hotmailb2b_verif_method: Option, - pub hotmailb2c_verif_method: Option, - pub yahoo_verif_method: Option, + pub hotmailb2b_verif_method: Option, + pub hotmailb2c_verif_method: Option, + pub yahoo_verif_method: Option, pub proxy: Option, + pub smtp_port: Option, } impl CheckEmailRequest { @@ -46,16 +48,21 @@ impl CheckEmailRequest { to_email: self.to_email.clone(), from_email: self.from_email.clone().unwrap_or(config.from_email.clone()), hello_name: self.hello_name.clone().unwrap_or(config.hello_name.clone()), - gmail_verif_method: config.verif_method.gmail, - hotmailb2b_verif_method: config.verif_method.hotmailb2b, - hotmailb2c_verif_method: config.verif_method.hotmailb2c, - yahoo_verif_method: config.verif_method.yahoo, + gmail_verif_method: self.gmail_verif_method.unwrap_or(config.verif_method.gmail), + hotmailb2b_verif_method: self + .hotmailb2b_verif_method + .unwrap_or(config.verif_method.hotmailb2b), + hotmailb2c_verif_method: self + .hotmailb2c_verif_method + .unwrap_or(config.verif_method.hotmailb2c), + yahoo_verif_method: self.yahoo_verif_method.unwrap_or(config.verif_method.yahoo), proxy: self .proxy .as_ref() .or_else(|| config.proxy.as_ref()) .cloned(), smtp_timeout: config.smtp_timeout.map(Duration::from_secs), + smtp_port: self.smtp_port.unwrap_or_default(), sentry_dsn: config.sentry_dsn.clone(), ..Default::default() } From fa868f7e049c48fdd0c95cdd2ea6b80255407a8e Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1093@users.noreply.github.com> Date: Mon, 25 Nov 2024 21:49:51 +0100 Subject: [PATCH 
4/7] fix build --- backend/src/http/v1/bulk/post.rs | 1 + cli/src/main.rs | 14 +++++--------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/backend/src/http/v1/bulk/post.rs b/backend/src/http/v1/bulk/post.rs index 3ade77ab4..3b34ac432 100644 --- a/backend/src/http/v1/bulk/post.rs +++ b/backend/src/http/v1/bulk/post.rs @@ -85,6 +85,7 @@ async fn http_handler( hotmailb2b_verif_method: None, hotmailb2c_verif_method: None, yahoo_verif_method: None, + smtp_port: None, }; Ok(PreprocessTask { diff --git a/cli/src/main.rs b/cli/src/main.rs index 560079035..0792a02a1 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -15,8 +15,8 @@ // along with this program. If not, see . use check_if_email_exists::{ - check_email, config::ReacherConfig, CheckEmailInputBuilder, CheckEmailInputProxy, - GmailVerifMethod, HotmailB2BVerifMethod, HotmailB2CVerifMethod, YahooVerifMethod, + check_email, CheckEmailInputBuilder, CheckEmailInputProxy, GmailVerifMethod, + HotmailB2BVerifMethod, HotmailB2CVerifMethod, YahooVerifMethod, }; use clap::Parser; use once_cell::sync::Lazy; @@ -104,7 +104,8 @@ async fn main() -> Result<(), anyhow::Error> { .hotmailb2b_verif_method(CONF.hotmailb2b_verif_method) .hotmailb2c_verif_method(CONF.hotmailb2c_verif_method) .check_gravatar(CONF.check_gravatar) - .haveibeenpwned_api_key(CONF.haveibeenpwned_api_key.clone()); + .haveibeenpwned_api_key(CONF.haveibeenpwned_api_key.clone()) + .backend_name("reacher-cli".to_string()); if let Some(proxy_host) = &CONF.proxy_host { input = input.proxy(Some(CheckEmailInputProxy { @@ -116,12 +117,7 @@ async fn main() -> Result<(), anyhow::Error> { } let input = input.build()?; - let config = ReacherConfig { - backend_name: "reacher-cli".to_string(), - ..Default::default() - }; - - let result = check_email(&input, &config).await; + let result = check_email(&input).await; match serde_json::to_string_pretty(&result) { Ok(output) => { From 46aaeee800818cb7a00e07c71e6c5c97a7411c0b Mon Sep 17 00:00:00 2001 From: 
Amaury <1293565+amaury1093@users.noreply.github.com> Date: Mon, 25 Nov 2024 21:51:16 +0100 Subject: [PATCH 5/7] fic docs --- core/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/lib.rs b/core/src/lib.rs index 0cb0415f5..5aaa2cdfd 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -35,7 +35,6 @@ //! //! ```rust //! use check_if_email_exists::{check_email, CheckEmailInputBuilder, CheckEmailInputProxy}; -//! use check_if_email_exists::config::ReacherConfig; //! //! async fn check() { //! // Let's say we want to test the deliverability of someone@gmail.com. From 114db1ebd7cc150c726af9e576e552c77ff09c04 Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1093@users.noreply.github.com> Date: Mon, 25 Nov 2024 22:03:08 +0100 Subject: [PATCH 6/7] test --- backend/src/config.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/src/config.rs b/backend/src/config.rs index 5b60184a0..514acd1d5 100644 --- a/backend/src/config.rs +++ b/backend/src/config.rs @@ -414,7 +414,6 @@ pub async fn load_config() -> Result { mod test { #[tokio::test] async fn test_load_config() { - let cfg = super::load_config().await; - assert!(cfg.is_ok()); + super::load_config().await.unwrap(); } } From 8a4d214198e7c53aee841b0ca74c0538e9815ceb Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1093@users.noreply.github.com> Date: Mon, 25 Nov 2024 22:09:40 +0100 Subject: [PATCH 7/7] remove test --- backend/src/config.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/backend/src/config.rs b/backend/src/config.rs index 514acd1d5..4e9337852 100644 --- a/backend/src/config.rs +++ b/backend/src/config.rs @@ -409,11 +409,3 @@ pub async fn load_config() -> Result { Ok(cfg) } - -#[cfg(test)] -mod test { - #[tokio::test] - async fn test_load_config() { - super::load_config().await.unwrap(); - } -}