From f77a83272a5cde113b02e5f83c0134cb23bb069a Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1093@users.noreply.github.com> Date: Mon, 9 Dec 2024 22:53:31 +0100 Subject: [PATCH] Make it pass --- Cargo.lock | 1 - backend/Cargo.toml | 1 - backend/backend_config.toml | 13 +- backend/src/config.rs | 87 ++++------ backend/src/http/v1/check_email/post.rs | 27 ++-- .../src/storage/commercial_license_trial.rs | 151 ------------------ backend/src/storage/mod.rs | 36 +++-- backend/src/storage/postgres.rs | 15 +- backend/src/worker/do_work.rs | 9 +- 9 files changed, 73 insertions(+), 267 deletions(-) delete mode 100644 backend/src/storage/commercial_license_trial.rs diff --git a/Cargo.lock b/Cargo.lock index f16fee02d..7057289dd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2958,7 +2958,6 @@ version = "0.10.0" dependencies = [ "anyhow", "async-smtp", - "async-trait", "check-if-email-exists", "config", "csv", diff --git a/backend/Cargo.toml b/backend/Cargo.toml index dd63682b4..8e139bd14 100644 --- a/backend/Cargo.toml +++ b/backend/Cargo.toml @@ -8,7 +8,6 @@ publish = false [dependencies] anyhow = "1.0" async-smtp = { version = "0.9.1", features = ["runtime-tokio"] } -async-trait = "0.1" check-if-email-exists = { path = "../core", features = ["sentry"] } config = "0.14" csv = "1.3.0" diff --git a/backend/backend_config.toml b/backend/backend_config.toml index c5b28bb26..6a8978323 100644 --- a/backend/backend_config.toml +++ b/backend/backend_config.toml @@ -128,17 +128,8 @@ concurrency = 5 # results. We currently support the following storage backends: # - Postgres # -# You can configure one or more storages. The format of the configuration is: -# [storage..] -# where: -# - is a unique identifier for the storage, in case you have -# multiple storages of the same type. You can use 0, 1, 2, etc. as the -# identifier. -# - is the type of the storage, e.g. "postgres". -[storage] - -# Uncomment the following line to configure the 1st storage to use Postgres. -# [storage.0.postgres] +# Uncomment the following line to configure the storage to use Postgres. +# [storage.postgres] # # URL to connect to the Postgres database. # diff --git a/backend/src/config.rs b/backend/src/config.rs index 1ad69b4db..ac75539ac 100644 --- a/backend/src/config.rs +++ b/backend/src/config.rs @@ -14,8 +14,7 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -use crate::storage::commercial_license_trial::CommercialLicenseTrialStorage; -use crate::storage::{postgres::PostgresStorage, Storage}; +use crate::storage::{postgres::PostgresStorage, StorageAdapter}; use crate::worker::do_work::TaskWebhook; use crate::worker::setup_rabbit_mq; use anyhow::{bail, Context}; @@ -27,7 +26,6 @@ use config::Config; use lapin::Channel; use serde::{Deserialize, Serialize}; use sqlx::PgPool; -use std::collections::HashMap; use std::sync::Arc; use tracing::warn; @@ -62,14 +60,14 @@ pub struct BackendConfig { pub worker: WorkerConfig, /// Configuration on where to store the email verification results. - pub storage: HashMap, + pub storage: StorageConfig, // Internal fields, not part of the configuration. #[serde(skip)] channel: Option>, #[serde(skip)] - storages: Vec>, + storage_adapter: Arc, } impl BackendConfig { @@ -101,28 +99,15 @@ impl BackendConfig { /// Attempt connection to the Postgres database and RabbitMQ. Also populates /// the internal `pg_pool` and `channel` fields with the connections. pub async fn connect(&mut self) -> Result<(), anyhow::Error> { - if self.worker.enable && self.storage.is_empty() { - bail!("When worker.enable is true, you must configure at least one storage to store the email verification results."); - } + match &self.storage { + StorageConfig::Postgres(config) => { + let storage = PostgresStorage::new(&config.db_url, config.extra.clone()) + .await + .with_context(|| format!("Connecting to postgres DB {}", config.db_url))?; - for storage in self.storage.values() { - match storage { - StorageConfig::Postgres(config) => { - let storage = PostgresStorage::new(&config.db_url, config.extra.clone()) - .await - .with_context(|| format!("Connecting to postgres DB {}", config.db_url))?; - self.storages.push(Arc::new(storage)); - } - StorageConfig::CommercialLicenseTrial(config) => { - let storage = - CommercialLicenseTrialStorage::new(&config.db_url, config.extra.clone()) - .await - .with_context(|| { - format!("Connecting to postgres DB {}", config.db_url) - })?; - self.storages.push(Arc::new(storage)); - } + self.storage_adapter = Arc::new(StorageAdapter::Postgres(storage)); } + StorageConfig::Noop => {} } let channel = if self.worker.enable { @@ -141,25 +126,16 @@ impl BackendConfig { /// Get all storages as a Vec. We don't really care about the keys in the /// HashMap, except for deserialize purposes. - pub fn get_storages(&self) -> Vec> { - self.storages.clone() + pub fn get_storage_adapter(&self) -> Arc { + self.storage_adapter.clone() } - /// Get the Postgres connection pool, if at least one of the storages is a - /// Postgres storage. - /// - /// This is quite hacky, and it will most probably be refactored away in - /// future versions. We however need to rethink how to do the `/v1/bulk` - /// endpoints first. Simply using downcasting should be a warning sign that - /// we're doing something wrong. - /// - /// ref: https://github.com/reacherhq/check-if-email-exists/issues/1544 + /// Get the Postgres connection pool, if the storage is Postgres. pub fn get_pg_pool(&self) -> Option { - self.storages.iter().find_map(|s| { - s.as_any() - .downcast_ref::() - .map(|s| s.pg_pool.clone()) - }) + match self.storage_adapter.as_ref() { + StorageAdapter::Postgres(storage) => Some(storage.pg_pool.clone()), + StorageAdapter::Noop => None, + } } } @@ -224,15 +200,14 @@ impl ThrottleConfig { } } -#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] +#[derive(Debug, Default, Deserialize, Clone, PartialEq, Serialize)] #[serde(rename_all = "snake_case")] pub enum StorageConfig { /// Store the email verification results in the Postgres database. Postgres(PostgresConfig), - /// Store the email verification results in Reacher's DB. This storage - /// method is baked-in into the software for users of the Commercial - /// License trial. - CommercialLicenseTrial(PostgresConfig), + /// Don't store the email verification results. + #[default] + Noop, } #[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] @@ -265,12 +240,12 @@ mod tests { #[tokio::test] async fn test_env_vars() { env::set_var("RCH__BACKEND_NAME", "test-backend"); - env::set_var("RCH__STORAGE__1__POSTGRES__DB_URL", "test2"); + env::set_var("RCH__STORAGE__POSTGRES__DB_URL", "test2"); let cfg = load_config().await.unwrap(); assert_eq!(cfg.backend_name, "test-backend"); assert_eq!( - cfg.storage.get("1").unwrap(), - &StorageConfig::Postgres(PostgresConfig { + cfg.storage, + StorageConfig::Postgres(PostgresConfig { db_url: "test2".to_string(), extra: None, }) @@ -279,16 +254,12 @@ mod tests { #[tokio::test] async fn test_serialize_storage_config() { - let mut storage_config = HashMap::new(); - storage_config.insert( - "test1", - StorageConfig::Postgres(PostgresConfig { - db_url: "postgres://localhost:5432/test1".to_string(), - extra: None, - }), - ); + let storage_config = StorageConfig::Postgres(PostgresConfig { + db_url: "postgres://localhost:5432/test1".to_string(), + extra: None, + }); - let expected = r#"[test1.postgres] + let expected = r#"[postgres] db_url = "postgres://localhost:5432/test1" "#; diff --git a/backend/src/http/v1/check_email/post.rs b/backend/src/http/v1/check_email/post.rs index ddd595d0b..dbe186233 100644 --- a/backend/src/http/v1/check_email/post.rs +++ b/backend/src/http/v1/check_email/post.rs @@ -56,20 +56,19 @@ async fn http_handler( let value = Ok(result); // Also store the result "manually", since we don't have a worker. - for storage in config.get_storages() { - storage - .store( - &CheckEmailTask { - input: input.clone(), - job_id: CheckEmailJobId::SingleShot, - webhook: None, - }, - &value, - storage.get_extra(), - ) - .map_err(ReacherResponseError::from) - .await?; - } + let storage = config.get_storage_adapter(); + storage + .store( + &CheckEmailTask { + input: input.clone(), + job_id: CheckEmailJobId::SingleShot, + webhook: None, + }, + &value, + storage.get_extra(), + ) + .map_err(ReacherResponseError::from) + .await?; let result_bz = serde_json::to_vec(&value).map_err(ReacherResponseError::from)?; diff --git a/backend/src/storage/commercial_license_trial.rs b/backend/src/storage/commercial_license_trial.rs deleted file mode 100644 index 54a38d79e..000000000 --- a/backend/src/storage/commercial_license_trial.rs +++ /dev/null @@ -1,151 +0,0 @@ -// Reacher - Email Verification -// Copyright (C) 2018-2023 Reacher - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published -// by the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. - -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . - -use super::error::StorageError; -use super::postgres::PostgresStorage; -use super::Storage; -use crate::worker::do_work::{CheckEmailJobId, CheckEmailTask, TaskError}; -use async_trait::async_trait; -use check_if_email_exists::{redact, CheckEmailOutput, LOG_TARGET}; -use serde_json::Value; -use std::any::Any; -use tracing::debug; - -/// Storage that's baked in the software for users of the Commercial License -/// trial. It's really just a wrapper around the PostgresStorage, where we -/// redact all sensitive data such as the email address. -#[derive(Debug)] -pub struct CommercialLicenseTrialStorage { - postgres_storage: PostgresStorage, -} - -impl CommercialLicenseTrialStorage { - pub async fn new(db_url: &str, extra: Option) -> Result { - let postgres_storage = PostgresStorage::new(db_url, extra).await?; - Ok(Self { postgres_storage }) - } -} - -#[async_trait] -impl Storage for CommercialLicenseTrialStorage { - async fn store( - &self, - task: &CheckEmailTask, - worker_output: &Result, - extra: Option, - ) -> Result<(), StorageError> { - let mut payload_json = serde_json::to_value(task)?; - if let Ok(output) = worker_output { - redact_across_json(&mut payload_json, &output.syntax.username); - } - - match worker_output { - Ok(output) => { - let mut output_json = serde_json::to_value(output)?; - redact_across_json(&mut output_json, &output.syntax.username); - - sqlx::query!( - r#" - INSERT INTO v1_task_result (payload, job_id, extra, result) - VALUES ($1, $2, $3, $4) - RETURNING id - "#, - payload_json, - match task.job_id { - CheckEmailJobId::Bulk(job_id) => Some(job_id), - CheckEmailJobId::SingleShot => None, - }, - extra, - output_json, - ) - .fetch_one(&self.postgres_storage.pg_pool) - .await?; - } - Err(err) => { - sqlx::query!( - r#" - INSERT INTO v1_task_result (payload, job_id, extra, error) - VALUES ($1, $2, $3, $4) - RETURNING id - "#, - payload_json, - match task.job_id { - CheckEmailJobId::Bulk(job_id) => Some(job_id), - CheckEmailJobId::SingleShot => None, - }, - extra, - err.to_string(), - ) - .fetch_one(&self.postgres_storage.pg_pool) - .await?; - } - } - - debug!(target: LOG_TARGET, email=?task.input.to_email, "Wrote to DB"); - - Ok(()) - } - - fn get_extra(&self) -> Option { - self.postgres_storage.get_extra() - } - - // This is a workaround to allow downcasting to Any, and should be removed - // ref: https://github.com/reacherhq/check-if-email-exists/issues/1544 - fn as_any(&self) -> &dyn Any { - self - } -} - -/// Redact all sensitive data by recursively traversing the JSON object. -fn redact_across_json(value: &mut Value, username: &str) { - match value { - Value::String(s) => *s = redact(s, username), - Value::Array(arr) => { - for item in arr { - redact_across_json(item, username); - } - } - Value::Object(obj) => { - for (_, v) in obj { - redact_across_json(v, username); - } - } - _ => {} - } -} - -#[cfg(test)] -mod tests { - use super::*; - use check_if_email_exists::{check_email, CheckEmailInputBuilder}; - - #[tokio::test] - async fn should_redact_across_json() { - let input = CheckEmailInputBuilder::default() - // Checking this email will make a MX record check, but hopefully - // it won't resolve (since I typed it randomly), meaning that the - // SMTP check will be skipped. - .to_email("someone@adlkfjaklsdjfldksjfderlqkjeqwr.com".into()) - .build() - .unwrap(); - let output = check_email(&input).await; - let mut output_json = serde_json::to_value(&output).unwrap(); - redact_across_json(&mut output_json, &output.syntax.username); - - assert!(!output_json.to_string().contains("someone")); - } -} diff --git a/backend/src/storage/mod.rs b/backend/src/storage/mod.rs index 1daa514c5..a3f0d9fbc 100644 --- a/backend/src/storage/mod.rs +++ b/backend/src/storage/mod.rs @@ -14,29 +14,39 @@ // You should have received a copy of the GNU Affero General Public License // along with this program. If not, see . -pub mod commercial_license_trial; pub mod error; pub mod postgres; use crate::worker::do_work::{CheckEmailTask, TaskError}; -use async_trait::async_trait; use check_if_email_exists::CheckEmailOutput; use error::StorageError; -use std::any::Any; +use postgres::PostgresStorage; use std::fmt::Debug; -#[async_trait] -pub trait Storage: Debug + Send + Sync + Any { - async fn store( +#[derive(Debug, Default)] +pub enum StorageAdapter { + Postgres(PostgresStorage), + #[default] + Noop, +} + +impl StorageAdapter { + pub async fn store( &self, task: &CheckEmailTask, worker_output: &Result, extra: Option, - ) -> Result<(), StorageError>; - - fn get_extra(&self) -> Option; - - // This is a workaround to allow downcasting to Any, and should be removed - // ref: https://github.com/reacherhq/check-if-email-exists/issues/1544 - fn as_any(&self) -> &dyn Any; + ) -> Result<(), StorageError> { + match self { + StorageAdapter::Postgres(storage) => storage.store(task, worker_output, extra).await, + StorageAdapter::Noop => Ok(()), + } + } + + pub fn get_extra(&self) -> Option { + match self { + StorageAdapter::Postgres(storage) => storage.get_extra().clone(), + StorageAdapter::Noop => None, + } + } } diff --git a/backend/src/storage/postgres.rs b/backend/src/storage/postgres.rs index ff0523224..00b31a57d 100644 --- a/backend/src/storage/postgres.rs +++ b/backend/src/storage/postgres.rs @@ -15,9 +15,7 @@ // along with this program. If not, see . use super::error::StorageError; -use super::Storage; use crate::worker::do_work::{CheckEmailJobId, CheckEmailTask, TaskError}; -use async_trait::async_trait; use check_if_email_exists::{CheckEmailOutput, LOG_TARGET}; use sqlx::postgres::PgPoolOptions; use sqlx::PgPool; @@ -44,11 +42,8 @@ impl PostgresStorage { Ok(Self { pg_pool, extra }) } -} -#[async_trait] -impl Storage for PostgresStorage { - async fn store( + pub async fn store( &self, task: &CheckEmailTask, worker_output: &Result, @@ -102,13 +97,7 @@ impl Storage for PostgresStorage { Ok(()) } - fn get_extra(&self) -> Option { + pub fn get_extra(&self) -> Option { self.extra.clone() } - - // This is a workaround to allow downcasting to Any, and should be removed - // ref: https://github.com/reacherhq/check-if-email-exists/issues/1544 - fn as_any(&self) -> &dyn Any { - self - } } diff --git a/backend/src/worker/do_work.rs b/backend/src/worker/do_work.rs index 6d7e7dec1..1a4ae7cbb 100644 --- a/backend/src/worker/do_work.rs +++ b/backend/src/worker/do_work.rs @@ -145,11 +145,10 @@ pub(crate) async fn do_check_email_work( } // Store the result. - for storage in config.get_storages() { - storage - .store(task, &worker_output, storage.get_extra()) - .await?; - } + let storage = config.get_storage_adapter(); + storage + .store(task, &worker_output, storage.get_extra()) + .await?; info!(target: LOG_TARGET, email=task.input.to_email,