From f4a496692cdbaf611f01a588666fa3a2964d9884 Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1729@users.noreply.github.com> Date: Tue, 24 Oct 2023 17:11:35 +0200 Subject: [PATCH 1/4] Move file --- backend/src/routes/bulk/results/csv.rs | 15 +++++++++++++++ .../routes/bulk/{results.rs => results/mod.rs} | 15 +++++---------- 2 files changed, 20 insertions(+), 10 deletions(-) create mode 100644 backend/src/routes/bulk/results/csv.rs rename backend/src/routes/bulk/{results.rs => results/mod.rs} (98%) diff --git a/backend/src/routes/bulk/results/csv.rs b/backend/src/routes/bulk/results/csv.rs new file mode 100644 index 000000000..c6a44f9b3 --- /dev/null +++ b/backend/src/routes/bulk/results/csv.rs @@ -0,0 +1,15 @@ +// Reacher - Email Verification +// Copyright (C) 2018-2022 Reacher + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
diff --git a/backend/src/routes/bulk/results.rs b/backend/src/routes/bulk/results/mod.rs similarity index 98% rename from backend/src/routes/bulk/results.rs rename to backend/src/routes/bulk/results/mod.rs index 815918e5d..a1f864b85 100644 --- a/backend/src/routes/bulk/results.rs +++ b/backend/src/routes/bulk/results/mod.rs @@ -314,13 +314,8 @@ async fn job_result( )) } JobResultResponseFormat::Csv => { - let data = job_result_csv( - job_id, - req.limit.unwrap_or(5000), - req.offset.unwrap_or(0), - conn_pool, - ) - .await?; + let data = + job_result_csv(job_id, req.limit, req.offset.unwrap_or(0), conn_pool).await?; Ok(warp::reply::with_header(data, "Content-Type", "text/csv")) } @@ -329,7 +324,7 @@ async fn job_result( async fn job_result_json( job_id: i32, - limit: u64, + limit: Option, offset: u64, conn_pool: Pool, ) -> Result, warp::Rejection> { @@ -369,7 +364,7 @@ async fn job_result_json( async fn job_result_csv( job_id: i32, - limit: u64, + limit: Option, offset: u64, conn_pool: Pool, ) -> Result, warp::Rejection> { @@ -381,7 +376,7 @@ async fn job_result_csv( LIMIT $2 OFFSET $3 "#, job_id, - limit as i64, + limit, offset as i64 ); From f34b01265e222181f4099701a2b391fe2996fabc Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1729@users.noreply.github.com> Date: Tue, 24 Oct 2023 18:17:32 +0200 Subject: [PATCH 2/4] Fix compiler --- backend/src/routes/bulk/results/csv.rs | 15 - backend/src/routes/bulk/results/csv_helper.rs | 205 ++++++++++++ backend/src/routes/bulk/results/mod.rs | 294 +++--------------- 3 files changed, 251 insertions(+), 263 deletions(-) delete mode 100644 backend/src/routes/bulk/results/csv.rs create mode 100644 backend/src/routes/bulk/results/csv_helper.rs diff --git a/backend/src/routes/bulk/results/csv.rs b/backend/src/routes/bulk/results/csv.rs deleted file mode 100644 index c6a44f9b3..000000000 --- a/backend/src/routes/bulk/results/csv.rs +++ /dev/null @@ -1,15 +0,0 @@ -// Reacher - Email Verification -// Copyright (C) 2018-2022 
Reacher - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU Affero General Public License as published -// by the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU Affero General Public License for more details. - -// You should have received a copy of the GNU Affero General Public License -// along with this program. If not, see . diff --git a/backend/src/routes/bulk/results/csv_helper.rs b/backend/src/routes/bulk/results/csv_helper.rs new file mode 100644 index 000000000..81f88ca28 --- /dev/null +++ b/backend/src/routes/bulk/results/csv_helper.rs @@ -0,0 +1,205 @@ +// Reacher - Email Verification +// Copyright (C) 2018-2022 Reacher + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published +// by the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. + +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +use serde::Serialize; +use std::convert::TryFrom; + +/// Wrapper for serde json value to convert +/// into a csv response +#[derive(Debug)] +pub struct CsvWrapper(pub serde_json::Value); + +/// Simplified output of `CheckEmailOutput` struct +/// for csv fields. 
+#[derive(Debug, Serialize)] +pub struct JobResultCsvResponse { + input: String, + is_reachable: String, + #[serde(rename = "misc.is_disposable")] + misc_is_disposable: bool, + #[serde(rename = "misc.is_role_account")] + misc_is_role_account: bool, + #[serde(rename = "misc.gravatar_url")] + misc_gravatar_url: Option, + #[serde(rename = "mx.accepts_mail")] + mx_accepts_mail: bool, + #[serde(rename = "smtp.can_connect")] + smtp_can_connect: bool, + #[serde(rename = "smtp.has_full_inbox")] + smtp_has_full_inbox: bool, + #[serde(rename = "smtp.is_catch_all")] + smtp_is_catch_all: bool, + #[serde(rename = "smtp.is_deliverable")] + smtp_is_deliverable: bool, + #[serde(rename = "smtp.is_disabled")] + smtp_is_disabled: bool, + #[serde(rename = "syntax.is_valid_syntax")] + syntax_is_valid_syntax: bool, + #[serde(rename = "syntax.domain")] + syntax_domain: String, + #[serde(rename = "syntax.username")] + syntax_username: String, + error: Option, +} + +/// Convert csv wrapper to csv response +/// Performs multiple allocations for string fields +/// throw error if field is missing +impl TryFrom for JobResultCsvResponse { + type Error = &'static str; + + fn try_from(value: CsvWrapper) -> Result { + let mut input: String = String::default(); + let mut is_reachable: String = String::default(); + let mut misc_is_disposable: bool = false; + let mut misc_is_role_account: bool = false; + let mut misc_gravatar_url: Option = None; + let mut mx_accepts_mail: bool = false; + let mut smtp_can_connect: bool = false; + let mut smtp_has_full_inbox: bool = false; + let mut smtp_is_catch_all: bool = false; + let mut smtp_is_deliverable: bool = false; + let mut smtp_is_disabled: bool = false; + let mut syntax_is_valid_syntax: bool = false; + let mut syntax_domain: String = String::default(); + let mut syntax_username: String = String::default(); + let mut error: Option = None; + + let top_level = value + .0 + .as_object() + .ok_or("Failed to find top level object")?; + for (key, val) in 
top_level.keys().zip(top_level.values()) { + match key.as_str() { + "input" => input = val.as_str().ok_or("input should be a string")?.to_string(), + "is_reachable" => { + is_reachable = val + .as_str() + .ok_or("is_reachable should be a string")? + .to_string() + } + "misc" => { + let misc_obj = val.as_object().ok_or("misc field should be an object")?; + for (key, val) in misc_obj.keys().zip(misc_obj.values()) { + match key.as_str() { + "error" => error = Some(val.to_string()), + "is_disposable" => { + misc_is_disposable = + val.as_bool().ok_or("is_disposable should be a boolean")? + } + "is_role_account" => { + misc_is_role_account = + val.as_bool().ok_or("is_role_account should be a boolean")? + } + "gravatar_url" => { + if Option::is_some(&val.as_str()) { + misc_gravatar_url = Some(val.to_string()) + } + } + _ => {} + } + } + } + "mx" => { + let mx_obj = val.as_object().ok_or("mx field should be an object")?; + for (key, val) in mx_obj.keys().zip(mx_obj.values()) { + match key.as_str() { + "error" => error = Some(val.to_string()), + "accepts_email" => { + mx_accepts_mail = + val.as_bool().ok_or("accepts_email should be a boolean")? + } + _ => {} + } + } + } + "smtp" => { + let smtp_obj = val.as_object().ok_or("mx field should be an object")?; + for (key, val) in smtp_obj.keys().zip(smtp_obj.values()) { + match key.as_str() { + "error" => error = Some(val.to_string()), + "can_connect_smtp" => { + smtp_can_connect = val + .as_bool() + .ok_or("can_connect_smtp should be a boolean")? + } + "has_full_inbox" => { + smtp_has_full_inbox = + val.as_bool().ok_or("has_full_inbox should be a boolean")? + } + "is_catch_all" => { + smtp_is_catch_all = + val.as_bool().ok_or("is_catch_all should be a boolean")? + } + "is_deliverable" => { + smtp_is_deliverable = + val.as_bool().ok_or("is_deliverable should be a boolean")? + } + "is_disabled" => { + smtp_is_disabled = + val.as_bool().ok_or("is_disabled should be a boolean")? 
+ } + _ => {} + } + } + } + "syntax" => { + let syntax_obj = val.as_object().ok_or("syntax field should be an object")?; + for (key, val) in syntax_obj.keys().zip(syntax_obj.values()) { + match key.as_str() { + "error" => error = Some(val.to_string()), + "is_valid_syntax" => { + syntax_is_valid_syntax = + val.as_bool().ok_or("is_valid_syntax should be a boolean")? + } + "username" => { + syntax_username = val + .as_str() + .ok_or("username should be a string")? + .to_string() + } + "domain" => { + syntax_domain = + val.as_str().ok_or("domain should be a string")?.to_string() + } + _ => {} + } + } + } + // ignore unknown fields + _ => {} + } + } + + Ok(JobResultCsvResponse { + input, + is_reachable, + misc_is_disposable, + misc_is_role_account, + misc_gravatar_url, + mx_accepts_mail, + smtp_can_connect, + smtp_has_full_inbox, + smtp_is_catch_all, + smtp_is_deliverable, + smtp_is_disabled, + syntax_domain, + syntax_is_valid_syntax, + syntax_username, + error, + }) + } +} diff --git a/backend/src/routes/bulk/results/mod.rs b/backend/src/routes/bulk/results/mod.rs index a1f864b85..1b3d4fac7 100644 --- a/backend/src/routes/bulk/results/mod.rs +++ b/backend/src/routes/bulk/results/mod.rs @@ -16,18 +16,21 @@ //! This file implements the /bulk/{id}/results endpoints. -use std::convert::{TryFrom, TryInto}; - use check_if_email_exists::LOG_TARGET; use csv::WriterBuilder; use serde::{Deserialize, Serialize}; use sqlx::{Executor, Pool, Postgres, Row}; +use std::convert::TryInto; +use std::iter::Iterator; use warp::Filter; use super::{ db::with_db, error::{BulkError, CsvError}, }; +use csv_helper::{CsvWrapper, JobResultCsvResponse}; + +mod csv_helper; /// Defines the download format, passed in as a query param. 
#[derive(Serialize, Deserialize)] @@ -52,193 +55,6 @@ struct JobResultJsonResponse { results: Vec, } -/// Wrapper for serde json value to convert -/// into a csv response -#[derive(Debug)] -struct CsvWrapper(serde_json::Value); - -/// Simplified output of `CheckEmailOutput` struct -/// for csv fields. -#[derive(Debug, Serialize)] -struct JobResultCsvResponse { - input: String, - is_reachable: String, - #[serde(rename = "misc.is_disposable")] - misc_is_disposable: bool, - #[serde(rename = "misc.is_role_account")] - misc_is_role_account: bool, - #[serde(rename = "misc.gravatar_url")] - misc_gravatar_url: Option, - #[serde(rename = "mx.accepts_mail")] - mx_accepts_mail: bool, - #[serde(rename = "smtp.can_connect")] - smtp_can_connect: bool, - #[serde(rename = "smtp.has_full_inbox")] - smtp_has_full_inbox: bool, - #[serde(rename = "smtp.is_catch_all")] - smtp_is_catch_all: bool, - #[serde(rename = "smtp.is_deliverable")] - smtp_is_deliverable: bool, - #[serde(rename = "smtp.is_disabled")] - smtp_is_disabled: bool, - #[serde(rename = "syntax.is_valid_syntax")] - syntax_is_valid_syntax: bool, - #[serde(rename = "syntax.domain")] - syntax_domain: String, - #[serde(rename = "syntax.username")] - syntax_username: String, - error: Option, -} - -/// Convert csv wrapper to csv response -/// Performs multiple allocations for string fields -/// throw error if field is missing -impl TryFrom for JobResultCsvResponse { - type Error = &'static str; - - fn try_from(value: CsvWrapper) -> Result { - let mut input: String = String::default(); - let mut is_reachable: String = String::default(); - let mut misc_is_disposable: bool = false; - let mut misc_is_role_account: bool = false; - let mut misc_gravatar_url: Option = None; - let mut mx_accepts_mail: bool = false; - let mut smtp_can_connect: bool = false; - let mut smtp_has_full_inbox: bool = false; - let mut smtp_is_catch_all: bool = false; - let mut smtp_is_deliverable: bool = false; - let mut smtp_is_disabled: bool = false; - let 
mut syntax_is_valid_syntax: bool = false; - let mut syntax_domain: String = String::default(); - let mut syntax_username: String = String::default(); - let mut error: Option = None; - - let top_level = value - .0 - .as_object() - .ok_or("Failed to find top level object")?; - for (key, val) in top_level.keys().zip(top_level.values()) { - match key.as_str() { - "input" => input = val.as_str().ok_or("input should be a string")?.to_string(), - "is_reachable" => { - is_reachable = val - .as_str() - .ok_or("is_reachable should be a string")? - .to_string() - } - "misc" => { - let misc_obj = val.as_object().ok_or("misc field should be an object")?; - for (key, val) in misc_obj.keys().zip(misc_obj.values()) { - match key.as_str() { - "error" => error = Some(val.to_string()), - "is_disposable" => { - misc_is_disposable = - val.as_bool().ok_or("is_disposable should be a boolean")? - } - "is_role_account" => { - misc_is_role_account = - val.as_bool().ok_or("is_role_account should be a boolean")? - } - "gravatar_url" => { - if Option::is_some(&val.as_str()) { - misc_gravatar_url = Some(val.to_string()) - } - } - _ => {} - } - } - } - "mx" => { - let mx_obj = val.as_object().ok_or("mx field should be an object")?; - for (key, val) in mx_obj.keys().zip(mx_obj.values()) { - match key.as_str() { - "error" => error = Some(val.to_string()), - "accepts_email" => { - mx_accepts_mail = - val.as_bool().ok_or("accepts_email should be a boolean")? - } - _ => {} - } - } - } - "smtp" => { - let smtp_obj = val.as_object().ok_or("mx field should be an object")?; - for (key, val) in smtp_obj.keys().zip(smtp_obj.values()) { - match key.as_str() { - "error" => error = Some(val.to_string()), - "can_connect_smtp" => { - smtp_can_connect = val - .as_bool() - .ok_or("can_connect_smtp should be a boolean")? - } - "has_full_inbox" => { - smtp_has_full_inbox = - val.as_bool().ok_or("has_full_inbox should be a boolean")? 
- } - "is_catch_all" => { - smtp_is_catch_all = - val.as_bool().ok_or("is_catch_all should be a boolean")? - } - "is_deliverable" => { - smtp_is_deliverable = - val.as_bool().ok_or("is_deliverable should be a boolean")? - } - "is_disabled" => { - smtp_is_disabled = - val.as_bool().ok_or("is_disabled should be a boolean")? - } - _ => {} - } - } - } - "syntax" => { - let syntax_obj = val.as_object().ok_or("syntax field should be an object")?; - for (key, val) in syntax_obj.keys().zip(syntax_obj.values()) { - match key.as_str() { - "error" => error = Some(val.to_string()), - "is_valid_syntax" => { - syntax_is_valid_syntax = - val.as_bool().ok_or("is_valid_syntax should be a boolean")? - } - "username" => { - syntax_username = val - .as_str() - .ok_or("username should be a string")? - .to_string() - } - "domain" => { - syntax_domain = - val.as_str().ok_or("domain should be a string")?.to_string() - } - _ => {} - } - } - } - // ignore unknown fields - _ => {} - } - } - - Ok(JobResultCsvResponse { - input, - is_reachable, - misc_is_disposable, - misc_is_role_account, - misc_gravatar_url, - mx_accepts_mail, - smtp_can_connect, - smtp_has_full_inbox, - smtp_is_catch_all, - smtp_is_deliverable, - smtp_is_disabled, - syntax_domain, - syntax_is_valid_syntax, - syntax_username, - error, - }) - } -} - async fn job_result( job_id: i32, conn_pool: Pool, @@ -287,13 +103,8 @@ async fn job_result( let format = req.format.unwrap_or(JobResultResponseFormat::Json); match format { JobResultResponseFormat::Json => { - let data = job_result_json( - job_id, - req.limit.unwrap_or(50), - req.offset.unwrap_or(0), - conn_pool, - ) - .await?; + let data = + job_result_json(job_id, req.limit, req.offset.unwrap_or(0), conn_pool).await?; let reply = serde_json::to_vec(&JobResultJsonResponse { results: data }).map_err(|e| { @@ -322,12 +133,12 @@ async fn job_result( } } -async fn job_result_json( +async fn job_result_as_iter( job_id: i32, limit: Option, offset: u64, conn_pool: Pool, -) -> Result, 
warp::Rejection> { +) -> Result>, BulkError> { let query = sqlx::query!( r#" SELECT result FROM email_results @@ -336,30 +147,43 @@ async fn job_result_json( LIMIT $2 OFFSET $3 "#, job_id, - limit as i64, + limit.map(|l| l as i64), offset as i64 ); - let rows: Vec = conn_pool - .fetch_all(query) - .await - .map_err(|e| { - log::error!( - target: LOG_TARGET, - "Failed to get results for [job={}] [limit={}] [offset={}] with [error={}]", - job_id, - limit, - offset, - e - ); + let rows = conn_pool.fetch_all(query).await.map_err(|e| { + log::error!( + target: LOG_TARGET, + "Failed to get results for [job={}] [limit={}] [offset={}] with [error={}]", + job_id, + limit.map(|s| s.to_string()).unwrap_or("n/a".into()), + offset, + e + ); - BulkError::from(e) - })? - .iter() - .map(|row| row.get("result")) - .collect(); + BulkError::from(e) + })?; + + Ok(Box::new( + rows.into_iter() + .map(|row| row.get::("result")), + )) +} + +async fn job_result_json( + job_id: i32, + limit: Option, + offset: u64, + conn_pool: Pool, +) -> Result, warp::Rejection> { + // For JSON responses, we don't want to return more than 50 results at a + // time, to avoid too large a payload (unless the client specifies a limit) - Ok(rows) + Ok( + job_result_as_iter(job_id, limit.or(Some(50)), offset, conn_pool) + .await? + .collect(), + ) } async fn job_result_csv( @@ -368,42 +192,16 @@ async fn job_result_csv( offset: u64, conn_pool: Pool, ) -> Result, warp::Rejection> { - let query = sqlx::query!( - r#" - SELECT result FROM email_results - WHERE job_id = $1 - ORDER BY id - LIMIT $2 OFFSET $3 - "#, - job_id, - limit, - offset as i64 - ); - + let rows = job_result_as_iter(job_id, limit, offset, conn_pool).await?; let mut wtr = WriterBuilder::new().has_headers(true).from_writer(vec![]); - for json_value in conn_pool - .fetch_all(query) - .await - .map_err(|e| { - log::error!( - target: LOG_TARGET, - "Failed to get results for [job={}] with [error={}]", - job_id, - e - ); - - BulkError::from(e) - })? 
- .iter() - .map(|row| row.get("result")) - { + for json_value in rows { let result_csv: JobResultCsvResponse = CsvWrapper(json_value).try_into().map_err(|e: &'static str| { log::error!( target: LOG_TARGET, "Failed to convert json to csv output struct for [job={}] [limit={}] [offset={}] to csv with [error={}]", job_id, - limit, + limit.map(|s| s.to_string()).unwrap_or("n/a".into()), offset, e ); @@ -415,7 +213,7 @@ async fn job_result_csv( target: LOG_TARGET, "Failed to serialize result for [job={}] [limit={}] [offset={}] to csv with [error={}]", job_id, - limit, + limit.map(|s| s.to_string()).unwrap_or("n/a".into()), offset, e ); @@ -429,7 +227,7 @@ async fn job_result_csv( target: LOG_TARGET, "Failed to convert results for [job={}] [limit={}] [offset={}] to csv with [error={}]", job_id, - limit, + limit.map(|s| s.to_string()).unwrap_or("n/a".into()), offset, e ); From ce2923b5a80ae3389b0807d68d88f0988b7ad55f Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1729@users.noreply.github.com> Date: Tue, 24 Oct 2023 18:20:59 +0200 Subject: [PATCH 3/4] Update comments --- backend/src/routes/bulk/results/mod.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/src/routes/bulk/results/mod.rs b/backend/src/routes/bulk/results/mod.rs index 1b3d4fac7..a20703a53 100644 --- a/backend/src/routes/bulk/results/mod.rs +++ b/backend/src/routes/bulk/results/mod.rs @@ -41,8 +41,7 @@ enum JobResultResponseFormat { } // limit and offset are optional in the request -// if they are unspecified their default values -// are 50 and 0 respectively +// If unspecified, offset will default to 0. 
#[derive(Serialize, Deserialize)] struct JobResultRequest { format: Option, From 4530a1f7e83f5ebcab61d78e3bf56a909a3fa06a Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1729@users.noreply.github.com> Date: Tue, 24 Oct 2023 18:30:53 +0200 Subject: [PATCH 4/4] fix clippy --- backend/src/routes/bulk/results/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/src/routes/bulk/results/mod.rs b/backend/src/routes/bulk/results/mod.rs index a20703a53..b519270b8 100644 --- a/backend/src/routes/bulk/results/mod.rs +++ b/backend/src/routes/bulk/results/mod.rs @@ -155,7 +155,7 @@ async fn job_result_as_iter( target: LOG_TARGET, "Failed to get results for [job={}] [limit={}] [offset={}] with [error={}]", job_id, - limit.map(|s| s.to_string()).unwrap_or("n/a".into()), + limit.map(|s| s.to_string()).unwrap_or_else(|| "n/a".into()), offset, e ); @@ -200,7 +200,7 @@ async fn job_result_csv( target: LOG_TARGET, "Failed to convert json to csv output struct for [job={}] [limit={}] [offset={}] to csv with [error={}]", job_id, - limit.map(|s| s.to_string()).unwrap_or("n/a".into()), + limit.map(|s| s.to_string()).unwrap_or_else(|| "n/a".into()), offset, e ); @@ -212,7 +212,7 @@ async fn job_result_csv( target: LOG_TARGET, "Failed to serialize result for [job={}] [limit={}] [offset={}] to csv with [error={}]", job_id, - limit.map(|s| s.to_string()).unwrap_or("n/a".into()), + limit.map(|s| s.to_string()).unwrap_or_else(|| "n/a".into()), offset, e ); @@ -226,7 +226,7 @@ async fn job_result_csv( target: LOG_TARGET, "Failed to convert results for [job={}] [limit={}] [offset={}] to csv with [error={}]", job_id, - limit.map(|s| s.to_string()).unwrap_or("n/a".into()), + limit.map(|s| s.to_string()).unwrap_or_else(|| "n/a".into()), offset, e );