Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Remove ReacherConfig, use CheckEmailInput #1538

Merged
merged 7 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/deploy_cli.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ jobs:

windows:
runs-on: windows-latest
if: false # Disable Windows tests for now, because Reacher Worker is not supported on Windows
needs: install-cross
steps:
- uses: actions/checkout@v2
Expand Down
5 changes: 4 additions & 1 deletion backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ RUN chown chrome:chrome docker.sh
USER chrome

ENV RUST_LOG=reacher=info
ENV RCH_HTTP_HOST=0.0.0.0
ENV RCH__HTTP_HOST=0.0.0.0
# Currently this Dockerfile is mainly used for single-shot verifications, so we
# disable the worker by default.
ENV RCH__WORKER__ENABLED=false

EXPOSE 8080

Expand Down
46 changes: 26 additions & 20 deletions backend/backend_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,17 @@ from_email = "reacher@gmail.com"
# Address of the Chrome WebDriver server for headless email verifications.
webdriver_addr = "http://localhost:9515"

# Timeout for each SMTP connection, in seconds. Leaving it commented out will
# not set a timeout, i.e. the connection will wait indefinitely.
# smtp_timeout = 45

# Uncomment the following lines to route all SMTP verification requests through
# a specified proxy. Note that the proxy must be a SOCKS5 proxy to work with
# the SMTP protocol. This proxy will not be used for headless verifications.
#
# The username and password are optional and only needed if the proxy requires
# authentication.
#
# [proxy]
# host = "my.proxy.com"
# port = 1080
Expand All @@ -52,22 +57,9 @@ hotmailb2c = "headless"
yahoo = "headless"

[worker]
enable = false

# Fields below are only used if the worker is enabled.

# Throttle the maximum number of requests per second, per minute, per hour, and
# per day for this worker.
# All fields are optional; comment them out to disable the limit.
#
# Important: these throttle configurations only apply to bulk verification and
# not to the single /v0/check_email endpoint. The latter endpoint always
# executes the verification immediately, regardless of the throttle settings.
[worker.throttle]
# max_requests_per_second = 20
# max_requests_per_minute = 100
# max_requests_per_hour = 1000
# max_requests_per_day = 20000
# Enable the worker to consume emails from the RabbitMQ queues. If set, the
# RabbitMQ configuration below must be set as well.
enable = true

# RabbitMQ configuration.
[worker.rabbitmq]
Expand All @@ -92,13 +84,27 @@ queues = "all"
# Number of concurrent emails to verify for this worker across all queues.
concurrency = 20

# Throttle the maximum number of requests per second, per minute, per hour, and
# per day for this worker.
# All fields are optional; comment them out to disable the limit.
#
# Important: these throttle configurations only apply to /v1/* endpoints, and
# not to the previous /v0/check_email endpoint. The latter endpoint always
# executes the verification immediately, regardless of the throttle settings.
[worker.throttle]
# max_requests_per_second = 20
# max_requests_per_minute = 100
# max_requests_per_hour = 1000
# max_requests_per_day = 20000

# Postgres configuration. Currently, a Postgres database is required to store
# the results of the verifications. This might change in the future, allowing
# for pluggable storage.
[worker.postgres]
db_url = "postgresql://localhost/reacherdb"

# Optional webhook URL to send the results to. This will send one POST request
# per email verification, with the result in the body.
# [worker.webhook.on_each_email]
# url = "http://localhost:8080/webhook"
# Optional Sentry configuration. If set, all errors will be sent to Sentry.
# [sentry]
# dsn = "<PASTE_YOUR_DSN_NOW>"
# Identifier sent to Sentry, usually the same as the top-level backend_name.
# backend_name = "backend-dev"
30 changes: 15 additions & 15 deletions backend/scripts/debian11.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,27 @@ set -e

# You can change the default values of these variables inline here, or by
# setting them in the environment before running this script, e.g.:
# RCH_BACKEND_NAME="my-own-name" ./debian11.sh
# RCH__BACKEND_NAME="my-own-name" ./debian11.sh

# A unique identifier for the backend.
RCH_BACKEND_NAME=${RCH_BACKEND_NAME:-"backend1.mycompany.com"}
RCH__BACKEND_NAME=${RCH__BACKEND_NAME:-"backend1.mycompany.com"}
# Docker Hub tag for reacherhq/backend.
RCH_VERSION=${RCH_VERSION:-"v0.7.0"}
RCH_VERSION=${RCH_VERSION:-"v0.10.0-beta.1"}
# Optional: Send bug reports to a Sentry.io dashboard.
RCH_SENTRY_DSN=${RCH_SENTRY_DSN:-}
RCH__SENTRY_DSN=${RCH__SENTRY_DSN:-}
# Protect the backend from the public via a `x-reacher-secret` header.
RCH_HEADER_SECRET=${RCH_HEADER_SECRET:-}
RCH__HEADER_SECRET=${RCH__HEADER_SECRET:-}
# For the "FROM" field in emails.
RCH_FROM_EMAIL=${RCH_FROM_EMAIL:-"hello@mycompany.com"}
RCH__FROM_EMAIL=${RCH__FROM_EMAIL:-"hello@mycompany.com"}
# For the "EHLO" field in emails. This should ideally match the server's
# reverse DNS entry for optimal results.
RCH_HELLO_NAME=${RCH_HELLO_NAME:-"backend1.mycompany.com"}
RCH__HELLO_NAME=${RCH__HELLO_NAME:-"backend1.mycompany.com"}
# Timeout for SMTP connections in seconds.
RCH_SMTP_TIMEOUT=${RCH_SMTP_TIMEOUT:-"90"}
RCH__SMTP_TIMEOUT=${RCH__SMTP_TIMEOUT:-"90"}
# Logging. Set to "debug" to show all logs.
RUST_LOG=${RUST_LOG:-"info"}

echo "Installing Reacher backend $RCH_VERSION on host $RCH_BACKEND_NAME..."
echo "Installing Reacher backend $RCH_VERSION on host $RCH__BACKEND_NAME..."

# Install Docker
# https://docs.docker.com/engine/install/debian/
Expand Down Expand Up @@ -64,12 +64,12 @@ docker rm reacher_backend
# Run the backend
docker run -d \
-e RUST_LOG=$RUST_LOG \
-e RCH_BACKEND_NAME=$RCH_BACKEND_NAME \
-e RCH_SENTRY_DSN=$RCH_SENTRY_DSN \
-e RCH_HEADER_SECRET=$RCH_HEADER_SECRET \
-e RCH_FROM_EMAIL=$RCH_FROM_EMAIL \
-e RCH_HELLO_NAME=$RCH_HELLO_NAME \
-e RCH_SMTP_TIMEOUT=$RCH_SMTP_TIMEOUT \
-e RCH__BACKEND_NAME=$RCH__BACKEND_NAME \
-e RCH__SENTRY_DSN=$RCH__SENTRY_DSN \
-e RCH__HEADER_SECRET=$RCH__HEADER_SECRET \
-e RCH__FROM_EMAIL=$RCH__FROM_EMAIL \
-e RCH__HELLO_NAME=$RCH__HELLO_NAME \
-e RCH__SMTP_TIMEOUT=$RCH__SMTP_TIMEOUT \
-p 80:8080 \
--name reacher_backend \
reacherhq/backend:$RCH_VERSION
Expand Down
46 changes: 15 additions & 31 deletions backend/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,21 @@ use crate::worker::check_email::TaskWebhook;
#[cfg(feature = "worker")]
use crate::worker::setup_rabbit_mq;
use anyhow::bail;
use check_if_email_exists::config::ReacherConfig;
use check_if_email_exists::{
CheckEmailInputProxy, GmailVerifMethod, HotmailB2BVerifMethod, HotmailB2CVerifMethod,
SentryConfig, YahooVerifMethod,
YahooVerifMethod,
};
use config::Config;
#[cfg(feature = "worker")]
use lapin::Channel;
use serde::de::{self, Deserializer, Visitor};
use serde::Deserialize;
use serde::{Deserialize, Serialize};
use sqlx::PgPool;
#[cfg(feature = "worker")]
use std::sync::Arc;
use std::{env, fmt};

#[derive(Debug, Default, Deserialize)]
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BackendConfig {
/// Name of the backend.
pub backend_name: String,
Expand All @@ -56,13 +55,15 @@ pub struct BackendConfig {
pub http_port: u16,
/// Shared secret between a trusted client and the backend.
pub header_secret: Option<String>,
/// Timeout for each SMTP connection, in seconds. Leaving it commented out
/// will not set a timeout, i.e. the connection will wait indefinitely.
pub smtp_timeout: Option<u64>,
/// Sentry DSN to report errors to
pub sentry_dsn: Option<String>,

/// Worker configuration, only present if the backend is a worker.
pub worker: WorkerConfig,

/// Sentry configuration to report errors.
pub sentry: Option<SentryConfig>,

// Internal fields, not part of the configuration.
#[serde(skip)]
pg_pool: Option<PgPool>,
Expand All @@ -75,14 +76,6 @@ pub struct BackendConfig {
}

impl BackendConfig {
pub fn get_reacher_config(&self) -> ReacherConfig {
ReacherConfig {
backend_name: self.backend_name.clone(),
sentry: self.sentry.clone(),
webdriver_addr: self.webdriver_addr.clone(),
}
}

/// Get the worker configuration.
///
/// # Panics
Expand Down Expand Up @@ -142,7 +135,7 @@ impl BackendConfig {
}
}

#[derive(Debug, Default, Deserialize, Clone)]
#[derive(Debug, Default, Deserialize, Clone, Serialize)]
pub struct VerifMethodConfig {
/// Verification method for Gmail emails.
pub gmail: GmailVerifMethod,
Expand All @@ -154,7 +147,7 @@ pub struct VerifMethodConfig {
pub yahoo: YahooVerifMethod,
}

#[derive(Debug, Default, Deserialize, Clone)]
#[derive(Debug, Default, Deserialize, Clone, Serialize)]
pub struct WorkerConfig {
pub enable: bool,

Expand Down Expand Up @@ -186,7 +179,7 @@ pub struct MustWorkerConfig {
pub postgres: PostgresConfig,
}

#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize)]
pub enum RabbitMQQueues {
All,
Only(Vec<Queue>),
Expand Down Expand Up @@ -251,7 +244,7 @@ impl RabbitMQQueues {
}
}

#[derive(Debug, Deserialize, Clone)]
#[derive(Debug, Deserialize, Clone, Serialize)]
pub struct RabbitMQConfig {
pub url: String,
/// Queues to consume emails from. By default the worker consumes from all
Expand All @@ -278,7 +271,7 @@ pub struct RabbitMQConfig {
/// Queue names that the worker can consume from. Each email is routed to
/// one and only one queue, based on the email provider. A single worker can
/// consume from multiple queues.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Serialize)]
pub enum Queue {
Gmail,
HotmailB2B,
Expand Down Expand Up @@ -342,12 +335,12 @@ impl<'de> Deserialize<'de> for Queue {
}
}

#[derive(Debug, Deserialize, Clone)]
#[derive(Debug, Deserialize, Clone, Serialize)]
pub struct PostgresConfig {
pub db_url: String,
}

#[derive(Debug, Deserialize, Clone)]
#[derive(Debug, Deserialize, Clone, Serialize)]
pub struct ThrottleConfig {
pub max_requests_per_second: Option<u32>,
pub max_requests_per_minute: Option<u32>,
Expand Down Expand Up @@ -416,12 +409,3 @@ pub async fn load_config() -> Result<BackendConfig, anyhow::Error> {

Ok(cfg)
}

#[cfg(test)]
mod test {
#[tokio::test]
async fn test_load_config() {
let cfg = super::load_config().await;
assert!(cfg.is_ok());
}
}
38 changes: 17 additions & 21 deletions backend/src/http/v0/bulk/task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
//! This file implements the `POST /bulk` endpoint.

use check_if_email_exists::{
check_email, config::ReacherConfig, CheckEmailInput, CheckEmailInputBuilder,
CheckEmailInputProxy, CheckEmailOutput, Reachable, SentryConfig, LOG_TARGET,
check_email, CheckEmailInput, CheckEmailInputBuilder, CheckEmailInputProxy, CheckEmailOutput,
Reachable, LOG_TARGET,
};
use serde::{Deserialize, Serialize};
use sqlx::{Pool, Postgres};
Expand Down Expand Up @@ -64,7 +64,7 @@ impl Iterator for TaskInputIterator {
fn next(&mut self) -> Option<Self::Item> {
if self.index < self.body.smtp_ports.len() {
let mut item = CheckEmailInputBuilder::default();
let mut item = item.to_email(self.body.to_email.clone());
let mut item: &mut CheckEmailInputBuilder = item.to_email(self.body.to_email.clone());

if let Some(name) = &self.body.hello_name {
item = item.hello_name(name.clone());
Expand All @@ -80,6 +80,19 @@ impl Iterator for TaskInputIterator {
item = item.proxy(Some(proxy.clone()));
}

// Currently, for the legacy and deprecated /v0/bulk endpoints, we
// don't pass in a BackendConfig to the job. Therefore, we must set the
// backend name, Sentry DSN and WebDriver address on the input here, using
// the legacy env::var() method. This is a temporary solution until the
// /v0/bulk endpoints are removed.
let backend_name = env::var("RCH_BACKEND_NAME").unwrap_or_else(|_| "reacher".into());
let sentry_dsn = env::var("RCH_SENTRY_DSN");
let webdriver_addr =
env::var("RCH_WEBDRIVER_ADDR").unwrap_or_else(|_| "http://localhost:9515".into());
item.backend_name(backend_name);
item.sentry_dsn(sentry_dsn.ok());
item.webdriver_addr(webdriver_addr);

self.index += 1;
Some(item.build().unwrap())
} else {
Expand Down Expand Up @@ -161,25 +174,8 @@ pub async fn email_verification_task(
current_job.id(),
);

// Currently, for the legacy and deprecated /v0/bulk endpoints, we
// don't pass in a BackendConfig to the job. Therefore, we must create
// an ad-hoc ReacherConfig here, using the legacy env::var() method.
// This is a temporary solution until the /v0/bulk endpoints are
// removed.
let backend_name = env::var("RCH_BACKEND_NAME").unwrap_or_else(|_| "reacher".into());
let sentry_dsn = env::var("RCH_SENTRY_DSN");
let webdriver_addr =
env::var("RCH_WEBDRIVER_ADDR").unwrap_or_else(|_| "localhost:9515".into());
let config = ReacherConfig {
backend_name: backend_name.clone(),
webdriver_addr,
sentry: sentry_dsn
.ok()
.map(|dsn| SentryConfig { dsn, backend_name }),
};

let to_email = check_email_input.to_email.clone();
let response = check_email(&check_email_input, &config).await;
let response = check_email(&check_email_input).await;

debug!(
target: LOG_TARGET,
Expand Down
Loading
Loading