From 4a6f5158dcf86800c1aa2a2a99ba583fe2ac7bf9 Mon Sep 17 00:00:00 2001
From: Amaury <1293565+amaury1093@users.noreply.github.com>
Date: Wed, 27 Nov 2024 11:22:11 +0100
Subject: [PATCH] chore: Add more comments to BackendConfig
---
README.md | 3 --
backend/README.md | 69 ++++++-------------------------------
backend/backend_config.toml | 64 +++++++++++++++++++++++++++-------
backend/src/config.rs | 7 +++-
docker-compose.yaml | 6 ++--
5 files changed, 72 insertions(+), 77 deletions(-)
diff --git a/README.md b/README.md
index cb69951f5..303f9c945 100644
--- a/README.md
+++ b/README.md
@@ -36,15 +36,12 @@ Then send a `POST http://localhost:8080/v0/check_email` request with the followi
```js
{
"to_email": "someone@gmail.com",
- "from_email": "my@my-server.com", // (optional) email to use in the `FROM` SMTP command, defaults to "user@example.org"
- "hello_name": "my-server.com", // (optional) name to use in the `EHLO` SMTP command, defaults to "localhost"
"proxy": { // (optional) SOCK5 proxy to run the verification through, default is empty
"host": "my-proxy.io",
"port": 1080,
"username": "me", // (optional) Proxy username
"password": "pass" // (optional) Proxy password
},
- "smtp_port": 587 // (optional) SMTP port to do the email verification, defaults to 25
}
```
diff --git a/backend/README.md b/backend/README.md
index bd693811d..166d8a91e 100644
--- a/backend/README.md
+++ b/backend/README.md
@@ -9,10 +9,11 @@
-This crate holds the backend for [Reacher](https://reacher.email). The backend is a HTTP server with the following components:
+This crate holds the backend for [Reacher](https://reacher.email). The backend is both an HTTP server and an email verification worker. It has the following components:
- [`check-if-email-exists`](https://github.com/reacherhq/check-if-email-exists), which performs the core email verification logic,
-- [`warp`](https://github.com/seanmonstar/warp) web framework.
+- [`warp`](https://github.com/seanmonstar/warp) web framework,
+- [`RabbitMQ`](https://www.rabbitmq.com/) worker for consuming a queue of incoming verification requests.
## Get Started
@@ -29,69 +30,22 @@ Then send a `POST http://localhost:8080/v0/check_email` request with the followi
```js
{
"to_email": "someone@gmail.com",
- "from_email": "my@my-server.com", // (optional) email to use in the `FROM` SMTP command, defaults to "user@example.org"
- "hello_name": "my-server.com", // (optional) name to use in the `EHLO` SMTP command, defaults to "localhost"
"proxy": { // (optional) SOCK5 proxy to run the verification through, default is empty
"host": "my-proxy.io",
"port": 1080,
"username": "me", // (optional) Proxy username
"password": "pass" // (optional) Proxy password
},
- "smtp_port": 587 // (optional) SMTP port to do the email verification, defaults to 25
}
```
-### Configuration
+## Configuration
-These are the environment variables used to configure the HTTP server. To pass them to the Docker container, use the `-e {ENV_VAR}={VALUE}` flag.
+The backend is configured via its [`backend_config.toml`](./backend_config.toml) file.
-| Env Var | Required? | Description | Dockerfile default |
-| ----------------------------------- | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------- |
-| `RUST_LOG` | No | One of `trace,debug,warn,error,info`. 💡 PRO TIP: `RUST_LOG=debug` is very handful for debugging purposes. | `reacher=info` |
-| `RCH_HTTP_HOST` | No | The host name to bind the HTTP server to. | `0.0.0.0` |
-| `PORT` | No | The port to bind the HTTP server to, often populated by the cloud provider. | `8080` |
-| `RCH_SENTRY_DSN` | No | If set, bug reports will be sent to this [Sentry](https://sentry.io) DSN. | not defined |
-| `RCH_HEADER_SECRET` | No | If set, then all HTTP requests must have the `x-reacher-secret` header set to this value. This is used to protect the backend against public unwanted HTTP requests. | undefined |
-| `RCH_FROM_EMAIL` | No | Email to use in the `` SMTP step. Can be overwritten by each API request's `from_email` field. | reacher.email@gmail.com |
-| `RCH_HELLO_NAME` | No | Name to use in the `` SMTP step. Can be overwritten by each API request's `hello_name` field. | gmail.com |
-| `RCH_SMTP_TIMEOUT` | No | Timeout for each SMTP connection. | 45s |
-| `RCH_WEBDRIVER_ADDR` | No | Set to a running WebDriver process endpoint (e.g. `http://localhost:9515`) to use a headless navigator to password recovery pages to check Yahoo and Hotmail/Outlook addresses. We recommend `chromedriver` as it allows parallel requests. | `http://localhost:9515` |
-| **For Bulk Verification:** | | |
-| `RCH_ENABLE_BULK` | No | If set to `1`, then bulk verification endpoints will be added to the backend. | 0 |
-| `DATABASE_URL` | Yes if `RCH_ENABLE_BULK==1` | [Bulk] Database connection string for storing results and task queue | not defined |
-| `RCH_DATABASE_MAX_CONNECTIONS` | No | [Bulk] Connections created for the database pool | 5 |
-| `RCH_MINIMUM_TASK_CONCURRENCY` | No | [Bulk] Minimum number of concurrent running tasks below which more tasks are fetched | 10 |
-| `RCH_MAXIMUM_CONCURRENT_TASK_FETCH` | No | [Bulk] Maximum number of tasks fetched at once | 20 |
+## API Documentation
-## REST API Documentation
-
-The API exposes the following endpoint: `POST /v0/check_email` expecting the following body:
-
-```js
-{
- "to_email": "someone@gmail.com",
- "from_email": "my@my-server.com", // (optional) email to use in the `FROM` SMTP command, defaults to "user@example.org"
- "hello_name": "my-server.com", // (optional) name to use in the `EHLO` SMTP command, defaults to "localhost"
- "proxy": { // (optional) SOCK5 proxy to run the verification through, default is empty
- "host": "my-proxy.io",
- "port": 1080,
- "username": "me", // (optional) Proxy username
- "password": "pass" // (optional) Proxy password
- },
- "smtp_port": 587 // (optional) SMTP port to do the email verification, defaults to 25
-}
-```
-
-For example, you can send the following `curl` request:
-
-```bash
-curl -X POST \
- -H'Content-Type: application/json' \
- -d'{"to_email":"someone@gmail.com"}' \
- http://localhost:8080/v0/check_email
-```
-
-Also check the [OpenAPI documentation](https://docs.reacher.email/advanced/openapi).
+See the full [OpenAPI documentation](https://docs.reacher.email/advanced/openapi).
## Build From Source
@@ -100,13 +54,10 @@ You can build the backend from source to generate a binary, and run the server l
```bash
# Download the code
$ git clone https://github.com/reacherhq/check-if-email-exists
-$ cd check-if-email-exists
-
-# Build the backend binary in release mode (more performant).
-$ cargo build --release --bin reacher_backend
+$ cd check-if-email-exists/backend
-# Run the binary with some useful logs.
-$ RUST_LOG=info ./target/release/reacher_backend
+# Run the backend binary in release mode (slower build, but more performant).
+$ cargo run --release --bin reacher_backend --features worker
```
The server will then be listening on `http://127.0.0.1:8080`.
diff --git a/backend/backend_config.toml b/backend/backend_config.toml
index a836bb69f..ee794ace9 100644
--- a/backend/backend_config.toml
+++ b/backend/backend_config.toml
@@ -1,40 +1,63 @@
-# Backend configuration
+# Backend configuration.
# Name to identify the backend.
+#
+# Env variable: RCH__BACKEND_NAME
backend_name = "backend-dev"
# Host to bind the backend to.
+#
+# Env variable: RCH__HTTP_HOST
http_host = "127.0.0.1"
# Port for the backend.
+#
+# Env variable: RCH__HTTP_PORT
http_port = 8080
# Shared secret between a trusted client and the backend, required in the
# `x-reacher-secret` header of all incoming requests.
+#
+# Env variable: RCH__HEADER_SECRET
# header_secret = "my-secret"
# Name to use during the EHLO/HELO command in the SMTP conversation.
# Ideally, this should match the reverse DNS of the server's IP address.
-hello_name = "reacher"
+#
+# Env variable: RCH__HELLO_NAME
+hello_name = "localhost"
# Email to use during the MAIL FROM command in the SMTP conversation.
# Ideally, the domain of this email should match the "hello_name" above.
-from_email = "reacher@gmail.com"
+#
+# Env variable: RCH__FROM_EMAIL
+from_email = "hello@localhost"
# Address of the Chrome WebDriver server for headless email verifications.
+#
+# Env variable: RCH__WEBDRIVER_ADDR
webdriver_addr = "http://localhost:9515"
# Timeout for each SMTP connection, in seconds. Leaving it commented out will
# not set a timeout, i.e. the connection will wait indefinitely.
+#
+# Env variable: RCH__SMTP_TIMEOUT
# smtp_timeout = 45
-# Uncomment the following lines to route all SMTP verification requests through
-# a specified proxy. Note that the proxy must be a SOCKS5 proxy to work with
-# the SMTP protocol. This proxy will not be used for headless verifications.
+# Uncomment the lines below to route all SMTP verification requests
+# through a specified proxy. Note that the proxy must be a SOCKS5 proxy to work
+# with the SMTP protocol. This proxy will not be used for headless
+# verifications.
#
# The username and password are optional and only needed if the proxy requires
# authentication.
-#
+#
+# Env variables:
+# - RCH__PROXY__HOST
+# - RCH__PROXY__PORT
+# - RCH__PROXY__USERNAME
+# - RCH__PROXY__PASSWORD
+#
# [proxy]
# host = "my.proxy.com"
# port = 1080
@@ -46,8 +69,12 @@ webdriver_addr = "http://localhost:9515"
# all email providers.
[verif_method]
# Gmail currently only supports the "smtp" method.
+#
+# Env variable: RCH__VERIF_METHOD__GMAIL
gmail = "smtp"
# Hotmail B2B currently only supports the "smtp" method.
+#
+# Env variable: RCH__VERIF_METHOD__HOTMAILB2B
hotmailb2b = "smtp"
# Hotmail B2C supports both "headless" and "smtp" methods. The "headless"
# method is recommended.
@@ -59,10 +86,13 @@ yahoo = "headless"
[worker]
# Enable the worker to consume emails from the RabbitMQ queues. If set, the
# RabbitMQ configuration below must be set as well.
+#
+# Env variable: RCH__WORKER__ENABLE
enable = true
# RabbitMQ configuration.
[worker.rabbitmq]
+# Env variable: RCH__WORKER__RABBITMQ__URL
url = "amqp://guest:guest@localhost:5672"
# Queues to consume emails from. By default, the worker consumes from all
@@ -78,10 +108,14 @@ url = "amqp://guest:guest@localhost:5672"
# - "check.yahoo": subscribe exclusively to Yahoo emails.
# - "check.everything_else": subscribe to all emails that are not Gmail, Yahoo, or Hotmail.
#
+# Env variable: RCH__WORKER__RABBITMQ__QUEUES
+#
# queues = ["check.gmail", "check.hotmail.b2b", "check.hotmail.b2c", "check.yahoo", "check.everything_else"]
queues = "all"
# Number of concurrent emails to verify for this worker across all queues.
+#
+# Env variable: RCH__WORKER__RABBITMQ__CONCURRENCY
concurrency = 20
# Throttle the maximum number of requests per second, per minute, per hour, and
@@ -91,6 +125,12 @@ concurrency = 20
# Important: these throttle configurations only apply to /v1/* endpoints, and
# not to the previous /v0/check_email endpoint. The latter endpoint always
# executes the verification immediately, regardless of the throttle settings.
+#
+# Env variables:
+# - RCH__WORKER__THROTTLE__MAX_REQUESTS_PER_SECOND
+# - RCH__WORKER__THROTTLE__MAX_REQUESTS_PER_MINUTE
+# - RCH__WORKER__THROTTLE__MAX_REQUESTS_PER_HOUR
+# - RCH__WORKER__THROTTLE__MAX_REQUESTS_PER_DAY
[worker.throttle]
# max_requests_per_second = 20
# max_requests_per_minute = 100
@@ -101,10 +141,10 @@ concurrency = 20
# the results of the verifications. This might change in the future, allowing
# for pluggable storage.
[worker.postgres]
+# Env variable: RCH__WORKER__POSTGRES__DB_URL
db_url = "postgresql://localhost/reacherdb"
-# Optional Sentry configuration. If set, all errors will be sent to Sentry.
-# [sentry]
-# dsn = ""
-# Identifier sent to Sentry, usually the same as the the top-level backend_name.
-# backend_name = "backend-dev"
+# Optional Sentry DSN. If set, all errors will be sent to Sentry.
+#
+# Env variable: RCH__SENTRY_DSN
+# sentry_dsn = ""
diff --git a/backend/src/config.rs b/backend/src/config.rs
index 4e9337852..4afdc103c 100644
--- a/backend/src/config.rs
+++ b/backend/src/config.rs
@@ -22,7 +22,7 @@ use crate::worker::setup_rabbit_mq;
use anyhow::bail;
use check_if_email_exists::{
CheckEmailInputProxy, GmailVerifMethod, HotmailB2BVerifMethod, HotmailB2CVerifMethod,
- YahooVerifMethod,
+ YahooVerifMethod, LOG_TARGET,
};
use config::Config;
#[cfg(feature = "worker")]
@@ -33,6 +33,7 @@ use sqlx::PgPool;
#[cfg(feature = "worker")]
use std::sync::Arc;
use std::{env, fmt};
+use tracing::warn;
#[derive(Debug, Default, Serialize, Deserialize)]
pub struct BackendConfig {
@@ -370,6 +371,10 @@ pub async fn load_config() -> Result {
let mut cfg = cfg.try_deserialize::()?;
+ if !cfg.worker.enable && (cfg.worker.rabbitmq.is_some() || cfg.worker.throttle.is_some()) {
+ warn!(target: LOG_TARGET, "worker.enable is set to false, ignoring throttling and concurrency settings.")
+ }
+
let pg_pool = if cfg.worker.enable {
let db_url = cfg
.worker
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 3b3bbdd9c..310999239 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -24,7 +24,7 @@ services:
restart: always
worker1:
- image: reacherhq/backend:v0.10.0-beta.3
+ image: reacherhq/backend:beta
container_name: worker1
ports:
- "8080:8080"
@@ -38,10 +38,11 @@ services:
RCH__WORKER__ENABLE: true
RCH__WORKER__RABBITMQ__URL: amqp://guest:guest@rabbitmq:5672
RCH__WORKER__POSTGRES__DB_URL: postgres://postgres:postgres@postgres:5432/reacher_db
+ RCH__WORKER__THROTTLE__MAX_REQUESTS_PER_DAY: 10000 # Recommended limit per IP per day
restart: always
worker2:
- image: reacherhq/backend:v0.10.0-beta.3
+ image: reacherhq/backend:beta
container_name: worker2
ports:
- "8081:8080"
@@ -55,4 +56,5 @@ services:
RCH__WORKER__ENABLE: true
RCH__WORKER__RABBITMQ__URL: amqp://guest:guest@rabbitmq:5672
RCH__WORKER__POSTGRES__DB_URL: postgres://postgres:postgres@postgres:5432/reacher_db
+ RCH__WORKER__THROTTLE__MAX_REQUESTS_PER_DAY: 10000 # Recommended limit per IP per day
restart: always