From 4a6f5158dcf86800c1aa2a2a99ba583fe2ac7bf9 Mon Sep 17 00:00:00 2001 From: Amaury <1293565+amaury1093@users.noreply.github.com> Date: Wed, 27 Nov 2024 11:22:11 +0100 Subject: [PATCH] chore: Add more comments to BackendConfig --- README.md | 3 -- backend/README.md | 69 ++++++------------------------------- backend/backend_config.toml | 64 +++++++++++++++++++++++++++------- backend/src/config.rs | 7 +++- docker-compose.yaml | 6 ++-- 5 files changed, 72 insertions(+), 77 deletions(-) diff --git a/README.md b/README.md index cb69951f5..303f9c945 100644 --- a/README.md +++ b/README.md @@ -36,15 +36,12 @@ Then send a `POST http://localhost:8080/v0/check_email` request with the followi ```js { "to_email": "someone@gmail.com", - "from_email": "my@my-server.com", // (optional) email to use in the `FROM` SMTP command, defaults to "user@example.org" - "hello_name": "my-server.com", // (optional) name to use in the `EHLO` SMTP command, defaults to "localhost" "proxy": { // (optional) SOCK5 proxy to run the verification through, default is empty "host": "my-proxy.io", "port": 1080, "username": "me", // (optional) Proxy username "password": "pass" // (optional) Proxy password }, - "smtp_port": 587 // (optional) SMTP port to do the email verification, defaults to 25 } ``` diff --git a/backend/README.md b/backend/README.md index bd693811d..166d8a91e 100644 --- a/backend/README.md +++ b/backend/README.md @@ -9,10 +9,11 @@

-This crate holds the backend for [Reacher](https://reacher.email). The backend is a HTTP server with the following components: +This crate holds the backend for [Reacher](https://reacher.email). The backend is both an HTTP server and an email verification worker. It has the following components: - [`check-if-email-exists`](https://github.com/reacherhq/check-if-email-exists), which performs the core email verification logic, -- [`warp`](https://github.com/seanmonstar/warp) web framework. +- [`warp`](https://github.com/seanmonstar/warp) web framework, +- [`RabbitMQ`](https://www.rabbitmq.com/) worker for consuming a queue of incoming verification requests. ## Get Started @@ -29,69 +30,22 @@ Then send a `POST http://localhost:8080/v0/check_email` request with the followi ```js { "to_email": "someone@gmail.com", - "from_email": "my@my-server.com", // (optional) email to use in the `FROM` SMTP command, defaults to "user@example.org" - "hello_name": "my-server.com", // (optional) name to use in the `EHLO` SMTP command, defaults to "localhost" "proxy": { // (optional) SOCK5 proxy to run the verification through, default is empty "host": "my-proxy.io", "port": 1080, "username": "me", // (optional) Proxy username "password": "pass" // (optional) Proxy password }, - "smtp_port": 587 // (optional) SMTP port to do the email verification, defaults to 25 } ``` -### Configuration +## Configuration -These are the environment variables used to configure the HTTP server. To pass them to the Docker container, use the `-e {ENV_VAR}={VALUE}` flag. +The backend is configured via its [`backend_config.toml`](./backend_config.toml) file. -| Env Var | Required? 
| Description | Dockerfile default | -| ----------------------------------- | --------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------- | -| `RUST_LOG` | No | One of `trace,debug,warn,error,info`. 💡 PRO TIP: `RUST_LOG=debug` is very handful for debugging purposes. | `reacher=info` | -| `RCH_HTTP_HOST` | No | The host name to bind the HTTP server to. | `0.0.0.0` | -| `PORT` | No | The port to bind the HTTP server to, often populated by the cloud provider. | `8080` | -| `RCH_SENTRY_DSN` | No | If set, bug reports will be sent to this [Sentry](https://sentry.io) DSN. | not defined | -| `RCH_HEADER_SECRET` | No | If set, then all HTTP requests must have the `x-reacher-secret` header set to this value. This is used to protect the backend against public unwanted HTTP requests. | undefined | -| `RCH_FROM_EMAIL` | No | Email to use in the `` SMTP step. Can be overwritten by each API request's `from_email` field. | reacher.email@gmail.com | -| `RCH_HELLO_NAME` | No | Name to use in the `` SMTP step. Can be overwritten by each API request's `hello_name` field. | gmail.com | -| `RCH_SMTP_TIMEOUT` | No | Timeout for each SMTP connection. | 45s | -| `RCH_WEBDRIVER_ADDR` | No | Set to a running WebDriver process endpoint (e.g. `http://localhost:9515`) to use a headless navigator to password recovery pages to check Yahoo and Hotmail/Outlook addresses. We recommend `chromedriver` as it allows parallel requests. | `http://localhost:9515` | -| **For Bulk Verification:** | | | -| `RCH_ENABLE_BULK` | No | If set to `1`, then bulk verification endpoints will be added to the backend. 
| 0 | -| `DATABASE_URL` | Yes if `RCH_ENABLE_BULK==1` | [Bulk] Database connection string for storing results and task queue | not defined | -| `RCH_DATABASE_MAX_CONNECTIONS` | No | [Bulk] Connections created for the database pool | 5 | -| `RCH_MINIMUM_TASK_CONCURRENCY` | No | [Bulk] Minimum number of concurrent running tasks below which more tasks are fetched | 10 | -| `RCH_MAXIMUM_CONCURRENT_TASK_FETCH` | No | [Bulk] Maximum number of tasks fetched at once | 20 | +## API Documentation -## REST API Documentation - -The API exposes the following endpoint: `POST /v0/check_email` expecting the following body: - -```js -{ - "to_email": "someone@gmail.com", - "from_email": "my@my-server.com", // (optional) email to use in the `FROM` SMTP command, defaults to "user@example.org" - "hello_name": "my-server.com", // (optional) name to use in the `EHLO` SMTP command, defaults to "localhost" - "proxy": { // (optional) SOCK5 proxy to run the verification through, default is empty - "host": "my-proxy.io", - "port": 1080, - "username": "me", // (optional) Proxy username - "password": "pass" // (optional) Proxy password - }, - "smtp_port": 587 // (optional) SMTP port to do the email verification, defaults to 25 -} -``` - -For example, you can send the following `curl` request: - -```bash -curl -X POST \ - -H'Content-Type: application/json' \ - -d'{"to_email":"someone@gmail.com"}' \ - http://localhost:8080/v0/check_email -``` - -Also check the [OpenAPI documentation](https://docs.reacher.email/advanced/openapi). +See the full [OpenAPI documentation](https://docs.reacher.email/advanced/openapi). ## Build From Source @@ -100,13 +54,10 @@ You can build the backend from source to generate a binary, and run the server l ```bash # Download the code $ git clone https://github.com/reacherhq/check-if-email-exists -$ cd check-if-email-exists - -# Build the backend binary in release mode (more performant). 
-$ cargo build --release --bin reacher_backend +$ cd check-if-email-exists/backend -# Run the binary with some useful logs. -$ RUST_LOG=info ./target/release/reacher_backend +# Run the backend binary in release mode (slower build, but more performant). +$ cargo run --release --bin reacher_backend --features worker ``` The server will then be listening on `http://127.0.0.1:8080`. diff --git a/backend/backend_config.toml b/backend/backend_config.toml index a836bb69f..ee794ace9 100644 --- a/backend/backend_config.toml +++ b/backend/backend_config.toml @@ -1,40 +1,63 @@ -# Backend configuration +# Backend configuration. # Name to identify the backend. +# +# Env variable: RCH__BACKEND_NAME backend_name = "backend-dev" # Host to bind the backend to. +# +# Env variable: RCH__HTTP_HOST http_host = "127.0.0.1" # Port for the backend. +# +# Env variable: RCH__HTTP_PORT http_port = 8080 # Shared secret between a trusted client and the backend, required in the # `x-reacher-secret` header of all incoming requests. +# +# Env variable: RCH__HEADER_SECRET # header_secret = "my-secret" # Name to use during the EHLO/HELO command in the SMTP conversation. # Ideally, this should match the reverse DNS of the server's IP address. -hello_name = "reacher" +# +# Env variable: RCH__HELLO_NAME +hello_name = "localhost" # Email to use during the MAIL FROM command in the SMTP conversation. # Ideally, the domain of this email should match the "hello_name" above. -from_email = "reacher@gmail.com" +# +# Env variable: RCH__FROM_EMAIL +from_email = "hello@localhost" # Address of the Chrome WebDriver server for headless email verifications. +# +# Env variable: RCH__WEBDRIVER_ADDR webdriver_addr = "http://localhost:9515" # Timeout for each SMTP connection, in seconds. Leaving it commented out will # not set a timeout, i.e. the connection will wait indefinitely. 
+# +# Env variable: RCH__SMTP_TIMEOUT # smtp_timeout = 45 -# Uncomment the following lines to route all SMTP verification requests through -# a specified proxy. Note that the proxy must be a SOCKS5 proxy to work with -# the SMTP protocol. This proxy will not be used for headless verifications. +# Uncomment the lines below to route all SMTP verification requests +# through a specified proxy. Note that the proxy must be a SOCKS5 proxy to work +# with the SMTP protocol. This proxy will not be used for headless +# verifications. # # The username and password are optional and only needed if the proxy requires # authentication. -# +# +# Env variables: +# - RCH__PROXY__HOST +# - RCH__PROXY__PORT +# - RCH__PROXY__USERNAME +# - RCH__PROXY__PASSWORD +# # [proxy] # host = "my.proxy.com" # port = 1080 @@ -46,8 +69,12 @@ webdriver_addr = "http://localhost:9515" # all email providers. [verif_method] # Gmail currently only supports the "smtp" method. +# +# Env variable: RCH__VERIF_METHOD__GMAIL gmail = "smtp" # Hotmail B2B currently only supports the "smtp" method. +# +# Env variable: RCH__VERIF_METHOD__HOTMAILB2B hotmailb2b = "smtp" # Hotmail B2C supports both "headless" and "smtp" methods. The "headless" # method is recommended. @@ -59,10 +86,13 @@ yahoo = "headless" [worker] # Enable the worker to consume emails from the RabbitMQ queues. If set, the # RabbitMQ configuration below must be set as well. +# +# Env variable: RCH__WORKER__ENABLE enable = true # RabbitMQ configuration. [worker.rabbitmq] +# Env variable: RCH__WORKER__RABBITMQ__URL url = "amqp://guest:guest@localhost:5672" # Queues to consume emails from. By default, the worker consumes from all @@ -78,10 +108,14 @@ url = "amqp://guest:guest@localhost:5672" # - "check.yahoo": subscribe exclusively to Yahoo emails. # - "check.everything_else": subscribe to all emails that are not Gmail, Yahoo, or Hotmail. 
# +# Env variable: RCH__WORKER__RABBITMQ__QUEUES +# # queues = ["check.gmail", "check.hotmail.b2b", "check.hotmail.b2c", "check.yahoo", "check.everything_else"] queues = "all" # Number of concurrent emails to verify for this worker across all queues. +# +# Env variable: RCH__WORKER__RABBITMQ__CONCURRENCY concurrency = 20 # Throttle the maximum number of requests per second, per minute, per hour, and @@ -91,6 +125,12 @@ concurrency = 20 # Important: these throttle configurations only apply to /v1/* endpoints, and # not to the previous /v0/check_email endpoint. The latter endpoint always # executes the verification immediately, regardless of the throttle settings. +# +# Env variables: +# - RCH__WORKER__THROTTLE__MAX_REQUESTS_PER_SECOND +# - RCH__WORKER__THROTTLE__MAX_REQUESTS_PER_MINUTE +# - RCH__WORKER__THROTTLE__MAX_REQUESTS_PER_HOUR +# - RCH__WORKER__THROTTLE__MAX_REQUESTS_PER_DAY [worker.throttle] # max_requests_per_second = 20 # max_requests_per_minute = 100 @@ -101,10 +141,10 @@ concurrency = 20 # the results of the verifications. This might change in the future, allowing # for pluggable storage. [worker.postgres] +# Env variable: RCH__WORKER__POSTGRES__DB_URL db_url = "postgresql://localhost/reacherdb" -# Optional Sentry configuration. If set, all errors will be sent to Sentry. -# [sentry] -# dsn = "" -# Identifier sent to Sentry, usually the same as the the top-level backend_name. -# backend_name = "backend-dev" +# Optional Sentry DSN. If set, all errors will be sent to Sentry. 
+# +# Env variable: RCH__SENTRY_DSN +# sentry_dsn = "" diff --git a/backend/src/config.rs b/backend/src/config.rs index 4e9337852..4afdc103c 100644 --- a/backend/src/config.rs +++ b/backend/src/config.rs @@ -22,7 +22,7 @@ use crate::worker::setup_rabbit_mq; use anyhow::bail; use check_if_email_exists::{ CheckEmailInputProxy, GmailVerifMethod, HotmailB2BVerifMethod, HotmailB2CVerifMethod, - YahooVerifMethod, + YahooVerifMethod, LOG_TARGET, }; use config::Config; #[cfg(feature = "worker")] @@ -33,6 +33,7 @@ use sqlx::PgPool; #[cfg(feature = "worker")] use std::sync::Arc; use std::{env, fmt}; +use tracing::warn; #[derive(Debug, Default, Serialize, Deserialize)] pub struct BackendConfig { @@ -370,6 +371,10 @@ pub async fn load_config() -> Result { let mut cfg = cfg.try_deserialize::()?; + if !cfg.worker.enable && (cfg.worker.rabbitmq.is_some() || cfg.worker.throttle.is_some()) { + warn!(target: LOG_TARGET, "worker.enable is set to false, ignoring throttling and concurrency settings.") + } + let pg_pool = if cfg.worker.enable { let db_url = cfg .worker diff --git a/docker-compose.yaml b/docker-compose.yaml index 3b3bbdd9c..310999239 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -24,7 +24,7 @@ services: restart: always worker1: - image: reacherhq/backend:v0.10.0-beta.3 + image: reacherhq/backend:beta container_name: worker1 ports: - "8080:8080" @@ -38,10 +38,11 @@ services: RCH__WORKER__ENABLE: true RCH__WORKER__RABBITMQ__URL: amqp://guest:guest@rabbitmq:5672 RCH__WORKER__POSTGRES__DB_URL: postgres://postgres:postgres@postgres:5432/reacher_db + RCH__WORKER__THROTTLE__MAX_REQUESTS_PER_DAY: 10000 # Recommended limit per IP per day restart: always worker2: - image: reacherhq/backend:v0.10.0-beta.3 + image: reacherhq/backend:beta container_name: worker2 ports: - "8081:8080" @@ -55,4 +56,5 @@ services: RCH__WORKER__ENABLE: true RCH__WORKER__RABBITMQ__URL: amqp://guest:guest@rabbitmq:5672 RCH__WORKER__POSTGRES__DB_URL: 
postgres://postgres:postgres@postgres:5432/reacher_db + RCH__WORKER__THROTTLE__MAX_REQUESTS_PER_DAY: 10000 # Recommended limit per IP per day restart: always