Skip to content

Commit

Permalink
feat: implement load balancing for api and be
Browse files Browse the repository at this point in the history
  • Loading branch information
ehh-why-its-so-hard committed Oct 29, 2024
1 parent 71aa3be commit 134eecd
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 4 deletions.
12 changes: 10 additions & 2 deletions roles/vega_caddy_server/defaults/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
vega_caddy_server_caddyfile_file_name: ""
vega_caddy_server_node_home: "/home/vega"

vega_caddy_server_xcaddy_version: 0.3.2
vega_caddy_server_caddy_version: v2.7.5
vega_caddy_server_xcaddy_version: 0.4.2
vega_caddy_server_caddy_version: v2.8.4

vega_caddy_server_node_id: unknown

Expand All @@ -20,3 +20,11 @@ vega_caddy_server_block_explorer_ui_domains: []
vega_caddy_server_governance_ui_domains: []
vega_caddy_server_trading_ui_domains: []
vega_caddy_server_stats_domains: []

# API load balancer: when enabled, caddy serves the listed domains on :443
# and reverse-proxies to the listed upstream host:port pairs
# (rendered by templates/etc/caddy/sites/api-lb.caddy.j2).
vega_caddy_server_with_api_lb: false
vega_caddy_server_api_lb_domains: []
vega_caddy_server_api_lb_upstreams: []

# Block-explorer load balancer: same shape as the API LB settings above
# (rendered by templates/etc/caddy/sites/block-explorer-lb.caddy.j2).
vega_caddy_server_with_block_explorer_lb: false
vega_caddy_server_block_explorer_lb_domains: []
vega_caddy_server_block_explorer_lb_upstreams: []
13 changes: 11 additions & 2 deletions roles/vega_caddy_server/tasks/install-caddy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,23 @@
# Install the xcaddy build tool from the upstream GitHub .deb release.
# xcaddy is required to compile caddy with third-party plugins (see the
# "Compile caddy from sources" task below).
- name: CaddyserverV2 | Install xcaddy
  ansible.builtin.apt:
    deb: "https://github.com/caddyserver/xcaddy/releases/download/v{{ vega_caddy_server_xcaddy_version }}/xcaddy_{{ vega_caddy_server_xcaddy_version }}_linux_amd64.deb" # noqa: yaml[line-length]
    update_cache: true
    state: present

# Probe the installed caddy binary so the compile task below can decide
# whether a (re)build is needed.
- name: Check if caddy is installed
  ansible.builtin.command: "caddy version"
  register: caddy_version
  changed_when: false
  # rc 127 (command not found) and rc 2 (usage error) both mean "caddy is
  # absent or unusable" and are handled by the compile task, so they are not
  # failures here. Bug fix: the original tested `go_installed_version`, a
  # variable never registered in this file, so any non-zero rc made the
  # failed_when expression itself error out on an undefined variable.
  failed_when: caddy_version.rc not in [0, 2, 127]
  check_mode: false

# (Re)build caddy from source with the replace-response plugin compiled in.
# Runs when caddy is missing or its reported version differs from the pinned
# vega_caddy_server_caddy_version.
- name: CaddyserverV2 | Compile caddy from sources
  ansible.builtin.command: |
    xcaddy build {{ vega_caddy_server_caddy_version }} \
      --output /usr/bin/caddy \
      --with github.com/caddyserver/replace-response
  # Bug fixes: added the missing `\` continuation after --output, and
  # dropped `args: creates: /usr/bin/caddy` — `creates` skipped the build
  # whenever any caddy binary existed, so the version-mismatch condition
  # below could never trigger a rebuild after a version bump.
  when: (caddy_version.rc != 0) or (vega_caddy_server_caddy_version not in caddy_version.stdout)
  changed_when: (caddy_version.rc != 0) or (vega_caddy_server_caddy_version not in caddy_version.stdout)
  notify: "Restart caddy"

- name: Config | Install caddy systemd files
Expand Down
30 changes: 30 additions & 0 deletions roles/vega_caddy_server/tasks/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,33 @@
mode: "0644"
when: vega_caddy_server_with_stats and vega_caddy_server_stats_domains | length > 0
notify: "Restart caddy"

# NOTE(review): this task looks like an exact duplicate of the stats task
# directly above it (same template, destination, ownership, condition and
# handler) — confirm against the full file and remove one of the two.
- name: Enable stats config
  ansible.builtin.template:
    src: "etc/caddy/sites/stats.caddy.j2"
    dest: "/etc/caddy/sites/stats.caddy"
    owner: "caddy"
    group: "caddy"
    mode: "0644"
  when: vega_caddy_server_with_stats and vega_caddy_server_stats_domains | length > 0
  notify: "Restart caddy"

# Render the API load-balancer vhost into caddy's sites directory.
# Skipped unless the LB is enabled AND at least one public domain is
# configured (an empty domain list would render an empty site address).
- name: Enable api LB config
  ansible.builtin.template:
    src: "etc/caddy/sites/api-lb.caddy.j2"
    dest: "/etc/caddy/sites/api-lb.caddy"
    owner: "caddy"
    group: "caddy"
    mode: "0644"
  when: vega_caddy_server_with_api_lb and vega_caddy_server_api_lb_domains | length > 0
  notify: "Restart caddy"

# Render the block-explorer load-balancer vhost into caddy's sites
# directory. Skipped unless the LB is enabled AND at least one public
# domain is configured.
- name: Enable block-explorer LB config
  ansible.builtin.template:
    src: "etc/caddy/sites/block-explorer-lb.caddy.j2"
    dest: "/etc/caddy/sites/block-explorer-lb.caddy"
    owner: "caddy"
    group: "caddy"
    mode: "0644"
  when: vega_caddy_server_with_block_explorer_lb and vega_caddy_server_block_explorer_lb_domains | length > 0
  notify: "Restart caddy"
59 changes: 59 additions & 0 deletions roles/vega_caddy_server/templates/etc/caddy/sites/api-lb.caddy.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{#- Caddy site: load balancer for the Vega API domains.
    Serves every entry of vega_caddy_server_api_lb_domains on :443 and
    proxies to vega_caddy_server_api_lb_upstreams with cookie-based
    session stickiness plus active health checks. The leading `{{-`
    keeps this comment from introducing whitespace into the output. -#}
{{- vega_caddy_server_api_lb_domains | map('ansible.builtin.regex_replace', '$', ':443') | join(', ') }} {
reverse_proxy {
{% for upstream in vega_caddy_server_api_lb_upstreams %}
to {{ upstream }}
{% endfor %}

# send the upstream's own host:port as the Host header, and report which
# upstream served the request back to the client for debugging.
header_up Host {upstream_hostport}
header_down X-LB-Upstream {upstream_hostport}

# sticky sessions: returning clients are pinned to one upstream via this
# cookie; clients without the cookie are assigned via random_choose 2.
lb_policy cookie 26403ec6d537fa31f63e294b44831734 {
fallback random_choose 2
}

# how many times to retry selecting available backends for each request if the next available
# host is down. By default, retries are disabled (zero).
lb_retries 0

# a duration value that defines how long to wait between selecting the next host from the pool.
# Default is 250ms. Only relevant when a request to an upstream host fails. Be aware that setting
# this to 0 with a non-zero lb_try_duration can cause the CPU to spin if all backends are down
# and latency is very low.
lb_try_interval 100ms

# the port to use for active health checks, if different from the upstream's port.
# NOTE(review): assumes every upstream answers /health-check on 443 — confirm.
health_port 443

# the URI path (and optional query) for active health checks.
health_uri /health-check

# a duration value that defines how often to perform active health checks. Default: 30s.
health_interval 3s

# the number of consecutive health checks required before marking the backend as healthy again.
# Default: 1.
health_passes 1

# the number of consecutive health checks required before marking the backend as unhealthy.
# Default: 1.
health_fails 2

# will cause the health check to follow redirects provided by upstream. By default, a redirect response
# would cause the health check to count as a fail.
health_follow_redirects

# a duration value that defines how long to wait for a reply before marking the backend as down.
# Default: 5s.
health_timeout 1s

# the HTTP status code to expect from a healthy backend. Can be a 3-digit status code, or a
# status code class ending in xx. For example: 200 (which is the default), or 2xx.
health_status 2xx

# duration value that defines how long to remember a failed request. A duration > 0 enables
# passive health checking; the default is 0 (off). A reasonable starting point might be 30s
# to balance error rates with responsiveness when bringing an unhealthy upstream back online;
# but feel free to experiment to find the right balance for your usecase.
fail_duration 30s # probably it needs to be higher because 30s may be not enough for vega to startup and there is no point to check it again
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{#- Caddy site: load balancer for the Vega block-explorer domains.
    Serves every entry of vega_caddy_server_block_explorer_lb_domains on
    :443 and proxies to vega_caddy_server_block_explorer_lb_upstreams with
    cookie-based session stickiness plus active health checks. The leading
    `{{-` keeps this comment from introducing whitespace into the output. -#}
{{- vega_caddy_server_block_explorer_lb_domains | map('ansible.builtin.regex_replace', '$', ':443') | join(', ') }} {
reverse_proxy {
{% for upstream in vega_caddy_server_block_explorer_lb_upstreams %}
to {{ upstream }}
{% endfor %}

# send the upstream's own host:port as the Host header, and report which
# upstream served the request back to the client for debugging.
header_up Host {upstream_hostport}
header_down X-LB-Upstream {upstream_hostport}

# sticky sessions: returning clients are pinned to one upstream via this
# cookie; clients without the cookie are assigned via random_choose 2.
# NOTE(review): the cookie name is identical to the one in api-lb.caddy.j2
# — harmless on distinct domains, but confirm that is the intent.
lb_policy cookie 26403ec6d537fa31f63e294b44831734 {
fallback random_choose 2
}

# how many times to retry selecting available backends for each request if the next available
# host is down. By default, retries are disabled (zero).
lb_retries 0

# a duration value that defines how long to wait between selecting the next host from the pool.
# Default is 250ms. Only relevant when a request to an upstream host fails. Be aware that setting
# this to 0 with a non-zero lb_try_duration can cause the CPU to spin if all backends are down
# and latency is very low.
lb_try_interval 100ms

# the port to use for active health checks, if different from the upstream's port.
# NOTE(review): assumes every upstream answers /health-check on 443 — confirm.
health_port 443

# the URI path (and optional query) for active health checks.
health_uri /health-check

# a duration value that defines how often to perform active health checks. Default: 30s.
health_interval 3s

# the number of consecutive health checks required before marking the backend as healthy again.
# Default: 1.
health_passes 1

# the number of consecutive health checks required before marking the backend as unhealthy.
# Default: 1.
health_fails 2

# will cause the health check to follow redirects provided by upstream. By default, a redirect response
# would cause the health check to count as a fail.
health_follow_redirects

# a duration value that defines how long to wait for a reply before marking the backend as down.
# Default: 5s.
health_timeout 1s

# the HTTP status code to expect from a healthy backend. Can be a 3-digit status code, or a
# status code class ending in xx. For example: 200 (which is the default), or 2xx.
health_status 2xx

# duration value that defines how long to remember a failed request. A duration > 0 enables
# passive health checking; the default is 0 (off). A reasonable starting point might be 30s
# to balance error rates with responsiveness when bringing an unhealthy upstream back online;
# but feel free to experiment to find the right balance for your usecase.
fail_duration 30s # probably it needs to be higher because 30s may be not enough for vega to startup and there is no point to check it again
}
}

0 comments on commit 134eecd

Please sign in to comment.