From 8a41d15b6972a8dacb2758d3fb7af55ba2a260a7 Mon Sep 17 00:00:00 2001 From: Donal McBreen Date: Thu, 31 Aug 2023 10:21:57 +0100 Subject: [PATCH] Zero downtime deployment with cord file When replacing a container, currently we: 1. Boot the new container 2. Wait for it to become healthy 3. Stop the old container Traefik will send requests to the old container until it notices that it is unhealthy. But it may have stopped serving requests before that point which can result in errors. To get round that, the new boot process is: 1. Create a directory with a single file on the host 2. Boot the new container, mounting the cord file into /tmp and including a check for the file in the docker healthcheck 3. Wait for it to become healthy 4. Delete the healthcheck file ("cut the cord") for the old container 5. Wait for it to become unhealthy and give Traefik a couple of seconds to notice 6. Stop the old container The extra steps ensure that Traefik stops sending requests before the old container is shut down. 
--- lib/kamal/cli/app.rb | 21 ++++++-- lib/kamal/commands/app.rb | 47 +++++++++------- lib/kamal/commands/base.rb | 10 ++++ lib/kamal/commands/healthcheck.rb | 2 +- lib/kamal/configuration.rb | 14 ++++- lib/kamal/configuration/role.rb | 53 ++++++++++++++++--- lib/kamal/utils/healthcheck_poller.rb | 29 +++++++++- test/cli/app_test.rb | 17 +++++- test/cli/healthcheck_test.rb | 1 + test/cli/main_test.rb | 19 ++++--- test/cli/traefik_test.rb | 2 + test/commands/app_test.rb | 41 +++++++------- test/configuration/role_test.rb | 20 +++++++ test/configuration_test.rb | 15 +++++- test/integration/docker/deployer/Dockerfile | 2 + .../docker/deployer/app/config/deploy.yml | 2 +- test/integration/main_test.rb | 2 +- 17 files changed, 233 insertions(+), 64 deletions(-) diff --git a/lib/kamal/cli/app.rb b/lib/kamal/cli/app.rb index 4892acd24..9c34e8dd7 100644 --- a/lib/kamal/cli/app.rb +++ b/lib/kamal/cli/app.rb @@ -18,8 +18,9 @@ def boot roles.each do |role| app = KAMAL.app(role: role) auditor = KAMAL.auditor(role: role) + role_config = KAMAL.config.role(role) - if capture_with_info(*app.container_id_for_version(version, only_running: true), raise_on_non_zero_exit: false).present? + if capture_with_info(*app.container_id_for_version(version), raise_on_non_zero_exit: false).present? tmp_version = "#{version}_replaced_#{SecureRandom.hex(8)}" info "Renaming container #{version} to #{tmp_version} as already deployed on #{host}" execute *auditor.record("Renaming container #{version} to #{tmp_version}"), verbosity: :debug @@ -29,11 +30,25 @@ def boot execute *auditor.record("Booted app version #{version}"), verbosity: :debug old_version = capture_with_info(*app.current_running_version, raise_on_non_zero_exit: false).strip - execute *app.start_or_run(hostname: "#{host}-#{SecureRandom.hex(6)}") + + if role_config.uses_cord? 
+ execute *app.tie_cord(role_config.cord_host_file) + end + + execute *app.run(hostname: "#{host}-#{SecureRandom.hex(6)}") Kamal::Utils::HealthcheckPoller.wait_for_healthy(pause_after_ready: true) { capture_with_info(*app.status(version: version)) } - execute *app.stop(version: old_version), raise_on_non_zero_exit: false if old_version.present? + if old_version.present? + if role_config.uses_cord? + cord = capture_with_info(*app.cord(version: old_version), raise_on_non_zero_exit: false).strip + if cord.present? + execute *app.cut_cord(cord) + Kamal::Utils::HealthcheckPoller.wait_for_unhealthy(pause_after_ready: true) { capture_with_info(*app.status(version: old_version)) } + end + end + execute *app.stop(version: old_version), raise_on_non_zero_exit: false + end end end end diff --git a/lib/kamal/commands/app.rb b/lib/kamal/commands/app.rb index cb1f7091c..7fac2e170 100644 --- a/lib/kamal/commands/app.rb +++ b/lib/kamal/commands/app.rb @@ -1,34 +1,29 @@ class Kamal::Commands::App < Kamal::Commands::Base ACTIVE_DOCKER_STATUSES = [ :running, :restarting ] - attr_reader :role + attr_reader :role, :role_config def initialize(config, role: nil) super(config) @role = role - end - - def start_or_run(hostname: nil) - combine start, run(hostname: hostname), by: "||" + @role_config = config.role(self.role) end def run(hostname: nil) - role = config.role(self.role) - docker :run, "--detach", "--restart unless-stopped", "--name", container_name, *(["--hostname", hostname] if hostname), "-e", "KAMAL_CONTAINER_NAME=\"#{container_name}\"", - *role.env_args, - *role.health_check_args, + *role_config.env_args, + *role_config.health_check_args, *config.logging_args, *config.volume_args, - *role.label_args, - *role.option_args, + *role_config.label_args, + *role_config.option_args, config.absolute_image, - role.cmd + role_config.cmd end def start @@ -76,14 +71,12 @@ def execute_in_existing_container(*command, interactive: false) end def execute_in_new_container(*command, interactive: 
false) - role = config.role(self.role) - docker :run, ("-it" if interactive), "--rm", - *role&.env_args, + *role_config&.env_args, *config.volume_args, - *role&.option_args, + *role_config&.option_args, config.absolute_image, *command end @@ -112,7 +105,7 @@ def current_running_version def list_versions(*docker_args, statuses: nil) pipe \ docker(:ps, *filter_args(statuses: statuses), *docker_args, "--format", '"{{.Names}}"'), - %(while read line; do echo ${line##{service_role_dest}-}; done) # Extract SHA from "service-role-dest-SHA" + %(while read line; do echo ${line##{role_config.full_name}-}; done) # Extract SHA from "service-role-dest-SHA" end def list_containers @@ -150,16 +143,30 @@ def tag_current_as_latest end def make_env_directory - make_directory config.role(role).host_env_directory + make_directory role_config.host_env_directory end def remove_env_file - [:rm, "-f", config.role(role).host_env_file_path] + [:rm, "-f", role_config.host_env_file_path] + end + + def cord(version:) + pipe \ + docker(:inspect, "-f '{{ range .Mounts }}{{ .Source }} {{ .Destination }} {{ end }}'", container_name(version)), + [:awk, "'$2 == \"#{role_config.cord_container_directory}\" {print $1}'"] + end + + def tie_cord(cord) + create_empty_file(cord) + end + + def cut_cord(cord) + remove_directory(cord) end private def container_name(version = nil) - [ config.service, role, config.destination, version || config.version ].compact.join("-") + [ role_config.full_name, version || config.version ].compact.join("-") end def filter_args(statuses: nil) diff --git a/lib/kamal/commands/base.rb b/lib/kamal/commands/base.rb index 3058df162..ff31c7475 100644 --- a/lib/kamal/commands/base.rb +++ b/lib/kamal/commands/base.rb @@ -34,6 +34,10 @@ def make_directory(path) [ :mkdir, "-p", path ] end + def remove_directory(path) + [ :rm, "-r", path ] + end + private def combine(*commands, by: "&&") commands @@ -69,5 +73,11 @@ def docker(*args) def tags(**details) Kamal::Tags.from_config(config, 
**details) end + + def create_empty_file(file) + chain \ + make_directory_for(file), + [:touch, file] + end end end diff --git a/lib/kamal/commands/healthcheck.rb b/lib/kamal/commands/healthcheck.rb index 4327087dd..fa050b9cb 100644 --- a/lib/kamal/commands/healthcheck.rb +++ b/lib/kamal/commands/healthcheck.rb @@ -10,7 +10,7 @@ def run "--label", "service=#{container_name}", "-e", "KAMAL_CONTAINER_NAME=\"#{container_name}\"", *web.env_args, - *web.health_check_args, + *web.health_check_args(cord: false), *config.volume_args, *web.option_args, config.absolute_image, diff --git a/lib/kamal/configuration.rb b/lib/kamal/configuration.rb index 8db892fc5..af6e46c5f 100644 --- a/lib/kamal/configuration.rb +++ b/lib/kamal/configuration.rb @@ -61,6 +61,14 @@ def run_directory raw_config.run_directory || ".kamal" end + def run_directory_as_docker_volume + if Pathname.new(run_directory).absolute? + run_directory + else + File.join "$(pwd)", run_directory + end + end + def roles @roles ||= role_names.collect { |role_name| Role.new(role_name, config: self) } @@ -141,7 +149,7 @@ def sshkit def healthcheck - { "path" => "/up", "port" => 3000, "max_attempts" => 7, "exposed_port" => 3999 }.merge(raw_config.healthcheck || {}) + { "path" => "/up", "port" => 3000, "max_attempts" => 7, "exposed_port" => 3999, "cord" => "/tmp/kamal-cord" }.merge(raw_config.healthcheck || {}) end def readiness_delay @@ -199,6 +207,10 @@ def host_env_directory "#{run_directory}/env" end + def run_id + @run_id ||= SecureRandom.hex(16) + end + private # Will raise ArgumentError if any required config keys are missing def ensure_required_keys_present diff --git a/lib/kamal/configuration/role.rb b/lib/kamal/configuration/role.rb index f549d459b..6c57bc1dc 100644 --- a/lib/kamal/configuration/role.rb +++ b/lib/kamal/configuration/role.rb @@ -1,4 +1,5 @@ class Kamal::Configuration::Role + CORD_FILE = "cord" delegate :argumentize, :env_file_with_secrets, :optionize, to: Kamal::Utils attr_accessor :name @@ 
-47,28 +48,52 @@ def env_args argumentize "--env-file", host_env_file_path end - def health_check_args + def health_check_args(cord: true) if health_check_cmd.present? - optionize({ "health-cmd" => health_check_cmd, "health-interval" => health_check_interval }) + if cord && uses_cord? + optionize({ "health-cmd" => health_check_cmd_with_cord, "health-interval" => health_check_interval }) + .concat(["--volume", "#{cord_host_directory}:#{cord_container_directory}"]) + else + optionize({ "health-cmd" => health_check_cmd, "health-interval" => health_check_interval }) + end else [] end end def health_check_cmd - options = specializations["healthcheck"] || {} - options = config.healthcheck.merge(options) if running_traefik? + health_check_options["cmd"] || http_health_check(port: health_check_options["port"], path: health_check_options["path"]) + end - options["cmd"] || http_health_check(port: options["port"], path: options["path"]) + def health_check_cmd_with_cord + "(#{health_check_cmd}) && (stat #{cord_container_file} > /dev/null || exit 1)" end def health_check_interval - options = specializations["healthcheck"] || {} - options = config.healthcheck.merge(options) if running_traefik? + health_check_options["interval"] || "1s" + end + + def uses_cord? + running_traefik? && cord_container_directory.present? && health_check_cmd.present? + end + + def cord_host_directory + File.join config.run_directory_as_docker_volume, "cords", [full_name, config.run_id].join("-") + end + + def cord_host_file + File.join cord_host_directory, CORD_FILE + end + + def cord_container_directory + health_check_options.fetch("cord", nil) + end - options["interval"] || "1s" + def cord_container_file + File.join cord_container_directory, CORD_FILE end + def cmd specializations["cmd"] end @@ -85,6 +110,10 @@ def running_traefik? name.web? 
|| specializations["traefik"] end + def full_name + [ config.service, name, config.destination ].compact.join("-") + end + private attr_accessor :config @@ -164,4 +193,12 @@ def merged_env_with_secrets def http_health_check(port:, path:) "curl -f #{URI.join("http://localhost:#{port}", path)} || exit 1" if path.present? || port.present? end + + def health_check_options + @health_check_options ||= begin + options = specializations["healthcheck"] || {} + options = config.healthcheck.merge(options) if running_traefik? + options + end + end end diff --git a/lib/kamal/utils/healthcheck_poller.rb b/lib/kamal/utils/healthcheck_poller.rb index ddb09ec65..27a2ff7d5 100644 --- a/lib/kamal/utils/healthcheck_poller.rb +++ b/lib/kamal/utils/healthcheck_poller.rb @@ -1,5 +1,5 @@ class Kamal::Utils::HealthcheckPoller - TRAEFIK_HEALTHY_DELAY = 2 + TRAEFIK_UPDATE_DELAY = 2 class HealthcheckError < StandardError; end @@ -11,7 +11,7 @@ def wait_for_healthy(pause_after_ready: false, &block) begin case status = block.call when "healthy" - sleep TRAEFIK_HEALTHY_DELAY if pause_after_ready + sleep TRAEFIK_UPDATE_DELAY if pause_after_ready when "running" # No health check configured sleep KAMAL.config.readiness_delay if pause_after_ready else @@ -31,6 +31,31 @@ def wait_for_healthy(pause_after_ready: false, &block) info "Container is healthy!" end + def wait_for_unhealthy(pause_after_ready: false, &block) + attempt = 1 + max_attempts = KAMAL.config.healthcheck["max_attempts"] + + begin + case status = block.call + when "unhealthy" + sleep TRAEFIK_UPDATE_DELAY if pause_after_ready + else + raise HealthcheckError, "container not unhealthy (#{status})" + end + rescue HealthcheckError => e + if attempt <= max_attempts + info "#{e.message}, retrying in #{attempt}s (attempt #{attempt}/#{max_attempts})..." + sleep attempt + attempt += 1 + retry + else + raise + end + end + + info "Container is unhealthy!" 
+ end + private def info(message) SSHKit.config.output.info(message) diff --git a/test/cli/app_test.rb b/test/cli/app_test.rb index 8a90e90ab..d0e6268af 100644 --- a/test/cli/app_test.rb +++ b/test/cli/app_test.rb @@ -11,10 +11,11 @@ class CliAppTest < CliTestCase end test "boot will rename if same version is already running" do + Object.any_instance.stubs(:sleep) run_command("details") # Preheat Kamal const SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info) - .with(:docker, :container, :ls, "--filter", "name=^app-web-latest$", "--quiet", raise_on_non_zero_exit: false) + .with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", raise_on_non_zero_exit: false) .returns("12345678") # running version SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info) @@ -25,6 +26,14 @@ class CliAppTest < CliTestCase .with(:docker, :ps, "--filter", "label=service=app", "--filter", "label=role=web", "--filter", "status=running", "--filter", "status=restarting", "--latest", "--format", "\"{{.Names}}\"", "|", "while read line; do echo ${line#app-web-}; done", raise_on_non_zero_exit: false) .returns("123") # old version + SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info) + .with(:docker, :inspect, "-f '{{ range .Mounts }}{{ .Source }} {{ .Destination }} {{ end }}'", "app-web-123", "|", :awk, "'$2 == \"/tmp/kamal-cord\" {print $1}'", :raise_on_non_zero_exit => false) + .returns("cordfile") # old version + + SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info) + .with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-123$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'") + .returns("unhealthy") # old version unhealthy + run_command("boot").tap do |output| assert_match /Renaming container .* to .* as already deployed on 1.1.1.1/, output # Rename assert_match /docker rename app-web-latest 
app-web-latest_replaced_[0-9a-f]{16}/, output @@ -180,10 +189,16 @@ def run_command(*command, config: :with_accessories) end def stub_running + Object.any_instance.stubs(:sleep) + SSHKit::Backend::Abstract.any_instance.stubs(:capture_with_info).returns("123") # old version SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info) .with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-latest$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'") .returns("running") # health check + + SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info) + .with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-123$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'") + .returns("unhealthy") # health check end end diff --git a/test/cli/healthcheck_test.rb b/test/cli/healthcheck_test.rb index f9c3aa9c0..b07c4900c 100644 --- a/test/cli/healthcheck_test.rb +++ b/test/cli/healthcheck_test.rb @@ -6,6 +6,7 @@ class CliHealthcheckTest < CliTestCase Thread.report_on_exception = false Kamal::Utils::HealthcheckPoller.stubs(:sleep) # No sleeping when retrying + Kamal::Configuration.any_instance.stubs(:run_id).returns("12345678901234567890123456789012") SSHKit::Backend::Abstract.any_instance.stubs(:execute) .with(:docker, :container, :ls, "--all", "--filter", "name=^healthcheck-app-999$", "--quiet", "|", :xargs, :docker, :stop, raise_on_non_zero_exit: false) diff --git a/test/cli/main_test.rb b/test/cli/main_test.rb index 04f115750..28ef97ffc 100644 --- a/test/cli/main_test.rb +++ b/test/cli/main_test.rb @@ -176,9 +176,10 @@ class CliMainTest < CliTestCase end test "rollback good version" do + Object.any_instance.stubs(:sleep) [ "web", "workers" ].each do |role| SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info) - .with(:docker, :container, :ls, "--filter", 
"name=^app-#{role}-123$", "--quiet", raise_on_non_zero_exit: false) + .with(:docker, :container, :ls, "--all", "--filter", "name=^app-#{role}-123$", "--quiet", raise_on_non_zero_exit: false) .returns("").at_least_once SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info) .with(:docker, :container, :ls, "--all", "--filter", "name=^app-#{role}-123$", "--quiet") @@ -191,14 +192,21 @@ class CliMainTest < CliTestCase .returns("running").at_least_once # health check end + SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info) + .with(:docker, :inspect, "-f '{{ range .Mounts }}{{ .Source }} {{ .Destination }} {{ end }}'", "app-web-version-to-rollback", "|", :awk, "'$2 == \"/tmp/kamal-cord\" {print $1}'", :raise_on_non_zero_exit => false) + .returns("corddirectory").at_least_once # health check + + SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info) + .with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-version-to-rollback$", "--quiet", "|", :xargs, :docker, :inspect, "--format", "'{{if .State.Health}}{{.State.Health.Status}}{{else}}{{.State.Status}}{{end}}'") + .returns("unhealthy").at_least_once # health check + Kamal::Commands::Hook.any_instance.stubs(:hook_exists?).returns(true) hook_variables = { version: 123, service_version: "app@123", hosts: "1.1.1.1,1.1.1.2,1.1.1.3,1.1.1.4", command: "rollback" } run_command("rollback", "123", config_file: "deploy_with_accessories").tap do |output| - assert_match "Start container with version 123", output assert_hook_ran "pre-deploy", output, **hook_variables assert_match "docker tag dhh/app:123 dhh/app:latest", output - assert_match "docker start app-web-123", output + assert_match "docker run --detach --restart unless-stopped --name app-web-123", output assert_match "docker container ls --all --filter name=^app-web-version-to-rollback$ --quiet | xargs docker stop", output, "Should stop the container that was previously running" assert_hook_ran "post-deploy", output, 
**hook_variables, runtime: "0" end @@ -210,7 +218,7 @@ class CliMainTest < CliTestCase Kamal::Utils::HealthcheckPoller.stubs(:sleep) SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info) - .with(:docker, :container, :ls, "--filter", "name=^app-web-123$", "--quiet", raise_on_non_zero_exit: false) + .with(:docker, :container, :ls, "--all", "--filter", "name=^app-web-123$", "--quiet", raise_on_non_zero_exit: false) .returns("").at_least_once SSHKit::Backend::Abstract.any_instance.expects(:capture_with_info) .with(:docker, :ps, "--filter", "label=service=app", "--filter", "label=role=web", "--filter", "status=running", "--filter", "status=restarting", "--latest", "--format", "\"{{.Names}}\"", "|", "while read line; do echo ${line#app-web-}; done", raise_on_non_zero_exit: false) @@ -220,8 +228,7 @@ class CliMainTest < CliTestCase .returns("running").at_least_once # health check run_command("rollback", "123").tap do |output| - assert_match "Start container with version 123", output - assert_match "docker start app-web-123 || docker run --detach --restart unless-stopped --name app-web-123", output + assert_match "docker run --detach --restart unless-stopped --name app-web-123", output assert_no_match "docker stop", output end end diff --git a/test/cli/traefik_test.rb b/test/cli/traefik_test.rb index 1b19b0331..6c6fbf641 100644 --- a/test/cli/traefik_test.rb +++ b/test/cli/traefik_test.rb @@ -19,6 +19,8 @@ class CliTraefikTest < CliTestCase end test "reboot --rolling" do + Object.any_instance.stubs(:sleep) + run_command("reboot", "--rolling").tap do |output| assert_match "Running docker container prune --force --filter label=org.opencontainers.image.title=Traefik on 1.1.1.1", output end diff --git a/test/commands/app_test.rb b/test/commands/app_test.rb index ab3f6ace8..7b769c6df 100644 --- a/test/commands/app_test.rb +++ b/test/commands/app_test.rb @@ -3,6 +3,7 @@ class CommandsAppTest < ActiveSupport::TestCase setup do ENV["RAILS_MASTER_KEY"] = "456" + 
Kamal::Configuration.any_instance.stubs(:run_id).returns("12345678901234567890123456789012") @config = { service: "app", image: "dhh/app", registry: { "username" => "dhh", "password" => "secret" }, servers: [ "1.1.1.1" ], env: { "secret" => [ "RAILS_MASTER_KEY" ] } } end @@ -13,13 +14,13 @@ class CommandsAppTest < ActiveSupport::TestCase test "run" do assert_equal \ - "docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", + "docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"(curl -f http://localhost:3000/up || exit 1) && (stat /tmp/kamal-cord/cord > /dev/null || exit 1)\" --health-interval \"1s\" --volume $(pwd)/.kamal/cords/app-web-12345678901234567890123456789012:/tmp/kamal-cord --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", new_command.run.join(" ") end test "run with hostname" do assert_equal \ - "docker run --detach --restart unless-stopped --name 
app-web-999 --hostname myhost -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", + "docker run --detach --restart unless-stopped --name app-web-999 --hostname myhost -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"(curl -f http://localhost:3000/up || exit 1) && (stat /tmp/kamal-cord/cord > /dev/null || exit 1)\" --health-interval \"1s\" --volume $(pwd)/.kamal/cords/app-web-12345678901234567890123456789012:/tmp/kamal-cord --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", new_command.run(hostname: "myhost").join(" ") end @@ -27,7 +28,7 @@ class CommandsAppTest < ActiveSupport::TestCase @config[:volumes] = ["/local/path:/container/path" ] assert_equal \ - "docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --log-opt max-size=\"10m\" --volume /local/path:/container/path --label service=\"app\" --label 
role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", + "docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"(curl -f http://localhost:3000/up || exit 1) && (stat /tmp/kamal-cord/cord > /dev/null || exit 1)\" --health-interval \"1s\" --volume $(pwd)/.kamal/cords/app-web-12345678901234567890123456789012:/tmp/kamal-cord --log-opt max-size=\"10m\" --volume /local/path:/container/path --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", new_command.run.join(" ") end @@ -35,7 +36,7 @@ class CommandsAppTest < ActiveSupport::TestCase @config[:healthcheck] = { "path" => "/healthz" } assert_equal \ - "docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"curl -f http://localhost:3000/healthz || exit 1\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label 
traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", + "docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"(curl -f http://localhost:3000/healthz || exit 1) && (stat /tmp/kamal-cord/cord > /dev/null || exit 1)\" --health-interval \"1s\" --volume $(pwd)/.kamal/cords/app-web-12345678901234567890123456789012:/tmp/kamal-cord --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", new_command.run.join(" ") end @@ -43,7 +44,7 @@ class CommandsAppTest < ActiveSupport::TestCase @config[:healthcheck] = { "cmd" => "/bin/up" } assert_equal \ - "docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"/bin/up\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", + "docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"(/bin/up) && (stat /tmp/kamal-cord/cord > /dev/null 
|| exit 1)\" --health-interval \"1s\" --volume $(pwd)/.kamal/cords/app-web-12345678901234567890123456789012:/tmp/kamal-cord --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", new_command.run.join(" ") end @@ -51,7 +52,7 @@ class CommandsAppTest < ActiveSupport::TestCase @config[:servers] = { "web" => { "hosts" => [ "1.1.1.1" ], "healthcheck" => { "cmd" => "/bin/healthy" } } } assert_equal \ - "docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"/bin/healthy\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", + "docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"(/bin/healthy) && (stat /tmp/kamal-cord/cord > /dev/null || exit 1)\" --health-interval \"1s\" --volume $(pwd)/.kamal/cords/app-web-12345678901234567890123456789012:/tmp/kamal-cord --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" 
--label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", new_command.run.join(" ") end @@ -66,7 +67,7 @@ class CommandsAppTest < ActiveSupport::TestCase @config[:logging] = { "driver" => "local", "options" => { "max-size" => "100m", "max-file" => "3" } } assert_equal \ - "docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --log-driver \"local\" --log-opt max-size=\"100m\" --log-opt max-file=\"3\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", + "docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"(curl -f http://localhost:3000/up || exit 1) && (stat /tmp/kamal-cord/cord > /dev/null || exit 1)\" --health-interval \"1s\" --volume $(pwd)/.kamal/cords/app-web-12345678901234567890123456789012:/tmp/kamal-cord --log-driver \"local\" --log-opt max-size=\"100m\" --log-opt max-file=\"3\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label 
traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", new_command.run.join(" ") end @@ -83,18 +84,6 @@ class CommandsAppTest < ActiveSupport::TestCase new_command.start.join(" ") end - test "start_or_run" do - assert_equal \ - "docker start app-web-999 || docker run --detach --restart unless-stopped --name app-web-999 -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", - new_command.start_or_run.join(" ") - end - - test "start_or_run with hostname" do - assert_equal \ - "docker start app-web-999 || docker run --detach --restart unless-stopped --name app-web-999 --hostname myhost -e KAMAL_CONTAINER_NAME=\"app-web-999\" --env-file .kamal/env/roles/app-web.env --health-cmd \"curl -f http://localhost:3000/up || exit 1\" --health-interval \"1s\" --log-opt max-size=\"10m\" --label service=\"app\" --label role=\"web\" --label traefik.http.services.app-web.loadbalancer.server.scheme=\"http\" --label traefik.http.routers.app-web.rule=\"PathPrefix(\\`/\\`)\" --label traefik.http.middlewares.app-web-retry.retry.attempts=\"5\" --label traefik.http.middlewares.app-web-retry.retry.initialinterval=\"500ms\" --label traefik.http.routers.app-web.middlewares=\"app-web-retry@docker\" dhh/app:999", - new_command.start_or_run(hostname: "myhost").join(" ") - end - test "stop" do assert_equal \ "docker ps --quiet --filter label=service=app --filter label=role=web --filter status=running --filter status=restarting 
--latest | xargs docker stop", @@ -342,6 +331,20 @@ class CommandsAppTest < ActiveSupport::TestCase assert_equal "rm -f .kamal/env/roles/app-web.env", new_command.remove_env_file.join(" ") end + test "cord" do + assert_equal "docker inspect -f '{{ range .Mounts }}{{ .Source }} {{ .Destination }} {{ end }}' app-web-123 | awk '$2 == \"/tmp/kamal-cord\" {print $1}'", new_command.cord(version: 123).join(" ") + end + + test "tie cord" do + assert_equal "mkdir -p . ; touch cordfile", new_command.tie_cord("cordfile").join(" ") + assert_equal "mkdir -p corddir ; touch corddir/cordfile", new_command.tie_cord("corddir/cordfile").join(" ") + assert_equal "mkdir -p /corddir ; touch /corddir/cordfile", new_command.tie_cord("/corddir/cordfile").join(" ") + end + + test "cut cord" do + assert_equal "rm -r corddir", new_command.cut_cord("corddir").join(" ") + end + private def new_command(role: "web") Kamal::Commands::App.new(Kamal::Configuration.new(@config, destination: @destination, version: "999"), role: role) diff --git a/test/configuration/role_test.rb b/test/configuration/role_test.rb index b29ac2b53..650039794 100644 --- a/test/configuration/role_test.rb +++ b/test/configuration/role_test.rb @@ -175,4 +175,24 @@ class ConfigurationRoleTest < ActiveSupport::TestCase assert_equal ".kamal/env/roles/app-workers.env", @config_with_roles.role(:workers).host_env_file_path end + test "uses cord" do + assert @config_with_roles.role(:web).uses_cord? + assert !@config_with_roles.role(:workers).uses_cord? 
+ end + + test "cord host directory" do + assert_match %r{\$\(pwd\)/.kamal/cords/app-web-[0-9a-f]{32}}, @config_with_roles.role(:web).cord_host_directory + end + + test "cord host file" do + assert_match %r{\$\(pwd\)/.kamal/cords/app-web-[0-9a-f]{32}/cord}, @config_with_roles.role(:web).cord_host_file + end + + test "cord container directory" do + assert_equal "/tmp/kamal-cord", @config_with_roles.role(:web).cord_container_directory + end + + test "cord container file" do + assert_equal "/tmp/kamal-cord/cord", @config_with_roles.role(:web).cord_container_file + end end diff --git a/test/configuration_test.rb b/test/configuration_test.rb index d31e0dbdf..59046760c 100644 --- a/test/configuration_test.rb +++ b/test/configuration_test.rb @@ -224,7 +224,7 @@ class ConfigurationTest < ActiveSupport::TestCase :volume_args=>["--volume", "/local/path:/container/path"], :builder=>{}, :logging=>["--log-opt", "max-size=\"10m\""], - :healthcheck=>{ "path"=>"/up", "port"=>3000, "max_attempts" => 7, "exposed_port" => 3999 }} + :healthcheck=>{ "path"=>"/up", "port"=>3000, "max_attempts" => 7, "exposed_port" => 3999, "cord" => "/tmp/kamal-cord" }} assert_equal expected_config, @config.to_h end @@ -252,4 +252,17 @@ class ConfigurationTest < ActiveSupport::TestCase config = Kamal::Configuration.new(@deploy.merge!(run_directory: "/root/kamal")) assert_equal "/root/kamal", config.run_directory end + + test "run directory as docker volume" do + config = Kamal::Configuration.new(@deploy) + assert_equal "$(pwd)/.kamal", config.run_directory_as_docker_volume + + config = Kamal::Configuration.new(@deploy.merge!(run_directory: "/root/kamal")) + assert_equal "/root/kamal", config.run_directory_as_docker_volume + end + + test "run id" do + SecureRandom.expects(:hex).with(16).returns("09876543211234567890098765432112") + assert_equal "09876543211234567890098765432112", @config.run_id + end end diff --git a/test/integration/docker/deployer/Dockerfile 
b/test/integration/docker/deployer/Dockerfile index b964e0e3d..2b49fcb08 100644 --- a/test/integration/docker/deployer/Dockerfile +++ b/test/integration/docker/deployer/Dockerfile @@ -2,6 +2,8 @@ FROM ruby:3.2 WORKDIR /app +ENV VERBOSE=true + RUN apt-get update --fix-missing && apt-get install -y ca-certificates openssh-client curl gnupg docker.io RUN install -m 0755 -d /etc/apt/keyrings diff --git a/test/integration/docker/deployer/app/config/deploy.yml b/test/integration/docker/deployer/app/config/deploy.yml index 6ecb94b31..fe4a26ff7 100644 --- a/test/integration/docker/deployer/app/config/deploy.yml +++ b/test/integration/docker/deployer/app/config/deploy.yml @@ -18,7 +18,7 @@ builder: args: COMMIT_SHA: <%= `git rev-parse HEAD` %> healthcheck: - cmd: wget -qO- http://localhost > /dev/null + cmd: wget -qO- http://localhost > /dev/null || exit 1 traefik: args: accesslog: true diff --git a/test/integration/main_test.rb b/test/integration/main_test.rb index 08e2567f6..e10d9e526 100644 --- a/test/integration/main_test.rb +++ b/test/integration/main_test.rb @@ -54,7 +54,7 @@ class MainTest < IntegrationTest assert_equal({ user: "root", auth_methods: [ "publickey" ], keepalive: true, keepalive_interval: 30, log_level: :fatal }, config[:ssh_options]) assert_equal({ "multiarch" => false, "args" => { "COMMIT_SHA" => version } }, config[:builder]) assert_equal [ "--log-opt", "max-size=\"10m\"" ], config[:logging] - assert_equal({ "path" => "/up", "port" => 3000, "max_attempts" => 7, "exposed_port" => 3999, "cmd" => "wget -qO- http://localhost > /dev/null" }, config[:healthcheck]) + assert_equal({ "path" => "/up", "port" => 3000, "max_attempts" => 7, "exposed_port" => 3999, "cord"=>"/tmp/kamal-cord", "cmd"=>"wget -qO- http://localhost > /dev/null || exit 1" }, config[:healthcheck]) end private