From 5971696b9a5341ae5798405291751c109cd97b4e Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Wed, 13 Nov 2024 13:08:13 +0400 Subject: [PATCH] pg17 upgrade (#87) * remove gcr creds and use public image * fix lantern extras installation instructions * keep pg_version information in lantern_resource * add pg_upgrade script * fix pg_upgrade script, add dns failover mechanism and set it as default * improve dns switchover * reduce wait time when stopping container on switchover * correctly drop replication slot and publications after replica destroyed * wait for logical replication lag to be 0 before doing switchover * setup ssl on fork to not waste time on switchover * fix ssl cert check issue * fixes and improvements on adding domain and ssl setup * add rollback switchover functionality in misc operations --- config.rb | 4 +- .../20241107_lantern_resource_pg_version.rb | 10 + misc/misc_operations.rb | 26 +- model/lantern/lantern_resource.rb | 45 +++- model/lantern/lantern_server.rb | 61 ++++- prog/lantern/lantern_resource_nexus.rb | 114 +++++++-- prog/lantern/lantern_server_nexus.rb | 134 ++++++---- prog/lantern/lantern_timeline_nexus.rb | 2 +- rhizome/lantern/bin/configure | 9 +- rhizome/lantern/bin/run_pg_upgrade | 50 ++++ rhizome/lantern/bin/update_docker_image | 5 +- rhizome/lantern/bin/update_extras | 10 +- rhizome/lantern/lib/common.rb | 46 ++-- routes/api/project/location/lantern.rb | 7 +- routes/web/project/location/lantern.rb | 6 +- spec/model/lantern/lantern_resource_spec.rb | 67 +++-- spec/model/lantern/lantern_server_spec.rb | 155 ++++++++++-- .../lantern/lantern_resource_nexus_spec.rb | 200 +++++++++++++-- .../prog/lantern/lantern_server_nexus_spec.rb | 230 +++++++++++++----- .../lantern/lantern_timeline_nexus_spec.rb | 1 + .../api/project/location/lantern_spec.rb | 2 +- 21 files changed, 947 insertions(+), 237 deletions(-) create mode 100644 migrate/20241107_lantern_resource_pg_version.rb create mode 100755 rhizome/lantern/bin/run_pg_upgrade diff 
--git a/config.rb b/config.rb index 099a23d5c..8e8ae96bc 100644 --- a/config.rb +++ b/config.rb @@ -136,13 +136,12 @@ def self.e2e_test? # GCP override :gcp_project_id, "lantern-development", string override :gcp_compute_service_account, "339254316100-compute@developer.gserviceaccount.com", string - optional :gcp_creds_gcr_b64, string optional :gcp_creds_logging_b64, string optional :gcp_creds_coredumps_b64, string optional :gcp_creds_walg_b64, string optional :prom_password, string override :gcp_default_image, "projects/ubuntu-os-cloud/global/images/ubuntu-2204-jammy-v20240319", string - override :gcr_image, "gcr.io/ringed-griffin-394922/lantern-bitnami" + override :gcr_image, "lanterndata/lantern-self-hosted" # Lantern override :lantern_top_domain, "db.lantern.dev", string @@ -153,6 +152,7 @@ def self.e2e_test? override :lantern_backup_bucket, "walg-dev-backups" override :e2e_test, "0" override :backup_retention_days, 7, int + override :backup_retention_days_after_deletion, 0, int override :lantern_log_dataset, "lantern_logs", string override :compose_file, "/var/lib/lantern/docker-compose.yaml", string diff --git a/migrate/20241107_lantern_resource_pg_version.rb b/migrate/20241107_lantern_resource_pg_version.rb new file mode 100644 index 000000000..ec000df4b --- /dev/null +++ b/migrate/20241107_lantern_resource_pg_version.rb @@ -0,0 +1,10 @@ +# frozen_string_literal: true + +Sequel.migration do + change do + alter_table(:lantern_resource) do + add_column :pg_version, Integer, default: 17 + end + run "UPDATE lantern_resource SET pg_version=15" + end +end diff --git a/misc/misc_operations.rb b/misc/misc_operations.rb index 0891ef886..4cebf2eaf 100644 --- a/misc/misc_operations.rb +++ b/misc/misc_operations.rb @@ -222,7 +222,6 @@ def self.create_image(lantern_version: "0.2.7", extras_version: "0.1.5", minor_v rm -rf /tmp/get-docker.sh sudo sed -i 's/ulimit -Hn/ulimit -n/' /etc/init.d/docker sudo service docker restart -echo #{Config.gcp_creds_gcr_b64} | base64 -d | 
sudo docker login -u _json_key --password-stdin https://gcr.io sudo docker pull #{container_image} sudo docker logout history -cw @@ -241,4 +240,29 @@ def self.create_image(lantern_version: "0.2.7", extras_version: "0.1.5", minor_v puts "Image created" vm.incr_destroy end + + def self.rollback_switchover(current_resource, old_resource) + # stop current one and start old one + begin + current_resource.representative_server.stop_container(1) + rescue + end + + old_resource.representative_server.start_container + + # update dns + cf_client = Dns::Cloudflare.new + cf_client.upsert_dns_record(current_resource.representative_server.domain, old_resource.representative_server.vm.sshable.host) + old_resource.representative_server.update(domain: current_resource.representative_server.domain) + current_resource.representative_server.update(domain: nil) + + # disable readonly as soon as it is started + loop do + old_resource.representative_server.run_query("SELECT 1") + old_resource.set_to_readonly(status: "off") + break + rescue + sleep 10 + end + end end diff --git a/model/lantern/lantern_resource.rb b/model/lantern/lantern_resource.rb index 3cca21879..9fb4fb15a 100644 --- a/model/lantern/lantern_resource.rb +++ b/model/lantern/lantern_resource.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require "uri" require_relative "../../model" class LanternResource < Sequel::Model @@ -21,7 +22,7 @@ class LanternResource < Sequel::Model include Authorization::HyperTagMethods include Authorization::TaggableMethods - semaphore :destroy, :swap_leaders_with_parent + semaphore :destroy, :swap_leaders_with_parent, :switchover_with_parent plugin :column_encryption do |enc| enc.column :superuser_password @@ -74,8 +75,13 @@ def dissociate_forks def setup_service_account api = Hosting::GcpApis.new service_account = api.create_service_account("lt-#{ubid}", "Service Account for Lantern #{name}") - key = api.export_service_account_key(service_account["email"]) - update(gcp_creds_b64: key, 
service_account_name: service_account["email"]) + update(service_account_name: service_account["email"]) + end + + def export_service_account_key + api = Hosting::GcpApis.new + key = api.export_service_account_key(service_account_name) + update(gcp_creds_b64: key) end def allow_timeline_access_to_bucket @@ -104,6 +110,10 @@ def delete_replication_slot(name) representative_server.run_query("SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots WHERE slot_name='#{name}';") end + def get_logical_replication_lag(slot_name) + representative_server.run_query("SELECT (pg_current_wal_lsn() - confirmed_flush_lsn) FROM pg_catalog.pg_replication_slots WHERE slot_name = '#{slot_name}'").chomp.to_i + end + def create_ddl_log commands = < 80 * 1024 * 1024 # 80 MB or ~5 WAL files lantern_server.update(synchronization_status: "ready") - lantern_server.resource.delete_replication_slot(lantern_server.ubid) if !lantern_server.domain && !lantern_server.resource.representative_server.domain.nil? - add_domain_to_stack(lantern_server.resource.representative_server.domain) + lantern_server.add_domain_to_stack(lantern_server.resource.representative_server.domain, strand) incr_setup_ssl end @@ -202,6 +201,8 @@ def before_run end end + current_frame = strand.stack.first + if !is_in_recovery timeline_id = Prog::Lantern::LanternTimelineNexus.assemble(parent_id: lantern_server.timeline.id).id lantern_server.timeline_id = timeline_id @@ -222,6 +223,14 @@ def before_run incr_update_extras_extension lantern_server.update(extras_version: extras_version) end + elsif !current_frame["pg_upgrade"].nil? + incr_run_pg_upgrade + end + + if lantern_server.resource.logical_replication && !lantern_server.resource.parent.representative_server.domain.nil? 
+ # prepare for fast switchover + lantern_server.add_domain_to_stack(lantern_server.resource.parent.representative_server.domain, strand) + incr_setup_ssl end hop_wait_timeline_available @@ -230,6 +239,50 @@ def before_run nap 5 end + label def run_pg_upgrade + decr_run_pg_upgrade + lantern_server.resource.drop_ddl_log_trigger + pg_upgrade_info = strand.stack.first["pg_upgrade"] + vm.sshable.cmd( + "common/bin/daemonizer 'sudo lantern/bin/run_pg_upgrade' pg_upgrade", + stdin: JSON.generate({ + container_image: lantern_server.container_image( + pg_upgrade_info["lantern_version"], + pg_upgrade_info["extras_version"], + pg_upgrade_info["minor_version"] + ), + pg_version: pg_upgrade_info["pg_version"], + old_pg_version: lantern_server.resource.pg_version + }) + ) + hop_wait_pg_upgrade + end + + label def wait_pg_upgrade + current_frame = strand.stack.first + case vm.sshable.cmd("common/bin/daemonizer --check pg_upgrade") + when "Succeeded" + pg_upgrade_info = current_frame["pg_upgrade"] + lantern_server.resource.update(pg_version: pg_upgrade_info["pg_version"]) + lantern_server.update( + lantern_version: pg_upgrade_info["lantern_version"], + extras_version: pg_upgrade_info["extras_version"], + minor_version: pg_upgrade_info["minor_version"] + ) + current_frame.delete("pg_upgrade") + strand.modified!(:stack) + strand.save_changes + register_deadline(:wait, 40 * 60) + hop_init_sql + when "Failed" + logs = JSON.parse(vm.sshable.cmd("common/bin/daemonizer --logs pg_upgrade")) + Clog.emit("Postgres upgrade failed") { {logs: logs, name: lantern_server.resource.name, lantern_server: lantern_server.id} } + Prog::PageNexus.assemble_with_logs("Postgres update failed on #{lantern_server.resource.name} (#{lantern_server.resource.label})", [lantern_server.resource.ubid, lantern_server.ubid], logs, "critical", "LanternPGUpgradeFailed", lantern_server.ubid) + hop_wait + end + nap 10 + end + label def wait_timeline_available nap 10 if lantern_server.timeline.strand.label == "start" 
lantern_server.update_walg_creds @@ -307,7 +360,6 @@ def before_run hop_update_lantern_extension when "NotStarted" vm.sshable.cmd("common/bin/daemonizer 'sudo lantern/bin/update_docker_image' update_docker_image", stdin: JSON.generate({ - gcp_creds_gcr_b64: Config.gcp_creds_gcr_b64, container_image: lantern_server.container_image })) when "Failed" @@ -327,48 +379,23 @@ def before_run end cf_client = Dns::Cloudflare.new - begin - cf_client.upsert_dns_record(frame["domain"], lantern_server.vm.sshable.host) - rescue => e - Clog.emit("Error while adding domain") { {error: e} } - decr_add_domain - hop_wait - end + cf_client.upsert_dns_record(frame["domain"], lantern_server.vm.sshable.host) lantern_server.update(domain: frame["domain"]) - remove_domain_from_stack + lantern_server.remove_domain_from_stack(strand) decr_add_domain register_deadline(:wait, 5 * 60) hop_setup_ssl end - def destroy_domain - cf_client = Dns::Cloudflare.new - cf_client.delete_dns_record(lantern_server.domain) - end - - def add_domain_to_stack(domain) - current_frame = strand.stack.first - current_frame["domain"] = domain - strand.modified!(:stack) - strand.save_changes - end - - def remove_domain_from_stack - current_frame = strand.stack.first - current_frame.delete("domain") - strand.modified!(:stack) - strand.save_changes - end - label def setup_ssl case vm.sshable.cmd("common/bin/daemonizer --check setup_ssl") when "Succeeded" vm.sshable.cmd("common/bin/daemonizer --clean setup_ssl") decr_setup_ssl - remove_domain_from_stack + lantern_server.remove_domain_from_stack(strand) hop_wait_db_available when "NotStarted" vm.sshable.cmd("common/bin/daemonizer 'sudo lantern/bin/setup_ssl' setup_ssl", stdin: JSON.generate({ @@ -382,7 +409,7 @@ def remove_domain_from_stack Clog.emit("Lantern SSL Setup Failed for #{lantern_server.resource.name}") { {logs: logs, name: lantern_server.resource.name, lantern_server: lantern_server.id} } Prog::PageNexus.assemble_with_logs("Lantern SSL Setup Failed for 
#{lantern_server.resource.name}", [lantern_server.resource.ubid, lantern_server.ubid], logs, "error", "LanternSSLSetupFailed", lantern_server.ubid) vm.sshable.cmd("common/bin/daemonizer --clean setup_ssl") - remove_domain_from_stack + lantern_server.remove_domain_from_stack(strand) decr_setup_ssl hop_wait end @@ -424,6 +451,10 @@ def remove_domain_from_stack end end + when_run_pg_upgrade_set? do + hop_run_pg_upgrade + end + when_update_user_password_set? do hop_update_user_password end @@ -529,6 +560,18 @@ def remove_domain_from_stack hop_promote_server end + label def wait_swap_dns + # wait until ip change will propagate + begin + nap 5 if !lantern_server.is_dns_correct? + lantern_server.run_query("SELECT 1") + rescue + nap 5 + end + + hop_promote_server + end + label def take_over decr_take_over if !lantern_server.standby? @@ -539,7 +582,12 @@ def remove_domain_from_stack # put the old server in container_stopped mode, so no healthcheck will be done lantern_server.resource.representative_server.incr_container_stopped - hop_swap_ip + hop_swap_dns + end + + label def swap_dns + lantern_server.swap_dns(lantern_server.resource.representative_server) + hop_wait end label def swap_ip @@ -591,17 +639,13 @@ def remove_domain_from_stack strand.children.each { _1.destroy } if !lantern_server.domain.nil? - destroy_domain + lantern_server.destroy_domain end if lantern_server.primary? lantern_server.timeline.incr_destroy - else - begin - lantern_server.resource.delete_replication_slot(lantern_server.ubid) - rescue - end end + lantern_server.destroy vm.incr_destroy diff --git a/prog/lantern/lantern_timeline_nexus.rb b/prog/lantern/lantern_timeline_nexus.rb index 0ec207569..298959d4f 100644 --- a/prog/lantern/lantern_timeline_nexus.rb +++ b/prog/lantern/lantern_timeline_nexus.rb @@ -87,7 +87,7 @@ def before_run label def destroy when_destroy_set? 
do decr_destroy - nap 60 * 60 * 24 * 30 # 30 days + nap Config.backup_retention_days_after_deletion * 60 * 60 * 24 end destroy_blob_storage diff --git a/rhizome/lantern/bin/configure b/rhizome/lantern/bin/configure index 7952a3d07..eaae037cf 100755 --- a/rhizome/lantern/bin/configure +++ b/rhizome/lantern/bin/configure @@ -7,9 +7,6 @@ require_relative "../../common/lib/util" require_relative "../lib/common" $configure_hash = JSON.parse($stdin.read) -File.open("test.json", "a") do |f| - f.puts($configure_hash.to_json) -end def install_dependencies # Install dependencies @@ -112,7 +109,7 @@ def setup_initial_compose_file "ports" => ["5432:5432", "6432:6432"], "volumes" => ["/var/lib/lantern-data:/bitnami/postgresql"], "deploy" => { - "restart_policy" => {"condition" => "always"}, + "restart_policy" => {"condition" => "on-failure"}, "resources" => {"limits" => {"memory" => memory_sizes[:shared_bufs]}} }, "env_file" => $env_file, @@ -148,7 +145,7 @@ end if $configure_hash[:skip_deps].nil? install_dependencies puts "dependencies installed" - configure_gcr($configure_hash["gcp_creds_gcr_b64"], $configure_hash["container_image"]) + configure_gcr($configure_hash["container_image"]) puts "GCR repo ready" end @@ -159,5 +156,5 @@ setup_env puts ".env setted up" setup_initial_compose_file puts "docker-compose.yaml ready" -run_database($configure_hash["container_image"]) +run_database($configure_hash["container_image"], $configure_hash["pg_version"]) puts "database ready" diff --git a/rhizome/lantern/bin/run_pg_upgrade b/rhizome/lantern/bin/run_pg_upgrade new file mode 100755 index 000000000..01a6fa7d5 --- /dev/null +++ b/rhizome/lantern/bin/run_pg_upgrade @@ -0,0 +1,50 @@ +#!/bin/env ruby +# frozen_string_literal: true + +require "json" +require "yaml" +require_relative "../../common/lib/util" +require_relative "../lib/common" + +$configure_hash = JSON.parse($stdin.read) +data = YAML.load_file $compose_file + +container_image = $configure_hash["container_image"] 
+old_pg_version = $configure_hash["old_pg_version"] +pg_version = $configure_hash["pg_version"] +current_container_image = data["services"]["postgresql"]["image"] + +r "sudo docker compose -f #{$compose_file} down -t 10" +r "sudo mv #{$datadir}/data #{$datadir}/old-data-#{old_pg_version}" +r "sudo docker rm -f tc 2>/dev/null || true" +r "sudo docker create --name tc #{current_container_image}" +r "sudo docker cp tc:/usr/lib/postgresql/#{old_pg_version}/lib #{$datadir}/old-lib-#{old_pg_version}" +r "sudo docker cp tc:/usr/lib/postgresql/#{old_pg_version}/bin #{$datadir}/old-bin-#{old_pg_version}" +r "sudo docker cp tc:/usr/share/postgresql/#{old_pg_version} #{$datadir}/old-share-#{old_pg_version}" +r "sudo docker rm tc" +r "sudo chown -R 1001:1001 #{$datadir}" + +append_env([ + ["POSTGRESQL_RUN_PGUPGRADE", "yes"], + ["PGUPGRADE_OLD_VERSION", old_pg_version] +]) + +data["services"]["postgresql"]["image"] = container_image +data["services"]["postgresql"]["user"] = "root" +data["services"]["postgresql"]["deploy"].delete("restart_policy") +File.open($compose_file, "w") { |f| YAML.dump(data, f) } + +r "sudo docker compose -f #{$compose_file} up" + +data = YAML.load_file $compose_file +data["services"]["postgresql"].delete("user") +data["services"]["postgresql"]["deploy"]["restart_policy"] = {"condition" => "on-failure"} +r "sudo rm -rf #{$datadir}/old-lib-#{old_pg_version} #{$datadir}/old-bin-#{old_pg_version} #{$datadir}/old-share-#{old_pg_version}" +r "sudo chown -R 1001:1001 #{$datadir}" +File.open($compose_file, "w") { |f| YAML.dump(data, f) } + +append_env([ + ["POSTGRESQL_RUN_PGUPGRADE", "no"] +]) + +run_database(container_image, pg_version) diff --git a/rhizome/lantern/bin/update_docker_image b/rhizome/lantern/bin/update_docker_image index 4e3d77293..8d6cae12c 100755 --- a/rhizome/lantern/bin/update_docker_image +++ b/rhizome/lantern/bin/update_docker_image @@ -9,13 +9,12 @@ require_relative "../lib/common" $configure_hash = JSON.parse($stdin.read) container_image 
= $configure_hash["container_image"] -gcp_creds_gcr_b64 = $configure_hash["gcp_creds_gcr_b64"] -configure_gcr(gcp_creds_gcr_b64, container_image) +configure_gcr(container_image) map = YAML.load_file $compose_file map["services"]["postgresql"]["image"] = container_image File.open($compose_file, "w") { |f| YAML.dump(map, f) } r "sudo docker compose -f #{$compose_file} down" -run_database(container_image) +run_database(container_image, $configure_hash["pg_version"]) diff --git a/rhizome/lantern/bin/update_extras b/rhizome/lantern/bin/update_extras index 5b35894e0..c05a32e60 100755 --- a/rhizome/lantern/bin/update_extras +++ b/rhizome/lantern/bin/update_extras @@ -15,8 +15,8 @@ end version = $configure_hash["version"] -r "rm -rf /tmp/lantern-extras* || true" -r "wget https://github.com/lanterndata/lantern_extras/releases/download/#{version}/lantern-extras-#{version}.tar -O /tmp/lantern-extras.tar" -r "cd /tmp && tar xf lantern-extras.tar" -r "cd /tmp/lantern-extras-#{version} && PG_CONFIG=#{$pg_mount_path}/bin/pg_config make install" -r "rm -rf /tmp/lantern-extras*" +r "rm -rf /tmp/lantern-* || true" +r "wget https://github.com/lanterndata/lantern/releases/download/v#{version}/lantern-#{version}.tar -O /tmp/lantern.tar" +r "cd /tmp && tar xf lantern.tar" +r "cd /tmp/lantern-#{version} && PG_CONFIG=#{$pg_mount_path}/bin/pg_config make -C lantern-extras-#{version} install" +r "rm -rf /tmp/lantern*" diff --git a/rhizome/lantern/lib/common.rb b/rhizome/lantern/lib/common.rb index e1b2dfea5..eefd1d761 100755 --- a/rhizome/lantern/lib/common.rb +++ b/rhizome/lantern/lib/common.rb @@ -11,8 +11,7 @@ $pg_mount_path = "#{$workdir}/pg" $container_name = "lantern-postgresql-1" -def configure_gcr(gcp_creds_gcr_b64, container_image) - r "echo #{gcp_creds_gcr_b64} | base64 -d | sudo docker login -u _json_key --password-stdin https://gcr.io" +def configure_gcr(container_image) r "sudo docker pull #{container_image}" end @@ -38,9 +37,9 @@ def wait_for_pg end end -def 
run_database(container_image) +def run_database(container_image, pg_version) # Run database - volume_mount = "#{$pg_mount_path}:/opt/bitnami/postgresql" + volume_mount = "#{$pg_mount_path}:/usr/lib/postgresql/#{pg_version}" # Copy postgres fs to host to mount r "sudo rm -rf #{$pg_mount_path}" data = YAML.load_file $compose_file @@ -48,7 +47,7 @@ def run_database(container_image) File.open($compose_file, "w") { |f| YAML.dump(data, f) } r "sudo docker rm -f tc 2>/dev/null || true" r "sudo docker create --name tc #{container_image}" - r "sudo docker cp tc:/opt/bitnami/postgresql #{$pg_mount_path}" + r "sudo docker cp tc:/usr/lib/postgresql/#{pg_version} #{$pg_mount_path}" r "sudo docker rm tc" r "sudo chown -R 1001:1001 #{$pg_mount_path}" # Mount extension dir, so we can make automatic updates from host @@ -79,21 +78,34 @@ def append_env(env_arr) File.open($env_file, "w") { |f| combined_env.each { |key, value| f.puts "#{key}=#{value}" } } end +def tls_already_configured?(domain) + is_domain_configured = !r("(test -f /root/.acme.sh/acme.sh && /root/.acme.sh/acme.sh --list -d #{domain}) || echo ''").chomp.empty? + + if !is_domain_configured + return false + end + + !r("(test -f #{$datadir}/server.key && test -f #{$datadir}/server.crt && echo 1) || echo ''").chomp.empty? 
+end + def configure_tls(domain, email, dns_token, dns_zone_id, provider) puts "Configuring TLS for domain #{domain}" - r "curl -s https://get.acme.sh | sh -s email=#{email}" - env = if provider == "dns_cf" - "CF_Token='#{dns_token}' CF_Zone_ID='#{dns_zone_id}'" - else - "GOOGLEDOMAINS_ACCESS_TOKEN='#{dns_token}'" - end - r "#{env} /root/.acme.sh/acme.sh --server letsencrypt --issue --dns #{provider} -d #{domain}" - reload_cmd = "sudo docker compose -f #{$compose_file} exec postgresql psql -U postgres -c 'SELECT pg_reload_conf()' && sudo docker compose -f #{$compose_file} exec postgresql psql -p6432 -U postgres pgbouncer -c RELOAD" - r "/root/.acme.sh/acme.sh --install-cert -d #{domain} --key-file #{$datadir}/server.key --fullchain-file #{$datadir}/server.crt --reloadcmd \"#{reload_cmd}\"" - r "sudo chown 1001:1001 #{$datadir}/server.key" - r "sudo chown 1001:1001 #{$datadir}/server.crt" - r "sudo chmod 600 #{$datadir}/server.key" + if !tls_already_configured?(domain) + r "curl -s https://get.acme.sh | sh -s email=#{email}" + env = if provider == "dns_cf" + "CF_Token='#{dns_token}' CF_Zone_ID='#{dns_zone_id}'" + else + "GOOGLEDOMAINS_ACCESS_TOKEN='#{dns_token}'" + end + + r "#{env} /root/.acme.sh/acme.sh --server letsencrypt --issue --dns #{provider} -d #{domain}" + reload_cmd = "sudo docker compose -f #{$compose_file} exec postgresql psql -U postgres -c 'SELECT pg_reload_conf()' && sudo docker compose -f #{$compose_file} exec postgresql psql -p6432 -U postgres pgbouncer -c RELOAD" + r "/root/.acme.sh/acme.sh --install-cert -d #{domain} --key-file #{$datadir}/server.key --fullchain-file #{$datadir}/server.crt --reloadcmd \"#{reload_cmd}\"" + r "sudo chown 1001:1001 #{$datadir}/server.key" + r "sudo chown 1001:1001 #{$datadir}/server.crt" + r "sudo chmod 600 #{$datadir}/server.key" + end append_env([ ["POSTGRESQL_ENABLE_TLS", "yes"], diff --git a/routes/api/project/location/lantern.rb b/routes/api/project/location/lantern.rb index 444caf312..233f1ed80 100644 --- 
a/routes/api/project/location/lantern.rb +++ b/routes/api/project/location/lantern.rb @@ -101,12 +101,7 @@ class CloverApi r.post "add-domain" do Authorization.authorize(@current_user.id, "Postgres:edit", pg.id) DB.transaction do - strand = pg.representative_server.strand - strand.stack.first["domain"] = r.params["domain"] - strand.modified!(:stack) - strand.save_changes - - pg.representative_server.update(domain: r.params["domain"]) + pg.representative_server.add_domain_to_stack(r.params["domain"]) pg.representative_server.incr_add_domain end response.status = 200 diff --git a/routes/web/project/location/lantern.rb b/routes/web/project/location/lantern.rb index 706c95dba..bd5929a71 100644 --- a/routes/web/project/location/lantern.rb +++ b/routes/web/project/location/lantern.rb @@ -65,11 +65,7 @@ class CloverWeb r.post "add-domain" do Authorization.authorize(@current_user.id, "Postgres:edit", pg.id) DB.transaction do - strand = pg.representative_server.strand - strand.stack.first["domain"] = r.params["domain"] - strand.modified!(:stack) - strand.save_changes - + pg.representative_server.add_domain_to_stack(r.params["domain"]) pg.representative_server.incr_add_domain end r.redirect "#{@project.path}#{pg.path}" diff --git a/spec/model/lantern/lantern_resource_spec.rb b/spec/model/lantern/lantern_resource_spec.rb index 72b5c4e5b..8e3b8b522 100644 --- a/spec/model/lantern/lantern_resource_spec.rb +++ b/spec/model/lantern/lantern_resource_spec.rb @@ -92,12 +92,22 @@ it "sets up service account and updates resource" do api = instance_double(Hosting::GcpApis) allow(Hosting::GcpApis).to receive(:new).and_return(api) - allow(api).to receive_messages(create_service_account: {"email" => "test-sa"}, export_service_account_key: "test-key") - expect(lantern_resource).to receive(:update).with(gcp_creds_b64: "test-key", service_account_name: "test-sa") + allow(api).to receive_messages(create_service_account: {"email" => "test-sa"}) + expect(lantern_resource).to 
receive(:update).with(service_account_name: "test-sa") expect { lantern_resource.setup_service_account }.not_to raise_error end end + describe "#export_service_account_key" do + it "exports service account key and updates resource" do + api = instance_double(Hosting::GcpApis) + allow(Hosting::GcpApis).to receive(:new).and_return(api) + allow(api).to receive_messages(export_service_account_key: "test-key") + expect(lantern_resource).to receive(:update).with(gcp_creds_b64: "test-key") + expect { lantern_resource.export_service_account_key }.not_to raise_error + end + end + describe "#create_logging_table" do it "create bigquery table and gives access" do instance_double(LanternTimeline, ubid: "test") @@ -144,6 +154,15 @@ end end + describe "#drop_ddl_log_trigger" do + it "drops ddl log trigger" do + representative_server = instance_double(LanternServer) + expect(lantern_resource).to receive(:representative_server).and_return(representative_server).at_least(:once) + expect(lantern_resource.representative_server).to receive(:run_query_all).with(a_string_matching(/DROP .* log_ddl_trigger/)) + expect { lantern_resource.drop_ddl_log_trigger }.not_to raise_error + end + end + describe "#listen_ddl_log" do it "listends ddl log table" do representative_server = instance_double(LanternServer) @@ -194,6 +213,24 @@ end end + describe "#delete_publication" do + it "drops replication slot" do + representative_server = instance_double(LanternServer) + expect(lantern_resource).to receive(:representative_server).and_return(representative_server).at_least(:once) + expect(lantern_resource.representative_server).to receive(:run_query_all).with("DROP PUBLICATION IF EXISTS test") + expect { lantern_resource.delete_publication("test") }.not_to raise_error + end + end + + describe "#delete_logical_subscription" do + it "drops subscription" do + representative_server = instance_double(LanternServer) + expect(lantern_resource).to 
receive(:representative_server).and_return(representative_server).at_least(:once) + expect(lantern_resource.representative_server).to receive(:run_query_all).with("DROP SUBSCRIPTION IF EXISTS test") + expect { lantern_resource.delete_logical_subscription("test") }.not_to raise_error + end + end + describe "#create_publication" do it "creates new publication" do representative_server = instance_double(LanternServer) @@ -208,23 +245,14 @@ representative_server = instance_double(LanternServer) expect(lantern_resource).to receive(:representative_server).and_return(representative_server).at_least(:once) expect(lantern_resource.representative_server).to receive(:list_all_databases).and_return(["db1", "db2"]) - expect(lantern_resource.representative_server).to receive(:run_query).with(a_string_matching(/CREATE SUBSCRIPTION/), db: "db1") - expect(lantern_resource.representative_server).to receive(:run_query).with(a_string_matching(/CREATE SUBSCRIPTION/), db: "db2") + expect(lantern_resource.representative_server).to receive(:run_query).with(a_string_matching(/CREATE SUBSCRIPTION.*dbname=db1/m), db: "db1") + expect(lantern_resource.representative_server).to receive(:run_query).with(a_string_matching(/CREATE SUBSCRIPTION.*dbname=db2/m), db: "db2") expect(lantern_resource).to receive(:connection_string).and_return("postgres://localhost:5432").at_least(:once) expect(lantern_resource).to receive(:parent).and_return(lantern_resource).at_least(:once) expect { lantern_resource.create_and_enable_subscription }.not_to raise_error end end - describe "#disable_logical_subscription" do - it "disables subscription" do - representative_server = instance_double(LanternServer) - expect(lantern_resource).to receive(:representative_server).and_return(representative_server).at_least(:once) - expect(lantern_resource.representative_server).to receive(:run_query_all).with("ALTER SUBSCRIPTION sub_#{lantern_resource.ubid} DISABLE") - expect { lantern_resource.disable_logical_subscription }.not_to 
raise_error - end - end - describe "#create_logical_replica" do it "create logical replica with current version" do representative_server = instance_double(LanternServer, @@ -293,10 +321,19 @@ ] statements_db2 = statements_db1 # identical statements for the test - expect(representative_server).to receive(:run_query).with(statements_db1, db: "db1") - expect(representative_server).to receive(:run_query).with(statements_db2, db: "db2") + expect(representative_server).to receive(:run_query).with(statements_db1.join("\n"), db: "db1") + expect(representative_server).to receive(:run_query).with(statements_db2.join("\n"), db: "db2") expect { lantern_resource.sync_sequences_with_parent }.not_to raise_error end end + + describe "#get_logical_replication_lag" do + it "gets the lag" do + representative_server = instance_double(LanternServer) + expect(lantern_resource).to receive(:representative_server).and_return(representative_server).at_least(:once) + expect(representative_server).to receive(:run_query).with("SELECT (pg_current_wal_lsn() - confirmed_flush_lsn) FROM pg_catalog.pg_replication_slots WHERE slot_name = 'test_slot'").and_return("0\n") + expect(lantern_resource.get_logical_replication_lag("test_slot")).to be(0) + end + end end diff --git a/spec/model/lantern/lantern_server_spec.rb b/spec/model/lantern/lantern_server_spec.rb index cadebde21..9ad3d1593 100644 --- a/spec/model/lantern/lantern_server_spec.rb +++ b/spec/model/lantern/lantern_server_spec.rb @@ -146,19 +146,19 @@ end it "runs query on vm" do - expect(lantern_server.vm.sshable).to receive(:cmd).with("sudo docker compose -f /var/lib/lantern/docker-compose.yaml exec -T postgresql psql -q -U postgres -t --csv postgres", stdin: "SELECT 1").and_return("1\n") + expect(lantern_server.vm.sshable).to receive(:cmd).with("sudo docker compose -f /var/lib/lantern/docker-compose.yaml exec -T postgresql psql -q -U postgres -t --csv -v ON_ERROR_STOP=1 postgres", stdin: "SELECT 1").and_return("1\n") 
expect(lantern_server.run_query("SELECT 1")).to eq("1") end it "runs query on vm with different user and db" do - expect(lantern_server.vm.sshable).to receive(:cmd).with("sudo docker compose -f /var/lib/lantern/docker-compose.yaml exec -T postgresql psql -q -U lantern -t --csv db2", stdin: "SELECT 1").and_return("1\n") + expect(lantern_server.vm.sshable).to receive(:cmd).with("sudo docker compose -f /var/lib/lantern/docker-compose.yaml exec -T postgresql psql -q -U lantern -t --csv -v ON_ERROR_STOP=1 db2", stdin: "SELECT 1").and_return("1\n") expect(lantern_server.run_query("SELECT 1", db: "db2", user: "lantern")).to eq("1") end it "runs query on vm for all databases" do expect(lantern_server).to receive(:list_all_databases).and_return(["postgres", "db2"]) - expect(lantern_server.vm.sshable).to receive(:cmd).with("sudo docker compose -f /var/lib/lantern/docker-compose.yaml exec -T postgresql psql -q -U postgres -t --csv postgres", stdin: "SELECT 1").and_return("1\n") - expect(lantern_server.vm.sshable).to receive(:cmd).with("sudo docker compose -f /var/lib/lantern/docker-compose.yaml exec -T postgresql psql -q -U postgres -t --csv db2", stdin: "SELECT 1").and_return("2\n") + expect(lantern_server.vm.sshable).to receive(:cmd).with("sudo docker compose -f /var/lib/lantern/docker-compose.yaml exec -T postgresql psql -q -U postgres -t --csv -v ON_ERROR_STOP=1 postgres", stdin: "SELECT 1").and_return("1\n") + expect(lantern_server.vm.sshable).to receive(:cmd).with("sudo docker compose -f /var/lib/lantern/docker-compose.yaml exec -T postgresql psql -q -U postgres -t --csv -v ON_ERROR_STOP=1 db2", stdin: "SELECT 1").and_return("2\n") expect(lantern_server.run_query_all("SELECT 1")).to eq( [ ["postgres", "1"], @@ -273,9 +273,9 @@ gcp_creds_b64: "test-creds", recovery_target_lsn: nil, representative_server: lantern_server, + pg_version: 17, restore_target: nil) expect(Config).to receive(:prom_password).and_return("pwd123").at_least(:once) - expect(Config).to 
receive(:gcp_creds_gcr_b64).and_return("test-creds").at_least(:once) expect(Config).to receive(:gcp_creds_logging_b64).and_return("test-creds").at_least(:once) expect(timeline).to receive(:generate_walg_config).and_return({gcp_creds_b64: "test-creds-push", walg_gs_prefix: "test-bucket-push"}).at_least(:once) expect(lantern_server).to receive(:resource).and_return(resource).at_least(:once) @@ -306,7 +306,6 @@ master_host: resource.representative_server.hostname, master_port: 5432, prom_password: Config.prom_password, - gcp_creds_gcr_b64: Config.gcp_creds_gcr_b64, gcp_creds_coredumps_b64: Config.gcp_creds_coredumps_b64, gcp_creds_logging_b64: Config.gcp_creds_logging_b64, @@ -317,7 +316,8 @@ gcp_creds_walg_b64: walg_conf[:gcp_creds_b64], walg_gs_prefix: walg_conf[:walg_gs_prefix], gcp_creds_big_query_b64: resource.gcp_creds_b64, - big_query_dataset: Config.lantern_log_dataset + big_query_dataset: Config.lantern_log_dataset, + pg_version: 17 }) expect(lantern_server.configure_hash).to eq(expected_conf) end @@ -341,9 +341,9 @@ gcp_creds_b64: "test-creds", recovery_target_lsn: nil, representative_server: lantern_server, + pg_version: 17, restore_target: Time.now) expect(Config).to receive(:prom_password).and_return("pwd123").at_least(:once) - expect(Config).to receive(:gcp_creds_gcr_b64).and_return("test-creds").at_least(:once) expect(Config).to receive(:gcp_creds_logging_b64).and_return("test-creds").at_least(:once) expect(timeline).to receive(:latest_backup_label_before_target).and_return("test-label").at_least(:once) expect(timeline).to receive(:generate_walg_config).and_return({gcp_creds_b64: "test-creds-push", walg_gs_prefix: "test-bucket-push"}).at_least(:once) @@ -375,7 +375,6 @@ master_host: resource.representative_server.hostname, master_port: 5432, prom_password: Config.prom_password, - gcp_creds_gcr_b64: Config.gcp_creds_gcr_b64, gcp_creds_coredumps_b64: Config.gcp_creds_coredumps_b64, gcp_creds_logging_b64: Config.gcp_creds_logging_b64, container_image: 
"#{Config.gcr_image}:lantern-#{lantern_server.lantern_version}-extras-#{lantern_server.extras_version}-minor-#{lantern_server.minor_version}", @@ -385,7 +384,8 @@ gcp_creds_walg_b64: walg_conf[:gcp_creds_b64], walg_gs_prefix: walg_conf[:walg_gs_prefix], gcp_creds_big_query_b64: resource.gcp_creds_b64, - big_query_dataset: Config.lantern_log_dataset + big_query_dataset: Config.lantern_log_dataset, + pg_version: 17 }) expect(lantern_server.configure_hash).to eq(expected_conf) end @@ -409,9 +409,9 @@ gcp_creds_b64: "test-creds", recovery_target_lsn: "16/B374D848", representative_server: lantern_server, + pg_version: 17, restore_target: nil) expect(Config).to receive(:prom_password).and_return("pwd123").at_least(:once) - expect(Config).to receive(:gcp_creds_gcr_b64).and_return("test-creds").at_least(:once) expect(Config).to receive(:gcp_creds_logging_b64).and_return("test-creds").at_least(:once) expect(timeline).to receive(:generate_walg_config).and_return({gcp_creds_b64: "test-creds-push", walg_gs_prefix: "test-bucket-push"}).at_least(:once) expect(lantern_server).to receive(:resource).and_return(resource).at_least(:once) @@ -442,7 +442,6 @@ master_host: resource.representative_server.hostname, master_port: 5432, prom_password: Config.prom_password, - gcp_creds_gcr_b64: Config.gcp_creds_gcr_b64, gcp_creds_coredumps_b64: Config.gcp_creds_coredumps_b64, gcp_creds_logging_b64: Config.gcp_creds_logging_b64, container_image: "#{Config.gcr_image}:lantern-#{lantern_server.lantern_version}-extras-#{lantern_server.extras_version}-minor-#{lantern_server.minor_version}", @@ -452,7 +451,8 @@ gcp_creds_walg_b64: walg_conf[:gcp_creds_b64], walg_gs_prefix: walg_conf[:walg_gs_prefix], gcp_creds_big_query_b64: resource.gcp_creds_b64, - big_query_dataset: Config.lantern_log_dataset + big_query_dataset: Config.lantern_log_dataset, + pg_version: 17 }) expect(lantern_server.configure_hash).to eq(expected_conf) end @@ -476,9 +476,9 @@ gcp_creds_b64: "test-creds", recovery_target_lsn: 
"16/B374D848", representative_server: lantern_server, + pg_version: 17, restore_target: Time.now) expect(Config).to receive(:prom_password).and_return("pwd123").at_least(:once) - expect(Config).to receive(:gcp_creds_gcr_b64).and_return("test-creds").at_least(:once) expect(Config).to receive(:gcp_creds_logging_b64).and_return("test-creds").at_least(:once) expect(timeline).to receive(:generate_walg_config).and_return({gcp_creds_b64: "test-creds-push", walg_gs_prefix: "test-bucket-push"}).at_least(:once) @@ -510,7 +510,6 @@ master_host: resource.representative_server.hostname, master_port: 5432, prom_password: Config.prom_password, - gcp_creds_gcr_b64: Config.gcp_creds_gcr_b64, gcp_creds_coredumps_b64: Config.gcp_creds_coredumps_b64, gcp_creds_logging_b64: Config.gcp_creds_logging_b64, @@ -521,7 +520,8 @@ gcp_creds_walg_b64: walg_conf[:gcp_creds_b64], walg_gs_prefix: walg_conf[:walg_gs_prefix], gcp_creds_big_query_b64: resource.gcp_creds_b64, - big_query_dataset: Config.lantern_log_dataset + big_query_dataset: Config.lantern_log_dataset, + pg_version: 17 }) expect(lantern_server.configure_hash).to eq(expected_conf) end @@ -783,4 +783,127 @@ expect(lantern_server.query_string).to be_nil end end + + describe "#swap_dns" do + it "swaps domains with another server and removes domain" do + frame = {} + other = instance_double(described_class) + strand = instance_double(Strand) + expect(lantern_server).to receive(:strand).and_return(strand).at_least(:once) + expect(lantern_server).to receive(:domain).and_return("old-domain").at_least(:once) + expect(lantern_server).to receive(:update).with(domain: nil) + expect(lantern_server.strand).to receive(:stack).and_return([frame]).at_least(:once) + expect(lantern_server.strand).to receive(:modified!).with(:stack) + expect(lantern_server.strand).to receive(:save_changes) + expect(lantern_server).to receive(:incr_add_domain) + expect(lantern_server).to receive(:destroy_domain) + expect(other).to receive(:domain).and_return("test") + 
expect(other).to receive(:update).with(domain: nil) + expect { lantern_server.swap_dns(other) }.not_to raise_error + end + + it "swaps domains with another server" do + frame = {} + other = instance_double(described_class) + strand = instance_double(Strand) + expect(lantern_server).to receive(:strand).and_return(strand).at_least(:once) + expect(lantern_server.strand).to receive(:stack).and_return([frame]).at_least(:once) + expect(lantern_server.strand).to receive(:modified!).with(:stack) + expect(lantern_server.strand).to receive(:save_changes) + expect(lantern_server).to receive(:incr_add_domain) + expect(other).to receive(:domain).and_return("test") + expect(other).to receive(:update).with(domain: nil) + expect { lantern_server.swap_dns(other) }.not_to raise_error + end + end + + describe "#is_dns_correct?" do + it "returns true if host matches ip" do + expect(lantern_server).to receive(:domain).and_return("test-domain").at_least(:once) + expect(vm.sshable).to receive(:host).and_return("127.0.0.1").at_least(:once) + expect(Resolv).to receive(:getaddress).with("test-domain").and_return("127.0.0.1").at_least(:once) + expect(lantern_server.is_dns_correct?).to be(true) + end + + it "returns false if host does not match the ip" do + expect(lantern_server).to receive(:domain).and_return("test-domain").at_least(:once) + expect(vm.sshable).to receive(:host).and_return("127.0.0.1").at_least(:once) + expect(Resolv).to receive(:getaddress).with("test-domain").and_return("127.0.1.1").at_least(:once) + expect(lantern_server.is_dns_correct?).to be(false) + end + end + + describe "#stop_container" do + it "stops docker container" do + expect(lantern_server.vm.sshable).to receive(:cmd).with("sudo docker compose -f #{Config.compose_file} down -t 60 || true") + expect { lantern_server.stop_container }.not_to raise_error + end + end + + describe "#start_container" do + it "starts docker container" do + expect(lantern_server.vm.sshable).to receive(:cmd).with("sudo docker compose -f 
#{Config.compose_file} up -d") + expect { lantern_server.start_container }.not_to raise_error + end + end + + describe "#destroy_domain" do + it "destroys domain" do + cf_client = instance_double(Dns::Cloudflare) + expect(Dns::Cloudflare).to receive(:new).and_return(cf_client) + expect(lantern_server).to receive(:domain).and_return("example.com") + expect(cf_client).to receive(:delete_dns_record).with("example.com") + lantern_server.destroy_domain + end + end + + describe "#add_domain_to_stack" do + it "adds domain to current frame" do + domain = "db.lantern.dev" + frame = {} + strand = instance_double(Strand) + expect(lantern_server).to receive(:strand).and_return(strand).at_least(:once) + expect(strand).to receive(:stack).and_return([frame]).at_least(:once) + expect(frame).to receive(:[]=).with("domain", domain) + expect(strand).to receive(:modified!).with(:stack) + expect(strand).to receive(:save_changes) + expect { lantern_server.add_domain_to_stack(domain) }.not_to raise_error + end + + it "adds domain to current frame of specified strand" do + domain = "db.lantern.dev" + frame = {} + strand = instance_double(Strand) + expect(strand).to receive(:stack).and_return([frame]).at_least(:once) + expect(frame).to receive(:[]=).with("domain", domain) + expect(strand).to receive(:modified!).with(:stack) + expect(strand).to receive(:save_changes) + expect { lantern_server.add_domain_to_stack(domain, strand) }.not_to raise_error + end + end + + describe "#remove_domain_from_stack" do + it "removes domain from current frame" do + domain = "db.lantern.dev" + frame = {"domain" => domain} + strand = instance_double(Strand) + expect(lantern_server).to receive(:strand).and_return(strand).at_least(:once) + expect(strand).to receive(:stack).and_return([frame]).at_least(:once) + expect(frame).to receive(:delete).with("domain") + expect(strand).to receive(:modified!).with(:stack) + expect(strand).to receive(:save_changes) + expect { lantern_server.remove_domain_from_stack }.not_to 
raise_error + end + + it "removes domain from current frame of specified strand" do + domain = "db.lantern.dev" + frame = {"domain" => domain} + strand = instance_double(Strand) + expect(strand).to receive(:stack).and_return([frame]).at_least(:once) + expect(frame).to receive(:delete).with("domain") + expect(strand).to receive(:modified!).with(:stack) + expect(strand).to receive(:save_changes) + expect { lantern_server.remove_domain_from_stack(strand) }.not_to raise_error + end + end end diff --git a/spec/prog/lantern/lantern_resource_nexus_spec.rb b/spec/prog/lantern/lantern_resource_nexus_spec.rb index 33437db16..edc63ae70 100644 --- a/spec/prog/lantern/lantern_resource_nexus_spec.rb +++ b/spec/prog/lantern/lantern_resource_nexus_spec.rb @@ -126,31 +126,54 @@ end describe "#start" do - it "sets up gcp service account and allows bucket usage" do - expect(lantern_resource).to receive(:setup_service_account) - expect(lantern_resource).to receive(:create_logging_table) + it "hops to setup_service_account" do + expect { nx.start }.to hop("setup_service_account") + end + + # it "buds trigger_pg_current_xact_id_on_parent if it has parent" do + # expect(lantern_resource.representative_server.vm).to receive(:strand).and_return(instance_double(Strand, label: "wait")) + # expect(nx).to receive(:register_deadline) + # expect(lantern_resource).to receive(:parent).and_return(instance_double(LanternResource)) + # expect(nx).to receive(:bud).with(described_class, {}, :trigger_pg_current_xact_id_on_parent) + # expect { nx.start }.to hop("wait_servers") + # end + end + + describe "#setup_timeline_access" do + it "allows bucket usage" do expect(lantern_resource).to receive(:parent_id).and_return("test-parent") expect(lantern_resource).not_to receive(:allow_timeline_access_to_bucket) expect(nx).to receive(:register_deadline) - expect { nx.start }.to hop("wait_servers") + expect { nx.setup_timeline_access }.to hop("wait_servers") end it "sets up gcp service account" do - 
expect(lantern_resource).to receive(:setup_service_account) - expect(lantern_resource).to receive(:create_logging_table) expect(lantern_resource).to receive(:parent_id).and_return(nil) expect(lantern_resource).to receive(:allow_timeline_access_to_bucket) expect(nx).to receive(:register_deadline) - expect { nx.start }.to hop("wait_servers") + expect { nx.setup_timeline_access }.to hop("wait_servers") end + end - # it "buds trigger_pg_current_xact_id_on_parent if it has parent" do - # expect(lantern_resource.representative_server.vm).to receive(:strand).and_return(instance_double(Strand, label: "wait")) - # expect(nx).to receive(:register_deadline) - # expect(lantern_resource).to receive(:parent).and_return(instance_double(LanternResource)) - # expect(nx).to receive(:bud).with(described_class, {}, :trigger_pg_current_xact_id_on_parent) - # expect { nx.start }.to hop("wait_servers") - # end + describe "#create_logging_table" do + it "hops to setup_timeline_access" do + expect(lantern_resource).to receive(:create_logging_table) + expect { nx.create_logging_table }.to hop("setup_timeline_access") + end + end + + describe "#setup_service_account" do + it "hops to export_service_account_key" do + expect(lantern_resource).to receive(:setup_service_account) + expect { nx.setup_service_account }.to hop("export_service_account_key") + end + end + + describe "#export_service_account_key" do + it "hops to create_logging_table" do + expect(lantern_resource).to receive(:export_service_account_key) + expect { nx.export_service_account_key }.to hop("create_logging_table") + end end # describe "#wait_trigger_pg_current_xact_id_on_parent" do @@ -226,6 +249,16 @@ expect { nx.wait }.to nap(30) end + it "naps if no parent on swap_dns" do + expect(lantern_resource).to receive(:required_standby_count).and_return(0) + expect(lantern_resource).to receive(:display_state).and_return(nil) + expect(lantern_resource).to receive(:servers).and_return([instance_double(LanternServer, strand: 
instance_double(Strand, label: "wait"))]).at_least(:once) + expect(nx).to receive(:when_switchover_with_parent_set?).and_yield + expect(lantern_resource).to receive(:parent).and_return(nil) + expect(nx).to receive(:decr_switchover_with_parent) + expect { nx.wait }.to nap(30) + end + it "hops to swap_leaders" do expect(lantern_resource).to receive(:required_standby_count).and_return(0) expect(lantern_resource).to receive(:display_state).and_return(nil) @@ -237,6 +270,18 @@ expect(lantern_resource).to receive(:update).with(display_state: "failover") expect { nx.wait }.to hop("swap_leaders_with_parent") end + + it "hops to swap_dns" do + expect(lantern_resource).to receive(:required_standby_count).and_return(0) + expect(lantern_resource).to receive(:display_state).and_return(nil) + expect(lantern_resource).to receive(:servers).and_return([instance_double(LanternServer, strand: instance_double(Strand, label: "wait"))]).at_least(:once) + expect(nx).to receive(:when_switchover_with_parent_set?).and_yield + parent = instance_double(LanternResource) + expect(lantern_resource).to receive(:parent).and_return(parent).at_least(:once) + expect(parent).to receive(:update).with(display_state: "failover") + expect(lantern_resource).to receive(:update).with(display_state: "failover") + expect { nx.wait }.to hop("switchover_with_parent") + end end describe "#destroy" do @@ -253,6 +298,25 @@ expect { nx.destroy }.to exit({"msg" => "lantern resource is deleted"}) end + it "deletes replication slot and publications on parent" do + expect(lantern_resource.servers).to all(receive(:incr_destroy)) + expect { nx.destroy }.to nap(5) + + parent_reosurce = instance_double(LanternResource) + expect(lantern_resource).to receive(:ubid).and_return("test-ubid").at_least(:once) + expect(parent_reosurce).to receive(:delete_replication_slot).with("slot_#{lantern_resource.ubid}") + expect(parent_reosurce).to receive(:delete_publication).with("pub_#{lantern_resource.ubid}") + expect(lantern_resource).to 
receive(:delete_logical_subscription).with("sub_#{lantern_resource.ubid}") + expect(lantern_resource).to receive(:parent).and_return(parent_reosurce).at_least(:once) + expect(lantern_resource).to receive(:servers).and_return([]) + expect(lantern_resource).to receive(:dissociate_with_project) + expect(lantern_resource).to receive(:destroy) + expect(lantern_resource).to receive(:doctor).and_return(nil) + expect(lantern_resource).to receive(:service_account_name).and_return(nil) + + expect { nx.destroy }.to exit({"msg" => "lantern resource is deleted"}) + end + it "triggers server deletion and deletes doctor" do expect(lantern_resource.servers).to all(receive(:incr_destroy)) expect { nx.destroy }.to nap(5) @@ -287,7 +351,7 @@ vm = instance_double(GcpVm) expect(parent).to receive(:representative_server).and_return(representative_server) expect(lantern_resource).to receive(:representative_server).and_return(representative_server).at_least(:once) - expect(lantern_resource).to receive(:disable_logical_subscription) + expect(lantern_resource).to receive(:delete_logical_subscription).with("sub_#{lantern_resource.ubid}") expect(lantern_resource).to receive(:sync_sequences_with_parent) expect(representative_server).to receive(:vm).and_return(vm).at_least(:once) expect(vm).to receive(:swap_ip) @@ -314,11 +378,10 @@ end describe "#update_hosts" do - it "updates the domains of the current and new master, updates display states, and removes fork association" do + it "updates the domains of the current and new master" do parent = instance_double(LanternResource) current_master = instance_double(LanternServer, domain: "current-master-domain.com") new_master = instance_double(LanternServer, domain: "new-master-domain.com") - timeline = instance_double(LanternTimeline) expect(lantern_resource).to receive(:parent).and_return(parent).at_least(:once) expect(parent).to receive(:representative_server).and_return(current_master).at_least(:once) @@ -326,6 +389,17 @@ expect(new_master).to 
receive(:update).with(domain: "current-master-domain.com") expect(current_master).to receive(:update).with(domain: "new-master-domain.com") + expect { nx.update_hosts }.to hop("finish_take_over") + end + end + + describe "#finish_take_over" do + it "updates display states, and removes fork association" do + parent = instance_double(LanternResource) + timeline = instance_double(LanternTimeline) + + expect(lantern_resource).to receive(:parent).and_return(parent).at_least(:once) + expect(lantern_resource).to receive(:update).with(display_state: nil) expect(parent).to receive(:update).with(display_state: nil) @@ -333,7 +407,97 @@ expect(lantern_resource).to receive(:timeline).and_return(timeline) expect(timeline).to receive(:update).with(parent_id: nil) - expect { nx.update_hosts }.to hop("wait") + expect { nx.finish_take_over }.to hop("wait") + end + end + + describe "#switchover_with_parent" do + it "sets parent to readonly and hop" do + parent = instance_double(LanternResource) + expect(lantern_resource).to receive(:parent).and_return(parent) + expect(parent).to receive(:set_to_readonly) + expect(nx).to receive(:decr_switchover_with_parent) + + expect { nx.switchover_with_parent }.to hop("wait_for_synchronization") + end + end + + describe "#wait_for_synchronization" do + it "naps 5" do + parent = instance_double(LanternResource) + expect(lantern_resource).to receive(:parent).and_return(parent) + expect(parent).to receive(:get_logical_replication_lag).with("slot_#{lantern_resource.ubid}").and_return(5) + + expect { nx.wait_for_synchronization }.to nap(5) + end + + it "hops to delete_logical_subscription" do + parent = instance_double(LanternResource) + expect(lantern_resource).to receive(:parent).and_return(parent) + expect(parent).to receive(:get_logical_replication_lag).with("slot_#{lantern_resource.ubid}").and_return(0) + + expect { nx.wait_for_synchronization }.to hop("delete_logical_subscription") + end + end + + describe "#delete_logical_subscription" do + it 
"deletes susbcription and hop" do + expect(lantern_resource).to receive(:delete_logical_subscription).with("sub_#{lantern_resource.ubid}") + expect { nx.delete_logical_subscription }.to hop("sync_sequences_with_parent") + end + end + + describe "#sync_sequences_with_parent" do + it "syncs sequences and hop" do + expect(lantern_resource).to receive(:sync_sequences_with_parent) + expect { nx.sync_sequences_with_parent }.to hop("switch_dns_with_parent") + end + end + + describe "#switch_dns_with_parent" do + it "hops to finish_take_over" do + parent = instance_double(LanternResource, representative_server: instance_double(LanternServer, domain: nil)) + expect(lantern_resource).to receive(:parent).and_return(parent).at_least(:once) + expect(lantern_resource.parent.representative_server).to receive(:stop_container) + expect { nx.switch_dns_with_parent }.to hop("finish_take_over") + end + + it "switches dns with parent and hop to wait_switch_dns" do + parent = instance_double(LanternResource, representative_server: instance_double(LanternServer, domain: "test-domain")) + expect(lantern_resource).to receive(:parent).and_return(parent).at_least(:once) + expect(lantern_resource.parent.representative_server).to receive(:stop_container) + expect(lantern_resource.representative_server).to receive(:swap_dns).with(parent.representative_server) + expect(lantern_resource).to receive(:update).with(logical_replication: false) + expect { nx.switch_dns_with_parent }.to hop("wait_switch_dns") + end + end + + describe "#wait_switch_dns" do + it "naps if dns is not ready" do + representative_server = instance_double(LanternServer) + expect(lantern_resource).to receive(:representative_server).and_return(representative_server).at_least(:once) + expect(representative_server).to receive(:is_dns_correct?).and_return(false) + expect { nx.wait_switch_dns }.to nap 10 + end + + it "waits if db is not ready" do + representative_server = instance_double(LanternServer) + expect(Sequel).to 
receive(:connect).and_return(DB) + expect(DB).to receive(:[]).with("SELECT 1").and_raise + expect(lantern_resource).to receive(:representative_server).and_return(representative_server).at_least(:once) + expect(representative_server).to receive(:is_dns_correct?).and_return(true) + expect { nx.wait_switch_dns }.to nap 10 + end + + it "hops to finish_take_over" do + representative_server = instance_double(LanternServer) + expect(Sequel).to receive(:connect).and_return(DB) + res = instance_double(Sequel::Dataset) + expect(res).to receive(:first) + expect(DB).to receive(:[]).with("SELECT 1").and_return(res) + expect(lantern_resource).to receive(:representative_server).and_return(representative_server).at_least(:once) + expect(representative_server).to receive(:is_dns_correct?).and_return(true) + expect { nx.wait_switch_dns }.to hop("finish_take_over") end end end diff --git a/spec/prog/lantern/lantern_server_nexus_spec.rb b/spec/prog/lantern/lantern_server_nexus_spec.rb index eb4a66b6c..0487788d1 100644 --- a/spec/prog/lantern/lantern_server_nexus_spec.rb +++ b/spec/prog/lantern/lantern_server_nexus_spec.rb @@ -18,12 +18,14 @@ resource: instance_double(LanternResource, org_id: 0, name: "test", + label: "none", db_name: "postgres", db_user: "postgres", service_account_name: "test-sa", gcp_creds_b64: "test-creds", version_upgrade: false, - superuser_password: "pwd123"), + superuser_password: "pwd123", + pg_version: 15), vm: instance_double( GcpVm, id: "104b0033-b3f6-8214-ae27-0cd3cef18ce4", @@ -76,6 +78,31 @@ expect(lantern_server).not_to be_nil end + it "creates lantern server as primary with upgrade info" do + project = Project.create_with_id(name: "default", provider: "gcp").tap { _1.associate_with_project(_1) } + lantern_resource = instance_double(LanternResource, + name: "test", + project_id: project.id, + location: "us-central1") + + expect(LanternResource).to receive(:[]).and_return(lantern_resource) + + st = described_class.assemble( + resource_id: 
"6ae7e513-c34a-8039-a72a-7be45b53f2a0", + lantern_version: "0.2.0", + extras_version: "0.1.3", + minor_version: "2", + target_vm_size: "n1-standard-2", + target_storage_size_gib: 50, + representative_at: Time.now, + domain: "db.lantern.dev", + pg_upgrade: {"lantern_version" => "0.5.0", "extras_version" => "0.5.0", "minor_version" => "1", "pg_version" => 17} + ) + + lantern_server = LanternServer[st.id] + expect(lantern_server).not_to be_nil + end + it "creates lantern server as standby" do project = Project.create_with_id(name: "default", provider: "gcp").tap { _1.associate_with_project(_1) } lantern_resource = instance_double(LanternResource, @@ -206,17 +233,10 @@ it "naps if timeline is not ready" do expect(lantern_server.timeline).to receive(:strand).and_return(instance_double(Strand, label: "start")) - expect(Config).to receive(:gcp_creds_gcr_b64).and_return("test-creds") expect { nx.setup_docker_stack }.to nap(10) end - it "raises if gcr credentials are not provided" do - expect(Config).to receive(:gcp_creds_gcr_b64).and_return(nil) - expect { nx.setup_docker_stack }.to raise_error "GCP_CREDS_GCR_B64 is required to setup docker stack for Lantern" - end - it "calls setup if not started" do - expect(Config).to receive(:gcp_creds_gcr_b64).and_return("test-creds") expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_lantern").and_return("NotStarted") expect(lantern_server).to receive(:configure_hash).and_return("test") expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo lantern/bin/configure' configure_lantern", stdin: "test") @@ -224,7 +244,6 @@ end it "calls setup if failed" do - expect(Config).to receive(:gcp_creds_gcr_b64).and_return("test-creds") expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_lantern").and_return("Failed") expect(lantern_server).to receive(:configure_hash).and_return("test") expect(lantern_server.vm.sshable).to 
receive(:cmd).with("common/bin/daemonizer 'sudo lantern/bin/configure' configure_lantern", stdin: "test") @@ -232,7 +251,6 @@ end it "calls add domain after succeeded" do - expect(Config).to receive(:gcp_creds_gcr_b64).and_return("test-creds") expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_lantern").and_return("Succeeded") expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --clean configure_lantern") expect(nx).to receive(:frame).and_return({"domain" => "db.lantern.dev"}) @@ -243,7 +261,6 @@ end it "hop to wait_db_available" do - expect(Config).to receive(:gcp_creds_gcr_b64).and_return("test-creds") expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_lantern").and_return("Succeeded") expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --clean configure_lantern") expect(nx).to receive(:frame).and_return({}) @@ -253,7 +270,6 @@ end it "naps if in progress" do - expect(Config).to receive(:gcp_creds_gcr_b64).and_return("test-creds") expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --check configure_lantern").and_return("InProgress") expect { nx.setup_docker_stack }.to nap(5) end @@ -306,13 +322,12 @@ it "hops to wait_synchronization" do leader = instance_double(LanternServer, domain: "db.lantern.dev") - expect(nx).to receive(:add_domain_to_stack).with(leader.domain) + expect(lantern_server).to receive(:add_domain_to_stack).with(leader.domain, nx.strand) expect(nx).to receive(:incr_setup_ssl) expect(lantern_server).to receive(:domain).and_return(nil) expect(lantern_server).to receive(:update).with({synchronization_status: "ready"}) expect(lantern_server.resource).to receive(:representative_server).and_return(leader).at_least(:once) expect(lantern_server.resource).to receive(:ha_type).and_return(LanternResource::HaType::SYNC) - expect(lantern_server.resource).to 
receive(:delete_replication_slot).with(lantern_server.ubid) expect(leader).to receive(:run_query).and_return((1 * 1024 * 1024).to_s) expect { nx.wait_catch_up }.to hop("wait_synchronization") end @@ -322,7 +337,6 @@ expect(lantern_server).to receive(:update).with({synchronization_status: "ready"}) expect(lantern_server.resource).to receive(:representative_server).and_return(leader).at_least(:once) expect(lantern_server.resource).to receive(:ha_type).and_return(LanternResource::HaType::ASYNC) - expect(lantern_server.resource).to receive(:delete_replication_slot).with(lantern_server.ubid) expect(leader).to receive(:run_query).and_return((1 * 1024 * 1024).to_s) expect { nx.wait_catch_up }.to hop("wait") end @@ -354,6 +368,7 @@ describe "#wait_recovery_completion" do it "hop to wait if recovery finished" do expect(lantern_server.resource).to receive(:allow_timeline_access_to_bucket) + expect(lantern_server.resource).to receive(:logical_replication).and_return(false) expect(lantern_server).to receive(:run_query).and_return("t", "paused", "t", lantern_server.lantern_version, lantern_server.extras_version) expect(lantern_server).to receive(:timeline_id=) expect(lantern_server).to receive(:timeline_access=).with("push") @@ -364,6 +379,7 @@ it "hop to wait if not in recovery" do expect(lantern_server.resource).to receive(:allow_timeline_access_to_bucket) + expect(lantern_server.resource).to receive(:logical_replication).and_return(false) expect(lantern_server).to receive(:run_query).and_return("f", lantern_server.lantern_version, lantern_server.extras_version) expect(lantern_server).to receive(:timeline_id=) expect(lantern_server).to receive(:timeline_access=).with("push") @@ -374,6 +390,7 @@ it "do not update extension on upgrade" do expect(lantern_server.resource).to receive(:allow_timeline_access_to_bucket) + expect(lantern_server.resource).to receive(:logical_replication).and_return(false) expect(lantern_server).to receive(:run_query).and_return("f") 
expect(lantern_server).to receive(:timeline_id=) expect(lantern_server).to receive(:timeline_access=).with("push") @@ -384,6 +401,7 @@ end it "update extension on version mismatch" do + expect(lantern_server.resource).to receive(:logical_replication).and_return(false) expect(lantern_server.resource).to receive(:allow_timeline_access_to_bucket) expect(lantern_server).to receive(:run_query).and_return("t", "paused", "t", "0.2.4", "0.1.4") expect(lantern_server).to receive(:timeline_id=) @@ -397,6 +415,48 @@ expect { nx.wait_recovery_completion }.to hop("wait_timeline_available") end + it "does not setup ssl if parent has no domain" do + parent_reosurce = instance_double(LanternResource) + representative_server = instance_double(LanternServer) + expect(parent_reosurce).to receive(:representative_server).and_return(representative_server).at_least(:once) + expect(representative_server).to receive(:domain).and_return(nil).at_least(:once) + expect(lantern_server.resource).to receive(:parent).and_return(parent_reosurce).at_least(:once) + expect(lantern_server.resource).to receive(:logical_replication).and_return(true) + expect(lantern_server.resource).to receive(:allow_timeline_access_to_bucket) + expect(lantern_server).to receive(:run_query).and_return("f") + expect(lantern_server).to receive(:timeline_id=) + expect(lantern_server).to receive(:timeline_access=).with("push") + expect(lantern_server).to receive(:save_changes) + frame = {"pg_upgrade" => {"lantern_version" => "0.5.0", "extras_version" => "0.5.0", "minor_version" => "1", "pg_version" => 17}} + expect(nx.strand).to receive(:stack).and_return([frame]).at_least(:once) + expect(lantern_server.resource).to receive(:version_upgrade).and_return(true) + expect(Prog::Lantern::LanternTimelineNexus).to receive(:assemble).and_return(instance_double(Strand, id: "104b0033-b3f6-8214-ae27-0cd3cef18ce5")) + expect(nx).to receive(:incr_run_pg_upgrade) + expect { nx.wait_recovery_completion }.to hop("wait_timeline_available") + 
end + + it "run pg_upgrade if frame has pg_upgrade info" do + parent_reosurce = instance_double(LanternResource) + representative_server = instance_double(LanternServer) + expect(parent_reosurce).to receive(:representative_server).and_return(representative_server).at_least(:once) + expect(representative_server).to receive(:domain).and_return("example.com").at_least(:once) + expect(lantern_server.resource).to receive(:parent).and_return(parent_reosurce).at_least(:once) + expect(lantern_server.resource).to receive(:logical_replication).and_return(true) + expect(lantern_server.resource).to receive(:allow_timeline_access_to_bucket) + expect(lantern_server).to receive(:add_domain_to_stack).with(parent_reosurce.representative_server.domain, nx.strand) + expect(nx).to receive(:incr_setup_ssl) + expect(lantern_server).to receive(:run_query).and_return("f") + expect(lantern_server).to receive(:timeline_id=) + expect(lantern_server).to receive(:timeline_access=).with("push") + expect(lantern_server).to receive(:save_changes) + frame = {"pg_upgrade" => {"lantern_version" => "0.5.0", "extras_version" => "0.5.0", "minor_version" => "1", "pg_version" => 17}} + expect(nx.strand).to receive(:stack).and_return([frame]).at_least(:once) + expect(lantern_server.resource).to receive(:version_upgrade).and_return(true) + expect(Prog::Lantern::LanternTimelineNexus).to receive(:assemble).and_return(instance_double(Strand, id: "104b0033-b3f6-8214-ae27-0cd3cef18ce5")) + expect(nx).to receive(:incr_run_pg_upgrade) + expect { nx.wait_recovery_completion }.to hop("wait_timeline_available") + end + it "nap 5" do expect(lantern_server).to receive(:run_query).and_return("t", "unk") expect { nx.wait_recovery_completion }.to nap(5) @@ -523,11 +583,9 @@ describe "#update_image" do it "updates image and naps" do - expect(Config).to receive(:gcp_creds_gcr_b64).and_return("test-creds").at_least(:once) expect(lantern_server).to receive(:container_image).and_return("test-image").at_least(:once) 
expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --check update_docker_image").and_return("NotStarted") expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer 'sudo lantern/bin/update_docker_image' update_docker_image", stdin: JSON.generate({ - gcp_creds_gcr_b64: Config.gcp_creds_gcr_b64, container_image: lantern_server.container_image })) expect { nx.update_image }.to nap(10) @@ -565,15 +623,6 @@ expect { nx.add_domain }.to raise_error "no domain in stack" end - it "fails to add domain" do - expect(nx).to receive(:frame).and_return({"domain" => "db.lantern.dev"}).at_least(:once) - expect(lantern_server.vm.sshable).to receive(:host).and_return("1.1.1.1") - cf_client = instance_double(Dns::Cloudflare) - expect(Dns::Cloudflare).to receive(:new).and_return(cf_client) - expect(cf_client).to receive(:upsert_dns_record).and_raise - expect { nx.add_domain }.to hop("wait") - end - it "adds domain and setup ssl" do expect(lantern_server.vm.sshable).to receive(:host).and_return("1.1.1.1") cf_client = instance_double(Dns::Cloudflare) @@ -581,45 +630,12 @@ expect(nx).to receive(:frame).and_return({"domain" => "test.lantern.dev"}).at_least(:once) expect(lantern_server).to receive(:update).with({domain: "test.lantern.dev"}) + expect(lantern_server).to receive(:remove_domain_from_stack) expect(cf_client).to receive(:upsert_dns_record).with("test.lantern.dev", "1.1.1.1") expect { nx.add_domain }.to hop("setup_ssl") end end - describe "#destroy_domain" do - it "destroys domain" do - cf_client = instance_double(Dns::Cloudflare) - expect(Dns::Cloudflare).to receive(:new).and_return(cf_client) - expect(lantern_server).to receive(:domain).and_return("example.com") - expect(cf_client).to receive(:delete_dns_record).with("example.com") - nx.destroy_domain - end - end - - describe "#add_domain_to_stack" do - it "adds domain to current frame" do - domain = "db.lantern.dev" - frame = {} - expect(nx.strand).to 
receive(:stack).and_return([frame]).at_least(:once) - expect(frame).to receive(:[]=).with("domain", domain) - expect(nx.strand).to receive(:modified!).with(:stack) - expect(nx.strand).to receive(:save_changes) - expect { nx.add_domain_to_stack(domain) }.not_to raise_error - end - end - - describe "#remove_domain_from_stack" do - it "removes domain from current frame" do - domain = "db.lantern.dev" - frame = {"domain" => domain} - expect(nx.strand).to receive(:stack).and_return([frame]).at_least(:once) - expect(frame).to receive(:delete).with("domain") - expect(nx.strand).to receive(:modified!).with(:stack) - expect(nx.strand).to receive(:save_changes) - expect { nx.remove_domain_from_stack }.not_to raise_error - end - end - describe "#setup_ssl" do it "calls setup ssl with domain from frame and naps" do expect(nx).to receive(:frame).and_return({"domain" => "db.lantern.dev"}) @@ -659,7 +675,7 @@ it "sets up ssl and hops to wait_db_available" do expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --check setup_ssl").and_return("Succeeded") expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --clean setup_ssl") - expect(nx).to receive(:remove_domain_from_stack) + expect(lantern_server).to receive(:remove_domain_from_stack) expect { nx.setup_ssl }.to hop("wait_db_available") end @@ -669,7 +685,7 @@ logs = {"stdout" => "", "stderr" => "oom"} expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --logs setup_ssl").and_return(JSON.generate(logs)) expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --clean setup_ssl") - expect(nx).to receive(:remove_domain_from_stack) + expect(lantern_server).to receive(:remove_domain_from_stack) expect(Prog::PageNexus).to receive(:assemble_with_logs).with("Lantern SSL Setup Failed for test", [lantern_server.resource.ubid, lantern_server.ubid], logs, "error", "LanternSSLSetupFailed", lantern_server.ubid) expect { nx.setup_ssl }.to 
hop("wait") end @@ -700,6 +716,11 @@ expect { nx.wait }.to hop("restart_server") end + it "hops to run_pg_upgrade" do + nx.incr_run_pg_upgrade + expect { nx.wait }.to hop("run_pg_upgrade") + end + it "hops to start_server" do nx.incr_start_server expect { nx.wait }.to hop("start_server") @@ -789,7 +810,6 @@ expect(lantern_server).to receive(:primary?).and_return(false) expect(lantern_server).to receive(:domain).and_return(nil) expect(lantern_server).to receive(:destroy) - expect(lantern_server.resource).to receive(:delete_replication_slot).with(lantern_server.ubid) expect { nx.destroy }.to exit({"msg" => "lantern server was deleted"}) end @@ -798,7 +818,7 @@ expect(lantern_server).to receive(:primary?).and_return(true) expect(lantern_server.timeline).to receive(:incr_destroy).at_least(:once) expect(lantern_server).to receive(:domain).and_return("example.com") - expect(nx).to receive(:destroy_domain) + expect(lantern_server).to receive(:destroy_domain) expect(lantern_server).to receive(:destroy) expect { nx.destroy }.to exit({"msg" => "lantern server was deleted"}) end @@ -991,7 +1011,7 @@ expect(current_master.vm.sshable).to receive(:cmd) expect(current_master).to receive(:incr_container_stopped) - expect { nx.take_over }.to hop("swap_ip") + expect { nx.take_over }.to hop("swap_dns") end it "swap ips" do @@ -1039,4 +1059,84 @@ expect { nx.container_stopped }.to nap(15) end end + + describe "#run_pg_upgrade" do + it "runs pg_upgrade" do + expect(nx).to receive(:decr_run_pg_upgrade) + image = "#{Config.gcr_image}:lantern-0.5.0-extras-0.5.0-minor-1" + frame = {"pg_upgrade" => {"lantern_version" => "0.5.0", "extras_version" => "0.5.0", "minor_version" => "1", "pg_version" => 17}} + expect(nx.strand).to receive(:stack).and_return([frame]).at_least(:once) + expect(lantern_server).to receive(:container_image).and_return(image).at_least(:once) + expect(lantern_server.resource).to receive(:drop_ddl_log_trigger) + expect(lantern_server.vm.sshable).to 
receive(:cmd).with("common/bin/daemonizer 'sudo lantern/bin/run_pg_upgrade' pg_upgrade", stdin: JSON.generate( + container_image: lantern_server.container_image, + pg_version: 17, + old_pg_version: lantern_server.resource.pg_version + )) + + expect { nx.run_pg_upgrade }.to hop("wait_pg_upgrade") + end + end + + describe "#wait_pg_upgrade" do + it "waits pg_upgrade and nap" do + frame = {"pg_upgrade" => {"lantern_version" => "0.5.0", "extras_version" => "0.5.0", "minor_version" => "1", "pg_version" => 17}} + expect(nx.strand).to receive(:stack).and_return([frame]).at_least(:once) + expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --check pg_upgrade").and_return("InProgress") + expect { nx.wait_pg_upgrade }.to nap 10 + end + + it "waits pg_upgrade and fail" do + frame = {"pg_upgrade" => {"lantern_version" => "0.5.0", "extras_version" => "0.5.0", "minor_version" => "1", "pg_version" => 17}} + expect(nx.strand).to receive(:stack).and_return([frame]).at_least(:once) + expect(lantern_server.resource).to receive(:ubid).and_return("test").at_least(:once) + expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --check pg_upgrade").and_return("Failed") + + logs = {"stdout" => "", "stderr" => "error happened"} + expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --logs pg_upgrade").and_return(JSON.generate(logs)) + expect(Prog::PageNexus).to receive(:assemble_with_logs).with("Postgres update failed on #{lantern_server.resource.name} (#{lantern_server.resource.label})", [lantern_server.resource.ubid, lantern_server.ubid], logs, "critical", "LanternPGUpgradeFailed", lantern_server.ubid) + expect { nx.wait_pg_upgrade }.to hop("wait") + end + + it "waits pg_upgrade and succeed" do + frame = {"pg_upgrade" => {"lantern_version" => "0.5.0", "extras_version" => "0.5.0", "minor_version" => "1", "pg_version" => 17}} + expect(nx.strand).to receive(:stack).and_return([frame]).at_least(:once) + 
expect(lantern_server.vm.sshable).to receive(:cmd).with("common/bin/daemonizer --check pg_upgrade").and_return("Succeeded") + expect(lantern_server).to receive(:update).with(extras_version: "0.5.0", lantern_version: "0.5.0", minor_version: "1") + expect(lantern_server.resource).to receive(:update).with(pg_version: 17) + expect(frame).to receive(:delete).with("pg_upgrade") + expect(nx.strand).to receive(:modified!).with(:stack) + expect(nx.strand).to receive(:save_changes) + expect(nx).to receive(:register_deadline).with(:wait, 40 * 60) + expect { nx.wait_pg_upgrade }.to hop("init_sql") + end + end + + describe "#swap_dns" do + it "calls swap dns with representative_server" do + leader = instance_double(LanternServer) + expect(lantern_server.resource).to receive(:representative_server).and_return(leader) + expect(lantern_server).to receive(:swap_dns).with(leader) + expect { nx.swap_dns }.to hop("wait") + end + end + + describe "#wait_swap_dns" do + it "naps 10" do + expect(lantern_server).to receive(:is_dns_correct?).and_return(false) + expect { nx.wait_swap_dns }.to nap 5 + end + + it "naps 5" do + expect(lantern_server).to receive(:is_dns_correct?).and_return(true) + expect(lantern_server).to receive(:run_query).and_raise "test" + expect { nx.wait_swap_dns }.to nap 5 + end + + it "hops to promote" do + expect(lantern_server).to receive(:is_dns_correct?).and_return(true) + expect(lantern_server).to receive(:run_query).and_return("1") + expect { nx.wait_swap_dns }.to hop("promote_server") + end + end end diff --git a/spec/prog/lantern/lantern_timeline_nexus_spec.rb b/spec/prog/lantern/lantern_timeline_nexus_spec.rb index 5a7029ef5..9edc3f6d0 100644 --- a/spec/prog/lantern/lantern_timeline_nexus_spec.rb +++ b/spec/prog/lantern/lantern_timeline_nexus_spec.rb @@ -187,6 +187,7 @@ describe "#destroy" do it "naps for one month" do expect(nx).to receive(:when_destroy_set?).and_yield + expect(Config).to receive(:backup_retention_days_after_deletion).and_return(30) expect { 
nx.destroy }.to nap(60 * 60 * 24 * 30) end diff --git a/spec/routes/api/project/location/lantern_spec.rb b/spec/routes/api/project/location/lantern_spec.rb index fc76c40d1..2a1a009d0 100644 --- a/spec/routes/api/project/location/lantern_spec.rb +++ b/spec/routes/api/project/location/lantern_spec.rb @@ -216,7 +216,7 @@ it "adds domain" do post "/api/project/#{project.ubid}/location/#{pg.location}/lantern/instance-1/add-domain", {domain: "example.com"} server = LanternServer.where(id: pg.representative_server.id).first - expect(server.domain).to eq("example.com") + expect(server.strand.stack.first["domain"]).to eq("example.com") expect(last_response.status).to eq(200) end end