add api routes to initiate upgrades #88

Merged · 5 commits · Nov 13, 2024
15 changes: 15 additions & 0 deletions lib/validation.rb
@@ -91,6 +91,21 @@ def self.validate_version(version, field_name)
fail ValidationFailed.new({version: msg}) unless version && !version.to_s.strip.empty?
end

def self.validate_pg_version(version)
if version.nil? || version.to_s.empty?
return 17
end

msg = "unsupported pg_version"
fail ValidationFailed.new({pg_version: msg}) unless [15, 17].include?(version.to_i)
version.to_i
end

def self.validate_rollback_request(pg)
msg = "database does not have rollback_target"
fail ValidationFailed.new({rollback_target: msg}) if pg.rollback_target.nil?
end

def self.validate_storage_volumes(storage_volumes, boot_disk_index)
allowed_keys = [:encrypted, :size_gib, :boot, :skip_sync]
fail ValidationFailed.new({storage_volumes: "At least one storage volume is required."}) if storage_volumes.empty?
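As a usage reference, a minimal sketch of the two new validators (the pg variable is a placeholder for a LanternResource; the return values follow the code above):

    Validation.validate_pg_version(nil)    # => 17, the default when the parameter is omitted
    Validation.validate_pg_version("15")   # => 15
    Validation.validate_pg_version("16")   # raises Validation::ValidationFailed

    # Raises unless the resource already has a rollback_target recorded:
    Validation.validate_rollback_request(pg)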
9 changes: 9 additions & 0 deletions migrate/20241113_lantern_resource_rollback_target.rb
@@ -0,0 +1,9 @@
# frozen_string_literal: true

Sequel.migration do
change do
alter_table(:lantern_resource) do
add_column :rollback_target, :uuid
end
end
end
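The migration adds a single nullable uuid column, so no backfill is required and Sequel can reverse it automatically. The project's migration runner is not part of this diff; with plain Sequel, applying it would look roughly like this (a sketch; the connection URL is a placeholder):

    require "sequel"

    DB = Sequel.connect(ENV.fetch("CLOVER_DATABASE_URL", "postgres://localhost/clover"))
    Sequel.extension :migration
    Sequel::Migrator.run(DB, "migrate")  # picks up 20241113_lantern_resource_rollback_target.rb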
25 changes: 0 additions & 25 deletions misc/misc_operations.rb
@@ -240,29 +240,4 @@ def self.create_image(lantern_version: "0.2.7", extras_version: "0.1.5", minor_v
puts "Image created"
vm.incr_destroy
end

def self.rollback_switchover(current_resource, old_resource)
# stop current one and start old one
begin
current_resource.representative_server.stop_container(1)
rescue
end

old_resource.representative_server.start_container

# update dns
cf_client = Dns::Cloudflare.new
cf_client.upsert_dns_record(current_resource.representative_server.domain, old_resource.representative_server.vm.sshable.host)
old_resource.representative_server.update(domain: current_resource.representative_server.domain)
current_resource.representative_server.update(domain: nil)

# disable readonly as soon as it is started
loop do
old_resource.representative_server.run_query("SELECT 1")
old_resource.set_to_readonly(status: "off")
break
rescue
sleep 10
end
end
end
23 changes: 20 additions & 3 deletions model/lantern/lantern_resource.rb
@@ -22,7 +22,7 @@ class LanternResource < Sequel::Model
include Authorization::HyperTagMethods
include Authorization::TaggableMethods

semaphore :destroy, :swap_leaders_with_parent, :switchover_with_parent
semaphore :destroy, :swap_leaders_with_parent, :switchover_with_parent, :rollback_switchover

plugin :column_encryption do |enc|
enc.column :superuser_password
@@ -227,8 +227,6 @@ def delete_logical_subscription(name)
def create_logical_replica(lantern_version: nil, extras_version: nil, minor_version: nil, pg_upgrade: nil)
# TODO::
# 1. If new database will be created during logical replication it won't be added automatically
# 2. New timeline will be generated for lantern resource
# 3. We need rollback mechanism (basically that will be ip swap again)
ubid = LanternResource.generate_ubid
create_ddl_log
create_publication("pub_#{ubid}")
@@ -295,4 +293,23 @@ module HaType
ASYNC = "async"
SYNC = "sync"
end

def rollback_switchover
current_resource = LanternResource[rollback_target]
# stop current one and start old one
begin
current_resource.representative_server.stop_container(1)
rescue
end

representative_server.start_container

# update dns
cf_client = Dns::Cloudflare.new
cf_client.upsert_dns_record(current_resource.representative_server.domain, representative_server.vm.sshable.host)
representative_server.update(domain: current_resource.representative_server.domain)
current_resource.representative_server.update(domain: nil)

update(rollback_target: nil)
end
end
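Note that rollback_switchover is not called directly from request handlers; it is driven by the new :rollback_switchover semaphore, which the resource nexus consumes in before_run (see prog/lantern/lantern_resource_nexus.rb below). A sketch of the trigger path, assuming the incr_*/decr_* convention used by the other semaphores in this codebase:

    # Flag the resource; on the next scheduling tick the nexus hops to its
    # rollback_switchover label and calls the model method above.
    pg = LanternResource[resource_id]  # resource_id is a placeholder
    pg.incr_rollback_switchover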
10 changes: 10 additions & 0 deletions prog/gcp_vm/nexus.rb
@@ -263,4 +263,14 @@ def host
end
pop "gcp vm deleted"
end

def before_run
when_destroy_set? do
if strand.label != "destroy"
hop_destroy
elsif strand.stack.count > 1
pop "operation is cancelled due to the destruction of vm"
end
end
end
end
28 changes: 26 additions & 2 deletions prog/lantern/lantern_resource_nexus.rb
@@ -9,7 +9,7 @@ class Prog::Lantern::LanternResourceNexus < Prog::Base
extend Forwardable
def_delegators :lantern_resource, :servers, :representative_server

semaphore :destroy, :swap_leaders_with_parent, :switchover_with_parent
semaphore :destroy, :swap_leaders_with_parent, :switchover_with_parent, :rollback_switchover

def self.assemble(project_id:, location:, name:, target_vm_size:, target_storage_size_gib:, ubid: LanternResource.generate_ubid, ha_type: LanternResource::HaType::NONE, parent_id: nil, restore_target: nil, recovery_target_lsn: nil,
org_id: nil, db_name: "postgres", db_user: "postgres", db_user_password: nil, superuser_password: nil, repl_password: nil, app_env: Config.rack_env,
@@ -70,6 +70,7 @@ def self.assemble(project_id:, location:, name:, target_vm_size:, target_storage
lantern_version = parent.representative_server.lantern_version
extras_version = parent.representative_server.extras_version
minor_version = parent.representative_server.minor_version
pg_version = parent.pg_version
end

target_storage_size_gib = parent.representative_server.target_storage_size_gib
@@ -205,6 +206,10 @@ def before_run
lantern_resource.update(display_state: nil)
end

when_rollback_switchover_set? do
hop_rollback_switchover
end

when_swap_leaders_with_parent_set? do
if lantern_resource.parent.nil?
decr_swap_leaders_with_parent
Expand Down Expand Up @@ -233,11 +238,12 @@ def before_run
label def finish_take_over
# update display_states
lantern_resource.update(display_state: nil)
lantern_resource.parent.update(display_state: nil)
lantern_resource.parent.update(display_state: nil, rollback_target: lantern_resource.id)

# remove fork association so parent can be deleted
lantern_resource.update(parent_id: nil)
lantern_resource.timeline.update(parent_id: nil)

hop_wait
end

@@ -267,6 +273,24 @@ def before_run
end
end

label def rollback_switchover
decr_rollback_switchover
lantern_resource.rollback_switchover
hop_wait_rollback_switchover
end

label def wait_rollback_switchover
nap 10 unless lantern_resource.representative_server.is_dns_correct?
begin
connection = Sequel.connect(lantern_resource.connection_string)
connection["SELECT 1"].first
lantern_resource.set_to_readonly(status: "off")
rescue
nap 10
end
hop_wait_servers
end

label def swap_leaders_with_parent
decr_swap_leaders_with_parent
lantern_resource.parent.set_to_readonly
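Taken together, the rollback path through the resource nexus is roughly the following (a summary of the control flow above, not code from the PR):

    # before_run:               when_rollback_switchover_set? -> hop_rollback_switchover
    # rollback_switchover:      decr_rollback_switchover, then lantern_resource.rollback_switchover
    #                           (stop the upgraded leader, start the old one, swap DNS),
    #                           then hop_wait_rollback_switchover
    # wait_rollback_switchover: nap 10 until DNS points at the old leader and it answers
    #                           SELECT 1, then set_to_readonly(status: "off") and hop_wait_servers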
2 changes: 1 addition & 1 deletion prog/lantern/lantern_timeline_nexus.rb
@@ -50,7 +50,7 @@ def before_run

if lantern_timeline.need_cleanup?
retain_after = (Time.new - (24 * 60 * 60 * Config.backup_retention_days)).strftime("%Y-%m-%dT%H:%M:%S.%LZ")
cmd = "docker compose -f /var/lib/lantern/docker-compose.yaml exec -T -u root postgresql bash -c \"GOOGLE_APPLICATION_CREDENTIALS=/tmp/google-application-credentials-wal-g.json /opt/bitnami/postgresql/bin/wal-g delete retain FULL 7 --after #{retain_after} --confirm\""
cmd = "docker compose -f /var/lib/lantern/docker-compose.yaml exec -T -u root postgresql bash -c \"GOOGLE_APPLICATION_CREDENTIALS=/tmp/google-application-credentials-wal-g.json /usr/local/go/bin/wal-g delete retain FULL 7 --after #{retain_after} --confirm\""
lantern_timeline.leader.vm.sshable.cmd("common/bin/daemonizer '#{cmd}' delete_old_backups")
end

1 change: 1 addition & 0 deletions rhizome/lantern/bin/configure
@@ -82,6 +82,7 @@ def setup_env
f.puts("POSTGRESQL_LOG_LINE_PREFIX=lantern-logline: app: %a user: %u time: %t proc_start: %s pid: %p linenumber: %l === ")
f.puts("POSTGRESQL_LOG_DURATION=true")
f.puts("POSTGRESQL_LOG_MIN_DURATION_STATEMENT=250ms")
f.puts("POSTGRESQL_WAL_LEVEL=logical")
f.puts("GOOGLE_APPLICATION_CREDENTIALS_BIGQUERY_B64=#{$configure_hash["gcp_creds_big_query_b64"]}")
f.puts("BIGQUERY_DATASET=#{$configure_hash["big_query_dataset"]}")

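POSTGRESQL_WAL_LEVEL=logical is what lets a resource act as a publisher for the logical-replica upgrade path; since wal_level is only read at server start, the setting takes effect after the container restarts. A quick way to confirm it, sketched with Sequel (the connection URL is a placeholder):

    require "sequel"

    db = Sequel.connect("postgres://postgres:password@127.0.0.1:5432/postgres")
    db["SHOW wal_level"].first  # => {:wal_level=>"logical"}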
1 change: 1 addition & 0 deletions routes/api/project/lantern.rb
@@ -51,6 +51,7 @@ class CloverApi
lantern_version: r.params["lantern_version"],
extras_version: r.params["extras_version"],
minor_version: r.params["minor_version"],
pg_version: Validation.validate_pg_version(r.params["pg_version"]),
domain: domain,
db_name: r.params["db_name"],
db_user: r.params["db_user"],
45 changes: 45 additions & 0 deletions routes/api/project/location/lantern.rb
@@ -130,12 +130,14 @@ class CloverApi
end

r.get "backups" do
Authorization.authorize(@current_user.id, "Postgres:view", pg.id)
pg.timeline.backups_with_metadata
.sort_by { |hsh| hsh[:last_modified] }
.map { |hsh| {time: hsh[:last_modified], label: pg.timeline.get_backup_label(hsh[:key]), compressed_size: hsh[:compressed_size], uncompressed_size: hsh[:uncompressed_size]} }
end

r.post "push-backup" do
Authorization.authorize(@current_user.id, "Postgres:edit", pg.id)
pg.timeline.take_manual_backup
response.status = 200
r.halt
@@ -150,10 +152,53 @@
end

r.post "dissociate-forks" do
Authorization.authorize(@current_user.id, "Postgres:edit", pg.id)
pg.dissociate_forks
response.status = 200
r.halt
end

r.post "logical-replica" do
Authorization.authorize(@current_user.id, "Postgres:edit", pg.id)
Authorization.authorize(@current_user.id, "Postgres:create", @project.id)
lantern_version = (r.params["lantern_version"] && r.params["lantern_version"].empty?) ? nil : r.params["lantern_version"]
extras_version = (r.params["extras_version"] && r.params["extras_version"].empty?) ? nil : r.params["extras_version"]
minor_version = (r.params["minor_version"] && r.params["minor_version"].empty?) ? nil : r.params["minor_version"]
pg_upgrade = (r.params["pg_upgrade"] && r.params["pg_upgrade"].empty?) ? nil : r.params["pg_upgrade"]
st = pg.create_logical_replica(
lantern_version: lantern_version,
extras_version: extras_version,
minor_version: minor_version,
pg_upgrade: pg_upgrade
)
replica = LanternResource[st.id]
serialize(replica, :detailed)
end

r.post "switchover" do
Authorization.authorize(@current_user.id, "Postgres:edit", pg.id)

if pg.parent.nil? || !pg.logical_replication
fail CloverError.new(400, "Invalid request", "Database does not have parent or is not in logical replication state")
end

pg.incr_switchover_with_parent
response.status = 200
r.halt
end

r.post "rollback-switchover" do
Authorization.authorize(@current_user.id, "Postgres:edit", pg.id)
Validation.validate_rollback_request(pg)
current_resource = LanternResource[pg.rollback_target]
if current_resource.nil?
fail CloverError.new(404, "Not Found", "rollback_target not found")
end

pg.incr_rollback_switchover
response.status = 200
r.halt
end
end

r.get true do
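For completeness, a sketch of how a client might drive the new upgrade endpoints end to end. The path prefix, ids, and token handling are assumptions based on the routing tree, not part of this PR:

    require "net/http"
    require "uri"

    TOKEN = ENV.fetch("API_TOKEN", "placeholder-token")
    BASE  = "https://example.com/api/project/pj-123/location/us-central1/lantern/my-db"  # hypothetical

    def post(path, body = {})
      uri = URI("#{BASE}#{path}")
      req = Net::HTTP::Post.new(uri)
      req["Authorization"] = "Bearer #{TOKEN}"
      req.set_form_data(body)
      Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") { |http| http.request(req) }
    end

    # 1. Create a logical replica; lantern_version, extras_version, minor_version and
    #    pg_upgrade are optional params (the value shown is illustrative).
    post("/logical-replica", {"pg_upgrade" => "17"})

    # 2. Once the replica has caught up, promote it.
    post("/switchover")

    # 3. If the upgraded leader misbehaves, swap DNS and traffic back to the old one.
    post("/rollback-switchover")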
3 changes: 2 additions & 1 deletion serializers/api/lantern.rb
@@ -28,7 +28,8 @@ def self.base(pg)
db_user: pg.db_user,
db_user_password: pg.db_user_password,
repl_user: pg.repl_user,
repl_password: pg.repl_password
repl_password: pg.repl_password,
pg_version: pg.pg_version
}
end

24 changes: 24 additions & 0 deletions spec/lib/validation_spec.rb
@@ -170,6 +170,30 @@
end
end

describe "#validate_pg_version" do
it "valid version" do
expect(described_class.validate_pg_version("17")).to be(17)
expect(described_class.validate_pg_version("15")).to be(15)
expect(described_class.validate_pg_version("")).to be(17)
expect(described_class.validate_pg_version(nil)).to be(17)
end

it "invalid version" do
expect { described_class.validate_pg_version("as") }.to raise_error described_class::ValidationFailed
expect { described_class.validate_pg_version("16") }.to raise_error described_class::ValidationFailed
end
end

describe "#validate_rollback_request" do
it "valid request" do
expect(described_class.validate_rollback_request(instance_double(LanternResource, rollback_target: LanternResource.generate_uuid))).to be_nil
end

it "invalid version" do
expect { described_class.validate_rollback_request(instance_double(LanternResource, rollback_target: nil)) }.to raise_error described_class::ValidationFailed
end
end

describe "#validate_lantern_size" do
it "valid lantern size" do
expect(described_class.validate_lantern_size("n1-standard-2").name).to eq("n1-standard-2")
29 changes: 29 additions & 0 deletions spec/model/lantern/lantern_resource_spec.rb
@@ -336,4 +336,33 @@
expect(lantern_resource.get_logical_replication_lag("test_slot")).to be(0)
end
end

describe "#rollback_switchover" do
it "performs a rollback switchover successfully" do
current_representative_server = instance_double(LanternServer, domain: "example.com", vm: instance_double(GcpVm, sshable: instance_double(Sshable, host: "127.0.0.1")))
old_representative_server = instance_double(LanternServer, domain: nil, vm: instance_double(GcpVm, sshable: instance_double(Sshable, host: "127.0.0.2")))
current_resource = instance_double(described_class, representative_server: current_representative_server)
expect(lantern_resource).to receive(:rollback_target).and_return("test-target").at_least(:once)
expect(lantern_resource).to receive(:representative_server).and_return(old_representative_server).at_least(:once)
allow(described_class).to receive(:[]).with(lantern_resource.rollback_target).and_return(current_resource)

expect(current_resource.representative_server).to receive(:stop_container).with(1).and_return(true).at_least(:once)

expect(old_representative_server).to receive(:start_container)

cf_client = instance_double(Dns::Cloudflare)
allow(Dns::Cloudflare).to receive(:new).and_return(cf_client)
expect(cf_client).to receive(:upsert_dns_record).with(
current_resource.representative_server.domain,
old_representative_server.vm.sshable.host
)

expect(old_representative_server).to receive(:update).with(domain: current_resource.representative_server.domain)
expect(current_resource.representative_server).to receive(:update).with(domain: nil)

expect(lantern_resource).to receive(:update).with(rollback_target: nil)

expect { lantern_resource.rollback_switchover }.not_to raise_error
end
end
end
22 changes: 22 additions & 0 deletions spec/prog/gcp_vm/nexus_spec.rb
@@ -325,4 +325,26 @@
expect(nx.host).to eq("1.1.1.1")
end
end

describe "#before_run" do
it "hops to destroy" do
expect(nx).to receive(:when_destroy_set?).and_yield
expect { nx.before_run }.to hop("destroy")
end

it "pops if already in the destroy state and has stack items" do
expect(nx).to receive(:when_destroy_set?).and_yield
expect(nx.strand).to receive(:label).and_return("destroy").at_least(:once)
frame = {"link" => ["GcpVm::Nexus", "wait"]}
expect(nx).to receive(:frame).and_return(frame)
expect(nx.strand).to receive(:stack).and_return([JSON.generate(frame), JSON.generate(frame)]).at_least(:once)
expect { nx.before_run }.to hop("wait", "GcpVm::Nexus")
end

it "does not hop to destroy if already in the destroy state" do
expect(nx).to receive(:when_destroy_set?).and_yield
expect(nx.strand).to receive(:label).and_return("destroy").at_least(:once)
expect { nx.before_run }.not_to hop("destroy")
end
end
end