Skip to content

Commit

Permalink
set lantern server to stopped mode after switchover, retry ssh cmd execution after cache invalidation
Browse files Browse the repository at this point in the history
  • Loading branch information
var77 committed Nov 15, 2024
1 parent e486877 commit 92e98f4
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 11 deletions.
3 changes: 2 additions & 1 deletion model/lantern/lantern_resource.rb
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,9 @@ def rollback_switchover
current_resource.representative_server.stop_container(1)
rescue
end
current_resource.representative_server.incr_container_stopped

representative_server.start_container
representative_server.incr_take_over

# update dns
cf_client = Dns::Cloudflare.new
Expand Down
15 changes: 11 additions & 4 deletions model/sshable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,17 @@ def keys
}
end

def cmd(cmd, stdin: nil, log: true)
def cmd(command, stdin: nil, log: true)
start = Time.now
stdout = StringIO.new
stderr = StringIO.new
exit_code = nil
exit_signal = nil

has_cached_session = !Thread.current[:clover_ssh_cache].nil? && !Thread.current[:clover_ssh_cache][[host, unix_user]].nil?
begin
connect.open_channel do |ch|
ch.exec(cmd) do |ch, success|
ch.exec(command) do |ch, success|
ch.on_data do |ch, data|
$stderr.write(data) if REPL
stdout.write(data)
Expand All @@ -64,6 +65,12 @@ def cmd(cmd, stdin: nil, log: true)
end.wait
rescue
invalidate_cache_entry

if has_cached_session
# if the session was cached previously
# we will retry command as ssh session may be closed
return cmd(command, stdin: stdin, log: log)
end
raise
end

Expand All @@ -74,7 +81,7 @@ def cmd(cmd, stdin: nil, log: true)
Clog.emit("ssh cmd execution") do
finish = Time.now
embed = {start: start, finish: finish, duration: finish - start,
cmd: cmd,
cmd: command,
exit_code: exit_code, exit_signal: exit_signal}

# Suppress large outputs to avoid annoyance in duplication
Expand All @@ -92,7 +99,7 @@ def cmd(cmd, stdin: nil, log: true)
end
end

fail SshError.new(cmd, stdout_str, stderr.string.freeze, exit_code, exit_signal) unless exit_code.zero?
fail SshError.new(command, stdout_str, stderr.string.freeze, exit_code, exit_signal) unless exit_code.zero?
stdout_str
end

Expand Down
1 change: 1 addition & 0 deletions prog/lantern/lantern_resource_nexus.rb
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ def before_run
label def switch_dns_with_parent
lantern_resource.parent.representative_server.stop_container(1)
lantern_resource.update(logical_replication: false)
lantern_resource.parent.representative_server.incr_container_stopped

if lantern_resource.parent.representative_server.domain.nil?
hop_finish_take_over
Expand Down
4 changes: 2 additions & 2 deletions prog/lantern/lantern_server_nexus.rb
Original file line number Diff line number Diff line change
Expand Up @@ -523,11 +523,11 @@ def before_run
label def container_stopped
decr_container_stopped
when_take_over_set? do
vm.sshable.cmd("sudo docker compose -f #{Config.compose_file} up -d")
lantern_server.start_container
hop_take_over
end

nap 15
nap 10
end

label def promote_server
Expand Down
3 changes: 2 additions & 1 deletion spec/model/lantern/lantern_resource_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@

expect(current_resource.representative_server).to receive(:stop_container).with(1).and_return(true).at_least(:once)

expect(old_representative_server).to receive(:start_container)
expect(old_representative_server).to receive(:incr_take_over)

cf_client = instance_double(Dns::Cloudflare)
allow(Dns::Cloudflare).to receive(:new).and_return(cf_client)
Expand All @@ -368,6 +368,7 @@

expect(old_representative_server).to receive(:update).with(domain: current_resource.representative_server.domain)
expect(current_resource.representative_server).to receive(:update).with(domain: nil)
expect(current_resource.representative_server).to receive(:incr_container_stopped)

expect(lantern_resource).to receive(:update).with(rollback_target: nil)

Expand Down
10 changes: 10 additions & 0 deletions spec/model/sshable_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -157,5 +157,15 @@ def simulate(cmd:, exit_status:, exit_signal:, stdout:, stderr:)
expect(sa).to receive(:invalidate_cache_entry)
expect { sa.cmd("irrelevant") }.to raise_error err
end

# Covers the case where sa.cmd fails while a session is present in the
# per-thread SSH cache: the cache entry must be invalidated and the original
# error must still reach the caller.
it "invalidates the cache if the session retries and raises on second try" do
err = IOError.new("the party is over")
# Every open_channel call raises, so no attempt can succeed.
expect(session).to receive(:open_channel).and_raise(err).at_least(:once)
# Stand-in for the hash read through Thread.current[:clover_ssh_cache].
cache = instance_double(Hash)
expect(Thread.current).to receive(:[]).with(:clover_ssh_cache).and_return(cache).at_least(:once)
# The first lookup for this host/user pair yields the session; later lookups yield nil.
expect(cache).to receive(:[]).with(["test.localhost", "testuser"]).and_return(session, nil).at_least(:once)
expect(sa).to receive(:invalidate_cache_entry).at_least(:once)
# The same IOError instance is expected to propagate out of cmd.
expect { sa.cmd("irrelevant") }.to raise_error err
end
end
end
2 changes: 2 additions & 0 deletions spec/prog/lantern/lantern_resource_nexus_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -468,13 +468,15 @@
parent = instance_double(LanternResource, representative_server: instance_double(LanternServer, domain: nil))
expect(lantern_resource).to receive(:parent).and_return(parent).at_least(:once)
expect(lantern_resource.parent.representative_server).to receive(:stop_container)
expect(lantern_resource.parent.representative_server).to receive(:incr_container_stopped)
expect { nx.switch_dns_with_parent }.to hop("finish_take_over")
end

it "switches dns with parent and hop to wait_switch_dns" do
parent = instance_double(LanternResource, representative_server: instance_double(LanternServer, domain: "test-domain"))
expect(lantern_resource).to receive(:parent).and_return(parent).at_least(:once)
expect(lantern_resource.parent.representative_server).to receive(:stop_container)
expect(lantern_resource.parent.representative_server).to receive(:incr_container_stopped)
expect(lantern_resource.representative_server).to receive(:swap_dns).with(parent.representative_server)
expect(lantern_resource).to receive(:update).with(logical_replication: false)
expect { nx.switch_dns_with_parent }.to hop("wait_switch_dns")
Expand Down
6 changes: 3 additions & 3 deletions spec/prog/lantern/lantern_server_nexus_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1051,12 +1051,12 @@
describe "#container_stopped" do
it "hops to take_over" do
nx.incr_take_over
expect(lantern_server.vm.sshable).to receive(:cmd)
expect(lantern_server).to receive(:start_container)
expect { nx.container_stopped }.to hop("take_over")
end

it "naps 15" do
expect { nx.container_stopped }.to nap(15)
it "naps 10" do
expect { nx.container_stopped }.to nap(10)
end
end

Expand Down

0 comments on commit 92e98f4

Please sign in to comment.