From 561c2f98ecfe54194af58fc2e4458f375b5f637c Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Wed, 10 Jul 2024 18:40:41 +0400 Subject: [PATCH] run doctor queries in target vms with daemonizer to not exceed transaction maximum time --- migrate/20240710_lantern_doctor_rhizome.rb | 19 + model/lantern/lantern_doctor.rb | 3 + model/lantern/lantern_doctor_page.rb | 8 +- model/lantern/lantern_doctor_query.rb | 124 +----- model/sshable.rb | 1 + prog/gcp_vm/nexus.rb | 1 + prog/lantern/lantern_doctor_nexus.rb | 59 ++- rhizome/lantern/bin/doctor/run_query | 117 ++++++ rhizome/lantern/lib/common.rb | 2 +- .../model/lantern/lantern_doctor_page_spec.rb | 7 +- .../lantern/lantern_doctor_query_spec.rb | 395 +++++------------- spec/model/lantern/lantern_doctor_spec.rb | 3 + spec/prog/gcp_vm/nexus_spec.rb | 4 + .../prog/lantern/lantern_doctor_nexus_spec.rb | 122 +++++- .../project/location/lantern_doctor_spec.rb | 10 +- .../routes/web/project/lantern_doctor_spec.rb | 2 +- .../project/location/lantern_doctor_spec.rb | 8 +- views/lantern/show.erb | 4 +- 18 files changed, 473 insertions(+), 416 deletions(-) create mode 100644 migrate/20240710_lantern_doctor_rhizome.rb create mode 100755 rhizome/lantern/bin/doctor/run_query diff --git a/migrate/20240710_lantern_doctor_rhizome.rb b/migrate/20240710_lantern_doctor_rhizome.rb new file mode 100644 index 000000000..68d227a7d --- /dev/null +++ b/migrate/20240710_lantern_doctor_rhizome.rb @@ -0,0 +1,19 @@ +# frozen_string_literal: true + +Sequel.migration do + up do + alter_table(:lantern_doctor_page) do + add_column :vm_name, :text, null: true + end + + alter_table(:lantern_doctor_query) do + add_column :server_type, :text, null: true, default: "primary" + end + + # this is the query to check disk size, it should run on all servers + run "UPDATE lantern_doctor_query SET server_type='*' WHERE id='09b1b1d1-7095-89b7-8ae4-158e15e11871'" + + # update queries to sync rhizome + run "INSERT INTO semaphore (id, strand_id, name) SELECT gen_random_uuid(), id, 'sync_system_queries' FROM strand s WHERE s.prog = 'Lantern::LanternDoctorNexus'" + end +end diff --git a/model/lantern/lantern_doctor.rb b/model/lantern/lantern_doctor.rb index c5e664859..d216c2fe4 100644 --- a/model/lantern/lantern_doctor.rb +++ b/model/lantern/lantern_doctor.rb @@ -32,6 +32,9 @@ def sync_system_queries doctor_query_list = queries system_query_list = system_queries + # update rhizome in case new method will be added + resource.servers.each { _1.incr_update_rhizome } + system_query_list.each { if !has_system_query?(doctor_query_list, _1) LanternDoctorQuery.create_with_id(parent_id: _1.id, doctor_id: id, condition: "unknown", type: "user", response_type: _1.response_type) diff --git a/model/lantern/lantern_doctor_page.rb b/model/lantern/lantern_doctor_page.rb index 7c20fa0a6..80e572cce 100644 --- a/model/lantern/lantern_doctor_page.rb +++ b/model/lantern/lantern_doctor_page.rb @@ -8,12 +8,14 @@ class LanternDoctorPage < Sequel::Model include ResourceMethods - def self.create_incident(query, db_name, err: "", output: "") - pg = Prog::PageNexus.assemble_with_logs("Healthcheck: #{query.name} failed on #{query.doctor.resource.name} - #{query.doctor.resource.label} (#{db_name})", [query.ubid, query.doctor.ubid], {"stderr" => err, "stdout" => output}, query.severity, "LanternDoctorQueryFailed", query.id, db_name) + def self.create_incident(query, db_name, vm_name, err: "", output: "") + pg = Prog::PageNexus.assemble_with_logs("Healthcheck: #{query.name} failed on #{query.doctor.resource.name} - #{query.doctor.resource.label} (#{db_name} - #{vm_name})", [query.ubid, query.doctor.ubid], {"stderr" => err, "stdout" => output}, query.severity, "LanternDoctorQueryFailed", query.id, db_name, vm_name) LanternDoctorPage.create_with_id( query_id: query.id, page_id: pg.id, - status: "new" + status: "new", + db: db_name, + vm_name: vm_name ) end diff --git a/model/lantern/lantern_doctor_query.rb b/model/lantern/lantern_doctor_query.rb index a066c87d7..169d7ab03 100644 --- a/model/lantern/lantern_doctor_query.rb +++ b/model/lantern/lantern_doctor_query.rb @@ -27,6 +27,10 @@ def name parent&.name || super end + def task_name + "healthcheck_#{ubid}" + end + def db_name parent&.db_name || super end @@ -47,8 +51,17 @@ def response_type parent&.response_type || super end + def server_type + parent&.server_type || super + end + + def servers + doctor.resource.servers.select { (server_type == "*") || (server_type == "primary" && _1.primary?) || (server_type == "standby" && _1.standby?) } + end + def should_run? - CronParser.new(schedule).next(last_checked || Time.new - 365 * 24 * 60 * 60) <= Time.new + is_scheduled_time = CronParser.new(schedule).next(last_checked || Time.new - 365 * 24 * 60 * 60) <= Time.new + is_scheduled_time && doctor.resource.representative_server.vm.sshable.cmd("common/bin/daemonizer --check #{task_name}") == "NotStarted" end def is_system? @@ -68,109 +81,12 @@ def new_and_active_pages LanternDoctorPage.where(query_id: id, status: ["new", "triggered", "acknowledged"]).all end - def run - if !should_run? - return nil - end - - lantern_server = doctor.resource.representative_server - dbs = (db_name == "*") ? lantern_server.list_all_databases : [db_name] - query_user = user - - any_failed = false - dbs.each do |db| - err_msg = "" - output = "" - - failed = false - begin - if is_system? && fn_label && LanternDoctorQuery.method_defined?(fn_label) - res = send(fn_label, db, query_user) - elsif sql - res = lantern_server.run_query(sql, db: db, user: query_user).strip - else - fail "BUG: non-system query without sql" - end - - case response_type - when "bool" - if res != "f" - failed = true - any_failed = true - end - when "rows" - if res != "" - failed = true - any_failed = true - end - output = res - else - fail "BUG: invalid response type (#{response_type}) on query #{name}" - end - rescue => e - failed = true - any_failed = true - Clog.emit("LanternDoctorQuery failed") { {error: e, query_name: name, db: db, resource_name: doctor.resource.name} } - err_msg = e.message - end - - pg = LanternDoctorPage.where(query_id: id, db: db).where(Sequel.lit("status != 'resolved' ")).first - - if failed && !pg - LanternDoctorPage.create_incident(self, db, err: err_msg, output: output) - elsif !failed && pg - pg.resolve - end - end - - update(condition: any_failed ? "failed" : "healthy", last_checked: Time.new) - end - - def check_daemon_embedding_jobs(db, query_user) - lantern_server = doctor.resource.representative_server - jobs_table_exists = lantern_server.run_query(< 2000", db: db, user: query_user).strip - res == "t" - end - - failed ? "t" : "f" - end - - def check_disk_space_usage(_db, _query_user) - output = "" - doctor.resource.servers.each do |serv| - usage_percent = serv.vm.sshable.cmd("df | awk '$1 == \"/dev/root\" {print $5}' | sed 's/%//'").strip.to_i - if usage_percent > 90 - server_type = serv.primary? ? "primary" : "standby" - output += "#{server_type} server - usage #{usage_percent}%\n" - end - rescue + def update_page_status(db, vm_name, success, output, err_msg) + pg = LanternDoctorPage.where(query_id: id, db: db, vm_name: vm_name).where(Sequel.lit("status != 'resolved' ")).first + if !success && !pg + LanternDoctorPage.create_incident(self, db, vm_name, err: err_msg, output: output) + elsif success && pg + pg.resolve end - output.chomp end end diff --git a/model/sshable.rb b/model/sshable.rb index 910469e53..14d9b394e 100644 --- a/model/sshable.rb +++ b/model/sshable.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +require "stringio" require "net/ssh" require_relative "../model" diff --git a/prog/gcp_vm/nexus.rb b/prog/gcp_vm/nexus.rb index ceab25701..77c88daa6 100644 --- a/prog/gcp_vm/nexus.rb +++ b/prog/gcp_vm/nexus.rb @@ -235,6 +235,7 @@ def host end strand.children.each { _1.destroy } gcp_vm.projects.map { gcp_vm.dissociate_with_project(_1) } + LanternDoctorPage.where(vm_name: gcp_vm.name).each { _1.resolve } gcp_vm.destroy end pop "gcp vm deleted" diff --git a/prog/lantern/lantern_doctor_nexus.rb b/prog/lantern/lantern_doctor_nexus.rb index 7242bd7cb..d205134b9 100644 --- a/prog/lantern/lantern_doctor_nexus.rb +++ b/prog/lantern/lantern_doctor_nexus.rb @@ -45,12 +45,69 @@ def before_run end if lantern_doctor.should_run? - lantern_doctor.queries.each { _1.run } + hop_run_queries end nap 60 end + label def run_queries + lantern_doctor.queries.each do |query| + next if !query.should_run? + + dbs = (query.db_name == "*") ? lantern_doctor.resource.representative_server.list_all_databases : [query.db_name] + + query.servers.each do |server| + server.vm.sshable.cmd("common/bin/daemonizer 'lantern/bin/doctor/run_query' #{query.task_name}", stdin: JSON.generate({query: {is_system: query.is_system?, response_type: query.response_type, name: query.name, sql: query.sql&.tr("\n", " "), fn_label: query.fn_label, query_user: query.user}, server_type: server.primary? ? "primary" : "standby", dbs: dbs})) + end + end + + hop_wait_queries + end + + label def wait_queries + lantern_doctor.queries.each do |query| + query.servers.each do |server| + vm = server.vm + status = "Unknown" + begin + status = vm.sshable.cmd("common/bin/daemonizer --check #{query.task_name}") + rescue + end + + case status + when "Failed", "Succeeded" + logs = JSON.parse(vm.sshable.cmd("common/bin/daemonizer --logs #{query.task_name}")) + all_output = [] + + if !logs["stdout"].empty? + # stdout will be [{ "db": string, "result": string }] + begin + all_output = JSON.parse(logs["stdout"]) + rescue + all_output = [{"db" => "*", "result" => logs["stdout"], "err" => logs["stderr"]}] + end + + # resolve errored page if exists + query.update_page_status("*", vm.name, true, nil, nil) + else + # this is the case when command errored for some reason + all_output = [{"db" => "*", "result" => "", "err" => logs["stderr"]}] + end + + all_output.each do |output| + query.update_page_status(output["db"], vm.name, status == "Succeeded", output["result"], output["err"]) + end + + query.update(condition: (status == "Failed") ? "failed" : "healthy", last_checked: Time.new) + vm.sshable.cmd("common/bin/daemonizer --clean #{query.task_name}") + end + end + end + + hop_wait + end + label def sync_system_queries decr_sync_system_queries lantern_doctor.sync_system_queries diff --git a/rhizome/lantern/bin/doctor/run_query b/rhizome/lantern/bin/doctor/run_query new file mode 100755 index 000000000..eb99e82d7 --- /dev/null +++ b/rhizome/lantern/bin/doctor/run_query @@ -0,0 +1,117 @@ +#!/bin/env ruby +# frozen_string_literal: true + +require "json" +require "yaml" +require_relative "../../../common/lib/util" +require_relative "../../lib/common" + +$configure_hash = JSON.parse($stdin.read) +dbs = $configure_hash["dbs"] + +def exec_sql(sql, user: "postgres", db: "postgres") + r("docker compose -f #{$compose_file} exec -T postgresql psql -q -U #{user} -t --csv #{db}", stdin: sql).chomp.strip +end + +def run_for_db(db) + query = $configure_hash["query"] + err = "" + output = "" + response_type = query["response_type"] + name = query["name"] + is_system = query["is_system"] + fn_label = query["fn_label"] + query_user = query["query_user"] + sql = query["sql"] + + success = true + begin + if is_system && fn_label && SystemQueries.respond_to?(fn_label) + res = SystemQueries.send(fn_label, db, query_user) + elsif sql + res = exec_sql(sql, db: db, user: query_user) + else + fail "BUG: non-system query without sql" + end + + case response_type + when "bool" + if res != "f" + success = false + end + when "rows" + if res != "" + success = false + end + output = res + else + fail "BUG: invalid response type (#{response_type}) on query #{name}" + end + rescue => e + success = false + err = e.message + end + + [success, {db: db, result: output, err: err}] +end + +class SystemQueries + def self.check_daemon_embedding_jobs(db, query_user) + jobs_table_exists = exec_sql(< 2000", db: db, user: query_user) + res == "t" + end + + failed ? "t" : "f" + end + + def self.check_disk_space_usage(_db, _query_user) + server_type = $configure_hash["server_type"] + output = "" + usage_percent = r("df | awk '$1 == \"/dev/root\" {print $5}' | sed 's/%//'").chomp.strip.to_i + if usage_percent > 90 + output += "#{server_type} server - usage #{usage_percent}%\n" + end + output.chomp + end +end + +exit_code = 0 + +response = [] +dbs.each do |db| + success, res = run_for_db(db) + if !success + exit_code = 1 + end + response.push(res) +end + +puts JSON.generate(response) + +exit(exit_code) diff --git a/rhizome/lantern/lib/common.rb b/rhizome/lantern/lib/common.rb index ce101a071..e1b2dfea5 100755 --- a/rhizome/lantern/lib/common.rb +++ b/rhizome/lantern/lib/common.rb @@ -108,7 +108,7 @@ def calculate_memory_sizes total_ram = (r "free -tk | awk 'NR == 2 {print $2}'") # Calculate 95% of the total RAM in kilobytes, but reserve at least 500mb # If the subtraction will underflow we will set the limit to 500mb - shared_buf_mb = [500, [(total_ram.to_i * 0.95 / 1024).round, total_ram.to_i / 1024 - 500].min].max + shared_buf_mb = [(total_ram.to_i * 0.95 / 1024).round, total_ram.to_i / 1024 - 500].min.clamp(500..) # Calculate 50% of the total RAM in kilobytes shm_size_mb = (total_ram.to_i * 0.5 / 1024).round mem_limit_buf = "#{shared_buf_mb}MB" diff --git a/spec/model/lantern/lantern_doctor_page_spec.rb b/spec/model/lantern/lantern_doctor_page_spec.rb index d01a758de..97e0a32de 100644 --- a/spec/model/lantern/lantern_doctor_page_spec.rb +++ b/spec/model/lantern/lantern_doctor_page_spec.rb @@ -20,12 +20,13 @@ it "creates page" do query = instance_double(LanternDoctorQuery, ubid: "test", id: "test-id", severity: "error", name: "test", doctor: instance_double(LanternDoctor, ubid: "test-doc-ubid", resource: instance_double(LanternResource, name: "test-res", label: "test-label"))) db_name = "postgres" + vm_name = "test" err = "test-err" output = "test-output" - expect(Prog::PageNexus).to receive(:assemble_with_logs).with("Healthcheck: #{query.name} failed on #{query.doctor.resource.name} - #{query.doctor.resource.label} (#{db_name})", [query.ubid, query.doctor.ubid], {"stderr" => err, "stdout" => output}, query.severity, "LanternDoctorQueryFailed", query.id, db_name).and_return(instance_double(Page, id: "test-pg-id")) + expect(Prog::PageNexus).to receive(:assemble_with_logs).with("Healthcheck: #{query.name} failed on #{query.doctor.resource.name} - #{query.doctor.resource.label} (#{db_name} - #{vm_name})", [query.ubid, query.doctor.ubid], {"stderr" => err, "stdout" => output}, query.severity, "LanternDoctorQueryFailed", query.id, db_name, vm_name).and_return(instance_double(Page, id: "test-pg-id")) doctor_page = instance_double(described_class) - expect(described_class).to receive(:create_with_id).with(query_id: query.id, page_id: "test-pg-id", status: "new").and_return(doctor_page) - expect(described_class.create_incident(query, db_name, err: err, output: output)).to be(doctor_page) + expect(described_class).to receive(:create_with_id).with(query_id: query.id, page_id: "test-pg-id", status: "new", db: db_name, vm_name: vm_name).and_return(doctor_page) + expect(described_class.create_incident(query, db_name, vm_name, err: err, output: output)).to be(doctor_page) end end diff --git a/spec/model/lantern/lantern_doctor_query_spec.rb b/spec/model/lantern/lantern_doctor_query_spec.rb index dc68058e7..a42d5cb95 100644 --- a/spec/model/lantern/lantern_doctor_query_spec.rb +++ b/spec/model/lantern/lantern_doctor_query_spec.rb @@ -12,6 +12,7 @@ r.schedule = "*/1 * * * *" r.id = "6181ddb3-0002-8ad0-9aeb-084832c9273b" r.response_type = "bool" + r.server_type = "primary" end } @@ -84,6 +85,59 @@ expect(parent).to receive(:response_type).and_return("rows") expect(lantern_doctor_query.response_type).to be("rows") end + + it "returns parent server_type if parent_id is defined else self server_type" do + expect(lantern_doctor_query).to receive(:parent).and_return(nil) + expect(lantern_doctor_query.server_type).to eq("primary") + + expect(lantern_doctor_query).to receive(:parent).and_return(parent) + expect(parent).to receive(:server_type).and_return("standby") + expect(lantern_doctor_query.server_type).to be("standby") + end + + it "returns task_name" do + expect(lantern_doctor_query.task_name).to eq("healthcheck_#{lantern_doctor_query.ubid}") + end + end + + describe "#servers" do + it "returns primary servers based on server_type" do + serv1 = instance_double(LanternServer) + serv2 = instance_double(LanternServer) + allow(serv1).to receive_messages(primary?: true, standby?: false) + allow(serv2).to receive_messages(primary?: false, standby?: true) + + resource = instance_double(LanternResource, servers: [serv1, serv2]) + doctor = instance_double(LanternDoctor, resource: resource) + expect(lantern_doctor_query).to receive(:doctor).and_return(doctor) + expect(lantern_doctor_query.servers).to eq([serv1]) + end + + it "returns standby servers based on server_type" do + serv1 = instance_double(LanternServer) + serv2 = instance_double(LanternServer) + allow(serv1).to receive_messages(primary?: true, standby?: false) + allow(serv2).to receive_messages(primary?: false, standby?: true) + + resource = instance_double(LanternResource, servers: [serv1, serv2]) + doctor = instance_double(LanternDoctor, resource: resource) + allow(lantern_doctor_query).to receive(:server_type).and_return("standby") + expect(lantern_doctor_query).to receive(:doctor).and_return(doctor) + expect(lantern_doctor_query.servers).to eq([serv2]) + end + + it "returns all servers based on server_type" do + serv1 = instance_double(LanternServer) + serv2 = instance_double(LanternServer) + allow(serv1).to receive_messages(primary?: true, standby?: false) + allow(serv2).to receive_messages(primary?: false, standby?: true) + + resource = instance_double(LanternResource, servers: [serv1, serv2]) + doctor = instance_double(LanternDoctor, resource: resource) + allow(lantern_doctor_query).to receive(:server_type).and_return("*") + expect(lantern_doctor_query).to receive(:doctor).and_return(doctor) + expect(lantern_doctor_query.servers).to eq([serv1, serv2]) + end end describe "#should_run?" do @@ -96,7 +150,21 @@ expect(lantern_doctor_query.should_run?).to be(false) end + it "return false if in progress" do + serv = instance_double(LanternServer, vm: instance_double(GcpVm, sshable: instance_double(Sshable))) + resource = instance_double(LanternResource, representative_server: serv) + doctor = instance_double(LanternDoctor, resource: resource) + expect(serv.vm.sshable).to receive(:cmd).and_return("InProgress") + expect(lantern_doctor_query).to receive(:doctor).and_return(doctor) + expect(lantern_doctor_query.should_run?).to be(false) + end + it "return true if it is the same time for run" do + serv = instance_double(LanternServer, vm: instance_double(GcpVm, sshable: instance_double(Sshable))) + resource = instance_double(LanternResource, representative_server: serv) + doctor = instance_double(LanternDoctor, resource: resource) + expect(serv.vm.sshable).to receive(:cmd).and_return("NotStarted") + expect(lantern_doctor_query).to receive(:doctor).and_return(doctor) ts = Time.new min = ts.min expect(Time).to receive(:new).and_return(ts).at_least(:once) @@ -106,6 +174,11 @@ end it "return true if the running time was passed but didn't run yet" do + serv = instance_double(LanternServer, vm: instance_double(GcpVm, sshable: instance_double(Sshable))) + resource = instance_double(LanternResource, representative_server: serv) + doctor = instance_double(LanternDoctor, resource: resource) + expect(serv.vm.sshable).to receive(:cmd).and_return("NotStarted") + expect(lantern_doctor_query).to receive(:doctor).and_return(doctor) min = Time.new.min modified_min = (min == 0) ? 59 : min - 1 @@ -140,284 +213,6 @@ end end - describe "#run" do - it "returns if should not run yet" do - expect(lantern_doctor_query).to receive(:should_run?).and_return(false) - expect(lantern_doctor_query.run).to be_nil - end - - it "runs query on specified database" do - serv = instance_double(LanternServer) - resource = instance_double(LanternResource, representative_server: serv, db_user: "test") - doctor = instance_double(LanternDoctor, resource: resource) - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: lantern_doctor_query.db_name, user: resource.db_user).and_return("f") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query).to receive(:should_run?).and_return(true) - expect(lantern_doctor_query).to receive(:update).with(hash_including(condition: "healthy")) - expect { lantern_doctor_query.run }.not_to raise_error - end - - it "throws error if no sql defined" do - serv = instance_double(LanternServer, ubid: "test-ubid") - resource = instance_double(LanternResource, representative_server: serv, db_user: "test", name: "test-res", id: "6181ddb3-0002-8ad0-9aeb-084832c9273b", label: "test-label") - doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") - - expect(lantern_doctor_query).to receive(:sql).and_return(nil).at_least(:once) - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query).to receive(:should_run?).and_return(true) - expect(lantern_doctor_query).to receive(:update).with(hash_including(condition: "failed")) - expect(LanternDoctorPage).to receive(:create_incident).with(lantern_doctor_query, "postgres", err: "BUG: non-system query without sql", output: "") - - expect { lantern_doctor_query.run }.not_to raise_error - end - - it "throws error if wrong response_type" do - serv = instance_double(LanternServer, ubid: "test-ubid") - resource = instance_double(LanternResource, representative_server: serv, db_user: "test", name: "test-res", id: "6181ddb3-0002-8ad0-9aeb-084832c9273b", label: "test-label") - doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") - - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "postgres", user: resource.db_user).and_return("f") - expect(lantern_doctor_query).to receive(:response_type).and_return("test").at_least(:once) - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query).to receive(:should_run?).and_return(true) - expect(lantern_doctor_query).to receive(:update).with(hash_including(condition: "failed")) - expect(LanternDoctorPage).to receive(:create_incident).with(lantern_doctor_query, "postgres", err: "BUG: invalid response type (test) on query test", output: "") - - expect { lantern_doctor_query.run }.not_to raise_error - end - - it "runs function for specified database" do - serv = instance_double(LanternServer) - resource = instance_double(LanternResource, representative_server: serv, db_user: "test") - doctor = instance_double(LanternDoctor, resource: resource) - - expect(parent).to receive(:db_name).and_return("postgres") - expect(lantern_doctor_query).to receive(:fn_label).and_return("check_daemon_embedding_jobs").at_least(:once) - expect(lantern_doctor_query).to receive(:parent).and_return(parent).at_least(:once) - - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query).to receive(:is_system?).and_return(true).at_least(:once) - expect(lantern_doctor_query).to receive(:should_run?).and_return(true).at_least(:once) - expect(lantern_doctor_query).to receive(:check_daemon_embedding_jobs).and_return("f") - expect(lantern_doctor_query).to receive(:response_type).and_return("bool").at_least(:once) - expect(lantern_doctor_query).to receive(:update).with(hash_including(condition: "healthy")) - - expect { lantern_doctor_query.run }.not_to raise_error - end - - it "runs query on all databases" do - serv = instance_double(LanternServer) - resource = instance_double(LanternResource, representative_server: serv, db_user: "test") - doctor = instance_double(LanternDoctor, resource: resource) - dbs = ["db1", "db2"] - - expect(serv).to receive(:list_all_databases).and_return(dbs) - - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "db1", user: resource.db_user).and_return("f") - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "db2", user: resource.db_user).and_return("f") - - expect(lantern_doctor_query).to receive(:db_name).and_return("*") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query).to receive(:should_run?).and_return(true) - expect(lantern_doctor_query).to receive(:update).with(hash_including(condition: "healthy")) - - expect { lantern_doctor_query.run }.not_to raise_error - end - - it "runs query on all databases and errors" do - serv = instance_double(LanternServer, ubid: "test-ubid") - resource = instance_double(LanternResource, representative_server: serv, db_user: "test", name: "test-res", id: "6181ddb3-0002-8ad0-9aeb-084832c9273b", label: "test-label") - doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") - dbs = ["db1", "db2"] - - expect(serv).to receive(:list_all_databases).and_return(dbs) - - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "db1", user: resource.db_user).and_raise("test-err") - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "db2", user: resource.db_user).and_return("f") - - expect(lantern_doctor_query).to receive(:db_name).and_return("*") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query).to receive(:should_run?).and_return(true) - expect(lantern_doctor_query).to receive(:update).with(hash_including(condition: "failed")) - expect(LanternDoctorPage).to receive(:create_incident).with(lantern_doctor_query, "db1", err: "test-err", output: "") - - expect { lantern_doctor_query.run }.not_to raise_error - end - - it "runs query on all databases and fails" do - serv = instance_double(LanternServer, ubid: "test-ubid") - resource = instance_double(LanternResource, representative_server: serv, db_user: "test", name: "test-res", id: "6181ddb3-0002-8ad0-9aeb-084832c9273b", label: "test-label") - doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") - dbs = ["db1", "db2"] - - expect(serv).to receive(:list_all_databases).and_return(dbs) - - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "db1", user: resource.db_user).and_return("t") - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "db2", user: resource.db_user).and_return("f") - - expect(lantern_doctor_query).to receive(:db_name).and_return("*") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query).to receive(:should_run?).and_return(true) - expect(lantern_doctor_query).to receive(:update).with(hash_including(condition: "failed")) - expect(LanternDoctorPage).to receive(:create_incident).with(lantern_doctor_query, "db1", err: "", output: "") - - expect { lantern_doctor_query.run }.not_to raise_error - end - - it "runs query on all databases and fails with rows" do - serv = instance_double(LanternServer, ubid: "test-ubid") - resource = instance_double(LanternResource, representative_server: serv, db_user: "test", name: "test-res", id: "6181ddb3-0002-8ad0-9aeb-084832c9273b", label: "test-label") - doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") - dbs = ["db1", "db2"] - - expect(serv).to receive(:list_all_databases).and_return(dbs) - - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "db1", user: resource.db_user).and_return("r1\nr2") - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "db2", user: resource.db_user).and_return("") - - expect(lantern_doctor_query).to receive(:response_type).and_return("rows").at_least(:once) - expect(lantern_doctor_query).to receive(:db_name).and_return("*") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query).to receive(:should_run?).and_return(true) - expect(lantern_doctor_query).to receive(:update).with(hash_including(condition: "failed")) - expect(LanternDoctorPage).to receive(:create_incident).with(lantern_doctor_query, "db1", err: "", output: "r1\nr2") - - expect { lantern_doctor_query.run }.not_to raise_error - end - - it "does not create duplicate page" do - serv = instance_double(LanternServer, ubid: "test-ubid") - resource = instance_double(LanternResource, representative_server: serv, db_user: "test", name: "test-res", id: "6181ddb3-0002-8ad0-9aeb-084832c9273b") - doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") - dbs = ["db1", "db2"] - - expect(serv).to receive(:list_all_databases).and_return(dbs) - - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "db1", user: resource.db_user).and_return("t") - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "db2", user: resource.db_user).and_return("f") - - expect(lantern_doctor_query).to receive(:db_name).and_return("*") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query).to receive(:should_run?).and_return(true) - expect(lantern_doctor_query).to receive(:update).with(hash_including(condition: "failed")) - first_dataset = instance_double(Sequel::Dataset, first: instance_double(LanternDoctorPage)) - second_dataset = instance_double(Sequel::Dataset, first: nil) - expect(first_dataset).to receive(:where).with(Sequel.lit("status != 'resolved' ")).and_return(first_dataset) - expect(second_dataset).to receive(:where).with(Sequel.lit("status != 'resolved' ")).and_return(second_dataset) - expect(LanternDoctorPage).to receive(:where).with(query_id: lantern_doctor_query.id, db: "db1").and_return(first_dataset) - expect(LanternDoctorPage).to receive(:where).with(query_id: lantern_doctor_query.id, db: "db2").and_return(second_dataset) - - expect { lantern_doctor_query.run }.not_to raise_error - end - - it "runs query on all databases and resolves previous error" do - serv = instance_double(LanternServer, ubid: "test-ubid") - resource = instance_double(LanternResource, representative_server: serv, db_user: "test", name: "test-res", id: "6181ddb3-0002-8ad0-9aeb-084832c9273b") - doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") - dbs = ["db1", "db2"] - - expect(serv).to receive(:list_all_databases).and_return(dbs) - - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "db1", user: resource.db_user).and_return("f") - expect(serv).to receive(:run_query).with(lantern_doctor_query.sql, db: "db2", user: resource.db_user).and_return("f") - - expect(lantern_doctor_query).to receive(:db_name).and_return("*") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query).to receive(:should_run?).and_return(true) - expect(lantern_doctor_query).to receive(:update).with(hash_including(condition: "healthy")) - page1 = instance_double(LanternDoctorPage) - page2 = instance_double(LanternDoctorPage) - - first_dataset = instance_double(Sequel::Dataset, first: page1) - second_dataset = instance_double(Sequel::Dataset, first: page2) - expect(first_dataset).to receive(:where).with(Sequel.lit("status != 'resolved' ")).and_return(first_dataset) - expect(second_dataset).to receive(:where).with(Sequel.lit("status != 'resolved' ")).and_return(second_dataset) - expect(LanternDoctorPage).to receive(:where).with(query_id: lantern_doctor_query.id, db: "db1").and_return(first_dataset) - expect(LanternDoctorPage).to receive(:where).with(query_id: lantern_doctor_query.id, db: "db2").and_return(second_dataset) - expect(page1).to receive(:resolve) - expect(page2).to receive(:resolve) - - expect { lantern_doctor_query.run }.not_to raise_error - end - end - - describe "#check_disk_space_usage" do - it "fails if primary disk server usage is above 90%" do - serv1 = instance_double(LanternServer, primary?: true, vm: instance_double(GcpVm, sshable: instance_double(Sshable))) - serv2 = instance_double(LanternServer, primary?: false, vm: instance_double(GcpVm, sshable: instance_double(Sshable))) - expect(serv1.vm.sshable).to receive(:cmd).and_return("91") - expect(serv2.vm.sshable).to receive(:cmd).and_return("80") - doctor = instance_double(LanternDoctor, resource: instance_double(LanternResource, servers: [serv1, serv2]), ubid: "test-ubid") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query.check_disk_space_usage("postgres", "postgres")).to eq("primary server - usage 91%") - end - - it "fails if standby disk usage is above 90%" do - serv1 = instance_double(LanternServer, primary?: true, vm: instance_double(GcpVm, sshable: instance_double(Sshable))) - serv2 = instance_double(LanternServer, primary?: false, vm: instance_double(GcpVm, sshable: instance_double(Sshable))) - expect(serv1.vm.sshable).to receive(:cmd).and_return("11") - expect(serv2.vm.sshable).to receive(:cmd).and_return("92") - doctor = instance_double(LanternDoctor, resource: instance_double(LanternResource, servers: [serv1, serv2]), ubid: "test-ubid") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query.check_disk_space_usage("postgres", "postgres")).to eq("standby server - usage 92%") - end - - it "succceds if all servers disk usage is under 90%" do - serv1 = instance_double(LanternServer, primary?: true, vm: instance_double(GcpVm, sshable: instance_double(Sshable))) - serv2 = instance_double(LanternServer, primary?: false, vm: instance_double(GcpVm, sshable: instance_double(Sshable))) - expect(serv1.vm.sshable).to receive(:cmd).and_return("11") - expect(serv2.vm.sshable).to receive(:cmd).and_return("22") - doctor = instance_double(LanternDoctor, resource: instance_double(LanternResource, servers: [serv1, serv2]), ubid: "test-ubid") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(lantern_doctor_query.check_disk_space_usage("postgres", "postgres")).to eq("") - end - end - - describe "#check_daemon_embedding_jobs" do - it "get jobs and fails" do - serv = instance_double(LanternServer, ubid: "test-ubid") - resource = instance_double(LanternResource, representative_server: serv, db_user: "test", name: "test-res", id: "6181ddb3-0002-8ad0-9aeb-084832c9273b") - doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(serv).to receive(:run_query).and_return("t") - expect(serv).to receive(:run_query).and_return("public,test2,test-src,test-dst\npublic,test3,test-src,test-dst") - expect(serv).to receive(:run_query).and_return("f") - expect(serv).to receive(:run_query).and_return("t") - expect(lantern_doctor_query.check_daemon_embedding_jobs("postgres", "postgres")).to eq("t") - end - - it "get jobs as empty" do - serv = instance_double(LanternServer, ubid: "test-ubid") - resource = instance_double(LanternResource, representative_server: serv, db_user: "test", name: "test-res", id: "6181ddb3-0002-8ad0-9aeb-084832c9273b") - doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(serv).to receive(:run_query).and_return("t") - expect(serv).to receive(:run_query).and_return(" \n") - expect(lantern_doctor_query.check_daemon_embedding_jobs("postgres", "postgres")).to eq("f") - end - - it "get jobs and succceds" do - serv = instance_double(LanternServer, ubid: "test-ubid") - resource = instance_double(LanternResource, representative_server: serv, db_user: "test", name: "test-res", id: "6181ddb3-0002-8ad0-9aeb-084832c9273b") - doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(serv).to receive(:run_query).and_return("t") - expect(serv).to receive(:run_query).and_return("public,test2,test-src,test-dst\npublic,test3,test-src,test-dst") - expect(serv).to receive(:run_query).and_return("f") - expect(serv).to receive(:run_query).and_return("f") - expect(lantern_doctor_query.check_daemon_embedding_jobs("postgres", "postgres")).to eq("f") - end - - it "job table does not exist" do - serv = instance_double(LanternServer, ubid: "test-ubid") - resource = instance_double(LanternResource, representative_server: serv, db_user: "test", name: "test-res", id: "6181ddb3-0002-8ad0-9aeb-084832c9273b") - doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") - expect(lantern_doctor_query).to receive(:doctor).and_return(doctor).at_least(:once) - expect(serv).to receive(:run_query).and_return("f") - expect(lantern_doctor_query.check_daemon_embedding_jobs("postgres", "postgres")).to eq("f") - end - end - describe "#page" do it "lists active pages" do serv = instance_double(LanternServer, ubid: "test-ubid") @@ -425,10 +220,10 @@ doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") query = described_class.create_with_id(type: "user") expect(query).to receive(:doctor).and_return(doctor).at_least(:once) - p1 = LanternDoctorPage.create_incident(query, "pg1", err: "", output: "") - p2 = LanternDoctorPage.create_incident(query, "pg2", err: "", output: "") - p3 = LanternDoctorPage.create_incident(query, "pg3", err: "", output: "") - LanternDoctorPage.create_incident(query, "pg4", err: "", output: "") + p1 = LanternDoctorPage.create_incident(query, "pg1", "test", err: "", output: "") + p2 = LanternDoctorPage.create_incident(query, "pg2", "test", err: "", output: "") + p3 = LanternDoctorPage.create_incident(query, "pg3", "test", err: "", output: "") + LanternDoctorPage.create_incident(query, "pg4", "test", err: "", output: "") p1.trigger p2.resolve @@ -445,10 +240,10 @@ doctor = instance_double(LanternDoctor, resource: resource, ubid: "test-ubid") query = described_class.create_with_id(type: "user") expect(query).to receive(:doctor).and_return(doctor).at_least(:once) - p1 = LanternDoctorPage.create_incident(query, "pg1", err: "", output: "") - p2 = LanternDoctorPage.create_incident(query, "pg2", err: "", output: "") - p3 = LanternDoctorPage.create_incident(query, "pg3", err: "", output: "") - LanternDoctorPage.create_incident(query, "pg4", err: "", output: "") + p1 = LanternDoctorPage.create_incident(query, "pg1", "test", err: "", output: "") + p2 = LanternDoctorPage.create_incident(query, "pg2", "test", err: "", output: "") + p3 = LanternDoctorPage.create_incident(query, "pg3", "test", err: "", output: "") + LanternDoctorPage.create_incident(query, "pg4", "test", err: "", output: "") p1.trigger p2.resolve @@ -458,4 +253,32 @@ expect(pages.size).to be(3) end end + + describe "#update_page_status" do + it "creates incident" do + query_res = class_double(LanternDoctorPage) + expect(query_res).to receive(:where).and_return(class_double(LanternDoctorPage, first: nil)) + expect(LanternDoctorPage).to receive(:where).and_return(query_res) + expect(LanternDoctorPage).to receive(:create_incident) + expect { lantern_doctor_query.update_page_status("postgres", "test", false, "", "test") }.not_to raise_error + end + + it "resolves" do + p1 = instance_double(LanternDoctorPage) + expect(p1).to receive(:resolve) + query_res = class_double(LanternDoctorPage) + expect(query_res).to receive(:where).and_return(class_double(LanternDoctorPage, first: p1)) + expect(LanternDoctorPage).to receive(:where).and_return(query_res) + expect { lantern_doctor_query.update_page_status("postgres", "test", true, "", "test") }.not_to raise_error + end + + it "do nothing" do + p1 = instance_double(LanternDoctorPage) + expect(p1).not_to receive(:resolve) + query_res = class_double(LanternDoctorPage) + expect(query_res).to receive(:where).and_return(class_double(LanternDoctorPage, first: p1)) + expect(LanternDoctorPage).to receive(:where).and_return(query_res) + expect { lantern_doctor_query.update_page_status("postgres", "test", false, "", "test") }.not_to raise_error + end + end end diff --git a/spec/model/lantern/lantern_doctor_spec.rb b/spec/model/lantern/lantern_doctor_spec.rb index 0ce7f0586..d41b3b7b6 100644 --- a/spec/model/lantern/lantern_doctor_spec.rb +++ b/spec/model/lantern/lantern_doctor_spec.rb @@ -46,6 +46,9 @@ expect(lantern_doctor).to receive(:queries).and_return(query_list) expect(lantern_doctor).to receive(:system_queries).and_return(system_queries) new_query = instance_double(LanternDoctorQuery, parent_id: "test-parent-id") + serv = instance_double(LanternServer) + expect(lantern_doctor).to receive(:resource).and_return(instance_double(LanternResource, servers: [serv])).at_least(:once) + expect(serv).to receive(:incr_update_rhizome) expect(LanternDoctorQuery).to receive(:create_with_id).with(parent_id: "test-parent-id", doctor_id: lantern_doctor.id, type: "user", response_type: "rows", condition: "unknown").and_return(new_query) expect { lantern_doctor.sync_system_queries }.not_to raise_error end diff --git a/spec/prog/gcp_vm/nexus_spec.rb b/spec/prog/gcp_vm/nexus_spec.rb index a401cb039..b1a1878d6 100644 --- a/spec/prog/gcp_vm/nexus_spec.rb +++ b/spec/prog/gcp_vm/nexus_spec.rb @@ -168,6 +168,7 @@ gcp_api = instance_double(Hosting::GcpApis) expect(Hosting::GcpApis).to receive(:new).and_return(gcp_api) expect(gcp_api).to receive(:delete_vm).with("dummy-vm", "us-central1-a") + expect(LanternDoctorPage).to receive(:where).and_return([]) expect { nx.destroy }.to exit({"msg" => "gcp vm deleted"}) end @@ -179,6 +180,9 @@ expect(gcp_api).to receive(:delete_vm).with("dummy-vm", "us-central1-a") expect(gcp_vm).to receive(:address_name).and_return("dummy-vm-addr") expect(gcp_api).to receive(:release_ipv4).with("dummy-vm-addr", "us-central1") + page = instance_double(LanternDoctorPage) + expect(page).to receive(:resolve) + expect(LanternDoctorPage).to receive(:where).and_return([page]) expect { nx.destroy }.to exit({"msg" => "gcp vm deleted"}) end end diff --git a/spec/prog/lantern/lantern_doctor_nexus_spec.rb b/spec/prog/lantern/lantern_doctor_nexus_spec.rb index a22879a73..df16e3b2f 100644 --- a/spec/prog/lantern/lantern_doctor_nexus_spec.rb +++ b/spec/prog/lantern/lantern_doctor_nexus_spec.rb @@ -5,6 +5,10 @@ RSpec.describe Prog::Lantern::LanternDoctorNexus do subject(:nx) { described_class.new(Strand.create(id: "6ae7e513-c34a-8039-a72a-7be45b53f2a0", prog: "Lantern::LanternDoctorNexus", label: "start")) } + let(:sshable) { instance_double(Sshable) } + let(:vm) { instance_double(GcpVm, sshable: sshable, name: "test-vm") } + let(:server) { instance_double(LanternServer, vm: vm, primary?: true) } + let(:lantern_doctor) { instance_double( LanternDoctor, @@ -72,13 +76,10 @@ expect { nx.wait }.to hop("sync_system_queries") end - it "runs queries and naps" do - expect(lantern_doctor).to receive(:should_run?).and_return(true) + it "hops to run_queries" do expect(lantern_doctor).to receive(:resource).and_return(instance_double(LanternResource, strand: nil)) - queries = [instance_double(LanternDoctorQuery)] - expect(queries[0]).to receive(:run) - expect(lantern_doctor).to receive(:queries).and_return(queries) - expect { nx.wait }.to nap(60) + expect(lantern_doctor).to receive(:should_run?).and_return(true) + expect { nx.wait }.to hop("run_queries") end end @@ -118,4 +119,113 @@ expect { nx.destroy }.to exit({"msg" => "lantern doctor is deleted"}) end end + + describe "#run_queries" do + before do + allow(lantern_doctor).to receive(:resource).and_return(instance_double(LanternResource, representative_server: server)) + allow(server).to receive(:list_all_databases).and_return(["test_db"]) + allow(sshable).to receive(:cmd).and_return("command executed") + end + + describe "#run_queries" do + it "runs queries on the specified servers" do + query1 = instance_double(LanternDoctorQuery, servers: [server], db_name: "*") + query2 = instance_double(LanternDoctorQuery, servers: [server]) + expect(query1).to receive(:should_run?).and_return(true) + expect(query1).to receive(:is_system?).and_return(true) + expect(query1).to receive(:response_type).and_return("bool") + expect(query1).to receive(:name).and_return("test_query") + expect(query1).to receive(:sql).and_return("SELECT 1") + expect(query1).to receive(:user).and_return("postgres") + expect(query1).to receive(:fn_label).and_return(nil) + expect(query1).to receive(:task_name).and_return("healthcheck_test_query") + expect(query2).to receive(:should_run?).and_return(false) + expect(lantern_doctor).to receive(:queries).and_return([query1, query2]) + expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'lantern/bin/doctor/run_query' healthcheck_test_query", stdin: JSON.generate({query: {is_system: true, response_type: "bool", name: "test_query", sql: "SELECT 1", fn_label: nil, query_user: "postgres"}, server_type: "primary", dbs: ["test_db"]})) + + expect { nx.run_queries }.to hop("wait_queries") + end + + it "runs queries on the specified database" do + query1 = instance_double(LanternDoctorQuery, servers: [server], db_name: "postgres") + query2 = instance_double(LanternDoctorQuery, servers: [server]) + expect(server).to receive(:primary?).and_return(false) + expect(query1).to receive(:should_run?).and_return(true) + expect(query1).to receive(:is_system?).and_return(true) + expect(query1).to receive(:response_type).and_return("bool") + expect(query1).to receive(:name).and_return("test_query") + expect(query1).to receive(:sql).and_return(nil) + expect(query1).to receive(:fn_label).and_return("check_daemon_embedding_jobs") + expect(query1).to receive(:user).and_return("postgres") + expect(query1).to receive(:task_name).and_return("healthcheck_test_query") + expect(query2).to receive(:should_run?).and_return(false) + expect(lantern_doctor).to receive(:queries).and_return([query1, query2]) + expect(sshable).to receive(:cmd).with("common/bin/daemonizer 'lantern/bin/doctor/run_query' healthcheck_test_query", stdin: JSON.generate({query: {is_system: true, response_type: "bool", name: "test_query", sql: nil, fn_label: "check_daemon_embedding_jobs", query_user: "postgres"}, server_type: "standby", dbs: ["postgres"]})) + + expect { nx.run_queries }.to hop("wait_queries") + end + + it "skips" do + query1 = instance_double(LanternDoctorQuery, servers: [server], db_name: "postgres") + expect(query1).to receive(:servers).and_return([]) + expect(query1).to receive(:should_run?).and_return(true) + expect(lantern_doctor).to receive(:queries).and_return([query1]) + expect { nx.run_queries }.to hop("wait_queries") + end + end + + describe "#wait_queries" do + before do + allow(sshable).to receive(:cmd).with("common/bin/daemonizer --check test_query").and_return("Succeeded") + allow(sshable).to receive(:cmd).with("common/bin/daemonizer --logs test_query").and_return(JSON.generate({"stdout" => '[{"db": "test_db", "result": "success"}]', "stderr" => ""})) + allow(sshable).to receive(:cmd).with("common/bin/daemonizer --clean test_query").and_return("cleaned") + end + + it "checks the status of queries and updates accordingly" do + query = instance_double(LanternDoctorQuery, servers: [server], db_name: "postgres", task_name: "test_query") + expect(lantern_doctor).to receive(:queries).and_return([query]) + expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check test_query") + expect(sshable).to receive(:cmd).with("common/bin/daemonizer --logs test_query") + expect(sshable).to receive(:cmd).with("common/bin/daemonizer --clean test_query") + expect(query).to receive(:update_page_status).with("*", vm.name, true, nil, nil) + expect(query).to receive(:update_page_status).with("test_db", vm.name, true, "success", nil) + expect(query).to receive(:update).with(condition: "healthy", last_checked: instance_of(Time)) + + expect { nx.wait_queries }.to hop("wait") + end + + it "handles error" do + query = instance_double(LanternDoctorQuery, servers: [server], db_name: "postgres", task_name: "test_query") + expect(lantern_doctor).to receive(:queries).and_return([query]) + allow(sshable).to receive(:cmd).with("common/bin/daemonizer --check test_query").and_return("Failed") + allow(sshable).to receive(:cmd).with("common/bin/daemonizer --logs test_query").and_return(JSON.generate({"stdout" => "error parse", "stderr" => "error"})) + + expect(query).to receive(:update_page_status).with("*", vm.name, true, nil, nil) + expect(query).to receive(:update_page_status).with("*", vm.name, false, "error parse", "error") + expect(query).to receive(:update).with(condition: "failed", last_checked: instance_of(Time)) + + expect { nx.wait_queries }.to hop("wait") + end + + it "handles failed queries" do + query = instance_double(LanternDoctorQuery, servers: [server], db_name: "postgres", task_name: "test_query") + expect(lantern_doctor).to receive(:queries).and_return([query]) + allow(sshable).to receive(:cmd).with("common/bin/daemonizer --check test_query").and_return("Failed") + allow(sshable).to receive(:cmd).with("common/bin/daemonizer --logs test_query").and_return(JSON.generate({"stdout" => "", "stderr" => "error"})) + + expect(query).to receive(:update_page_status).with("*", vm.name, false, "", "error") + expect(query).to receive(:update).with(condition: "failed", last_checked: instance_of(Time)) + + expect { nx.wait_queries }.to hop("wait") + end + + it "skips in progress" do + query = instance_double(LanternDoctorQuery, servers: [server], db_name: "postgres", task_name: "test_query") + expect(lantern_doctor).to receive(:queries).and_return([query]) + allow(sshable).to receive(:cmd).with("common/bin/daemonizer --check test_query").and_return("InProgress") + + expect { nx.wait_queries }.to hop("wait") + end + end + end end diff --git a/spec/routes/api/project/location/lantern_doctor_spec.rb b/spec/routes/api/project/location/lantern_doctor_spec.rb index 8b1c04ba1..098d7ad11 100644 --- a/spec/routes/api/project/location/lantern_doctor_spec.rb +++ b/spec/routes/api/project/location/lantern_doctor_spec.rb @@ -80,7 +80,7 @@ pg.doctor.queries first_query = LanternDoctorQuery[doctor_id: pg.doctor.id] first_query.update(condition: "failed") - page = LanternDoctorPage.create_incident(first_query, "postgres", err: "test-err", output: "test-out") + page = LanternDoctorPage.create_incident(first_query, "postgres", "test", err: "test-err", output: "test-out") expect(page.status).to eq("new") post "/api/project/#{project.ubid}/location/#{pg.location}/lantern/#{pg.name}/doctor/incidents/#{page.id}/trigger" @@ -108,7 +108,7 @@ pg.doctor.queries first_query = LanternDoctorQuery[doctor_id: pg.doctor.id] first_query.update(condition: "failed") - page = LanternDoctorPage.create_incident(first_query, "postgres", err: "test-err", output: "test-out") + page = LanternDoctorPage.create_incident(first_query, "postgres", "test", err: "test-err", output: "test-out") expect(page.status).to eq("new") post "/api/project/#{project.ubid}/location/#{pg.location}/lantern/#{pg.name}/doctor/incidents/#{page.id}/ack" @@ -131,7 +131,7 @@ pg.doctor.queries first_query = LanternDoctorQuery[doctor_id: pg.doctor.id] first_query.update(condition: "failed") - page = LanternDoctorPage.create_incident(first_query, "postgres", err: "test-err", output: "test-out") + page = LanternDoctorPage.create_incident(first_query, "postgres", "test", err: "test-err", output: "test-out") expect(page.status).to eq("new") post "/api/project/#{project.ubid}/location/#{pg.location}/lantern/#{pg.name}/doctor/incidents/#{page.id}/resolve" @@ -154,7 +154,7 @@ pg.doctor.queries first_query = LanternDoctorQuery[doctor_id: pg.doctor.id] first_query.update(condition: "failed") - page = LanternDoctorPage.create_incident(first_query, "postgres", err: "test-err", output: "test-out") + page = LanternDoctorPage.create_incident(first_query, "postgres", "test", err: "test-err", output: "test-out") page.trigger get "/api/project/#{project.ubid}/location/#{pg.location}/lantern/#{pg.name}/doctor/incidents" @@ -166,7 +166,7 @@ incidents = first_item["incidents"] expect(incidents.size).to eq(1) - expect(incidents[0]["summary"]).to eq("Healthcheck: test system query failed on instance-1 - no-label (postgres)") + expect(incidents[0]["summary"]).to eq("Healthcheck: test system query failed on instance-1 - no-label (postgres - test)") expect(incidents[0]["error"]).to eq("test-err") expect(incidents[0]["output"]).to eq("test-out") end diff --git a/spec/routes/web/project/lantern_doctor_spec.rb b/spec/routes/web/project/lantern_doctor_spec.rb index f2a49fb98..cd8528cf7 100644 --- a/spec/routes/web/project/lantern_doctor_spec.rb +++ b/spec/routes/web/project/lantern_doctor_spec.rb @@ -49,7 +49,7 @@ pg.doctor.sync_system_queries query = LanternDoctorQuery[type: "user"] query.update(condition: "failed") - LanternDoctorPage.create_incident(query, "postgres", err: "test-err", output: "test-out") + LanternDoctorPage.create_incident(query, "postgres", "test", err: "test-err", output: "test-out") visit "#{project.path}/lantern-doctor" diff --git a/spec/routes/web/project/location/lantern_doctor_spec.rb b/spec/routes/web/project/location/lantern_doctor_spec.rb index 575b45df3..64b77efb4 100644 --- a/spec/routes/web/project/location/lantern_doctor_spec.rb +++ b/spec/routes/web/project/location/lantern_doctor_spec.rb @@ -46,7 +46,7 @@ pg.doctor.queries first_query = LanternDoctorQuery[doctor_id: pg.doctor.id] first_query.update(condition: "failed") - doctor_page = LanternDoctorPage.create_incident(first_query, "postgres", err: "test-err", output: "test-out") + doctor_page = LanternDoctorPage.create_incident(first_query, "postgres", "test", err: "test-err", output: "test-out") expect(doctor_page.status).to eq("new") visit "/project/#{project.ubid}/lantern-doctor" @@ -69,7 +69,7 @@ pg.doctor.queries first_query = LanternDoctorQuery[doctor_id: pg.doctor.id] first_query.update(condition: "failed") - doctor_page = LanternDoctorPage.create_incident(first_query, "postgres", err: "test-err", output: "test-out") + doctor_page = LanternDoctorPage.create_incident(first_query, "postgres", "test", err: "test-err", output: "test-out") expect(doctor_page.status).to eq("new") visit "/project/#{project.ubid}/lantern-doctor" @@ -92,7 +92,7 @@ pg.doctor.queries first_query = LanternDoctorQuery[doctor_id: pg.doctor.id] first_query.update(condition: "failed") - doctor_page = LanternDoctorPage.create_incident(first_query, "postgres", err: "test-err", output: "test-out") + doctor_page = LanternDoctorPage.create_incident(first_query, "postgres", "test", err: "test-err", output: "test-out") expect(doctor_page.status).to eq("new") visit "/project/#{project.ubid}/lantern-doctor" @@ -115,7 +115,7 @@ pg.doctor.queries first_query = LanternDoctorQuery[doctor_id: pg.doctor.id] first_query.update(condition: "failed") - doctor_page = LanternDoctorPage.create_incident(first_query, "postgres", err: "test-err", output: "test-out") + doctor_page = LanternDoctorPage.create_incident(first_query, "postgres", "test", err: "test-err", output: "test-out") expect(doctor_page.status).to eq("new") visit "/project/#{project.ubid}/lantern-doctor" diff --git a/views/lantern/show.erb b/views/lantern/show.erb index f882bb166..88ebf490b 100644 --- a/views/lantern/show.erb +++ b/views/lantern/show.erb @@ -37,7 +37,7 @@ ["Compute", @pg[:vm_size]], ["Storage", "#{@pg[:storage_size_gib]} GB"] ] - + if @pg[:connection_string] == "" data.push(["Connection String", "Waiting for host to be ready..."]) else @@ -339,7 +339,7 @@ ["Compute", server[:vm_size]], ["Storage", "#{server[:storage_size_gib]} GB"] ] - + if server[:connection_string] == "" data.push(["Connection String", "Waiting for host to be ready..."]) else