From 97f9f9a76158712c2cd5708503acc591f3f4cc62 Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Wed, 17 Jul 2024 19:44:17 +0400 Subject: [PATCH] change _lantern_internal schema to _lantern_extras_internal, send only one alert if healthcheck is errored --- prog/lantern/lantern_doctor_nexus.rb | 19 ++++++++++++------- rhizome/lantern/bin/doctor/run_query | 6 +++--- .../prog/lantern/lantern_doctor_nexus_spec.rb | 3 +-- 3 files changed, 16 insertions(+), 12 deletions(-) diff --git a/prog/lantern/lantern_doctor_nexus.rb b/prog/lantern/lantern_doctor_nexus.rb index d205134b9..93023ff60 100644 --- a/prog/lantern/lantern_doctor_nexus.rb +++ b/prog/lantern/lantern_doctor_nexus.rb @@ -81,25 +81,30 @@ def before_run all_output = [] if !logs["stdout"].empty? - # stdout will be [{ "db": string, "result": string }] + # stdout will be [{ "db": string, "result": string, "success": bool }] begin all_output = JSON.parse(logs["stdout"]) rescue - all_output = [{"db" => "*", "result" => logs["stdout"], "err" => logs["stderr"]}] end + end + if status == "Failed" + all_output = [{"db" => "*", "result" => logs["stdout"][..200], "err" => logs["stderr"], "success" => false}] + all_output.select { _1["success"] } + else # resolve errored page if exists query.update_page_status("*", vm.name, true, nil, nil) - else - # this is the case when command errored for some reason - all_output = [{"db" => "*", "result" => "", "err" => logs["stderr"]}] end + condition = "healthy" all_output.each do |output| - query.update_page_status(output["db"], vm.name, status == "Succeeded", output["result"], output["err"]) + if !output["success"] + condition = "failed" + end + + query.update_page_status(output["db"], vm.name, output["success"], output["result"], output["err"]) end - query.update(condition: (status == "Failed") ? "failed" : "healthy", last_checked: Time.new) + query.update(condition: condition, last_checked: Time.new) vm.sshable.cmd("common/bin/daemonizer --clean #{query.task_name}") end end diff --git a/rhizome/lantern/bin/doctor/run_query b/rhizome/lantern/bin/doctor/run_query index aeb229524..26b30343b 100755 --- a/rhizome/lantern/bin/doctor/run_query +++ b/rhizome/lantern/bin/doctor/run_query @@ -52,7 +52,7 @@ def run_for_db(db) err = e.message end - [success, {db: db, result: output, err: err}] + [success, {db: db, result: output, err: err, success: success}] end class SystemQueries @@ -61,7 +61,7 @@ class SystemQueries SELECT EXISTS ( SELECT FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace - WHERE n.nspname = '_lantern_internal' + WHERE n.nspname = '_lantern_extras_internal' AND c.relname = 'embedding_generation_jobs' AND c.relkind = 'r' ); @@ -71,7 +71,7 @@ SQL return [] end - jobs = exec_sql("SELECT \"schema\", \"table\", src_column, dst_column, pk FROM _lantern_internal.embedding_generation_jobs WHERE init_finished_at IS NOT NULL AND canceled_at IS NULL;") + jobs = exec_sql("SELECT \"schema\", \"table\", src_column, dst_column, pk FROM _lantern_extras_internal.embedding_generation_jobs WHERE init_finished_at IS NOT NULL AND canceled_at IS NULL;") jobs.split("\n").map do |row| values = row.split(",") diff --git a/spec/prog/lantern/lantern_doctor_nexus_spec.rb b/spec/prog/lantern/lantern_doctor_nexus_spec.rb index df16e3b2f..6441cb4c6 100644 --- a/spec/prog/lantern/lantern_doctor_nexus_spec.rb +++ b/spec/prog/lantern/lantern_doctor_nexus_spec.rb @@ -177,7 +177,7 @@ describe "#wait_queries" do before do allow(sshable).to receive(:cmd).with("common/bin/daemonizer --check test_query").and_return("Succeeded") - allow(sshable).to receive(:cmd).with("common/bin/daemonizer --logs test_query").and_return(JSON.generate({"stdout" => '[{"db": "test_db", "result": "success"}]', "stderr" => ""})) + allow(sshable).to receive(:cmd).with("common/bin/daemonizer --logs test_query").and_return(JSON.generate({"stdout" => '[{"db": "test_db", "result": "success", "success": true }]', "stderr" => ""})) allow(sshable).to receive(:cmd).with("common/bin/daemonizer --clean test_query").and_return("cleaned") end @@ -200,7 +200,6 @@ allow(sshable).to receive(:cmd).with("common/bin/daemonizer --check test_query").and_return("Failed") allow(sshable).to receive(:cmd).with("common/bin/daemonizer --logs test_query").and_return(JSON.generate({"stdout" => "error parse", "stderr" => "error"})) - expect(query).to receive(:update_page_status).with("*", vm.name, true, nil, nil) expect(query).to receive(:update_page_status).with("*", vm.name, false, "error parse", "error") expect(query).to receive(:update).with(condition: "failed", last_checked: instance_of(Time))