From 6f8749d28f1d880d4b7f4d9fd4f23c5e64a13972 Mon Sep 17 00:00:00 2001 From: Varik Matevosyan Date: Fri, 30 Aug 2024 13:13:29 +0400 Subject: [PATCH] send only 2 notifications for one cluster healthcheck --- prog/lantern/lantern_doctor_nexus.rb | 7 ++++++- spec/prog/lantern/lantern_doctor_nexus_spec.rb | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/prog/lantern/lantern_doctor_nexus.rb b/prog/lantern/lantern_doctor_nexus.rb index ee3f59e74..37e845bee 100644 --- a/prog/lantern/lantern_doctor_nexus.rb +++ b/prog/lantern/lantern_doctor_nexus.rb @@ -108,12 +108,17 @@ def before_run end condition = "healthy" + failed_count = 0 all_output.each do |output| if !output["success"] condition = "failed" + failed_count += 1 end - query.update_page_status(output["db"], vm.name, output["success"], output["result"], output["err"]) + # if many databases in the cluster failed the healthcheck send maximum 2 alerts to not spam + if output["success"] || failed_count < 3 + query.update_page_status(output["db"], vm.name, output["success"], output["result"], output["err"]) + end end query.update(condition: condition, last_checked: Time.new) diff --git a/spec/prog/lantern/lantern_doctor_nexus_spec.rb b/spec/prog/lantern/lantern_doctor_nexus_spec.rb index 02c0d34c2..db08c619c 100644 --- a/spec/prog/lantern/lantern_doctor_nexus_spec.rb +++ b/spec/prog/lantern/lantern_doctor_nexus_spec.rb @@ -194,6 +194,20 @@ expect { nx.wait_queries }.to hop("wait") end + it "does not spam notifications" do + expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check test_query2").and_return("Succeeded") + expect(sshable).to receive(:cmd).with("common/bin/daemonizer --logs test_query2").and_return(JSON.generate({"stdout" => '[{"db": "test_db", "result": "success", "success": false }, {"db": "test_db2", "result": "success", "success": false }, {"db": "test_db3", "result": "success", "success": false }]', "stderr" => ""})) + expect(sshable).to receive(:cmd).with("common/bin/daemonizer --clean test_query2").and_return("cleaned") + query = instance_double(LanternDoctorQuery, servers: [server], db_name: "postgres", task_name: "test_query2") + expect(lantern_doctor).to receive(:queries).and_return([query]) + expect(query).to receive(:update_page_status).with("*", vm.name, true, nil, nil) + expect(query).to receive(:update_page_status).with("test_db", vm.name, false, "success", nil) + expect(query).to receive(:update_page_status).with("test_db2", vm.name, false, "success", nil) + expect(query).to receive(:update).with(condition: "failed", last_checked: instance_of(Time)) + + expect { nx.wait_queries }.to hop("wait") + end + it "handles update_needed" do query = instance_double(LanternDoctorQuery, servers: [server], db_name: "postgres", task_name: "test_query") expect(lantern_doctor).to receive(:queries).and_return([query])