Skip to content

Commit

Permalink
Send at most 2 failure notifications per cluster healthcheck
Browse files Browse the repository at this point in the history
  • Loading branch information
var77 committed Aug 30, 2024
1 parent 7390294 commit 6f8749d
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
7 changes: 6 additions & 1 deletion prog/lantern/lantern_doctor_nexus.rb
Original file line number Diff line number Diff line change
Expand Up @@ -108,12 +108,17 @@ def before_run
end

condition = "healthy"
failed_count = 0
all_output.each do |output|
if !output["success"]
condition = "failed"
failed_count += 1
end

query.update_page_status(output["db"], vm.name, output["success"], output["result"], output["err"])
# If many databases in the cluster fail the healthcheck, send at most 2 failure alerts to avoid spamming.
if output["success"] || failed_count < 3
query.update_page_status(output["db"], vm.name, output["success"], output["result"], output["err"])
end
end

query.update(condition: condition, last_checked: Time.new)
Expand Down
14 changes: 14 additions & 0 deletions spec/prog/lantern/lantern_doctor_nexus_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,20 @@
expect { nx.wait_queries }.to hop("wait")
end

# Three databases fail in a single healthcheck result; page-status updates are
# expected for the first two only (no expectation is set for "test_db3").
it "does not spam notifications" do
  # Daemonizer log payload: every listed database reports success => false.
  failing_stdout = '[{"db": "test_db", "result": "success", "success": false }, {"db": "test_db2", "result": "success", "success": false }, {"db": "test_db3", "result": "success", "success": false }]'
  daemonizer_logs = JSON.generate({"stdout" => failing_stdout, "stderr" => ""})

  doctor_query = instance_double(LanternDoctorQuery, servers: [server], db_name: "postgres", task_name: "test_query2")
  expect(lantern_doctor).to receive(:queries).and_return([doctor_query])

  expect(sshable).to receive(:cmd).with("common/bin/daemonizer --check test_query2").and_return("Succeeded")
  expect(sshable).to receive(:cmd).with("common/bin/daemonizer --logs test_query2").and_return(daemonizer_logs)
  expect(sshable).to receive(:cmd).with("common/bin/daemonizer --clean test_query2").and_return("cleaned")

  # NOTE(review): the "*" update presumably comes from code outside this example's view — confirm.
  expect(doctor_query).to receive(:update_page_status).with("*", vm.name, true, nil, nil)
  ["test_db", "test_db2"].each do |failed_db|
    expect(doctor_query).to receive(:update_page_status).with(failed_db, vm.name, false, "success", nil)
  end
  expect(doctor_query).to receive(:update).with(condition: "failed", last_checked: instance_of(Time))

  expect { nx.wait_queries }.to hop("wait")
end

it "handles update_needed" do
query = instance_double(LanternDoctorQuery, servers: [server], db_name: "postgres", task_name: "test_query")
expect(lantern_doctor).to receive(:queries).and_return([query])
Expand Down

0 comments on commit 6f8749d

Please sign in to comment.