diff --git a/comp/forwarder/defaultforwarder/forwarder_health.go b/comp/forwarder/defaultforwarder/forwarder_health.go index fbd29370ec5cf..d046f72de5e45 100644 --- a/comp/forwarder/defaultforwarder/forwarder_health.go +++ b/comp/forwarder/defaultforwarder/forwarder_health.go @@ -149,15 +149,28 @@ func (fh *forwarderHealth) healthCheckLoop() { } for { - select { - case <-fh.stop: - return - case <-validateTicker.C: - valid := fh.checkValidAPIKey() - if !valid { - fh.log.Errorf("No valid api key found, reporting the forwarder as unhealthy.") + // only read from the health channel if the api key is valid + if valid { + select { + case <-fh.stop: + return + case <-validateTicker.C: + valid = fh.checkValidAPIKey() + if !valid { + fh.log.Errorf("No valid api key found, reporting the forwarder as unhealthy.") + } + case <-fh.health.C: + } + } else { + select { + case <-fh.stop: + return + case <-validateTicker.C: + valid = fh.checkValidAPIKey() + if !valid { + fh.log.Errorf("No valid api key found, reporting the forwarder as unhealthy.") + } } - case <-fh.health.C: } } } diff --git a/releasenotes/notes/fix-forwarder-health-check-09eeefbe1a4e20d1.yaml b/releasenotes/notes/fix-forwarder-health-check-09eeefbe1a4e20d1.yaml new file mode 100644 index 0000000000000..65bfcbb319166 --- /dev/null +++ b/releasenotes/notes/fix-forwarder-health-check-09eeefbe1a4e20d1.yaml @@ -0,0 +1,11 @@ +# Each section from every release note are combined when the +# CHANGELOG.rst is rendered. So the text needs to be worded so that +# it does not depend on any information only available in another +# section. This may mean repeating some details, but each section +# must be readable independently of the other. +# +# Each section note must be formatted as reStructuredText. +--- +fixes: + - | + Fix the forwarder health check so that it reports unhealthy when the API key is invalid. diff --git a/test/new-e2e/tests/agent-subcommands/health/health_commont_test.go b/test/new-e2e/tests/agent-subcommands/health/health_common_test.go similarity index 100% rename from test/new-e2e/tests/agent-subcommands/health/health_commont_test.go rename to test/new-e2e/tests/agent-subcommands/health/health_common_test.go diff --git a/test/new-e2e/tests/agent-subcommands/health/health_nix_test.go b/test/new-e2e/tests/agent-subcommands/health/health_nix_test.go index 15c51d4041354..d8319beab4276 100644 --- a/test/new-e2e/tests/agent-subcommands/health/health_nix_test.go +++ b/test/new-e2e/tests/agent-subcommands/health/health_nix_test.go @@ -6,10 +6,13 @@ package health import ( + "net/http" "testing" + "time" "github.com/DataDog/test-infra-definitions/components/datadog/agentparams" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/DataDog/datadog-agent/test/fakeintake/api" "github.com/DataDog/datadog-agent/test/new-e2e/pkg/e2e" @@ -28,23 +31,35 @@ func TestLinuxHealthSuite(t *testing.T) { func (v *linuxHealthSuite) TestDefaultInstallUnhealthy() { // the fakeintake says that any API key is invalid by sending a 403 code override := api.ResponseOverride{ - Endpoint: "/api/v1/validate", - StatusCode: 403, - ContentType: "text/plain", - Body: []byte("invalid API key"), + Endpoint: "/api/v1/validate", + StatusCode: 403, + Method: http.MethodGet, + Body: []byte("invalid API key"), } - v.Env().FakeIntake.Client().ConfigureOverride(override) + err := v.Env().FakeIntake.Client().ConfigureOverride(override) + require.NoError(v.T(), err) // restart the agent, which validates the key using the fakeintake at startup v.UpdateEnv(awshost.Provisioner( - awshost.WithAgentOptions(agentparams.WithAgentConfig("log_level: info\n")), + awshost.WithAgentOptions(agentparams.WithAgentConfig("log_level: info\nforwarder_apikey_validation_interval: 1")), )) - // agent should be unhealthy because the key is invalid - _, err := v.Env().Agent.Client.Health() - if err == nil { - assert.Fail(v.T(), "agent expected to be unhealthy, but no error found!") - return - } - assert.Contains(v.T(), err.Error(), "Agent health: FAIL") + require.EventuallyWithT(v.T(), func(collect *assert.CollectT) { + // forwarder should be unhealthy because the key is invalid + _, err = v.Env().Agent.Client.Health() + assert.ErrorContains(collect, err, "Agent health: FAIL") + assert.ErrorContains(collect, err, "=== 1 unhealthy components ===\nforwarder") + }, time.Second*30, time.Second) + + // the fakeintake now says that the api key is valid + override.StatusCode = 200 + override.Body = []byte("valid API key") + err = v.Env().FakeIntake.Client().ConfigureOverride(override) + require.NoError(v.T(), err) + + // the agent will check every minute if the key is valid, so wait at most 1m30 + require.EventuallyWithT(v.T(), func(collect *assert.CollectT) { + _, err = v.Env().Agent.Client.Health() + assert.NoError(collect, err) + }, time.Second*90, 3*time.Second) }