Skip to content

Commit

Permalink
Fix a cluster-agent crash at startup (#28282)
Browse files Browse the repository at this point in the history
  • Loading branch information
L3n41c authored Aug 8, 2024
1 parent 1a835ad commit d3901a2
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 19 deletions.
2 changes: 1 addition & 1 deletion cmd/cluster-agent/subcommands/start/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,7 @@ func start(log log.Component,
DatadogClient: dc,
}

if aggErr := controllers.StartControllers(ctx); aggErr != nil {
if aggErr := controllers.StartControllers(&ctx); aggErr != nil {
for _, err := range aggErr.Errors() {
pkglog.Warnf("Error while starting controller: %v", err)
}
Expand Down
4 changes: 0 additions & 4 deletions flakes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,6 @@ test/new-e2e/tests/containers:
- TestEKSSuite/TestCPU/metric___container.cpu.usage{^kube_deployment:stress-ng$,^kube_namespace:workload-cpustress$}
- TestKindSuite/TestCPU/metric___container.cpu.usage{^kube_deployment:stress-ng$,^kube_namespace:workload-cpustress$}
- TestECSSuite
- TestEKSSuite/Test00UpAndRunning/agent_pods_are_ready_and_not_restarting
- TestEKSSuite/TestZZUpAndRunning/agent_pods_are_ready_and_not_restarting
- TestKindSuite/Test00UpAndRunning/agent_pods_are_ready_and_not_restarting
- TestKindSuite/TestZZUpAndRunning/agent_pods_are_ready_and_not_restarting

test/new-e2e/tests/installer:
- TestPackages/upgrade_scenario_ubuntu_22_04_x86_64/TestUpgradeSuccessful
31 changes: 17 additions & 14 deletions pkg/util/kubernetes/apiserver/controllers/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ const autoscalerNowHandleMsgEvent = "Autoscaler is now handled by the Cluster-Ag

var errIsEmpty = errors.New("entity is empty") //nolint:revive

type startFunc func(ControllerContext, chan error)
// startFunc is the signature shared by every controller start/register
// function in controllerCatalog. It receives a pointer to the shared
// ControllerContext (so implementations can mutate it, e.g. register
// informers) and a channel on which startup errors are reported.
type startFunc func(*ControllerContext, chan error)

type controllerFuncs struct {
enabled func() bool
Expand Down Expand Up @@ -64,6 +64,7 @@ var controllerCatalog = map[controllerName]controllerFuncs{
// ControllerContext holds all the attributes needed by the controllers
type ControllerContext struct {
informers map[apiserver.InformerName]cache.SharedInformer
informersMutex sync.Mutex
InformerFactory informers.SharedInformerFactory
DynamicClient dynamic.Interface
DynamicInformerFactory dynamicinformer.DynamicSharedInformerFactory
Expand All @@ -77,7 +78,7 @@ type ControllerContext struct {

// StartControllers runs the enabled Kubernetes controllers for the Datadog Cluster Agent. This is
// only called once, when we have confirmed we could correctly connect to the API server.
func StartControllers(ctx ControllerContext) k8serrors.Aggregate {
func StartControllers(ctx *ControllerContext) k8serrors.Aggregate {
ctx.informers = make(map[apiserver.InformerName]cache.SharedInformer)

var wg sync.WaitGroup
Expand Down Expand Up @@ -126,9 +127,7 @@ func StartControllers(ctx ControllerContext) k8serrors.Aggregate {

// startMetadataController starts the informers needed for metadata collection.
// The synchronization of the informers is handled by the controller.
//
//nolint:revive // TODO(CAPP) Fix revive linter
func startMetadataController(ctx ControllerContext, c chan error) {
func startMetadataController(ctx *ControllerContext, _ chan error) {
metaController := newMetadataController(
ctx.InformerFactory.Core().V1().Endpoints(),
ctx.WorkloadMeta,
Expand All @@ -138,7 +137,7 @@ func startMetadataController(ctx ControllerContext, c chan error) {

// startAutoscalersController starts the informers needed for autoscaling.
// The synchronization of the informers is handled by the controller.
func startAutoscalersController(ctx ControllerContext, c chan error) {
func startAutoscalersController(ctx *ControllerContext, c chan error) {
var err error
if ctx.DatadogClient == nil {
c <- fmt.Errorf("datadog client is nil")
Expand Down Expand Up @@ -166,15 +165,19 @@ func startAutoscalersController(ctx ControllerContext, c chan error) {
}

// registerServicesInformer registers the services informer.
//
//nolint:revive // TODO(CAPP) Fix revive linter
func registerServicesInformer(ctx ControllerContext, c chan error) {
ctx.informers[servicesInformer] = ctx.InformerFactory.Core().V1().Services().Informer()
// registerServicesInformer creates the services informer and records it in
// the context's informer map. The synchronization of the informer itself is
// handled by the controller; the map write is guarded by informersMutex
// because several start functions may register informers concurrently.
func registerServicesInformer(ctx *ControllerContext, _ chan error) {
	svcInformer := ctx.InformerFactory.Core().V1().Services().Informer()

	ctx.informersMutex.Lock()
	defer ctx.informersMutex.Unlock()
	ctx.informers[servicesInformer] = svcInformer
}

// registerEndpointsInformer registers the endpoints informer.
//
//nolint:revive // TODO(CAPP) Fix revive linter
func registerEndpointsInformer(ctx ControllerContext, c chan error) {
ctx.informers[endpointsInformer] = ctx.InformerFactory.Core().V1().Endpoints().Informer()
// registerEndpointsInformer creates the endpoints informer and records it in
// the context's informer map. The synchronization of the informer itself is
// handled by the controller; the map write is guarded by informersMutex
// because several start functions may register informers concurrently.
func registerEndpointsInformer(ctx *ControllerContext, _ chan error) {
	epInformer := ctx.InformerFactory.Core().V1().Endpoints().Informer()

	ctx.informersMutex.Lock()
	defer ctx.informersMutex.Unlock()
	ctx.informers[endpointsInformer] = epInformer
}

0 comments on commit d3901a2

Please sign in to comment.