From 945540c8faefd04e2d7c570e3ed358d85e8a00fd Mon Sep 17 00:00:00 2001 From: Narthana Epa Date: Sat, 16 Mar 2024 23:17:00 +1100 Subject: [PATCH] Use DefaultAgentImage for the BuildFailureJob Previously, the agent image configured in the controller's config was used for the BuildFailureJob. But customers could have configured this to be a private image. We don't want to copy the imagePullSecrets in as there could be an error in their formatting, and the failure job is designed to bubble up such errors to the job logs. So we use the default agent image, which is guaranteed to be public. --- internal/controller/scheduler/scheduler.go | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/internal/controller/scheduler/scheduler.go b/internal/controller/scheduler/scheduler.go index 0a23b8a4..7177fc3b 100644 --- a/internal/controller/scheduler/scheduler.go +++ b/internal/controller/scheduler/scheduler.go @@ -501,7 +501,17 @@ func (w *jobWrapper) BuildFailureJob(err error) (*batchv1.Job, error) { PodSpec: &corev1.PodSpec{ Containers: []corev1.Container{ { - Image: w.cfg.Image, + // The configured agent image may be private. If there is an error in specifying the + // secrets for this image, we should still be able to run the failure job. So, we + // bypass the potentially private image and use a public one. We could use a + // thinner public image like `alpine:latest`, but it's generally unwise to depend + // on an image that's not published by us. + // + // TODO: pin the version of the agent image and use that here. + // Currently, DefaultAgentImage has a `latest` tag. That's not ideal as + // a given version of agent-stack-k8s may use different versions of the agent image over + // time. We should consider using a specific version of the agent image here. + Image: config.DefaultAgentImage, Command: []string{fmt.Sprintf("echo %q && exit 1", err.Error())}, }, },