From 08d78e87402576fe38fbf153766468834a7fd9d5 Mon Sep 17 00:00:00 2001 From: qingliu Date: Tue, 3 Sep 2024 09:32:40 +0800 Subject: [PATCH] fix(pipelinerun): resolve issue with PipelineRun not timing out successfully fix #8230 When the PipelineRun timeout, validation errors returned when patch a completed TaskRun should be ignored. --- pkg/apis/pipeline/errors/errors.go | 14 +++- pkg/reconciler/pipelinerun/cancel.go | 4 +- pkg/reconciler/pipelinerun/timeout.go | 5 ++ test/timeout_test.go | 99 +++++++++++++++++++++++++++ 4 files changed, 119 insertions(+), 3 deletions(-) diff --git a/pkg/apis/pipeline/errors/errors.go b/pkg/apis/pipeline/errors/errors.go index f81dd2e5f82..fbd487bba33 100644 --- a/pkg/apis/pipeline/errors/errors.go +++ b/pkg/apis/pipeline/errors/errors.go @@ -13,7 +13,12 @@ limitations under the License. package errors -import "errors" +import ( + "errors" + "strings" + + apierrors "k8s.io/apimachinery/pkg/api/errors" +) const UserErrorLabel = "[User error] " @@ -71,3 +76,10 @@ func GetErrorMessage(err error) string { } return err.Error() } + +// IsImmutableTaskRunSpecError returns true if the error is the taskrun spec is immutable +func IsImmutableTaskRunSpecError(err error) bool { + // The TaskRun may have completed and the spec field is immutable. + // validation code: https://github.com/tektoncd/pipeline/blob/v0.62.0/pkg/apis/pipeline/v1/taskrun_validation.go#L136-L138 + return apierrors.IsBadRequest(err) && strings.Contains(err.Error(), "no updates are allowed") +} diff --git a/pkg/reconciler/pipelinerun/cancel.go b/pkg/reconciler/pipelinerun/cancel.go index 7370080d3f7..c198569da87 100644 --- a/pkg/reconciler/pipelinerun/cancel.go +++ b/pkg/reconciler/pipelinerun/cancel.go @@ -24,6 +24,7 @@ import ( "strings" "time" + pipelineErrors "github.com/tektoncd/pipeline/pkg/apis/pipeline/errors" v1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1" "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1" clientset "github.com/tektoncd/pipeline/pkg/client/clientset/versioned" @@ -88,9 +89,8 @@ func cancelTaskRun(ctx context.Context, taskRunName string, namespace string, cl // still be able to cancel the PipelineRun return nil } - if errors.IsBadRequest(err) && strings.Contains(err.Error(), "no updates are allowed") { + if pipelineErrors.IsImmutableTaskRunSpecError(err) { // The TaskRun may have completed and the spec field is immutable, we should ignore this error. - // validation code: https://github.com/tektoncd/pipeline/blob/v0.62.0/pkg/apis/pipeline/v1/taskrun_validation.go#L136-L138 return nil } return err diff --git a/pkg/reconciler/pipelinerun/timeout.go b/pkg/reconciler/pipelinerun/timeout.go index 8ae29a62d30..845ef3c22dd 100644 --- a/pkg/reconciler/pipelinerun/timeout.go +++ b/pkg/reconciler/pipelinerun/timeout.go @@ -21,6 +21,7 @@ import ( "strings" "time" + pipelineErrors "github.com/tektoncd/pipeline/pkg/apis/pipeline/errors" v1 "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1" "github.com/tektoncd/pipeline/pkg/apis/pipeline/v1beta1" clientset "github.com/tektoncd/pipeline/pkg/client/clientset/versioned" @@ -125,6 +126,10 @@ func timeoutPipelineTasksForTaskNames(ctx context.Context, logger *zap.SugaredLo logger.Infof("patching TaskRun %s for timeout", taskRunName) if err := timeoutTaskRun(ctx, taskRunName, pr.Namespace, clientSet); err != nil { + if pipelineErrors.IsImmutableTaskRunSpecError(err) { + // The TaskRun may have completed and the spec field is immutable, we should ignore this error. + continue + } errs = append(errs, fmt.Errorf("failed to patch TaskRun `%s` with timeout: %w", taskRunName, err).Error()) continue } diff --git a/test/timeout_test.go b/test/timeout_test.go index 46285d00e83..2b407b66890 100644 --- a/test/timeout_test.go +++ b/test/timeout_test.go @@ -560,3 +560,102 @@ spec: } wg.Wait() } + +// TestPipelineRunTimeoutWithCompletedTaskRuns tests the case where a PipelineRun is timeout and has completed TaskRuns. +func TestPipelineRunTimeoutWithCompletedTaskRuns(t *testing.T) { + t.Parallel() + // cancel the context after we have waited a suitable buffer beyond the given deadline. + ctx, cancel := context.WithTimeout(context.Background(), timeout+2*time.Minute) + defer cancel() + c, namespace := setup(ctx, t) + + knativetest.CleanupOnInterrupt(func() { tearDown(context.Background(), t, c, namespace) }, t.Logf) + defer tearDown(context.Background(), t, c, namespace) + + t.Logf("Creating Task in namespace %s", namespace) + task := parse.MustParseV1Task(t, fmt.Sprintf(` +metadata: + name: %s + namespace: %s +spec: + params: + - name: sleep + default: "1" + steps: + - image: mirror.gcr.io/busybox + command: ['/bin/sh'] + args: ['-c', 'sleep $(params.sleep)'] +`, helpers.ObjectNameForTest(t), namespace)) + if _, err := c.V1TaskClient.Create(ctx, task, metav1.CreateOptions{}); err != nil { + t.Fatalf("Failed to create Task `%s`: %s", task.Name, err) + } + + pipeline := parse.MustParseV1Pipeline(t, fmt.Sprintf(` +metadata: + name: %s + namespace: %s +spec: + tasks: + - name: fast-task + params: + - name: sleep + value: "1" + taskRef: + name: %s + - name: slow-task + params: + - name: sleep + value: "120" + taskRef: + name: %s +`, helpers.ObjectNameForTest(t), namespace, task.Name, task.Name)) + pipelineRun := parse.MustParseV1PipelineRun(t, fmt.Sprintf(` +metadata: + name: %s + namespace: %s +spec: + pipelineRef: + name: %s + timeouts: + pipeline: 30s + tasks: 30s +`, helpers.ObjectNameForTest(t), namespace, pipeline.Name)) + if _, err := c.V1PipelineClient.Create(ctx, pipeline, metav1.CreateOptions{}); err != nil { + t.Fatalf("Failed to create Pipeline `%s`: %s", pipeline.Name, err) + } + if _, err := c.V1PipelineRunClient.Create(ctx, pipelineRun, metav1.CreateOptions{}); err != nil { + t.Fatalf("Failed to create PipelineRun `%s`: %s", pipelineRun.Name, err) + } + + t.Logf("Waiting for PipelineRun %s in namespace %s to be timed out", pipelineRun.Name, namespace) + if err := WaitForPipelineRunState(ctx, c, pipelineRun.Name, timeout, FailedWithReason(v1.PipelineRunReasonTimedOut.String(), pipelineRun.Name), "PipelineRunTimedOut", v1Version); err != nil { + t.Errorf("Error waiting for PipelineRun %s to finish: %s", pipelineRun.Name, err) + } + + taskrunList, err := c.V1TaskRunClient.List(ctx, metav1.ListOptions{LabelSelector: "tekton.dev/pipelineRun=" + pipelineRun.Name}) + if err != nil { + t.Fatalf("Error listing TaskRuns for PipelineRun %s: %s", pipelineRun.Name, err) + } + + t.Logf("Waiting for TaskRuns from PipelineRun %s in namespace %s to time out and be cancelled", pipelineRun.Name, namespace) + var wg sync.WaitGroup + for _, taskrunItem := range taskrunList.Items { + wg.Add(1) + go func(name string) { + defer wg.Done() + if strings.Contains(name, "fast-task") { + // fast-task should have completed, not timed out + return + } + err := WaitForTaskRunState(ctx, c, name, FailedWithReason(v1.TaskRunReasonCancelled.String(), name), v1.TaskRunReasonCancelled.String(), v1Version) + if err != nil { + t.Errorf("Error waiting for TaskRun %s to timeout: %s", name, err) + } + }(taskrunItem.Name) + } + wg.Wait() + + if _, err := c.V1PipelineRunClient.Get(ctx, pipelineRun.Name, metav1.GetOptions{}); err != nil { + t.Fatalf("Failed to get PipelineRun `%s`: %s", pipelineRun.Name, err) + } +}