From a43f0de4abe204f362e2f9e3ad372929c23068ce Mon Sep 17 00:00:00 2001 From: Jian Qiu Date: Mon, 8 Apr 2024 10:09:37 +0800 Subject: [PATCH] avoid cluster auto approve failed occasionally (#388) (#395) Signed-off-by: Wei Liu Co-authored-by: Wei Liu --- .github/workflows/pre.yml | 8 --- pkg/registration/hub/csr/controller_test.go | 2 - pkg/registration/hub/csr/reconciler.go | 35 ----------- .../hub/managedcluster/controller.go | 28 +++++++++ .../hub/managedcluster/controller_test.go | 23 +++++++- pkg/registration/hub/manager.go | 2 - .../registration/clusterannotations_test.go | 4 +- .../spokecluster_autoapproval_test.go | 59 ++++++++++++++++++- 8 files changed, 108 insertions(+), 53 deletions(-) diff --git a/.github/workflows/pre.yml b/.github/workflows/pre.yml index 7a90c6052..ae9e62db3 100644 --- a/.github/workflows/pre.yml +++ b/.github/workflows/pre.yml @@ -62,14 +62,6 @@ jobs: go-version: ${{ env.GO_VERSION }} - name: unit run: make test - - name: report coverage - uses: codecov/codecov-action@v3 - with: - files: ./coverage.out - flags: unit - name: unit - verbose: true - fail_ci_if_error: true integration: name: integration diff --git a/pkg/registration/hub/csr/controller_test.go b/pkg/registration/hub/csr/controller_test.go index 72ad5d675..313c67a52 100644 --- a/pkg/registration/hub/csr/controller_test.go +++ b/pkg/registration/hub/csr/controller_test.go @@ -212,8 +212,6 @@ func TestSync(t *testing.T) { NewCSRRenewalReconciler(kubeClient, recorder), NewCSRBootstrapReconciler( kubeClient, - clusterClient, - clusterInformerFactory.Cluster().V1().ManagedClusters().Lister(), c.approvalUsers, recorder, ), diff --git a/pkg/registration/hub/csr/reconciler.go b/pkg/registration/hub/csr/reconciler.go index 16c9bb828..b12105285 100644 --- a/pkg/registration/hub/csr/reconciler.go +++ b/pkg/registration/hub/csr/reconciler.go @@ -11,15 +11,11 @@ import ( authorizationv1 "k8s.io/api/authorization/v1" certificatesv1 "k8s.io/api/certificates/v1" certificatesv1beta1 "k8s.io/api/certificates/v1beta1" - "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" - clusterclientset "open-cluster-management.io/api/client/cluster/clientset/versioned" - clusterv1listers "open-cluster-management.io/api/client/cluster/listers/cluster/v1" clusterv1 "open-cluster-management.io/api/cluster/v1" "open-cluster-management.io/ocm/pkg/registration/hub/user" @@ -95,21 +91,15 @@ func (r *csrRenewalReconciler) Reconcile(ctx context.Context, csr csrInfo, appro type csrBootstrapReconciler struct { kubeClient kubernetes.Interface - clusterClient clusterclientset.Interface - clusterLister clusterv1listers.ManagedClusterLister approvalUsers sets.Set[string] eventRecorder events.Recorder } func NewCSRBootstrapReconciler(kubeClient kubernetes.Interface, - clusterClient clusterclientset.Interface, - clusterLister clusterv1listers.ManagedClusterLister, approvalUsers []string, recorder events.Recorder) Reconciler { return &csrBootstrapReconciler{ kubeClient: kubeClient, - clusterClient: clusterClient, - clusterLister: clusterLister, approvalUsers: sets.New(approvalUsers...), eventRecorder: recorder.WithComponentSuffix("csr-approving-controller"), } @@ -129,15 +119,6 @@ func (b *csrBootstrapReconciler) Reconcile(ctx context.Context, csr csrInfo, app return reconcileContinue, nil } - err := b.accpetCluster(ctx, clusterName) - if errors.IsNotFound(err) { - // Current spoke cluster not found, could have been deleted, do nothing. - return reconcileStop, nil - } - if err != nil { - return reconcileContinue, err - } - if err := approveCSR(b.kubeClient); err != nil { return reconcileContinue, err } @@ -146,22 +127,6 @@ func (b *csrBootstrapReconciler) Reconcile(ctx context.Context, csr csrInfo, app return reconcileStop, nil } -func (b *csrBootstrapReconciler) accpetCluster(ctx context.Context, managedClusterName string) error { - managedCluster, err := b.clusterLister.Get(managedClusterName) - if err != nil { - return err - } - - if managedCluster.Spec.HubAcceptsClient { - return nil - } - - patch := []byte("{\"spec\": {\"hubAcceptsClient\": true}}") - _, err = b.clusterClient.ClusterV1().ManagedClusters().Patch( - ctx, managedCluster.Name, types.MergePatchType, patch, metav1.PatchOptions{}) - return err -} - // To validate a managed cluster csr, we check // 1. if the signer name in csr request is valid. // 2. if organization field and commonName field in csr request is valid. diff --git a/pkg/registration/hub/managedcluster/controller.go b/pkg/registration/hub/managedcluster/controller.go index a65a35af6..63a092c7a 100644 --- a/pkg/registration/hub/managedcluster/controller.go +++ b/pkg/registration/hub/managedcluster/controller.go @@ -3,6 +3,7 @@ package managedcluster import ( "context" "fmt" + "time" "github.com/openshift/library-go/pkg/controller/factory" "github.com/openshift/library-go/pkg/operator/events" @@ -12,6 +13,7 @@ import ( "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" rbacv1informers "k8s.io/client-go/informers/rbac/v1" "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" @@ -20,14 +22,20 @@ import ( informerv1 "open-cluster-management.io/api/client/cluster/informers/externalversions/cluster/v1" listerv1 "open-cluster-management.io/api/client/cluster/listers/cluster/v1" v1 "open-cluster-management.io/api/cluster/v1" + ocmfeature "open-cluster-management.io/api/feature" "open-cluster-management.io/sdk-go/pkg/patcher" "open-cluster-management.io/ocm/pkg/common/apply" "open-cluster-management.io/ocm/pkg/common/queue" + "open-cluster-management.io/ocm/pkg/features" "open-cluster-management.io/ocm/pkg/registration/helpers" "open-cluster-management.io/ocm/pkg/registration/hub/manifests" ) +// this is an internal annotation to indicate a managed cluster is already accepted automatically, it is not +// expected to be changed or removed outside. +const clusterAcceptedAnnotationKey = "open-cluster-management.io/automatically-accepted-on" + var staticFiles = []string{ "rbac/managedcluster-clusterrole.yaml", "rbac/managedcluster-clusterrolebinding.yaml", @@ -38,6 +46,7 @@ var staticFiles = []string{ // managedClusterController reconciles instances of ManagedCluster on the hub. type managedClusterController struct { kubeClient kubernetes.Interface + clusterClient clientset.Interface clusterLister listerv1.ManagedClusterLister applier *apply.PermissionApplier patcher patcher.Patcher[*v1.ManagedCluster, v1.ManagedClusterSpec, v1.ManagedClusterStatus] @@ -56,6 +65,7 @@ func NewManagedClusterController( recorder events.Recorder) factory.Controller { c := &managedClusterController{ kubeClient: kubeClient, + clusterClient: clusterClient, clusterLister: clusterInformer.Lister(), applier: apply.NewPermissionApplier( kubeClient, @@ -103,6 +113,14 @@ func (c *managedClusterController) sync(ctx context.Context, syncCtx factory.Syn } if !managedCluster.Spec.HubAcceptsClient { + // If the ManagedClusterAutoApproval feature is enabled, we automatically accept a cluster only + // when it joins for the first time, afterwards users can deny it again. + if features.HubMutableFeatureGate.Enabled(ocmfeature.ManagedClusterAutoApproval) { + if _, ok := managedCluster.Annotations[clusterAcceptedAnnotationKey]; !ok { + return c.acceptCluster(ctx, managedClusterName) + } + } + // Current spoke cluster is not accepted, do nothing. if !meta.IsStatusConditionTrue(managedCluster.Status.Conditions, v1.ManagedClusterConditionHubAccepted) { return nil @@ -199,3 +217,13 @@ func (c *managedClusterController) removeManagedClusterResources(ctx context.Con } return operatorhelpers.NewMultiLineAggregate(errs) } + +func (c *managedClusterController) acceptCluster(ctx context.Context, managedClusterName string) error { + // TODO support patching both annotations and spec simultaneously in the patcher + acceptedTime := time.Now() + patch := fmt.Sprintf(`{"metadata":{"annotations":{"%s":"%s"}},"spec":{"hubAcceptsClient":true}}`, + clusterAcceptedAnnotationKey, acceptedTime.Format(time.RFC3339)) + _, err := c.clusterClient.ClusterV1().ManagedClusters().Patch(ctx, managedClusterName, + types.MergePatchType, []byte(patch), metav1.PatchOptions{}) + return err +} diff --git a/pkg/registration/hub/managedcluster/controller_test.go b/pkg/registration/hub/managedcluster/controller_test.go index ca889529c..722e349a5 100644 --- a/pkg/registration/hub/managedcluster/controller_test.go +++ b/pkg/registration/hub/managedcluster/controller_test.go @@ -3,6 +3,7 @@ package managedcluster import ( "context" "encoding/json" + "fmt" "testing" "time" @@ -16,18 +17,21 @@ import ( clusterfake "open-cluster-management.io/api/client/cluster/clientset/versioned/fake" clusterinformers "open-cluster-management.io/api/client/cluster/informers/externalversions" v1 "open-cluster-management.io/api/cluster/v1" + ocmfeature "open-cluster-management.io/api/feature" "open-cluster-management.io/sdk-go/pkg/patcher" "open-cluster-management.io/ocm/pkg/common/apply" testingcommon "open-cluster-management.io/ocm/pkg/common/testing" + "open-cluster-management.io/ocm/pkg/features" testinghelpers "open-cluster-management.io/ocm/pkg/registration/helpers/testing" ) func TestSyncManagedCluster(t *testing.T) { cases := []struct { - name string - startingObjects []runtime.Object - validateActions func(t *testing.T, actions []clienttesting.Action) + name string + autoApprovalEnabled bool + startingObjects []runtime.Object + validateActions func(t *testing.T, actions []clienttesting.Action) }{ { name: "sync a deleted spoke cluster", @@ -97,8 +101,18 @@ func TestSyncManagedCluster(t *testing.T) { testingcommon.AssertNoActions(t, actions) }, }, + { + name: "should accept the clusters when auto approval is enabled", + autoApprovalEnabled: true, + startingObjects: []runtime.Object{testinghelpers.NewManagedCluster()}, + validateActions: func(t *testing.T, actions []clienttesting.Action) { + testingcommon.AssertActions(t, actions, "patch") + }, + }, } + features.HubMutableFeatureGate.Add(ocmfeature.DefaultHubRegistrationFeatureGates) + for _, c := range cases { t.Run(c.name, func(t *testing.T) { clusterClient := clusterfake.NewSimpleClientset(c.startingObjects...) @@ -112,8 +126,11 @@ func TestSyncManagedCluster(t *testing.T) { } } + features.HubMutableFeatureGate.Set(fmt.Sprintf("%s=%v", ocmfeature.ManagedClusterAutoApproval, c.autoApprovalEnabled)) + ctrl := managedClusterController{ kubeClient, + clusterClient, clusterInformerFactory.Cluster().V1().ManagedClusters().Lister(), apply.NewPermissionApplier( kubeClient, diff --git a/pkg/registration/hub/manager.go b/pkg/registration/hub/manager.go index 9cd32c1bf..af62aa4d6 100644 --- a/pkg/registration/hub/manager.go +++ b/pkg/registration/hub/manager.go @@ -153,8 +153,6 @@ func (m *HubManagerOptions) RunControllerManagerWithInformers( if features.HubMutableFeatureGate.Enabled(ocmfeature.ManagedClusterAutoApproval) { csrReconciles = append(csrReconciles, csr.NewCSRBootstrapReconciler( kubeClient, - clusterClient, - clusterInformers.Cluster().V1().ManagedClusters().Lister(), m.ClusterAutoApprovalUsers, controllerContext.EventRecorder, )) diff --git a/test/integration/registration/clusterannotations_test.go b/test/integration/registration/clusterannotations_test.go index c94c06183..2be6e7817 100644 --- a/test/integration/registration/clusterannotations_test.go +++ b/test/integration/registration/clusterannotations_test.go @@ -45,8 +45,8 @@ var _ = ginkgo.Describe("Cluster Annotations", func() { return err } - if len(mc.Annotations) != 1 { - return fmt.Errorf("expected 1 annotation, got %d", len(mc.Annotations)) + if _, ok := mc.Annotations["foo"]; ok { + return fmt.Errorf("unexpected annotations %v", mc.Annotations) } if mc.Annotations["agent.open-cluster-management.io/foo"] != "bar" { diff --git a/test/integration/registration/spokecluster_autoapproval_test.go b/test/integration/registration/spokecluster_autoapproval_test.go index a62c359c3..36c5080b9 100644 --- a/test/integration/registration/spokecluster_autoapproval_test.go +++ b/test/integration/registration/spokecluster_autoapproval_test.go @@ -1,6 +1,7 @@ package registration_test import ( + "context" "fmt" "path" "time" @@ -9,6 +10,7 @@ import ( "github.com/onsi/gomega" certificates "k8s.io/api/certificates/v1" "k8s.io/apimachinery/pkg/api/meta" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" clusterv1 "open-cluster-management.io/api/cluster/v1" @@ -17,6 +19,8 @@ import ( "open-cluster-management.io/ocm/test/integration/util" ) +const expectedAnnotation = "open-cluster-management.io/automatically-accepted-on" + var _ = ginkgo.Describe("Cluster Auto Approval", func() { ginkgo.It("Cluster should be automatically approved", func() { var err error @@ -49,7 +53,6 @@ var _ = ginkgo.Describe("Cluster Auto Approval", func() { if err != nil { return false } - return cluster.Spec.HubAcceptsClient }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) @@ -84,4 +87,58 @@ var _ = ginkgo.Describe("Cluster Auto Approval", func() { return nil }, eventuallyTimeout, eventuallyInterval).ShouldNot(gomega.HaveOccurred()) }) + + ginkgo.It("Cluster can be denied by manually after automatically approved", func() { + clusterName := "autoapprovaltest-spoke-cluster1" + _, err := clusterClient.ClusterV1().ManagedClusters().Create(context.Background(), &clusterv1.ManagedCluster{ + ObjectMeta: v1.ObjectMeta{ + Name: clusterName, + }, + }, v1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Eventually(func() error { + cluster, err := util.GetManagedCluster(clusterClient, clusterName) + if err != nil { + return err + } + + if _, ok := cluster.Annotations[expectedAnnotation]; !ok { + return fmt.Errorf("cluster should have accepted annotation") + } + + if !cluster.Spec.HubAcceptsClient { + return fmt.Errorf("cluster should be accepted") + } + + return nil + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + + // we can deny the cluster after it is accepted + cluster, err := util.GetManagedCluster(clusterClient, clusterName) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + updatedCluster := cluster.DeepCopy() + updatedCluster.Spec.HubAcceptsClient = false + + _, err = clusterClient.ClusterV1().ManagedClusters().Update(context.TODO(), updatedCluster, v1.UpdateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + gomega.Consistently(func() error { + cluster, err := util.GetManagedCluster(clusterClient, clusterName) + if err != nil { + return err + } + + if _, ok := cluster.Annotations[expectedAnnotation]; !ok { + return fmt.Errorf("cluster should have accepted annotation") + } + + if cluster.Spec.HubAcceptsClient { + return fmt.Errorf("cluster should be denied") + } + + return nil + }, 10*time.Second, eventuallyInterval).Should(gomega.Succeed()) + }) })