diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml index 3313bb3..b369d8c 100644 --- a/.github/workflows/e2e_tests.yml +++ b/.github/workflows/e2e_tests.yml @@ -12,6 +12,7 @@ jobs: test-e2e: name: Run end-to-end tests + timeout-minutes: 90 runs-on: ubuntu-latest strategy: matrix: diff --git a/.github/workflows/rollouts_e2e_tests.yml b/.github/workflows/rollouts_e2e_tests.yml index da552a6..c71200b 100644 --- a/.github/workflows/rollouts_e2e_tests.yml +++ b/.github/workflows/rollouts_e2e_tests.yml @@ -13,6 +13,7 @@ jobs: test-e2e: name: Run end-to-end tests from upstream Argo Rollouts repo runs-on: ubuntu-latest + timeout-minutes: 90 strategy: matrix: k3s-version: [ v1.28.2 ] diff --git a/Makefile b/Makefile index c4e5cf3..5afc515 100644 --- a/Makefile +++ b/Makefile @@ -136,7 +136,7 @@ start-e2e-namespace-scoped-bg: ## Start the operator, to run the e2e tests .PHONY: start-e2e-cluster-scoped-bg start-e2e-cluster-scoped-bg: ## Start the operator, to run the e2e tests - RUN_IN_BACKGROUND=true hack/start-rollouts-manager-for-e2e-tests.sh + RUN_IN_BACKGROUND=true NAMESPACE_SCOPED_ARGO_ROLLOUTS=false hack/start-rollouts-manager-for-e2e-tests.sh .PHONY: test-e2e-namespace-scoped @@ -145,7 +145,7 @@ test-e2e-namespace-scoped: ## Run operator e2e tests .PHONY: test-e2e-cluster-scoped test-e2e-cluster-scoped: ## Run operator e2e tests - hack/run-rollouts-manager-e2e-tests.sh + NAMESPACE_SCOPED_ARGO_ROLLOUTS=false hack/run-rollouts-manager-e2e-tests.sh .PHONY: start-test-e2e-all start-test-e2e-all: start-e2e-namespace-scoped-bg test-e2e-namespace-scoped start-e2e-cluster-scoped-bg test-e2e-cluster-scoped diff --git a/controllers/configmap.go b/controllers/configmap.go index 641be57..925826c 100644 --- a/controllers/configmap.go +++ b/controllers/configmap.go @@ -146,12 +146,12 @@ func (r *RolloutManagerReconciler) reconcileConfigMap(ctx context.Context, cr ro actualConfigMap.Data[TrafficRouterPluginConfigMapKey] = string(desiredTrafficRouterPluginString) actualConfigMap.Data[MetricPluginConfigMapKey] = string(desiredMetricPluginString) + log.Info("Updating Rollouts ConfigMap due to detected difference") + // Update the ConfigMap in the cluster if err := r.Client.Update(ctx, actualConfigMap); err != nil { return fmt.Errorf("failed to update ConfigMap: %v", err) } - log.Info("ConfigMap updated successfully") - // Restarting rollouts pod only if configMap is updated if err := r.restartRolloutsPod(ctx, cr.Namespace); err != nil { return err @@ -183,8 +183,8 @@ func (r *RolloutManagerReconciler) restartRolloutsPod(ctx context.Context, names for i := range podList.Items { pod := podList.Items[i] - log.Info("Deleting Rollouts Pod", "podName", pod.Name) if pod.ObjectMeta.DeletionTimestamp == nil { + log.Info("Deleting Rollouts Pod", "podName", pod.Name) if err := r.Client.Delete(ctx, &pod); err != nil { if errors.IsNotFound(err) { log.Info(fmt.Sprintf("Pod %s already deleted", pod.Name)) diff --git a/controllers/configmap_test.go b/controllers/configmap_test.go index d6bce90..70df5a5 100644 --- a/controllers/configmap_test.go +++ b/controllers/configmap_test.go @@ -9,7 +9,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" appsv1 "k8s.io/api/apps/v1" - v1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -19,7 +18,7 @@ var _ = Describe("ConfigMap Test", func() { var a v1alpha1.RolloutManager var r *RolloutManagerReconciler var sa *corev1.ServiceAccount - var existingDeployment *v1.Deployment + var existingDeployment *appsv1.Deployment const trafficrouterPluginLocation = "https://custom-traffic-plugin-location" const metricPluginLocation = "https://custom-metric-plugin-location" diff --git a/controllers/resources.go b/controllers/resources.go index f50572a..b88acc3 100644 --- a/controllers/resources.go +++ b/controllers/resources.go @@ -46,7 +46,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsServiceAccount(ctx context.C normalizedLiveServiceAccount := liveServiceAccount.DeepCopy() removeUserLabelsAndAnnotations(&normalizedLiveServiceAccount.ObjectMeta, cr) - if !reflect.DeepEqual(normalizedLiveServiceAccount.Labels, expectedServiceAccount.Labels) || !reflect.DeepEqual(normalizedLiveServiceAccount.Annotations, expectedServiceAccount.Annotations) { + if !areStringMapsEqual(normalizedLiveServiceAccount.Labels, expectedServiceAccount.Labels) || !areStringMapsEqual(normalizedLiveServiceAccount.Annotations, expectedServiceAccount.Annotations) { updateNeeded = true log.Info(fmt.Sprintf("Labels/Annotations of ServiceAccount %s do not match the expected state, hence updating it", liveServiceAccount.Name)) @@ -111,7 +111,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsRole(ctx context.Context, cr removeUserLabelsAndAnnotations(&normalizedLiveRole.ObjectMeta, cr) - if !reflect.DeepEqual(normalizedLiveRole.Labels, expectedRole.Labels) || !reflect.DeepEqual(normalizedLiveRole.Annotations, expectedRole.Annotations) { + if !areStringMapsEqual(normalizedLiveRole.Labels, expectedRole.Labels) || !areStringMapsEqual(normalizedLiveRole.Annotations, expectedRole.Annotations) { updateNeeded = true log.Info(fmt.Sprintf("Labels/Annotations of Role %s do not match the expected state, hence updating it", liveRole.Name)) @@ -165,7 +165,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsClusterRole(ctx context.Cont normalizedLiveClusterRole := liveClusterRole.DeepCopy() removeUserLabelsAndAnnotations(&normalizedLiveClusterRole.ObjectMeta, cr) - if !reflect.DeepEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !reflect.DeepEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) { + if !areStringMapsEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !areStringMapsEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) { updateNeeded = true log.Info(fmt.Sprintf("Labels/Annotations of Role %s do not match the expected state, hence updating it", liveClusterRole.Name)) @@ -248,7 +248,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsRoleBinding(ctx context.Cont normalizedLiveRoleBinding := liveRoleBinding.DeepCopy() removeUserLabelsAndAnnotations(&normalizedLiveRoleBinding.ObjectMeta, cr) - if !reflect.DeepEqual(normalizedLiveRoleBinding.Labels, expectedRoleBinding.Labels) || !reflect.DeepEqual(normalizedLiveRoleBinding.Annotations, expectedRoleBinding.Annotations) { + if !areStringMapsEqual(normalizedLiveRoleBinding.Labels, expectedRoleBinding.Labels) || !areStringMapsEqual(normalizedLiveRoleBinding.Annotations, expectedRoleBinding.Annotations) { updateNeeded = true log.Info(fmt.Sprintf("Labels/Annotations of RoleBinding %s do not match the expected state, hence updating it", liveRoleBinding.Name)) @@ -327,7 +327,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsClusterRoleBinding(ctx conte normalizedLiveClusterRoleBinding := liveClusterRoleBinding.DeepCopy() removeUserLabelsAndAnnotations(&normalizedLiveClusterRoleBinding.ObjectMeta, cr) - if !reflect.DeepEqual(normalizedLiveClusterRoleBinding.Labels, expectedClusterRoleBinding.Labels) || !reflect.DeepEqual(normalizedLiveClusterRoleBinding.Annotations, expectedClusterRoleBinding.Annotations) { + if !areStringMapsEqual(normalizedLiveClusterRoleBinding.Labels, expectedClusterRoleBinding.Labels) || !areStringMapsEqual(normalizedLiveClusterRoleBinding.Annotations, expectedClusterRoleBinding.Annotations) { updateNeeded = true log.Info(fmt.Sprintf("Labels/Annotations of ClusterRoleBinding %s do not match the expected state, hence updating it", liveClusterRoleBinding.Name)) @@ -460,7 +460,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsAggregateToAdminClusterRole( normalizedLiveClusterRole := liveClusterRole.DeepCopy() removeUserLabelsAndAnnotations(&normalizedLiveClusterRole.ObjectMeta, cr) - if !reflect.DeepEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !reflect.DeepEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) { + if !areStringMapsEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !areStringMapsEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) { updateNeeded = true log.Info(fmt.Sprintf("Labels/Annotations of aggregated ClusterRole %s do not match the expected state, hence updating it", liveClusterRole.Name)) @@ -512,7 +512,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsAggregateToEditClusterRole(c normalizedLiveClusterRole := liveClusterRole.DeepCopy() removeUserLabelsAndAnnotations(&normalizedLiveClusterRole.ObjectMeta, cr) - if !reflect.DeepEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !reflect.DeepEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) { + if !areStringMapsEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !areStringMapsEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) { updateNeeded = true log.Info(fmt.Sprintf("Labels/Annotations of aggregated ClusterRole %s do not match the expected state, hence updating it", liveClusterRole.Name)) @@ -564,7 +564,8 @@ func (r *RolloutManagerReconciler) reconcileRolloutsAggregateToViewClusterRole(c normalizedLiveClusterRole := liveClusterRole.DeepCopy() removeUserLabelsAndAnnotations(&normalizedLiveClusterRole.ObjectMeta, cr) - if !reflect.DeepEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !reflect.DeepEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) { + if !areStringMapsEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !areStringMapsEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) { + updateNeeded = true log.Info(fmt.Sprintf("Labels/Annotations of aggregated ClusterRole %s do not match the expected state, hence updating it", liveClusterRole.Name)) @@ -702,7 +703,8 @@ func (r *RolloutManagerReconciler) reconcileRolloutsMetricsService(ctx context.C normalizedLiveService := liveService.DeepCopy() removeUserLabelsAndAnnotations(&normalizedLiveService.ObjectMeta, cr) - if !reflect.DeepEqual(normalizedLiveService.Labels, expectedSvc.Labels) || !reflect.DeepEqual(normalizedLiveService.Annotations, expectedSvc.Annotations) { + if !areStringMapsEqual(normalizedLiveService.Labels, expectedSvc.Labels) || !areStringMapsEqual(normalizedLiveService.Annotations, expectedSvc.Annotations) { + updateNeeded = true log.Info(fmt.Sprintf("Labels/Annotations of metrics Service %s do not match the expected state, hence updating it", liveService.Name)) @@ -778,7 +780,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsSecrets(ctx context.Context, normalizedLiveSecret := liveSecret.DeepCopy() removeUserLabelsAndAnnotations(&normalizedLiveSecret.ObjectMeta, cr) - if !reflect.DeepEqual(normalizedLiveSecret.Labels, expectedSecret.Labels) || !reflect.DeepEqual(normalizedLiveSecret.Annotations, expectedSecret.Annotations) { + if !areStringMapsEqual(normalizedLiveSecret.Labels, expectedSecret.Labels) || !areStringMapsEqual(normalizedLiveSecret.Annotations, expectedSecret.Annotations) { updateNeeded = true log.Info(fmt.Sprintf("Labels/Annotations of Secret %s do not match the expected state, hence updating it", liveSecret.Name)) diff --git a/controllers/utils.go b/controllers/utils.go index 4a08a8a..205d013 100644 --- a/controllers/utils.go +++ b/controllers/utils.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "os" + "reflect" "sort" "strings" @@ -86,6 +87,30 @@ func appendStringMap(src map[string]string, add map[string]string) map[string]st return res } +// In go, reflect.DeepEqual(a, b), on maps a and b, will return false even when len(a) == 0 and len(b) == 0. This is due to a nil map being considered different from an empty map. +// This function handles this case better: if maps both have a length of 0, we consider them always equal. +func areStringMapsEqual(one map[string]string, two map[string]string) bool { + + isOneEmpty := false + + isTwoEmpty := false + + if len(one) == 0 { + isOneEmpty = true + } + + if len(two) == 0 { + isTwoEmpty = true + } + + if isOneEmpty && isTwoEmpty { + return true + } + + return reflect.DeepEqual(one, two) + +} + // combineStringMaps will combine multiple maps: maps defined earlier in the 'maps' slice may have their values overriden by maps defined later in the 'maps' slice. func combineStringMaps(maps ...map[string]string) map[string]string { @@ -448,6 +473,11 @@ func removeUserLabelsAndAnnotations(obj *metav1.ObjectMeta, cr rolloutsmanagerv1 for objectLabelKey := range obj.Labels { + // We add this label to the aggregated cluster roles we generate, so we should not process it as a user label. + if strings.Contains(objectLabelKey, "rbac.authorization.k8s.io/") && strings.Contains(objectLabelKey, "aggregate") { + continue + } + existsInDefault := false for defaultLabelKey := range defaultLabelsAndAnnotations.Labels { diff --git a/controllers/utils_test.go b/controllers/utils_test.go index 225906f..1412ad4 100644 --- a/controllers/utils_test.go +++ b/controllers/utils_test.go @@ -452,7 +452,6 @@ var _ = Describe("removeUserLabelsAndAnnotations tests", func() { } Expect(k8sClient.Create(ctx, &cr)).To(Succeed()) - setRolloutsLabelsAndAnnotations(&obj) removeUserLabelsAndAnnotations(&obj, cr) @@ -460,34 +459,29 @@ var _ = Describe("removeUserLabelsAndAnnotations tests", func() { Expect(obj.Annotations).To(Equal(expectedAnnotations)) }, Entry("when no user-defined labels or annotations exist", - map[string]string{}, map[string]string{}, - map[string]string{"app.kubernetes.io/name": DefaultArgoRolloutsResourceName, - "app.kubernetes.io/part-of": DefaultArgoRolloutsResourceName, - "app.kubernetes.io/component": DefaultArgoRolloutsResourceName}, + map[string]string{}, + map[string]string{}, + map[string]string{}, map[string]string{}, ), Entry("when user-defined labels and annotations are present", - map[string]string{"user-label": "value"}, map[string]string{"user-annotation": "value"}, - map[string]string{"app.kubernetes.io/name": DefaultArgoRolloutsResourceName, - "app.kubernetes.io/part-of": DefaultArgoRolloutsResourceName, - "app.kubernetes.io/component": DefaultArgoRolloutsResourceName}, + map[string]string{"user-label": "value"}, + map[string]string{"user-annotation": "value"}, + map[string]string{}, map[string]string{}, ), Entry("when user-defined labels are present and annotations are empty", - map[string]string{"user-label": "value"}, map[string]string{}, - map[string]string{"app.kubernetes.io/name": DefaultArgoRolloutsResourceName, - "app.kubernetes.io/part-of": DefaultArgoRolloutsResourceName, - "app.kubernetes.io/component": DefaultArgoRolloutsResourceName}, + map[string]string{"user-label": "value"}, + map[string]string{}, + map[string]string{}, map[string]string{}, ), Entry("when user-defined labels are empty and annotations are present", - map[string]string{}, map[string]string{"user-annotation": "value"}, - map[string]string{"app.kubernetes.io/name": DefaultArgoRolloutsResourceName, - "app.kubernetes.io/part-of": DefaultArgoRolloutsResourceName, - "app.kubernetes.io/component": DefaultArgoRolloutsResourceName}, + map[string]string{}, + map[string]string{"user-annotation": "value"}, + map[string]string{}, map[string]string{}, ), - Entry("when both user and non-user-defined labels are present, and annotations are empty", map[string]string{"user-label": "value", "app.kubernetes.io/name": DefaultArgoRolloutsResourceName, @@ -499,6 +493,20 @@ var _ = Describe("removeUserLabelsAndAnnotations tests", func() { "app.kubernetes.io/component": DefaultArgoRolloutsResourceName}, map[string]string{}, ), + Entry("rbac aggregate-to- labels should not be removed, as we add these to ClusterRoles", + map[string]string{ + "rbac.authorization.k8s.io/aggregate-to-admin": "true", + "app.kubernetes.io/name": DefaultArgoRolloutsResourceName, + "app.kubernetes.io/part-of": DefaultArgoRolloutsResourceName, + "app.kubernetes.io/component": DefaultArgoRolloutsResourceName}, + map[string]string{}, + map[string]string{ + "rbac.authorization.k8s.io/aggregate-to-admin": "true", + "app.kubernetes.io/name": DefaultArgoRolloutsResourceName, + "app.kubernetes.io/part-of": DefaultArgoRolloutsResourceName, + "app.kubernetes.io/component": DefaultArgoRolloutsResourceName}, + map[string]string{}, + ), ) }) @@ -683,6 +691,23 @@ var _ = Describe("setAdditionalRolloutsLabelsAndAnnotationsToObject tests", func }) }) +var _ = Describe("areStringMapsEqual tests", func() { + DescribeTable("areStringMapsEqual should match reflect.DeepEqual(), except when comparing nil with empty map", func(one map[string]string, two map[string]string, expectedRes bool) { + + res := areStringMapsEqual(one, two) + Expect(res).To(Equal(expectedRes)) + }, + Entry("both nil", nil, nil, true), + Entry("one nil, one empty", map[string]string{}, nil, true), + Entry("one empty, one nil", nil, map[string]string{}, true), + Entry("both empty", map[string]string{}, map[string]string{}, true), + Entry("one empty, one has value", map[string]string{}, map[string]string{"key": "value"}, false), + Entry("one has value, one nil", map[string]string{"key": "value"}, nil, false), + Entry("equal values", map[string]string{"key": "value"}, map[string]string{"key": "value"}, true), + ) + +}) + var _ = Describe("envMerge tests", func() { DescribeTable("merges two slices of EnvVar entries", func(existing, merge, expected []corev1.EnvVar, override bool) { diff --git a/hack/run-rollouts-manager-e2e-tests.sh b/hack/run-rollouts-manager-e2e-tests.sh index 03d9304..7ac09c3 100755 --- a/hack/run-rollouts-manager-e2e-tests.sh +++ b/hack/run-rollouts-manager-e2e-tests.sh @@ -5,6 +5,76 @@ SCRIPTPATH="$( pwd -P )" +function cleanup { + echo "* Cleaning up" + killall main || true + killall go || true +} + +trap cleanup EXIT + + +# Treat undefined variables as errors +set -u + +extract_metrics_data() { + + TMP_DIR=$(mktemp -d 2>/dev/null || mktemp -d -t 'mytmpdir') + + while true; do + curl http://localhost:8080/metrics -o "$TMP_DIR/rollouts-metric-endpoint-output.txt" + if [ "$?" == "0" ]; then + break + fi + echo "* Waiting for Metrics endpoint to become available" + sleep 3 + done + + # 1) Extract REST client get/put/post metrics + + # Example: the metrics from /metric endpoint look like this: + # rest_client_requests_total{code="200",host="api.pgqqd-novoo-oqu.pa43.p3.openshiftapps.com:443",method="GET"} 42 + # rest_client_requests_total{code="200",host="api.pgqqd-novoo-oqu.pa43.p3.openshiftapps.com:443",method="PUT"} 88 + # rest_client_requests_total{code="201",host="api.pgqqd-novoo-oqu.pa43.p3.openshiftapps.com:443",method="POST"} 110 + + curl http://localhost:8080/metrics -o "$TMP_DIR/rollouts-metric-endpoint-output.txt" + + GET_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "GET" | rev | cut -d' ' -f1 | rev` + PUT_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "PUT" | grep -v "409" | rev | cut -d' ' -f1 | rev` + POST_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "POST" | rev | cut -d' ' -f1 | rev` + + if [[ "$GET_REQUESTS" == "" ]]; then + GET_REQUESTS=0 + fi + if [[ "$POST_REQUESTS" == "" ]]; then + POST_REQUESTS=0 + fi + if [[ "$PUT_REQUESTS" == "" ]]; then + PUT_REQUESTS=0 + fi + + + # 2) Extract the # of RolloutManager reconciles + + # Example: the metrics from /metric endpoint look like this: + # controller_runtime_reconcile_total{controller="rolloutmanager",result="error"} 0 + # controller_runtime_reconcile_total{controller="rolloutmanager",result="requeue"} 0 + # controller_runtime_reconcile_total{controller="rolloutmanager",result="requeue_after"} 0 + # controller_runtime_reconcile_total{controller="rolloutmanager",result="success"} 135 + ERROR_RECONCILES=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "controller_runtime_reconcile_total" | grep "error" | rev | cut -d' ' -f1` + SUCCESS_RECONCILES=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "controller_runtime_reconcile_total" | grep "success" | rev | cut -d' ' -f1` + + if [[ "$ERROR_RECONCILES" == "" ]]; then + ERROR_RECONCILES=0 + fi + + if [[ "$SUCCESS_RECONCILES" == "" ]]; then + SUCCESS_RECONCILES=0 + fi + +} + + cd "$SCRIPTPATH/.." set -o pipefail @@ -13,10 +83,21 @@ set -o pipefail kubectl get crd/servicemonitors.monitoring.coreos.com &> /dev/null retVal=$? if [ $retVal -ne 0 ]; then - # If the CRD is not found, apply the CRD YAML - kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/release-0.52/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml + # If the CRD is not found, apply the CRD YAML + kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/release-0.52/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml fi +# Before the test starts, extract initial metrics values from the /metrics endpoint of the operator +extract_metrics_data +INITIAL_GET_REQUESTS=$GET_REQUESTS +INITIAL_PUT_REQUESTS=$PUT_REQUESTS +INITIAL_POST_REQUESTS=$POST_REQUESTS +INITIAL_ERROR_RECONCILES=$ERROR_RECONCILES +INITIAL_SUCCESS_RECONCILES=$SUCCESS_RECONCILES + + +# Run the tests + set -ex if [ "$NAMESPACE_SCOPED_ARGO_ROLLOUTS" == "true" ]; then @@ -29,6 +110,63 @@ else fi +# Sanity test the behaviour of the operator during the tests: +# - We check the (prometheus) metrics coming from the 'localhost:8080/metrics' endpoint of the operator. +# - For example, if the reported # of Reconcile calls was unusually high, this might mean that the operator was stuck in a Reconcile loop +# - Or, if the number of REST client POST requests (e.g. k8s objection creation) was equal to the number of PUT request (e.g. k8s object update), this may imply we are updating .status or .spec of an object too frequently. +sanity_test_metrics_data() { + + extract_metrics_data + + set -x + + FINAL_GET_REQUESTS=$GET_REQUESTS + FINAL_PUT_REQUESTS=$PUT_REQUESTS + FINAL_POST_REQUESTS=$POST_REQUESTS + FINAL_ERROR_RECONCILES=$ERROR_RECONCILES + FINAL_SUCCESS_RECONCILES=$SUCCESS_RECONCILES + + DELTA_GET_REQUESTS=`expr $FINAL_GET_REQUESTS - $INITIAL_GET_REQUESTS` + DELTA_PUT_REQUESTS=`expr $FINAL_PUT_REQUESTS - $INITIAL_PUT_REQUESTS` + DELTA_POST_REQUESTS=`expr $FINAL_POST_REQUESTS - $INITIAL_POST_REQUESTS` + + DELTA_ERROR_RECONCILES=`expr $FINAL_ERROR_RECONCILES - $INITIAL_ERROR_RECONCILES` + DELTA_SUCCESS_RECONCILES=`expr $FINAL_SUCCESS_RECONCILES - $INITIAL_SUCCESS_RECONCILES` + + if [[ "$DELTA_POST_REQUESTS" == "0" ]]; then + echo "Unexpected number of REST client post requests: should be at least 1" + exit 1 + fi + + # The # of PUT requests should be less than 40% of the # of POST requests + # - If the number is higher, this implies we are updating the .status or .spec fields of resources more than is necessary. + PUT_REQUEST_PERCENT=`expr "$DELTA_PUT_REQUESTS"00 / $DELTA_POST_REQUESTS` + + if [[ "`expr $PUT_REQUEST_PERCENT \> 40`" == "1" ]]; then + # This value is arbitrary, and should be updated if at any point it becomes inaccurate (but first audit the test/code to make sure it is not an actual product/test issue, before increasing) + + echo "Put request was %$PUT_REQUEST_PERCENT greater than the expected value" + exit 1 + + fi + + if [[ "`expr $DELTA_ERROR_RECONCILES \> 60`" == "1" ]]; then + # This value is arbitrary, and should be updated if at any point it becomes inaccurate (but first audit the test/code to make sure it is not an actual product/test issue, before increasing) + echo "Number of Reconcile calls that returned an error '$DELTA_ERROR_RECONCILES' was greater than the expected value" + exit 1 + + fi + + if [[ "`expr $DELTA_SUCCESS_RECONCILES \> 1200`" == "1" ]]; then + # This value is arbitrary, and should be updated if at any point it becomes inaccurate (but first audit the test/code to make sure it is not an actual product/test issue, before increasing) + + echo "Number of Reconcile calls that returned success '$DELTA_SUCCESS_RECONCILES' was greater than the expected value" + exit 1 + + fi +} + +sanity_test_metrics_data set +e @@ -43,11 +181,19 @@ if [ -f "/tmp/e2e-operator-run.log" ]; then # Grep the log for unexpected errors # - Ignore errors that are expected to occur - UNEXPECTED_ERRORS_FOUND_TEXT=`cat /tmp/e2e-operator-run.log | grep "ERROR" | grep -v "because it is being terminated" | grep -v "the object has been modified; please apply your changes to the latest version and try again" | grep -v "unable to fetch" | grep -v "StorageError"` | grep -v "client rate limiter Wait returned an error: context canceled" - UNEXPECTED_ERRORS_COUNT=`echo $UNEXPECTED_ERRORS_FOUND_TEXT | grep "ERROR" | wc -l` + set +u # allow undefined vars + + UNEXPECTED_ERRORS_FOUND_TEXT=`cat /tmp/e2e-operator-run.log | grep "ERROR" | grep -v "because it is being terminated" | grep -v "the object has been modified; please apply your changes to the latest version and try again" | grep -v "unable to fetch" | grep -v "StorageError" | grep -v "client rate limiter Wait returned an error: context canceled"` + + if [ "$UNEXPECTED_ERRORS_FOUND_TEXT" != "" ]; then - if [ "$UNEXPECTED_ERRORS_COUNT" != "0" ]; then - echo "Unexpected errors found: $UNEXPECTED_ERRORS_FOUND_TEXT" - exit 1 + UNEXPECTED_ERRORS_COUNT=`echo $UNEXPECTED_ERRORS_FOUND_TEXT | grep "ERROR" | wc -l` + + if [ "$UNEXPECTED_ERRORS_COUNT" != "0" ]; then + echo "Unexpected errors found: $UNEXPECTED_ERRORS_FOUND_TEXT" + exit 1 + fi fi + + fi