From 6c27772dd6e19248c3cfcd242901e40b516eabb4 Mon Sep 17 00:00:00 2001
From: Jonathan West <jonwest@redhat.com>
Date: Mon, 15 Apr 2024 13:22:25 -0400
Subject: [PATCH 1/5] Add sanity test of reconcile metrics after e2e tests

Signed-off-by: Jonathan West <jonwest@redhat.com>
---
 hack/run-rollouts-manager-e2e-tests.sh | 130 ++++++++++++++++++++++++-
 1 file changed, 127 insertions(+), 3 deletions(-)

diff --git a/hack/run-rollouts-manager-e2e-tests.sh b/hack/run-rollouts-manager-e2e-tests.sh
index 03d9304..09e842d 100755
--- a/hack/run-rollouts-manager-e2e-tests.sh
+++ b/hack/run-rollouts-manager-e2e-tests.sh
@@ -5,6 +5,62 @@ SCRIPTPATH="$(
   pwd -P
 )"
 
+# Treat undefined variables as errors
+set -u
+
+TMP_DIR=$(mktemp -d 2>/dev/null || mktemp -d -t 'mytmpdir')
+
+
+extract_metrics_data() {
+  
+  # 1) Extract REST client get/put/post metrics
+  
+  # Example: the metrics from /metric endpoint look like this:
+  # rest_client_requests_total{code="200",host="api.pgqqd-novoo-oqu.pa43.p3.openshiftapps.com:443",method="GET"} 42
+  # rest_client_requests_total{code="200",host="api.pgqqd-novoo-oqu.pa43.p3.openshiftapps.com:443",method="PUT"} 88
+  # rest_client_requests_total{code="201",host="api.pgqqd-novoo-oqu.pa43.p3.openshiftapps.com:443",method="POST"} 110
+
+  curl http://localhost:8080/metrics -o "$TMP_DIR/rollouts-metric-endpoint-output.txt"
+  GET_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "GET" | rev | cut -d' ' -f1`
+  PUT_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "PUT" | rev | cut -d' ' -f1`
+  POST_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "POST" | rev | cut -d' ' -f1`
+
+
+  if [[ "$GET_REQUESTS" == "" ]]; then
+    GET_REQUESTS=0
+  fi
+  if [[ "$POST_REQUESTS" == "" ]]; then
+    POST_REQUESTS=0
+  fi
+  if [[ "$PUT_REQUESTS" == "" ]]; then
+    PUT_REQUESTS=0
+  fi
+
+
+  # 2) Extract the # of RolloutManager reconciles
+
+  # Example: the metrics from /metric endpoint look like this:
+  # controller_runtime_reconcile_total{controller="rolloutmanager",result="error"} 0
+  # controller_runtime_reconcile_total{controller="rolloutmanager",result="requeue"} 0
+  # controller_runtime_reconcile_total{controller="rolloutmanager",result="requeue_after"} 0
+  # controller_runtime_reconcile_total{controller="rolloutmanager",result="success"} 135
+  ERROR_RECONCILES=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "controller_runtime_reconcile_total" | grep "error" | rev | cut -d' ' -f1`
+  SUCCESS_RECONCILES=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "controller_runtime_reconcile_total" | grep "success" | rev | cut -d' ' -f1`
+
+  if [[ "$ERROR_RECONCILES" == "" ]]; then
+    ERROR_RECONCILES=0
+  fi
+
+  if [[ "$SUCCESS_RECONCILES" == "" ]]; then
+    SUCCESS_RECONCILES=0
+  fi
+
+}
+
+
+
+
+
 cd "$SCRIPTPATH/.."
 
 set -o pipefail
@@ -13,12 +69,24 @@ set -o pipefail
 kubectl get crd/servicemonitors.monitoring.coreos.com &> /dev/null
 retVal=$?
 if [ $retVal -ne 0 ]; then
-    # If the CRD is not found, apply the CRD YAML
-    kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/release-0.52/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
+  # If the CRD is not found, apply the CRD YAML
+  kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/release-0.52/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
 fi
 
+
+# Before the test starts, extract initial metrics values
+extract_metrics_data
+
+INITIAL_GET_REQUESTS=$GET_REQUESTS
+INITIAL_PUT_REQUESTS=$PUT_REQUESTS
+INITIAL_POST_REQUESTS=$POST_REQUESTS
+INITIAL_ERROR_RECONCILES=$ERROR_RECONCILES
+INITIAL_SUCCESS_RECONCILES=$SUCCESS_RECONCILES
+
+
 set -ex
 
+
 if [ "$NAMESPACE_SCOPED_ARGO_ROLLOUTS" == "true" ]; then
 
   go test -v -p=1 -timeout=30m -race -count=1 -coverprofile=coverage.out ./tests/e2e/namespace-scoped
@@ -29,7 +97,6 @@ else
 
 fi
 
-
 set +e
 
 # If the output from the E2E operator is available, then check it for errors
@@ -51,3 +118,60 @@ if [ -f "/tmp/e2e-operator-run.log" ]; then
       exit 1
   fi
 fi
+
+
+extract_metrics_data
+
+set -x
+
+FINAL_GET_REQUESTS=$GET_REQUESTS
+FINAL_PUT_REQUESTS=$PUT_REQUESTS
+FINAL_POST_REQUESTS=$POST_REQUESTS
+FINAL_ERROR_RECONCILES=$ERROR_RECONCILES
+FINAL_SUCCESS_RECONCILES=$SUCCESS_RECONCILES
+
+DELTA_GET_REQUESTS=`expr $FINAL_GET_REQUESTS - $INITIAL_GET_REQUESTS`
+DELTA_PUT_REQUESTS=`expr $FINAL_PUT_REQUESTS - $INITIAL_PUT_REQUESTS`
+DELTA_POST_REQUESTS=`expr $FINAL_POST_REQUESTS - $INITIAL_POST_REQUESTS`
+
+DELTA_ERROR_RECONCILES=`expr $FINAL_ERROR_RECONCILES - $INITIAL_ERROR_RECONCILES`
+DELTA_SUCCESS_RECONCILES=`expr $FINAL_SUCCESS_RECONCILES - $INITIAL_SUCCESS_RECONCILES`
+
+
+if [[ "$DELTA_POST_REQUESTS" == "0" ]]; then
+  echo "Unexpected number of REST client post requests: should be at least 1"
+  exit 1
+fi 
+
+# Sanity test the behaviour of the operator during the tests
+
+# The # of PUT requests should be less than 40% of the # of POST requests
+# - If the number is higher, this implies we are updating the .status or .spec fields of resources more than is necessary.
+PUT_REQUEST_PERCENT=`expr "$DELTA_PUT_REQUESTS"00 / $DELTA_POST_REQUESTS`
+
+if [[ "`expr $PUT_REQUEST_PERCENT \> 40`" == "1" ]]; then
+
+  echo "Put request %$PUT_REQUEST_PERCENT was greater than the expected value"
+  exit 1
+
+fi
+
+if [[ "`expr $DELTA_ERROR_RECONCILES \> 20`" == "1" ]]; then
+
+  echo "Number of Reconcile calls that returned an error $DELTA_ERROR_RECONCILES was greater than the expected value"
+  exit 1
+
+fi
+
+if [[ "`expr $DELTA_SUCCESS_RECONCILES \> 200`" == "1" ]]; then
+
+  echo "Number of Reconcile calls that returned success $DELTA_SUCCESS_RECONCILES was greater than the expected value"
+  exit 1
+
+fi
+
+
+
+
+# The # of reconcile calls should be within an expected 
+# - This may need to be updated as we add new E2E tests.
\ No newline at end of file

From 17c8b5f391a24c093084d8969068353b77f016e9 Mon Sep 17 00:00:00 2001
From: Jonathan West <jonwest@redhat.com>
Date: Fri, 19 Apr 2024 15:24:07 -0400
Subject: [PATCH 2/5] Refactor metric gathering portion of E2E test script

Signed-off-by: Jonathan West <jonwest@redhat.com>
---
 hack/run-rollouts-manager-e2e-tests.sh | 97 ++++++++++++--------------
 1 file changed, 46 insertions(+), 51 deletions(-)

diff --git a/hack/run-rollouts-manager-e2e-tests.sh b/hack/run-rollouts-manager-e2e-tests.sh
index 09e842d..eb5f48a 100755
--- a/hack/run-rollouts-manager-e2e-tests.sh
+++ b/hack/run-rollouts-manager-e2e-tests.sh
@@ -8,11 +8,10 @@ SCRIPTPATH="$(
 # Treat undefined variables as errors
 set -u
 
-TMP_DIR=$(mktemp -d 2>/dev/null || mktemp -d -t 'mytmpdir')
+extract_metrics_data() {
 
+  TMP_DIR=$(mktemp -d 2>/dev/null || mktemp -d -t 'mytmpdir')
 
-extract_metrics_data() {
-  
   # 1) Extract REST client get/put/post metrics
   
   # Example: the metrics from /metric endpoint look like this:
@@ -25,7 +24,6 @@ extract_metrics_data() {
   PUT_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "PUT" | rev | cut -d' ' -f1`
   POST_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "POST" | rev | cut -d' ' -f1`
 
-
   if [[ "$GET_REQUESTS" == "" ]]; then
     GET_REQUESTS=0
   fi
@@ -58,9 +56,6 @@ extract_metrics_data() {
 }
 
 
-
-
-
 cd "$SCRIPTPATH/.."
 
 set -o pipefail
@@ -73,10 +68,8 @@ if [ $retVal -ne 0 ]; then
   kubectl apply -f https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/release-0.52/example/prometheus-operator-crd/monitoring.coreos.com_servicemonitors.yaml
 fi
 
-
-# Before the test starts, extract initial metrics values
+# Before the test starts, extract initial metrics values from the /metrics endpoint of the operator
 extract_metrics_data
-
 INITIAL_GET_REQUESTS=$GET_REQUESTS
 INITIAL_PUT_REQUESTS=$PUT_REQUESTS
 INITIAL_POST_REQUESTS=$POST_REQUESTS
@@ -84,8 +77,9 @@ INITIAL_ERROR_RECONCILES=$ERROR_RECONCILES
 INITIAL_SUCCESS_RECONCILES=$SUCCESS_RECONCILES
 
 
-set -ex
+# Run the tests
 
+set -ex
 
 if [ "$NAMESPACE_SCOPED_ARGO_ROLLOUTS" == "true" ]; then
 
@@ -119,59 +113,60 @@ if [ -f "/tmp/e2e-operator-run.log" ]; then
   fi
 fi
 
+# Sanity test the behaviour of the operator during the tests:
+# - We check the (prometheus) metrics coming from the 'localhost:8080/metrics' endpoint of the operator.
+# - For example, if the reported # of Reconcile calls was unusually high, this might mean that the operator was stuck in a Reconcile loop
+# - Or, if the number of REST client POST requests (e.g. k8s objection creation) was equal to the number of PUT request (e.g. k8s object update), this may imply we are updating .status or .spec of an object too frequently.
+sanity_test_metrics_data() {
 
-extract_metrics_data
-
-set -x
+  extract_metrics_data
 
-FINAL_GET_REQUESTS=$GET_REQUESTS
-FINAL_PUT_REQUESTS=$PUT_REQUESTS
-FINAL_POST_REQUESTS=$POST_REQUESTS
-FINAL_ERROR_RECONCILES=$ERROR_RECONCILES
-FINAL_SUCCESS_RECONCILES=$SUCCESS_RECONCILES
+  set -x
 
-DELTA_GET_REQUESTS=`expr $FINAL_GET_REQUESTS - $INITIAL_GET_REQUESTS`
-DELTA_PUT_REQUESTS=`expr $FINAL_PUT_REQUESTS - $INITIAL_PUT_REQUESTS`
-DELTA_POST_REQUESTS=`expr $FINAL_POST_REQUESTS - $INITIAL_POST_REQUESTS`
+  FINAL_GET_REQUESTS=$GET_REQUESTS
+  FINAL_PUT_REQUESTS=$PUT_REQUESTS
+  FINAL_POST_REQUESTS=$POST_REQUESTS
+  FINAL_ERROR_RECONCILES=$ERROR_RECONCILES
+  FINAL_SUCCESS_RECONCILES=$SUCCESS_RECONCILES
 
-DELTA_ERROR_RECONCILES=`expr $FINAL_ERROR_RECONCILES - $INITIAL_ERROR_RECONCILES`
-DELTA_SUCCESS_RECONCILES=`expr $FINAL_SUCCESS_RECONCILES - $INITIAL_SUCCESS_RECONCILES`
+  DELTA_GET_REQUESTS=`expr $FINAL_GET_REQUESTS - $INITIAL_GET_REQUESTS`
+  DELTA_PUT_REQUESTS=`expr $FINAL_PUT_REQUESTS - $INITIAL_PUT_REQUESTS`
+  DELTA_POST_REQUESTS=`expr $FINAL_POST_REQUESTS - $INITIAL_POST_REQUESTS`
 
+  DELTA_ERROR_RECONCILES=`expr $FINAL_ERROR_RECONCILES - $INITIAL_ERROR_RECONCILES`
+  DELTA_SUCCESS_RECONCILES=`expr $FINAL_SUCCESS_RECONCILES - $INITIAL_SUCCESS_RECONCILES`
 
-if [[ "$DELTA_POST_REQUESTS" == "0" ]]; then
-  echo "Unexpected number of REST client post requests: should be at least 1"
-  exit 1
-fi 
+  if [[ "$DELTA_POST_REQUESTS" == "0" ]]; then
+    echo "Unexpected number of REST client post requests: should be at least 1"
+    exit 1
+  fi 
 
-# Sanity test the behaviour of the operator during the tests
+  # The # of PUT requests should be less than 40% of the # of POST requests
+  # - If the number is higher, this implies we are updating the .status or .spec fields of resources more than is necessary.
+  PUT_REQUEST_PERCENT=`expr "$DELTA_PUT_REQUESTS"00 / $DELTA_POST_REQUESTS`
 
-# The # of PUT requests should be less than 40% of the # of POST requests
-# - If the number is higher, this implies we are updating the .status or .spec fields of resources more than is necessary.
-PUT_REQUEST_PERCENT=`expr "$DELTA_PUT_REQUESTS"00 / $DELTA_POST_REQUESTS`
+  if [[ "`expr $PUT_REQUEST_PERCENT \> 40`" == "1" ]]; then
+    # This value is arbitrary, and should be updated if at any point it becomes inaccurate (but first audit the test/code to make sure it is not an actual product/test issue, before increasing)
 
-if [[ "`expr $PUT_REQUEST_PERCENT \> 40`" == "1" ]]; then
+    echo "Put request %$PUT_REQUEST_PERCENT was greater than the expected value"
+    exit 1
 
-  echo "Put request %$PUT_REQUEST_PERCENT was greater than the expected value"
-  exit 1
-
-fi
-
-if [[ "`expr $DELTA_ERROR_RECONCILES \> 20`" == "1" ]]; then
-
-  echo "Number of Reconcile calls that returned an error $DELTA_ERROR_RECONCILES was greater than the expected value"
-  exit 1
-
-fi
-
-if [[ "`expr $DELTA_SUCCESS_RECONCILES \> 200`" == "1" ]]; then
+  fi
 
-  echo "Number of Reconcile calls that returned success $DELTA_SUCCESS_RECONCILES was greater than the expected value"
-  exit 1
+  if [[ "`expr $DELTA_ERROR_RECONCILES \> 20`" == "1" ]]; then
+    # This value is arbitrary, and should be updated if at any point it becomes inaccurate (but first audit the test/code to make sure it is not an actual product/test issue, before increasing)
+    echo "Number of Reconcile calls that returned an error $DELTA_ERROR_RECONCILES was greater than the expected value"
+    exit 1
 
-fi
+  fi
 
+  if [[ "`expr $DELTA_SUCCESS_RECONCILES \> 200`" == "1" ]]; then
+    # This value is arbitrary, and should be updated if at any point it becomes inaccurate (but first audit the test/code to make sure it is not an actual product/test issue, before increasing)
 
+    echo "Number of Reconcile calls that returned success $DELTA_SUCCESS_RECONCILES was greater than the expected value"
+    exit 1
 
+  fi
+}
 
-# The # of reconcile calls should be within an expected 
-# - This may need to be updated as we add new E2E tests.
\ No newline at end of file
+sanity_test_metrics_data
\ No newline at end of file

From fc63a85d90f0742245a35d81ddbe50906a117126 Mon Sep 17 00:00:00 2001
From: Jonathan West <jonwest@redhat.com>
Date: Tue, 27 Aug 2024 21:24:53 -0400
Subject: [PATCH 3/5] Refactor E2E script, reduce excess reconciling of
 resources

Signed-off-by: Jonathan West <jonwest@redhat.com>
---
 controllers/configmap.go               |  6 +-
 controllers/configmap_test.go          |  3 +-
 controllers/resources.go               | 22 ++++----
 controllers/utils.go                   | 30 ++++++++++
 controllers/utils_test.go              | 61 ++++++++++++++------
 hack/run-rollouts-manager-e2e-tests.sh | 77 ++++++++++++++++----------
 6 files changed, 137 insertions(+), 62 deletions(-)

diff --git a/controllers/configmap.go b/controllers/configmap.go
index 641be57..925826c 100644
--- a/controllers/configmap.go
+++ b/controllers/configmap.go
@@ -146,12 +146,12 @@ func (r *RolloutManagerReconciler) reconcileConfigMap(ctx context.Context, cr ro
 		actualConfigMap.Data[TrafficRouterPluginConfigMapKey] = string(desiredTrafficRouterPluginString)
 		actualConfigMap.Data[MetricPluginConfigMapKey] = string(desiredMetricPluginString)
 
+		log.Info("Updating Rollouts ConfigMap due to detected difference")
+
 		// Update the ConfigMap in the cluster
 		if err := r.Client.Update(ctx, actualConfigMap); err != nil {
 			return fmt.Errorf("failed to update ConfigMap: %v", err)
 		}
-		log.Info("ConfigMap updated successfully")
-
 		// Restarting rollouts pod only if configMap is updated
 		if err := r.restartRolloutsPod(ctx, cr.Namespace); err != nil {
 			return err
@@ -183,8 +183,8 @@ func (r *RolloutManagerReconciler) restartRolloutsPod(ctx context.Context, names
 
 	for i := range podList.Items {
 		pod := podList.Items[i]
-		log.Info("Deleting Rollouts Pod", "podName", pod.Name)
 		if pod.ObjectMeta.DeletionTimestamp == nil {
+			log.Info("Deleting Rollouts Pod", "podName", pod.Name)
 			if err := r.Client.Delete(ctx, &pod); err != nil {
 				if errors.IsNotFound(err) {
 					log.Info(fmt.Sprintf("Pod %s already deleted", pod.Name))
diff --git a/controllers/configmap_test.go b/controllers/configmap_test.go
index d6bce90..70df5a5 100644
--- a/controllers/configmap_test.go
+++ b/controllers/configmap_test.go
@@ -9,7 +9,6 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	appsv1 "k8s.io/api/apps/v1"
-	v1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 )
@@ -19,7 +18,7 @@ var _ = Describe("ConfigMap Test", func() {
 	var a v1alpha1.RolloutManager
 	var r *RolloutManagerReconciler
 	var sa *corev1.ServiceAccount
-	var existingDeployment *v1.Deployment
+	var existingDeployment *appsv1.Deployment
 	const trafficrouterPluginLocation = "https://custom-traffic-plugin-location"
 	const metricPluginLocation = "https://custom-metric-plugin-location"
 
diff --git a/controllers/resources.go b/controllers/resources.go
index f50572a..b88acc3 100644
--- a/controllers/resources.go
+++ b/controllers/resources.go
@@ -46,7 +46,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsServiceAccount(ctx context.C
 	normalizedLiveServiceAccount := liveServiceAccount.DeepCopy()
 	removeUserLabelsAndAnnotations(&normalizedLiveServiceAccount.ObjectMeta, cr)
 
-	if !reflect.DeepEqual(normalizedLiveServiceAccount.Labels, expectedServiceAccount.Labels) || !reflect.DeepEqual(normalizedLiveServiceAccount.Annotations, expectedServiceAccount.Annotations) {
+	if !areStringMapsEqual(normalizedLiveServiceAccount.Labels, expectedServiceAccount.Labels) || !areStringMapsEqual(normalizedLiveServiceAccount.Annotations, expectedServiceAccount.Annotations) {
 		updateNeeded = true
 		log.Info(fmt.Sprintf("Labels/Annotations of ServiceAccount %s do not match the expected state, hence updating it", liveServiceAccount.Name))
 
@@ -111,7 +111,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsRole(ctx context.Context, cr
 
 	removeUserLabelsAndAnnotations(&normalizedLiveRole.ObjectMeta, cr)
 
-	if !reflect.DeepEqual(normalizedLiveRole.Labels, expectedRole.Labels) || !reflect.DeepEqual(normalizedLiveRole.Annotations, expectedRole.Annotations) {
+	if !areStringMapsEqual(normalizedLiveRole.Labels, expectedRole.Labels) || !areStringMapsEqual(normalizedLiveRole.Annotations, expectedRole.Annotations) {
 		updateNeeded = true
 		log.Info(fmt.Sprintf("Labels/Annotations of Role %s do not match the expected state, hence updating it", liveRole.Name))
 
@@ -165,7 +165,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsClusterRole(ctx context.Cont
 	normalizedLiveClusterRole := liveClusterRole.DeepCopy()
 	removeUserLabelsAndAnnotations(&normalizedLiveClusterRole.ObjectMeta, cr)
 
-	if !reflect.DeepEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !reflect.DeepEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) {
+	if !areStringMapsEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !areStringMapsEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) {
 		updateNeeded = true
 		log.Info(fmt.Sprintf("Labels/Annotations of Role %s do not match the expected state, hence updating it", liveClusterRole.Name))
 
@@ -248,7 +248,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsRoleBinding(ctx context.Cont
 
 	normalizedLiveRoleBinding := liveRoleBinding.DeepCopy()
 	removeUserLabelsAndAnnotations(&normalizedLiveRoleBinding.ObjectMeta, cr)
-	if !reflect.DeepEqual(normalizedLiveRoleBinding.Labels, expectedRoleBinding.Labels) || !reflect.DeepEqual(normalizedLiveRoleBinding.Annotations, expectedRoleBinding.Annotations) {
+	if !areStringMapsEqual(normalizedLiveRoleBinding.Labels, expectedRoleBinding.Labels) || !areStringMapsEqual(normalizedLiveRoleBinding.Annotations, expectedRoleBinding.Annotations) {
 		updateNeeded = true
 		log.Info(fmt.Sprintf("Labels/Annotations of RoleBinding %s do not match the expected state, hence updating it", liveRoleBinding.Name))
 
@@ -327,7 +327,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsClusterRoleBinding(ctx conte
 
 	normalizedLiveClusterRoleBinding := liveClusterRoleBinding.DeepCopy()
 	removeUserLabelsAndAnnotations(&normalizedLiveClusterRoleBinding.ObjectMeta, cr)
-	if !reflect.DeepEqual(normalizedLiveClusterRoleBinding.Labels, expectedClusterRoleBinding.Labels) || !reflect.DeepEqual(normalizedLiveClusterRoleBinding.Annotations, expectedClusterRoleBinding.Annotations) {
+	if !areStringMapsEqual(normalizedLiveClusterRoleBinding.Labels, expectedClusterRoleBinding.Labels) || !areStringMapsEqual(normalizedLiveClusterRoleBinding.Annotations, expectedClusterRoleBinding.Annotations) {
 		updateNeeded = true
 		log.Info(fmt.Sprintf("Labels/Annotations of ClusterRoleBinding %s do not match the expected state, hence updating it", liveClusterRoleBinding.Name))
 
@@ -460,7 +460,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsAggregateToAdminClusterRole(
 
 	normalizedLiveClusterRole := liveClusterRole.DeepCopy()
 	removeUserLabelsAndAnnotations(&normalizedLiveClusterRole.ObjectMeta, cr)
-	if !reflect.DeepEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !reflect.DeepEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) {
+	if !areStringMapsEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !areStringMapsEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) {
 		updateNeeded = true
 		log.Info(fmt.Sprintf("Labels/Annotations of aggregated ClusterRole %s do not match the expected state, hence updating it", liveClusterRole.Name))
 
@@ -512,7 +512,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsAggregateToEditClusterRole(c
 
 	normalizedLiveClusterRole := liveClusterRole.DeepCopy()
 	removeUserLabelsAndAnnotations(&normalizedLiveClusterRole.ObjectMeta, cr)
-	if !reflect.DeepEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !reflect.DeepEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) {
+	if !areStringMapsEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !areStringMapsEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) {
 		updateNeeded = true
 		log.Info(fmt.Sprintf("Labels/Annotations of aggregated ClusterRole %s do not match the expected state, hence updating it", liveClusterRole.Name))
 
@@ -564,7 +564,8 @@ func (r *RolloutManagerReconciler) reconcileRolloutsAggregateToViewClusterRole(c
 
 	normalizedLiveClusterRole := liveClusterRole.DeepCopy()
 	removeUserLabelsAndAnnotations(&normalizedLiveClusterRole.ObjectMeta, cr)
-	if !reflect.DeepEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !reflect.DeepEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) {
+	if !areStringMapsEqual(normalizedLiveClusterRole.Labels, expectedClusterRole.Labels) || !areStringMapsEqual(normalizedLiveClusterRole.Annotations, expectedClusterRole.Annotations) {
+
 		updateNeeded = true
 		log.Info(fmt.Sprintf("Labels/Annotations of aggregated ClusterRole %s do not match the expected state, hence updating it", liveClusterRole.Name))
 
@@ -702,7 +703,8 @@ func (r *RolloutManagerReconciler) reconcileRolloutsMetricsService(ctx context.C
 
 	normalizedLiveService := liveService.DeepCopy()
 	removeUserLabelsAndAnnotations(&normalizedLiveService.ObjectMeta, cr)
-	if !reflect.DeepEqual(normalizedLiveService.Labels, expectedSvc.Labels) || !reflect.DeepEqual(normalizedLiveService.Annotations, expectedSvc.Annotations) {
+	if !areStringMapsEqual(normalizedLiveService.Labels, expectedSvc.Labels) || !areStringMapsEqual(normalizedLiveService.Annotations, expectedSvc.Annotations) {
+
 		updateNeeded = true
 		log.Info(fmt.Sprintf("Labels/Annotations of metrics Service %s do not match the expected state, hence updating it", liveService.Name))
 
@@ -778,7 +780,7 @@ func (r *RolloutManagerReconciler) reconcileRolloutsSecrets(ctx context.Context,
 	normalizedLiveSecret := liveSecret.DeepCopy()
 	removeUserLabelsAndAnnotations(&normalizedLiveSecret.ObjectMeta, cr)
 
-	if !reflect.DeepEqual(normalizedLiveSecret.Labels, expectedSecret.Labels) || !reflect.DeepEqual(normalizedLiveSecret.Annotations, expectedSecret.Annotations) {
+	if !areStringMapsEqual(normalizedLiveSecret.Labels, expectedSecret.Labels) || !areStringMapsEqual(normalizedLiveSecret.Annotations, expectedSecret.Annotations) {
 		updateNeeded = true
 		log.Info(fmt.Sprintf("Labels/Annotations of Secret %s do not match the expected state, hence updating it", liveSecret.Name))
 
diff --git a/controllers/utils.go b/controllers/utils.go
index 4a08a8a..205d013 100644
--- a/controllers/utils.go
+++ b/controllers/utils.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"os"
+	"reflect"
 	"sort"
 	"strings"
 
@@ -86,6 +87,30 @@ func appendStringMap(src map[string]string, add map[string]string) map[string]st
 	return res
 }
 
+// In go, reflect.DeepEqual(a, b), on maps a and b, will return false even when len(a) == 0 and len(b) == 0. This is due to a nil map being considered different from an empty map.
+// This function handles this case better: if maps both have a length of 0, we consider them always equal.
+func areStringMapsEqual(one map[string]string, two map[string]string) bool {
+
+	isOneEmpty := false
+
+	isTwoEmpty := false
+
+	if len(one) == 0 {
+		isOneEmpty = true
+	}
+
+	if len(two) == 0 {
+		isTwoEmpty = true
+	}
+
+	if isOneEmpty && isTwoEmpty {
+		return true
+	}
+
+	return reflect.DeepEqual(one, two)
+
+}
+
 // combineStringMaps will combine multiple maps: maps defined earlier in the 'maps' slice may have their values overriden by maps defined later in the 'maps' slice.
 func combineStringMaps(maps ...map[string]string) map[string]string {
 
@@ -448,6 +473,11 @@ func removeUserLabelsAndAnnotations(obj *metav1.ObjectMeta, cr rolloutsmanagerv1
 
 	for objectLabelKey := range obj.Labels {
 
+		// We add this label to the aggregated cluster roles we generate, so we should not process it as a user label.
+		if strings.Contains(objectLabelKey, "rbac.authorization.k8s.io/") && strings.Contains(objectLabelKey, "aggregate") {
+			continue
+		}
+
 		existsInDefault := false
 
 		for defaultLabelKey := range defaultLabelsAndAnnotations.Labels {
diff --git a/controllers/utils_test.go b/controllers/utils_test.go
index 225906f..1412ad4 100644
--- a/controllers/utils_test.go
+++ b/controllers/utils_test.go
@@ -452,7 +452,6 @@ var _ = Describe("removeUserLabelsAndAnnotations tests", func() {
 			}
 
 			Expect(k8sClient.Create(ctx, &cr)).To(Succeed())
-			setRolloutsLabelsAndAnnotations(&obj)
 
 			removeUserLabelsAndAnnotations(&obj, cr)
 
@@ -460,34 +459,29 @@ var _ = Describe("removeUserLabelsAndAnnotations tests", func() {
 			Expect(obj.Annotations).To(Equal(expectedAnnotations))
 		},
 		Entry("when no user-defined labels or annotations exist",
-			map[string]string{}, map[string]string{},
-			map[string]string{"app.kubernetes.io/name": DefaultArgoRolloutsResourceName,
-				"app.kubernetes.io/part-of":   DefaultArgoRolloutsResourceName,
-				"app.kubernetes.io/component": DefaultArgoRolloutsResourceName},
+			map[string]string{},
+			map[string]string{},
+			map[string]string{},
 			map[string]string{},
 		),
 		Entry("when user-defined labels and annotations are present",
-			map[string]string{"user-label": "value"}, map[string]string{"user-annotation": "value"},
-			map[string]string{"app.kubernetes.io/name": DefaultArgoRolloutsResourceName,
-				"app.kubernetes.io/part-of":   DefaultArgoRolloutsResourceName,
-				"app.kubernetes.io/component": DefaultArgoRolloutsResourceName},
+			map[string]string{"user-label": "value"},
+			map[string]string{"user-annotation": "value"},
+			map[string]string{},
 			map[string]string{},
 		),
 		Entry("when user-defined labels are present and annotations are empty",
-			map[string]string{"user-label": "value"}, map[string]string{},
-			map[string]string{"app.kubernetes.io/name": DefaultArgoRolloutsResourceName,
-				"app.kubernetes.io/part-of":   DefaultArgoRolloutsResourceName,
-				"app.kubernetes.io/component": DefaultArgoRolloutsResourceName},
+			map[string]string{"user-label": "value"},
+			map[string]string{},
+			map[string]string{},
 			map[string]string{},
 		),
 		Entry("when user-defined labels are empty and annotations are present",
-			map[string]string{}, map[string]string{"user-annotation": "value"},
-			map[string]string{"app.kubernetes.io/name": DefaultArgoRolloutsResourceName,
-				"app.kubernetes.io/part-of":   DefaultArgoRolloutsResourceName,
-				"app.kubernetes.io/component": DefaultArgoRolloutsResourceName},
+			map[string]string{},
+			map[string]string{"user-annotation": "value"},
+			map[string]string{},
 			map[string]string{},
 		),
-
 		Entry("when both user and non-user-defined labels are present, and annotations are empty",
 			map[string]string{"user-label": "value",
 				"app.kubernetes.io/name":      DefaultArgoRolloutsResourceName,
@@ -499,6 +493,20 @@ var _ = Describe("removeUserLabelsAndAnnotations tests", func() {
 				"app.kubernetes.io/component": DefaultArgoRolloutsResourceName},
 			map[string]string{},
 		),
+		Entry("rbac aggregate-to- labels should not be removed, as we add these to ClusterRoles",
+			map[string]string{
+				"rbac.authorization.k8s.io/aggregate-to-admin": "true",
+				"app.kubernetes.io/name":                       DefaultArgoRolloutsResourceName,
+				"app.kubernetes.io/part-of":                    DefaultArgoRolloutsResourceName,
+				"app.kubernetes.io/component":                  DefaultArgoRolloutsResourceName},
+			map[string]string{},
+			map[string]string{
+				"rbac.authorization.k8s.io/aggregate-to-admin": "true",
+				"app.kubernetes.io/name":                       DefaultArgoRolloutsResourceName,
+				"app.kubernetes.io/part-of":                    DefaultArgoRolloutsResourceName,
+				"app.kubernetes.io/component":                  DefaultArgoRolloutsResourceName},
+			map[string]string{},
+		),
 	)
 })
 
@@ -683,6 +691,23 @@ var _ = Describe("setAdditionalRolloutsLabelsAndAnnotationsToObject tests", func
 	})
 })
 
+var _ = Describe("areStringMapsEqual tests", func() {
+	DescribeTable("areStringMapsEqual should match reflect.DeepEqual(), except when comparing nil with empty map", func(one map[string]string, two map[string]string, expectedRes bool) {
+
+		res := areStringMapsEqual(one, two)
+		Expect(res).To(Equal(expectedRes))
+	},
+		Entry("both nil", nil, nil, true),
+		Entry("one nil, one empty", map[string]string{}, nil, true),
+		Entry("one empty, one nil", nil, map[string]string{}, true),
+		Entry("both empty", map[string]string{}, map[string]string{}, true),
+		Entry("one empty, one has value", map[string]string{}, map[string]string{"key": "value"}, false),
+		Entry("one has value, one nil", map[string]string{"key": "value"}, nil, false),
+		Entry("equal values", map[string]string{"key": "value"}, map[string]string{"key": "value"}, true),
+	)
+
+})
+
 var _ = Describe("envMerge tests", func() {
 	DescribeTable("merges two slices of EnvVar entries",
 		func(existing, merge, expected []corev1.EnvVar, override bool) {
diff --git a/hack/run-rollouts-manager-e2e-tests.sh b/hack/run-rollouts-manager-e2e-tests.sh
index eb5f48a..29f6845 100755
--- a/hack/run-rollouts-manager-e2e-tests.sh
+++ b/hack/run-rollouts-manager-e2e-tests.sh
@@ -5,6 +5,15 @@ SCRIPTPATH="$(
   pwd -P
 )"
 
+function cleanup {
+  echo "* Cleaning up"
+  killall main || true
+  killall go || true
+}
+
+trap cleanup EXIT
+
+
 # Treat undefined variables as errors
 set -u
 
@@ -12,6 +21,15 @@ extract_metrics_data() {
 
   TMP_DIR=$(mktemp -d 2>/dev/null || mktemp -d -t 'mytmpdir')
 
+  while true; do
+    curl http://localhost:8080/metrics -o "$TMP_DIR/rollouts-metric-endpoint-output.txt"
+    if [ "$?" == "0" ]; then
+        break
+    fi
+    echo "* Waiting for Metrics endpoint to become available"
+    sleep 3
+  done
+
   # 1) Extract REST client get/put/post metrics
   
   # Example: the metrics from /metric endpoint look like this:
@@ -20,9 +38,10 @@ extract_metrics_data() {
   # rest_client_requests_total{code="201",host="api.pgqqd-novoo-oqu.pa43.p3.openshiftapps.com:443",method="POST"} 110
 
   curl http://localhost:8080/metrics -o "$TMP_DIR/rollouts-metric-endpoint-output.txt"
-  GET_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "GET" | rev | cut -d' ' -f1`
-  PUT_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "PUT" | rev | cut -d' ' -f1`
-  POST_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "POST" | rev | cut -d' ' -f1`
+
+  GET_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "GET" | rev | cut -d' ' -f1 | rev`
+  PUT_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "PUT" | grep -v "409" | rev | cut -d' ' -f1 | rev`
+  POST_REQUESTS=`cat "$TMP_DIR/rollouts-metric-endpoint-output.txt" | grep "rest_client_requests_total" | grep "POST" | rev | cut -d' ' -f1 | rev`
 
   if [[ "$GET_REQUESTS" == "" ]]; then
     GET_REQUESTS=0
@@ -91,28 +110,6 @@ else
 
 fi
 
-set +e
-
-# If the output from the E2E operator is available, then check it for errors
-if [ -f "/tmp/e2e-operator-run.log" ]; then
-
-  # Wait for the controller to flush to the file, before killing the controller
-  sleep 10
-  killall main
-  sleep 5
-
-  # Grep the log for unexpected errors
-  # - Ignore errors that are expected to occur
-
-  UNEXPECTED_ERRORS_FOUND_TEXT=`cat /tmp/e2e-operator-run.log | grep "ERROR" | grep -v "because it is being terminated" | grep -v "the object has been modified; please apply your changes to the latest version and try again" | grep -v "unable to fetch" | grep -v "StorageError"` | grep -v "client rate limiter Wait returned an error: context canceled"
-  UNEXPECTED_ERRORS_COUNT=`echo $UNEXPECTED_ERRORS_FOUND_TEXT | grep "ERROR" | wc -l`
-  
-  if [ "$UNEXPECTED_ERRORS_COUNT" != "0" ]; then
-      echo "Unexpected errors found: $UNEXPECTED_ERRORS_FOUND_TEXT"
-      exit 1
-  fi
-fi
-
 # Sanity test the behaviour of the operator during the tests:
 # - We check the (prometheus) metrics coming from the 'localhost:8080/metrics' endpoint of the operator.
 # - For example, if the reported # of Reconcile calls was unusually high, this might mean that the operator was stuck in a Reconcile loop
@@ -148,19 +145,19 @@ sanity_test_metrics_data() {
   if [[ "`expr $PUT_REQUEST_PERCENT \> 40`" == "1" ]]; then
     # This value is arbitrary, and should be updated if at any point it becomes inaccurate (but first audit the test/code to make sure it is not an actual product/test issue, before increasing)
 
-    echo "Put request %$PUT_REQUEST_PERCENT was greater than the expected value"
+    echo "Put request was %$PUT_REQUEST_PERCENT greater than the expected value"
     exit 1
 
   fi
 
-  if [[ "`expr $DELTA_ERROR_RECONCILES \> 20`" == "1" ]]; then
+  if [[ "`expr $DELTA_ERROR_RECONCILES \> 30`" == "1" ]]; then
     # This value is arbitrary, and should be updated if at any point it becomes inaccurate (but first audit the test/code to make sure it is not an actual product/test issue, before increasing)
     echo "Number of Reconcile calls that returned an error $DELTA_ERROR_RECONCILES was greater than the expected value"
     exit 1
 
   fi
 
-  if [[ "`expr $DELTA_SUCCESS_RECONCILES \> 200`" == "1" ]]; then
+  if [[ "`expr $DELTA_SUCCESS_RECONCILES \> 250`" == "1" ]]; then
     # This value is arbitrary, and should be updated if at any point it becomes inaccurate (but first audit the test/code to make sure it is not an actual product/test issue, before increasing)
 
     echo "Number of Reconcile calls that returned success $DELTA_SUCCESS_RECONCILES was greater than the expected value"
@@ -169,4 +166,26 @@ sanity_test_metrics_data() {
   fi
 }
 
-sanity_test_metrics_data
\ No newline at end of file
+sanity_test_metrics_data
+
+set +e
+
+# If the output from the E2E operator is available, then check it for errors
+if [ -f "/tmp/e2e-operator-run.log" ]; then
+
+  # Wait for the controller to flush to the file, before killing the controller
+  sleep 10
+  killall main
+  sleep 5
+
+  # Grep the log for unexpected errors
+  # - Ignore errors that are expected to occur
+
+  UNEXPECTED_ERRORS_FOUND_TEXT=`cat /tmp/e2e-operator-run.log | grep "ERROR" | grep -v "because it is being terminated" | grep -v "the object has been modified; please apply your changes to the latest version and try again" | grep -v "unable to fetch" | grep -v "StorageError"` | grep -v "client rate limiter Wait returned an error: context canceled"
+  UNEXPECTED_ERRORS_COUNT=`echo $UNEXPECTED_ERRORS_FOUND_TEXT | grep "ERROR" | wc -l`
+  
+  if [ "$UNEXPECTED_ERRORS_COUNT" != "0" ]; then
+      echo "Unexpected errors found: $UNEXPECTED_ERRORS_FOUND_TEXT"
+      exit 1
+  fi
+fi

From f399ca63a772860b09e126db1d572a89084a21a9 Mon Sep 17 00:00:00 2001
From: Jonathan West <jonwest@redhat.com>
Date: Tue, 1 Oct 2024 14:34:18 -0400
Subject: [PATCH 4/5] Add timeout, fix Rollouts E2E script and Makefile bugs

Signed-off-by: Jonathan West <jonwest@redhat.com>
---
 .github/workflows/e2e_tests.yml          |  1 +
 .github/workflows/rollouts_e2e_tests.yml |  1 +
 Makefile                                 |  4 ++--
 hack/run-rollouts-manager-e2e-tests.sh   | 20 ++++++++++++++------
 4 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/e2e_tests.yml b/.github/workflows/e2e_tests.yml
index 3313bb3..b369d8c 100644
--- a/.github/workflows/e2e_tests.yml
+++ b/.github/workflows/e2e_tests.yml
@@ -12,6 +12,7 @@ jobs:
 
   test-e2e:
     name: Run end-to-end tests
+    timeout-minutes: 90
     runs-on: ubuntu-latest
     strategy:
       matrix:
diff --git a/.github/workflows/rollouts_e2e_tests.yml b/.github/workflows/rollouts_e2e_tests.yml
index da552a6..c71200b 100644
--- a/.github/workflows/rollouts_e2e_tests.yml
+++ b/.github/workflows/rollouts_e2e_tests.yml
@@ -13,6 +13,7 @@ jobs:
   test-e2e:
     name: Run end-to-end tests from upstream Argo Rollouts repo
     runs-on: ubuntu-latest
+    timeout-minutes: 90
     strategy:
       matrix:
         k3s-version: [ v1.28.2 ]
diff --git a/Makefile b/Makefile
index c4e5cf3..5afc515 100644
--- a/Makefile
+++ b/Makefile
@@ -136,7 +136,7 @@ start-e2e-namespace-scoped-bg: ## Start the operator, to run the e2e tests
 
 .PHONY: start-e2e-cluster-scoped-bg 
 start-e2e-cluster-scoped-bg: ## Start the operator, to run the e2e tests
-	RUN_IN_BACKGROUND=true hack/start-rollouts-manager-for-e2e-tests.sh
+	RUN_IN_BACKGROUND=true  NAMESPACE_SCOPED_ARGO_ROLLOUTS=false hack/start-rollouts-manager-for-e2e-tests.sh
 
 
 .PHONY: test-e2e-namespace-scoped
@@ -145,7 +145,7 @@ test-e2e-namespace-scoped: ## Run operator e2e tests
 
 .PHONY: test-e2e-cluster-scoped
 test-e2e-cluster-scoped: ## Run operator e2e tests
-	hack/run-rollouts-manager-e2e-tests.sh
+	NAMESPACE_SCOPED_ARGO_ROLLOUTS=false hack/run-rollouts-manager-e2e-tests.sh
 
 .PHONY: start-test-e2e-all
 start-test-e2e-all: start-e2e-namespace-scoped-bg test-e2e-namespace-scoped start-e2e-cluster-scoped-bg test-e2e-cluster-scoped
diff --git a/hack/run-rollouts-manager-e2e-tests.sh b/hack/run-rollouts-manager-e2e-tests.sh
index 29f6845..154323d 100755
--- a/hack/run-rollouts-manager-e2e-tests.sh
+++ b/hack/run-rollouts-manager-e2e-tests.sh
@@ -157,7 +157,7 @@ sanity_test_metrics_data() {
 
   fi
 
-  if [[ "`expr $DELTA_SUCCESS_RECONCILES \> 250`" == "1" ]]; then
+  if [[ "`expr $DELTA_SUCCESS_RECONCILES \> 1200`" == "1" ]]; then
     # This value is arbitrary, and should be updated if at any point it becomes inaccurate (but first audit the test/code to make sure it is not an actual product/test issue, before increasing)
 
     echo "Number of Reconcile calls that returned success $DELTA_SUCCESS_RECONCILES was greater than the expected value"
@@ -181,11 +181,19 @@ if [ -f "/tmp/e2e-operator-run.log" ]; then
   # Grep the log for unexpected errors
   # - Ignore errors that are expected to occur
 
-  UNEXPECTED_ERRORS_FOUND_TEXT=`cat /tmp/e2e-operator-run.log | grep "ERROR" | grep -v "because it is being terminated" | grep -v "the object has been modified; please apply your changes to the latest version and try again" | grep -v "unable to fetch" | grep -v "StorageError"` | grep -v "client rate limiter Wait returned an error: context canceled"
-  UNEXPECTED_ERRORS_COUNT=`echo $UNEXPECTED_ERRORS_FOUND_TEXT | grep "ERROR" | wc -l`
+  set +u # allow undefined vars
+
+  UNEXPECTED_ERRORS_FOUND_TEXT=`cat /tmp/e2e-operator-run.log | grep "ERROR" | grep -v "because it is being terminated" | grep -v "the object has been modified; please apply your changes to the latest version and try again" | grep -v "unable to fetch" | grep -v "StorageError" | grep -v "client rate limiter Wait returned an error: context canceled"`
+
+  if [ "$UNEXPECTED_ERRORS_FOUND_TEXT" != "" ]; then
   
-  if [ "$UNEXPECTED_ERRORS_COUNT" != "0" ]; then
-      echo "Unexpected errors found: $UNEXPECTED_ERRORS_FOUND_TEXT"
-      exit 1
+    UNEXPECTED_ERRORS_COUNT=`echo $UNEXPECTED_ERRORS_FOUND_TEXT | grep "ERROR" | wc -l`
+    
+    if [ "$UNEXPECTED_ERRORS_COUNT" != "0" ]; then
+        echo "Unexpected errors found: $UNEXPECTED_ERRORS_FOUND_TEXT"
+        exit 1
+    fi  
   fi
+
+
 fi

From bf50c4124df38832a6829b9ebb1c6368ea4bf16a Mon Sep 17 00:00:00 2001
From: Jonathan West <jonwest@redhat.com>
Date: Wed, 9 Oct 2024 12:20:52 -0400
Subject: [PATCH 5/5] Increase number of expected reconciles

Signed-off-by: Jonathan West <jonwest@redhat.com>
---
 hack/run-rollouts-manager-e2e-tests.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/hack/run-rollouts-manager-e2e-tests.sh b/hack/run-rollouts-manager-e2e-tests.sh
index 154323d..7ac09c3 100755
--- a/hack/run-rollouts-manager-e2e-tests.sh
+++ b/hack/run-rollouts-manager-e2e-tests.sh
@@ -150,9 +150,9 @@ sanity_test_metrics_data() {
 
   fi
 
-  if [[ "`expr $DELTA_ERROR_RECONCILES \> 30`" == "1" ]]; then
+  if [[ "`expr $DELTA_ERROR_RECONCILES \> 60`" == "1" ]]; then
     # This value is arbitrary, and should be updated if at any point it becomes inaccurate (but first audit the test/code to make sure it is not an actual product/test issue, before increasing)
-    echo "Number of Reconcile calls that returned an error $DELTA_ERROR_RECONCILES was greater than the expected value"
+    echo "Number of Reconcile calls that returned an error '$DELTA_ERROR_RECONCILES' was greater than the expected value"
     exit 1
 
   fi
@@ -160,7 +160,7 @@ sanity_test_metrics_data() {
   if [[ "`expr $DELTA_SUCCESS_RECONCILES \> 1200`" == "1" ]]; then
     # This value is arbitrary, and should be updated if at any point it becomes inaccurate (but first audit the test/code to make sure it is not an actual product/test issue, before increasing)
 
-    echo "Number of Reconcile calls that returned success $DELTA_SUCCESS_RECONCILES was greater than the expected value"
+    echo "Number of Reconcile calls that returned success '$DELTA_SUCCESS_RECONCILES' was greater than the expected value"
     exit 1
 
   fi