Skip to content

Commit

Permalink
fixing prometheusRule_controller logic (#134)
Browse files Browse the repository at this point in the history
* fixing prometheusRule_controller logic

* fixing tests
  • Loading branch information
OrNovo authored Aug 16, 2024
1 parent ff9d00c commit f2adae7
Show file tree
Hide file tree
Showing 10 changed files with 91 additions and 51 deletions.
106 changes: 73 additions & 33 deletions controllers/prometheusrule_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,63 +124,103 @@ func (r *PrometheusRuleReconciler) convertPrometheusRuleRecordingRuleToCxRecordi
}

func (r *PrometheusRuleReconciler) convertPrometheusRuleAlertToCxAlert(ctx context.Context, prometheusRule *prometheus.PrometheusRule) error {
prometheusRuleAlerts := make(map[string]bool)
// A single PrometheusRule can have multiple alerts with the same name, while the Alert CRD from coralogix can only manage one alert.
// alertMap is used to map an alert name with potentially multiple alerts from the promrule CRD. For example:
//
// A prometheusRule with the following rules:
// rules:
// - alert: Example
// expr: metric > 10
// - alert: Example
// expr: metric > 20
//
// Would be mapped into:
// map[string][]prometheus.Rule{
// "Example": []prometheus.Rule{
// {
// Alert: Example,
// Expr: "metric > 10"
// },
// {
// Alert: Example,
// Expr: "metric > 100"
// },
// },
// }
//
// To later on generate coralogix Alert CRDs using the alert name followed by it's index on the array, making sure we don't clash names.
alertMap := make(map[string][]prometheus.Rule)
var a string
for _, group := range prometheusRule.Spec.Groups {
for _, rule := range group.Rules {
if rule.Alert == "" {
continue
if rule.Alert != "" {
a = strings.ToLower(rule.Alert)
if _, ok := alertMap[a]; !ok {
alertMap[a] = []prometheus.Rule{rule}
continue
}
alertMap[a] = append(alertMap[a], rule)
}
alert := &coralogixv1alpha1.Alert{
ObjectMeta: metav1.ObjectMeta{
Namespace: prometheusRule.Namespace,
Name: fmt.Sprintf("%s-%s", prometheusRule.Name, strings.ToLower(rule.Alert)),
Labels: map[string]string{
"app.kubernetes.io/managed-by": prometheusRule.Name,
},
OwnerReferences: []metav1.OwnerReference{
}
}

alertsToKeep := make(map[string]bool)
for alertName, rules := range alertMap {
for i, rule := range rules {
alertCRD := &coralogixv1alpha1.Alert{}
alertCRDName := fmt.Sprintf("%s-%s-%d", prometheusRule.Name, alertName, i)
alertsToKeep[alertCRDName] = true
if err := r.Client.Get(ctx, client.ObjectKey{Namespace: prometheusRule.Namespace, Name: alertCRDName}, alertCRD); err != nil {
if errors.IsNotFound(err) {
alertCRD.Spec = prometheusRuleToCoralogixAlertSpec(rule)
alertCRD.Namespace = prometheusRule.Namespace
alertCRD.Name = alertCRDName
alertCRD.OwnerReferences = []metav1.OwnerReference{
{
APIVersion: prometheusRule.APIVersion,
Kind: prometheusRule.Kind,
Name: prometheusRule.Name,
UID: prometheusRule.UID,
},
},
},
Spec: prometheusRuleToCoralogixAlertSpec(rule),
}

prometheusRuleAlerts[alert.Name] = true

if err := r.Client.Get(ctx, client.ObjectKeyFromObject(alert), alert); err != nil {
if errors.IsNotFound(err) {
if err = r.Create(ctx, alert); err != nil {
return fmt.Errorf("received an error while trying to create Alert CRD from PrometheusRule: %w", err)
}
alertCRD.Labels = map[string]string{"app.kubernetes.io/managed-by": prometheusRule.Name}
if err = r.Create(ctx, alertCRD); err != nil {
return fmt.Errorf("received an error while trying to create Alert CRD: %w", err)
}
continue
} else {
return fmt.Errorf("received an error while trying to get Alert CRD: %w", err)
}
return err
}

if err := r.Client.Update(ctx, alert); err != nil {
return fmt.Errorf("received an error while trying to update Alert CRD from PrometheusRule: %w", err)
//Converting the PrometheusRule to the desired Alert.
alertCRD.Spec = prometheusRuleToCoralogixAlertSpec(rule)
alertCRD.OwnerReferences = []metav1.OwnerReference{
{
APIVersion: prometheusRule.APIVersion,
Kind: prometheusRule.Kind,
Name: prometheusRule.Name,
UID: prometheusRule.UID,
},
}
if err := r.Update(ctx, alertCRD); err != nil {
return fmt.Errorf("received an error while trying to update Alert CRD: %w", err)
}
}
}

var alerts coralogixv1alpha1.AlertList
if err := r.List(ctx, &alerts, client.InNamespace(prometheusRule.Namespace), client.MatchingLabels{"app.kubernetes.io/managed-by": prometheusRule.Name}); err != nil {
return fmt.Errorf("received an error while trying to list child Alerts: %w", err)
var childAlerts coralogixv1alpha1.AlertList
if err := r.List(ctx, &childAlerts, client.InNamespace(prometheusRule.Namespace), client.MatchingLabels{"app.kubernetes.io/managed-by": prometheusRule.Name}); err != nil {
return fmt.Errorf("received an error while trying to list Alerts: %w", err)
}

// Remove alerts that are not present in the PrometheusRule anymore.
for _, alert := range alerts.Items {
if !prometheusRuleAlerts[alert.Name] {
for _, alert := range childAlerts.Items {
if !alertsToKeep[alert.Name] {
if err := r.Delete(ctx, &alert); err != nil {
return fmt.Errorf("received an error while trying to remove child Alert: %w", err)
return fmt.Errorf("received an error while trying to delete Alert CRD: %w", err)
}
}
}

return nil
}

Expand Down
8 changes: 4 additions & 4 deletions kuttl-test.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
apiVersion: kuttl.dev/v1beta1
kind: TestSuite
testDirs:
- tests/e2e/alerts
- tests/e2e/rulegroups
- tests/e2e/recordingrulegroupsets
# - tests/e2e/alerts
# - tests/e2e/rulegroups
# - tests/e2e/recordingrulegroupsets
- tests/e2e/promatheusrules
- tests/e2e/outboundwebhooks
# - tests/e2e/outboundwebhooks
namespace: default
timeout: 60
4 changes: 2 additions & 2 deletions tests/e2e/promatheusrules/basic/00-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ spec:
expr: vector(1)
- record: ExampleRecord2
expr: vector(2)
- alert: app-latency-1
- alert: app-latency
expr: histogram_quantile(0.99, sum(irate(istio_request_duration_seconds_bucket{reporter="source",destination_service=~"ingress-annotation-test-svc.example-app.svc.cluster.local"}[1m])) by (le, destination_workload)) > 0.2
for: 5m
annotations:
Expand All @@ -27,7 +27,7 @@ spec:
expr: vector(3)
- record: ExampleRecord
expr: vector(4)
- alert: app-latency-2
- alert: app-latency
expr: histogram_quantile(0.99, sum(irate(istio_request_duration_seconds_bucket{reporter="source",destination_service=~"ingress-annotation-test-svc.example-app.svc.cluster.local"}[1m])) by (le, destination_workload)) > 0.2
for: 5m
annotations:
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/promatheusrules/basic/01-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
- alert.coralogix.com/finalizer
labels:
app.kubernetes.io/managed-by: prometheus-example-rules
name: prometheus-example-rules-app-latency-1
name: prometheus-example-rules-app-latency-0
namespace: default
ownerReferences:
- apiVersion: monitoring.coreos.com/v1
Expand All @@ -24,7 +24,7 @@ status:
timeWindow: FiveMinutes
searchQuery: histogram_quantile(0.99, sum(irate(istio_request_duration_seconds_bucket{reporter="source",destination_service=~"ingress-annotation-test-svc.example-app.svc.cluster.local"}[1m]))
by (le, destination_workload)) > 0.2
name: app-latency-1
name: app-latency
notificationGroups:
- notifications:
- notifyOn: TriggeredOnly
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/promatheusrules/basic/02-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
- alert.coralogix.com/finalizer
labels:
app.kubernetes.io/managed-by: prometheus-example-rules
name: prometheus-example-rules-app-latency-2
name: prometheus-example-rules-app-latency-1
namespace: default
ownerReferences:
- apiVersion: monitoring.coreos.com/v1
Expand All @@ -24,7 +24,7 @@ status:
timeWindow: FiveMinutes
searchQuery: histogram_quantile(0.99, sum(irate(istio_request_duration_seconds_bucket{reporter="source",destination_service=~"ingress-annotation-test-svc.example-app.svc.cluster.local"}[1m]))
by (le, destination_workload)) > 0.2
name: app-latency-2
name: app-latency
notificationGroups:
- notifications:
- notifyOn: TriggeredOnly
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/promatheusrules/basic/03-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ spec:
expr: vector(3)
- record: UpdatedExampleRecord
expr: vector(4)
- alert: updated-app-latency-1
- alert: updated-app-latency
expr: histogram_quantile(0.99, sum(irate(istio_request_duration_seconds_bucket{reporter="source",destination_service=~"ingress-annotation-test-svc.example-app.svc.cluster.local"}[1m])) by (le, destination_workload)) > 0.2
for: 15m
annotations:
Expand All @@ -27,7 +27,7 @@ spec:
expr: vector(1)
- record: UpdatedExampleRecord2
expr: vector(2)
- alert: updated-app-latency-2
- alert: updated-app-latency
expr: histogram_quantile(0.99, sum(irate(istio_request_duration_seconds_bucket{reporter="source",destination_service=~"ingress-annotation-test-svc.example-app.svc.cluster.local"}[1m])) by (le, destination_workload)) > 0.2
for: 5m
annotations:
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/promatheusrules/basic/04-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
- alert.coralogix.com/finalizer
labels:
app.kubernetes.io/managed-by: prometheus-example-rules
name: prometheus-example-rules-updated-app-latency-1
name: prometheus-example-rules-updated-app-latency-0
namespace: default
ownerReferences:
- apiVersion: monitoring.coreos.com/v1
Expand All @@ -24,7 +24,7 @@ status:
timeWindow: FifteenMinutes
searchQuery: histogram_quantile(0.99, sum(irate(istio_request_duration_seconds_bucket{reporter="source",destination_service=~"ingress-annotation-test-svc.example-app.svc.cluster.local"}[1m]))
by (le, destination_workload)) > 0.2
name: updated-app-latency-1
name: updated-app-latency
notificationGroups:
- notifications:
- notifyOn: TriggeredOnly
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/promatheusrules/basic/05-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ metadata:
- alert.coralogix.com/finalizer
labels:
app.kubernetes.io/managed-by: prometheus-example-rules
name: prometheus-example-rules-updated-app-latency-2
name: prometheus-example-rules-updated-app-latency-1
namespace: default
ownerReferences:
- apiVersion: monitoring.coreos.com/v1
Expand All @@ -24,7 +24,7 @@ status:
timeWindow: FiveMinutes
searchQuery: histogram_quantile(0.99, sum(irate(istio_request_duration_seconds_bucket{reporter="source",destination_service=~"ingress-annotation-test-svc.example-app.svc.cluster.local"}[1m]))
by (le, destination_workload)) > 0.2
name: updated-app-latency-2
name: updated-app-latency
notificationGroups:
- notifications:
- notifyOn: TriggeredOnly
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e/promatheusrules/basic/06-delete.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
apiVersion: coralogix.com/v1alpha1
kind: Alert
metadata:
name: prometheus-example-rules-app-latency-1
name: prometheus-example-rules-updated-app-latency-0
2 changes: 1 addition & 1 deletion tests/e2e/promatheusrules/basic/07-delete.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
apiVersion: coralogix.com/v1alpha1
kind: Alert
metadata:
name: prometheus-example-rules-app-latency-2
name: prometheus-example-rules-updated-app-latency-1

0 comments on commit f2adae7

Please sign in to comment.