From b1f5fccc8342dda95fe3bf900a129fd63c19836b Mon Sep 17 00:00:00 2001 From: Lukasz Mierzwa Date: Mon, 11 Sep 2023 11:57:53 +0100 Subject: [PATCH] Support keep_firing_for everywhere --- docs/changelog.md | 10 + docs/checks/alerts/for.md | 4 +- docs/checks/rule/for.md | 47 +- docs/configuration.md | 11 + internal/checks/alerts_count.go | 5 +- internal/checks/alerts_count_test.go | 144 +++++ internal/checks/alerts_for.go | 26 +- internal/checks/alerts_for_test.go | 51 ++ internal/checks/rule_for.go | 34 +- internal/checks/rule_for_test.go | 56 +- .../config/__snapshots__/config_test.snap | 500 ++++++++++++++++++ internal/config/config_test.go | 47 ++ internal/config/for.go | 12 + internal/config/match.go | 28 +- internal/config/rule.go | 46 +- internal/parser/models.go | 14 + 16 files changed, 973 insertions(+), 62 deletions(-) diff --git a/docs/changelog.md b/docs/changelog.md index 449a2454..28d5fa3d 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,15 @@ # Changelog +## v0.46.0 + +### Added + +- Added support for `keep_firing_for` in alerting rules - #713. +- Added `rule/keep_firing_for` check - #713. +- Added `alerts/count` check will now estimate alerts using + `keep_firing_for` field if set - #713. +- Configuration rule `match` block supports a new filter `keep_firing_for`. + ## v0.45.0 ### Added diff --git a/docs/checks/alerts/for.md b/docs/checks/alerts/for.md index 4f9a2cd9..9512d6d3 100644 --- a/docs/checks/alerts/for.md +++ b/docs/checks/alerts/for.md @@ -6,8 +6,8 @@ grand_parent: Documentation # alerts/for -This check will warn if an alert rule uses invalid `for` value -or if it passes default value that can be removed to simplify rule. +This check will warn if an alert rule uses invalid `for` or `keep_firing_for` +value or if it passes default value that can be removed to simplify rule. ## Configuration diff --git a/docs/checks/rule/for.md b/docs/checks/rule/for.md index 196fd506..5f90b124 100644 --- a/docs/checks/rule/for.md +++ b/docs/checks/rule/for.md @@ -6,10 +6,10 @@ grand_parent: Documentation # rule/for -This check allows to enforce the presence of `for` field on alerting -rules. +This check allows to enforce the presence of `for` or `keep_firing_for` field +on alerting rules. You can configure it to enforce some minimal and/or maximum duration -set on alerts via `for` field. +set on alerts via `for` and/or `keep_firing_for` fields. ## Configuration @@ -17,10 +17,13 @@ This check doesn't have any configuration options. ## How to enable it +This check uses either `for` or `keep_firing_for` configuration +blocks, depending on which alerting rule field you want to enforce. + Syntax: ```js -for { +for|keep_firing_for { severity = "bug|warning|info" min = "5m" max = "10m" @@ -33,6 +36,42 @@ for { - `max` - maximum allowed `for` value for matching alerting rules. - If not set maximum `for` duration won't be enforced. +Example: + +Enforce that all alerts have `for` fields of `5m` or more: + +```js +for { + severity = "bug" + min = "5m" + max = "10m" +} +``` + +Enforce that all alerts have `keep_firing_for` fields with no more than `1h`: + +```js +keep_firing_for { + severity = "bug" + max = "1h" +} +``` + +To enforce both at the same time: + +```js +for { + severity = "bug" + min = "5m" + max = "10m" +} + +keep_firing_for { + severity = "bug" + max = "1h" +} +``` + ## How to disable it You can disable this check globally by adding this config block: diff --git a/docs/configuration.md b/docs/configuration.md index 63abd045..af0b19ea 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -388,6 +388,8 @@ rule { field present and matching provided value will be checked by this rule. Recording rules will never match it as they don't have `for` field. Syntax is `OP DURATION` where `OP` can be any of `=`, `!=`, `>`, `>=`, `<`, `<=`. +- `match:keep_firing_for` - optional alerting rule `keep_firing_for` filter. Works the same + way as `for` match filter. - `ignore` - works exactly like `match` but does the opposite - any alerting or recording rule matching all conditions defined on `ignore` will not be checked by this `rule` block. @@ -433,3 +435,12 @@ rule { [ check applied only to alerting rules with "for" field value that is >= 5m ] } ``` + +```js +rule { + match { + keep_firing_for = "> 15m" + } + [ check applied only to alerting rules with "keep_firing_for" field value that is > 15m ] +} +``` diff --git a/internal/checks/alerts_count.go b/internal/checks/alerts_count.go index 5d119516..7e2f2deb 100644 --- a/internal/checks/alerts_count.go +++ b/internal/checks/alerts_count.go @@ -90,7 +90,7 @@ func (c AlertsCheck) Check(ctx context.Context, _ string, rule parser.Rule, _ [] forDur, _ = model.ParseDuration(rule.AlertingRule.For.Value.Value) } var keepFiringForDur model.Duration - if rule.AlertingRule.For != nil { + if rule.AlertingRule.KeepFiringFor != nil { keepFiringForDur, _ = model.ParseDuration(rule.AlertingRule.KeepFiringFor.Value.Value) } @@ -111,6 +111,9 @@ func (c AlertsCheck) Check(ctx context.Context, _ string, rule parser.Rule, _ [] if rule.AlertingRule.For != nil { lines = append(lines, rule.AlertingRule.For.Lines()...) } + if rule.AlertingRule.KeepFiringFor != nil { + lines = append(lines, rule.AlertingRule.KeepFiringFor.Lines()...) + } sort.Ints(lines) delta := qr.Series.Until.Sub(qr.Series.From) diff --git a/internal/checks/alerts_count_test.go b/internal/checks/alerts_count_test.go index de7fb64f..31420900 100644 --- a/internal/checks/alerts_count_test.go +++ b/internal/checks/alerts_count_test.go @@ -645,6 +645,150 @@ func TestAlertsCountCheck(t *testing.T) { }, }, }, + { + description: "keep_firing_for: 10m", + content: "- alert: Foo Is Down\n keep_firing_for: 10m\n expr: up{job=\"foo\"} == 0\n", + checker: newAlertsCheck, + prometheus: newSimpleProm, + problems: func(uri string) []checks.Problem { + return []checks.Problem{ + { + Fragment: `up{job="foo"} == 0`, + Lines: []int{2, 3}, + Reporter: "alerts/count", + Text: alertsText("prom", uri, 2, "1d"), + Severity: checks.Information, + }, + } + }, + mocks: []*prometheusMock{ + { + conds: []requestCondition{ + requireRangeQueryPath, + formCond{key: "query", value: `up{job="foo"} == 0`}, + }, + resp: matrixResponse{ + samples: []*model.SampleStream{ + generateSampleStream( + map[string]string{"job": "foo"}, + time.Now().Add(time.Hour*-24), + time.Now().Add(time.Hour*-24).Add(time.Minute*6), + time.Minute, + ), + generateSampleStream( + map[string]string{"job": "foo"}, + time.Now().Add(time.Hour*-23), + time.Now().Add(time.Hour*-23).Add(time.Minute*6), + time.Minute, + ), + generateSampleStream( + map[string]string{"job": "foo"}, + time.Now().Add(time.Hour*-22), + time.Now().Add(time.Hour*-22).Add(time.Minute), + time.Minute, + ), + generateSampleStream( + map[string]string{"job": "foo"}, + time.Now().Add(time.Hour*-21), + time.Now().Add(time.Hour*-21).Add(time.Minute*16), + time.Minute, + ), + generateSampleStream( + map[string]string{"job": "foo"}, + time.Now().Add(time.Hour*-20), + time.Now().Add(time.Hour*-20).Add(time.Minute*9).Add(time.Second*59), + time.Minute, + ), + generateSampleStream( + map[string]string{"job": "foo"}, + time.Now().Add(time.Hour*-18), + time.Now().Add(time.Hour*-18).Add(time.Hour*2), + time.Minute, + ), + }, + }, + }, + { + conds: []requestCondition{ + requireRangeQueryPath, + formCond{key: "query", value: `count(up)`}, + }, + resp: respondWithSingleRangeVector1D(), + }, + }, + }, + { + description: "for: 10m + keep_firing_for: 10m", + content: "- alert: Foo Is Down\n for: 10m\n keep_firing_for: 10m\n expr: up{job=\"foo\"} == 0\n", + checker: newAlertsCheck, + prometheus: newSimpleProm, + problems: func(uri string) []checks.Problem { + return []checks.Problem{ + { + Fragment: `up{job="foo"} == 0`, + Lines: []int{2, 3, 4}, + Reporter: "alerts/count", + Text: alertsText("prom", uri, 1, "1d"), + Severity: checks.Information, + }, + } + }, + mocks: []*prometheusMock{ + { + conds: []requestCondition{ + requireRangeQueryPath, + formCond{key: "query", value: `up{job="foo"} == 0`}, + }, + resp: matrixResponse{ + samples: []*model.SampleStream{ + generateSampleStream( + map[string]string{"job": "foo"}, + time.Now().Add(time.Hour*-24), + time.Now().Add(time.Hour*-24).Add(time.Minute*6), + time.Minute, + ), + generateSampleStream( + map[string]string{"job": "foo"}, + time.Now().Add(time.Hour*-23), + time.Now().Add(time.Hour*-23).Add(time.Minute*6), + time.Minute, + ), + generateSampleStream( + map[string]string{"job": "foo"}, + time.Now().Add(time.Hour*-22), + time.Now().Add(time.Hour*-22).Add(time.Minute), + time.Minute, + ), + generateSampleStream( + map[string]string{"job": "foo"}, + time.Now().Add(time.Hour*-21), + time.Now().Add(time.Hour*-21).Add(time.Minute*16), + time.Minute, + ), + generateSampleStream( + map[string]string{"job": "foo"}, + time.Now().Add(time.Hour*-20), + time.Now().Add(time.Hour*-20).Add(time.Minute*9).Add(time.Second*59), + time.Minute, + ), + generateSampleStream( + map[string]string{"job": "foo"}, + time.Now().Add(time.Hour*-18), + time.Now().Add(time.Hour*-18).Add(time.Hour*2), + time.Minute, + ), + }, + }, + }, + { + conds: []requestCondition{ + requireRangeQueryPath, + formCond{key: "query", value: `count(up)`}, + }, + resp: respondWithSingleRangeVector1D(), + }, + }, + }, } runTests(t, testCases) diff --git a/internal/checks/alerts_for.go b/internal/checks/alerts_for.go index f6a65027..1cf5e0b2 100644 --- a/internal/checks/alerts_for.go +++ b/internal/checks/alerts_for.go @@ -33,15 +33,26 @@ func (c AlertsForChecksFor) Reporter() string { } func (c AlertsForChecksFor) Check(_ context.Context, _ string, rule parser.Rule, _ []discovery.Entry) (problems []Problem) { - if rule.AlertingRule == nil || rule.AlertingRule.For == nil { + if rule.AlertingRule == nil { return problems } - d, err := model.ParseDuration(rule.AlertingRule.For.Value.Value) + if rule.AlertingRule.For != nil { + problems = append(problems, c.checkField(rule.AlertingRule.For.Key.Value, rule.AlertingRule.For.Value.Value, rule.AlertingRule.For.Lines())...) + } + if rule.AlertingRule.KeepFiringFor != nil { + problems = append(problems, c.checkField(rule.AlertingRule.KeepFiringFor.Key.Value, rule.AlertingRule.KeepFiringFor.Value.Value, rule.AlertingRule.KeepFiringFor.Lines())...) + } + + return problems +} + +func (c AlertsForChecksFor) checkField(name, value string, lines []int) (problems []Problem) { + d, err := model.ParseDuration(value) if err != nil { problems = append(problems, Problem{ - Fragment: rule.AlertingRule.For.Value.Value, - Lines: rule.AlertingRule.For.Lines(), + Fragment: value, + Lines: lines, Reporter: c.Reporter(), Text: fmt.Sprintf("invalid duration: %s", err), Severity: Bug, @@ -51,11 +62,10 @@ func (c AlertsForChecksFor) Check(_ context.Context, _ string, rule parser.Rule, if d == 0 { problems = append(problems, Problem{ - Fragment: rule.AlertingRule.For.Value.Value, - Lines: rule.AlertingRule.For.Lines(), + Fragment: value, + Lines: lines, Reporter: c.Reporter(), - Text: fmt.Sprintf("%q is the default value of %q, consider removing this line", - rule.AlertingRule.For.Value.Value, rule.AlertingRule.For.Key.Value), + Text: fmt.Sprintf("%q is the default value of %q, consider removing this line", value, name), Severity: Information, }) } diff --git a/internal/checks/alerts_for_test.go b/internal/checks/alerts_for_test.go index f966265e..a98c47d2 100644 --- a/internal/checks/alerts_for_test.go +++ b/internal/checks/alerts_for_test.go @@ -78,6 +78,57 @@ func TestAlertsForCheck(t *testing.T) { } }, }, + { + description: "invalid keep_firing_for value", + content: "- alert: foo\n expr: foo\n keep_firing_for: abc\n", + checker: newAlertsForCheck, + prometheus: noProm, + problems: func(uri string) []checks.Problem { + return []checks.Problem{ + { + Fragment: "abc", + Lines: []int{3}, + Reporter: "alerts/for", + Text: `invalid duration: not a valid duration string: "abc"`, + Severity: checks.Bug, + }, + } + }, + }, + { + description: "negative keep_firing_for value", + content: "- alert: foo\n expr: foo\n keep_firing_for: -5m\n", + checker: newAlertsForCheck, + prometheus: noProm, + problems: func(uri string) []checks.Problem { + return []checks.Problem{ + { + Fragment: "-5m", + Lines: []int{3}, + Reporter: "alerts/for", + Text: `invalid duration: not a valid duration string: "-5m"`, + Severity: checks.Bug, + }, + } + }, + }, + { + description: "default for value", + content: "- alert: foo\n expr: foo\n keep_firing_for: 0h\n", + checker: newAlertsForCheck, + prometheus: noProm, + problems: func(uri string) []checks.Problem { + return []checks.Problem{ + { + Fragment: "0h", + Lines: []int{3}, + Reporter: "alerts/for", + Text: `"0h" is the default value of "keep_firing_for", consider removing this line`, + Severity: checks.Information, + }, + } + }, + }, } runTests(t, testCases) } diff --git a/internal/checks/rule_for.go b/internal/checks/rule_for.go index 62322326..efe03ac9 100644 --- a/internal/checks/rule_for.go +++ b/internal/checks/rule_for.go @@ -16,8 +16,16 @@ const ( RuleForCheckName = "rule/for" ) -func NewRuleForCheck(minFor, maxFor time.Duration, severity Severity) RuleForCheck { +type RuleForKey string + +const ( + RuleForFor RuleForKey = "for" + RuleForKeepFiringFor RuleForKey = "keep_firing_for" +) + +func NewRuleForCheck(key RuleForKey, minFor, maxFor time.Duration, severity Severity) RuleForCheck { return RuleForCheck{ + key: key, minFor: minFor, maxFor: maxFor, severity: severity, @@ -26,6 +34,7 @@ func NewRuleForCheck(minFor, maxFor time.Duration, severity Severity) RuleForChe type RuleForCheck struct { severity Severity + key RuleForKey minFor time.Duration maxFor time.Duration } @@ -50,11 +59,22 @@ func (c RuleForCheck) Check(_ context.Context, _ string, rule parser.Rule, _ []d var forDur model.Duration var fragment string var lines []int - if rule.AlertingRule.For != nil { - forDur, _ = model.ParseDuration(rule.AlertingRule.For.Value.Value) - fragment = rule.AlertingRule.For.Value.Value - lines = rule.AlertingRule.For.Lines() + + switch c.key { + case RuleForFor: + if rule.AlertingRule.For != nil { + forDur, _ = model.ParseDuration(rule.AlertingRule.For.Value.Value) + fragment = rule.AlertingRule.For.Value.Value + lines = rule.AlertingRule.For.Lines() + } + case RuleForKeepFiringFor: + if rule.AlertingRule.KeepFiringFor != nil { + forDur, _ = model.ParseDuration(rule.AlertingRule.KeepFiringFor.Value.Value) + fragment = rule.AlertingRule.KeepFiringFor.Value.Value + lines = rule.AlertingRule.KeepFiringFor.Lines() + } } + if fragment == "" { fragment = rule.AlertingRule.Alert.Value.Value lines = rule.AlertingRule.Alert.Lines() @@ -65,7 +85,7 @@ func (c RuleForCheck) Check(_ context.Context, _ string, rule parser.Rule, _ []d Fragment: fragment, Lines: lines, Reporter: c.Reporter(), - Text: fmt.Sprintf("this alert rule must have a 'for' field with a minimum duration of %s", output.HumanizeDuration(c.minFor)), + Text: fmt.Sprintf("this alert rule must have a '%s' field with a minimum duration of %s", c.key, output.HumanizeDuration(c.minFor)), Severity: c.severity, }) } @@ -75,7 +95,7 @@ func (c RuleForCheck) Check(_ context.Context, _ string, rule parser.Rule, _ []d Fragment: fragment, Lines: lines, Reporter: c.Reporter(), - Text: fmt.Sprintf("this alert rule must have a 'for' field with a maximum duration of %s", output.HumanizeDuration(c.maxFor)), + Text: fmt.Sprintf("this alert rule must have a '%s' field with a maximum duration of %s", c.key, output.HumanizeDuration(c.maxFor)), Severity: c.severity, }) } diff --git a/internal/checks/rule_for_test.go b/internal/checks/rule_for_test.go index 0255c0b5..7628e8c2 100644 --- a/internal/checks/rule_for_test.go +++ b/internal/checks/rule_for_test.go @@ -9,12 +9,12 @@ import ( "github.com/cloudflare/pint/internal/promapi" ) -func forMin(min string) string { - return fmt.Sprintf("this alert rule must have a 'for' field with a minimum duration of %s", min) +func forMin(key, min string) string { + return fmt.Sprintf("this alert rule must have a '%s' field with a minimum duration of %s", key, min) } -func forMax(max string) string { - return fmt.Sprintf("this alert rule must have a 'for' field with a maximum duration of %s", max) +func forMax(key, max string) string { + return fmt.Sprintf("this alert rule must have a '%s' field with a maximum duration of %s", key, max) } func TestRuleForCheck(t *testing.T) { @@ -23,7 +23,7 @@ func TestRuleForCheck(t *testing.T) { description: "recording rule", content: "- record: foo\n expr: sum(foo)\n", checker: func(_ *promapi.FailoverGroup) checks.RuleChecker { - return checks.NewRuleForCheck(0, 0, checks.Bug) + return checks.NewRuleForCheck(checks.RuleForFor, 0, 0, checks.Bug) }, prometheus: noProm, problems: noProblems, @@ -32,7 +32,7 @@ func TestRuleForCheck(t *testing.T) { description: "alerting rule, no for, 0-0", content: "- alert: foo\n expr: sum(foo)\n", checker: func(_ *promapi.FailoverGroup) checks.RuleChecker { - return checks.NewRuleForCheck(0, 0, checks.Bug) + return checks.NewRuleForCheck(checks.RuleForFor, 0, 0, checks.Bug) }, prometheus: noProm, problems: noProblems, @@ -41,7 +41,7 @@ func TestRuleForCheck(t *testing.T) { description: "alerting rule, for:1m, 0-0", content: "- alert: foo\n for: 1m\n expr: sum(foo)\n", checker: func(_ *promapi.FailoverGroup) checks.RuleChecker { - return checks.NewRuleForCheck(0, 0, checks.Bug) + return checks.NewRuleForCheck(checks.RuleForFor, 0, 0, checks.Bug) }, prometheus: noProm, problems: noProblems, @@ -50,7 +50,7 @@ func TestRuleForCheck(t *testing.T) { description: "alerting rule, for:1m, 1s-0", content: "- alert: foo\n for: 1m\n expr: sum(foo)\n", checker: func(_ *promapi.FailoverGroup) checks.RuleChecker { - return checks.NewRuleForCheck(time.Second, 0, checks.Bug) + return checks.NewRuleForCheck(checks.RuleForFor, time.Second, 0, checks.Bug) }, prometheus: noProm, problems: noProblems, @@ -59,7 +59,7 @@ func TestRuleForCheck(t *testing.T) { description: "alerting rule, for:1m, 1s-2m", content: "- alert: foo\n for: 1m\n expr: sum(foo)\n", checker: func(_ *promapi.FailoverGroup) checks.RuleChecker { - return checks.NewRuleForCheck(time.Second, time.Minute*2, checks.Bug) + return checks.NewRuleForCheck(checks.RuleForFor, time.Second, time.Minute*2, checks.Bug) }, prometheus: noProm, problems: noProblems, @@ -68,7 +68,7 @@ func TestRuleForCheck(t *testing.T) { description: "alerting rule, for:4m, 5m-10m", content: "- alert: foo\n for: 4m\n expr: sum(foo)\n", checker: func(_ *promapi.FailoverGroup) checks.RuleChecker { - return checks.NewRuleForCheck(time.Minute*5, time.Minute*10, checks.Warning) + return checks.NewRuleForCheck(checks.RuleForFor, time.Minute*5, time.Minute*10, checks.Warning) }, prometheus: noProm, problems: func(s string) []checks.Problem { @@ -77,7 +77,7 @@ func TestRuleForCheck(t *testing.T) { Fragment: "4m", Lines: []int{2}, Reporter: "rule/for", - Text: forMin("5m"), + Text: forMin("for", "5m"), Severity: checks.Warning, }, } @@ -87,7 +87,7 @@ func TestRuleForCheck(t *testing.T) { description: "alerting rule, for:5m, 1s-2m", content: "- alert: foo\n for: 5m\n expr: sum(foo)\n", checker: func(_ *promapi.FailoverGroup) checks.RuleChecker { - return checks.NewRuleForCheck(time.Second, time.Minute*2, checks.Warning) + return checks.NewRuleForCheck(checks.RuleForFor, time.Second, time.Minute*2, checks.Warning) }, prometheus: noProm, problems: func(s string) []checks.Problem { @@ -96,7 +96,7 @@ func TestRuleForCheck(t *testing.T) { Fragment: "5m", Lines: []int{2}, Reporter: "rule/for", - Text: forMax("2m"), + Text: forMax("for", "2m"), Severity: checks.Warning, }, } @@ -106,11 +106,39 @@ func TestRuleForCheck(t *testing.T) { description: "alerting rule, for:1d, 5m-0", content: "- alert: foo\n for: 1d\n expr: sum(foo)\n", checker: func(_ *promapi.FailoverGroup) checks.RuleChecker { - return checks.NewRuleForCheck(time.Minute*5, 0, checks.Warning) + return checks.NewRuleForCheck(checks.RuleForFor, time.Minute*5, 0, checks.Warning) }, prometheus: noProm, problems: noProblems, }, + { + description: "alerting rule, for:14m, 5m-10m, keep_firing_for enforced", + content: "- alert: foo\n for: 14m\n expr: sum(foo)\n", + checker: func(_ *promapi.FailoverGroup) checks.RuleChecker { + return checks.NewRuleForCheck(checks.RuleForKeepFiringFor, 0, time.Minute*10, checks.Warning) + }, + prometheus: noProm, + problems: noProblems, + }, + { + description: "alerting rule, keep_firing_for:4m, 5m-10m", + content: "- alert: foo\n keep_firing_for: 4m\n expr: sum(foo)\n", + checker: func(_ *promapi.FailoverGroup) checks.RuleChecker { + return checks.NewRuleForCheck(checks.RuleForKeepFiringFor, time.Minute*5, time.Minute*10, checks.Warning) + }, + prometheus: noProm, + problems: func(s string) []checks.Problem { + return []checks.Problem{ + { + Fragment: "4m", + Lines: []int{2}, + Reporter: "rule/for", + Text: forMin("keep_firing_for", "5m"), + Severity: checks.Warning, + }, + } + }, + }, } runTests(t, testCases) } diff --git a/internal/config/__snapshots__/config_test.snap b/internal/config/__snapshots__/config_test.snap index d02f7012..c2d37586 100755 --- a/internal/config/__snapshots__/config_test.snap +++ b/internal/config/__snapshots__/config_test.snap @@ -11073,3 +11073,503 @@ "owners": {} } --- + +[TestGetChecksForRule/for_match_/_passing#01 - 1] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + "promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, + "rules": [ + { + "match": [ + { + "keep_firing_for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], + "owners": {} +} +--- + +[TestGetChecksForRule/for_match_/_passing#02 - 1] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + "promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, + "rules": [ + { + "match": [ + { + "keep_firing_for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], + "owners": {} +} +--- + +[TestGetChecksForRule/for_match_/_passing#01 - 2] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + "promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, + "rules": [ + { + "match": [ + { + "keep_firing_for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], + "owners": {} +} +--- + +[TestGetChecksForRule/for_match_/_passing#02 - 2] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + "promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, + "rules": [ + { + "match": [ + { + "keep_firing_for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], + "owners": {} +} +--- + +[TestGetChecksForRule/for_match_/_passing#01 - 3] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + "promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, + "rules": [ + { + "match": [ + { + "keep_firing_for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], + "owners": {} +} +--- + +[TestGetChecksForRule/for_match_/_passing#02 - 3] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + "promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, + "rules": [ + { + "match": [ + { + "keep_firing_for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], + "owners": {} +} +--- + +[TestGetChecksForRule/for_match_/_passing#01 - 4] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + "promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, + "rules": [ + { + "match": [ + { + "keep_firing_for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], + "owners": {} +} +--- + +[TestGetChecksForRule/for_match_/_passing#02 - 4] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + "promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, + "rules": [ + { + "match": [ + { + "keep_firing_for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], + "owners": {} +} +--- + +[TestGetChecksForRule/for_match_/_passing#01 - 5] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + "promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, + "rules": [ + { + "match": [ + { + "keep_firing_for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], + "owners": {} +} +--- + +[TestGetChecksForRule/for_match_/_passing#02 - 5] +{ + "ci": { + "maxCommits": 20, + "baseBranch": "master" + }, + "parser": {}, + "checks": { + "enabled": [ + "alerts/annotation", + "alerts/count", + "alerts/for", + "alerts/template", + "labels/conflict", + "promql/aggregate", + "alerts/comparison", + "promql/fragile", + "promql/range_query", + "promql/rate", + "promql/regexp", + "promql/syntax", + "promql/vector_matching", + "query/cost", + "promql/series", + "rule/duplicate", + "rule/for", + "rule/label", + "rule/link", + "rule/reject" + ] + }, + "rules": [ + { + "match": [ + { + "keep_firing_for": "\u003e 15m" + } + ], + "annotation": [ + { + "key": "summary", + "required": true + } + ] + } + ], + "owners": {} +} +--- diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 7069df65..f78021c0 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -1040,6 +1040,53 @@ rule { checks.RegexpCheckName, }, }, + { + title: "for match / passing", + config: ` +rule { + match { + keep_firing_for = "> 15m" + } + annotation "summary" { + required = true + } +} +`, + path: "rules.yml", + rule: newRule(t, "- alert: foo\n expr: sum(foo)\n keep_firing_for: 16m\n"), + checks: []string{ + checks.SyntaxCheckName, + checks.AlertForCheckName, + checks.ComparisonCheckName, + checks.TemplateCheckName, + checks.FragileCheckName, + checks.RegexpCheckName, + checks.AnnotationCheckName + "(summary:true)", + }, + }, + { + title: "for match / passing", + config: ` +rule { + match { + keep_firing_for = "> 15m" + } + annotation "summary" { + required = true + } +} +`, + path: "rules.yml", + rule: newRule(t, "- alert: foo\n expr: sum(foo)\n keep_firing_for: 14m\n"), + checks: []string{ + checks.SyntaxCheckName, + checks.AlertForCheckName, + checks.ComparisonCheckName, + checks.TemplateCheckName, + checks.FragileCheckName, + checks.RegexpCheckName, + }, + }, { title: "for match / recording rules / not passing", config: ` diff --git a/internal/config/for.go b/internal/config/for.go index 55efb645..c829492d 100644 --- a/internal/config/for.go +++ b/internal/config/for.go @@ -2,6 +2,7 @@ package config import ( "errors" + "time" "github.com/cloudflare/pint/internal/checks" ) @@ -41,3 +42,14 @@ func (fs ForSettings) getSeverity(fallback checks.Severity) checks.Severity { } return fallback } + +func (fs ForSettings) resolve() (severity checks.Severity, minFor, maxFor time.Duration) { + severity = fs.getSeverity(checks.Bug) + if fs.Min != "" { + minFor, _ = parseDuration(fs.Min) + } + if fs.Max != "" { + maxFor, _ = parseDuration(fs.Max) + } + return severity, minFor, maxFor +} diff --git a/internal/config/match.go b/internal/config/match.go index 33c200be..f3228fce 100644 --- a/internal/config/match.go +++ b/internal/config/match.go @@ -29,13 +29,14 @@ var ( ) type Match struct { - Path string `hcl:"path,optional" json:"path,omitempty"` - Name string `hcl:"name,optional" json:"name,omitempty"` - Kind string `hcl:"kind,optional" json:"kind,omitempty"` - For string `hcl:"for,optional" json:"for,omitempty"` - Label *MatchLabel `hcl:"label,block" json:"label,omitempty"` - Annotation *MatchAnnotation `hcl:"annotation,block" json:"annotation,omitempty"` - Command *ContextCommandVal `hcl:"command,optional" json:"command,omitempty"` + Path string `hcl:"path,optional" json:"path,omitempty"` + Name string `hcl:"name,optional" json:"name,omitempty"` + Kind string `hcl:"kind,optional" json:"kind,omitempty"` + For string `hcl:"for,optional" json:"for,omitempty"` + KeepFiringFor string `hcl:"keep_firing_for,optional" json:"keep_firing_for,omitempty"` + Label *MatchLabel `hcl:"label,block" json:"label,omitempty"` + Annotation *MatchAnnotation `hcl:"annotation,block" json:"annotation,omitempty"` + Command *ContextCommandVal `hcl:"command,optional" json:"command,omitempty"` } func (m Match) validate(allowEmpty bool) error { @@ -140,6 +141,19 @@ func (m Match) IsMatch(ctx context.Context, path string, r parser.Rule) bool { } } + if m.KeepFiringFor != "" { + if r.AlertingRule != nil && r.AlertingRule.KeepFiringFor != nil { + dm, _ := parseDurationMatch(m.KeepFiringFor) + if dur, err := parseDuration(r.AlertingRule.KeepFiringFor.Value.Value); err == nil { + if !dm.isMatch(dur) { + return false + } + } + } else { + return false + } + } + return true } diff --git a/internal/config/rule.go b/internal/config/rule.go index 2a2fedad..eb61c76d 100644 --- a/internal/config/rule.go +++ b/internal/config/rule.go @@ -15,16 +15,17 @@ import ( ) type Rule struct { - Match []Match `hcl:"match,block" json:"match,omitempty"` - Ignore []Match `hcl:"ignore,block" json:"ignore,omitempty"` - Aggregate []AggregateSettings `hcl:"aggregate,block" json:"aggregate,omitempty"` - Annotation []AnnotationSettings `hcl:"annotation,block" json:"annotation,omitempty"` - Label []AnnotationSettings `hcl:"label,block" json:"label,omitempty"` - Cost *CostSettings `hcl:"cost,block" json:"cost,omitempty"` - Alerts *AlertsSettings `hcl:"alerts,block" json:"alerts,omitempty"` - For *ForSettings `hcl:"for,block" json:"for,omitempty"` - Reject []RejectSettings `hcl:"reject,block" json:"reject,omitempty"` - RuleLink []RuleLinkSettings `hcl:"link,block" json:"link,omitempty"` + Match []Match `hcl:"match,block" json:"match,omitempty"` + Ignore []Match `hcl:"ignore,block" json:"ignore,omitempty"` + Aggregate []AggregateSettings `hcl:"aggregate,block" json:"aggregate,omitempty"` + Annotation []AnnotationSettings `hcl:"annotation,block" json:"annotation,omitempty"` + Label []AnnotationSettings `hcl:"label,block" json:"label,omitempty"` + Cost *CostSettings `hcl:"cost,block" json:"cost,omitempty"` + Alerts *AlertsSettings `hcl:"alerts,block" json:"alerts,omitempty"` + For *ForSettings `hcl:"for,block" json:"for,omitempty"` + KeepFiringFor *ForSettings `hcl:"keep_firing_for,block" json:"keep_firing_for,omitempty"` + Reject []RejectSettings `hcl:"reject,block" json:"reject,omitempty"` + RuleLink []RuleLinkSettings `hcl:"link,block" json:"link,omitempty"` } func (rule Rule) validate() (err error) { @@ -88,6 +89,12 @@ func (rule Rule) validate() (err error) { } } + if rule.KeepFiringFor != nil { + if err = rule.KeepFiringFor.validate(); err != nil { + return err + } + } + return nil } @@ -245,17 +252,18 @@ func (rule Rule) resolveChecks(ctx context.Context, path string, r parser.Rule, } if rule.For != nil { - severity := rule.For.getSeverity(checks.Bug) - var minFor, maxFor time.Duration - if rule.For.Min != "" { - minFor, _ = parseDuration(rule.For.Min) - } - if rule.For.Max != "" { - maxFor, _ = parseDuration(rule.For.Max) - } + severity, minFor, maxFor := rule.For.resolve() + enabled = append(enabled, checkMeta{ + name: checks.RuleForCheckName, + check: checks.NewRuleForCheck(checks.RuleForFor, minFor, maxFor, severity), + }) + } + + if rule.KeepFiringFor != nil { + severity, minFor, maxFor := rule.For.resolve() enabled = append(enabled, checkMeta{ name: checks.RuleForCheckName, - check: checks.NewRuleForCheck(minFor, maxFor, severity), + check: checks.NewRuleForCheck(checks.RuleForKeepFiringFor, minFor, maxFor, severity), }) } diff --git a/internal/parser/models.go b/internal/parser/models.go index cb9a87c8..95896bf4 100644 --- a/internal/parser/models.go +++ b/internal/parser/models.go @@ -229,6 +229,9 @@ func (ar AlertingRule) Lines() (lines []int) { if ar.For != nil { lines = appendLine(lines, ar.For.Lines()...) } + if ar.KeepFiringFor != nil { + lines = appendLine(lines, ar.KeepFiringFor.Lines()...) + } if ar.Labels != nil { lines = appendLine(lines, ar.Labels.Lines()...) } @@ -248,6 +251,10 @@ func (ar AlertingRule) Comments() (comments []string) { comments = append(comments, ar.For.Key.Comments...) comments = append(comments, ar.For.Value.Comments...) } + if ar.KeepFiringFor != nil { + comments = append(comments, ar.KeepFiringFor.Key.Comments...) + comments = append(comments, ar.KeepFiringFor.Value.Comments...) + } if ar.Labels != nil { comments = append(comments, ar.Labels.Key.Comments...) for _, label := range ar.Labels.Items { @@ -339,6 +346,13 @@ func (r Rule) ToYAML() string { b.WriteString(r.AlertingRule.For.Value.Value) b.WriteRune('\n') } + if r.AlertingRule.KeepFiringFor != nil { + b.WriteString(" ") + b.WriteString(r.AlertingRule.KeepFiringFor.Key.Value) + b.WriteRune(':') + b.WriteString(r.AlertingRule.KeepFiringFor.Value.Value) + b.WriteRune('\n') + } if r.AlertingRule.Annotations != nil { b.WriteString(" annotations:\n")