Skip to content

Commit

Permalink
Support keep_firing_for everywhere
Browse files Browse the repository at this point in the history
  • Loading branch information
prymitive committed Sep 11, 2023
1 parent c2ba53d commit b1f5fcc
Show file tree
Hide file tree
Showing 16 changed files with 973 additions and 62 deletions.
10 changes: 10 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Changelog

## v0.46.0

### Added

- Added support for `keep_firing_for` in alerting rules - #713.
- Added `rule/keep_firing_for` check - #713.
- Added `alerts/count` check will now estimate alerts using
`keep_firing_for` field if set - #713.
- Configuration rule `match` block supports a new filter `keep_firing_for`.

## v0.45.0

### Added
Expand Down
4 changes: 2 additions & 2 deletions docs/checks/alerts/for.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ grand_parent: Documentation

# alerts/for

This check will warn if an alert rule uses invalid `for` value
or if it passes default value that can be removed to simplify rule.
This check will warn if an alert rule uses invalid `for` or `keep_firing_for`
value or if it passes default value that can be removed to simplify rule.

## Configuration

Expand Down
47 changes: 43 additions & 4 deletions docs/checks/rule/for.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,24 @@ grand_parent: Documentation

# rule/for

This check allows to enforce the presence of `for` field on alerting
rules.
This check allows to enforce the presence of `for` or `keep_firing_for` field
on alerting rules.
You can configure it to enforce some minimal and/or maximum duration
set on alerts via `for` field.
set on alerts via `for` and/or `keep_firing_for` fields.

## Configuration

This check doesn't have any configuration options.

## How to enable it

This check uses either `for` or `keep_firing_for` configuration
blocks, depending on which alerting rule field you want to enforce.

Syntax:

```js
for {
for|keep_firing_for {
severity = "bug|warning|info"
min = "5m"
max = "10m"
Expand All @@ -33,6 +36,42 @@ for {
- `max` - maximum allowed `for` value for matching alerting rules.
- If not set maximum `for` duration won't be enforced.

Example:

Enforce that all alerts have `for` fields of `5m` or more:

```js
for {
severity = "bug"
min = "5m"
max = "10m"
}
```

Enforce that all alerts have `keep_firing_for` fields with no more than `1h`:

```js
keep_firing_for {
severity = "bug"
max = "1h"
}
```

To enforce both at the same time:

```js
for {
severity = "bug"
min = "5m"
max = "10m"
}

keep_firing_for {
severity = "bug"
max = "1h"
}
```

## How to disable it

You can disable this check globally by adding this config block:
Expand Down
11 changes: 11 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,8 @@ rule {
field present and matching provided value will be checked by this rule. Recording rules
will never match it as they don't have `for` field.
Syntax is `OP DURATION` where `OP` can be any of `=`, `!=`, `>`, `>=`, `<`, `<=`.
- `match:keep_firing_for` - optional alerting rule `keep_firing_for` filter. Works the same
way as `for` match filter.
- `ignore` - works exactly like `match` but does the opposite - any alerting or recording rule
matching all conditions defined on `ignore` will not be checked by this `rule` block.

Expand Down Expand Up @@ -433,3 +435,12 @@ rule {
[ check applied only to alerting rules with "for" field value that is >= 5m ]
}
```

```js
rule {
match {
keep_firing_for = "> 15m"
}
[ check applied only to alerting rules with "keep_firing_for" field value that is > 15m ]
}
```
5 changes: 4 additions & 1 deletion internal/checks/alerts_count.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func (c AlertsCheck) Check(ctx context.Context, _ string, rule parser.Rule, _ []
forDur, _ = model.ParseDuration(rule.AlertingRule.For.Value.Value)
}
var keepFiringForDur model.Duration
if rule.AlertingRule.For != nil {
if rule.AlertingRule.KeepFiringFor != nil {
keepFiringForDur, _ = model.ParseDuration(rule.AlertingRule.KeepFiringFor.Value.Value)
}

Expand All @@ -111,6 +111,9 @@ func (c AlertsCheck) Check(ctx context.Context, _ string, rule parser.Rule, _ []
if rule.AlertingRule.For != nil {
lines = append(lines, rule.AlertingRule.For.Lines()...)
}
if rule.AlertingRule.KeepFiringFor != nil {
lines = append(lines, rule.AlertingRule.KeepFiringFor.Lines()...)
}
sort.Ints(lines)

delta := qr.Series.Until.Sub(qr.Series.From)
Expand Down
144 changes: 144 additions & 0 deletions internal/checks/alerts_count_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -645,6 +645,150 @@ func TestAlertsCountCheck(t *testing.T) {
},
},
},
{
description: "keep_firing_for: 10m",
content: "- alert: Foo Is Down\n keep_firing_for: 10m\n expr: up{job=\"foo\"} == 0\n",
checker: newAlertsCheck,
prometheus: newSimpleProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: `up{job="foo"} == 0`,
Lines: []int{2, 3},
Reporter: "alerts/count",
Text: alertsText("prom", uri, 2, "1d"),
Severity: checks.Information,
},
}
},
mocks: []*prometheusMock{
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `up{job="foo"} == 0`},
},
resp: matrixResponse{
samples: []*model.SampleStream{
generateSampleStream(
map[string]string{"job": "foo"},
time.Now().Add(time.Hour*-24),
time.Now().Add(time.Hour*-24).Add(time.Minute*6),
time.Minute,
),
generateSampleStream(
map[string]string{"job": "foo"},
time.Now().Add(time.Hour*-23),
time.Now().Add(time.Hour*-23).Add(time.Minute*6),
time.Minute,
),
generateSampleStream(
map[string]string{"job": "foo"},
time.Now().Add(time.Hour*-22),
time.Now().Add(time.Hour*-22).Add(time.Minute),
time.Minute,
),
generateSampleStream(
map[string]string{"job": "foo"},
time.Now().Add(time.Hour*-21),
time.Now().Add(time.Hour*-21).Add(time.Minute*16),
time.Minute,
),
generateSampleStream(
map[string]string{"job": "foo"},
time.Now().Add(time.Hour*-20),
time.Now().Add(time.Hour*-20).Add(time.Minute*9).Add(time.Second*59),
time.Minute,
),
generateSampleStream(
map[string]string{"job": "foo"},
time.Now().Add(time.Hour*-18),
time.Now().Add(time.Hour*-18).Add(time.Hour*2),
time.Minute,
),
},
},
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `count(up)`},
},
resp: respondWithSingleRangeVector1D(),
},
},
},
{
description: "for: 10m + keep_firing_for: 10m",
content: "- alert: Foo Is Down\n for: 10m\n keep_firing_for: 10m\n expr: up{job=\"foo\"} == 0\n",
checker: newAlertsCheck,
prometheus: newSimpleProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: `up{job="foo"} == 0`,
Lines: []int{2, 3, 4},
Reporter: "alerts/count",
Text: alertsText("prom", uri, 1, "1d"),
Severity: checks.Information,
},
}
},
mocks: []*prometheusMock{
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `up{job="foo"} == 0`},
},
resp: matrixResponse{
samples: []*model.SampleStream{
generateSampleStream(
map[string]string{"job": "foo"},
time.Now().Add(time.Hour*-24),
time.Now().Add(time.Hour*-24).Add(time.Minute*6),
time.Minute,
),
generateSampleStream(
map[string]string{"job": "foo"},
time.Now().Add(time.Hour*-23),
time.Now().Add(time.Hour*-23).Add(time.Minute*6),
time.Minute,
),
generateSampleStream(
map[string]string{"job": "foo"},
time.Now().Add(time.Hour*-22),
time.Now().Add(time.Hour*-22).Add(time.Minute),
time.Minute,
),
generateSampleStream(
map[string]string{"job": "foo"},
time.Now().Add(time.Hour*-21),
time.Now().Add(time.Hour*-21).Add(time.Minute*16),
time.Minute,
),
generateSampleStream(
map[string]string{"job": "foo"},
time.Now().Add(time.Hour*-20),
time.Now().Add(time.Hour*-20).Add(time.Minute*9).Add(time.Second*59),
time.Minute,
),
generateSampleStream(
map[string]string{"job": "foo"},
time.Now().Add(time.Hour*-18),
time.Now().Add(time.Hour*-18).Add(time.Hour*2),
time.Minute,
),
},
},
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `count(up)`},
},
resp: respondWithSingleRangeVector1D(),
},
},
},
}

runTests(t, testCases)
Expand Down
26 changes: 18 additions & 8 deletions internal/checks/alerts_for.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,26 @@ func (c AlertsForChecksFor) Reporter() string {
}

func (c AlertsForChecksFor) Check(_ context.Context, _ string, rule parser.Rule, _ []discovery.Entry) (problems []Problem) {
if rule.AlertingRule == nil || rule.AlertingRule.For == nil {
if rule.AlertingRule == nil {
return problems
}

d, err := model.ParseDuration(rule.AlertingRule.For.Value.Value)
if rule.AlertingRule.For != nil {
problems = append(problems, c.checkField(rule.AlertingRule.For.Key.Value, rule.AlertingRule.For.Value.Value, rule.AlertingRule.For.Lines())...)
}
if rule.AlertingRule.KeepFiringFor != nil {
problems = append(problems, c.checkField(rule.AlertingRule.KeepFiringFor.Key.Value, rule.AlertingRule.KeepFiringFor.Value.Value, rule.AlertingRule.KeepFiringFor.Lines())...)
}

return problems
}

func (c AlertsForChecksFor) checkField(name, value string, lines []int) (problems []Problem) {
d, err := model.ParseDuration(value)
if err != nil {
problems = append(problems, Problem{
Fragment: rule.AlertingRule.For.Value.Value,
Lines: rule.AlertingRule.For.Lines(),
Fragment: value,
Lines: lines,
Reporter: c.Reporter(),
Text: fmt.Sprintf("invalid duration: %s", err),
Severity: Bug,
Expand All @@ -51,11 +62,10 @@ func (c AlertsForChecksFor) Check(_ context.Context, _ string, rule parser.Rule,

if d == 0 {
problems = append(problems, Problem{
Fragment: rule.AlertingRule.For.Value.Value,
Lines: rule.AlertingRule.For.Lines(),
Fragment: value,
Lines: lines,
Reporter: c.Reporter(),
Text: fmt.Sprintf("%q is the default value of %q, consider removing this line",
rule.AlertingRule.For.Value.Value, rule.AlertingRule.For.Key.Value),
Text: fmt.Sprintf("%q is the default value of %q, consider removing this line", value, name),
Severity: Information,
})
}
Expand Down
51 changes: 51 additions & 0 deletions internal/checks/alerts_for_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,57 @@ func TestAlertsForCheck(t *testing.T) {
}
},
},
{
description: "invalid keep_firing_for value",
content: "- alert: foo\n expr: foo\n keep_firing_for: abc\n",
checker: newAlertsForCheck,
prometheus: noProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: "abc",
Lines: []int{3},
Reporter: "alerts/for",
Text: `invalid duration: not a valid duration string: "abc"`,
Severity: checks.Bug,
},
}
},
},
{
description: "negative keep_firing_for value",
content: "- alert: foo\n expr: foo\n keep_firing_for: -5m\n",
checker: newAlertsForCheck,
prometheus: noProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: "-5m",
Lines: []int{3},
Reporter: "alerts/for",
Text: `invalid duration: not a valid duration string: "-5m"`,
Severity: checks.Bug,
},
}
},
},
{
description: "default for value",
content: "- alert: foo\n expr: foo\n keep_firing_for: 0h\n",
checker: newAlertsForCheck,
prometheus: noProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: "0h",
Lines: []int{3},
Reporter: "alerts/for",
Text: `"0h" is the default value of "keep_firing_for", consider removing this line`,
Severity: checks.Information,
},
}
},
},
}
runTests(t, testCases)
}
Loading

0 comments on commit b1f5fcc

Please sign in to comment.