Skip to content

Commit

Permalink
Merge pull request #752 from cloudflare/vector
Browse files Browse the repository at this point in the history
More strict checks in alerts/template
  • Loading branch information
prymitive authored Oct 18, 2023
2 parents 12f6a66 + ffd51fc commit 6b679f1
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 18 deletions.
48 changes: 34 additions & 14 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,25 @@
# Changelog

## v0.47.0

### Added

- `alerts/template` check can now report problems with alerting rules
when trying to use templates on a query that doesn't produce any labels
at all.
For example when using [`vector(...)`](https://prometheus.io/docs/prometheus/latest/querying/functions/#vector) function:

{% raw %}

```yaml
- alert: DeadMansSwitch
expr: vector(1)
annotations:
summary: Deadman's switch on {{ $labels.instance }} is firing
```
{% endraw %}
## v0.46.0
### Added
Expand Down Expand Up @@ -115,7 +135,7 @@
```yaml
- record: my:sum
expr: sum(http_requests_total)
- alert: my alert
expr: rate(my:sum[5m])
```
Expand Down Expand Up @@ -195,7 +215,7 @@
snooze them instead of disabling forever.

The difference between `# pint disable ...` and `# pint snooze ...` comments is that
the snooze comment must include a timestamp. Selected check will be disabled *until*
the snooze comment must include a timestamp. Selected check will be disabled _until_
that timestamp.
Timestamp must either use [RFC3339](https://www.rfc-editor.org/rfc/rfc3339) syntax
or `YYYY-MM-DD` (if you don't care about time and want to snooze until given date).
Expand Down Expand Up @@ -640,7 +660,7 @@
### Changed

- The way `pint` sends API requests to Prometheus was changed to improve performance.

First change is that each `prometheus` server definition in `pint` config file can
now accept optional `concurrency` field (defaults to 16) that sets a limit on how
many concurrent requests can that server receive. There is a new metric that
Expand Down Expand Up @@ -800,10 +820,10 @@

```yaml
groups:
- name: example
rules:
- record: ...
expr: ...
- name: example
rules:
- record: ...
expr: ...
```

Previous releases were only looking for individual rules so `groups` object
Expand All @@ -815,7 +835,7 @@

```yaml
parser {
relaxed = ["(.*)"]
relaxed = ["(.*)"]
}
```

Expand Down Expand Up @@ -974,7 +994,7 @@

### Added

- Added `pint_last_run_time_seconds` and `pint_rules_parsed_total` metrics when running `pint watch`.
- Added `pint_last_run_time_seconds` and `pint_rules_parsed_total` metrics when running `pint watch`.

### Changed

Expand All @@ -997,7 +1017,7 @@
### Added

- Added `promql/regexp` check that will warn about unnecessary regexp matchers.
- Added `pint_prometheus_queries_total` and `pint_prometheus_query_errors_total`
- Added `pint_prometheus_queries_total` and `pint_prometheus_query_errors_total`
metric when running `pint watch`.

## v0.10.1
Expand Down Expand Up @@ -1090,7 +1110,7 @@
### Fixed

- `promql/rate`, `query/series` and `promql/vector_matching` checks were not enabled
for all defined `prometheus {}` blocks unless there was at least one `rule {}` block.
for all defined `prometheus {}` blocks unless there was at least one `rule {}` block.
- `annotation` based `match` blocks didn't work correctly.

## v0.6.6
Expand Down Expand Up @@ -1224,7 +1244,7 @@
- alert: Foo
expr: absent(foo{env="prod"})
annotations:
summary: 'foo metric is missing for job {{ $labels.job }}'
summary: "foo metric is missing for job {{ $labels.job }}"
```

{% endraw %}
Expand Down Expand Up @@ -1291,7 +1311,7 @@
- alert: Foo
expr: count(up) without(instance) == 0
annotations:
summary: 'foo is down on {{ $labels.instance }}'
summary: "foo is down on {{ $labels.instance }}"
```

{% endraw %}
Expand All @@ -1302,7 +1322,7 @@

### Fixed

- Fixed file descriptor leak due to missing file `Close()` #69.
- Fixed file descriptor leak due to missing file `Close()` #69.

## v0.1.4

Expand Down
61 changes: 57 additions & 4 deletions internal/checks/alerts_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ func (c TemplateCheck) Check(ctx context.Context, _ string, rule parser.Rule, _

aggrs := utils.HasOuterAggregation(rule.AlertingRule.Expr.Query)
absentCalls := utils.HasOuterAbsent(rule.AlertingRule.Expr.Query)
vectors := utils.HasVectorSelector(rule.AlertingRule.Expr.Query)

var safeLabels []string
for _, be := range binaryExprs(rule.AlertingRule.Expr.Query) {
Expand Down Expand Up @@ -179,6 +180,19 @@ func (c TemplateCheck) Check(ctx context.Context, _ string, rule parser.Rule, _
})
}
}

labelNames := getTemplateLabels(label.Key.Value, label.Value.Value)
if len(labelNames) > 0 && len(vectors) == 0 {
for _, name := range labelNames {
problems = append(problems, Problem{
Fragment: fmt.Sprintf("%s: %s", label.Key.Value, label.Value.Value),
Lines: mergeLines(label.Lines(), rule.AlertingRule.Expr.Lines()),
Reporter: c.Reporter(),
Text: fmt.Sprintf("template is using %q label but the query doesn't produce any labels", name),
Severity: Bug,
})
}
}
}
}

Expand Down Expand Up @@ -228,6 +242,19 @@ func (c TemplateCheck) Check(ctx context.Context, _ string, rule parser.Rule, _
}
}

labelNames := getTemplateLabels(annotation.Key.Value, annotation.Value.Value)
if len(labelNames) > 0 && len(vectors) == 0 {
for _, name := range labelNames {
problems = append(problems, Problem{
Fragment: fmt.Sprintf("%s: %s", annotation.Key.Value, annotation.Value.Value),
Lines: mergeLines(annotation.Lines(), rule.AlertingRule.Expr.Lines()),
Reporter: c.Reporter(),
Text: fmt.Sprintf("template is using %q label but the query doesn't produce any labels", name),
Severity: Bug,
})
}
}

if hasValue(annotation.Key.Value, annotation.Value.Value) && !hasHumanize(annotation.Key.Value, annotation.Value.Value) {
for _, problem := range c.checkHumanizeIsNeeded(rule.AlertingRule.Expr.Query) {
problems = append(problems, Problem{
Expand Down Expand Up @@ -458,24 +485,50 @@ func getVariables(node parse.Node) (vars [][]string) {
return vars
}

func checkMetricLabels(msg, name, text string, metricLabels []string, excludeLabels bool, safeLabels []string) (msgs []string) {
func findTemplateVariables(name, text string) (vars [][]string, aliases aliasMap, ok bool) {
t, err := textTemplate.
New(name).
Funcs(templateFuncMap).
Option("missingkey=zero").
Parse(strings.Join(append(templateDefs, text), ""))
if err != nil {
// no need to double report errors
return nil
return vars, aliases, false
}

aliases := aliasMap{aliases: map[string]map[string]struct{}{}}
vars := [][]string{}
aliases.aliases = map[string]map[string]struct{}{}
for _, node := range t.Root.Nodes {
getAliases(node, &aliases)
vars = append(vars, getVariables(node)...)
}

return vars, aliases, true
}

func getTemplateLabels(name, text string) (names []string) {
vars, aliases, ok := findTemplateVariables(name, text)
if !ok {
return nil
}

labelsAliases := aliases.varAliases(".Labels")
for _, v := range vars {
for _, a := range labelsAliases {
if len(v) > 1 && v[0] == a {
names = append(names, v[1])
}
}
}

return names
}

func checkMetricLabels(msg, name, text string, metricLabels []string, excludeLabels bool, safeLabels []string) (msgs []string) {
vars, aliases, ok := findTemplateVariables(name, text)
if !ok {
return nil
}

done := map[string]struct{}{}
labelsAliases := aliases.varAliases(".Labels")
for _, v := range vars {
Expand Down
58 changes: 58 additions & 0 deletions internal/checks/alerts_template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1017,6 +1017,64 @@ func TestTemplateCheck(t *testing.T) {
prometheus: noProm,
problems: noProblems,
},
{
description: "annotation label from vector(0)",
content: "- alert: DeadMansSwitch\n expr: vector(1)\n annotations:\n summary: 'Deadmans switch on {{ $labels.instance }} is firing'\n",
checker: newTemplateCheck,
prometheus: noProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: `summary: Deadmans switch on {{ $labels.instance }} is firing`,
Lines: []int{2, 4},
Reporter: checks.TemplateCheckName,
Text: `template is using "instance" label but the query doesn't produce any labels`,
Severity: checks.Bug,
},
}
},
},
{
description: "labels label from vector(0)",
content: "- alert: DeadMansSwitch\n expr: vector(1)\n labels:\n summary: 'Deadmans switch on {{ $labels.instance }} is firing'\n",
checker: newTemplateCheck,
prometheus: noProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: `summary: Deadmans switch on {{ $labels.instance }} is firing`,
Lines: []int{2, 4},
Reporter: checks.TemplateCheckName,
Text: `template is using "instance" label but the query doesn't produce any labels`,
Severity: checks.Bug,
},
}
},
},
{
description: "annotation label from number",
content: "- alert: DeadMansSwitch\n expr: 1 > bool 0\n annotations:\n summary: 'Deadmans switch on {{ $labels.instance }} / {{ $labels.job }} is firing'\n",
checker: newTemplateCheck,
prometheus: noProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: `summary: Deadmans switch on {{ $labels.instance }} / {{ $labels.job }} is firing`,
Lines: []int{2, 4},
Reporter: checks.TemplateCheckName,
Text: `template is using "instance" label but the query doesn't produce any labels`,
Severity: checks.Bug,
},
{
Fragment: `summary: Deadmans switch on {{ $labels.instance }} / {{ $labels.job }} is firing`,
Lines: []int{2, 4},
Reporter: checks.TemplateCheckName,
Text: `template is using "job" label but the query doesn't produce any labels`,
Severity: checks.Bug,
},
}
},
},
}
runTests(t, testCases)
}

0 comments on commit 6b679f1

Please sign in to comment.