Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't fail when baseline query fails #684

Merged
merged 1 commit into from
Jul 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cmd/pint/tests/0080_lint_online.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cmp stderr stderr.txt
-- stderr.txt --
level=info msg="Loading configuration file" path=.pint.hcl
level=warn msg="No results for Prometheus uptime metric, you might have set uptime config option to a missing metric, please check your config" metric=prometheus_ready name=prom1
level=warn msg="Using dummy Prometheus uptime metric results with no gaps" metric=prometheus_ready name=prom1
rules/1.yml:2 Warning: http_errors_total[2d] selector is trying to query Prometheus for 2d worth of metrics, but prometheus "prom1" at http://127.0.0.1:7080 is configured to only keep 1d of metrics history (promql/range_query)
2 | expr: rate(http_errors_total[2d]) > 0

Expand Down
7 changes: 7 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## v0.44.2

### Fixed

- Fixed a crash in `promql/series` check when a Prometheus instance becomes
  unavailable - #682.

## v0.44.1

### Fixed
Expand Down
21 changes: 21 additions & 0 deletions internal/checks/promql_series.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,27 @@ func (c SeriesCheck) Check(ctx context.Context, _ string, rule parser.Rule, entr
Str("metric", c.prom.UptimeMetric()).
Msg("No results for Prometheus uptime metric, you might have set uptime config option to a missing metric, please check your config")
}
if promUptime == nil || promUptime.Series.Ranges.Len() == 0 {
log.Warn().
Str("name", c.prom.Name()).
Str("metric", c.prom.UptimeMetric()).
Msg("Using dummy Prometheus uptime metric results with no gaps")
promUptime = &promapi.RangeQueryResult{
Series: promapi.SeriesTimeRanges{
From: params.Start(),
Until: params.End(),
Step: params.Step(),
Ranges: promapi.MetricTimeRanges{
{
Fingerprint: 0,
Labels: labels.Labels{},
Start: params.Start(),
End: params.End(),
},
},
},
}
}

bareSelector := stripLabels(selector)

Expand Down
165 changes: 165 additions & 0 deletions internal/checks/promql_series_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1060,6 +1060,131 @@ func TestSeriesCheck(t *testing.T) {
},
},
},
{
description: "#3 metric present once with labels, failed baseline query",
content: "- record: foo\n expr: sum(found{job=\"abc\", cluster=\"dev\"})\n",
checker: newSeriesCheck,
prometheus: newSimpleProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: `found`,
Lines: []int{2},
Reporter: checks.SeriesCheckName,
Text: seriesSometimesText("prom", uri, `found`, "1w", "1d5m"),
Severity: checks.Warning,
},
}
},
mocks: []*prometheusMock{
{
conds: []requestCondition{
requireQueryPath,
formCond{key: "query", value: `count(found{cluster="dev",job="abc"})`},
},
resp: respondWithEmptyVector(),
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `count(found)`},
},
resp: matrixResponse{
samples: []*model.SampleStream{
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-5),
time.Now().Add(time.Hour*24*-4),
time.Minute*5,
),
},
},
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `count(found{job="abc"})`},
},
resp: matrixResponse{
samples: []*model.SampleStream{
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-5),
time.Now().Add(time.Hour*24*-5),
time.Minute*5,
),
},
},
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `count(found{cluster="dev"})`},
},
resp: matrixResponse{
samples: []*model.SampleStream{
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-5),
time.Now().Add(time.Hour*24*-5).Add(time.Minute*10),
time.Minute*5,
),
},
},
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `absent(found{job=~".+"})`},
},
resp: matrixResponse{
samples: []*model.SampleStream{
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-7),
time.Now().Add(time.Hour*24*-5).Add(time.Minute*-10),
time.Minute*5,
),
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-5).Add(time.Minute*5),
time.Now(),
time.Minute*5,
),
},
},
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `absent(found{cluster=~".+"})`},
},
resp: matrixResponse{
samples: []*model.SampleStream{
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-7),
time.Now().Add(time.Hour*24*-5).Add(time.Minute*-10),
time.Minute*5,
),
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-5).Add(time.Minute*10),
time.Now(),
time.Minute*5,
),
},
},
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: "count(up)"},
},
resp: respondWithInternalError(),
},
},
},
{
description: "#4 metric was present but disappeared 50m ago",
content: "- record: foo\n expr: sum(found{job=\"foo\", instance=\"bar\"})\n",
Expand Down Expand Up @@ -3024,6 +3149,46 @@ func TestSeriesCheck(t *testing.T) {
},
},
},
{
description: "series missing, failed baseline query",
content: "- record: foo\n expr: count(notfound) == 0\n",
checker: newSeriesCheck,
prometheus: newSimpleProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: `notfound`,
Lines: []int{2},
Reporter: checks.SeriesCheckName,
Text: noSeriesText("prom", uri, "notfound", "1w"),
Severity: checks.Bug,
},
}
},
mocks: []*prometheusMock{
{
conds: []requestCondition{
requireQueryPath,
formCond{key: "query", value: `count(notfound)`},
},
resp: respondWithEmptyVector(),
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `count(notfound)`},
},
resp: respondWithEmptyMatrix(),
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: "count(up)"},
},
resp: respondWithInternalError(),
},
},
},
}
runTests(t, testCases)
}