Skip to content

Commit

Permalink
Merge pull request #684 from cloudflare/issue-682
Browse files: browse the repository at this point in the history
Don't fail when baseline query fails
  • Loading branch information
prymitive authored Jul 31, 2023
2 parents 8da28d6 + 7a9ff13 commit 7ada3f9
Show file tree
Hide file tree
Showing 4 changed files with 194 additions and 0 deletions.
1 change: 1 addition & 0 deletions cmd/pint/tests/0080_lint_online.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ cmp stderr stderr.txt
-- stderr.txt --
level=info msg="Loading configuration file" path=.pint.hcl
level=warn msg="No results for Prometheus uptime metric, you might have set uptime config option to a missing metric, please check your config" metric=prometheus_ready name=prom1
level=warn msg="Using dummy Prometheus uptime metric results with no gaps" metric=prometheus_ready name=prom1
rules/1.yml:2 Warning: http_errors_total[2d] selector is trying to query Prometheus for 2d worth of metrics, but prometheus "prom1" at http://127.0.0.1:7080 is configured to only keep 1d of metrics history (promql/range_query)
2 | expr: rate(http_errors_total[2d]) > 0

Expand Down
7 changes: 7 additions & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## v0.44.2

### Fixed

- Fixed a crash in the `promql/series` check when the baseline (uptime) query
fails, for example when the Prometheus instance becomes unavailable - #682.

## v0.44.1

### Fixed
Expand Down
21 changes: 21 additions & 0 deletions internal/checks/promql_series.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,27 @@ func (c SeriesCheck) Check(ctx context.Context, _ string, rule parser.Rule, entr
Str("metric", c.prom.UptimeMetric()).
Msg("No results for Prometheus uptime metric, you might have set uptime config option to a missing metric, please check your config")
}
if promUptime == nil || promUptime.Series.Ranges.Len() == 0 {
log.Warn().
Str("name", c.prom.Name()).
Str("metric", c.prom.UptimeMetric()).
Msg("Using dummy Prometheus uptime metric results with no gaps")
promUptime = &promapi.RangeQueryResult{
Series: promapi.SeriesTimeRanges{
From: params.Start(),
Until: params.End(),
Step: params.Step(),
Ranges: promapi.MetricTimeRanges{
{
Fingerprint: 0,
Labels: labels.Labels{},
Start: params.Start(),
End: params.End(),
},
},
},
}
}

bareSelector := stripLabels(selector)

Expand Down
165 changes: 165 additions & 0 deletions internal/checks/promql_series_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1060,6 +1060,131 @@ func TestSeriesCheck(t *testing.T) {
},
},
},
{
description: "#3 metric present once with labels, failed baseline query",
content: "- record: foo\n expr: sum(found{job=\"abc\", cluster=\"dev\"})\n",
checker: newSeriesCheck,
prometheus: newSimpleProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: `found`,
Lines: []int{2},
Reporter: checks.SeriesCheckName,
Text: seriesSometimesText("prom", uri, `found`, "1w", "1d5m"),
Severity: checks.Warning,
},
}
},
mocks: []*prometheusMock{
{
conds: []requestCondition{
requireQueryPath,
formCond{key: "query", value: `count(found{cluster="dev",job="abc"})`},
},
resp: respondWithEmptyVector(),
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `count(found)`},
},
resp: matrixResponse{
samples: []*model.SampleStream{
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-5),
time.Now().Add(time.Hour*24*-4),
time.Minute*5,
),
},
},
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `count(found{job="abc"})`},
},
resp: matrixResponse{
samples: []*model.SampleStream{
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-5),
time.Now().Add(time.Hour*24*-5),
time.Minute*5,
),
},
},
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `count(found{cluster="dev"})`},
},
resp: matrixResponse{
samples: []*model.SampleStream{
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-5),
time.Now().Add(time.Hour*24*-5).Add(time.Minute*10),
time.Minute*5,
),
},
},
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `absent(found{job=~".+"})`},
},
resp: matrixResponse{
samples: []*model.SampleStream{
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-7),
time.Now().Add(time.Hour*24*-5).Add(time.Minute*-10),
time.Minute*5,
),
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-5).Add(time.Minute*5),
time.Now(),
time.Minute*5,
),
},
},
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `absent(found{cluster=~".+"})`},
},
resp: matrixResponse{
samples: []*model.SampleStream{
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-7),
time.Now().Add(time.Hour*24*-5).Add(time.Minute*-10),
time.Minute*5,
),
generateSampleStream(
map[string]string{},
time.Now().Add(time.Hour*24*-5).Add(time.Minute*10),
time.Now(),
time.Minute*5,
),
},
},
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: "count(up)"},
},
resp: respondWithInternalError(),
},
},
},
{
description: "#4 metric was present but disappeared 50m ago",
content: "- record: foo\n expr: sum(found{job=\"foo\", instance=\"bar\"})\n",
Expand Down Expand Up @@ -3024,6 +3149,46 @@ func TestSeriesCheck(t *testing.T) {
},
},
},
{
description: "series missing, failed baseline query",
content: "- record: foo\n expr: count(notfound) == 0\n",
checker: newSeriesCheck,
prometheus: newSimpleProm,
problems: func(uri string) []checks.Problem {
return []checks.Problem{
{
Fragment: `notfound`,
Lines: []int{2},
Reporter: checks.SeriesCheckName,
Text: noSeriesText("prom", uri, "notfound", "1w"),
Severity: checks.Bug,
},
}
},
mocks: []*prometheusMock{
{
conds: []requestCondition{
requireQueryPath,
formCond{key: "query", value: `count(notfound)`},
},
resp: respondWithEmptyVector(),
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: `count(notfound)`},
},
resp: respondWithEmptyMatrix(),
},
{
conds: []requestCondition{
requireRangeQueryPath,
formCond{key: "query", value: "count(up)"},
},
resp: respondWithInternalError(),
},
},
},
}
runTests(t, testCases)
}

0 comments on commit 7ada3f9

Please sign in to comment.