diff --git a/cmd/pint/tests/0080_lint_online.txt b/cmd/pint/tests/0080_lint_online.txt index 02733e2a..321a7d78 100644 --- a/cmd/pint/tests/0080_lint_online.txt +++ b/cmd/pint/tests/0080_lint_online.txt @@ -12,6 +12,7 @@ cmp stderr stderr.txt -- stderr.txt -- level=info msg="Loading configuration file" path=.pint.hcl level=warn msg="No results for Prometheus uptime metric, you might have set uptime config option to a missing metric, please check your config" metric=prometheus_ready name=prom1 +level=warn msg="Using dummy Prometheus uptime metric results with no gaps" metric=prometheus_ready name=prom1 rules/1.yml:2 Warning: http_errors_total[2d] selector is trying to query Prometheus for 2d worth of metrics, but prometheus "prom1" at http://127.0.0.1:7080 is configured to only keep 1d of metrics history (promql/range_query) 2 | expr: rate(http_errors_total[2d]) > 0 diff --git a/docs/changelog.md b/docs/changelog.md index 398ca012..78de0fa2 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,5 +1,12 @@ # Changelog +## v0.44.2 + +### Fixed + +- Fixed a crash in `promql/series` check when Prometheus instance becomes + unavailable - #682. + ## v0.44.1 ### Fixed diff --git a/internal/checks/promql_series.go b/internal/checks/promql_series.go index 83de944f..e8423287 100644 --- a/internal/checks/promql_series.go +++ b/internal/checks/promql_series.go @@ -195,6 +195,27 @@ func (c SeriesCheck) Check(ctx context.Context, _ string, rule parser.Rule, entr Str("metric", c.prom.UptimeMetric()). Msg("No results for Prometheus uptime metric, you might have set uptime config option to a missing metric, please check your config") } + if promUptime == nil || promUptime.Series.Ranges.Len() == 0 { + log.Warn(). + Str("name", c.prom.Name()). + Str("metric", c.prom.UptimeMetric()). + Msg("Using dummy Prometheus uptime metric results with no gaps") + promUptime = &promapi.RangeQueryResult{ + Series: promapi.SeriesTimeRanges{ + From: params.Start(), + Until: params.End(), + Step: params.Step(), + Ranges: promapi.MetricTimeRanges{ + { + Fingerprint: 0, + Labels: labels.Labels{}, + Start: params.Start(), + End: params.End(), + }, + }, + }, + } + } bareSelector := stripLabels(selector) diff --git a/internal/checks/promql_series_test.go b/internal/checks/promql_series_test.go index bbc78e22..517d933f 100644 --- a/internal/checks/promql_series_test.go +++ b/internal/checks/promql_series_test.go @@ -1060,6 +1060,131 @@ func TestSeriesCheck(t *testing.T) { }, }, }, + { + description: "#3 metric present once with labels, failed baseline query", + content: "- record: foo\n expr: sum(found{job=\"abc\", cluster=\"dev\"})\n", + checker: newSeriesCheck, + prometheus: newSimpleProm, + problems: func(uri string) []checks.Problem { + return []checks.Problem{ + { + Fragment: `found`, + Lines: []int{2}, + Reporter: checks.SeriesCheckName, + Text: seriesSometimesText("prom", uri, `found`, "1w", "1d5m"), + Severity: checks.Warning, + }, + } + }, + mocks: []*prometheusMock{ + { + conds: []requestCondition{ + requireQueryPath, + formCond{key: "query", value: `count(found{cluster="dev",job="abc"})`}, + }, + resp: respondWithEmptyVector(), + }, + { + conds: []requestCondition{ + requireRangeQueryPath, + formCond{key: "query", value: `count(found)`}, + }, + resp: matrixResponse{ + samples: []*model.SampleStream{ + generateSampleStream( + map[string]string{}, + time.Now().Add(time.Hour*24*-5), + time.Now().Add(time.Hour*24*-4), + time.Minute*5, + ), + }, + }, + }, + { + conds: []requestCondition{ + requireRangeQueryPath, + formCond{key: "query", value: `count(found{job="abc"})`}, + }, + resp: matrixResponse{ + samples: []*model.SampleStream{ + generateSampleStream( + map[string]string{}, + time.Now().Add(time.Hour*24*-5), + time.Now().Add(time.Hour*24*-5), + time.Minute*5, + ), + }, + }, + }, + { + conds: []requestCondition{ + requireRangeQueryPath, + formCond{key: "query", value: `count(found{cluster="dev"})`}, + }, + resp: matrixResponse{ + samples: []*model.SampleStream{ + generateSampleStream( + map[string]string{}, + time.Now().Add(time.Hour*24*-5), + time.Now().Add(time.Hour*24*-5).Add(time.Minute*10), + time.Minute*5, + ), + }, + }, + }, + { + conds: []requestCondition{ + requireRangeQueryPath, + formCond{key: "query", value: `absent(found{job=~".+"})`}, + }, + resp: matrixResponse{ + samples: []*model.SampleStream{ + generateSampleStream( + map[string]string{}, + time.Now().Add(time.Hour*24*-7), + time.Now().Add(time.Hour*24*-5).Add(time.Minute*-10), + time.Minute*5, + ), + generateSampleStream( + map[string]string{}, + time.Now().Add(time.Hour*24*-5).Add(time.Minute*5), + time.Now(), + time.Minute*5, + ), + }, + }, + }, + { + conds: []requestCondition{ + requireRangeQueryPath, + formCond{key: "query", value: `absent(found{cluster=~".+"})`}, + }, + resp: matrixResponse{ + samples: []*model.SampleStream{ + generateSampleStream( + map[string]string{}, + time.Now().Add(time.Hour*24*-7), + time.Now().Add(time.Hour*24*-5).Add(time.Minute*-10), + time.Minute*5, + ), + generateSampleStream( + map[string]string{}, + time.Now().Add(time.Hour*24*-5).Add(time.Minute*10), + time.Now(), + time.Minute*5, + ), + }, + }, + }, + { + conds: []requestCondition{ + requireRangeQueryPath, + formCond{key: "query", value: "count(up)"}, + }, + resp: respondWithInternalError(), + }, + }, + }, { description: "#4 metric was present but disappeared 50m ago", content: "- record: foo\n expr: sum(found{job=\"foo\", instance=\"bar\"})\n", @@ -3024,6 +3149,46 @@ func TestSeriesCheck(t *testing.T) { }, }, }, + { + description: "series missing, failed baseline query", + content: "- record: foo\n expr: count(notfound) == 0\n", + checker: newSeriesCheck, + prometheus: newSimpleProm, + problems: func(uri string) []checks.Problem { + return []checks.Problem{ + { + Fragment: `notfound`, + Lines: []int{2}, + Reporter: checks.SeriesCheckName, + Text: noSeriesText("prom", uri, "notfound", "1w"), + Severity: checks.Bug, + }, + } + }, + mocks: []*prometheusMock{ + { + conds: []requestCondition{ + requireQueryPath, + formCond{key: "query", value: `count(notfound)`}, + }, + resp: respondWithEmptyVector(), + }, + { + conds: []requestCondition{ + requireRangeQueryPath, + formCond{key: "query", value: `count(notfound)`}, + }, + resp: respondWithEmptyMatrix(), + }, + { + conds: []requestCondition{ + requireRangeQueryPath, + formCond{key: "query", value: "count(up)"}, + }, + resp: respondWithInternalError(), + }, + }, + }, } runTests(t, testCases) }