From 720dee619ce19361fa5528ec924ea9fa689f3979 Mon Sep 17 00:00:00 2001 From: Vaidas Balys Date: Wed, 9 Oct 2024 15:17:24 +0300 Subject: [PATCH] Configure timeouts and resilience. Query timeout per individual GraphQL query. Configure whether to attempt to execute as many queries as possible or bail out on the first error. Rename "cache on error" config variable for better clarity. --- config_example.json | 3 ++- internal/config/config.go | 15 ++++++++------- internal/prometheus/prometheus.go | 18 +++++++++++++----- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/config_example.json b/config_example.json index 58637b6..6139e77 100644 --- a/config_example.json +++ b/config_example.json @@ -4,7 +4,8 @@ "graphqlAPIToken": "Token SECRET", "cacheExpire": 0, "queryTimeout": 60, - "retryOnError": false, + "failFast": false, + "extendCacheOnError": false, "queries":[ { "query": "query {device_list {name serial custom_fields}} {{NOW \"-1h\"}}", diff --git a/internal/config/config.go b/internal/config/config.go index 8f1464b..efa86d2 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -8,13 +8,14 @@ import ( ) type Cfg struct { - GraphqlURL string - GraphqlAPIToken string - CacheExpire int64 - QueryTimeout int64 - RetryOnError bool - MetricsPrefix string - Queries []Query + MetricsPrefix string + GraphqlURL string + GraphqlAPIToken string + CacheExpire int64 + QueryTimeout int64 + FailFast bool + ExtendCacheOnError bool + Queries []Query } type Query struct { diff --git a/internal/prometheus/prometheus.go b/internal/prometheus/prometheus.go index 3baafb4..ab1ac2c 100644 --- a/internal/prometheus/prometheus.go +++ b/internal/prometheus/prometheus.go @@ -122,13 +122,21 @@ func (collector *GraphqlCollector) getMetrics() ([]Metric, error) { result, err := graphql.GraphqlQuery(ctx, q.Query) cancel() if err != nil { - slog.Error(fmt.Sprintf("query error: %s", err)) - continue + if config.Config.FailFast { + return nil, err + } else { 
slog.Error(fmt.Sprintf("query error: %s", err)) + continue + } } err = json.Unmarshal(result, &gql) if err != nil { - slog.Error(fmt.Sprintf("unmarshal error: %s", err)) - continue + if config.Config.FailFast { + return nil, err + } else { + slog.Error(fmt.Sprintf("unmarshal error: %s", err)) + continue + } } data := gql.Data.(map[string]interface{}) for _, m := range q.Metrics { @@ -167,7 +175,7 @@ func (collector *GraphqlCollector) updateMetrics() error { defer collector.accessMu.Unlock() if err != nil { slog.Error(fmt.Sprintf("error collecting metrics: %s", err)) - if !config.Config.RetryOnError { + if config.Config.ExtendCacheOnError { collector.cachedAt = time.Now().Unix() } return err