diff --git a/core/cmd/shell.go b/core/cmd/shell.go
index 966fa1a0ff8..829ed9d55ce 100644
--- a/core/cmd/shell.go
+++ b/core/cmd/shell.go
@@ -51,6 +51,7 @@ import (
 	"github.com/smartcontractkit/chainlink/v2/core/services/relay/evm/mercury/wsrpc/cache"
 	"github.com/smartcontractkit/chainlink/v2/core/services/versioning"
 	"github.com/smartcontractkit/chainlink/v2/core/services/webhook"
+	"github.com/smartcontractkit/chainlink/v2/core/services/workflows"
 	"github.com/smartcontractkit/chainlink/v2/core/sessions"
 	"github.com/smartcontractkit/chainlink/v2/core/static"
 	"github.com/smartcontractkit/chainlink/v2/core/store/migrate"
@@ -109,6 +110,10 @@ func initGlobals(cfgProm config.Prometheus, cfgTracing config.Tracing, cfgTeleme
 			AuthPublicKeyHex: csaPubKeyHex,
 			AuthHeaders: beholderAuthHeaders,
 		}
+		// note: due to the OTEL specification, all histogram buckets
+		// must be defined when the beholder client is created
+		clientCfg.MetricViews = append(clientCfg.MetricViews, workflows.MetricViews()...)
+
 		if tracingCfg.Enabled {
 			clientCfg.TraceSpanExporter, err = tracingCfg.NewSpanExporter()
 			if err != nil {
diff --git a/core/services/workflows/monitoring.go b/core/services/workflows/monitoring.go
index d498ff354c9..f4e993d8c6f 100644
--- a/core/services/workflows/monitoring.go
+++ b/core/services/workflows/monitoring.go
@@ -5,6 +5,7 @@ import (
 	"fmt"
 
 	"go.opentelemetry.io/otel/metric"
+	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
 
 	"github.com/smartcontractkit/chainlink-common/pkg/beholder"
 	"github.com/smartcontractkit/chainlink-common/pkg/metrics"
@@ -53,6 +54,37 @@ func initMonitoringResources() (err error) {
 	return nil
 }
 
+// MetricViews returns the explicit-bucket histogram views for the workflow engine timing metrics.
+// Note: due to the OTEL specification, all histogram buckets must be defined when the beholder client is created.
+func MetricViews() []sdkmetric.View {
+	return []sdkmetric.View{
+		sdkmetric.NewView(
+			sdkmetric.Instrument{Name: "platform_engine_workflow_earlyexit_time_seconds"},
+			sdkmetric.Stream{Aggregation: sdkmetric.AggregationExplicitBucketHistogram{
+				Boundaries: []float64{0, 1, 10, 100},
+			}},
+		),
+		sdkmetric.NewView(
+			sdkmetric.Instrument{Name: "platform_engine_workflow_completed_time_seconds"},
+			sdkmetric.Stream{Aggregation: sdkmetric.AggregationExplicitBucketHistogram{
+				Boundaries: []float64{0, 100, 1000, 10_000, 50_000, 100_000, 500_000},
+			}},
+		),
+		sdkmetric.NewView(
+			sdkmetric.Instrument{Name: "platform_engine_workflow_error_time_seconds"},
+			sdkmetric.Stream{Aggregation: sdkmetric.AggregationExplicitBucketHistogram{
+				Boundaries: []float64{0, 20, 60, 120, 240},
+			}},
+		),
+		sdkmetric.NewView(
+			sdkmetric.Instrument{Name: "platform_engine_workflow_step_time_seconds"},
+			sdkmetric.Stream{Aggregation: sdkmetric.AggregationExplicitBucketHistogram{
+				Boundaries: []float64{0, 20, 60, 120, 240},
+			}},
+		),
+	}
+}
+
 // workflowsMetricLabeler wraps monitoring.MetricsLabeler to provide workflow specific utilities
 // for monitoring resources
 type workflowsMetricLabeler struct {
diff --git a/go.mod b/go.mod
index e80ee7a7ba2..f532f3d9a6f 100644
--- a/go.mod
+++ b/go.mod
@@ -104,6 +104,7 @@ require (
 	go.opentelemetry.io/contrib/instrumentation/github.com/gin-gonic/gin/otelgin v0.49.0
 	go.opentelemetry.io/otel v1.31.0
 	go.opentelemetry.io/otel/metric v1.31.0
+	go.opentelemetry.io/otel/sdk/metric v1.31.0
 	go.opentelemetry.io/otel/trace v1.31.0
 	go.uber.org/multierr v1.11.0
 	go.uber.org/zap v1.27.0
@@ -361,7 +362,6 @@ require (
 	go.opentelemetry.io/otel/log v0.6.0 // indirect
 	go.opentelemetry.io/otel/sdk v1.31.0 // indirect
 	go.opentelemetry.io/otel/sdk/log v0.6.0 // indirect
-	go.opentelemetry.io/otel/sdk/metric v1.31.0 // indirect
 	go.opentelemetry.io/proto/otlp v1.3.1 // indirect
 	go.uber.org/ratelimit v0.3.0 // indirect
 	golang.org/x/arch v0.11.0 // indirect