diff --git a/internal/metrics/options.go b/internal/metrics/options.go index f274582f459..5b9ca961d5d 100644 --- a/internal/metrics/options.go +++ b/internal/metrics/options.go @@ -5,7 +5,7 @@ package metrics -// Options encode changes to the options passed to a Metric at creation time. +// MetricOption encodes changes to the options passed to a Metric at creation time. type MetricOption func(*MetricOptions) type MetricOptions struct { diff --git a/internal/metrics/register.go b/internal/metrics/register.go index 9c8abdf479d..e6332aeb8c9 100644 --- a/internal/metrics/register.go +++ b/internal/metrics/register.go @@ -12,6 +12,8 @@ import ( "net/http" "time" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" @@ -19,9 +21,6 @@ import ( "go.opentelemetry.io/otel/sdk/metric" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promhttp" - "github.com/envoyproxy/gateway/api/v1alpha1" "github.com/envoyproxy/gateway/internal/envoygateway/config" ) diff --git a/internal/provider/kubernetes/metrics.go b/internal/provider/kubernetes/metrics.go new file mode 100644 index 00000000000..236aa150e1e --- /dev/null +++ b/internal/provider/kubernetes/metrics.go @@ -0,0 +1,19 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo.
+ +package kubernetes + +import "github.com/envoyproxy/gateway/internal/metrics" + +var ( + statusUpdateTotal = metrics.NewCounter("status_update_total", "Total number of status updates by object kind.") + statusUpdateFailed = metrics.NewCounter("status_update_failed_total", "Number of status updates that failed by object kind.") + statusUpdateConflict = metrics.NewCounter("status_update_conflict_total", "Number of status update conflicts encountered by object kind.") + statusUpdateSuccess = metrics.NewCounter("status_update_success_total", "Number of status updates that succeeded by object kind.") + statusUpdateNoop = metrics.NewCounter("status_update_noop_total", "Number of status updates that are no-ops by object kind. This is a subset of successful status updates.") + statusUpdateDurationSeconds = metrics.NewHistogram("status_update_duration_seconds", "How long a status update takes to finish.", []float64{0.001, 0.01, 0.1, 1, 5, 10}) + + kindLabel = metrics.NewLabel("kind") +) diff --git a/internal/provider/kubernetes/status_updater.go b/internal/provider/kubernetes/status_updater.go index 1b25cfbf489..cce15c1b0ca 100644 --- a/internal/provider/kubernetes/status_updater.go +++ b/internal/provider/kubernetes/status_updater.go @@ -7,6 +7,7 @@ package kubernetes import ( "context" + "time" "github.com/go-logr/logr" "github.com/google/go-cmp/cmp" @@ -21,6 +22,7 @@ import ( gwapiv1a3 "sigs.k8s.io/gateway-api/apis/v1alpha3" egv1a1 "github.com/envoyproxy/gateway/api/v1alpha1" + "github.com/envoyproxy/gateway/internal/gatewayapi" ) // Update contains an all the information needed to update an object's status. 
@@ -66,9 +68,26 @@ func NewUpdateHandler(log logr.Logger, client client.Client) *UpdateHandler { } func (u *UpdateHandler) apply(update Update) { - if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error { - obj := update.Resource + var ( + startTime = time.Now() + obj = update.Resource + objKind = kindOf(obj) + ) + statusUpdateTotal.With(kindLabel.Value(objKind)).Increment() + + defer func() { + updateDuration := time.Since(startTime) + statusUpdateDurationSeconds.With(kindLabel.Value(objKind)).Record(updateDuration.Seconds()) + }() + + if err := retry.OnError(retry.DefaultBackoff, func(err error) bool { + if kerrors.IsConflict(err) { + statusUpdateConflict.With(kindLabel.Value(objKind)).Increment() + return true + } + return false + }, func() error { // Get the resource. if err := u.client.Get(context.Background(), update.NamespacedName, obj); err != nil { if kerrors.IsNotFound(err) { @@ -83,6 +102,8 @@ func (u *UpdateHandler) apply(update Update) { u.log.WithName(update.NamespacedName.Name). WithName(update.NamespacedName.Namespace). 
Info("status unchanged, bypassing update") + + statusUpdateNoop.With(kindLabel.Value(objKind)).Increment() return nil } @@ -92,6 +113,10 @@ func (u *UpdateHandler) apply(update Update) { }); err != nil { u.log.Error(err, "unable to update status", "name", update.NamespacedName.Name, "namespace", update.NamespacedName.Namespace) + + statusUpdateFailed.With(kindLabel.Value(objKind)).Increment() + } else { + statusUpdateSuccess.With(kindLabel.Value(objKind)).Increment() } } @@ -162,8 +187,10 @@ func (u *UpdateWriter) Send(update Update) { // GRPCRoute // EnvoyPatchPolicy // ClientTrafficPolicy +// BackendTrafficPolicy // SecurityPolicy // BackendTLSPolicy +// EnvoyExtensionPolicy func isStatusEqual(objA, objB interface{}) bool { opts := cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime") switch a := objA.(type) { @@ -233,7 +260,7 @@ func isStatusEqual(objA, objB interface{}) bool { return true } } - case gwapiv1a3.BackendTLSPolicy: + case *gwapiv1a3.BackendTLSPolicy: if b, ok := objB.(*gwapiv1a3.BackendTLSPolicy); ok { if cmp.Equal(a.Status, b.Status, opts) { return true @@ -248,3 +275,57 @@ func isStatusEqual(objA, objB interface{}) bool { } return false } + +// kindOf returns the known kind string for the given Kubernetes object, +// or "Unknown" for an unsupported object.
+// +// Supported objects: +// +// GatewayClass +// Gateway +// HTTPRoute +// TLSRoute +// TCPRoute +// UDPRoute +// GRPCRoute +// EnvoyPatchPolicy +// ClientTrafficPolicy +// BackendTrafficPolicy +// SecurityPolicy +// BackendTLSPolicy +// EnvoyExtensionPolicy +func kindOf(obj interface{}) string { + var kind string + switch obj.(type) { + case *gwapiv1.GatewayClass: + kind = gatewayapi.KindGatewayClass + case *gwapiv1.Gateway: + kind = gatewayapi.KindGateway + case *gwapiv1.HTTPRoute: + kind = gatewayapi.KindHTTPRoute + case *gwapiv1a2.TLSRoute: + kind = gatewayapi.KindTLSRoute + case *gwapiv1a2.TCPRoute: + kind = gatewayapi.KindTCPRoute + case *gwapiv1a2.UDPRoute: + kind = gatewayapi.KindUDPRoute + case *gwapiv1.GRPCRoute: + kind = gatewayapi.KindGRPCRoute + case *egv1a1.EnvoyPatchPolicy: + kind = gatewayapi.KindEnvoyPatchPolicy + case *egv1a1.ClientTrafficPolicy: + kind = gatewayapi.KindClientTrafficPolicy + case *egv1a1.BackendTrafficPolicy: + kind = gatewayapi.KindBackendTrafficPolicy + case *egv1a1.SecurityPolicy: + kind = gatewayapi.KindSecurityPolicy + case *egv1a1.EnvoyExtensionPolicy: + kind = gatewayapi.KindEnvoyExtensionPolicy + case *gwapiv1a3.BackendTLSPolicy: + kind = gatewayapi.KindBackendTLSPolicy + default: + kind = "Unknown" + } + + return kind +}