From 6b8794e2e82cdc40fedc955c45a204bc05e7f2fe Mon Sep 17 00:00:00 2001 From: Xunzhuo Date: Thu, 26 Oct 2023 14:08:47 +0800 Subject: [PATCH] feat: add control plane metrics library (#1982) * feat: add control plane metrics library Signed-off-by: bitliu * update Signed-off-by: bitliu * update Signed-off-by: bitliu * rebase Signed-off-by: bitliu * update Signed-off-by: bitliu --------- Signed-off-by: bitliu --- api/v1alpha1/envoygateway_helpers.go | 1 - api/v1alpha1/shared_types.go | 4 + .../validation/envoygateway_validate_test.go | 229 ++++++++++++++++++ .../validation/envoyproxy_validate_test.go | 175 ------------- go.mod | 14 +- go.sum | 26 ++ internal/admin/server.go | 8 +- internal/cmd/server.go | 6 + internal/metrics/metadata.go | 102 ++++++++ internal/metrics/options.go | 31 +++ internal/metrics/otel_label.go | 48 ++++ internal/metrics/otel_metric_counter.go | 48 ++++ internal/metrics/otel_metric_gauge.go | 57 +++++ internal/metrics/otel_metric_histogram.go | 41 ++++ internal/metrics/otel_metric_sink.go | 100 ++++++++ internal/metrics/register.go | 207 ++++++++++++++++ internal/metrics/sample_counter_test.go | 23 ++ internal/metrics/sample_gauge_test.go | 27 +++ internal/metrics/sample_histogram_test.go | 23 ++ internal/metrics/units.go | 18 ++ site/content/en/latest/design/eg-metrics.md | 2 +- 21 files changed, 1008 insertions(+), 182 deletions(-) create mode 100644 internal/metrics/metadata.go create mode 100644 internal/metrics/options.go create mode 100644 internal/metrics/otel_label.go create mode 100644 internal/metrics/otel_metric_counter.go create mode 100644 internal/metrics/otel_metric_gauge.go create mode 100644 internal/metrics/otel_metric_histogram.go create mode 100644 internal/metrics/otel_metric_sink.go create mode 100644 internal/metrics/register.go create mode 100644 internal/metrics/sample_counter_test.go create mode 100644 internal/metrics/sample_gauge_test.go create mode 100644 internal/metrics/sample_histogram_test.go create mode 100644 
internal/metrics/units.go diff --git a/api/v1alpha1/envoygateway_helpers.go b/api/v1alpha1/envoygateway_helpers.go index e0352ba6969..ef9ab69706b 100644 --- a/api/v1alpha1/envoygateway_helpers.go +++ b/api/v1alpha1/envoygateway_helpers.go @@ -98,7 +98,6 @@ func (e *EnvoyGateway) GetEnvoyGatewayTelemetry() *EnvoyGatewayTelemetry { if e.Telemetry.Metrics.Prometheus == nil { e.Telemetry.Metrics.Prometheus = DefaultEnvoyGatewayPrometheus() } - if e.Telemetry.Metrics == nil { e.Telemetry.Metrics = DefaultEnvoyGatewayMetrics() } diff --git a/api/v1alpha1/shared_types.go b/api/v1alpha1/shared_types.go index 588eefebb47..bbda8a16ec3 100644 --- a/api/v1alpha1/shared_types.go +++ b/api/v1alpha1/shared_types.go @@ -21,6 +21,10 @@ const ( DefaultEnvoyProxyImage = "envoyproxy/envoy-dev:latest" // DefaultRateLimitImage is the default image used by ratelimit. DefaultRateLimitImage = "envoyproxy/ratelimit:master" + // HTTPProtocol is the commonly used HTTP protocol. + HTTPProtocol = "http" + // GRPCProtocol is the commonly used gRPC protocol. + GRPCProtocol = "grpc" ) // GroupVersionKind unambiguously identifies a Kind. 
diff --git a/api/v1alpha1/validation/envoygateway_validate_test.go b/api/v1alpha1/validation/envoygateway_validate_test.go index 06a3043be12..1728cb8a058 100644 --- a/api/v1alpha1/validation/envoygateway_validate_test.go +++ b/api/v1alpha1/validation/envoygateway_validate_test.go @@ -8,7 +8,9 @@ package validation import ( "testing" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" v1 "sigs.k8s.io/gateway-api/apis/v1" "github.com/envoyproxy/gateway/api/v1alpha1" @@ -469,3 +471,230 @@ func TestValidateEnvoyGateway(t *testing.T) { }) } } + +func TestEnvoyGateway(t *testing.T) { + envoyGateway := v1alpha1.DefaultEnvoyGateway() + assert.True(t, envoyGateway.Provider != nil) + assert.True(t, envoyGateway.Gateway != nil) + assert.True(t, envoyGateway.Logging != nil) + envoyGateway.SetEnvoyGatewayDefaults() + assert.Equal(t, envoyGateway.Logging, v1alpha1.DefaultEnvoyGatewayLogging()) + + logging := v1alpha1.DefaultEnvoyGatewayLogging() + assert.True(t, logging != nil) + assert.True(t, logging.Level[v1alpha1.LogComponentGatewayDefault] == v1alpha1.LogLevelInfo) + + gatewayLogging := &v1alpha1.EnvoyGatewayLogging{ + Level: logging.Level, + } + gatewayLogging.SetEnvoyGatewayLoggingDefaults() + assert.True(t, gatewayLogging != nil) + assert.True(t, gatewayLogging.Level[v1alpha1.LogComponentGatewayDefault] == v1alpha1.LogLevelInfo) +} + +func TestDefaultEnvoyGatewayLoggingLevel(t *testing.T) { + type args struct { + component string + level v1alpha1.LogLevel + } + tests := []struct { + name string + args args + want v1alpha1.LogLevel + }{ + { + name: "test default info level for empty level", + args: args{component: "", level: ""}, + want: v1alpha1.LogLevelInfo, + }, + { + name: "test default info level for empty level", + args: args{component: string(v1alpha1.LogComponentGatewayDefault), level: ""}, + want: v1alpha1.LogLevelInfo, + }, + { + name: "test default info level for info level", + args: 
args{component: string(v1alpha1.LogComponentGatewayDefault), level: v1alpha1.LogLevelInfo}, + want: v1alpha1.LogLevelInfo, + }, + { + name: "test default error level for error level", + args: args{component: string(v1alpha1.LogComponentGatewayDefault), level: v1alpha1.LogLevelError}, + want: v1alpha1.LogLevelError, + }, + { + name: "test gateway-api error level for error level", + args: args{component: string(v1alpha1.LogComponentGatewayAPIRunner), level: v1alpha1.LogLevelError}, + want: v1alpha1.LogLevelError, + }, + { + name: "test gateway-api info level for info level", + args: args{component: string(v1alpha1.LogComponentGatewayAPIRunner), level: v1alpha1.LogLevelInfo}, + want: v1alpha1.LogLevelInfo, + }, + { + name: "test default gateway-api warn level for info level", + args: args{component: string(v1alpha1.LogComponentGatewayAPIRunner), level: ""}, + want: v1alpha1.LogLevelInfo, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + logging := &v1alpha1.EnvoyGatewayLogging{} + if got := logging.DefaultEnvoyGatewayLoggingLevel(tt.args.level); got != tt.want { + t.Errorf("defaultLevel() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestEnvoyGatewayProvider(t *testing.T) { + envoyGateway := &v1alpha1.EnvoyGateway{ + TypeMeta: metav1.TypeMeta{}, + EnvoyGatewaySpec: v1alpha1.EnvoyGatewaySpec{Provider: v1alpha1.DefaultEnvoyGatewayProvider()}, + } + assert.True(t, envoyGateway.Provider != nil) + + envoyGatewayProvider := envoyGateway.GetEnvoyGatewayProvider() + assert.True(t, envoyGatewayProvider.Kubernetes == nil) + assert.Equal(t, envoyGateway.Provider, envoyGatewayProvider) + + envoyGatewayProvider.Kubernetes = v1alpha1.DefaultEnvoyGatewayKubeProvider() + assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment, v1alpha1.DefaultKubernetesDeployment(v1alpha1.DefaultRateLimitImage)) + + envoyGatewayProvider.Kubernetes = &v1alpha1.EnvoyGatewayKubernetesProvider{} + assert.True(t, 
envoyGatewayProvider.Kubernetes.RateLimitDeployment == nil) + + envoyGatewayProvider.Kubernetes = &v1alpha1.EnvoyGatewayKubernetesProvider{ + RateLimitDeployment: &v1alpha1.KubernetesDeploymentSpec{ + Replicas: nil, + Pod: nil, + Container: nil, + }} + assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Replicas == nil) + assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Pod == nil) + assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container == nil) + envoyGatewayKubeProvider := envoyGatewayProvider.GetEnvoyGatewayKubeProvider() + + envoyGatewayProvider.Kubernetes = &v1alpha1.EnvoyGatewayKubernetesProvider{ + RateLimitDeployment: &v1alpha1.KubernetesDeploymentSpec{ + Replicas: nil, + Pod: nil, + Container: &v1alpha1.KubernetesContainerSpec{ + Resources: nil, + SecurityContext: nil, + Image: nil, + }, + }} + assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container.Resources == nil) + envoyGatewayProvider.GetEnvoyGatewayKubeProvider() + + assert.True(t, envoyGatewayProvider.Kubernetes != nil) + assert.Equal(t, envoyGatewayProvider.Kubernetes, envoyGatewayKubeProvider) + + assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment != nil) + assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment, v1alpha1.DefaultKubernetesDeployment(v1alpha1.DefaultRateLimitImage)) + assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Replicas != nil) + assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Replicas, v1alpha1.DefaultKubernetesDeploymentReplicas()) + assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Pod != nil) + assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Pod, v1alpha1.DefaultKubernetesPod()) + assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container != nil) + assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container, 
v1alpha1.DefaultKubernetesContainer(v1alpha1.DefaultRateLimitImage)) + assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container.Resources != nil) + assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container.Resources, v1alpha1.DefaultResourceRequirements()) + assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container.Image != nil) + assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container.Image, v1alpha1.DefaultKubernetesContainerImage(v1alpha1.DefaultRateLimitImage)) +} + +func TestEnvoyGatewayAdmin(t *testing.T) { + // default envoygateway config admin should not be nil + eg := v1alpha1.DefaultEnvoyGateway() + assert.True(t, eg.Admin != nil) + + // get default admin config from envoygateway + // values should be set in default + egAdmin := eg.GetEnvoyGatewayAdmin() + assert.True(t, egAdmin != nil) + assert.True(t, egAdmin.Address.Port == v1alpha1.GatewayAdminPort) + assert.True(t, egAdmin.Address.Host == v1alpha1.GatewayAdminHost) + assert.True(t, egAdmin.EnableDumpConfig == false) + assert.True(t, egAdmin.EnablePprof == false) + + // override the admin config + // values should be updated + eg.Admin = &v1alpha1.EnvoyGatewayAdmin{ + Address: &v1alpha1.EnvoyGatewayAdminAddress{ + Host: "0.0.0.0", + Port: 19010, + }, + EnableDumpConfig: true, + EnablePprof: true, + } + + assert.True(t, eg.GetEnvoyGatewayAdmin().Address.Port == 19010) + assert.True(t, eg.GetEnvoyGatewayAdmin().Address.Host == "0.0.0.0") + assert.True(t, eg.GetEnvoyGatewayAdmin().EnableDumpConfig == true) + assert.True(t, eg.GetEnvoyGatewayAdmin().EnablePprof == true) + + // set eg defaults when admin is nil + // the admin should not be nil + eg.Admin = nil + eg.SetEnvoyGatewayDefaults() + assert.True(t, eg.Admin != nil) + assert.True(t, eg.Admin.Address.Port == v1alpha1.GatewayAdminPort) + assert.True(t, eg.Admin.Address.Host == v1alpha1.GatewayAdminHost) + assert.True(t, eg.Admin.EnableDumpConfig == false) + 
assert.True(t, eg.Admin.EnablePprof == false) +} + +func TestEnvoyGatewayTelemetry(t *testing.T) { + // default envoygateway config telemetry should not be nil + eg := v1alpha1.DefaultEnvoyGateway() + assert.True(t, eg.Telemetry != nil) + + // get default telemetry config from envoygateway + // values should be set in default + egTelemetry := eg.GetEnvoyGatewayTelemetry() + assert.True(t, egTelemetry != nil) + assert.True(t, egTelemetry.Metrics != nil) + assert.True(t, egTelemetry.Metrics.Prometheus.Disable == false) + assert.True(t, egTelemetry.Metrics.Sinks == nil) + + // override the telemetry config + // values should be updated + eg.Telemetry.Metrics = &v1alpha1.EnvoyGatewayMetrics{ + Prometheus: &v1alpha1.EnvoyGatewayPrometheusProvider{ + Disable: true, + }, + Sinks: []v1alpha1.EnvoyGatewayMetricSink{ + { + Type: v1alpha1.MetricSinkTypeOpenTelemetry, + OpenTelemetry: &v1alpha1.EnvoyGatewayOpenTelemetrySink{ + Host: "otel-collector.monitoring.svc.cluster.local", + Protocol: "grpc", + Port: 4317, + }, + }, { + Type: v1alpha1.MetricSinkTypeOpenTelemetry, + OpenTelemetry: &v1alpha1.EnvoyGatewayOpenTelemetrySink{ + Host: "otel-collector.monitoring.svc.cluster.local", + Protocol: "http", + Port: 4318, + }, + }, + }, + } + + assert.True(t, eg.GetEnvoyGatewayTelemetry().Metrics.Prometheus.Disable == true) + assert.True(t, len(eg.GetEnvoyGatewayTelemetry().Metrics.Sinks) == 2) + assert.True(t, eg.GetEnvoyGatewayTelemetry().Metrics.Sinks[0].Type == v1alpha1.MetricSinkTypeOpenTelemetry) + + // set eg defaults when telemetry is nil + // the telemetry should not be nil + eg.Telemetry = nil + eg.SetEnvoyGatewayDefaults() + assert.True(t, eg.Telemetry != nil) + assert.True(t, eg.Telemetry.Metrics != nil) + assert.True(t, eg.Telemetry.Metrics.Prometheus.Disable == false) + assert.True(t, eg.Telemetry.Metrics.Sinks == nil) +} diff --git a/api/v1alpha1/validation/envoyproxy_validate_test.go b/api/v1alpha1/validation/envoyproxy_validate_test.go index 0f60a6fa593..0bfc5558e1b 
100644 --- a/api/v1alpha1/validation/envoyproxy_validate_test.go +++ b/api/v1alpha1/validation/envoyproxy_validate_test.go @@ -464,140 +464,6 @@ func TestValidateEnvoyProxy(t *testing.T) { } } -func TestEnvoyGateway(t *testing.T) { - envoyGateway := egv1a1.DefaultEnvoyGateway() - assert.True(t, envoyGateway.Provider != nil) - assert.True(t, envoyGateway.Gateway != nil) - assert.True(t, envoyGateway.Logging != nil) - envoyGateway.SetEnvoyGatewayDefaults() - assert.Equal(t, envoyGateway.Logging, egv1a1.DefaultEnvoyGatewayLogging()) - - logging := egv1a1.DefaultEnvoyGatewayLogging() - assert.True(t, logging != nil) - assert.True(t, logging.Level[egv1a1.LogComponentGatewayDefault] == egv1a1.LogLevelInfo) - - gatewayLogging := &egv1a1.EnvoyGatewayLogging{ - Level: logging.Level, - } - gatewayLogging.SetEnvoyGatewayLoggingDefaults() - assert.True(t, gatewayLogging != nil) - assert.True(t, gatewayLogging.Level[egv1a1.LogComponentGatewayDefault] == egv1a1.LogLevelInfo) -} - -func TestDefaultEnvoyGatewayLoggingLevel(t *testing.T) { - type args struct { - component string - level egv1a1.LogLevel - } - tests := []struct { - name string - args args - want egv1a1.LogLevel - }{ - { - name: "test default info level for empty level", - args: args{component: "", level: ""}, - want: egv1a1.LogLevelInfo, - }, - { - name: "test default info level for empty level", - args: args{component: string(egv1a1.LogComponentGatewayDefault), level: ""}, - want: egv1a1.LogLevelInfo, - }, - { - name: "test default info level for info level", - args: args{component: string(egv1a1.LogComponentGatewayDefault), level: egv1a1.LogLevelInfo}, - want: egv1a1.LogLevelInfo, - }, - { - name: "test default error level for error level", - args: args{component: string(egv1a1.LogComponentGatewayDefault), level: egv1a1.LogLevelError}, - want: egv1a1.LogLevelError, - }, - { - name: "test gateway-api error level for error level", - args: args{component: string(egv1a1.LogComponentGatewayAPIRunner), level: 
egv1a1.LogLevelError}, - want: egv1a1.LogLevelError, - }, - { - name: "test gateway-api info level for info level", - args: args{component: string(egv1a1.LogComponentGatewayAPIRunner), level: egv1a1.LogLevelInfo}, - want: egv1a1.LogLevelInfo, - }, - { - name: "test default gateway-api warn level for info level", - args: args{component: string(egv1a1.LogComponentGatewayAPIRunner), level: ""}, - want: egv1a1.LogLevelInfo, - }, - } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - logging := &egv1a1.EnvoyGatewayLogging{} - if got := logging.DefaultEnvoyGatewayLoggingLevel(tt.args.level); got != tt.want { - t.Errorf("defaultLevel() = %v, want %v", got, tt.want) - } - }) - } -} - -func TestEnvoyGatewayProvider(t *testing.T) { - envoyGateway := &egv1a1.EnvoyGateway{ - TypeMeta: metav1.TypeMeta{}, - EnvoyGatewaySpec: egv1a1.EnvoyGatewaySpec{Provider: egv1a1.DefaultEnvoyGatewayProvider()}, - } - assert.True(t, envoyGateway.Provider != nil) - - envoyGatewayProvider := envoyGateway.GetEnvoyGatewayProvider() - assert.True(t, envoyGatewayProvider.Kubernetes == nil) - assert.Equal(t, envoyGateway.Provider, envoyGatewayProvider) - - envoyGatewayProvider.Kubernetes = egv1a1.DefaultEnvoyGatewayKubeProvider() - assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment, egv1a1.DefaultKubernetesDeployment(egv1a1.DefaultRateLimitImage)) - - envoyGatewayProvider.Kubernetes = &egv1a1.EnvoyGatewayKubernetesProvider{} - assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment == nil) - - envoyGatewayProvider.Kubernetes = &egv1a1.EnvoyGatewayKubernetesProvider{ - RateLimitDeployment: &egv1a1.KubernetesDeploymentSpec{ - Replicas: nil, - Pod: nil, - Container: nil, - }} - assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Replicas == nil) - assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Pod == nil) - assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container == nil) - envoyGatewayKubeProvider := 
envoyGatewayProvider.GetEnvoyGatewayKubeProvider() - - envoyGatewayProvider.Kubernetes = &egv1a1.EnvoyGatewayKubernetesProvider{ - RateLimitDeployment: &egv1a1.KubernetesDeploymentSpec{ - Replicas: nil, - Pod: nil, - Container: &egv1a1.KubernetesContainerSpec{ - Resources: nil, - SecurityContext: nil, - Image: nil, - }, - }} - assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container.Resources == nil) - envoyGatewayProvider.GetEnvoyGatewayKubeProvider() - - assert.True(t, envoyGatewayProvider.Kubernetes != nil) - assert.Equal(t, envoyGatewayProvider.Kubernetes, envoyGatewayKubeProvider) - - assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment != nil) - assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment, egv1a1.DefaultKubernetesDeployment(egv1a1.DefaultRateLimitImage)) - assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Replicas != nil) - assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Replicas, egv1a1.DefaultKubernetesDeploymentReplicas()) - assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Pod != nil) - assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Pod, egv1a1.DefaultKubernetesPod()) - assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container != nil) - assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container, egv1a1.DefaultKubernetesContainer(egv1a1.DefaultRateLimitImage)) - assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container.Resources != nil) - assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container.Resources, egv1a1.DefaultResourceRequirements()) - assert.True(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container.Image != nil) - assert.Equal(t, envoyGatewayProvider.Kubernetes.RateLimitDeployment.Container.Image, egv1a1.DefaultKubernetesContainerImage(egv1a1.DefaultRateLimitImage)) -} - func TestEnvoyProxyProvider(t *testing.T) { envoyProxy := 
&egv1a1.EnvoyProxy{ Spec: egv1a1.EnvoyProxySpec{ @@ -634,47 +500,6 @@ func TestEnvoyProxyProvider(t *testing.T) { assert.True(t, reflect.DeepEqual(envoyProxyProvider.Kubernetes.EnvoyService.Type, egv1a1.GetKubernetesServiceType(egv1a1.ServiceTypeLoadBalancer))) } -func TestEnvoyGatewayAdmin(t *testing.T) { - // default envoygateway config admin should not be nil - eg := egv1a1.DefaultEnvoyGateway() - assert.True(t, eg.Admin != nil) - - // get default admin config from envoygateway - // values should be set in default - egAdmin := eg.GetEnvoyGatewayAdmin() - assert.True(t, egAdmin != nil) - assert.True(t, egAdmin.Address.Port == egv1a1.GatewayAdminPort) - assert.True(t, egAdmin.Address.Host == egv1a1.GatewayAdminHost) - assert.True(t, egAdmin.EnableDumpConfig == false) - assert.True(t, egAdmin.EnablePprof == false) - - // override the admin config - // values should be updated - eg.Admin = &egv1a1.EnvoyGatewayAdmin{ - Address: &egv1a1.EnvoyGatewayAdminAddress{ - Host: "0.0.0.0", - Port: 19010, - }, - EnableDumpConfig: true, - EnablePprof: true, - } - - assert.True(t, eg.GetEnvoyGatewayAdmin().Address.Port == 19010) - assert.True(t, eg.GetEnvoyGatewayAdmin().Address.Host == "0.0.0.0") - assert.True(t, eg.GetEnvoyGatewayAdmin().EnableDumpConfig == true) - assert.True(t, eg.GetEnvoyGatewayAdmin().EnablePprof == true) - - // set eg defaults when admin is nil - // the admin should not be nil - eg.Admin = nil - eg.SetEnvoyGatewayDefaults() - assert.True(t, eg.Admin != nil) - assert.True(t, eg.Admin.Address.Port == egv1a1.GatewayAdminPort) - assert.True(t, eg.Admin.Address.Host == egv1a1.GatewayAdminHost) - assert.True(t, eg.Admin.EnableDumpConfig == false) - assert.True(t, eg.Admin.EnablePprof == false) -} - func TestGetEnvoyProxyDefaultComponentLevel(t *testing.T) { cases := []struct { logging egv1a1.ProxyLogging diff --git a/go.mod b/go.mod index f75200ffe34..061b2188694 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,12 @@ require ( github.com/telepresenceio/watchable 
v0.0.0-20220726211108-9bb86f92afa7 github.com/tetratelabs/multierror v1.1.1 github.com/tsaarni/certyaml v0.9.2 + go.opentelemetry.io/otel v1.19.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 + go.opentelemetry.io/otel/exporters/prometheus v0.42.0 + go.opentelemetry.io/otel/metric v1.19.0 + go.opentelemetry.io/otel/sdk/metric v1.19.0 go.opentelemetry.io/proto/otlp v1.0.0 go.uber.org/zap v1.26.0 golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e @@ -42,7 +48,13 @@ require ( ) require ( + github.com/cenkalti/backoff/v4 v4.2.1 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 // indirect + go.opentelemetry.io/otel/sdk v1.19.0 // indirect + go.opentelemetry.io/otel/trace v1.19.0 // indirect golang.org/x/sync v0.3.0 // indirect ) @@ -88,7 +100,7 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_golang v1.17.0 // indirect + github.com/prometheus/client_golang v1.17.0 github.com/prometheus/client_model v0.5.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect diff --git a/go.sum b/go.sum index 601027d0fe0..9c0770ae263 100644 --- a/go.sum +++ b/go.sum @@ -37,6 +37,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= 
+github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= +github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= @@ -125,8 +127,11 @@ github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v0.1.0/go.mod h1:tabnROwaDl0UNxkVeFRbY8bwB37GwRv0P8lg6aAiEnk= github.com/go-logr/zapr v1.2.4 h1:QHVo+6stLbfJmYGkQ7uGHUCu5hnAFAj6mDe6Ea0SeOo= github.com/go-logr/zapr v1.2.4/go.mod h1:FyHWQIzQORZ0QVE1BtVHv3cKtNLuXsbNLtpuhNapBOA= @@ -188,6 +193,7 @@ github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXP github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/glog v1.1.2 
h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= @@ -253,6 +259,8 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= @@ -466,6 +474,24 @@ go.mongodb.org/mongo-driver v1.0.3/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qL go.mongodb.org/mongo-driver v1.1.1/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= go.mongodb.org/mongo-driver v1.1.2/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= +go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= +go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 h1:ZtfnDL+tUrs1F0Pzfwbg2d59Gru9NCH3bgSHBM6LDwU= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0/go.mod 
h1:hG4Fj/y8TR/tlEDREo8tWstl9fO9gcFkn4xrx0Io8xU= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0 h1:NmnYCiR0qNufkldjVvyQfZTHSdzeHoZ41zggMsdMcLM= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0/go.mod h1:UVAO61+umUsHLtYb8KXXRoHtxUkdOPkYidzW3gipRLQ= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 h1:wNMDy/LVGLj2h3p6zg4d0gypKfWKSWI14E1C4smOgl8= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0/go.mod h1:YfbDdXAAkemWJK3H/DshvlrxqFB2rtW4rY6ky/3x/H0= +go.opentelemetry.io/otel/exporters/prometheus v0.42.0 h1:jwV9iQdvp38fxXi8ZC+lNpxjK16MRcZlpDYvbuO1FiA= +go.opentelemetry.io/otel/exporters/prometheus v0.42.0/go.mod h1:f3bYiqNqhoPxkvI2LrXqQVC546K7BuRDL/kKuxkujhA= +go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE= +go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8= +go.opentelemetry.io/otel/sdk v1.19.0 h1:6USY6zH+L8uMH8L3t1enZPR3WFEmSTADlqldyHtJi3o= +go.opentelemetry.io/otel/sdk v1.19.0/go.mod h1:NedEbbS4w3C6zElbLdPJKOpJQOrGUJ+GfzpjUvI0v1A= +go.opentelemetry.io/otel/sdk/metric v1.19.0 h1:EJoTO5qysMsYCa+w4UghwFV/ptQgqSL/8Ni+hx+8i1k= +go.opentelemetry.io/otel/sdk/metric v1.19.0/go.mod h1:XjG0jQyFJrv2PbMvwND7LwCEhsJzCzV5210euduKcKY= +go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg= +go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo= go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= go.starlark.net v0.0.0-20230525235612-a134d8f9ddca h1:VdD38733bfYv5tUZwEIskMM93VanwNIi5bIKnDrJdEY= diff --git a/internal/admin/server.go b/internal/admin/server.go index be25bec5be3..9c035b43816 100644 --- a/internal/admin/server.go +++ b/internal/admin/server.go @@ -18,7 +18,7 @@ import ( ) var ( - debugLogger 
= logging.DefaultLogger(v1alpha1.LogLevelInfo).WithName("admin") + adminLogger = logging.DefaultLogger(v1alpha1.LogLevelInfo).WithName("admin") ) func Init(cfg *config.Server) error { @@ -36,7 +36,7 @@ func start(cfg *config.Server) error { address := cfg.EnvoyGateway.GetEnvoyGatewayAdminAddress() enablePprof := cfg.EnvoyGateway.GetEnvoyGatewayAdmin().EnablePprof - debugLogger.Info("starting admin server", "address", address, "enablePprof", enablePprof) + adminLogger.Info("starting admin server", "address", address, "enablePprof", enablePprof) if enablePprof { // Serve pprof endpoints to aid in live debugging. @@ -47,7 +47,7 @@ func start(cfg *config.Server) error { handlers.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) } - debugServer := &http.Server{ + adminServer := &http.Server{ Handler: handlers, Addr: address, ReadTimeout: 5 * time.Second, @@ -58,7 +58,7 @@ func start(cfg *config.Server) error { // Listen And Serve Admin Server. go func() { - if err := debugServer.ListenAndServe(); err != nil { + if err := adminServer.ListenAndServe(); err != nil { cfg.Logger.Error(err, "start admin server failed") } }() diff --git a/internal/cmd/server.go b/internal/cmd/server.go index 017ca9a6c42..6dc25a19946 100644 --- a/internal/cmd/server.go +++ b/internal/cmd/server.go @@ -17,6 +17,7 @@ import ( infrarunner "github.com/envoyproxy/gateway/internal/infrastructure/runner" "github.com/envoyproxy/gateway/internal/logging" "github.com/envoyproxy/gateway/internal/message" + "github.com/envoyproxy/gateway/internal/metrics" providerrunner "github.com/envoyproxy/gateway/internal/provider/runner" xdsserverrunner "github.com/envoyproxy/gateway/internal/xds/server/runner" xdstranslatorrunner "github.com/envoyproxy/gateway/internal/xds/translator/runner" @@ -54,6 +55,11 @@ func server() error { if err := admin.Init(cfg); err != nil { return err } + // Init eg metrics servers. + if err := metrics.Init(cfg); err != nil { + return err + } + // init eg runners. 
if err := setupRunners(cfg); err != nil { return err diff --git a/internal/metrics/metadata.go b/internal/metrics/metadata.go new file mode 100644 index 00000000000..f2ab8498407 --- /dev/null +++ b/internal/metrics/metadata.go @@ -0,0 +1,102 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "errors" + "sync" + + "go.opentelemetry.io/otel" + api "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/sdk/metric" + + "github.com/envoyproxy/gateway/api/v1alpha1" + log "github.com/envoyproxy/gateway/internal/logging" +) + +var ( + meter = func() api.Meter { + return otel.GetMeterProvider().Meter("envoy-gateway") + } + + metricsLogger = log.DefaultLogger(v1alpha1.LogLevelInfo).WithName("metrics") +) + +func init() { + otel.SetLogger(metricsLogger.Logger) +} + +// MetricType is the type of a metric. +type MetricType string + +// Supported metric types: +// * Counter: A Counter is a simple metric that only goes up (increments). +// +// * Gauge: A Gauge is a metric that represents +// a single numerical value that can arbitrarily go up and down. +// +// * Histogram: A Histogram samples observations and counts them in configurable buckets. +// It also provides a sum of all observed values. +// It's used to visualize the statistical distribution of these observations. + +const ( + CounterType MetricType = "Counter" + GaugeType MetricType = "Gauge" + HistogramType MetricType = "Histogram" +) + +// Metadata records a metric's metadata. +type Metadata struct { + Name string + Type MetricType + Description string + Bounds []float64 +} + +// store holds the metadata of all registered metrics. +type store struct { + started bool + mu sync.Mutex + stores map[string]Metadata +} + +// stores is a global that stores all registered metrics +var stores = store{ + stores: map[string]Metadata{}, +} + +// register records a newly defined metric. 
Only valid before an exporter is set. +func (d *store) register(store Metadata) { + d.mu.Lock() + defer d.mu.Unlock() + if d.started { + metricsLogger.Error(errors.New("cannot initialize metric after metric has started"), "metric", store.Name) + } + d.stores[store.Name] = store +} + +// preAddOptions runs pre-run steps before adding to meter provider. +func (d *store) preAddOptions() []metric.Option { + d.mu.Lock() + defer d.mu.Unlock() + d.started = true + opts := []metric.Option{} + for name, store := range d.stores { + if store.Bounds == nil { + continue + } + // for each histogram metric (i.e. those with bounds), set up a view explicitly defining those buckets. + v := metric.WithView(metric.NewView( + metric.Instrument{Name: name}, + metric.Stream{ + Aggregation: metric.AggregationExplicitBucketHistogram{ + Boundaries: store.Bounds, + }}, + )) + opts = append(opts, v) + } + return opts +} diff --git a/internal/metrics/options.go b/internal/metrics/options.go new file mode 100644 index 00000000000..f274582f459 --- /dev/null +++ b/internal/metrics/options.go @@ -0,0 +1,31 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +// MetricOption encodes changes to the options passed to a Metric at creation time. +type MetricOption func(*MetricOptions) + +type MetricOptions struct { + Unit Unit + Name string + Description string +} + +// WithUnit provides configuration options for a new Metric, providing unit of measure +// information for a new Metric.
+func WithUnit(unit Unit) MetricOption { + return func(opts *MetricOptions) { + opts.Unit = unit + } +} + +func metricOptions(name, description string, opts ...MetricOption) MetricOptions { + o := MetricOptions{Unit: None, Name: name, Description: description} + for _, opt := range opts { + opt(&o) + } + return o +} diff --git a/internal/metrics/otel_label.go b/internal/metrics/otel_label.go new file mode 100644 index 00000000000..45d04ff4bb0 --- /dev/null +++ b/internal/metrics/otel_label.go @@ -0,0 +1,48 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import "go.opentelemetry.io/otel/attribute" + +// A Label provides a named dimension for a Metric. +type Label struct { + key attribute.Key +} + +// NewLabel will attempt to create a new Label. +func NewLabel(key string) Label { + return Label{attribute.Key(key)} +} + +// Value creates a new LabelValue for the Label. +func (l Label) Value(value string) LabelValue { + return LabelValue{l.key.String(value)} +} + +// A LabelValue represents a Label with a specific value. It is used to record +// values for a Metric. +type LabelValue struct { + keyValue attribute.KeyValue +} + +func (l LabelValue) Key() Label { + return Label{l.keyValue.Key} +} + +func (l LabelValue) Value() string { + return l.keyValue.Value.AsString() +} + +func mergeLabelValues(attrs []attribute.KeyValue, labelValues []LabelValue) ([]attribute.KeyValue, attribute.Set) { + mergedAttrs := make([]attribute.KeyValue, 0, len(attrs)+len(labelValues)) + mergedAttrs = append(mergedAttrs, attrs...) + for _, v := range labelValues { + kv := v + mergedAttrs = append(mergedAttrs, kv.keyValue) + } + + return mergedAttrs, attribute.NewSet(mergedAttrs...) 
+} diff --git a/internal/metrics/otel_metric_counter.go b/internal/metrics/otel_metric_counter.go new file mode 100644 index 00000000000..93dcaa13650 --- /dev/null +++ b/internal/metrics/otel_metric_counter.go @@ -0,0 +1,48 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "context" + + "go.opentelemetry.io/otel/attribute" + api "go.opentelemetry.io/otel/metric" +) + +type Counter struct { + name string + attrs []attribute.KeyValue + c api.Float64Counter + preRecordOptions []api.AddOption +} + +func (f *Counter) Add(value float64) { + if f.preRecordOptions != nil { + f.c.Add(context.Background(), value, f.preRecordOptions...) + } else { + f.c.Add(context.Background(), value) + } +} + +func (f *Counter) Increment() { + f.Add(1) +} + +func (f *Counter) Decrement() { + f.Add(-1) +} + +func (f *Counter) With(labelValues ...LabelValue) *Counter { + attrs, set := mergeLabelValues(f.attrs, labelValues) + m := &Counter{ + c: f.c, + preRecordOptions: []api.AddOption{api.WithAttributeSet(set)}, + name: f.name, + attrs: attrs, + } + + return m +} diff --git a/internal/metrics/otel_metric_gauge.go b/internal/metrics/otel_metric_gauge.go new file mode 100644 index 00000000000..49e02395b67 --- /dev/null +++ b/internal/metrics/otel_metric_gauge.go @@ -0,0 +1,57 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. 
+ +package metrics + +import ( + "sync" + + "go.opentelemetry.io/otel/attribute" + api "go.opentelemetry.io/otel/metric" +) + +type Gauge struct { + name string + attrs []attribute.KeyValue + + g api.Float64ObservableGauge + mutex *sync.RWMutex + stores map[attribute.Set]*GaugeValues + current *GaugeValues +} + +type GaugeValues struct { + val float64 + opt []api.ObserveOption +} + +func (f *Gauge) Record(value float64) { + f.mutex.Lock() + if f.current == nil { + f.current = &GaugeValues{} + f.stores[attribute.NewSet()] = f.current + } + f.current.val = value + f.mutex.Unlock() +} + +func (f *Gauge) With(labelValues ...LabelValue) *Gauge { + attrs, set := mergeLabelValues(f.attrs, labelValues) + m := &Gauge{ + g: f.g, + mutex: f.mutex, + stores: f.stores, + name: f.name, + attrs: attrs, + } + if _, f := m.stores[set]; !f { + m.stores[set] = &GaugeValues{ + opt: []api.ObserveOption{api.WithAttributeSet(set)}, + } + } + m.current = m.stores[set] + + return m +} diff --git a/internal/metrics/otel_metric_histogram.go b/internal/metrics/otel_metric_histogram.go new file mode 100644 index 00000000000..b1837b7a8d2 --- /dev/null +++ b/internal/metrics/otel_metric_histogram.go @@ -0,0 +1,41 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "context" + + "go.opentelemetry.io/otel/attribute" + api "go.opentelemetry.io/otel/metric" +) + +type Histogram struct { + name string + attrs []attribute.KeyValue + + d api.Float64Histogram + preRecordOptions []api.RecordOption +} + +func (f *Histogram) Record(value float64) { + if f.preRecordOptions != nil { + f.d.Record(context.Background(), value, f.preRecordOptions...) 
+ } else { + f.d.Record(context.Background(), value) + } +} + +func (f *Histogram) With(labelValues ...LabelValue) *Histogram { + attrs, set := mergeLabelValues(f.attrs, labelValues) + m := &Histogram{ + name: f.name, + attrs: attrs, + d: f.d, + preRecordOptions: []api.RecordOption{api.WithAttributeSet(set)}, + } + + return m +} diff --git a/internal/metrics/otel_metric_sink.go b/internal/metrics/otel_metric_sink.go new file mode 100644 index 00000000000..442f0c0d3c0 --- /dev/null +++ b/internal/metrics/otel_metric_sink.go @@ -0,0 +1,100 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "context" + "sync" + + "go.opentelemetry.io/otel/attribute" + api "go.opentelemetry.io/otel/metric" +) + +// NewCounter creates a new Counter Metric (the values will be cumulative). +// That means that data collected by the new Metric will be summed before export. +func NewCounter(name, description string, opts ...MetricOption) *Counter { + stores.register(Metadata{ + Name: name, + Type: CounterType, + Description: description, + }) + o := metricOptions(name, description, opts...) + + return newCounter(o) +} + +// NewGauge creates a new Gauge Metric. That means that data collected by the new +// Metric will export only the last recorded value. +func NewGauge(name, description string, opts ...MetricOption) *Gauge { + stores.register(Metadata{ + Name: name, + Type: GaugeType, + Description: description, + }) + o := metricOptions(name, description, opts...) + + return newGauge(o) +} + +// NewHistogram creates a new Metric with an aggregation type of Histogram. +// This means that the data collected by the Metric will be collected and exported as a histogram, with the specified bounds. 
+func NewHistogram(name, description string, bounds []float64, opts ...MetricOption) *Histogram { + stores.register(Metadata{ + Name: name, + Type: HistogramType, + Description: description, + Bounds: bounds, + }) + o := metricOptions(name, description, opts...) + + return newHistogram(o) +} + +func newCounter(o MetricOptions) *Counter { + c, err := meter().Float64Counter(o.Name, + api.WithDescription(o.Description), + api.WithUnit(string(o.Unit))) + if err != nil { + metricsLogger.Error(err, "failed to create otel Counter") + } + m := &Counter{c: c, name: o.Name} + + return m +} + +func newGauge(o MetricOptions) *Gauge { + r := &Gauge{mutex: &sync.RWMutex{}, name: o.Name} + r.stores = map[attribute.Set]*GaugeValues{} + g, err := meter().Float64ObservableGauge(o.Name, + api.WithFloat64Callback(func(ctx context.Context, observer api.Float64Observer) error { + r.mutex.Lock() + defer r.mutex.Unlock() + for _, gv := range r.stores { + observer.Observe(gv.val, gv.opt...) + } + return nil + }), + api.WithDescription(o.Description), + api.WithUnit(string(o.Unit))) + if err != nil { + metricsLogger.Error(err, "failed to create otel Gauge") + } + r.g = g + + return r +} + +func newHistogram(o MetricOptions) *Histogram { + d, err := meter().Float64Histogram(o.Name, + api.WithDescription(o.Description), + api.WithUnit(string(o.Unit))) + if err != nil { + metricsLogger.Error(err, "failed to create otel Histogram") + } + m := &Histogram{d: d, name: o.Name} + + return m +} diff --git a/internal/metrics/register.go b/internal/metrics/register.go new file mode 100644 index 00000000000..9c8abdf479d --- /dev/null +++ b/internal/metrics/register.go @@ -0,0 +1,207 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. 
+ +package metrics + +import ( + "context" + "fmt" + "net" + "net/http" + "time" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" + otelprom "go.opentelemetry.io/otel/exporters/prometheus" + "go.opentelemetry.io/otel/sdk/metric" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + + "github.com/envoyproxy/gateway/api/v1alpha1" + "github.com/envoyproxy/gateway/internal/envoygateway/config" +) + +const ( + defaultEndpoint = "/metrics" +) + +// Init initializes and registers the global metrics server. +func Init(cfg *config.Server) error { + options := newOptions(cfg) + handler, err := registerForHandler(options) + if err != nil { + return err + } + + if !options.pullOptions.disable { + return start(options.address, handler) + } + + return nil +} + +func start(address string, handler http.Handler) error { + handlers := http.NewServeMux() + + metricsLogger.Info("starting metrics server", "address", address) + if handler != nil { + handlers.Handle(defaultEndpoint, handler) + } + + metricsServer := &http.Server{ + Handler: handlers, + Addr: address, + ReadTimeout: 5 * time.Second, + ReadHeaderTimeout: 5 * time.Second, + WriteTimeout: 10 * time.Second, + IdleTimeout: 15 * time.Second, + } + + // Listen And Serve Metrics Server. 
+ go func() { + if err := metricsServer.ListenAndServe(); err != nil { + metricsLogger.Error(err, "start metrics server failed") + } + }() + + return nil +} + +func newOptions(svr *config.Server) registerOptions { + newOpts := registerOptions{} + newOpts.address = net.JoinHostPort(v1alpha1.GatewayMetricsHost, fmt.Sprint(v1alpha1.GatewayMetricsPort)) + + if svr.EnvoyGateway.DisablePrometheus() { + newOpts.pullOptions.disable = true + } else { + newOpts.pullOptions.disable = false + newOpts.pullOptions.registry = metricsserver.Registry + newOpts.pullOptions.gatherer = metricsserver.Registry + } + + for _, config := range svr.EnvoyGateway.GetEnvoyGatewayTelemetry().Metrics.Sinks { + newOpts.pushOptions.sinks = append(newOpts.pushOptions.sinks, metricsSink{ + host: config.OpenTelemetry.Host, + port: config.OpenTelemetry.Port, + protocol: config.OpenTelemetry.Protocol, + }) + } + + return newOpts +} + +// registerForHandler sets the global metrics registry to the provided Prometheus registerer. +// If prometheus is enabled, it returns a prom http handler. +func registerForHandler(opts registerOptions) (http.Handler, error) { + otelOpts := []metric.Option{} + + if err := registerOTELPromExporter(&otelOpts, opts); err != nil { + return nil, err + } + if err := registerOTELHTTPexporter(&otelOpts, opts); err != nil { + return nil, err + } + if err := registerOTELgRPCexporter(&otelOpts, opts); err != nil { + return nil, err + } + otelOpts = append(otelOpts, stores.preAddOptions()...) + + mp := metric.NewMeterProvider(otelOpts...) + otel.SetMeterProvider(mp) + + if !opts.pullOptions.disable { + return promhttp.HandlerFor(opts.pullOptions.gatherer, promhttp.HandlerOpts{}), nil + } + return nil, nil +} + +// registerOTELPromExporter registers OTEL prometheus exporter (PULL mode).
+func registerOTELPromExporter(otelOpts *[]metric.Option, opts registerOptions) error { + if !opts.pullOptions.disable { + promOpts := []otelprom.Option{ + otelprom.WithoutScopeInfo(), + otelprom.WithoutTargetInfo(), + otelprom.WithoutUnits(), + otelprom.WithRegisterer(opts.pullOptions.registry), + otelprom.WithoutCounterSuffixes(), + } + promreader, err := otelprom.New(promOpts...) + if err != nil { + return err + } + + *otelOpts = append(*otelOpts, metric.WithReader(promreader)) + metricsLogger.Info("initialized metrics pull endpoint", "address", opts.address, "endpoint", defaultEndpoint) + } + + return nil +} + +// registerOTELHTTPexporter registers OTEL HTTP metrics exporter (PUSH mode). +func registerOTELHTTPexporter(otelOpts *[]metric.Option, opts registerOptions) error { + for _, sink := range opts.pushOptions.sinks { + if sink.protocol == v1alpha1.HTTPProtocol { + address := net.JoinHostPort(sink.host, fmt.Sprint(sink.port)) + httpexporter, err := otlpmetrichttp.New( + context.Background(), + otlpmetrichttp.WithEndpoint(address), + otlpmetrichttp.WithInsecure(), + ) + if err != nil { + return err + } + + otelreader := metric.NewPeriodicReader(httpexporter) + *otelOpts = append(*otelOpts, metric.WithReader(otelreader)) + metricsLogger.Info("initialized otel http metrics push endpoint", "address", address) + } + } + + return nil +} + +// registerOTELgRPCexporter registers OTEL gRPC metrics exporter (PUSH mode). 
+func registerOTELgRPCexporter(otelOpts *[]metric.Option, opts registerOptions) error { + for _, sink := range opts.pushOptions.sinks { + if sink.protocol == v1alpha1.GRPCProtocol { + address := net.JoinHostPort(sink.host, fmt.Sprint(sink.port)) + httpexporter, err := otlpmetricgrpc.New( + context.Background(), + otlpmetricgrpc.WithEndpoint(address), + otlpmetricgrpc.WithInsecure(), + ) + if err != nil { + return err + } + + otelreader := metric.NewPeriodicReader(httpexporter) + *otelOpts = append(*otelOpts, metric.WithReader(otelreader)) + metricsLogger.Info("initialized otel grpc metrics push endpoint", "address", address) + } + } + + return nil +} + +type registerOptions struct { + address string + pullOptions struct { + registry prometheus.Registerer + gatherer prometheus.Gatherer + disable bool + } + pushOptions struct { + sinks []metricsSink + } +} + +type metricsSink struct { + protocol string + host string + port int32 +} diff --git a/internal/metrics/sample_counter_test.go b/internal/metrics/sample_counter_test.go new file mode 100644 index 00000000000..ffd3a18aac3 --- /dev/null +++ b/internal/metrics/sample_counter_test.go @@ -0,0 +1,23 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. 
+ +package metrics_test + +import "github.com/envoyproxy/gateway/internal/metrics" + +var ( + irUpdates = metrics.NewCounter( + "ir_updates_total", + "Number of IR updates, by ir type", + ) +) + +func NewCounter() { + // increment on every xds ir update + irUpdates.With(irType.Value("xds")).Increment() + + // xds ir updates double + irUpdates.With(irType.Value("xds")).Add(2) +} diff --git a/internal/metrics/sample_gauge_test.go b/internal/metrics/sample_gauge_test.go new file mode 100644 index 00000000000..6b287ed9ca1 --- /dev/null +++ b/internal/metrics/sample_gauge_test.go @@ -0,0 +1,27 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics_test + +import "github.com/envoyproxy/gateway/internal/metrics" + +var ( + irType = metrics.NewLabel("ir-type") + currentIRsNum = metrics.NewGauge( + "current_irs_queue_num", + "current number of ir in queue, by ir type", + ) +) + +func NewGauge() { + // only the last recorded value (2) will be exported for this gauge + currentIRsNum.With(irType.Value("xds")).Record(1) + currentIRsNum.With(irType.Value("xds")).Record(3) + currentIRsNum.With(irType.Value("xds")).Record(2) + + currentIRsNum.With(irType.Value("infra")).Record(1) + currentIRsNum.With(irType.Value("infra")).Record(3) + currentIRsNum.With(irType.Value("infra")).Record(2) +} diff --git a/internal/metrics/sample_histogram_test.go b/internal/metrics/sample_histogram_test.go new file mode 100644 index 00000000000..b34658fcbe5 --- /dev/null +++ b/internal/metrics/sample_histogram_test.go @@ -0,0 +1,23 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. 
+ +package metrics_test + +import "github.com/envoyproxy/gateway/internal/metrics" + +var ( + method = metrics.NewLabel("method") + + sentBytes = metrics.NewHistogram( + "sent_bytes_total", + "Histogram of sent bytes by method", + []float64{10, 50, 100, 1000, 10000}, + metrics.WithUnit(metrics.Bytes), + ) +) + +func NewHistogram() { + sentBytes.With(method.Value("/request/path/1")).Record(458) +} diff --git a/internal/metrics/units.go b/internal/metrics/units.go new file mode 100644 index 00000000000..1c7b5ff13c2 --- /dev/null +++ b/internal/metrics/units.go @@ -0,0 +1,18 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +// Unit encodes the standard name for describing the quantity +// measured by a Metric (if applicable). +type Unit string + +// Predefined units for use with the metrics package. +const ( + None Unit = "1" + Bytes Unit = "By" + Seconds Unit = "s" + Milliseconds Unit = "ms" +) diff --git a/site/content/en/latest/design/eg-metrics.md b/site/content/en/latest/design/eg-metrics.md index f43af77be12..60b93840852 100644 --- a/site/content/en/latest/design/eg-metrics.md +++ b/site/content/en/latest/design/eg-metrics.md @@ -6,7 +6,7 @@ title: "Control Plane Observability: Metrics" This document aims to cover all aspects of envoy gateway control plane metrics observability. {{% alert title="Note" color="secondary" %}} -**Data plane** observability (while important) is outside of scope for this document. For dataplane observability, refer to [here](./metrics). +**Data plane** observability (while important) is outside of scope for this document. For dataplane observability, refer to [here](../metrics). {{% /alert %}} ## Current State