From eb8d7cf5217b18214ebf6e750f002f674ecf7095 Mon Sep 17 00:00:00 2001 From: bitliu Date: Wed, 11 Oct 2023 21:56:59 +0800 Subject: [PATCH] design: control plane metrics monitoring Signed-off-by: bitliu --- api/v1alpha1/envoygateway_helpers.go | 87 +++++++++- api/v1alpha1/envoygateway_metric_types.go | 41 +++++ api/v1alpha1/envoygateway_types.go | 48 +++++- api/v1alpha1/envoyproxy_types.go | 26 +-- api/v1alpha1/metric_types.go | 16 +- api/v1alpha1/validation/validate_test.go | 18 +- api/v1alpha1/zz_generated.deepcopy.go | 154 ++++++++++++++++-- go.mod | 19 ++- go.sum | 28 +++- internal/cmd/server.go | 41 +---- .../proxy/resource_provider_test.go | 6 +- internal/metrics/definition.go | 146 +++++++++++++++++ internal/metrics/doc.go | 6 + internal/metrics/label.go | 37 +++++ internal/metrics/metric.go | 82 ++++++++++ internal/metrics/metric_counter.go | 73 +++++++++ internal/metrics/metric_gauge.go | 104 ++++++++++++ internal/metrics/metric_histogram.go | 74 +++++++++ internal/metrics/options.go | 43 +++++ internal/metrics/register.go | 109 +++++++++++++ internal/metrics/sample_counter_test.go | 23 +++ internal/metrics/sample_gauge_test.go | 27 +++ internal/metrics/sample_histogram_test.go | 23 +++ internal/metrics/units.go | 18 ++ internal/provider/kubernetes/kubernetes.go | 13 +- internal/utils/slice/slice.go | 24 +++ internal/xds/bootstrap/bootstrap_test.go | 4 +- site/content/en/latest/api/extension_types.md | 145 ++++++++++++++--- site/content/en/latest/design/eg-metrics.md | 146 +++++++++++++++++ site/content/en/latest/design/pprof.md | 39 +++-- 30 files changed, 1474 insertions(+), 146 deletions(-) create mode 100644 api/v1alpha1/envoygateway_metric_types.go create mode 100644 internal/metrics/definition.go create mode 100644 internal/metrics/doc.go create mode 100644 internal/metrics/label.go create mode 100644 internal/metrics/metric.go create mode 100644 internal/metrics/metric_counter.go create mode 100644 internal/metrics/metric_gauge.go create mode 100644 
internal/metrics/metric_histogram.go create mode 100644 internal/metrics/options.go create mode 100644 internal/metrics/register.go create mode 100644 internal/metrics/sample_counter_test.go create mode 100644 internal/metrics/sample_gauge_test.go create mode 100644 internal/metrics/sample_histogram_test.go create mode 100644 internal/metrics/units.go create mode 100644 site/content/en/latest/design/eg-metrics.md diff --git a/api/v1alpha1/envoygateway_helpers.go b/api/v1alpha1/envoygateway_helpers.go index d7886b6f367c..cea57e824ab1 100644 --- a/api/v1alpha1/envoygateway_helpers.go +++ b/api/v1alpha1/envoygateway_helpers.go @@ -6,6 +6,8 @@ package v1alpha1 import ( + "fmt" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -17,10 +19,12 @@ func DefaultEnvoyGateway() *EnvoyGateway { APIVersion: GroupVersion.String(), }, EnvoyGatewaySpec{ - Gateway: DefaultGateway(), - Provider: DefaultEnvoyGatewayProvider(), - Logging: DefaultEnvoyGatewayLogging(), - Admin: DefaultEnvoyGatewayAdmin(), + Gateway: DefaultGateway(), + Provider: DefaultEnvoyGatewayProvider(), + Logging: DefaultEnvoyGatewayLogging(), + Admin: DefaultEnvoyGatewayAdmin(), + Debug: DefaultEnvoyGatewayDebug(), + Telemetry: DefaultEnvoyGatewayTelemetry(), }, } } @@ -45,6 +49,12 @@ func (e *EnvoyGateway) SetEnvoyGatewayDefaults() { if e.Admin == nil { e.Admin = DefaultEnvoyGatewayAdmin() } + if e.Telemetry == nil { + e.Telemetry = DefaultEnvoyGatewayTelemetry() + } + if e.Debug == nil { + e.Debug = DefaultEnvoyGatewayDebug() + } } // GetEnvoyGatewayAdmin returns the EnvoyGatewayAdmin of EnvoyGateway or a default EnvoyGatewayAdmin if unspecified. @@ -60,6 +70,22 @@ func (e *EnvoyGateway) GetEnvoyGatewayAdmin() *EnvoyGatewayAdmin { return e.Admin } +// GetEnvoyGatewayDebug returns the EnvoyGatewayDebug of EnvoyGateway or a default EnvoyGatewayDebug if unspecified. 
+func (e *EnvoyGateway) GetEnvoyGatewayDebug() *EnvoyGatewayDebug { + if e.Debug != nil { + return e.Debug + } + e.Debug = DefaultEnvoyGatewayDebug() + + return e.Debug +} + +// GetEnvoyGatewayAdminAddress returns the EnvoyGateway Admin Address. +func (e *EnvoyGateway) GetEnvoyGatewayAdminAddress() string { + address := e.GetEnvoyGatewayAdmin().Address + return fmt.Sprintf("%s:%d", address.Host, address.Port) +} + // DefaultGateway returns a new Gateway with default configuration parameters. func DefaultGateway() *Gateway { return &Gateway{ @@ -76,6 +102,51 @@ func DefaultEnvoyGatewayLogging() *EnvoyGatewayLogging { } } +// GetEnvoyGatewayTelemetry returns the EnvoyGatewayTelemetry of EnvoyGateway (defaulting Metrics first, then Metrics.Prometheus, when unset) or a default EnvoyGatewayTelemetry if unspecified. +func (e *EnvoyGateway) GetEnvoyGatewayTelemetry() *EnvoyGatewayTelemetry { + if e.Telemetry != nil { + if e.Telemetry.Metrics == nil { + e.Telemetry.Metrics = DefaultEnvoyGatewayMetrics() + } + if e.Telemetry.Metrics.Prometheus == nil { + e.Telemetry.Metrics.Prometheus = DefaultEnvoyGatewayPrometheus() + } + return e.Telemetry + } + e.Telemetry = DefaultEnvoyGatewayTelemetry() + + return e.Telemetry +} + +func (e *EnvoyGateway) IfEnablePrometheus() bool { + return e.GetEnvoyGatewayTelemetry().Metrics.Prometheus.Enable +} + +// DefaultEnvoyGatewayTelemetry returns a new EnvoyGatewayTelemetry with default configuration parameters. +func DefaultEnvoyGatewayTelemetry() *EnvoyGatewayTelemetry { + return &EnvoyGatewayTelemetry{ + Metrics: DefaultEnvoyGatewayMetrics(), + } +} + +// DefaultEnvoyGatewayMetrics returns a new EnvoyGatewayMetrics with default configuration parameters. +func DefaultEnvoyGatewayMetrics() *EnvoyGatewayMetrics { + return &EnvoyGatewayMetrics{ + // Enable prometheus pull by default. + Prometheus: &EnvoyGatewayPrometheusProvider{ + Enable: true, + }, + } +} + +// DefaultEnvoyGatewayPrometheus returns a new EnvoyGatewayPrometheusProvider with default configuration parameters. 
+func DefaultEnvoyGatewayPrometheus() *EnvoyGatewayPrometheusProvider { + return &EnvoyGatewayPrometheusProvider{ + // Enable prometheus pull by default. + Enable: true, + } +} + // DefaultEnvoyGatewayProvider returns a new EnvoyGatewayProvider with default configuration parameters. func DefaultEnvoyGatewayProvider() *EnvoyGatewayProvider { return &EnvoyGatewayProvider{ @@ -103,11 +174,17 @@ func DefaultEnvoyGatewayKubeProvider() *EnvoyGatewayKubernetesProvider { // DefaultEnvoyGatewayAdmin returns a new EnvoyGatewayAdmin with default configuration parameters. func DefaultEnvoyGatewayAdmin() *EnvoyGatewayAdmin { return &EnvoyGatewayAdmin{ - Debug: false, Address: DefaultEnvoyGatewayAdminAddress(), } } +// DefaultEnvoyGatewayDebug returns a new EnvoyGatewayDebug with default configuration parameters. +func DefaultEnvoyGatewayDebug() *EnvoyGatewayDebug { + return &EnvoyGatewayDebug{ + DumpConfig: false, + } +} + // DefaultEnvoyGatewayAdminAddress returns a new EnvoyGatewayAdminAddress with default configuration parameters. func DefaultEnvoyGatewayAdminAddress() *EnvoyGatewayAdminAddress { return &EnvoyGatewayAdminAddress{ diff --git a/api/v1alpha1/envoygateway_metric_types.go b/api/v1alpha1/envoygateway_metric_types.go new file mode 100644 index 000000000000..16261874d013 --- /dev/null +++ b/api/v1alpha1/envoygateway_metric_types.go @@ -0,0 +1,41 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package v1alpha1 + +// EnvoyGatewayMetrics defines control plane push/pull metrics configurations. +type EnvoyGatewayMetrics struct { + // Sinks defines the metric sinks where metrics are sent to. + Sinks []EnvoyGatewayMetricSink `json:"sinks,omitempty"` + // Prometheus defines the configuration for prometheus endpoint. 
+ Prometheus *EnvoyGatewayPrometheusProvider `json:"prometheus,omitempty"` +} + +// EnvoyGatewayMetricSink defines control plane +// metric sinks where metrics are sent to. +type EnvoyGatewayMetricSink struct { + // Type defines the metric sink type. + // EG control plane currently supports OpenTelemetry. + // +kubebuilder:validation:Enum=OpenTelemetry + // +kubebuilder:default=OpenTelemetry + Type MetricSinkType `json:"type"` + // Host define the sink service hostname. + Host string `json:"host"` + // Port defines the port the sink service is exposed on. + // + // +optional + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:default=4317 + Port int32 `json:"port,omitempty"` +} + +// EnvoyGatewayPrometheusProvider will expose prometheus endpoint in pull mode. +type EnvoyGatewayPrometheusProvider struct { + // Enable defines if enables the prometheus metrics in pull mode. Default is true. + // + // +optional + // +kubebuilder:default=true + Enable bool `json:"enable,omitempty"` +} diff --git a/api/v1alpha1/envoygateway_types.go b/api/v1alpha1/envoygateway_types.go index b96218f0746e..1d146db980e5 100644 --- a/api/v1alpha1/envoygateway_types.go +++ b/api/v1alpha1/envoygateway_types.go @@ -51,6 +51,12 @@ type EnvoyGatewaySpec struct { // +optional // +kubebuilder:default={default: info} Logging *EnvoyGatewayLogging `json:"logging,omitempty"` + + // Telemetry defines telemetry related configurations for envoy gateway. + // + // +optional + Telemetry *EnvoyGatewayTelemetry `json:"telemetry,omitempty"` + // Admin defines the desired admin related abilities. // If unspecified, the Admin is used with default configuration // parameters. @@ -58,6 +64,12 @@ type EnvoyGatewaySpec struct { // +optional Admin *EnvoyGatewayAdmin `json:"admin,omitempty"` + // Debug defines the desired debug related abilities. + // If unspecified, the debug will not be running, including pprof, dump config etc. 
+ // + // +optional + Debug *EnvoyGatewayDebug `json:"debug,omitempty"` + // RateLimit defines the configuration associated with the Rate Limit service // deployed by Envoy Gateway required to implement the Global Rate limiting // functionality. The specific rate limit service used here is the reference @@ -79,6 +91,13 @@ type EnvoyGatewaySpec struct { ExtensionAPIs *ExtensionAPISettings `json:"extensionApis,omitempty"` } +// EnvoyGatewayTelemetry defines telemetry configurations for envoy gateway control plane. +// Control plane will focus on metrics observability telemetry and tracing telemetry later. +type EnvoyGatewayTelemetry struct { + // Metrics defines metrics configuration for envoy gateway. + Metrics *EnvoyGatewayMetrics `json:"metrics,omitempty"` +} + // EnvoyGatewayLogging defines logging for Envoy Gateway. type EnvoyGatewayLogging struct { // Level is the logging level. If unspecified, defaults to "info". @@ -424,11 +443,21 @@ type EnvoyGatewayAdmin struct { // // +optional Address *EnvoyGatewayAdminAddress `json:"address,omitempty"` +} + +// EnvoyGatewayDebug defines the Envoy Gateway Debug configuration. +type EnvoyGatewayDebug struct { + + // DumpConfig defines if dump the Envoy Gateway config in logs. + // + // +optional + DumpConfig bool `json:"dumpConfig,omitempty"` - // Debug defines if enable the /debug endpoint of Envoy Gateway. + // Address defines the address of Envoy Gateway Debug Server. + // Pprof will use the debug address, if you set it to non-nil. // // +optional - Debug bool `json:"debug,omitempty"` + Address *EnvoyGatewayDebugAddress `json:"address,omitempty"` } // EnvoyGatewayAdminAddress defines the Envoy Gateway Admin Address configuration. @@ -446,6 +475,21 @@ type EnvoyGatewayAdminAddress struct { Host string `json:"host,omitempty"` } +// EnvoyGatewayDebugAddress defines the Envoy Gateway Debug Address configuration. +type EnvoyGatewayDebugAddress struct { + // Port defines the port the debug server is exposed on. 
+ // + // +optional + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:default=19010 + Port int `json:"port,omitempty"` + // Host defines the debug server hostname. + // + // +optional + // +kubebuilder:default="127.0.0.1" + Host string `json:"host,omitempty"` +} + func init() { SchemeBuilder.Register(&EnvoyGateway{}) } diff --git a/api/v1alpha1/envoyproxy_types.go b/api/v1alpha1/envoyproxy_types.go index 575eb2b9e08e..00f4c38a7c43 100644 --- a/api/v1alpha1/envoyproxy_types.go +++ b/api/v1alpha1/envoyproxy_types.go @@ -129,44 +129,44 @@ type ProxyLogging struct { // and the log level is the value. If unspecified, defaults to "default: warn". // // +kubebuilder:default={default: warn} - Level map[LogComponent]LogLevel `json:"level,omitempty"` + Level map[ProxyLogComponent]LogLevel `json:"level,omitempty"` } -// LogComponent defines a component that supports a configured logging level. +// ProxyLogComponent defines a component that supports a configured logging level. // +kubebuilder:validation:Enum=system;upstream;http;connection;admin;client;filter;main;router;runtime -type LogComponent string +type ProxyLogComponent string const ( // LogComponentDefault defines the default logging component. // See more details: https://www.envoyproxy.io/docs/envoy/latest/operations/cli#cmdoption-l - LogComponentDefault LogComponent = "default" + LogComponentDefault ProxyLogComponent = "default" // LogComponentUpstream defines the "upstream" logging component. - LogComponentUpstream LogComponent = "upstream" + LogComponentUpstream ProxyLogComponent = "upstream" // LogComponentHTTP defines the "http" logging component. - LogComponentHTTP LogComponent = "http" + LogComponentHTTP ProxyLogComponent = "http" // LogComponentConnection defines the "connection" logging component. - LogComponentConnection LogComponent = "connection" + LogComponentConnection ProxyLogComponent = "connection" // LogComponentAdmin defines the "admin" logging component. 
- LogComponentAdmin LogComponent = "admin" + LogComponentAdmin ProxyLogComponent = "admin" // LogComponentClient defines the "client" logging component. - LogComponentClient LogComponent = "client" + LogComponentClient ProxyLogComponent = "client" // LogComponentFilter defines the "filter" logging component. - LogComponentFilter LogComponent = "filter" + LogComponentFilter ProxyLogComponent = "filter" // LogComponentMain defines the "main" logging component. - LogComponentMain LogComponent = "main" + LogComponentMain ProxyLogComponent = "main" // LogComponentRouter defines the "router" logging component. - LogComponentRouter LogComponent = "router" + LogComponentRouter ProxyLogComponent = "router" // LogComponentRuntime defines the "runtime" logging component. - LogComponentRuntime LogComponent = "runtime" + LogComponentRuntime ProxyLogComponent = "runtime" ) // ProxyBootstrap defines Envoy Bootstrap configuration. diff --git a/api/v1alpha1/metric_types.go b/api/v1alpha1/metric_types.go index 03efd6fa04fc..96214ec4f831 100644 --- a/api/v1alpha1/metric_types.go +++ b/api/v1alpha1/metric_types.go @@ -5,9 +5,15 @@ package v1alpha1 +type MetricSinkType string + +const ( + MetricSinkTypeOpenTelemetry MetricSinkType = "OpenTelemetry" +) + type ProxyMetrics struct { // Prometheus defines the configuration for Admin endpoint `/stats/prometheus`. - Prometheus *PrometheusProvider `json:"prometheus,omitempty"` + Prometheus *ProxyPrometheusProvider `json:"prometheus,omitempty"` // Sinks defines the metric sinks where metrics are sent to. Sinks []MetricSink `json:"sinks,omitempty"` // Matches defines configuration for selecting specific metrics instead of generating all metrics stats @@ -23,12 +29,6 @@ type ProxyMetrics struct { EnableVirtualHostStats bool `json:"enableVirtualHostStats,omitempty"` } -type MetricSinkType string - -const ( - MetricSinkTypeOpenTelemetry MetricSinkType = "OpenTelemetry" -) - type MetricSink struct { // Type defines the metric sink type. 
// EG currently only supports OpenTelemetry. @@ -71,5 +71,5 @@ type OpenTelemetrySink struct { // TODO: add support for customizing OpenTelemetry sink in https://www.envoyproxy.io/docs/envoy/latest/api-v3/extensions/stat_sinks/open_telemetry/v3/open_telemetry.proto#envoy-v3-api-msg-extensions-stat-sinks-open-telemetry-v3-sinkconfig } -type PrometheusProvider struct { +type ProxyPrometheusProvider struct { } diff --git a/api/v1alpha1/validation/validate_test.go b/api/v1alpha1/validation/validate_test.go index 1a12cd54e094..5c4da91252a1 100644 --- a/api/v1alpha1/validation/validate_test.go +++ b/api/v1alpha1/validation/validate_test.go @@ -535,15 +535,12 @@ func TestEnvoyGatewayAdmin(t *testing.T) { // values should be set in default egAdmin := eg.GetEnvoyGatewayAdmin() assert.True(t, egAdmin != nil) - assert.True(t, egAdmin.Debug == false) assert.True(t, egAdmin.Address.Port == egv1a1.GatewayAdminPort) assert.True(t, egAdmin.Address.Host == egv1a1.GatewayAdminHost) // override the admin config // values should be updated - eg.Admin.Debug = true eg.Admin.Address = nil - assert.True(t, eg.Admin.Debug == true) assert.True(t, eg.GetEnvoyGatewayAdmin().Address.Port == egv1a1.GatewayAdminPort) assert.True(t, eg.GetEnvoyGatewayAdmin().Address.Host == egv1a1.GatewayAdminHost) @@ -552,7 +549,6 @@ func TestEnvoyGatewayAdmin(t *testing.T) { eg.Admin = nil eg.SetEnvoyGatewayDefaults() assert.True(t, eg.Admin != nil) - assert.True(t, eg.Admin.Debug == false) assert.True(t, eg.Admin.Address.Port == egv1a1.GatewayAdminPort) assert.True(t, eg.Admin.Address.Host == egv1a1.GatewayAdminHost) } @@ -560,12 +556,12 @@ func TestEnvoyGatewayAdmin(t *testing.T) { func TestGetEnvoyProxyDefaultComponentLevel(t *testing.T) { cases := []struct { logging egv1a1.ProxyLogging - component egv1a1.LogComponent + component egv1a1.ProxyLogComponent expected egv1a1.LogLevel }{ { logging: egv1a1.ProxyLogging{ - Level: map[egv1a1.LogComponent]egv1a1.LogLevel{ + Level: 
map[egv1a1.ProxyLogComponent]egv1a1.LogLevel{ egv1a1.LogComponentDefault: egv1a1.LogLevelInfo, }, }, @@ -573,7 +569,7 @@ func TestGetEnvoyProxyDefaultComponentLevel(t *testing.T) { }, { logging: egv1a1.ProxyLogging{ - Level: map[egv1a1.LogComponent]egv1a1.LogLevel{ + Level: map[egv1a1.ProxyLogComponent]egv1a1.LogLevel{ egv1a1.LogComponentDefault: egv1a1.LogLevelInfo, }, }, @@ -600,7 +596,7 @@ func TestGetEnvoyProxyComponentLevelArgs(t *testing.T) { }, { logging: egv1a1.ProxyLogging{ - Level: map[egv1a1.LogComponent]egv1a1.LogLevel{ + Level: map[egv1a1.ProxyLogComponent]egv1a1.LogLevel{ egv1a1.LogComponentDefault: egv1a1.LogLevelInfo, }, }, @@ -608,7 +604,7 @@ func TestGetEnvoyProxyComponentLevelArgs(t *testing.T) { }, { logging: egv1a1.ProxyLogging{ - Level: map[egv1a1.LogComponent]egv1a1.LogLevel{ + Level: map[egv1a1.ProxyLogComponent]egv1a1.LogLevel{ egv1a1.LogComponentDefault: egv1a1.LogLevelInfo, egv1a1.LogComponentAdmin: egv1a1.LogLevelWarn, }, @@ -617,7 +613,7 @@ func TestGetEnvoyProxyComponentLevelArgs(t *testing.T) { }, { logging: egv1a1.ProxyLogging{ - Level: map[egv1a1.LogComponent]egv1a1.LogLevel{ + Level: map[egv1a1.ProxyLogComponent]egv1a1.LogLevel{ egv1a1.LogComponentDefault: egv1a1.LogLevelInfo, egv1a1.LogComponentAdmin: egv1a1.LogLevelWarn, egv1a1.LogComponentFilter: egv1a1.LogLevelDebug, @@ -627,7 +623,7 @@ func TestGetEnvoyProxyComponentLevelArgs(t *testing.T) { }, { logging: egv1a1.ProxyLogging{ - Level: map[egv1a1.LogComponent]egv1a1.LogLevel{ + Level: map[egv1a1.ProxyLogComponent]egv1a1.LogLevel{ egv1a1.LogComponentDefault: egv1a1.LogLevelInfo, egv1a1.LogComponentAdmin: egv1a1.LogLevelWarn, egv1a1.LogComponentFilter: egv1a1.LogLevelDebug, diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index b0a1f4f18c06..714aa9e60879 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -342,6 +342,41 @@ func (in *EnvoyGatewayCustomProvider) DeepCopy() 
*EnvoyGatewayCustomProvider { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EnvoyGatewayDebug) DeepCopyInto(out *EnvoyGatewayDebug) { + *out = *in + if in.Address != nil { + in, out := &in.Address, &out.Address + *out = new(EnvoyGatewayDebugAddress) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvoyGatewayDebug. +func (in *EnvoyGatewayDebug) DeepCopy() *EnvoyGatewayDebug { + if in == nil { + return nil + } + out := new(EnvoyGatewayDebug) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EnvoyGatewayDebugAddress) DeepCopyInto(out *EnvoyGatewayDebugAddress) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvoyGatewayDebugAddress. +func (in *EnvoyGatewayDebugAddress) DeepCopy() *EnvoyGatewayDebugAddress { + if in == nil { + return nil + } + out := new(EnvoyGatewayDebugAddress) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EnvoyGatewayFileResourceProvider) DeepCopyInto(out *EnvoyGatewayFileResourceProvider) { *out = *in @@ -449,6 +484,61 @@ func (in *EnvoyGatewayLogging) DeepCopy() *EnvoyGatewayLogging { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EnvoyGatewayMetricSink) DeepCopyInto(out *EnvoyGatewayMetricSink) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvoyGatewayMetricSink. 
+func (in *EnvoyGatewayMetricSink) DeepCopy() *EnvoyGatewayMetricSink { + if in == nil { + return nil + } + out := new(EnvoyGatewayMetricSink) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EnvoyGatewayMetrics) DeepCopyInto(out *EnvoyGatewayMetrics) { + *out = *in + if in.Sinks != nil { + in, out := &in.Sinks, &out.Sinks + *out = make([]EnvoyGatewayMetricSink, len(*in)) + copy(*out, *in) + } + if in.Prometheus != nil { + in, out := &in.Prometheus, &out.Prometheus + *out = new(EnvoyGatewayPrometheusProvider) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvoyGatewayMetrics. +func (in *EnvoyGatewayMetrics) DeepCopy() *EnvoyGatewayMetrics { + if in == nil { + return nil + } + out := new(EnvoyGatewayMetrics) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EnvoyGatewayPrometheusProvider) DeepCopyInto(out *EnvoyGatewayPrometheusProvider) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvoyGatewayPrometheusProvider. +func (in *EnvoyGatewayPrometheusProvider) DeepCopy() *EnvoyGatewayPrometheusProvider { + if in == nil { + return nil + } + out := new(EnvoyGatewayPrometheusProvider) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *EnvoyGatewayProvider) DeepCopyInto(out *EnvoyGatewayProvider) { *out = *in @@ -512,11 +602,21 @@ func (in *EnvoyGatewaySpec) DeepCopyInto(out *EnvoyGatewaySpec) { *out = new(EnvoyGatewayLogging) (*in).DeepCopyInto(*out) } + if in.Telemetry != nil { + in, out := &in.Telemetry, &out.Telemetry + *out = new(EnvoyGatewayTelemetry) + (*in).DeepCopyInto(*out) + } if in.Admin != nil { in, out := &in.Admin, &out.Admin *out = new(EnvoyGatewayAdmin) (*in).DeepCopyInto(*out) } + if in.Debug != nil { + in, out := &in.Debug, &out.Debug + *out = new(EnvoyGatewayDebug) + (*in).DeepCopyInto(*out) + } if in.RateLimit != nil { in, out := &in.RateLimit, &out.RateLimit *out = new(RateLimit) @@ -544,6 +644,26 @@ func (in *EnvoyGatewaySpec) DeepCopy() *EnvoyGatewaySpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *EnvoyGatewayTelemetry) DeepCopyInto(out *EnvoyGatewayTelemetry) { + *out = *in + if in.Metrics != nil { + in, out := &in.Metrics, &out.Metrics + *out = new(EnvoyGatewayMetrics) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EnvoyGatewayTelemetry. +func (in *EnvoyGatewayTelemetry) DeepCopy() *EnvoyGatewayTelemetry { + if in == nil { + return nil + } + out := new(EnvoyGatewayTelemetry) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EnvoyJSONPatchConfig) DeepCopyInto(out *EnvoyJSONPatchConfig) { *out = *in @@ -1351,21 +1471,6 @@ func (in *OpenTelemetrySink) DeepCopy() *OpenTelemetrySink { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
-func (in *PrometheusProvider) DeepCopyInto(out *PrometheusProvider) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PrometheusProvider. -func (in *PrometheusProvider) DeepCopy() *PrometheusProvider { - if in == nil { - return nil - } - out := new(PrometheusProvider) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ProxyAccessLog) DeepCopyInto(out *ProxyAccessLog) { *out = *in @@ -1488,7 +1593,7 @@ func (in *ProxyLogging) DeepCopyInto(out *ProxyLogging) { *out = *in if in.Level != nil { in, out := &in.Level, &out.Level - *out = make(map[LogComponent]LogLevel, len(*in)) + *out = make(map[ProxyLogComponent]LogLevel, len(*in)) for key, val := range *in { (*out)[key] = val } @@ -1510,7 +1615,7 @@ func (in *ProxyMetrics) DeepCopyInto(out *ProxyMetrics) { *out = *in if in.Prometheus != nil { in, out := &in.Prometheus, &out.Prometheus - *out = new(PrometheusProvider) + *out = new(ProxyPrometheusProvider) **out = **in } if in.Sinks != nil { @@ -1537,6 +1642,21 @@ func (in *ProxyMetrics) DeepCopy() *ProxyMetrics { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ProxyPrometheusProvider) DeepCopyInto(out *ProxyPrometheusProvider) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ProxyPrometheusProvider. +func (in *ProxyPrometheusProvider) DeepCopy() *ProxyPrometheusProvider { + if in == nil { + return nil + } + out := new(ProxyPrometheusProvider) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *ProxyTelemetry) DeepCopyInto(out *ProxyTelemetry) { *out = *in diff --git a/go.mod b/go.mod index 06e35f26d43f..ba06cefdd6e4 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,11 @@ require ( github.com/telepresenceio/watchable v0.0.0-20220726211108-9bb86f92afa7 github.com/tetratelabs/multierror v1.1.1 github.com/tsaarni/certyaml v0.9.2 + go.opentelemetry.io/otel v1.19.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 + go.opentelemetry.io/otel/exporters/prometheus v0.42.0 + go.opentelemetry.io/otel/metric v1.19.0 + go.opentelemetry.io/otel/sdk/metric v1.19.0 go.opentelemetry.io/proto/otlp v1.0.0 go.uber.org/zap v1.26.0 golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e @@ -41,7 +46,15 @@ require ( sigs.k8s.io/yaml v1.3.0 ) -require golang.org/x/sync v0.3.0 // indirect +require ( + github.com/cenkalti/backoff/v4 v4.2.1 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 // indirect + go.opentelemetry.io/otel/sdk v1.19.0 // indirect + go.opentelemetry.io/otel/trace v1.19.0 // indirect + golang.org/x/sync v0.3.0 // indirect +) require ( github.com/Azure/go-ansiterm v0.0.0-20210617225240-d185dfc1b5a1 // indirect @@ -86,7 +99,7 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_golang v1.16.0 // indirect + github.com/prometheus/client_golang v1.16.0 github.com/prometheus/client_model v0.5.0 // indirect github.com/prometheus/procfs v0.10.1 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect @@ -97,7 +110,7 @@ require ( go.uber.org/multierr v1.11.0 // indirect golang.org/x/net v0.14.0 // indirect golang.org/x/oauth2 v0.10.0 // indirect - golang.org/x/sys v0.11.0 // indirect + golang.org/x/sys v0.12.0 // indirect 
golang.org/x/term v0.11.0 // indirect golang.org/x/text v0.12.0 // indirect golang.org/x/time v0.3.0 // indirect diff --git a/go.sum b/go.sum index d29f63622275..8ed068e28776 100644 --- a/go.sum +++ b/go.sum @@ -37,6 +37,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= +github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= +github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= @@ -125,8 +127,11 @@ github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v0.1.0/go.mod h1:tabnROwaDl0UNxkVeFRbY8bwB37GwRv0P8lg6aAiEnk= github.com/go-logr/zapr v1.2.4 
h1:QHVo+6stLbfJmYGkQ7uGHUCu5hnAFAj6mDe6Ea0SeOo= github.com/go-logr/zapr v1.2.4/go.mod h1:FyHWQIzQORZ0QVE1BtVHv3cKtNLuXsbNLtpuhNapBOA= @@ -188,6 +193,7 @@ github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXP github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= @@ -253,6 +259,8 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= @@ -466,6 +474,22 @@ go.mongodb.org/mongo-driver v1.0.3/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qL go.mongodb.org/mongo-driver v1.1.1/go.mod 
h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= go.mongodb.org/mongo-driver v1.1.2/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= +go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= +go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 h1:ZtfnDL+tUrs1F0Pzfwbg2d59Gru9NCH3bgSHBM6LDwU= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0/go.mod h1:hG4Fj/y8TR/tlEDREo8tWstl9fO9gcFkn4xrx0Io8xU= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 h1:wNMDy/LVGLj2h3p6zg4d0gypKfWKSWI14E1C4smOgl8= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0/go.mod h1:YfbDdXAAkemWJK3H/DshvlrxqFB2rtW4rY6ky/3x/H0= +go.opentelemetry.io/otel/exporters/prometheus v0.42.0 h1:jwV9iQdvp38fxXi8ZC+lNpxjK16MRcZlpDYvbuO1FiA= +go.opentelemetry.io/otel/exporters/prometheus v0.42.0/go.mod h1:f3bYiqNqhoPxkvI2LrXqQVC546K7BuRDL/kKuxkujhA= +go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE= +go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8= +go.opentelemetry.io/otel/sdk v1.19.0 h1:6USY6zH+L8uMH8L3t1enZPR3WFEmSTADlqldyHtJi3o= +go.opentelemetry.io/otel/sdk v1.19.0/go.mod h1:NedEbbS4w3C6zElbLdPJKOpJQOrGUJ+GfzpjUvI0v1A= +go.opentelemetry.io/otel/sdk/metric v1.19.0 h1:EJoTO5qysMsYCa+w4UghwFV/ptQgqSL/8Ni+hx+8i1k= +go.opentelemetry.io/otel/sdk/metric v1.19.0/go.mod h1:XjG0jQyFJrv2PbMvwND7LwCEhsJzCzV5210euduKcKY= +go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg= +go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo= go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= 
go.starlark.net v0.0.0-20230525235612-a134d8f9ddca h1:VdD38733bfYv5tUZwEIskMM93VanwNIi5bIKnDrJdEY= @@ -575,8 +599,8 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220908164124-27713097b956/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= -golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20220526004731-065cf7ba2467/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.11.0 h1:F9tnn/DA/Im8nCwm+fX+1/eBwi4qFjRT++MhtVC4ZX0= diff --git a/internal/cmd/server.go b/internal/cmd/server.go index b6979023f09a..f62d508f7aeb 100644 --- a/internal/cmd/server.go +++ b/internal/cmd/server.go @@ -6,12 +6,6 @@ package cmd import ( - "fmt" - "net" - "net/http" - "net/http/pprof" - "time" - "github.com/davecgh/go-spew/spew" "github.com/spf13/cobra" @@ -57,7 +51,7 @@ func server() error { return err } - if cfg.EnvoyGateway.Admin.Debug { + if cfg.EnvoyGateway.GetEnvoyGatewayDebug().DumpConfig { spewConfig := spew.NewDefaultConfig() spewConfig.DisableMethods = true spewConfig.Dump(cfg) @@ -203,9 +197,6 @@ func setupRunners(cfg *config.Server) error { } } - // Start the admin server - go setupAdminServer(cfg) - // Wait until done <-ctx.Done() // Close messages @@ -223,33 +214,3 @@ func setupRunners(cfg *config.Server) error { return nil } - -func setupAdminServer(cfg *config.Server) { - adminHandlers := 
http.NewServeMux() - - address := cfg.EnvoyGateway.GetEnvoyGatewayAdmin().Address - - if cfg.EnvoyGateway.GetEnvoyGatewayAdmin().Debug { - // Serve pprof endpoints to aid in live debugging. - adminHandlers.HandleFunc("/debug/pprof/", pprof.Index) - adminHandlers.HandleFunc("/debug/pprof/profile", pprof.Profile) - adminHandlers.HandleFunc("/debug/pprof/trace", pprof.Trace) - adminHandlers.HandleFunc("/debug/pprof/symbol", pprof.Symbol) - adminHandlers.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) - } - - adminServer := &http.Server{ - Handler: adminHandlers, - Addr: net.JoinHostPort(address.Host, fmt.Sprint(address.Port)), - ReadTimeout: 5 * time.Second, - ReadHeaderTimeout: 5 * time.Second, - WriteTimeout: 10 * time.Second, - IdleTimeout: 15 * time.Second, - } - - // Listen And Serve Admin Server. - if err := adminServer.ListenAndServe(); err != nil { - cfg.Logger.Error(err, "start debug server failed") - } - -} diff --git a/internal/infrastructure/kubernetes/proxy/resource_provider_test.go b/internal/infrastructure/kubernetes/proxy/resource_provider_test.go index 50ce69946e9c..b3894d475ca0 100644 --- a/internal/infrastructure/kubernetes/proxy/resource_provider_test.go +++ b/internal/infrastructure/kubernetes/proxy/resource_provider_test.go @@ -65,7 +65,7 @@ func TestDeployment(t *testing.T) { caseName string infra *ir.Infra deploy *egv1a1.KubernetesDeploymentSpec - proxyLogging map[egv1a1.LogComponent]egv1a1.LogLevel + proxyLogging map[egv1a1.ProxyLogComponent]egv1a1.LogLevel bootstrap string telemetry *egv1a1.ProxyTelemetry concurrency *int32 @@ -248,7 +248,7 @@ func TestDeployment(t *testing.T) { caseName: "component-level", infra: newTestInfra(), deploy: nil, - proxyLogging: map[egv1a1.LogComponent]egv1a1.LogLevel{ + proxyLogging: map[egv1a1.ProxyLogComponent]egv1a1.LogLevel{ egv1a1.LogComponentDefault: egv1a1.LogLevelError, egv1a1.LogComponentFilter: egv1a1.LogLevelInfo, }, @@ -259,7 +259,7 @@ func TestDeployment(t *testing.T) { infra: newTestInfra(), 
telemetry: &egv1a1.ProxyTelemetry{ Metrics: &egv1a1.ProxyMetrics{ - Prometheus: &egv1a1.PrometheusProvider{}, + Prometheus: &egv1a1.ProxyPrometheusProvider{}, }, }, }, diff --git a/internal/metrics/definition.go b/internal/metrics/definition.go new file mode 100644 index 000000000000..6706e02185ed --- /dev/null +++ b/internal/metrics/definition.go @@ -0,0 +1,146 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "errors" + "sync" + + "github.com/envoyproxy/gateway/api/v1alpha1" + log "github.com/envoyproxy/gateway/internal/logging" + "github.com/envoyproxy/gateway/internal/utils/slice" + "go.opentelemetry.io/otel" + api "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/sdk/metric" + "golang.org/x/exp/maps" +) + +var ( + meter = func() api.Meter { + return otel.GetMeterProvider().Meter("envoy-gateway") + } + + monitoringLogger = log.DefaultLogger(v1alpha1.LogLevelInfo).WithName("metrics") +) + +func init() { + otel.SetLogger(monitoringLogger.Logger) +} + +// MetricType is the type of a metric. +type MetricType string + +// Metric type supports: +// * Counter: A Counter is a simple metric that only goes up (increments). +// +// * Gauge: A Gauge is a metric that represent +// a single numerical value that can arbitrarily go up and down. +// +// * Histogram: A Histogram samples observations and counts them in configurable buckets. +// It also provides a sum of all observed values. +// It's used to visualize the statistical distribution of these observations. + +const ( + Counter MetricType = "Counter" + Gauge MetricType = "Gauge" + Histogram MetricType = "Histogram" +) + +// A Metric collects numerical observations. +type Metric interface { + // Increment records a value of 1 for the current measure. For Sums, + // this is equivalent to adding 1 to the current value. 
For Gauges, + // this is equivalent to setting the value to 1. For Histograms, + // this is equivalent to making an observation of value 1. + Increment() + + // Decrement records a value of -1 for the current measure. For Sums, + // this is equivalent to subtracting 1 from the current value. For Gauges, + // this is equivalent to setting the value to -1. For Histograms, + // this is equivalent to making an observation of value -1. + Decrement() + + // Name returns the name value of a Metric. + Name() string + + // Record makes an observation of the provided value for the given measure. + Record(value float64) + + // RecordInt makes an observation of the provided value for the measure. + RecordInt(value int64) + + // With creates a new Metric, with the LabelValues provided. This allows creating + // a set of pre-dimensioned data for recording purposes. This is primarily used + // for documentation and convenience. Metrics created with this method do not need + // to be registered (they share the registration of their parent Metric). + With(labelValues ...LabelValue) Metric + + // Register configures the Metric for export. It MUST be called before collection + // of values for the Metric. An error will be returned if registration fails. + Register() error +} + +// ExportAllDefinitions reports all currently registered metric definitions. +func ExportAllDefinitions() []Definition { + defs.mu.Lock() + defer defs.mu.Unlock() + return slice.SortFunc(maps.Values(defs.known), func(a, b Definition) bool { + return a.Name < b.Name + }) +} + +// Definition records a metric's metadata. +type Definition struct { + Name string + Type MetricType + Description string + Bounds []float64 +} + +// def stores known metrics +type def struct { + started bool + mu sync.Mutex + known map[string]Definition +} + +// defs is a global that stores all registered metrics +var defs = def{ + known: map[string]Definition{}, +} + +// register records a newly defined metric. 
Only valid before an exporter is set.
+func (d *def) register(def Definition) {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+	if d.started {
+		monitoringLogger.Error(errors.New("cannot initialize metric after metric has started"), "metric registered too late", "metric", def.Name)
+	}
+	d.known[def.Name] = def
+}
+
+// preRunSteps runs pre-steps when adding to meter provider.
+func (d *def) preRunSteps() []metric.Option {
+	d.mu.Lock()
+	defer d.mu.Unlock()
+	d.started = true
+	opts := []metric.Option{}
+	for name, def := range d.known {
+		if def.Bounds == nil {
+			continue
+		}
+		// for each histogram metric (i.e. those with bounds), set up a view explicitly defining those buckets.
+		v := metric.WithView(metric.NewView(
+			metric.Instrument{Name: name},
+			metric.Stream{
+				Aggregation: metric.AggregationExplicitBucketHistogram{
+					Boundaries: def.Bounds,
+				}},
+		))
+		opts = append(opts, v)
+	}
+	return opts
+}
diff --git a/internal/metrics/doc.go b/internal/metrics/doc.go
new file mode 100644
index 000000000000..b601b082be9c
--- /dev/null
+++ b/internal/metrics/doc.go
@@ -0,0 +1,6 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics
diff --git a/internal/metrics/label.go b/internal/metrics/label.go
new file mode 100644
index 000000000000..a3b3c4b03eaf
--- /dev/null
+++ b/internal/metrics/label.go
@@ -0,0 +1,37 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics
+
+import "go.opentelemetry.io/otel/attribute"
+
+// NewLabel will attempt to create a new Label.
+func NewLabel(key string) Label {
+	return Label{attribute.Key(key)}
+}
+
+// A Label provides a named dimension for a Metric.
+type Label struct {
+	key attribute.Key
+}
+
+// Value creates a new LabelValue for the Label.
+func (l Label) Value(value string) LabelValue { + return LabelValue{l.key.String(value)} +} + +// A LabelValue represents a Label with a specific value. It is used to record +// values for a Metric. +type LabelValue struct { + keyValue attribute.KeyValue +} + +func (l LabelValue) Key() Label { + return Label{l.keyValue.Key} +} + +func (l LabelValue) Value() string { + return l.keyValue.Value.AsString() +} diff --git a/internal/metrics/metric.go b/internal/metrics/metric.go new file mode 100644 index 000000000000..d19f32c44122 --- /dev/null +++ b/internal/metrics/metric.go @@ -0,0 +1,82 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "go.opentelemetry.io/otel/attribute" +) + +// base metric implementation. +type base struct { + name string + attrs []attribute.KeyValue + rest Metric +} + +func (f base) Name() string { + return f.name +} + +func (f base) Increment() { + f.rest.Record(1) +} + +func (f base) Decrement() { + f.rest.Record(-1) +} + +func (f base) Register() error { + return nil +} + +func (f base) RecordInt(value int64) { + f.rest.Record(float64(value)) +} + +// disabled metric implementation. 
+type disabled struct {
+	name string
+}
+
+// Decrement implements Metric
+func (dm *disabled) Decrement() {}
+
+// Increment implements Metric
+func (dm *disabled) Increment() {}
+
+// Name implements Metric
+func (dm *disabled) Name() string {
+	return dm.name
+}
+
+// Record implements Metric
+func (dm *disabled) Record(value float64) {}
+
+// RecordInt implements Metric
+func (dm *disabled) RecordInt(value int64) {}
+
+// Register implements Metric
+func (dm *disabled) Register() error {
+	return nil
+}
+
+// With implements Metric
+func (dm *disabled) With(labelValues ...LabelValue) Metric {
+	return dm
+}
+
+var _ Metric = &disabled{}
+
+func mergeAttributes(bm base, labelValues []LabelValue) ([]attribute.KeyValue, attribute.Set) {
+	attrs := make([]attribute.KeyValue, 0, len(bm.attrs)+len(labelValues))
+	attrs = append(attrs, bm.attrs...)
+	for _, v := range labelValues {
+		attrs = append(attrs, v.keyValue)
+	}
+
+	set := attribute.NewSet(attrs...)
+	return attrs, set
+}
diff --git a/internal/metrics/metric_counter.go b/internal/metrics/metric_counter.go
new file mode 100644
index 000000000000..ab3bf4d478ae
--- /dev/null
+++ b/internal/metrics/metric_counter.go
@@ -0,0 +1,73 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics
+
+import (
+	"context"
+
+	api "go.opentelemetry.io/otel/metric"
+)
+
+// NewCounter creates a new Counter Metric (the values will be cumulative).
+// That means that data collected by the new Metric will be summed before export.
+func NewCounter(name, description string, opts ...Options) Metric {
+	defs.register(Definition{
+		Name:        name,
+		Type:        Counter,
+		Description: description,
+	})
+	o, dm := metricOptions(name, description, opts...)
+	if dm != nil {
+		return dm
+	}
+	return newCounter(o)
+}
+
+type counter struct {
+	base
+	c api.Float64Counter
+	// precomputedAddOption is just a precomputation to avoid allocations on each record call
+	precomputedAddOption []api.AddOption
+}
+
+var _ Metric = &counter{}
+
+func newCounter(o options) *counter {
+	c, err := meter().Float64Counter(o.name,
+		api.WithDescription(o.description),
+		api.WithUnit(string(o.unit)))
+	if err != nil {
+		monitoringLogger.Error(err, "failed to create counter")
+	}
+	r := &counter{c: c}
+	r.base = base{
+		name: o.name,
+		rest: r,
+	}
+	return r
+}
+
+func (f *counter) Record(value float64) {
+	if f.precomputedAddOption != nil {
+		f.c.Add(context.Background(), value, f.precomputedAddOption...)
+	} else {
+		f.c.Add(context.Background(), value)
+	}
+}
+
+func (f *counter) With(labelValues ...LabelValue) Metric {
+	attrs, set := mergeAttributes(f.base, labelValues)
+	nm := &counter{
+		c:                    f.c,
+		precomputedAddOption: []api.AddOption{api.WithAttributeSet(set)},
+	}
+	nm.base = base{
+		name:  f.name,
+		attrs: attrs,
+		rest:  nm,
+	}
+	return nm
+}
diff --git a/internal/metrics/metric_gauge.go b/internal/metrics/metric_gauge.go
new file mode 100644
index 000000000000..efe7de9c48ab
--- /dev/null
+++ b/internal/metrics/metric_gauge.go
@@ -0,0 +1,113 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics
+
+import (
+	"context"
+	"sync"
+
+	"go.opentelemetry.io/otel/attribute"
+	api "go.opentelemetry.io/otel/metric"
+)
+
+// NewGauge creates a new Gauge Metric. That means that data collected by the new
+// Metric will export only the last recorded value.
+func NewGauge(name, description string, opts ...Options) Metric {
+	defs.register(Definition{
+		Name:        name,
+		Type:        Gauge,
+		Description: description,
+	})
+	o, dm := metricOptions(name, description, opts...)
+	if dm != nil {
+		return dm
+	}
+	return newGauge(o)
+}
+
+type gauge struct {
+	base
+	g api.Float64ObservableGauge
+
+	// attributeSets stores a map of attributes -> values, for gauges.
+	attributeSetsMutex *sync.RWMutex
+	attributeSets      map[attribute.Set]*gaugeValues
+	currentGaugeSet    *gaugeValues
+}
+
+var _ Metric = &gauge{}
+
+func newGauge(o options) *gauge {
+	r := &gauge{
+		attributeSetsMutex: &sync.RWMutex{},
+	}
+	r.attributeSets = map[attribute.Set]*gaugeValues{}
+	g, err := meter().Float64ObservableGauge(o.name,
+		api.WithFloat64Callback(func(ctx context.Context, observer api.Float64Observer) error {
+			r.attributeSetsMutex.Lock()
+			defer r.attributeSetsMutex.Unlock()
+			for _, gv := range r.attributeSets {
+				observer.Observe(gv.val, gv.opt...)
+			}
+			return nil
+		}),
+		api.WithDescription(o.description),
+		api.WithUnit(string(o.unit)))
+	if err != nil {
+		monitoringLogger.Error(err, "failed to create gauge")
+	}
+	r.g = g
+	r.base = base{
+		name: o.name,
+		rest: r,
+	}
+	return r
+}
+
+func (f *gauge) Record(value float64) {
+	f.attributeSetsMutex.Lock()
+	if f.currentGaugeSet == nil {
+		f.currentGaugeSet = &gaugeValues{}
+		f.attributeSets[attribute.NewSet()] = f.currentGaugeSet
+	}
+	f.currentGaugeSet.val = value
+	f.attributeSetsMutex.Unlock()
+}
+
+// With returns a child gauge that records into the value slot for the parent's
+// attributes merged with labelValues; the slot is created on first use.
+func (f *gauge) With(labelValues ...LabelValue) Metric {
+	attrs, set := mergeAttributes(f.base, labelValues)
+	nm := &gauge{
+		g:                  f.g,
+		attributeSetsMutex: f.attributeSetsMutex,
+		attributeSets:      f.attributeSets,
+	}
+	// The attribute-set map is shared with the parent gauge; Record and the
+	// observable callback access it under attributeSetsMutex, so the lookup and
+	// insert here must hold the same lock to avoid a concurrent map access.
+	nm.attributeSetsMutex.Lock()
+	gv, ok := nm.attributeSets[set]
+	if !ok {
+		gv = &gaugeValues{
+			opt: []api.ObserveOption{api.WithAttributeSet(set)},
+		}
+		nm.attributeSets[set] = gv
+	}
+	nm.attributeSetsMutex.Unlock()
+	nm.currentGaugeSet = gv
+	nm.base = base{
+		name:  f.name,
+		attrs: attrs,
+		rest:  nm,
+	}
+	return nm
+}
+
+type gaugeValues struct {
+	val float64
+	opt []api.ObserveOption
+}
diff --git a/internal/metrics/metric_histogram.go b/internal/metrics/metric_histogram.go
new file mode 100644
index 000000000000..4e1da411ea85
--- /dev/null
+++ b/internal/metrics/metric_histogram.go @@ -0,0 +1,74 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "context" + + api "go.opentelemetry.io/otel/metric" +) + +// NewHistogram creates a new Metric with an aggregation type of Histogram. This means that the +// data collected by the Metric will be collected and exported as a histogram, with the specified bounds. +func NewHistogram(name, description string, bounds []float64, opts ...Options) Metric { + defs.register(Definition{ + Name: name, + Type: Histogram, + Description: description, + Bounds: bounds, + }) + o, dm := metricOptions(name, description, opts...) + if dm != nil { + return dm + } + return newHistogram(o) +} + +type distribution struct { + base + d api.Float64Histogram + // precomputedRecordOption is just a precomputation to avoid allocations on each record call + precomputedRecordOption []api.RecordOption +} + +var _ Metric = &distribution{} + +func newHistogram(o options) *distribution { + d, err := meter().Float64Histogram(o.name, + api.WithDescription(o.description), + api.WithUnit(string(o.unit))) + if err != nil { + monitoringLogger.Error(err, "failed to create distribution") + } + r := &distribution{d: d} + r.base = base{ + name: o.name, + rest: r, + } + return r +} + +func (f *distribution) Record(value float64) { + if f.precomputedRecordOption != nil { + f.d.Record(context.Background(), value, f.precomputedRecordOption...) 
+ } else { + f.d.Record(context.Background(), value) + } +} + +func (f *distribution) With(labelValues ...LabelValue) Metric { + attrs, set := mergeAttributes(f.base, labelValues) + nm := &distribution{ + d: f.d, + precomputedRecordOption: []api.RecordOption{api.WithAttributeSet(set)}, + } + nm.base = base{ + name: f.name, + attrs: attrs, + rest: nm, + } + return nm +} diff --git a/internal/metrics/options.go b/internal/metrics/options.go new file mode 100644 index 000000000000..e8f3c9559f96 --- /dev/null +++ b/internal/metrics/options.go @@ -0,0 +1,43 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +// Options encode changes to the options passed to a Metric at creation time. +type Options func(*options) + +type options struct { + enabledCondition func() bool + unit Unit + name string + description string +} + +// WithUnit provides configuration options for a new Metric, providing unit of measure +// information for a new Metric. +func WithUnit(unit Unit) Options { + return func(opts *options) { + opts.unit = unit + } +} + +// WithEnabled allows a metric to be condition enabled if the provided function returns true. +// If disabled, metric operations will do nothing. 
+func WithEnabled(enabled func() bool) Options { + return func(o *options) { + o.enabledCondition = enabled + } +} + +func metricOptions(name, description string, opts ...Options) (options, Metric) { + o := options{unit: None, name: name, description: description} + for _, opt := range opts { + opt(&o) + } + if o.enabledCondition != nil && !o.enabledCondition() { + return o, &disabled{name: name} + } + return o, nil +} diff --git a/internal/metrics/register.go b/internal/metrics/register.go new file mode 100644 index 000000000000..6488eb5bfe27 --- /dev/null +++ b/internal/metrics/register.go @@ -0,0 +1,109 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "context" + "fmt" + + "github.com/envoyproxy/gateway/internal/envoygateway/config" + "github.com/prometheus/client_golang/prometheus" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" + otelprom "go.opentelemetry.io/otel/exporters/prometheus" + "go.opentelemetry.io/otel/sdk/metric" + "sigs.k8s.io/controller-runtime/pkg/manager" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +// Init initializes and registers the global metrics server. +func Init(svr *config.Server, opts *manager.Options) error { + options := newOptions(svr, opts) + if err := register(metricsserver.Registry, options); err != nil { + return err + } + + return nil +} + +// register sets the global metrics registry to the provided Prometheus registerer. 
+func register(reg prometheus.Registerer, opts metricsOptions) error { + if reg == nil { + reg = metricsserver.Registry + } + + metricOpts := []metric.Option{} + if opts.pullOptions.enable { + promOpts := []otelprom.Option{ + otelprom.WithoutScopeInfo(), + otelprom.WithoutTargetInfo(), + otelprom.WithoutUnits(), + otelprom.WithRegisterer(reg), + otelprom.WithoutCounterSuffixes(), + } + promreader, err := otelprom.New(promOpts...) + if err != nil { + return err + } + + metricOpts = append(metricOpts, metric.WithReader(promreader)) + monitoringLogger.Info("initialized metrics pull endpoint", "address", opts.pullOptions.address) + } + + for _, sink := range opts.pushOptions.sinks { + address := fmt.Sprintf("%s:%d", sink.host, sink.port) + httpexporter, err := otlpmetrichttp.New( + context.Background(), + otlpmetrichttp.WithEndpoint(address), + otlpmetrichttp.WithInsecure(), + ) + if err != nil { + return err + } + + otelreader := metric.NewPeriodicReader(httpexporter) + metricOpts = append(metricOpts, metric.WithReader(otelreader)) + monitoringLogger.Info("initialized metrics push endpoint", "address", address) + } + + metricOpts = append(metricOpts, defs.preRunSteps()...) + mp := metric.NewMeterProvider(metricOpts...) 
+ otel.SetMeterProvider(mp) + + return nil +} + +type metricsOptions struct { + pullOptions struct { + address string + enable bool + } + pushOptions struct { + sinks []sink + } +} + +type sink struct { + host string + port int32 +} + +func newOptions(svr *config.Server, opts *manager.Options) metricsOptions { + newOpts := metricsOptions{} + if svr.EnvoyGateway.IfEnablePrometheus() { + newOpts.pullOptions.enable = true + newOpts.pullOptions.address = svr.EnvoyGateway.GetEnvoyGatewayAdminAddress() + opts.Metrics.BindAddress = svr.EnvoyGateway.GetEnvoyGatewayAdminAddress() + } + + for _, config := range svr.EnvoyGateway.GetEnvoyGatewayTelemetry().Metrics.Sinks { + newOpts.pushOptions.sinks = append(newOpts.pushOptions.sinks, sink{ + host: config.Host, + port: config.Port, + }) + } + return newOpts +} diff --git a/internal/metrics/sample_counter_test.go b/internal/metrics/sample_counter_test.go new file mode 100644 index 000000000000..64d4207d6fe6 --- /dev/null +++ b/internal/metrics/sample_counter_test.go @@ -0,0 +1,23 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. 
+ +package metrics_test + +import "github.com/envoyproxy/gateway/internal/metrics" + +var ( + ir_updates = metrics.NewCounter( + "ir_updates_total", + "Number of IR updates, by ir type", + ) +) + +func NewCounter() { + // increment on every xds ir update + ir_updates.With(ir_type.Value("xds")).Increment() + + // xds ir updates double + ir_updates.With(ir_type.Value("xds")).Record(2) +} diff --git a/internal/metrics/sample_gauge_test.go b/internal/metrics/sample_gauge_test.go new file mode 100644 index 000000000000..af2a1ca192f0 --- /dev/null +++ b/internal/metrics/sample_gauge_test.go @@ -0,0 +1,27 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics_test + +import "github.com/envoyproxy/gateway/internal/metrics" + +var ( + ir_type = metrics.NewLabel("ir-type") + currentIRsNum = metrics.NewGauge( + "current_irs_queue_num", + "current number of ir in queue, by ir type", + ) +) + +func NewGauge() { + // only the last recorded value (2) will be exported for this gauge + currentIRsNum.With(ir_type.Value("xds")).Record(1) + currentIRsNum.With(ir_type.Value("xds")).Record(3) + currentIRsNum.With(ir_type.Value("xds")).Record(2) + + currentIRsNum.With(ir_type.Value("infra")).Record(1) + currentIRsNum.With(ir_type.Value("infra")).Record(3) + currentIRsNum.With(ir_type.Value("infra")).Record(2) +} diff --git a/internal/metrics/sample_histogram_test.go b/internal/metrics/sample_histogram_test.go new file mode 100644 index 000000000000..b34658fcbe54 --- /dev/null +++ b/internal/metrics/sample_histogram_test.go @@ -0,0 +1,23 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. 
+ +package metrics_test + +import "github.com/envoyproxy/gateway/internal/metrics" + +var ( + method = metrics.NewLabel("method") + + sentBytes = metrics.NewHistogram( + "sent_bytes_total", + "Histogram of sent bytes by method", + []float64{10, 50, 100, 1000, 10000}, + metrics.WithUnit(metrics.Bytes), + ) +) + +func NewHistogram() { + sentBytes.With(method.Value("/request/path/1")).Record(458) +} diff --git a/internal/metrics/units.go b/internal/metrics/units.go new file mode 100644 index 000000000000..1c7b5ff13c20 --- /dev/null +++ b/internal/metrics/units.go @@ -0,0 +1,18 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +// Unit encodes the standard name for describing the quantity +// measured by a Metric (if applicable). +type Unit string + +// Predefined units for use with the metrics package. +const ( + None Unit = "1" + Bytes Unit = "By" + Seconds Unit = "s" + Milliseconds Unit = "ms" +) diff --git a/internal/provider/kubernetes/kubernetes.go b/internal/provider/kubernetes/kubernetes.go index 820868eeafb7..f9fc7c4818d1 100644 --- a/internal/provider/kubernetes/kubernetes.go +++ b/internal/provider/kubernetes/kubernetes.go @@ -15,12 +15,12 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/healthz" "sigs.k8s.io/controller-runtime/pkg/manager" - metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "github.com/envoyproxy/gateway/api/v1alpha1" "github.com/envoyproxy/gateway/internal/envoygateway" "github.com/envoyproxy/gateway/internal/envoygateway/config" "github.com/envoyproxy/gateway/internal/message" + "github.com/envoyproxy/gateway/internal/metrics" "github.com/envoyproxy/gateway/internal/status" ) @@ -41,9 +41,14 @@ func New(cfg *rest.Config, svr *config.Server, resources *message.ProviderResour LeaderElection: false, HealthProbeBindAddress: ":8081", 
LeaderElectionID: "5b9825d2.gateway.envoyproxy.io", - Metrics: metricsserver.Options{ - BindAddress: ":8080", - }, + } + + if address := svr.EnvoyGateway.GetEnvoyGatewayDebug().Address; address != nil { + mgrOpts.PprofBindAddress = fmt.Sprintf("%s:%d", address.Host, address.Port) + } + + if err := metrics.Init(svr, &mgrOpts); err != nil { + return nil, err } // TODO: implement config validation on the watch mode config diff --git a/internal/utils/slice/slice.go b/internal/utils/slice/slice.go index cc75f9f0e597..99503a8753cc 100644 --- a/internal/utils/slice/slice.go +++ b/internal/utils/slice/slice.go @@ -5,6 +5,10 @@ package slice +import ( + "golang.org/x/exp/slices" +) + // ContainsString checks if a given slice of strings contains the provided string. func ContainsString(slice []string, s string) bool { for _, item := range slice { @@ -27,3 +31,23 @@ func RemoveString(slice []string, s string) []string { } return newSlice } + +// SortFunc sorts the slice x in ascending order as determined by the less function. +// This sort is not guaranteed to be stable. +// The slice is modified in place but returned. 
+func SortFunc[E any](x []E, less func(a, b E) bool) []E { + if len(x) <= 1 { + return x + } + slices.SortFunc(x, less) + return x +} + +// Map runs f() over all elements in s and returns the result +func Map[E any, O any](s []E, f func(E) O) []O { + n := make([]O, 0, len(s)) + for _, e := range s { + n = append(n, f(e)) + } + return n +} diff --git a/internal/xds/bootstrap/bootstrap_test.go b/internal/xds/bootstrap/bootstrap_test.go index bbee8f020265..678317dbcca7 100644 --- a/internal/xds/bootstrap/bootstrap_test.go +++ b/internal/xds/bootstrap/bootstrap_test.go @@ -27,7 +27,7 @@ func TestGetRenderedBootstrapConfig(t *testing.T) { { name: "enable-prometheus", proxyMetrics: &egv1a1.ProxyMetrics{ - Prometheus: &egv1a1.PrometheusProvider{}, + Prometheus: &egv1a1.ProxyPrometheusProvider{}, }, }, { @@ -65,7 +65,7 @@ func TestGetRenderedBootstrapConfig(t *testing.T) { Value: "cluster", }, }, - Prometheus: &egv1a1.PrometheusProvider{}, + Prometheus: &egv1a1.ProxyPrometheusProvider{}, }, }, } diff --git a/site/content/en/latest/api/extension_types.md b/site/content/en/latest/api/extension_types.md index 73bff134936c..dd1d4f554182 100644 --- a/site/content/en/latest/api/extension_types.md +++ b/site/content/en/latest/api/extension_types.md @@ -201,7 +201,9 @@ EnvoyGateway is the schema for the envoygateways API. | `gateway` _[Gateway](#gateway)_ | Gateway defines desired Gateway API specific configuration. If unset, default configuration parameters will apply. | | `provider` _[EnvoyGatewayProvider](#envoygatewayprovider)_ | Provider defines the desired provider and provider-specific configuration. If unspecified, the Kubernetes provider is used with default configuration parameters. | | `logging` _[EnvoyGatewayLogging](#envoygatewaylogging)_ | Logging defines logging parameters for Envoy Gateway. | +| `telemetry` _[EnvoyGatewayTelemetry](#envoygatewaytelemetry)_ | Telemetry defines telemetry related configurations for envoy gateway. 
| | `admin` _[EnvoyGatewayAdmin](#envoygatewayadmin)_ | Admin defines the desired admin related abilities. If unspecified, the Admin is used with default configuration parameters. | +| `debug` _[EnvoyGatewayDebug](#envoygatewaydebug)_ | Debug defines the desired debug related abilities. If unspecified, the debug will not be running, including pprof, dump config etc. | | `rateLimit` _[RateLimit](#ratelimit)_ | RateLimit defines the configuration associated with the Rate Limit service deployed by Envoy Gateway required to implement the Global Rate limiting functionality. The specific rate limit service used here is the reference implementation in Envoy. For more details visit https://github.com/envoyproxy/ratelimit. This configuration is unneeded for "Local" rate limiting. | | `extensionManager` _[ExtensionManager](#extensionmanager)_ | ExtensionManager defines an extension manager to register for the Envoy Gateway Control Plane. | | `extensionApis` _[ExtensionAPISettings](#extensionapisettings)_ | ExtensionAPIs defines the settings related to specific Gateway API Extensions implemented by Envoy Gateway | @@ -220,7 +222,6 @@ _Appears in:_ | Field | Description | | --- | --- | | `address` _[EnvoyGatewayAdminAddress](#envoygatewayadminaddress)_ | Address defines the address of Envoy Gateway Admin Server. | -| `debug` _boolean_ | Debug defines if enable the /debug endpoint of Envoy Gateway. | #### EnvoyGatewayAdminAddress @@ -253,6 +254,37 @@ _Appears in:_ | `infrastructure` _[EnvoyGatewayInfrastructureProvider](#envoygatewayinfrastructureprovider)_ | Infrastructure defines the desired infrastructure provider. This provider is used to specify the provider to be used to provide an environment to deploy the out resources like the Envoy Proxy data plane. | +#### EnvoyGatewayDebug + + + +EnvoyGatewayDebug defines the Envoy Gateway Debug configuration. 
+ +_Appears in:_ +- [EnvoyGateway](#envoygateway) +- [EnvoyGatewaySpec](#envoygatewayspec) + +| Field | Description | +| --- | --- | +| `dumpConfig` _boolean_ | DumpConfig defines if dump the Envoy Gateway config in logs. | +| `address` _[EnvoyGatewayDebugAddress](#envoygatewaydebugaddress)_ | Address defines the address of Envoy Gateway Debug Server. Pprof will use the debug address, if you set it to non-nil. | + + +#### EnvoyGatewayDebugAddress + + + +EnvoyGatewayDebugAddress defines the Envoy Gateway Debug Address configuration. + +_Appears in:_ +- [EnvoyGatewayDebug](#envoygatewaydebug) + +| Field | Description | +| --- | --- | +| `port` _integer_ | Port defines the port the debug server is exposed on. | +| `host` _string_ | Host defines the debug server hostname. | + + #### EnvoyGatewayFileResourceProvider @@ -336,6 +368,51 @@ _Appears in:_ | `level` _object (keys:[EnvoyGatewayLogComponent](#envoygatewaylogcomponent), values:[LogLevel](#loglevel))_ | Level is the logging level. If unspecified, defaults to "info". EnvoyGatewayLogComponent options: default/provider/gateway-api/xds-translator/xds-server/infrastructure/global-ratelimit. LogLevel options: debug/info/error/warn. | +#### EnvoyGatewayMetricSink + + + +EnvoyGatewayMetricSink defines control plane metric sinks where metrics are sent to. + +_Appears in:_ +- [EnvoyGatewayMetrics](#envoygatewaymetrics) + +| Field | Description | +| --- | --- | +| `type` _[MetricSinkType](#metricsinktype)_ | Type defines the metric sink type. EG control plane currently supports OpenTelemetry. | +| `host` _string_ | Host define the sink service hostname. | +| `port` _integer_ | Port defines the port the sink service is exposed on. | + + +#### EnvoyGatewayMetrics + + + +EnvoyGatewayMetrics defines control plane push/pull metrics configurations. 
+ +_Appears in:_ +- [EnvoyGatewayTelemetry](#envoygatewaytelemetry) + +| Field | Description | +| --- | --- | +| `sinks` _[EnvoyGatewayMetricSink](#envoygatewaymetricsink) array_ | Sinks defines the metric sinks where metrics are sent to. | +| `prometheus` _[EnvoyGatewayPrometheusProvider](#envoygatewayprometheusprovider)_ | Prometheus defines the configuration for prometheus endpoint. | + + +#### EnvoyGatewayPrometheusProvider + + + +EnvoyGatewayPrometheusProvider will expose prometheus endpoint in pull mode. + +_Appears in:_ +- [EnvoyGatewayMetrics](#envoygatewaymetrics) + +| Field | Description | +| --- | --- | +| `enable` _boolean_ | Enable defines if enables the prometheus metrics in pull mode. Default is true. | + + #### EnvoyGatewayProvider @@ -382,12 +459,29 @@ _Appears in:_ | `gateway` _[Gateway](#gateway)_ | Gateway defines desired Gateway API specific configuration. If unset, default configuration parameters will apply. | | `provider` _[EnvoyGatewayProvider](#envoygatewayprovider)_ | Provider defines the desired provider and provider-specific configuration. If unspecified, the Kubernetes provider is used with default configuration parameters. | | `logging` _[EnvoyGatewayLogging](#envoygatewaylogging)_ | Logging defines logging parameters for Envoy Gateway. | +| `telemetry` _[EnvoyGatewayTelemetry](#envoygatewaytelemetry)_ | Telemetry defines telemetry related configurations for envoy gateway. | | `admin` _[EnvoyGatewayAdmin](#envoygatewayadmin)_ | Admin defines the desired admin related abilities. If unspecified, the Admin is used with default configuration parameters. | +| `debug` _[EnvoyGatewayDebug](#envoygatewaydebug)_ | Debug defines the desired debug related abilities. If unspecified, the debug will not be running, including pprof, dump config etc. 
| | `rateLimit` _[RateLimit](#ratelimit)_ | RateLimit defines the configuration associated with the Rate Limit service deployed by Envoy Gateway required to implement the Global Rate limiting functionality. The specific rate limit service used here is the reference implementation in Envoy. For more details visit https://github.com/envoyproxy/ratelimit. This configuration is unneeded for "Local" rate limiting. | | `extensionManager` _[ExtensionManager](#extensionmanager)_ | ExtensionManager defines an extension manager to register for the Envoy Gateway Control Plane. | | `extensionApis` _[ExtensionAPISettings](#extensionapisettings)_ | ExtensionAPIs defines the settings related to specific Gateway API Extensions implemented by Envoy Gateway | +#### EnvoyGatewayTelemetry + + + +EnvoyGatewayTelemetry defines telemetry configurations for envoy gateway control plane. Control plane will focus on metrics observability telemetry and tracing telemetry later. + +_Appears in:_ +- [EnvoyGateway](#envoygateway) +- [EnvoyGatewaySpec](#envoygatewayspec) + +| Field | Description | +| --- | --- | +| `metrics` _[EnvoyGatewayMetrics](#envoygatewaymetrics)_ | Metrics defines metrics configuration for envoy gateway. | + + #### EnvoyJSONPatchConfig @@ -888,17 +982,6 @@ _Appears in:_ | `value` _string_ | Value defines the hard-coded value to add to each span. | -#### LogComponent - -_Underlying type:_ `string` - -LogComponent defines a component that supports a configured logging level. - -_Appears in:_ -- [ProxyLogging](#proxylogging) - - - #### LogLevel _Underlying type:_ `string` @@ -959,6 +1042,7 @@ _Underlying type:_ `string` _Appears in:_ +- [EnvoyGatewayMetricSink](#envoygatewaymetricsink) - [MetricSink](#metricsink) @@ -994,17 +1078,6 @@ _Appears in:_ | `port` _integer_ | Port defines the port the service is exposed on. 
| -#### PrometheusProvider - - - - - -_Appears in:_ -- [ProxyMetrics](#proxymetrics) - - - #### ProviderType _Underlying type:_ `string` @@ -1116,6 +1189,17 @@ _Appears in:_ | `value` _string_ | Value is a YAML string of the bootstrap. | +#### ProxyLogComponent + +_Underlying type:_ `string` + +ProxyLogComponent defines a component that supports a configured logging level. + +_Appears in:_ +- [ProxyLogging](#proxylogging) + + + #### ProxyLogging @@ -1127,7 +1211,7 @@ _Appears in:_ | Field | Description | | --- | --- | -| `level` _object (keys:[LogComponent](#logcomponent), values:[LogLevel](#loglevel))_ | Level is a map of logging level per component, where the component is the key and the log level is the value. If unspecified, defaults to "default: warn". | +| `level` _object (keys:[ProxyLogComponent](#proxylogcomponent), values:[LogLevel](#loglevel))_ | Level is a map of logging level per component, where the component is the key and the log level is the value. If unspecified, defaults to "default: warn". | #### ProxyMetrics @@ -1141,12 +1225,23 @@ _Appears in:_ | Field | Description | | --- | --- | -| `prometheus` _[PrometheusProvider](#prometheusprovider)_ | Prometheus defines the configuration for Admin endpoint `/stats/prometheus`. | +| `prometheus` _[ProxyPrometheusProvider](#proxyprometheusprovider)_ | Prometheus defines the configuration for Admin endpoint `/stats/prometheus`. | | `sinks` _[MetricSink](#metricsink) array_ | Sinks defines the metric sinks where metrics are sent to. | | `matches` _[Match](#match) array_ | Matches defines configuration for selecting specific metrics instead of generating all metrics stats that are enabled by default. This helps reduce CPU and memory overhead in Envoy, but eliminating some stats may after critical functionality. 
Here are the stats that we strongly recommend not disabling: `cluster_manager.warming_clusters`, `cluster..membership_total`,`cluster..membership_healthy`, `cluster..membership_degraded`,reference https://github.com/envoyproxy/envoy/issues/9856, https://github.com/envoyproxy/envoy/issues/14610 | | `enableVirtualHostStats` _boolean_ | EnableVirtualHostStats enables envoy stat metrics for virtual hosts. | +#### ProxyPrometheusProvider + + + + + +_Appears in:_ +- [ProxyMetrics](#proxymetrics) + + + #### ProxyTelemetry diff --git a/site/content/en/latest/design/eg-metrics.md b/site/content/en/latest/design/eg-metrics.md new file mode 100644 index 000000000000..59e10d84826a --- /dev/null +++ b/site/content/en/latest/design/eg-metrics.md @@ -0,0 +1,146 @@ +--- +date: 2023-10-10 +title: "Control Plane Observability: Metrics" +author: Xunzhuo Liu +linkTitle: "Control Plane Observability: Metrics" +--- + + +## Overview + +Envoy provides a robust platform for metrics and supports three different kinds of stats: counters, gauges, and histograms. + +Envoy enables prometheus format output via the `/stats/prometheus` [admin endpoint](https://www.envoyproxy.io/docs/envoy/latest/operations/admin). + +Envoy supports different kinds of sinks, but EG will only support the [Open Telemetry sink](https://www.envoyproxy.io/docs/envoy/latest/api-v3/extensions/stat_sinks/open_telemetry/v3/open_telemetry.proto). + +Envoy Gateway leverages [Gateway API](https://gateway-api.sigs.k8s.io/) for configuring managed Envoy proxies. Gateway API defines core, extended, and implementation-specific API [support levels](https://gateway-api.sigs.k8s.io/concepts/conformance/?h=extended#2-support-levels) for implementers such as Envoy Gateway to expose features. Since metrics are not covered by the `Core` or `Extended` APIs, EG should provide an easy way to configure metrics per `EnvoyProxy`. + +## Goals + +- Support **Pull** mode prometheus metrics and expose these metrics on the admin address.
+- Support **Push** mode prometheus metrics and send these metrics to the Open Telemetry Stats sink. + +## Non-Goals + +- Support other stats sinks. + +## Use-Cases + +- Enable prometheus metrics +- Push metrics via Open Telemetry Sink + +## API Types + +### EnvoyGatewayTelemetry + +```go +// EnvoyGatewayTelemetry defines telemetry configurations for envoy gateway control plane. +// Control plane will focus on metrics observability telemetry and tracing telemetry later. +type EnvoyGatewayTelemetry struct { + // Metrics defines metrics configuration for envoy gateway. + Metrics *EnvoyGatewayMetrics `json:"metrics,omitempty"` +} +``` + +### EnvoyGatewayMetrics + +```go +// EnvoyGatewayMetrics defines control plane push/pull metrics configurations. +type EnvoyGatewayMetrics struct { + // Sinks defines the metric sinks where metrics are sent to. + Sinks []EnvoyGatewayMetricSink `json:"sinks,omitempty"` + // Prometheus defines the configuration for prometheus endpoint. + Prometheus *EnvoyGatewayPrometheusProvider `json:"prometheus,omitempty"` +} + +// EnvoyGatewayMetricSink defines control plane +// metric sinks where metrics are sent to. +type EnvoyGatewayMetricSink struct { + // Type defines the metric sink type. + // EG control plane currently supports OpenTelemetry. + // +kubebuilder:validation:Enum=OpenTelemetry + // +kubebuilder:default=OpenTelemetry + Type MetricSinkType `json:"type"` + // Host define the sink service hostname. + Host string `json:"host"` + // Port defines the port the sink service is exposed on. + // + // +optional + // +kubebuilder:validation:Minimum=0 + // +kubebuilder:default=4318 + Port int32 `json:"port,omitempty"` +} + +// EnvoyGatewayPrometheusProvider will expose prometheus endpoint +// `/stats/prometheus` and reuse Envoy Gateway admin port. +type EnvoyGatewayPrometheusProvider struct { + // Enable defines if enables the prometheus metrics in pull mode. Default is true.
+ // + // +optional + // +kubebuilder:default=true + Enable bool `json:"enable,omitempty"` +} +``` + +### Example + +1. The following is an example to enable prometheus metric. + +```yaml +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: EnvoyGateway +gateway: + controllerName: gateway.envoyproxy.io/gatewayclass-controller +logging: + level: + default: info +provider: + type: Kubernetes +telemetry: + metrics: + prometheus: + enable: true +``` + +1. The following is an example to send metric via Open Telemetry sink. + +```yaml +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: EnvoyGateway +gateway: + controllerName: gateway.envoyproxy.io/gatewayclass-controller +logging: + level: + default: info +provider: + type: Kubernetes +telemetry: + metrics: + sinks: + - type: OpenTelemetry + host: otel-collector.monitoring.svc.cluster.local + port: 4318 +``` + +1. The following is an example to enable prometheus metric and send metric via Open Telemetry sink. + +```yaml +apiVersion: gateway.envoyproxy.io/v1alpha1 +kind: EnvoyGateway +gateway: + controllerName: gateway.envoyproxy.io/gatewayclass-controller +logging: + level: + default: info +provider: + type: Kubernetes +telemetry: + metrics: + prometheus: + enable: true + sinks: + - type: OpenTelemetry + host: otel-collector.monitoring.svc.cluster.local + port: 4318 +``` diff --git a/site/content/en/latest/design/pprof.md b/site/content/en/latest/design/pprof.md index c535b4800710..b8777c33d114 100644 --- a/site/content/en/latest/design/pprof.md +++ b/site/content/en/latest/design/pprof.md @@ -1,15 +1,19 @@ --- -title: "Add Pprof support in Envoy Gateway" +title: "Debug support in Envoy Gateway" --- ## Overview -Envoy Gateway exposes endpoints at `localhost:8899/debug/pprof` to run Golang profiles to aid in live debugging. The endpoints are equivalent to those found in the http/pprof package. `/debug/pprof/` returns an HTML page listing the available profiles. 
+Envoy Gateway exposes endpoints at `localhost:19010/debug/pprof` to run Golang profiles to aid in live debugging. + +The endpoints are equivalent to those found in the `net/http/pprof` package. `/debug/pprof/` returns an HTML page listing the available profiles. ## Goals -* Add Debug Pprof support to Envoy Gateway control plane. +* Add a debug server to the Envoy Gateway control plane, separate from the admin server. +* Add pprof support to Envoy Gateway control plane. * Define an API to allow Envoy Gateway to custom debug server configuration. +* Define an API to allow Envoy Gateway to dump its config in logs. The following are the different types of profiles end-user can run: @@ -30,11 +34,12 @@ PROFILE | FUNCTION ## API -* Add `admin` field in EnvoyGateway config. -* Add `debug` field under `admin` field. -* Add `enable`, `port` and `host` under `address` field. +* Add `debug` field in EnvoyGateway config. +* Add `address` field under `debug` field. +* Add `port` and `host` under `address` field. +* Add `dumpConfig` field under `debug` field. -Here is an example configuration +Here is an example configuration to enable the debug server: ``` yaml apiVersion: gateway.envoyproxy.io/v1alpha1 @@ -43,9 +48,21 @@ gateway: kind: EnvoyGateway provider: type: "Kubernetes" -admin: - debug: true +debug: address: - port: 8899 - host: "127.0.0.1" + host: 127.0.0.1 + port: 19010 +``` + +Here is an example configuration to enable dumping the Envoy Gateway config in logs: + +```yaml +apiVersion: gateway.envoyproxy.io/v1alpha1 +gateway: + controllerName: "gateway.envoyproxy.io/gatewayclass-controller" +kind: EnvoyGateway +provider: + type: "Kubernetes" +debug: + dumpConfig: true ```