From 35d1adeba4551f3295fd3e1c3a6af5c553de64f0 Mon Sep 17 00:00:00 2001 From: ShyunnY <1147212064@qq.com> Date: Wed, 10 Apr 2024 17:17:00 +0800 Subject: [PATCH 1/5] feat: support trace of ratelimit Signed-off-by: ShyunnY <1147212064@qq.com> --- api/v1alpha1/envoygateway_types.go | 31 ++++ api/v1alpha1/zz_generated.deepcopy.go | 45 +++++ .../kubernetes/ratelimit/resource.go | 96 ++++++++++- .../kubernetes/ratelimit/resource_provider.go | 3 +- .../ratelimit/resource_provider_test.go | 40 +++++ .../kubernetes/ratelimit/resource_test.go | 40 +++++ .../deployments/enable-tracing-custom.yaml | 160 ++++++++++++++++++ .../testdata/deployments/enable-tracing.yaml | 160 ++++++++++++++++++ site/content/en/latest/api/extension_types.md | 42 +++++ .../observability/rate-limit-observability.md | 73 ++++++++ 10 files changed, 686 insertions(+), 4 deletions(-) create mode 100644 internal/infrastructure/kubernetes/ratelimit/resource_test.go create mode 100644 internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml create mode 100644 internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml create mode 100644 site/content/en/latest/tasks/observability/rate-limit-observability.md diff --git a/api/v1alpha1/envoygateway_types.go b/api/v1alpha1/envoygateway_types.go index 47b9861e170..5a9d227e9f8 100644 --- a/api/v1alpha1/envoygateway_types.go +++ b/api/v1alpha1/envoygateway_types.go @@ -354,6 +354,9 @@ type RateLimit struct { type RateLimitTelemetry struct { // Metrics defines metrics configuration for RateLimit. Metrics *RateLimitMetrics `json:"metrics,omitempty"` + + // Tracing defines traces configuration for RateLimit. 
+ Tracing *RateLimitTracing `json:"tracing,omitempty"` } type RateLimitMetrics struct { @@ -366,6 +369,34 @@ type RateLimitMetricsPrometheusProvider struct { Disable bool `json:"disable,omitempty"` } +type RateLimitTracing struct { + // SamplingRate controls the rate at which traffic will be + // selected for tracing if no prior sampling decision has been made. + // Defaults to 100, valid values [0-100]. 100 indicates 100% sampling. + // +optional + SamplingRate *uint32 `json:"samplingRate,omitempty"` + + // Provider defines the rateLimit tracing provider. + // Only OpenTelemetry is supported currently. + Provider *RateLimitTracingProvider +} + +type RateLimitTracingProviderType string + +const ( + RateLimitTracingProviderTypeOpenTelemetry TracingProviderType = "OpenTelemetry" +) + +// RateLimitTracingProvider defines the tracing provider configuration of RateLimit +type RateLimitTracingProvider struct { + // Type defines the tracing provider type. + // Since to RateLimit Exporter currently using OpenTelemetry, only OpenTelemetry is supported + Type RateLimitTracingProviderType `json:"type"` + + // URL is the endpoint of the trace collector that supports the OTLP protocol + URL string `json:"url"` +} + // RateLimitDatabaseBackend defines the configuration associated with // the database backend used by the rate limit service. // +union diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index e82cda7787f..6626d84b5a0 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -3298,6 +3298,11 @@ func (in *RateLimitTelemetry) DeepCopyInto(out *RateLimitTelemetry) { *out = new(RateLimitMetrics) (*in).DeepCopyInto(*out) } + if in.Tracing != nil { + in, out := &in.Tracing, &out.Tracing + *out = new(RateLimitTracing) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitTelemetry. 
@@ -3310,6 +3315,46 @@ func (in *RateLimitTelemetry) DeepCopy() *RateLimitTelemetry { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RateLimitTracing) DeepCopyInto(out *RateLimitTracing) { + *out = *in + if in.SamplingRate != nil { + in, out := &in.SamplingRate, &out.SamplingRate + *out = new(uint32) + **out = **in + } + if in.Provider != nil { + in, out := &in.Provider, &out.Provider + *out = new(RateLimitTracingProvider) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitTracing. +func (in *RateLimitTracing) DeepCopy() *RateLimitTracing { + if in == nil { + return nil + } + out := new(RateLimitTracing) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RateLimitTracingProvider) DeepCopyInto(out *RateLimitTracingProvider) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitTracingProvider. +func (in *RateLimitTracingProvider) DeepCopy() *RateLimitTracingProvider { + if in == nil { + return nil + } + out := new(RateLimitTracingProvider) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RateLimitValue) DeepCopyInto(out *RateLimitValue) { *out = *in diff --git a/internal/infrastructure/kubernetes/ratelimit/resource.go b/internal/infrastructure/kubernetes/ratelimit/resource.go index 32dfba94c1b..93360f09d80 100644 --- a/internal/infrastructure/kubernetes/ratelimit/resource.go +++ b/internal/infrastructure/kubernetes/ratelimit/resource.go @@ -79,6 +79,18 @@ const ( ConfigGrpcXdsServerURLEnvVar = "CONFIG_GRPC_XDS_SERVER_URL" // ConfigGrpcXdsNodeIDEnvVar is the id of ratelimit node. 
ConfigGrpcXdsNodeIDEnvVar = "CONFIG_GRPC_XDS_NODE_ID" + // TracingEnabledVar is enabled the tracing feature + TracingEnabledVar = "TRACING_ENABLED" + // TracingServiceNameVar is service name appears in tracing span + TracingServiceNameVar = "TRACING_SERVICE_NAME" + // TracingServiceNamespaceVar is service namespace appears in tracing span + TracingServiceNamespaceVar = "TRACING_SERVICE_NAMESPACE" + // TracingServiceInstanceIDVar is service instance id appears in tracing span + TracingServiceInstanceIDVar = "TRACING_SERVICE_INSTANCE_ID" + // TracingSamplingRateVar is trace sampling rate + TracingSamplingRateVar = "TRACING_SAMPLING_RATE" + // OTELExporterOTLPTraceEndpointVar is target url to which the trace exporter is going to send + OTELExporterOTLPTraceEndpointVar = "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT" // InfraName is the name for rate-limit resources. InfraName = "envoy-ratelimit" @@ -125,7 +137,8 @@ func rateLimitLabels() map[string]string { } // expectedRateLimitContainers returns expected rateLimit containers. -func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec) []corev1.Container { +func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec, + namespace string) []corev1.Container { ports := []corev1.ContainerPort{ { Name: "grpc", @@ -142,7 +155,7 @@ func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeploymen Command: []string{ "/bin/ratelimit", }, - Env: expectedRateLimitContainerEnv(rateLimit, rateLimitDeployment), + Env: expectedRateLimitContainerEnv(rateLimit, rateLimitDeployment, namespace), Ports: ports, Resources: *rateLimitDeployment.Container.Resources, SecurityContext: rateLimitDeployment.Container.SecurityContext, @@ -275,7 +288,8 @@ func expectedDeploymentVolumes(rateLimit *egv1a1.RateLimit, rateLimitDeployment } // expectedRateLimitContainerEnv returns expected rateLimit container envs. 
-func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec) []corev1.EnvVar { +func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec, + namespace string) []corev1.EnvVar { env := []corev1.EnvVar{ { Name: RuntimeRootEnvVar, @@ -384,6 +398,54 @@ func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeploym } } + if enableTracing(rateLimit) { + var sampleRate = 1.0 + if rateLimit.Telemetry.Tracing.SamplingRate != nil { + sampleRate = float64(*rateLimit.Telemetry.Tracing.SamplingRate) / 100.0 + } + + traceEndpoint := checkTraceEndpointScheme(rateLimit.Telemetry.Tracing.Provider.URL) + tracingEnvs := []corev1.EnvVar{ + { + Name: TracingEnabledVar, + Value: "true", + }, + { + Name: TracingServiceNameVar, + Value: InfraName, + }, + { + Name: TracingServiceNamespaceVar, + Value: namespace, + }, + { + // By default, this is a random instanceID, + // we use the RateLimit pod name as the trace service instanceID. + Name: TracingServiceInstanceIDVar, + ValueFrom: &corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + APIVersion: "v1", + FieldPath: "metadata.name", + }, + }, + }, + { + Name: TracingSamplingRateVar, + // The api is configured with [0,100], but sampling can only be [0,1]. + // doc: https://github.com/envoyproxy/ratelimit?tab=readme-ov-file#tracing + // You will lose precision during the conversion process, but don't worry, + // this follows the rounding rule and won't make the expected sampling rate too different + // from the actual sampling rate + Value: strconv.FormatFloat(sampleRate, 'f', 1, 64), + }, + { + Name: OTELExporterOTLPTraceEndpointVar, + Value: traceEndpoint, + }, + } + env = append(env, tracingEnvs...) 
+ } + return resource.ExpectedContainerEnv(rateLimitDeployment.Container, env) } @@ -399,3 +461,31 @@ func Validate(ctx context.Context, client client.Client, gateway *egv1a1.EnvoyGa return nil } + +func enableTracing(rl *egv1a1.RateLimit) bool { + // Other fields can use the default values, + // but we have to make sure the user has the Provider.URL + if rl != nil && rl.Telemetry != nil && + rl.Telemetry.Tracing != nil && + rl.Telemetry.Tracing.Provider != nil && + len(rl.Telemetry.Tracing.Provider.URL) != 0 { + return true + } + + return false +} + +// checkTraceEndpointScheme Check the scheme prefix in the trace url +func checkTraceEndpointScheme(url string) string { + // Since the OTLP collector needs to configure the scheme prefix, + // we need to check if the user has configured this + // TODO: It is currently assumed to be a normal connection, + // and a TLS connection will be added later. + httpScheme := "http://" + exist := strings.HasPrefix(url, httpScheme) + if exist { + return url + } + + return fmt.Sprintf("%s%s", httpScheme, url) +} diff --git a/internal/infrastructure/kubernetes/ratelimit/resource_provider.go b/internal/infrastructure/kubernetes/ratelimit/resource_provider.go index 90f646d014f..885cb4ddca6 100644 --- a/internal/infrastructure/kubernetes/ratelimit/resource_provider.go +++ b/internal/infrastructure/kubernetes/ratelimit/resource_provider.go @@ -61,6 +61,7 @@ func (r *ResourceRender) Name() string { func enablePrometheus(rl *egv1a1.RateLimit) bool { if rl != nil && rl.Telemetry != nil && + rl.Telemetry.Metrics != nil && rl.Telemetry.Metrics.Prometheus != nil { return !rl.Telemetry.Metrics.Prometheus.Disable } @@ -183,7 +184,7 @@ func (r *ResourceRender) ServiceAccount() (*corev1.ServiceAccount, error) { // Deployment returns the expected rate limit Deployment based on the provided infra. 
func (r *ResourceRender) Deployment() (*appsv1.Deployment, error) { - containers := expectedRateLimitContainers(r.rateLimit, r.rateLimitDeployment) + containers := expectedRateLimitContainers(r.rateLimit, r.rateLimitDeployment, r.Namespace) labels := rateLimitLabels() selector := resource.GetSelector(labels) diff --git a/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go b/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go index 6c56631d9cc..52aec1fabed 100644 --- a/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go +++ b/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go @@ -648,6 +648,46 @@ func TestDeployment(t *testing.T) { }, }, }, + { + caseName: "enable-tracing", + rateLimit: &egv1a1.RateLimit{ + Backend: egv1a1.RateLimitDatabaseBackend{ + Type: egv1a1.RedisBackendType, + Redis: &egv1a1.RateLimitRedisSettings{ + URL: "redis.redis.svc:6379", + }, + }, + Telemetry: &egv1a1.RateLimitTelemetry{ + Tracing: &egv1a1.RateLimitTracing{ + Provider: &egv1a1.RateLimitTracingProvider{ + URL: "http://trace-collector.envoy-gateway-system.svc.cluster.local:4318", + }, + }, + }, + }, + }, + { + caseName: "enable-tracing-custom", + rateLimit: &egv1a1.RateLimit{ + Backend: egv1a1.RateLimitDatabaseBackend{ + Type: egv1a1.RedisBackendType, + Redis: &egv1a1.RateLimitRedisSettings{ + URL: "redis.redis.svc:6379", + }, + }, + Telemetry: &egv1a1.RateLimitTelemetry{ + Tracing: &egv1a1.RateLimitTracing{ + SamplingRate: func() *uint32 { + var samplingRate uint32 = 55 + return &samplingRate + }(), + Provider: &egv1a1.RateLimitTracingProvider{ + URL: "trace-collector.envoy-gateway-system.svc.cluster.local:4317", + }, + }, + }, + }, + }, } for _, tc := range cases { t.Run(tc.caseName, func(t *testing.T) { diff --git a/internal/infrastructure/kubernetes/ratelimit/resource_test.go b/internal/infrastructure/kubernetes/ratelimit/resource_test.go new file mode 100644 index 00000000000..71179c8c7c4 --- /dev/null +++ 
b/internal/infrastructure/kubernetes/ratelimit/resource_test.go @@ -0,0 +1,40 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package ratelimit + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestCheckTraceEndpointScheme(t *testing.T) { + + cases := []struct { + caseName string + actualURL string + expectedURL string + }{ + { + caseName: "normal url with http prefix", + actualURL: "http://collector.observability.svc.cluster.local:4318", + expectedURL: "http://collector.observability.svc.cluster.local:4318", + }, + { + caseName: "abnormal url without http prefix", + actualURL: "collector.observability.svc.cluster.local:4318", + expectedURL: "http://collector.observability.svc.cluster.local:4318", + }, + } + + for _, tc := range cases { + t.Run(tc.caseName, func(t *testing.T) { + actual := checkTraceEndpointScheme(tc.actualURL) + require.Equal(t, tc.expectedURL, actual) + }) + } + +} diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml new file mode 100644 index 00000000000..53e22e1d6cb --- /dev/null +++ b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml @@ -0,0 +1,160 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: ratelimit + app.kubernetes.io/managed-by: envoy-gateway + app.kubernetes.io/name: envoy-ratelimit + name: envoy-ratelimit + namespace: envoy-gateway-system + ownerReferences: + - apiVersion: apps/v1 + kind: Deployment + name: envoy-gateway + uid: test-owner-reference-uid-for-deployment +spec: + progressDeadlineSeconds: 600 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/component: ratelimit + 
app.kubernetes.io/managed-by: envoy-gateway + app.kubernetes.io/name: envoy-ratelimit + strategy: + type: RollingUpdate + template: + metadata: + annotations: + prometheus.io/path: /metrics + prometheus.io/port: "19001" + prometheus.io/scrape: "true" + creationTimestamp: null + labels: + app.kubernetes.io/component: ratelimit + app.kubernetes.io/managed-by: envoy-gateway + app.kubernetes.io/name: envoy-ratelimit + spec: + automountServiceAccountToken: false + containers: + - command: + - /bin/ratelimit + env: + - name: RUNTIME_ROOT + value: /data + - name: RUNTIME_SUBDIRECTORY + value: ratelimit + - name: RUNTIME_IGNOREDOTFILES + value: "true" + - name: RUNTIME_WATCH_ROOT + value: "false" + - name: LOG_LEVEL + value: info + - name: USE_STATSD + value: "false" + - name: CONFIG_TYPE + value: GRPC_XDS_SOTW + - name: CONFIG_GRPC_XDS_SERVER_URL + value: envoy-gateway:18001 + - name: CONFIG_GRPC_XDS_NODE_ID + value: envoy-ratelimit + - name: GRPC_SERVER_USE_TLS + value: "true" + - name: GRPC_SERVER_TLS_CERT + value: /certs/tls.crt + - name: GRPC_SERVER_TLS_KEY + value: /certs/tls.key + - name: GRPC_SERVER_TLS_CA_CERT + value: /certs/ca.crt + - name: CONFIG_GRPC_XDS_SERVER_USE_TLS + value: "true" + - name: CONFIG_GRPC_XDS_CLIENT_TLS_CERT + value: /certs/tls.crt + - name: CONFIG_GRPC_XDS_CLIENT_TLS_KEY + value: /certs/tls.key + - name: CONFIG_GRPC_XDS_SERVER_TLS_CACERT + value: /certs/ca.crt + - name: FORCE_START_WITHOUT_INITIAL_CONFIG + value: "true" + - name: REDIS_SOCKET_TYPE + value: tcp + - name: REDIS_URL + value: redis.redis.svc:6379 + - name: TRACING_ENABLED + value: "true" + - name: TRACING_SERVICE_NAME + value: envoy-ratelimit + - name: TRACING_SERVICE_NAMESPACE + value: envoy-gateway-system + - name: TRACING_SERVICE_INSTANCE_ID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: TRACING_SAMPLING_RATE + value: "0.6" + - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT + value: http://trace-collector.envoy-gateway-system.svc.cluster.local:4317 + 
image: envoyproxy/ratelimit:master + imagePullPolicy: IfNotPresent + name: envoy-ratelimit + ports: + - containerPort: 8081 + name: grpc + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /healthcheck + port: 8080 + scheme: HTTP + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + resources: + requests: + cpu: 100m + memory: 512Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /certs + name: certs + readOnly: true + - command: + - /bin/statsd_exporter + - --web.listen-address=:19001 + - --statsd.mapping-config=/etc/statsd-exporter/conf.yaml + image: prom/statsd-exporter:v0.18.0 + imagePullPolicy: IfNotPresent + name: prom-statsd-exporter + ports: + - containerPort: 9125 + name: statsd + protocol: TCP + - containerPort: 19001 + name: metrics + protocol: TCP + resources: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /etc/statsd-exporter + name: statsd-exporter-config + readOnly: true + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + serviceAccountName: envoy-ratelimit + terminationGracePeriodSeconds: 300 + volumes: + - name: certs + secret: + defaultMode: 420 + secretName: envoy-rate-limit + - configMap: + defaultMode: 420 + name: statsd-exporter-config + optional: true + name: statsd-exporter-config +status: {} diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml new file mode 100644 index 00000000000..bee76879b02 --- /dev/null +++ b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml @@ -0,0 +1,160 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + creationTimestamp: null + labels: + app.kubernetes.io/component: ratelimit + app.kubernetes.io/managed-by: envoy-gateway + app.kubernetes.io/name: 
envoy-ratelimit + name: envoy-ratelimit + namespace: envoy-gateway-system + ownerReferences: + - apiVersion: apps/v1 + kind: Deployment + name: envoy-gateway + uid: test-owner-reference-uid-for-deployment +spec: + progressDeadlineSeconds: 600 + revisionHistoryLimit: 10 + selector: + matchLabels: + app.kubernetes.io/component: ratelimit + app.kubernetes.io/managed-by: envoy-gateway + app.kubernetes.io/name: envoy-ratelimit + strategy: + type: RollingUpdate + template: + metadata: + annotations: + prometheus.io/path: /metrics + prometheus.io/port: "19001" + prometheus.io/scrape: "true" + creationTimestamp: null + labels: + app.kubernetes.io/component: ratelimit + app.kubernetes.io/managed-by: envoy-gateway + app.kubernetes.io/name: envoy-ratelimit + spec: + automountServiceAccountToken: false + containers: + - command: + - /bin/ratelimit + env: + - name: RUNTIME_ROOT + value: /data + - name: RUNTIME_SUBDIRECTORY + value: ratelimit + - name: RUNTIME_IGNOREDOTFILES + value: "true" + - name: RUNTIME_WATCH_ROOT + value: "false" + - name: LOG_LEVEL + value: info + - name: USE_STATSD + value: "false" + - name: CONFIG_TYPE + value: GRPC_XDS_SOTW + - name: CONFIG_GRPC_XDS_SERVER_URL + value: envoy-gateway:18001 + - name: CONFIG_GRPC_XDS_NODE_ID + value: envoy-ratelimit + - name: GRPC_SERVER_USE_TLS + value: "true" + - name: GRPC_SERVER_TLS_CERT + value: /certs/tls.crt + - name: GRPC_SERVER_TLS_KEY + value: /certs/tls.key + - name: GRPC_SERVER_TLS_CA_CERT + value: /certs/ca.crt + - name: CONFIG_GRPC_XDS_SERVER_USE_TLS + value: "true" + - name: CONFIG_GRPC_XDS_CLIENT_TLS_CERT + value: /certs/tls.crt + - name: CONFIG_GRPC_XDS_CLIENT_TLS_KEY + value: /certs/tls.key + - name: CONFIG_GRPC_XDS_SERVER_TLS_CACERT + value: /certs/ca.crt + - name: FORCE_START_WITHOUT_INITIAL_CONFIG + value: "true" + - name: REDIS_SOCKET_TYPE + value: tcp + - name: REDIS_URL + value: redis.redis.svc:6379 + - name: TRACING_ENABLED + value: "true" + - name: TRACING_SERVICE_NAME + value: envoy-ratelimit + 
- name: TRACING_SERVICE_NAMESPACE + value: envoy-gateway-system + - name: TRACING_SERVICE_INSTANCE_ID + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: TRACING_SAMPLING_RATE + value: "1.0" + - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT + value: http://trace-collector.envoy-gateway-system.svc.cluster.local:4318 + image: envoyproxy/ratelimit:master + imagePullPolicy: IfNotPresent + name: envoy-ratelimit + ports: + - containerPort: 8081 + name: grpc + protocol: TCP + readinessProbe: + failureThreshold: 3 + httpGet: + path: /healthcheck + port: 8080 + scheme: HTTP + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + resources: + requests: + cpu: 100m + memory: 512Mi + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /certs + name: certs + readOnly: true + - command: + - /bin/statsd_exporter + - --web.listen-address=:19001 + - --statsd.mapping-config=/etc/statsd-exporter/conf.yaml + image: prom/statsd-exporter:v0.18.0 + imagePullPolicy: IfNotPresent + name: prom-statsd-exporter + ports: + - containerPort: 9125 + name: statsd + protocol: TCP + - containerPort: 19001 + name: metrics + protocol: TCP + resources: {} + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /etc/statsd-exporter + name: statsd-exporter-config + readOnly: true + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + serviceAccountName: envoy-ratelimit + terminationGracePeriodSeconds: 300 + volumes: + - name: certs + secret: + defaultMode: 420 + secretName: envoy-rate-limit + - configMap: + defaultMode: 420 + name: statsd-exporter-config + optional: true + name: statsd-exporter-config +status: {} diff --git a/site/content/en/latest/api/extension_types.md b/site/content/en/latest/api/extension_types.md index c3316f67026..69c4578a314 100644 --- a/site/content/en/latest/api/extension_types.md +++ 
b/site/content/en/latest/api/extension_types.md @@ -2354,6 +2354,48 @@ _Appears in:_ | Field | Type | Required | Description | | --- | --- | --- | --- | | `metrics` | _[RateLimitMetrics](#ratelimitmetrics)_ | true | Metrics defines metrics configuration for RateLimit. | +| `tracing` | _[RateLimitTracing](#ratelimittracing)_ | true | Tracing defines traces configuration for RateLimit. | + + +#### RateLimitTracing + + + + + +_Appears in:_ +- [RateLimitTelemetry](#ratelimittelemetry) + +| Field | Type | Required | Description | +| --- | --- | --- | --- | +| `samplingRate` | _integer_ | false | SamplingRate controls the rate at which traffic will be
selected for tracing if no prior sampling decision has been made.
Defaults to 100, valid values [0-100]. 100 indicates 100% sampling. | +| `Provider` | _[RateLimitTracingProvider](#ratelimittracingprovider)_ | true | Provider defines the rateLimit tracing provider.
Only OpenTelemetry is supported currently. | + + +#### RateLimitTracingProvider + + + +RateLimitTracingProvider defines the tracing provider configuration of RateLimit + +_Appears in:_ +- [RateLimitTracing](#ratelimittracing) + +| Field | Type | Required | Description | +| --- | --- | --- | --- | +| `type` | _[RateLimitTracingProviderType](#ratelimittracingprovidertype)_ | true | Type defines the tracing provider type.
Since to RateLimit Exporter currently using OpenTelemetry, only OpenTelemetry is supported | +| `url` | _string_ | true | URL is the endpoint of the trace collector that supports the OTLP protocol | + + +#### RateLimitTracingProviderType + +_Underlying type:_ _string_ + + + +_Appears in:_ +- [RateLimitTracingProvider](#ratelimittracingprovider) + #### RateLimitType diff --git a/site/content/en/latest/tasks/observability/rate-limit-observability.md b/site/content/en/latest/tasks/observability/rate-limit-observability.md new file mode 100644 index 00000000000..fa61acfbde1 --- /dev/null +++ b/site/content/en/latest/tasks/observability/rate-limit-observability.md @@ -0,0 +1,73 @@ +--- +title: "RateLimit Observability" +--- + +Envoy Gateway provides observability for the RateLimit instances. +This guide shows you how to configure RateLimit observability, including traces. + +## Prerequisites + +Follow the steps from the [Quickstart Guide](../quickstart) to install Envoy Gateway and the HTTPRoute example manifest. +Before proceeding, you should be able to query the example backend using HTTP. Follow the steps from the [Global Rate Limit](../traffic/global-rate-limit) to install RateLimit. + + +[OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) offers a vendor-agnostic implementation of how to receive, process and export telemetry data. +Install OTel-Collector: + +```shell +helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts +helm repo update +helm upgrade --install otel-collector open-telemetry/opentelemetry-collector -f https://raw.githubusercontent.com/envoyproxy/gateway/latest/examples/otel-collector/helm-values.yaml -n monitoring --create-namespace --version 0.60.0 +``` + +## Traces + +By default, the Envoy Gateway does not configure RateLimit to send traces to the OpenTelemetry Sink. +You can configure the collector in the `rateLimit.telemetry.tracing` field of the `EnvoyGateway` CRD. 
+ +RateLimit uses the OpenTelemetry Exporter to export traces to the collector. +You can configure a collector that supports the OTLP protocol, which includes but is not limited to: OpenTelemetry Collector, Jaeger, Zipkin, and so on. + +***Note:*** +* By default, the Envoy Gateway configures a 100% sampling rate for RateLimit, which may lead to performance issues. +* The Envoy Gateway constructs the Kubernetes FQDN using the value of `BackendObjectReference`, which serves as the target endpoint for + the RateLimit trace collector. The `BackendObjectReference` is configured through the collector Service. Please note, the configuration of collector Service + using `Service.type=ExternalName` is currently not supported. + +Assuming the OpenTelemetry Collector is running in the `observability` namespace, and it has a service named `otel-svc`, +we only want to sample `50%` of the trace data. We would configure it as follows: + +```shell +cat < Date: Wed, 10 Apr 2024 19:15:44 +0800 Subject: [PATCH 2/5] fix: add json tag Signed-off-by: ShyunnY <1147212064@qq.com> --- api/v1alpha1/envoygateway_types.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/v1alpha1/envoygateway_types.go b/api/v1alpha1/envoygateway_types.go index 5a9d227e9f8..7cf0810c1cf 100644 --- a/api/v1alpha1/envoygateway_types.go +++ b/api/v1alpha1/envoygateway_types.go @@ -378,7 +378,7 @@ type RateLimitTracing struct { // Provider defines the rateLimit tracing provider. // Only OpenTelemetry is supported currently. 
- Provider *RateLimitTracingProvider + Provider *RateLimitTracingProvider `json:"provider,omitempty"` } type RateLimitTracingProviderType string From 8f531301174deb0403799861acc350dbd60c5ddf Mon Sep 17 00:00:00 2001 From: ShyunnY <1147212064@qq.com> Date: Wed, 10 Apr 2024 19:29:16 +0800 Subject: [PATCH 3/5] fix: use OTEL_EXPORTER_OTLP_ENDPOINT env Signed-off-by: ShyunnY <1147212064@qq.com> --- internal/infrastructure/kubernetes/ratelimit/resource.go | 2 +- .../ratelimit/testdata/deployments/enable-tracing-custom.yaml | 2 +- .../ratelimit/testdata/deployments/enable-tracing.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/infrastructure/kubernetes/ratelimit/resource.go b/internal/infrastructure/kubernetes/ratelimit/resource.go index 93360f09d80..7e7a9d3722d 100644 --- a/internal/infrastructure/kubernetes/ratelimit/resource.go +++ b/internal/infrastructure/kubernetes/ratelimit/resource.go @@ -90,7 +90,7 @@ const ( // TracingSamplingRateVar is trace sampling rate TracingSamplingRateVar = "TRACING_SAMPLING_RATE" // OTELExporterOTLPTraceEndpointVar is target url to which the trace exporter is going to send - OTELExporterOTLPTraceEndpointVar = "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT" + OTELExporterOTLPTraceEndpointVar = "OTEL_EXPORTER_OTLP_ENDPOINT" // InfraName is the name for rate-limit resources. 
InfraName = "envoy-ratelimit" diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml index 53e22e1d6cb..b4c7d9472e9 100644 --- a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml +++ b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml @@ -93,7 +93,7 @@ spec: fieldPath: metadata.name - name: TRACING_SAMPLING_RATE value: "0.6" - - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT + - name: OTEL_EXPORTER_OTLP_ENDPOINT value: http://trace-collector.envoy-gateway-system.svc.cluster.local:4317 image: envoyproxy/ratelimit:master imagePullPolicy: IfNotPresent diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml index bee76879b02..e36ff5ef87d 100644 --- a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml +++ b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml @@ -93,7 +93,7 @@ spec: fieldPath: metadata.name - name: TRACING_SAMPLING_RATE value: "1.0" - - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT + - name: OTEL_EXPORTER_OTLP_ENDPOINT value: http://trace-collector.envoy-gateway-system.svc.cluster.local:4318 image: envoyproxy/ratelimit:master imagePullPolicy: IfNotPresent From badd639c93b54024a3393eb0ea497f4de42a6cb4 Mon Sep 17 00:00:00 2001 From: ShyunnY <1147212064@qq.com> Date: Wed, 10 Apr 2024 20:21:22 +0800 Subject: [PATCH 4/5] fix Signed-off-by: ShyunnY <1147212064@qq.com> --- api/v1alpha1/envoygateway_types.go | 2 +- api/v1alpha1/zz_generated.deepcopy.go | 7 ++++++- site/content/en/latest/api/extension_types.md | 11 +---------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/api/v1alpha1/envoygateway_types.go 
b/api/v1alpha1/envoygateway_types.go index 7cf0810c1cf..ade9e056b1e 100644 --- a/api/v1alpha1/envoygateway_types.go +++ b/api/v1alpha1/envoygateway_types.go @@ -391,7 +391,7 @@ const ( type RateLimitTracingProvider struct { // Type defines the tracing provider type. // Since to RateLimit Exporter currently using OpenTelemetry, only OpenTelemetry is supported - Type RateLimitTracingProviderType `json:"type"` + Type *RateLimitTracingProviderType `json:"type,omitempty"` // URL is the endpoint of the trace collector that supports the OTLP protocol URL string `json:"url"` diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 6626d84b5a0..b1e849077bd 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -3326,7 +3326,7 @@ func (in *RateLimitTracing) DeepCopyInto(out *RateLimitTracing) { if in.Provider != nil { in, out := &in.Provider, &out.Provider *out = new(RateLimitTracingProvider) - **out = **in + (*in).DeepCopyInto(*out) } } @@ -3343,6 +3343,11 @@ func (in *RateLimitTracing) DeepCopy() *RateLimitTracing { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *RateLimitTracingProvider) DeepCopyInto(out *RateLimitTracingProvider) { *out = *in + if in.Type != nil { + in, out := &in.Type, &out.Type + *out = new(RateLimitTracingProviderType) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitTracingProvider. diff --git a/site/content/en/latest/api/extension_types.md b/site/content/en/latest/api/extension_types.md index 69c4578a314..7234b805d8f 100644 --- a/site/content/en/latest/api/extension_types.md +++ b/site/content/en/latest/api/extension_types.md @@ -2369,7 +2369,7 @@ _Appears in:_ | Field | Type | Required | Description | | --- | --- | --- | --- | | `samplingRate` | _integer_ | false | SamplingRate controls the rate at which traffic will be
selected for tracing if no prior sampling decision has been made.
Defaults to 100, valid values [0-100]. 100 indicates 100% sampling. | -| `Provider` | _[RateLimitTracingProvider](#ratelimittracingprovider)_ | true | Provider defines the rateLimit tracing provider.
Only OpenTelemetry is supported currently. | +| `provider` | _[RateLimitTracingProvider](#ratelimittracingprovider)_ | true | Provider defines the rateLimit tracing provider.
Only OpenTelemetry is supported currently. | #### RateLimitTracingProvider @@ -2387,15 +2387,6 @@ _Appears in:_ | `url` | _string_ | true | URL is the endpoint of the trace collector that supports the OTLP protocol | -#### RateLimitTracingProviderType - -_Underlying type:_ _string_ - - - -_Appears in:_ -- [RateLimitTracingProvider](#ratelimittracingprovider) - #### RateLimitType From a0573aa9af72416f3cd3699e3787b1a7f3fe0747 Mon Sep 17 00:00:00 2001 From: yuluo-yx Date: Wed, 10 Apr 2024 22:07:33 +0800 Subject: [PATCH 5/5] docs: update docs Signed-off-by: yuluo-yx --- .../observability/rate-limit-observability.md | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/site/content/en/latest/tasks/observability/rate-limit-observability.md b/site/content/en/latest/tasks/observability/rate-limit-observability.md index fa61acfbde1..350be4dc4b1 100644 --- a/site/content/en/latest/tasks/observability/rate-limit-observability.md +++ b/site/content/en/latest/tasks/observability/rate-limit-observability.md @@ -10,8 +10,8 @@ This guide show you how to config RateLimit observability, includes traces. Follow the steps from the [Quickstart Guide](../quickstart) to install Envoy Gateway and the HTTPRoute example manifest. Before proceeding, you should be able to query the example backend using HTTP. Follow the steps from the [Global Rate Limit](../traffic/global-rate-limit) to install RateLimit. - [OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) offers a vendor-agnostic implementation of how to receive, process and export telemetry data. + Install OTel-Collector: ```shell @@ -29,10 +29,8 @@ RateLimit uses the OpenTelemetry Exporter to export traces to the collector. You can configure a collector that supports the OTLP protocol, which includes but is not limited to: OpenTelemetry Collector, Jaeger, Zipkin, and so on. 
***Note:*** -* By default, the Envoy Gateway configures a 100% sampling rate for RateLimit, which may lead to performance issues. -* The Envoy Gateway constructs the Kubernetes FQDN using the value of `BackendObjectReference`, which serves as the target endpoint for - the RateLimit trace collector. The `BackendObjectReference` is configured through the collector Service. Please note, the configuration of collector Service - using `Service.type=ExternalName` is currently not supported. + +* By default, the Envoy Gateway configures a `100%` sampling rate for RateLimit, which may lead to performance issues. Assuming the OpenTelemetry Collector is running in the `observability` namespace, and it has a service named `otel-svc`, we only want to sample `50%` of the trace data. We would configure it as follows: @@ -60,14 +58,13 @@ data: telemetry: tracing: sampleRate: 50 - backendRef: - name: otel-svc - namespace: observability + provider: + url: http://otel-svc.observability.svc.cluster.local:4318 EOF ``` -After updating the ConfigMap, you will need to restart the envoy-gateway deployment so the configuration kicks in +After updating the ConfigMap, you will need to restart the envoy-gateway deployment so the configuration takes effect: ```shell kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system -``` \ No newline at end of file +```