From 35d1adeba4551f3295fd3e1c3a6af5c553de64f0 Mon Sep 17 00:00:00 2001
From: ShyunnY <1147212064@qq.com>
Date: Wed, 10 Apr 2024 17:17:00 +0800
Subject: [PATCH 1/5] feat: support trace of ratelimit
Signed-off-by: ShyunnY <1147212064@qq.com>
---
api/v1alpha1/envoygateway_types.go | 31 ++++
api/v1alpha1/zz_generated.deepcopy.go | 45 +++++
.../kubernetes/ratelimit/resource.go | 96 ++++++++++-
.../kubernetes/ratelimit/resource_provider.go | 3 +-
.../ratelimit/resource_provider_test.go | 40 +++++
.../kubernetes/ratelimit/resource_test.go | 40 +++++
.../deployments/enable-tracing-custom.yaml | 160 ++++++++++++++++++
.../testdata/deployments/enable-tracing.yaml | 160 ++++++++++++++++++
site/content/en/latest/api/extension_types.md | 42 +++++
.../observability/rate-limit-observability.md | 73 ++++++++
10 files changed, 686 insertions(+), 4 deletions(-)
create mode 100644 internal/infrastructure/kubernetes/ratelimit/resource_test.go
create mode 100644 internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml
create mode 100644 internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml
create mode 100644 site/content/en/latest/tasks/observability/rate-limit-observability.md
diff --git a/api/v1alpha1/envoygateway_types.go b/api/v1alpha1/envoygateway_types.go
index 47b9861e170..5a9d227e9f8 100644
--- a/api/v1alpha1/envoygateway_types.go
+++ b/api/v1alpha1/envoygateway_types.go
@@ -354,6 +354,9 @@ type RateLimit struct {
type RateLimitTelemetry struct {
// Metrics defines metrics configuration for RateLimit.
Metrics *RateLimitMetrics `json:"metrics,omitempty"`
+
+ // Tracing defines traces configuration for RateLimit.
+ Tracing *RateLimitTracing `json:"tracing,omitempty"`
}
type RateLimitMetrics struct {
@@ -366,6 +369,34 @@ type RateLimitMetricsPrometheusProvider struct {
Disable bool `json:"disable,omitempty"`
}
+type RateLimitTracing struct {
+ // SamplingRate controls the rate at which traffic will be
+ // selected for tracing if no prior sampling decision has been made.
+ // Defaults to 100, valid values [0-100]. 100 indicates 100% sampling.
+ // +optional
+ SamplingRate *uint32 `json:"samplingRate,omitempty"`
+
+ // Provider defines the rateLimit tracing provider.
+ // Only OpenTelemetry is supported currently.
+ Provider *RateLimitTracingProvider
+}
+
+type RateLimitTracingProviderType string
+
+const (
+	// RateLimitTracingProviderTypeOpenTelemetry selects the OpenTelemetry tracing provider.
+	// It is typed as RateLimitTracingProviderType so it can be assigned to
+	// RateLimitTracingProvider.Type without a conversion.
+	RateLimitTracingProviderTypeOpenTelemetry RateLimitTracingProviderType = "OpenTelemetry"
+)
+
+// RateLimitTracingProvider defines the tracing provider configuration of RateLimit
+type RateLimitTracingProvider struct {
+ // Type defines the tracing provider type.
+ // Since to RateLimit Exporter currently using OpenTelemetry, only OpenTelemetry is supported
+ Type RateLimitTracingProviderType `json:"type"`
+
+ // URL is the endpoint of the trace collector that supports the OTLP protocol
+ URL string `json:"url"`
+}
+
// RateLimitDatabaseBackend defines the configuration associated with
// the database backend used by the rate limit service.
// +union
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index e82cda7787f..6626d84b5a0 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -3298,6 +3298,11 @@ func (in *RateLimitTelemetry) DeepCopyInto(out *RateLimitTelemetry) {
*out = new(RateLimitMetrics)
(*in).DeepCopyInto(*out)
}
+ if in.Tracing != nil {
+ in, out := &in.Tracing, &out.Tracing
+ *out = new(RateLimitTracing)
+ (*in).DeepCopyInto(*out)
+ }
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitTelemetry.
@@ -3310,6 +3315,46 @@ func (in *RateLimitTelemetry) DeepCopy() *RateLimitTelemetry {
return out
}
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *RateLimitTracing) DeepCopyInto(out *RateLimitTracing) {
+ *out = *in
+ if in.SamplingRate != nil {
+ in, out := &in.SamplingRate, &out.SamplingRate
+ *out = new(uint32)
+ **out = **in
+ }
+ if in.Provider != nil {
+ in, out := &in.Provider, &out.Provider
+ *out = new(RateLimitTracingProvider)
+ **out = **in
+ }
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitTracing.
+func (in *RateLimitTracing) DeepCopy() *RateLimitTracing {
+ if in == nil {
+ return nil
+ }
+ out := new(RateLimitTracing)
+ in.DeepCopyInto(out)
+ return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *RateLimitTracingProvider) DeepCopyInto(out *RateLimitTracingProvider) {
+ *out = *in
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitTracingProvider.
+func (in *RateLimitTracingProvider) DeepCopy() *RateLimitTracingProvider {
+ if in == nil {
+ return nil
+ }
+ out := new(RateLimitTracingProvider)
+ in.DeepCopyInto(out)
+ return out
+}
+
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RateLimitValue) DeepCopyInto(out *RateLimitValue) {
*out = *in
diff --git a/internal/infrastructure/kubernetes/ratelimit/resource.go b/internal/infrastructure/kubernetes/ratelimit/resource.go
index 32dfba94c1b..93360f09d80 100644
--- a/internal/infrastructure/kubernetes/ratelimit/resource.go
+++ b/internal/infrastructure/kubernetes/ratelimit/resource.go
@@ -79,6 +79,18 @@ const (
ConfigGrpcXdsServerURLEnvVar = "CONFIG_GRPC_XDS_SERVER_URL"
// ConfigGrpcXdsNodeIDEnvVar is the id of ratelimit node.
ConfigGrpcXdsNodeIDEnvVar = "CONFIG_GRPC_XDS_NODE_ID"
+ // TracingEnabledVar enables the tracing feature
+ TracingEnabledVar = "TRACING_ENABLED"
+ // TracingServiceNameVar is the service name that appears in tracing spans
+ TracingServiceNameVar = "TRACING_SERVICE_NAME"
+ // TracingServiceNamespaceVar is the service namespace that appears in tracing spans
+ TracingServiceNamespaceVar = "TRACING_SERVICE_NAMESPACE"
+ // TracingServiceInstanceIDVar is the service instance id that appears in tracing spans
+ TracingServiceInstanceIDVar = "TRACING_SERVICE_INSTANCE_ID"
+ // TracingSamplingRateVar is trace sampling rate
+ TracingSamplingRateVar = "TRACING_SAMPLING_RATE"
+ // OTELExporterOTLPTraceEndpointVar is target url to which the trace exporter is going to send
+ OTELExporterOTLPTraceEndpointVar = "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"
// InfraName is the name for rate-limit resources.
InfraName = "envoy-ratelimit"
@@ -125,7 +137,8 @@ func rateLimitLabels() map[string]string {
}
// expectedRateLimitContainers returns expected rateLimit containers.
-func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec) []corev1.Container {
+func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec,
+ namespace string) []corev1.Container {
ports := []corev1.ContainerPort{
{
Name: "grpc",
@@ -142,7 +155,7 @@ func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeploymen
Command: []string{
"/bin/ratelimit",
},
- Env: expectedRateLimitContainerEnv(rateLimit, rateLimitDeployment),
+ Env: expectedRateLimitContainerEnv(rateLimit, rateLimitDeployment, namespace),
Ports: ports,
Resources: *rateLimitDeployment.Container.Resources,
SecurityContext: rateLimitDeployment.Container.SecurityContext,
@@ -275,7 +288,8 @@ func expectedDeploymentVolumes(rateLimit *egv1a1.RateLimit, rateLimitDeployment
}
// expectedRateLimitContainerEnv returns expected rateLimit container envs.
-func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec) []corev1.EnvVar {
+func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec,
+ namespace string) []corev1.EnvVar {
env := []corev1.EnvVar{
{
Name: RuntimeRootEnvVar,
@@ -384,6 +398,54 @@ func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeploym
}
}
+ if enableTracing(rateLimit) {
+ var sampleRate = 1.0
+ if rateLimit.Telemetry.Tracing.SamplingRate != nil {
+ sampleRate = float64(*rateLimit.Telemetry.Tracing.SamplingRate) / 100.0
+ }
+
+ traceEndpoint := checkTraceEndpointScheme(rateLimit.Telemetry.Tracing.Provider.URL)
+ tracingEnvs := []corev1.EnvVar{
+ {
+ Name: TracingEnabledVar,
+ Value: "true",
+ },
+ {
+ Name: TracingServiceNameVar,
+ Value: InfraName,
+ },
+ {
+ Name: TracingServiceNamespaceVar,
+ Value: namespace,
+ },
+ {
+ // By default, this is a random instanceID,
+ // we use the RateLimit pod name as the trace service instanceID.
+ Name: TracingServiceInstanceIDVar,
+ ValueFrom: &corev1.EnvVarSource{
+ FieldRef: &corev1.ObjectFieldSelector{
+ APIVersion: "v1",
+ FieldPath: "metadata.name",
+ },
+ },
+ },
+ {
+ Name: TracingSamplingRateVar,
+ // The api is configured with [0,100], but sampling can only be [0,1].
+ // doc: https://github.com/envoyproxy/ratelimit?tab=readme-ov-file#tracing
+ // NOTE: the value is formatted with a single decimal place, so the effective
+ // sampling rate is rounded to the nearest 0.1 (e.g. 55 becomes "0.6"), and
+ // configured rates below 5 are emitted as "0.0".
+ Value: strconv.FormatFloat(sampleRate, 'f', 1, 64),
+ },
+ {
+ Name: OTELExporterOTLPTraceEndpointVar,
+ Value: traceEndpoint,
+ },
+ }
+ env = append(env, tracingEnvs...)
+ }
+
return resource.ExpectedContainerEnv(rateLimitDeployment.Container, env)
}
@@ -399,3 +461,31 @@ func Validate(ctx context.Context, client client.Client, gateway *egv1a1.EnvoyGa
return nil
}
+
+func enableTracing(rl *egv1a1.RateLimit) bool {
+ // Other fields can fall back to their default values,
+ // but tracing is only enabled when the user has set a non-empty Provider.URL
+ if rl != nil && rl.Telemetry != nil &&
+ rl.Telemetry.Tracing != nil &&
+ rl.Telemetry.Tracing.Provider != nil &&
+ len(rl.Telemetry.Tracing.Provider.URL) != 0 {
+ return true
+ }
+
+ return false
+}
+
+// checkTraceEndpointScheme returns url with "http://" prepended when it carries no scheme.
+func checkTraceEndpointScheme(url string) string {
+	// The OTLP exporter requires the endpoint to carry a scheme prefix.
+	// A url that already names a scheme (e.g. "http://" or "https://") is returned
+	// unchanged, so "https://host" is not turned into "http://https://host".
+	// TODO: It is currently assumed to be a normal connection,
+	// and a TLS connection will be added later.
+	httpScheme := "http://"
+	if strings.Contains(url, "://") {
+		return url
+	}
+
+	return fmt.Sprintf("%s%s", httpScheme, url)
+}
diff --git a/internal/infrastructure/kubernetes/ratelimit/resource_provider.go b/internal/infrastructure/kubernetes/ratelimit/resource_provider.go
index 90f646d014f..885cb4ddca6 100644
--- a/internal/infrastructure/kubernetes/ratelimit/resource_provider.go
+++ b/internal/infrastructure/kubernetes/ratelimit/resource_provider.go
@@ -61,6 +61,7 @@ func (r *ResourceRender) Name() string {
func enablePrometheus(rl *egv1a1.RateLimit) bool {
if rl != nil &&
rl.Telemetry != nil &&
+ rl.Telemetry.Metrics != nil &&
rl.Telemetry.Metrics.Prometheus != nil {
return !rl.Telemetry.Metrics.Prometheus.Disable
}
@@ -183,7 +184,7 @@ func (r *ResourceRender) ServiceAccount() (*corev1.ServiceAccount, error) {
// Deployment returns the expected rate limit Deployment based on the provided infra.
func (r *ResourceRender) Deployment() (*appsv1.Deployment, error) {
- containers := expectedRateLimitContainers(r.rateLimit, r.rateLimitDeployment)
+ containers := expectedRateLimitContainers(r.rateLimit, r.rateLimitDeployment, r.Namespace)
labels := rateLimitLabels()
selector := resource.GetSelector(labels)
diff --git a/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go b/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go
index 6c56631d9cc..52aec1fabed 100644
--- a/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go
+++ b/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go
@@ -648,6 +648,46 @@ func TestDeployment(t *testing.T) {
},
},
},
+ {
+ caseName: "enable-tracing",
+ rateLimit: &egv1a1.RateLimit{
+ Backend: egv1a1.RateLimitDatabaseBackend{
+ Type: egv1a1.RedisBackendType,
+ Redis: &egv1a1.RateLimitRedisSettings{
+ URL: "redis.redis.svc:6379",
+ },
+ },
+ Telemetry: &egv1a1.RateLimitTelemetry{
+ Tracing: &egv1a1.RateLimitTracing{
+ Provider: &egv1a1.RateLimitTracingProvider{
+ URL: "http://trace-collector.envoy-gateway-system.svc.cluster.local:4318",
+ },
+ },
+ },
+ },
+ },
+ {
+ caseName: "enable-tracing-custom",
+ rateLimit: &egv1a1.RateLimit{
+ Backend: egv1a1.RateLimitDatabaseBackend{
+ Type: egv1a1.RedisBackendType,
+ Redis: &egv1a1.RateLimitRedisSettings{
+ URL: "redis.redis.svc:6379",
+ },
+ },
+ Telemetry: &egv1a1.RateLimitTelemetry{
+ Tracing: &egv1a1.RateLimitTracing{
+ SamplingRate: func() *uint32 {
+ var samplingRate uint32 = 55
+ return &samplingRate
+ }(),
+ Provider: &egv1a1.RateLimitTracingProvider{
+ URL: "trace-collector.envoy-gateway-system.svc.cluster.local:4317",
+ },
+ },
+ },
+ },
+ },
}
for _, tc := range cases {
t.Run(tc.caseName, func(t *testing.T) {
diff --git a/internal/infrastructure/kubernetes/ratelimit/resource_test.go b/internal/infrastructure/kubernetes/ratelimit/resource_test.go
new file mode 100644
index 00000000000..71179c8c7c4
--- /dev/null
+++ b/internal/infrastructure/kubernetes/ratelimit/resource_test.go
@@ -0,0 +1,40 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package ratelimit
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/require"
+)
+
+func TestCheckTraceEndpointScheme(t *testing.T) {
+
+ cases := []struct {
+ caseName string
+ actualURL string
+ expectedURL string
+ }{
+ {
+ caseName: "normal url with http prefix",
+ actualURL: "http://collector.observability.svc.cluster.local:4318",
+ expectedURL: "http://collector.observability.svc.cluster.local:4318",
+ },
+ {
+ caseName: "abnormal url without http prefix",
+ actualURL: "collector.observability.svc.cluster.local:4318",
+ expectedURL: "http://collector.observability.svc.cluster.local:4318",
+ },
+ }
+
+ for _, tc := range cases {
+ t.Run(tc.caseName, func(t *testing.T) {
+ actual := checkTraceEndpointScheme(tc.actualURL)
+ require.Equal(t, tc.expectedURL, actual)
+ })
+ }
+
+}
diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml
new file mode 100644
index 00000000000..53e22e1d6cb
--- /dev/null
+++ b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml
@@ -0,0 +1,160 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ creationTimestamp: null
+ labels:
+ app.kubernetes.io/component: ratelimit
+ app.kubernetes.io/managed-by: envoy-gateway
+ app.kubernetes.io/name: envoy-ratelimit
+ name: envoy-ratelimit
+ namespace: envoy-gateway-system
+ ownerReferences:
+ - apiVersion: apps/v1
+ kind: Deployment
+ name: envoy-gateway
+ uid: test-owner-reference-uid-for-deployment
+spec:
+ progressDeadlineSeconds: 600
+ revisionHistoryLimit: 10
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: ratelimit
+ app.kubernetes.io/managed-by: envoy-gateway
+ app.kubernetes.io/name: envoy-ratelimit
+ strategy:
+ type: RollingUpdate
+ template:
+ metadata:
+ annotations:
+ prometheus.io/path: /metrics
+ prometheus.io/port: "19001"
+ prometheus.io/scrape: "true"
+ creationTimestamp: null
+ labels:
+ app.kubernetes.io/component: ratelimit
+ app.kubernetes.io/managed-by: envoy-gateway
+ app.kubernetes.io/name: envoy-ratelimit
+ spec:
+ automountServiceAccountToken: false
+ containers:
+ - command:
+ - /bin/ratelimit
+ env:
+ - name: RUNTIME_ROOT
+ value: /data
+ - name: RUNTIME_SUBDIRECTORY
+ value: ratelimit
+ - name: RUNTIME_IGNOREDOTFILES
+ value: "true"
+ - name: RUNTIME_WATCH_ROOT
+ value: "false"
+ - name: LOG_LEVEL
+ value: info
+ - name: USE_STATSD
+ value: "false"
+ - name: CONFIG_TYPE
+ value: GRPC_XDS_SOTW
+ - name: CONFIG_GRPC_XDS_SERVER_URL
+ value: envoy-gateway:18001
+ - name: CONFIG_GRPC_XDS_NODE_ID
+ value: envoy-ratelimit
+ - name: GRPC_SERVER_USE_TLS
+ value: "true"
+ - name: GRPC_SERVER_TLS_CERT
+ value: /certs/tls.crt
+ - name: GRPC_SERVER_TLS_KEY
+ value: /certs/tls.key
+ - name: GRPC_SERVER_TLS_CA_CERT
+ value: /certs/ca.crt
+ - name: CONFIG_GRPC_XDS_SERVER_USE_TLS
+ value: "true"
+ - name: CONFIG_GRPC_XDS_CLIENT_TLS_CERT
+ value: /certs/tls.crt
+ - name: CONFIG_GRPC_XDS_CLIENT_TLS_KEY
+ value: /certs/tls.key
+ - name: CONFIG_GRPC_XDS_SERVER_TLS_CACERT
+ value: /certs/ca.crt
+ - name: FORCE_START_WITHOUT_INITIAL_CONFIG
+ value: "true"
+ - name: REDIS_SOCKET_TYPE
+ value: tcp
+ - name: REDIS_URL
+ value: redis.redis.svc:6379
+ - name: TRACING_ENABLED
+ value: "true"
+ - name: TRACING_SERVICE_NAME
+ value: envoy-ratelimit
+ - name: TRACING_SERVICE_NAMESPACE
+ value: envoy-gateway-system
+ - name: TRACING_SERVICE_INSTANCE_ID
+ valueFrom:
+ fieldRef:
+ apiVersion: v1
+ fieldPath: metadata.name
+ - name: TRACING_SAMPLING_RATE
+ value: "0.6"
+ - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+ value: http://trace-collector.envoy-gateway-system.svc.cluster.local:4317
+ image: envoyproxy/ratelimit:master
+ imagePullPolicy: IfNotPresent
+ name: envoy-ratelimit
+ ports:
+ - containerPort: 8081
+ name: grpc
+ protocol: TCP
+ readinessProbe:
+ failureThreshold: 3
+ httpGet:
+ path: /healthcheck
+ port: 8080
+ scheme: HTTP
+ periodSeconds: 10
+ successThreshold: 1
+ timeoutSeconds: 1
+ resources:
+ requests:
+ cpu: 100m
+ memory: 512Mi
+ terminationMessagePath: /dev/termination-log
+ terminationMessagePolicy: File
+ volumeMounts:
+ - mountPath: /certs
+ name: certs
+ readOnly: true
+ - command:
+ - /bin/statsd_exporter
+ - --web.listen-address=:19001
+ - --statsd.mapping-config=/etc/statsd-exporter/conf.yaml
+ image: prom/statsd-exporter:v0.18.0
+ imagePullPolicy: IfNotPresent
+ name: prom-statsd-exporter
+ ports:
+ - containerPort: 9125
+ name: statsd
+ protocol: TCP
+ - containerPort: 19001
+ name: metrics
+ protocol: TCP
+ resources: {}
+ terminationMessagePath: /dev/termination-log
+ terminationMessagePolicy: File
+ volumeMounts:
+ - mountPath: /etc/statsd-exporter
+ name: statsd-exporter-config
+ readOnly: true
+ dnsPolicy: ClusterFirst
+ restartPolicy: Always
+ schedulerName: default-scheduler
+ serviceAccountName: envoy-ratelimit
+ terminationGracePeriodSeconds: 300
+ volumes:
+ - name: certs
+ secret:
+ defaultMode: 420
+ secretName: envoy-rate-limit
+ - configMap:
+ defaultMode: 420
+ name: statsd-exporter-config
+ optional: true
+ name: statsd-exporter-config
+status: {}
diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml
new file mode 100644
index 00000000000..bee76879b02
--- /dev/null
+++ b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml
@@ -0,0 +1,160 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+ creationTimestamp: null
+ labels:
+ app.kubernetes.io/component: ratelimit
+ app.kubernetes.io/managed-by: envoy-gateway
+ app.kubernetes.io/name: envoy-ratelimit
+ name: envoy-ratelimit
+ namespace: envoy-gateway-system
+ ownerReferences:
+ - apiVersion: apps/v1
+ kind: Deployment
+ name: envoy-gateway
+ uid: test-owner-reference-uid-for-deployment
+spec:
+ progressDeadlineSeconds: 600
+ revisionHistoryLimit: 10
+ selector:
+ matchLabels:
+ app.kubernetes.io/component: ratelimit
+ app.kubernetes.io/managed-by: envoy-gateway
+ app.kubernetes.io/name: envoy-ratelimit
+ strategy:
+ type: RollingUpdate
+ template:
+ metadata:
+ annotations:
+ prometheus.io/path: /metrics
+ prometheus.io/port: "19001"
+ prometheus.io/scrape: "true"
+ creationTimestamp: null
+ labels:
+ app.kubernetes.io/component: ratelimit
+ app.kubernetes.io/managed-by: envoy-gateway
+ app.kubernetes.io/name: envoy-ratelimit
+ spec:
+ automountServiceAccountToken: false
+ containers:
+ - command:
+ - /bin/ratelimit
+ env:
+ - name: RUNTIME_ROOT
+ value: /data
+ - name: RUNTIME_SUBDIRECTORY
+ value: ratelimit
+ - name: RUNTIME_IGNOREDOTFILES
+ value: "true"
+ - name: RUNTIME_WATCH_ROOT
+ value: "false"
+ - name: LOG_LEVEL
+ value: info
+ - name: USE_STATSD
+ value: "false"
+ - name: CONFIG_TYPE
+ value: GRPC_XDS_SOTW
+ - name: CONFIG_GRPC_XDS_SERVER_URL
+ value: envoy-gateway:18001
+ - name: CONFIG_GRPC_XDS_NODE_ID
+ value: envoy-ratelimit
+ - name: GRPC_SERVER_USE_TLS
+ value: "true"
+ - name: GRPC_SERVER_TLS_CERT
+ value: /certs/tls.crt
+ - name: GRPC_SERVER_TLS_KEY
+ value: /certs/tls.key
+ - name: GRPC_SERVER_TLS_CA_CERT
+ value: /certs/ca.crt
+ - name: CONFIG_GRPC_XDS_SERVER_USE_TLS
+ value: "true"
+ - name: CONFIG_GRPC_XDS_CLIENT_TLS_CERT
+ value: /certs/tls.crt
+ - name: CONFIG_GRPC_XDS_CLIENT_TLS_KEY
+ value: /certs/tls.key
+ - name: CONFIG_GRPC_XDS_SERVER_TLS_CACERT
+ value: /certs/ca.crt
+ - name: FORCE_START_WITHOUT_INITIAL_CONFIG
+ value: "true"
+ - name: REDIS_SOCKET_TYPE
+ value: tcp
+ - name: REDIS_URL
+ value: redis.redis.svc:6379
+ - name: TRACING_ENABLED
+ value: "true"
+ - name: TRACING_SERVICE_NAME
+ value: envoy-ratelimit
+ - name: TRACING_SERVICE_NAMESPACE
+ value: envoy-gateway-system
+ - name: TRACING_SERVICE_INSTANCE_ID
+ valueFrom:
+ fieldRef:
+ apiVersion: v1
+ fieldPath: metadata.name
+ - name: TRACING_SAMPLING_RATE
+ value: "1.0"
+ - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+ value: http://trace-collector.envoy-gateway-system.svc.cluster.local:4318
+ image: envoyproxy/ratelimit:master
+ imagePullPolicy: IfNotPresent
+ name: envoy-ratelimit
+ ports:
+ - containerPort: 8081
+ name: grpc
+ protocol: TCP
+ readinessProbe:
+ failureThreshold: 3
+ httpGet:
+ path: /healthcheck
+ port: 8080
+ scheme: HTTP
+ periodSeconds: 10
+ successThreshold: 1
+ timeoutSeconds: 1
+ resources:
+ requests:
+ cpu: 100m
+ memory: 512Mi
+ terminationMessagePath: /dev/termination-log
+ terminationMessagePolicy: File
+ volumeMounts:
+ - mountPath: /certs
+ name: certs
+ readOnly: true
+ - command:
+ - /bin/statsd_exporter
+ - --web.listen-address=:19001
+ - --statsd.mapping-config=/etc/statsd-exporter/conf.yaml
+ image: prom/statsd-exporter:v0.18.0
+ imagePullPolicy: IfNotPresent
+ name: prom-statsd-exporter
+ ports:
+ - containerPort: 9125
+ name: statsd
+ protocol: TCP
+ - containerPort: 19001
+ name: metrics
+ protocol: TCP
+ resources: {}
+ terminationMessagePath: /dev/termination-log
+ terminationMessagePolicy: File
+ volumeMounts:
+ - mountPath: /etc/statsd-exporter
+ name: statsd-exporter-config
+ readOnly: true
+ dnsPolicy: ClusterFirst
+ restartPolicy: Always
+ schedulerName: default-scheduler
+ serviceAccountName: envoy-ratelimit
+ terminationGracePeriodSeconds: 300
+ volumes:
+ - name: certs
+ secret:
+ defaultMode: 420
+ secretName: envoy-rate-limit
+ - configMap:
+ defaultMode: 420
+ name: statsd-exporter-config
+ optional: true
+ name: statsd-exporter-config
+status: {}
diff --git a/site/content/en/latest/api/extension_types.md b/site/content/en/latest/api/extension_types.md
index c3316f67026..69c4578a314 100644
--- a/site/content/en/latest/api/extension_types.md
+++ b/site/content/en/latest/api/extension_types.md
@@ -2354,6 +2354,48 @@ _Appears in:_
| Field | Type | Required | Description |
| --- | --- | --- | --- |
| `metrics` | _[RateLimitMetrics](#ratelimitmetrics)_ | true | Metrics defines metrics configuration for RateLimit. |
+| `tracing` | _[RateLimitTracing](#ratelimittracing)_ | true | Tracing defines traces configuration for RateLimit. |
+
+
+#### RateLimitTracing
+
+
+
+
+
+_Appears in:_
+- [RateLimitTelemetry](#ratelimittelemetry)
+
+| Field | Type | Required | Description |
+| --- | --- | --- | --- |
+| `samplingRate` | _integer_ | false | SamplingRate controls the rate at which traffic will be
selected for tracing if no prior sampling decision has been made.
Defaults to 100, valid values [0-100]. 100 indicates 100% sampling. |
+| `Provider` | _[RateLimitTracingProvider](#ratelimittracingprovider)_ | true | Provider defines the rateLimit tracing provider.
Only OpenTelemetry is supported currently. |
+
+
+#### RateLimitTracingProvider
+
+
+
+RateLimitTracingProvider defines the tracing provider configuration of RateLimit
+
+_Appears in:_
+- [RateLimitTracing](#ratelimittracing)
+
+| Field | Type | Required | Description |
+| --- | --- | --- | --- |
+| `type` | _[RateLimitTracingProviderType](#ratelimittracingprovidertype)_ | true | Type defines the tracing provider type.
Since to RateLimit Exporter currently using OpenTelemetry, only OpenTelemetry is supported |
+| `url` | _string_ | true | URL is the endpoint of the trace collector that supports the OTLP protocol |
+
+
+#### RateLimitTracingProviderType
+
+_Underlying type:_ _string_
+
+
+
+_Appears in:_
+- [RateLimitTracingProvider](#ratelimittracingprovider)
+
#### RateLimitType
diff --git a/site/content/en/latest/tasks/observability/rate-limit-observability.md b/site/content/en/latest/tasks/observability/rate-limit-observability.md
new file mode 100644
index 00000000000..fa61acfbde1
--- /dev/null
+++ b/site/content/en/latest/tasks/observability/rate-limit-observability.md
@@ -0,0 +1,73 @@
+---
+title: "RateLimit Observability"
+---
+
+Envoy Gateway provides observability for the RateLimit instances.
+This guide shows you how to configure RateLimit observability, including traces.
+
+## Prerequisites
+
+Follow the steps from the [Quickstart Guide](../quickstart) to install Envoy Gateway and the HTTPRoute example manifest.
+Before proceeding, you should be able to query the example backend using HTTP. Follow the steps from the [Global Rate Limit](../traffic/global-rate-limit) to install RateLimit.
+
+
+[OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) offers a vendor-agnostic implementation of how to receive, process and export telemetry data.
+Install OTel-Collector:
+
+```shell
+helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
+helm repo update
+helm upgrade --install otel-collector open-telemetry/opentelemetry-collector -f https://raw.githubusercontent.com/envoyproxy/gateway/latest/examples/otel-collector/helm-values.yaml -n monitoring --create-namespace --version 0.60.0
+```
+
+## Traces
+
+By default, the Envoy Gateway does not configure RateLimit to send traces to the OpenTelemetry Sink.
+You can configure the collector in the `rateLimit.telemetry.tracing` field of the `EnvoyGateway` CRD.
+
+RateLimit uses the OpenTelemetry Exporter to export traces to the collector.
+You can configure a collector that supports the OTLP protocol, which includes but is not limited to: OpenTelemetry Collector, Jaeger, Zipkin, and so on.
+
+***Note:***
+* By default, the Envoy Gateway configures a 100% sampling rate for RateLimit, which may lead to performance issues.
+* The Envoy Gateway constructs the Kubernetes FQDN using the value of `BackendObjectReference`, which serves as the target endpoint for
+ the RateLimit trace collector. The `BackendObjectReference` is configured through the collector Service. Please note, the configuration of collector Service
+ using `Service.type=ExternalName` is currently not supported.
+
+Assuming the OpenTelemetry Collector is running in the `observability` namespace, and it has a service named `otel-svc`,
+we only want to sample `50%` of the trace data. We would configure it as follows:
+
+```shell
+cat <
Date: Wed, 10 Apr 2024 19:15:44 +0800
Subject: [PATCH 2/5] fix: add json tag
Signed-off-by: ShyunnY <1147212064@qq.com>
---
api/v1alpha1/envoygateway_types.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/api/v1alpha1/envoygateway_types.go b/api/v1alpha1/envoygateway_types.go
index 5a9d227e9f8..7cf0810c1cf 100644
--- a/api/v1alpha1/envoygateway_types.go
+++ b/api/v1alpha1/envoygateway_types.go
@@ -378,7 +378,7 @@ type RateLimitTracing struct {
// Provider defines the rateLimit tracing provider.
// Only OpenTelemetry is supported currently.
- Provider *RateLimitTracingProvider
+ Provider *RateLimitTracingProvider `json:"provider,omitempty"`
}
type RateLimitTracingProviderType string
From 8f531301174deb0403799861acc350dbd60c5ddf Mon Sep 17 00:00:00 2001
From: ShyunnY <1147212064@qq.com>
Date: Wed, 10 Apr 2024 19:29:16 +0800
Subject: [PATCH 3/5] fix: use OTEL_EXPORTER_OTLP_ENDPOINT env
Signed-off-by: ShyunnY <1147212064@qq.com>
---
internal/infrastructure/kubernetes/ratelimit/resource.go | 2 +-
.../ratelimit/testdata/deployments/enable-tracing-custom.yaml | 2 +-
.../ratelimit/testdata/deployments/enable-tracing.yaml | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/internal/infrastructure/kubernetes/ratelimit/resource.go b/internal/infrastructure/kubernetes/ratelimit/resource.go
index 93360f09d80..7e7a9d3722d 100644
--- a/internal/infrastructure/kubernetes/ratelimit/resource.go
+++ b/internal/infrastructure/kubernetes/ratelimit/resource.go
@@ -90,7 +90,7 @@ const (
// TracingSamplingRateVar is trace sampling rate
TracingSamplingRateVar = "TRACING_SAMPLING_RATE"
// OTELExporterOTLPTraceEndpointVar is target url to which the trace exporter is going to send
- OTELExporterOTLPTraceEndpointVar = "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT"
+ OTELExporterOTLPTraceEndpointVar = "OTEL_EXPORTER_OTLP_ENDPOINT"
// InfraName is the name for rate-limit resources.
InfraName = "envoy-ratelimit"
diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml
index 53e22e1d6cb..b4c7d9472e9 100644
--- a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml
+++ b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml
@@ -93,7 +93,7 @@ spec:
fieldPath: metadata.name
- name: TRACING_SAMPLING_RATE
value: "0.6"
- - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+ - name: OTEL_EXPORTER_OTLP_ENDPOINT
value: http://trace-collector.envoy-gateway-system.svc.cluster.local:4317
image: envoyproxy/ratelimit:master
imagePullPolicy: IfNotPresent
diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml
index bee76879b02..e36ff5ef87d 100644
--- a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml
+++ b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml
@@ -93,7 +93,7 @@ spec:
fieldPath: metadata.name
- name: TRACING_SAMPLING_RATE
value: "1.0"
- - name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
+ - name: OTEL_EXPORTER_OTLP_ENDPOINT
value: http://trace-collector.envoy-gateway-system.svc.cluster.local:4318
image: envoyproxy/ratelimit:master
imagePullPolicy: IfNotPresent
From badd639c93b54024a3393eb0ea497f4de42a6cb4 Mon Sep 17 00:00:00 2001
From: ShyunnY <1147212064@qq.com>
Date: Wed, 10 Apr 2024 20:21:22 +0800
Subject: [PATCH 4/5] fix
Signed-off-by: ShyunnY <1147212064@qq.com>
---
api/v1alpha1/envoygateway_types.go | 2 +-
api/v1alpha1/zz_generated.deepcopy.go | 7 ++++++-
site/content/en/latest/api/extension_types.md | 11 +----------
3 files changed, 8 insertions(+), 12 deletions(-)
diff --git a/api/v1alpha1/envoygateway_types.go b/api/v1alpha1/envoygateway_types.go
index 7cf0810c1cf..ade9e056b1e 100644
--- a/api/v1alpha1/envoygateway_types.go
+++ b/api/v1alpha1/envoygateway_types.go
@@ -391,7 +391,7 @@ const (
type RateLimitTracingProvider struct {
// Type defines the tracing provider type.
// Since to RateLimit Exporter currently using OpenTelemetry, only OpenTelemetry is supported
- Type RateLimitTracingProviderType `json:"type"`
+ Type *RateLimitTracingProviderType `json:"type,omitempty"`
// URL is the endpoint of the trace collector that supports the OTLP protocol
URL string `json:"url"`
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index 6626d84b5a0..b1e849077bd 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -3326,7 +3326,7 @@ func (in *RateLimitTracing) DeepCopyInto(out *RateLimitTracing) {
if in.Provider != nil {
in, out := &in.Provider, &out.Provider
*out = new(RateLimitTracingProvider)
- **out = **in
+ (*in).DeepCopyInto(*out)
}
}
@@ -3343,6 +3343,11 @@ func (in *RateLimitTracing) DeepCopy() *RateLimitTracing {
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RateLimitTracingProvider) DeepCopyInto(out *RateLimitTracingProvider) {
*out = *in
+ if in.Type != nil {
+ in, out := &in.Type, &out.Type
+ *out = new(RateLimitTracingProviderType)
+ **out = **in
+ }
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitTracingProvider.
diff --git a/site/content/en/latest/api/extension_types.md b/site/content/en/latest/api/extension_types.md
index 69c4578a314..7234b805d8f 100644
--- a/site/content/en/latest/api/extension_types.md
+++ b/site/content/en/latest/api/extension_types.md
@@ -2369,7 +2369,7 @@ _Appears in:_
| Field | Type | Required | Description |
| --- | --- | --- | --- |
| `samplingRate` | _integer_ | false | SamplingRate controls the rate at which traffic will be
selected for tracing if no prior sampling decision has been made.
Defaults to 100, valid values [0-100]. 100 indicates 100% sampling. |
-| `Provider` | _[RateLimitTracingProvider](#ratelimittracingprovider)_ | true | Provider defines the rateLimit tracing provider.
Only OpenTelemetry is supported currently. |
+| `provider` | _[RateLimitTracingProvider](#ratelimittracingprovider)_ | true | Provider defines the rateLimit tracing provider.
Only OpenTelemetry is supported currently. |
#### RateLimitTracingProvider
@@ -2387,15 +2387,6 @@ _Appears in:_
| `url` | _string_ | true | URL is the endpoint of the trace collector that supports the OTLP protocol |
-#### RateLimitTracingProviderType
-
-_Underlying type:_ _string_
-
-
-
-_Appears in:_
-- [RateLimitTracingProvider](#ratelimittracingprovider)
-
#### RateLimitType
From a0573aa9af72416f3cd3699e3787b1a7f3fe0747 Mon Sep 17 00:00:00 2001
From: yuluo-yx
Date: Wed, 10 Apr 2024 22:07:33 +0800
Subject: [PATCH 5/5] docs: update docs
Signed-off-by: yuluo-yx
---
.../observability/rate-limit-observability.md | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/site/content/en/latest/tasks/observability/rate-limit-observability.md b/site/content/en/latest/tasks/observability/rate-limit-observability.md
index fa61acfbde1..350be4dc4b1 100644
--- a/site/content/en/latest/tasks/observability/rate-limit-observability.md
+++ b/site/content/en/latest/tasks/observability/rate-limit-observability.md
@@ -10,8 +10,8 @@ This guide show you how to config RateLimit observability, includes traces.
Follow the steps from the [Quickstart Guide](../quickstart) to install Envoy Gateway and the HTTPRoute example manifest.
Before proceeding, you should be able to query the example backend using HTTP. Follow the steps from the [Global Rate Limit](../traffic/global-rate-limit) to install RateLimit.
-
[OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) offers a vendor-agnostic implementation of how to receive, process and export telemetry data.
+
Install OTel-Collector:
```shell
@@ -29,10 +29,8 @@ RateLimit uses the OpenTelemetry Exporter to export traces to the collector.
You can configure a collector that supports the OTLP protocol, which includes but is not limited to: OpenTelemetry Collector, Jaeger, Zipkin, and so on.
***Note:***
-* By default, the Envoy Gateway configures a 100% sampling rate for RateLimit, which may lead to performance issues.
-* The Envoy Gateway constructs the Kubernetes FQDN using the value of `BackendObjectReference`, which serves as the target endpoint for
- the RateLimit trace collector. The `BackendObjectReference` is configured through the collector Service. Please note, the configuration of collector Service
- using `Service.type=ExternalName` is currently not supported.
+
+* By default, the Envoy Gateway configures a `100%` sampling rate for RateLimit, which may lead to performance issues.
Assuming the OpenTelemetry Collector is running in the `observability` namespace, and it has a service named `otel-svc`,
we only want to sample `50%` of the trace data. We would configure it as follows:
@@ -60,14 +58,13 @@ data:
telemetry:
tracing:
sampleRate: 50
- backendRef:
- name: otel-svc
- namespace: observability
+ provider:
+ url: http://otel-svc.observability.svc.cluster.local:4318
EOF
```
-After updating the ConfigMap, you will need to restart the envoy-gateway deployment so the configuration kicks in
+After updating the ConfigMap, you will need to restart the envoy-gateway deployment so the configuration takes effect:
```shell
kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system
-```
\ No newline at end of file
+```