From aa4e3a0f5add9d6ccdd4b8973cc3da085f84ffff Mon Sep 17 00:00:00 2001
From: Shyunn <shyunny@outlook.com>
Date: Thu, 11 Apr 2024 11:18:33 +0800
Subject: [PATCH] feat: add trace for rate-limit (#2974)

* feat: support trace of ratelimit

Signed-off-by: ShyunnY <1147212064@qq.com>

* fix: add json tag

Signed-off-by: ShyunnY <1147212064@qq.com>

* fix: use OTEL_EXPORTER_OTLP_ENDPOINT env

Signed-off-by: ShyunnY <1147212064@qq.com>

* fix

Signed-off-by: ShyunnY <1147212064@qq.com>

* docs: update docs

Signed-off-by: yuluo-yx <yuluo08290126@gmail.com>

---------

Signed-off-by: ShyunnY <1147212064@qq.com>
Signed-off-by: yuluo-yx <yuluo08290126@gmail.com>
Co-authored-by: yuluo-yx <yuluo08290126@gmail.com>
Co-authored-by: zirain <zirain2009@gmail.com>
---
 api/v1alpha1/envoygateway_types.go            |  31 ++++
 api/v1alpha1/zz_generated.deepcopy.go         |  50 ++++++
 .../kubernetes/ratelimit/resource.go          |  96 ++++++++++-
 .../kubernetes/ratelimit/resource_provider.go |   3 +-
 .../ratelimit/resource_provider_test.go       |  40 +++++
 .../kubernetes/ratelimit/resource_test.go     |  40 +++++
 .../deployments/enable-tracing-custom.yaml    | 160 ++++++++++++++++++
 .../testdata/deployments/enable-tracing.yaml  | 160 ++++++++++++++++++
 site/content/en/latest/api/extension_types.md |  33 ++++
 .../observability/rate-limit-observability.md |  70 ++++++++
 10 files changed, 679 insertions(+), 4 deletions(-)
 create mode 100644 internal/infrastructure/kubernetes/ratelimit/resource_test.go
 create mode 100644 internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml
 create mode 100644 internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml
 create mode 100644 site/content/en/latest/tasks/observability/rate-limit-observability.md

diff --git a/api/v1alpha1/envoygateway_types.go b/api/v1alpha1/envoygateway_types.go
index 47b9861e170..ade9e056b1e 100644
--- a/api/v1alpha1/envoygateway_types.go
+++ b/api/v1alpha1/envoygateway_types.go
@@ -354,6 +354,9 @@ type RateLimit struct {
 type RateLimitTelemetry struct {
 	// Metrics defines metrics configuration for RateLimit.
 	Metrics *RateLimitMetrics `json:"metrics,omitempty"`
+
+	// Tracing defines traces configuration for RateLimit.
+	Tracing *RateLimitTracing `json:"tracing,omitempty"`
 }
 
 type RateLimitMetrics struct {
@@ -366,6 +369,34 @@ type RateLimitMetricsPrometheusProvider struct {
 	Disable bool `json:"disable,omitempty"`
 }
 
+type RateLimitTracing struct {
+	// SamplingRate controls the rate at which traffic will be
+	// selected for tracing if no prior sampling decision has been made.
+	// Defaults to 100, valid values [0-100]. 100 indicates 100% sampling.
+	// +optional
+	SamplingRate *uint32 `json:"samplingRate,omitempty"`
+
+	// Provider defines the rateLimit tracing provider.
+	// Only OpenTelemetry is supported currently.
+	Provider *RateLimitTracingProvider `json:"provider,omitempty"`
+}
+
+// RateLimitTracingProviderType identifies the tracing provider used by RateLimit.
+type RateLimitTracingProviderType string
+
+// RateLimitTracingProviderTypeOpenTelemetry selects the OpenTelemetry provider.
+const RateLimitTracingProviderTypeOpenTelemetry RateLimitTracingProviderType = "OpenTelemetry"
+
+// RateLimitTracingProvider defines the tracing provider configuration of RateLimit
+type RateLimitTracingProvider struct {
+	// Type defines the tracing provider type.
+	// Since to RateLimit Exporter currently using OpenTelemetry, only OpenTelemetry is supported
+	Type *RateLimitTracingProviderType `json:"type,omitempty"`
+
+	// URL is the endpoint of the trace collector that supports the OTLP protocol
+	URL string `json:"url"`
+}
+
 // RateLimitDatabaseBackend defines the configuration associated with
 // the database backend used by the rate limit service.
 // +union
diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go
index e82cda7787f..b1e849077bd 100644
--- a/api/v1alpha1/zz_generated.deepcopy.go
+++ b/api/v1alpha1/zz_generated.deepcopy.go
@@ -3298,6 +3298,11 @@ func (in *RateLimitTelemetry) DeepCopyInto(out *RateLimitTelemetry) {
 		*out = new(RateLimitMetrics)
 		(*in).DeepCopyInto(*out)
 	}
+	if in.Tracing != nil {
+		in, out := &in.Tracing, &out.Tracing
+		*out = new(RateLimitTracing)
+		(*in).DeepCopyInto(*out)
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitTelemetry.
@@ -3310,6 +3315,51 @@ func (in *RateLimitTelemetry) DeepCopy() *RateLimitTelemetry {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *RateLimitTracing) DeepCopyInto(out *RateLimitTracing) {
+	*out = *in
+	if in.SamplingRate != nil {
+		in, out := &in.SamplingRate, &out.SamplingRate
+		*out = new(uint32)
+		**out = **in
+	}
+	if in.Provider != nil {
+		in, out := &in.Provider, &out.Provider
+		*out = new(RateLimitTracingProvider)
+		(*in).DeepCopyInto(*out)
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitTracing.
+func (in *RateLimitTracing) DeepCopy() *RateLimitTracing {
+	if in == nil {
+		return nil
+	}
+	out := new(RateLimitTracing)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *RateLimitTracingProvider) DeepCopyInto(out *RateLimitTracingProvider) {
+	*out = *in
+	if in.Type != nil {
+		in, out := &in.Type, &out.Type
+		*out = new(RateLimitTracingProviderType)
+		**out = **in
+	}
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RateLimitTracingProvider.
+func (in *RateLimitTracingProvider) DeepCopy() *RateLimitTracingProvider {
+	if in == nil {
+		return nil
+	}
+	out := new(RateLimitTracingProvider)
+	in.DeepCopyInto(out)
+	return out
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *RateLimitValue) DeepCopyInto(out *RateLimitValue) {
 	*out = *in
diff --git a/internal/infrastructure/kubernetes/ratelimit/resource.go b/internal/infrastructure/kubernetes/ratelimit/resource.go
index 32dfba94c1b..7e7a9d3722d 100644
--- a/internal/infrastructure/kubernetes/ratelimit/resource.go
+++ b/internal/infrastructure/kubernetes/ratelimit/resource.go
@@ -79,6 +79,18 @@ const (
 	ConfigGrpcXdsServerURLEnvVar = "CONFIG_GRPC_XDS_SERVER_URL"
 	// ConfigGrpcXdsNodeIDEnvVar is the id of ratelimit node.
 	ConfigGrpcXdsNodeIDEnvVar = "CONFIG_GRPC_XDS_NODE_ID"
+	// TracingEnabledVar is the env var that enables the tracing feature
+	TracingEnabledVar = "TRACING_ENABLED"
+	// TracingServiceNameVar is the service name that appears in tracing spans
+	TracingServiceNameVar = "TRACING_SERVICE_NAME"
+	// TracingServiceNamespaceVar is the service namespace that appears in tracing spans
+	TracingServiceNamespaceVar = "TRACING_SERVICE_NAMESPACE"
+	// TracingServiceInstanceIDVar is the service instance id that appears in tracing spans
+	TracingServiceInstanceIDVar = "TRACING_SERVICE_INSTANCE_ID"
+	// TracingSamplingRateVar is trace sampling rate
+	TracingSamplingRateVar = "TRACING_SAMPLING_RATE"
+	// OTELExporterOTLPTraceEndpointVar is target url to which the trace exporter is going to send
+	OTELExporterOTLPTraceEndpointVar = "OTEL_EXPORTER_OTLP_ENDPOINT"
 
 	// InfraName is the name for rate-limit resources.
 	InfraName = "envoy-ratelimit"
@@ -125,7 +137,8 @@ func rateLimitLabels() map[string]string {
 }
 
 // expectedRateLimitContainers returns expected rateLimit containers.
-func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec) []corev1.Container {
+func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec,
+	namespace string) []corev1.Container {
 	ports := []corev1.ContainerPort{
 		{
 			Name:          "grpc",
@@ -142,7 +155,7 @@ func expectedRateLimitContainers(rateLimit *egv1a1.RateLimit, rateLimitDeploymen
 			Command: []string{
 				"/bin/ratelimit",
 			},
-			Env:                      expectedRateLimitContainerEnv(rateLimit, rateLimitDeployment),
+			Env:                      expectedRateLimitContainerEnv(rateLimit, rateLimitDeployment, namespace),
 			Ports:                    ports,
 			Resources:                *rateLimitDeployment.Container.Resources,
 			SecurityContext:          rateLimitDeployment.Container.SecurityContext,
@@ -275,7 +288,8 @@ func expectedDeploymentVolumes(rateLimit *egv1a1.RateLimit, rateLimitDeployment
 }
 
 // expectedRateLimitContainerEnv returns expected rateLimit container envs.
-func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec) []corev1.EnvVar {
+func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeployment *egv1a1.KubernetesDeploymentSpec,
+	namespace string) []corev1.EnvVar {
 	env := []corev1.EnvVar{
 		{
 			Name:  RuntimeRootEnvVar,
@@ -384,6 +398,54 @@ func expectedRateLimitContainerEnv(rateLimit *egv1a1.RateLimit, rateLimitDeploym
 		}
 	}
 
+	if enableTracing(rateLimit) {
+		var sampleRate = 1.0
+		if rateLimit.Telemetry.Tracing.SamplingRate != nil {
+			sampleRate = float64(*rateLimit.Telemetry.Tracing.SamplingRate) / 100.0
+		}
+
+		traceEndpoint := checkTraceEndpointScheme(rateLimit.Telemetry.Tracing.Provider.URL)
+		tracingEnvs := []corev1.EnvVar{
+			{
+				Name:  TracingEnabledVar,
+				Value: "true",
+			},
+			{
+				Name:  TracingServiceNameVar,
+				Value: InfraName,
+			},
+			{
+				Name:  TracingServiceNamespaceVar,
+				Value: namespace,
+			},
+			{
+				// By default, this is a random instanceID,
+				// we use the RateLimit pod name as the trace service instanceID.
+				Name: TracingServiceInstanceIDVar,
+				ValueFrom: &corev1.EnvVarSource{
+					FieldRef: &corev1.ObjectFieldSelector{
+						APIVersion: "v1",
+						FieldPath:  "metadata.name",
+					},
+				},
+			},
+			{
+				Name: TracingSamplingRateVar,
+				// The api is configured with [0,100], but sampling can only be [0,1].
+				// doc: https://github.com/envoyproxy/ratelimit?tab=readme-ov-file#tracing
+				// The ratio is formatted with a single decimal place, so precision is lost:
+				// the value is rounded to the nearest tenth, e.g. a configured rate of 55
+				// is emitted as "0.6" and a configured rate of 4 is emitted as "0.0".
+				Value: strconv.FormatFloat(sampleRate, 'f', 1, 64),
+			},
+			{
+				Name:  OTELExporterOTLPTraceEndpointVar,
+				Value: traceEndpoint,
+			},
+		}
+		env = append(env, tracingEnvs...)
+	}
+
 	return resource.ExpectedContainerEnv(rateLimitDeployment.Container, env)
 }
 
@@ -399,3 +461,31 @@ func Validate(ctx context.Context, client client.Client, gateway *egv1a1.EnvoyGa
 
 	return nil
 }
+
+// enableTracing reports whether tracing should be turned on for rl, which is
+// the case only when a tracing provider with a non-empty URL is configured.
+func enableTracing(rl *egv1a1.RateLimit) bool {
+	if rl == nil || rl.Telemetry == nil || rl.Telemetry.Tracing == nil {
+		return false
+	}
+
+	// Every other tracing setting can fall back to a default value, so a
+	// non-empty Provider.URL is the only thing the user has to supply.
+	provider := rl.Telemetry.Tracing.Provider
+	return provider != nil && provider.URL != ""
+}
+
+// checkTraceEndpointScheme returns url with a scheme prefix, because the OTLP
+// endpoint needs one. A url that already starts with "http://" or "https://"
+// is returned unchanged; anything else gets "http://" prepended.
+func checkTraceEndpointScheme(url string) string {
+	// Leave "https://" endpoints alone as well: prefixing them would yield the
+	// unusable "http://https://..." value.
+	// TODO: a scheme-less url is assumed to be a plaintext connection;
+	//       TLS-specific configuration will be added later.
+	if strings.HasPrefix(url, "http://") || strings.HasPrefix(url, "https://") {
+		return url
+	}
+
+	return "http://" + url
+}
diff --git a/internal/infrastructure/kubernetes/ratelimit/resource_provider.go b/internal/infrastructure/kubernetes/ratelimit/resource_provider.go
index 90f646d014f..885cb4ddca6 100644
--- a/internal/infrastructure/kubernetes/ratelimit/resource_provider.go
+++ b/internal/infrastructure/kubernetes/ratelimit/resource_provider.go
@@ -61,6 +61,7 @@ func (r *ResourceRender) Name() string {
 func enablePrometheus(rl *egv1a1.RateLimit) bool {
 	if rl != nil &&
 		rl.Telemetry != nil &&
+		rl.Telemetry.Metrics != nil &&
 		rl.Telemetry.Metrics.Prometheus != nil {
 		return !rl.Telemetry.Metrics.Prometheus.Disable
 	}
@@ -183,7 +184,7 @@ func (r *ResourceRender) ServiceAccount() (*corev1.ServiceAccount, error) {
 
 // Deployment returns the expected rate limit Deployment based on the provided infra.
 func (r *ResourceRender) Deployment() (*appsv1.Deployment, error) {
-	containers := expectedRateLimitContainers(r.rateLimit, r.rateLimitDeployment)
+	containers := expectedRateLimitContainers(r.rateLimit, r.rateLimitDeployment, r.Namespace)
 	labels := rateLimitLabels()
 	selector := resource.GetSelector(labels)
 
diff --git a/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go b/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go
index 6c56631d9cc..52aec1fabed 100644
--- a/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go
+++ b/internal/infrastructure/kubernetes/ratelimit/resource_provider_test.go
@@ -648,6 +648,46 @@ func TestDeployment(t *testing.T) {
 				},
 			},
 		},
+		{
+			caseName: "enable-tracing",
+			rateLimit: &egv1a1.RateLimit{
+				Backend: egv1a1.RateLimitDatabaseBackend{
+					Type: egv1a1.RedisBackendType,
+					Redis: &egv1a1.RateLimitRedisSettings{
+						URL: "redis.redis.svc:6379",
+					},
+				},
+				Telemetry: &egv1a1.RateLimitTelemetry{
+					Tracing: &egv1a1.RateLimitTracing{
+						Provider: &egv1a1.RateLimitTracingProvider{
+							URL: "http://trace-collector.envoy-gateway-system.svc.cluster.local:4318",
+						},
+					},
+				},
+			},
+		},
+		{
+			caseName: "enable-tracing-custom",
+			rateLimit: &egv1a1.RateLimit{
+				Backend: egv1a1.RateLimitDatabaseBackend{
+					Type: egv1a1.RedisBackendType,
+					Redis: &egv1a1.RateLimitRedisSettings{
+						URL: "redis.redis.svc:6379",
+					},
+				},
+				Telemetry: &egv1a1.RateLimitTelemetry{
+					Tracing: &egv1a1.RateLimitTracing{
+						SamplingRate: func() *uint32 {
+							var samplingRate uint32 = 55
+							return &samplingRate
+						}(),
+						Provider: &egv1a1.RateLimitTracingProvider{
+							URL: "trace-collector.envoy-gateway-system.svc.cluster.local:4317",
+						},
+					},
+				},
+			},
+		},
 	}
 	for _, tc := range cases {
 		t.Run(tc.caseName, func(t *testing.T) {
diff --git a/internal/infrastructure/kubernetes/ratelimit/resource_test.go b/internal/infrastructure/kubernetes/ratelimit/resource_test.go
new file mode 100644
index 00000000000..71179c8c7c4
--- /dev/null
+++ b/internal/infrastructure/kubernetes/ratelimit/resource_test.go
@@ -0,0 +1,40 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package ratelimit
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// TestCheckTraceEndpointScheme covers a url that already carries the http
+// scheme and a url that has no scheme at all.
+func TestCheckTraceEndpointScheme(t *testing.T) {
+	tests := []struct {
+		name  string
+		input string
+		want  string
+	}{
+		{
+			name:  "normal url with http prefix",
+			input: "http://collector.observability.svc.cluster.local:4318",
+			want:  "http://collector.observability.svc.cluster.local:4318",
+		},
+		{
+			name:  "abnormal url without http prefix",
+			input: "collector.observability.svc.cluster.local:4318",
+			want:  "http://collector.observability.svc.cluster.local:4318",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := checkTraceEndpointScheme(tt.input)
+			require.Equal(t, tt.want, got)
+		})
+	}
+}
diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml
new file mode 100644
index 00000000000..b4c7d9472e9
--- /dev/null
+++ b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing-custom.yaml
@@ -0,0 +1,160 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  creationTimestamp: null
+  labels:
+    app.kubernetes.io/component: ratelimit
+    app.kubernetes.io/managed-by: envoy-gateway
+    app.kubernetes.io/name: envoy-ratelimit
+  name: envoy-ratelimit
+  namespace: envoy-gateway-system
+  ownerReferences:
+  - apiVersion: apps/v1
+    kind: Deployment
+    name: envoy-gateway
+    uid: test-owner-reference-uid-for-deployment
+spec:
+  progressDeadlineSeconds: 600
+  revisionHistoryLimit: 10
+  selector:
+    matchLabels:
+      app.kubernetes.io/component: ratelimit
+      app.kubernetes.io/managed-by: envoy-gateway
+      app.kubernetes.io/name: envoy-ratelimit
+  strategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      annotations:
+        prometheus.io/path: /metrics
+        prometheus.io/port: "19001"
+        prometheus.io/scrape: "true"
+      creationTimestamp: null
+      labels:
+        app.kubernetes.io/component: ratelimit
+        app.kubernetes.io/managed-by: envoy-gateway
+        app.kubernetes.io/name: envoy-ratelimit
+    spec:
+      automountServiceAccountToken: false
+      containers:
+      - command:
+        - /bin/ratelimit
+        env:
+        - name: RUNTIME_ROOT
+          value: /data
+        - name: RUNTIME_SUBDIRECTORY
+          value: ratelimit
+        - name: RUNTIME_IGNOREDOTFILES
+          value: "true"
+        - name: RUNTIME_WATCH_ROOT
+          value: "false"
+        - name: LOG_LEVEL
+          value: info
+        - name: USE_STATSD
+          value: "false"
+        - name: CONFIG_TYPE
+          value: GRPC_XDS_SOTW
+        - name: CONFIG_GRPC_XDS_SERVER_URL
+          value: envoy-gateway:18001
+        - name: CONFIG_GRPC_XDS_NODE_ID
+          value: envoy-ratelimit
+        - name: GRPC_SERVER_USE_TLS
+          value: "true"
+        - name: GRPC_SERVER_TLS_CERT
+          value: /certs/tls.crt
+        - name: GRPC_SERVER_TLS_KEY
+          value: /certs/tls.key
+        - name: GRPC_SERVER_TLS_CA_CERT
+          value: /certs/ca.crt
+        - name: CONFIG_GRPC_XDS_SERVER_USE_TLS
+          value: "true"
+        - name: CONFIG_GRPC_XDS_CLIENT_TLS_CERT
+          value: /certs/tls.crt
+        - name: CONFIG_GRPC_XDS_CLIENT_TLS_KEY
+          value: /certs/tls.key
+        - name: CONFIG_GRPC_XDS_SERVER_TLS_CACERT
+          value: /certs/ca.crt
+        - name: FORCE_START_WITHOUT_INITIAL_CONFIG
+          value: "true"
+        - name: REDIS_SOCKET_TYPE
+          value: tcp
+        - name: REDIS_URL
+          value: redis.redis.svc:6379
+        - name: TRACING_ENABLED
+          value: "true"
+        - name: TRACING_SERVICE_NAME
+          value: envoy-ratelimit
+        - name: TRACING_SERVICE_NAMESPACE
+          value: envoy-gateway-system
+        - name: TRACING_SERVICE_INSTANCE_ID
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: metadata.name
+        - name: TRACING_SAMPLING_RATE
+          value: "0.6"
+        - name: OTEL_EXPORTER_OTLP_ENDPOINT
+          value: http://trace-collector.envoy-gateway-system.svc.cluster.local:4317
+        image: envoyproxy/ratelimit:master
+        imagePullPolicy: IfNotPresent
+        name: envoy-ratelimit
+        ports:
+        - containerPort: 8081
+          name: grpc
+          protocol: TCP
+        readinessProbe:
+          failureThreshold: 3
+          httpGet:
+            path: /healthcheck
+            port: 8080
+            scheme: HTTP
+          periodSeconds: 10
+          successThreshold: 1
+          timeoutSeconds: 1
+        resources:
+          requests:
+            cpu: 100m
+            memory: 512Mi
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /certs
+          name: certs
+          readOnly: true
+      - command:
+        - /bin/statsd_exporter
+        - --web.listen-address=:19001
+        - --statsd.mapping-config=/etc/statsd-exporter/conf.yaml
+        image: prom/statsd-exporter:v0.18.0
+        imagePullPolicy: IfNotPresent
+        name: prom-statsd-exporter
+        ports:
+        - containerPort: 9125
+          name: statsd
+          protocol: TCP
+        - containerPort: 19001
+          name: metrics
+          protocol: TCP
+        resources: {}
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /etc/statsd-exporter
+          name: statsd-exporter-config
+          readOnly: true
+      dnsPolicy: ClusterFirst
+      restartPolicy: Always
+      schedulerName: default-scheduler
+      serviceAccountName: envoy-ratelimit
+      terminationGracePeriodSeconds: 300
+      volumes:
+      - name: certs
+        secret:
+          defaultMode: 420
+          secretName: envoy-rate-limit
+      - configMap:
+          defaultMode: 420
+          name: statsd-exporter-config
+          optional: true
+        name: statsd-exporter-config
+status: {}
diff --git a/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml
new file mode 100644
index 00000000000..e36ff5ef87d
--- /dev/null
+++ b/internal/infrastructure/kubernetes/ratelimit/testdata/deployments/enable-tracing.yaml
@@ -0,0 +1,160 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  creationTimestamp: null
+  labels:
+    app.kubernetes.io/component: ratelimit
+    app.kubernetes.io/managed-by: envoy-gateway
+    app.kubernetes.io/name: envoy-ratelimit
+  name: envoy-ratelimit
+  namespace: envoy-gateway-system
+  ownerReferences:
+  - apiVersion: apps/v1
+    kind: Deployment
+    name: envoy-gateway
+    uid: test-owner-reference-uid-for-deployment
+spec:
+  progressDeadlineSeconds: 600
+  revisionHistoryLimit: 10
+  selector:
+    matchLabels:
+      app.kubernetes.io/component: ratelimit
+      app.kubernetes.io/managed-by: envoy-gateway
+      app.kubernetes.io/name: envoy-ratelimit
+  strategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      annotations:
+        prometheus.io/path: /metrics
+        prometheus.io/port: "19001"
+        prometheus.io/scrape: "true"
+      creationTimestamp: null
+      labels:
+        app.kubernetes.io/component: ratelimit
+        app.kubernetes.io/managed-by: envoy-gateway
+        app.kubernetes.io/name: envoy-ratelimit
+    spec:
+      automountServiceAccountToken: false
+      containers:
+      - command:
+        - /bin/ratelimit
+        env:
+        - name: RUNTIME_ROOT
+          value: /data
+        - name: RUNTIME_SUBDIRECTORY
+          value: ratelimit
+        - name: RUNTIME_IGNOREDOTFILES
+          value: "true"
+        - name: RUNTIME_WATCH_ROOT
+          value: "false"
+        - name: LOG_LEVEL
+          value: info
+        - name: USE_STATSD
+          value: "false"
+        - name: CONFIG_TYPE
+          value: GRPC_XDS_SOTW
+        - name: CONFIG_GRPC_XDS_SERVER_URL
+          value: envoy-gateway:18001
+        - name: CONFIG_GRPC_XDS_NODE_ID
+          value: envoy-ratelimit
+        - name: GRPC_SERVER_USE_TLS
+          value: "true"
+        - name: GRPC_SERVER_TLS_CERT
+          value: /certs/tls.crt
+        - name: GRPC_SERVER_TLS_KEY
+          value: /certs/tls.key
+        - name: GRPC_SERVER_TLS_CA_CERT
+          value: /certs/ca.crt
+        - name: CONFIG_GRPC_XDS_SERVER_USE_TLS
+          value: "true"
+        - name: CONFIG_GRPC_XDS_CLIENT_TLS_CERT
+          value: /certs/tls.crt
+        - name: CONFIG_GRPC_XDS_CLIENT_TLS_KEY
+          value: /certs/tls.key
+        - name: CONFIG_GRPC_XDS_SERVER_TLS_CACERT
+          value: /certs/ca.crt
+        - name: FORCE_START_WITHOUT_INITIAL_CONFIG
+          value: "true"
+        - name: REDIS_SOCKET_TYPE
+          value: tcp
+        - name: REDIS_URL
+          value: redis.redis.svc:6379
+        - name: TRACING_ENABLED
+          value: "true"
+        - name: TRACING_SERVICE_NAME
+          value: envoy-ratelimit
+        - name: TRACING_SERVICE_NAMESPACE
+          value: envoy-gateway-system
+        - name: TRACING_SERVICE_INSTANCE_ID
+          valueFrom:
+            fieldRef:
+              apiVersion: v1
+              fieldPath: metadata.name
+        - name: TRACING_SAMPLING_RATE
+          value: "1.0"
+        - name: OTEL_EXPORTER_OTLP_ENDPOINT
+          value: http://trace-collector.envoy-gateway-system.svc.cluster.local:4318
+        image: envoyproxy/ratelimit:master
+        imagePullPolicy: IfNotPresent
+        name: envoy-ratelimit
+        ports:
+        - containerPort: 8081
+          name: grpc
+          protocol: TCP
+        readinessProbe:
+          failureThreshold: 3
+          httpGet:
+            path: /healthcheck
+            port: 8080
+            scheme: HTTP
+          periodSeconds: 10
+          successThreshold: 1
+          timeoutSeconds: 1
+        resources:
+          requests:
+            cpu: 100m
+            memory: 512Mi
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /certs
+          name: certs
+          readOnly: true
+      - command:
+        - /bin/statsd_exporter
+        - --web.listen-address=:19001
+        - --statsd.mapping-config=/etc/statsd-exporter/conf.yaml
+        image: prom/statsd-exporter:v0.18.0
+        imagePullPolicy: IfNotPresent
+        name: prom-statsd-exporter
+        ports:
+        - containerPort: 9125
+          name: statsd
+          protocol: TCP
+        - containerPort: 19001
+          name: metrics
+          protocol: TCP
+        resources: {}
+        terminationMessagePath: /dev/termination-log
+        terminationMessagePolicy: File
+        volumeMounts:
+        - mountPath: /etc/statsd-exporter
+          name: statsd-exporter-config
+          readOnly: true
+      dnsPolicy: ClusterFirst
+      restartPolicy: Always
+      schedulerName: default-scheduler
+      serviceAccountName: envoy-ratelimit
+      terminationGracePeriodSeconds: 300
+      volumes:
+      - name: certs
+        secret:
+          defaultMode: 420
+          secretName: envoy-rate-limit
+      - configMap:
+          defaultMode: 420
+          name: statsd-exporter-config
+          optional: true
+        name: statsd-exporter-config
+status: {}
diff --git a/site/content/en/latest/api/extension_types.md b/site/content/en/latest/api/extension_types.md
index c3316f67026..7234b805d8f 100644
--- a/site/content/en/latest/api/extension_types.md
+++ b/site/content/en/latest/api/extension_types.md
@@ -2354,6 +2354,39 @@ _Appears in:_
 | Field | Type | Required | Description |
 | ---   | ---  | ---      | ---         |
 | `metrics` | _[RateLimitMetrics](#ratelimitmetrics)_ |  true  | Metrics defines metrics configuration for RateLimit. |
+| `tracing` | _[RateLimitTracing](#ratelimittracing)_ |  true  | Tracing defines traces configuration for RateLimit. |
+
+
+#### RateLimitTracing
+
+
+
+
+
+_Appears in:_
+- [RateLimitTelemetry](#ratelimittelemetry)
+
+| Field | Type | Required | Description |
+| ---   | ---  | ---      | ---         |
+| `samplingRate` | _integer_ |  false  | SamplingRate controls the rate at which traffic will be<br />selected for tracing if no prior sampling decision has been made.<br />Defaults to 100, valid values [0-100]. 100 indicates 100% sampling. |
+| `provider` | _[RateLimitTracingProvider](#ratelimittracingprovider)_ |  true  | Provider defines the rateLimit tracing provider.<br />Only OpenTelemetry is supported currently. |
+
+
+#### RateLimitTracingProvider
+
+
+
+RateLimitTracingProvider defines the tracing provider configuration of RateLimit
+
+_Appears in:_
+- [RateLimitTracing](#ratelimittracing)
+
+| Field | Type | Required | Description |
+| ---   | ---  | ---      | ---         |
+| `type` | _[RateLimitTracingProviderType](#ratelimittracingprovidertype)_ |  true  | Type defines the tracing provider type.<br />Since to RateLimit Exporter currently using OpenTelemetry, only OpenTelemetry is supported |
+| `url` | _string_ |  true  | URL is the endpoint of the trace collector that supports the OTLP protocol |
+
+
 
 
 #### RateLimitType
diff --git a/site/content/en/latest/tasks/observability/rate-limit-observability.md b/site/content/en/latest/tasks/observability/rate-limit-observability.md
new file mode 100644
index 00000000000..350be4dc4b1
--- /dev/null
+++ b/site/content/en/latest/tasks/observability/rate-limit-observability.md
@@ -0,0 +1,70 @@
+---
+title: "RateLimit Observability"
+---
+
+Envoy Gateway provides observability for the RateLimit instances.
+This guide shows you how to configure RateLimit observability, including traces.
+
+## Prerequisites
+
+Follow the steps from the [Quickstart Guide](../quickstart) to install Envoy Gateway and the HTTPRoute example manifest.
+Before proceeding, you should be able to query the example backend using HTTP. Follow the steps from the [Global Rate Limit](../traffic/global-rate-limit) to install RateLimit.
+
+[OpenTelemetry Collector](https://opentelemetry.io/docs/collector/) offers a vendor-agnostic implementation of how to receive, process and export telemetry data.
+
+Install OTel-Collector:
+
+```shell
+helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
+helm repo update
+helm upgrade --install otel-collector open-telemetry/opentelemetry-collector -f https://raw.githubusercontent.com/envoyproxy/gateway/latest/examples/otel-collector/helm-values.yaml -n monitoring --create-namespace --version 0.60.0
+```
+
+## Traces
+
+By default, the Envoy Gateway does not configure RateLimit to send traces to the OpenTelemetry Sink.
+You can configure the collector in the `rateLimit.telemetry.tracing` field of the `EnvoyGateway` CRD.
+
+RateLimit uses the OpenTelemetry Exporter to export traces to the collector.
+You can configure a collector that supports the OTLP protocol, which includes but is not limited to: OpenTelemetry Collector, Jaeger, Zipkin, and so on.
+
+***Note:***
+
+* By default, the Envoy Gateway configures a `100%` sampling rate for RateLimit, which may lead to performance issues.
+
+Assuming the OpenTelemetry Collector is running in the `observability` namespace, and it has a service named `otel-svc`,
+we only want to sample `50%` of the trace data. We would configure it as follows:
+
+```shell
+cat <<EOF | kubectl apply -f -
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: envoy-gateway-config
+  namespace: envoy-gateway-system
+data:
+  envoy-gateway.yaml: |
+    apiVersion: gateway.envoyproxy.io/v1alpha1
+    kind: EnvoyGateway
+    provider:
+      type: Kubernetes
+    gateway:
+      controllerName: gateway.envoyproxy.io/gatewayclass-controller
+    rateLimit:
+      backend:
+        type: Redis
+        redis:
+          url: redis-service.default.svc.cluster.local:6379
+      telemetry:
+        tracing:
+          samplingRate: 50
+          provider:
+            url: otel-svc.observability.svc.cluster.local:4318
+EOF
+```
+
+After updating the ConfigMap, you will need to restart the envoy-gateway deployment so the configuration kicks in:
+
+```shell
+kubectl rollout restart deployment envoy-gateway -n envoy-gateway-system
+```